SQL: Selecting a minimum value per unique group and changing values - mysql

I have a table that looks like this:
ID Date Category
x 1995 A
x 1996 B
z 1995 B
z 1996 A
y 1995 B
y 1996 B
What I want to do is to set the Category to whatever the value is for the minimum date per ID. So that the final result would look like:
ID Date Category
x 1995 A
x 1996 A
z 1995 B
z 1996 B
y 1995 B
y 1996 B
Does anyone know how to do this in SQL? Thanks!

You can use a subquery:
-- For each row, look up the category stored on the earliest-dated row of the same id.
SELECT
    m.id,
    m.date,
    (
        SELECT first_row.category
        FROM mytable AS first_row
        WHERE first_row.id = m.id
          -- keep only the row that has no earlier-dated row for this id
          AND NOT EXISTS (
              SELECT 1
              FROM mytable AS earlier
              WHERE earlier.id = first_row.id
                AND earlier.date < first_row.date
          )
    ) AS oldest_category
FROM mytable AS m;

This should do it although there might be a smarter way:
-- Attach to every row of table1 the category of that id's earliest-dated row.
SELECT
    table1.id,
    table1.date,
    first_rows.category
FROM table1
INNER JOIN (
    -- rows of table1 whose date equals the minimum date of their id
    SELECT
        src.id,
        src.category
    FROM table1 AS src
    INNER JOIN (
        SELECT
            id,
            MIN(date) AS min_date
        FROM table1
        GROUP BY id
    ) AS earliest
        ON src.id = earliest.id
       AND src.date = earliest.min_date
) AS first_rows
    ON table1.id = first_rows.id
In the query your source table is named table1. The logic is that the inner derived table limits the outer to the min(date)
Sample SQL Fiddle

Try this
-- Sample data in a table variable (T-SQL table variables are prefixed with @;
-- a # prefix names a temporary table, which cannot be created with DECLARE).
declare @t table (id char(1), date int, category char(1))

insert into @t (id, date, category)
select 'x', 1995, 'A' union all
select 'x', 1996, 'B' union all
select 'z', 1995, 'B' union all
select 'z', 1996, 'A' union all
select 'y', 1995, 'B' union all
select 'y', 1996, 'B'

-- For every row, show the category found on that id's earliest-dated row.
select t1.Id, t1.Date, t2.category
from @t as t1
left join
(
    -- rows whose date equals the minimum date of their id
    select t1.Id, t1.Date, t1.category
    from @t as t1
    inner join
    (
        select ID, min(Date) as Date from @t group by ID
    ) as t2 on t1.Id = t2.Id and t1.Date = t2.Date
) as t2 on t1.Id = t2.Id

Related

Need to aggregate results of the query for the non-exact matching name

I have a query that calculates the number of mentions of each partner in the database.
The query looks like this:
-- Count how many rows mention each partner.
SELECT
    partner,
    COUNT(*) AS Total
FROM database.data
GROUP BY partner;
The output then looks like this:
partner | total
---------+------
X | 5
Y | 3
Z | 2
X aa | 6
aa X | 7
Y aa | 1
What I need is for partners like X, X aa and aa X to be counted together in one row (and the same for Y and Y aa). I tried adding a HAVING clause at the end, but I wasn't able to make it work, and I'm not sure it's actually the right tool to use.
Would appreciate any help! Thanks!
Without optimization:
-- Roll partner totals up onto a shorter partner name that occurs inside the longer one.
-- Reads columns partner and total from a table named data.
WITH RECURSIVE
-- rank partners by name length (shortest names get rnk = 1)
cte1 AS ( SELECT partner, total, DENSE_RANK() OVER (ORDER BY LENGTH(partner)) rnk
FROM data ),
-- find pairs where one partner is a substring of another partner
-- anchor: the shortest names map to themselves
cte2 AS ( SELECT partner, total, rnk, partner short
FROM cte1
WHERE rnk = 1
UNION ALL
-- recursive step: pair every name of the next rank with each name produced for the previous rank
SELECT cte1.partner,
cte1.total,
cte1.rnk,
-- LOCATE returns a non-zero position when the previous name occurs inside this one
CASE WHEN LOCATE(cte2.partner, cte1.partner)
THEN cte2.partner
ELSE cte1.partner
END
FROM cte1, cte2
WHERE cte1.rnk = cte2.rnk + 1 ),
-- per partner, number the candidate short names from shortest to longest
cte3 AS ( SELECT partner,
total,
rnk, short,
ROW_NUMBER() OVER (PARTITION BY partner ORDER BY LENGTH(short)) rn
FROM cte2 )
-- keep the shortest candidate per partner and sum the totals per short name
SELECT short partner, SUM(total) total
FROM cte3
WHERE rn = 1
GROUP BY short
ORDER BY partner
fiddle
I think this should do it:
-- Count, for every one-character partner name, all rows whose partner value
-- contains that name (so 'X', 'X aa' and 'aa X' are all counted under 'X').
SELECT
    d1.partner,
    COUNT(*) AS Total
FROM (
    -- the distinct one-character partner names; DISTINCT on the single needed
    -- column replaces SELECT * ... GROUP BY, which ONLY_FULL_GROUP_BY rejects
    SELECT DISTINCT partner
    FROM database.data
    WHERE CHAR_LENGTH(partner) = 1
) d1
LEFT JOIN database.data d2
    -- matches 'x' itself as well as 'aa x' and 'x aa'
    ON d2.partner LIKE CONCAT('%', d1.partner, '%')
GROUP BY d1.partner;
With mock data:
-- Mock data: each of the nine partner values appears twice
-- (the cross join with a two-row derived table doubles every row).
-- Only the partner column is stored; SELECT * would also create a column named NULL.
CREATE TABLE IF NOT EXISTS tmp_mock_data
SELECT A.partner
FROM (
    SELECT 'X' partner
    UNION ALL
    SELECT 'Y'
    UNION ALL
    SELECT 'Z'
    UNION ALL
    SELECT 'Y aa'
    UNION ALL
    SELECT 'Z aa'
    UNION ALL
    SELECT 'X aa'
    UNION ALL
    SELECT 'aa Z'
    UNION ALL
    SELECT 'aa Y'
    UNION ALL
    SELECT 'aa X'
) A
CROSS JOIN (SELECT NULL UNION ALL SELECT NULL) B;

-- Count, for every one-character partner name, all rows whose partner value
-- contains that name.
SELECT
    d1.partner,
    COUNT(*) AS Total
FROM (
    -- the distinct one-character partner names; DISTINCT on the single needed
    -- column replaces SELECT * ... GROUP BY, which ONLY_FULL_GROUP_BY rejects
    SELECT DISTINCT partner
    FROM tmp_mock_data
    WHERE CHAR_LENGTH(partner) = 1
) d1
LEFT JOIN tmp_mock_data d2
    -- matches 'x' itself as well as 'aa x' and 'x aa'
    ON d2.partner LIKE CONCAT('%', d1.partner, '%')
GROUP BY d1.partner;

-- Clean up the mock table.
DROP TABLE tmp_mock_data;
-- Reduce every partner value to a single character, then count rows per character.
SELECT
    partner,
    COUNT(*) AS Total
FROM (
    -- take the first character when its ASCII code is in 65-90 (uppercase A-Z);
    -- otherwise take the last character
    SELECT
        CASE
            WHEN ASCII(LEFT(partner, 1)) BETWEEN 65 AND 90 THEN LEFT(partner, 1)
            ELSE RIGHT(partner, 1)
        END AS partner
    FROM database.data
) AS normalized  -- MySQL requires every derived table to have an alias
GROUP BY partner;

SQL query to select distinct row with minimum value from two table

I want a SQL query that returns, for each product, the row with the minimum price together with all the other fields of the two tables.
Consider this table:
T1: T2:
id Title id pcount price t1_id(foreign key)
1 x 1 3 3000 2
2 y 2 8 2500 2
3 z 3 4 1200 1
4 6 1000 1
5 9 4000 3
How can I select the below columns that have the minimum value in the price column, grouped by Title and get below fields? Like this:
id Title pcount price t1_id
1 y 8 2500 2
2 x 6 1000 1
3 z 9 4000 3
For Sql Server you can use OUTER APPLY:
-- For each t1 row, attach the t2 row with the lowest price that references it.
SELECT *
FROM t1
OUTER APPLY (
    SELECT TOP 1 *
    FROM t2
    WHERE t2.t1_id = t1.id
    ORDER BY t2.price
) AS oa
Try like this,
-- For each T1 row, return the T2 row(s) carrying that product's lowest price.
-- T1's key column is id (as in the question's schema), not tableid.
SELECT t1.*,
       cheapest.*
FROM T1 t1
-- lowest price among the T2 rows that reference this T1 row
CROSS APPLY (SELECT MIN(price) AS price
             FROM T2
             WHERE t1_id = t1.id) min_price
-- the T2 row(s) sold at that lowest price
LEFT OUTER JOIN T2 cheapest
    ON cheapest.t1_id = t1.id
   AND cheapest.price = min_price.price
-- Every t1 row together with the t2 row(s) holding the lowest price for it.
select *
from t1
left join
(
    select t2.pcount, t2.price, t2.t1_id
    from t2
    join
    (
        -- lowest price per t1_id
        select t1_id, min(price) pmin
        from t2
        group by t1_id
    ) tt
      -- the join condition belongs in ON; a JOIN without ON is accepted by MySQL only
      on t2.t1_id = tt.t1_id and t2.price = tt.pmin
) tt1
on t1.id = tt1.t1_id
result
id Title pcount price t1_id
1 x 6 1000 1
2 y 8 2500 2
3 z 9 4000 3
Also check this :
-- Sample data in table variables (T-SQL table variables are prefixed with @;
-- a # prefix names a temporary table, which cannot be created with DECLARE).
declare @t1 table(id int , title varchar(50))
declare @t2 table(id int , pcount int, price int, t1_id int)
insert into @t1 (id, title) values (1, 'x' ), (2,'y'), (3,'z')
insert into @t2 (id, pcount, price, t1_id) values (1, 3, 3000, 2 ), (2, 8, 2500, 2 ),(3, 4, 1200, 1 ),(4, 6, 1000, 1),(5, 9, 4000, 3)

-- Option 1: number the @t2 rows of each t1_id by ascending price and keep the first.
;with cte
as(
select * from (select * , ROW_NUMBER() OVER (PARTITION BY t1_id ORDER BY t1_id asc, price asc ) AS sequence_id from @t2 ) a where sequence_id = 1
)
select t1.*, cte.pcount ,cte.price from @t1 t1 join cte on t1.id = cte.t1_id

-- Option 2: the same idea spelled out step by step.
;with cte as
(
-- lowest price per t1_id
select t1_id, min(price) price
from @t2 group by t1_id
)
, cte1 as
(
-- every @t1/@t2 pair; the cte columns are filled only on the pair carrying the lowest price
select t1.*,t2.pcount, cte.* from @t1 t1
left outer join @t2 t2 on t1.id = t2.t1_id
left outer join cte cte on t1.id = cte.t1_id and (t2.t1_id =cte.t1_id and t2.price = cte.price)
)
-- drop the pairs that did not match the lowest price
select * from cte1 where t1_id is not null

How to show MIN value with whole table

I have table like:
id col1 col2
1 a 55
2 b 77
In result i want to see:
id col1 col2 MIN(col2)
1 a 55 55
2 b 77
Something like that, or in other case, how i can get one minimum value with whole table.
You can use a CROSS JOIN with a subquery which will select the min(col2) value for the entire table:
-- Return every row of yourtable with the table-wide minimum of col2 alongside it.
SELECT
    src.id,
    src.col1,
    src.col2,
    overall.minCol2
FROM yourtable AS src
CROSS JOIN (
    -- single-row derived table holding the overall minimum
    SELECT MIN(col2) AS minCol2
    FROM yourtable
) AS overall
See SQL Fiddle with Demo.
If you want to expand this to only show the min(col2) value on the first row, then you could use user-defined variables:
-- Show the table-wide minimum of col2 on the row numbered 1 only; other rows get ''.
select id,
  col1,
  col2,
  case when rn = 1 then mincol2 else '' end mincol2
from
(
  select t1.id,
    t1.col1,
    t1.col2,
    t2.minCol2,
    -- row counter kept in MySQL user variables (the prefix is @;
    -- a leading # would start a comment and cut the statement short)
    @row:=case when @prev:=t1.id then @row else 0 end +1 rn,
    @prev:=t1.id
  from yourtable t1
  cross join
  (
    -- single-row derived table holding the overall minimum
    select min(col2) minCol2
    from yourtable
  ) t2
  -- initialise the user variables inside the statement
  cross join (select @row:=0, @prev:=null) r
  order by t1.id
) d
order by id
See SQL Fiddle with Demo
If you had more than one column that you want to compare, then you could unpivot the data using a UNION ALL query and then select the min value for the result:
-- Return every row with the smallest value found across col2 and col3 of the whole table.
SELECT
    base.id,
    base.col1,
    base.col2,
    lowest.MinCol
FROM yourtable AS base
CROSS JOIN (
    SELECT MIN(unpivoted.col) AS MinCol
    FROM (
        -- stack the columns to compare into a single column
        SELECT col2 AS col
        FROM yourtable
        UNION ALL
        SELECT col3
        FROM yourtable
    ) AS unpivoted
) AS lowest
See SQL Fiddle with Demo
You can't. The number of columns is fixed, so you can get the minimum value on all the rows as described by @bluefeet.
You could get it on a smaller number of rows (typically 1) by using the logic:
(case when t2.minCol2 = t1.col2 then t2.minCol2 end)
But this would put NULLs on the other rows.

Finding the most common entry in a column - SQL

I have a table called MyTable like so
A B
101 Dog
209 Cat
209 Cat
209 Dog
193 Cow
193 Dog
101 Dog
193 Dog
193 Cow
And I want to pull out the most common B for each A so it would end up being like this (note that there can be ties)
A B
101 Dog
209 Cat
193 Dog
193 Cow
How could I write sql to do this?
Alternatively, you can use HAVING clause instead of JOIN.
-- List the (A, B) pairs whose row count equals the highest count seen for that A
-- (pairs tied for the highest count are all returned).
SELECT
    o.A,
    o.B
FROM table1 AS o
GROUP BY
    o.A,
    o.B
HAVING COUNT(*) = (
    -- highest per-(A, B) row count within the current A
    SELECT MAX(pair_counts.total_count)
    FROM (
        SELECT
            A,
            B,
            COUNT(*) AS total_count
        FROM table1
        GROUP BY
            A,
            B
    ) AS pair_counts
    WHERE pair_counts.A = o.A
    GROUP BY pair_counts.A
)
SQLFiddle Demo
You could use a filtering join to list the (A,B) combination with the highest rowcount:
-- Keep the (A, B) combinations whose row count equals the maximum count for their A.
SELECT pair_counts.*
FROM (
    -- row count per (A, B)
    SELECT A, B, COUNT(*) AS cnt
    FROM YourTable
    GROUP BY A, B
) AS pair_counts
INNER JOIN (
    -- highest (A, B) count per A
    SELECT A, MAX(cnt) AS maxcnt
    FROM (
        SELECT A, B, COUNT(*) AS cnt
        FROM YourTable
        GROUP BY A, B
    ) AS comb
    GROUP BY A
) AS top_counts
    ON top_counts.A = pair_counts.A
   AND top_counts.maxcnt = pair_counts.cnt
Example at SQL Fiddle.
If your database supports windowing functions, you can use dense_rank(), like:
-- Rank each (A, B) combination inside its A by row count, highest first,
-- and keep the combinations ranked 1.
SELECT *
FROM (
    SELECT
        DENSE_RANK() OVER (PARTITION BY A ORDER BY cnt DESC) AS rn,
        *
    FROM (
        -- row count per (A, B)
        SELECT A, B, COUNT(*) AS cnt
        FROM YourTable
        GROUP BY A, B
    ) AS pair_counts
) AS ranked
WHERE rn = 1
Window function example at SQL Fiddle. Windowing functions are available on recent versions of SQL Server, Oracle and PostgreSQL.
-- Return the (A, B) pairs whose row count equals the highest count for their A.
select g3.A, g3.B
from
(
    -- highest per-(A, B) count for every A; the alias must match the
    -- g2.MaxC reference in the join condition below
    select A, Max(C) MaxC
    from
    (
        select A, B, count(*) C
        from (<your entire select query>) tbl
        group by A, B
    ) g1
    group by A
) g2
join
(
    -- row count of every (A, B) pair
    select A, B, count(*) C
    from (<your entire select query>) tbl
    group by A, B
) g3 on g2.A = g3.A and g3.C = g2.MaxC
SQL FIDDLE Example
-- Most common B for each A; combinations tied for the top count are all returned.
select distinct
    A, B
from
(
    select
        A, B,
        -- dense_rank gives tied counts the same rank, so every tied
        -- combination survives the filter (row_number would keep only one)
        dense_rank() over (partition by A order by cnt desc) as RowNum
    from
    (
        -- count(*) over (...) keeps one row per source row, each carrying the
        -- size of its (A, B) group; the outer DISTINCT collapses them again
        select
            T.A, T.B, count(*) over (partition by T.A, T.B) as cnt
        from T
    ) as counted
) as ranked
where RowNum = 1

MySQL GROUP BY and HAVING

I'm grouping my results based on column X and I want to return, for each group, the row that has the highest value of column Y.
-- One row per col1 group, kept when its col2 is at least the maximum col2
-- found among the rows sharing that col1.
SELECT *
FROM mytable
GROUP BY col1
HAVING col2 >= (
    SELECT MAX(peer.col2)
    FROM mytable AS peer
    WHERE peer.col1 = mytable.col1
    GROUP BY peer.col1
)
I want to optimize the query above. Is it doable without sub-queries?
I found the solution and it's simpler than you think:
-- Sorts mytable by col2 descending in a derived table, then groups the result
-- by col1 while selecting every column.
-- NOTE(review): the query itself does not state which row's values represent
-- each col1 group; this presumably relies on the engine taking the first sorted
-- row and on non-aggregated columns being allowed with GROUP BY -- confirm on
-- the target MySQL version and sql_mode.
SELECT * FROM (SELECT * FROM mytable ORDER BY col2 DESC) temp GROUP BY col1
Runs in 5 milliseconds on 20,000 rows.
Using a derived table/inline view for a JOIN:
-- Rows of mytable whose col2 equals the highest col2 of their col1 group.
SELECT x.*
FROM mytable x
JOIN (SELECT t.col1,
             MAX(t.col2) AS max_col2
      FROM mytable t
      GROUP BY t.col1) y ON y.col1 = x.col1
                        -- equality, not >=: the group maximum is >= every col2
                        -- in its group, so >= would let every row through
                        AND y.max_col2 = x.col2
Be aware that this will duplicate x records if there's more than one related y record. To remove duplicates, use DISTINCT:
-- Distinct rows of mytable whose col2 equals the highest col2 of their col1 group.
SELECT DISTINCT x.*
FROM mytable x
JOIN (SELECT t.col1,
             MAX(t.col2) AS max_col2
      FROM mytable t
      GROUP BY t.col1) y ON y.col1 = x.col1
                        -- equality, not >=: the group maximum is >= every col2
                        -- in its group, so >= would let every row through
                        AND y.max_col2 = x.col2
The following is untested, but will not return duplicates (assuming valid):
-- Rows of mytable whose col2 equals the highest col2 among rows with the same col1.
SELECT x.*
FROM mytable x
WHERE EXISTS (SELECT NULL
              FROM mytable y
              WHERE y.col1 = x.col1
              GROUP BY y.col1
              -- equality, not >=: MAX(y.col2) is >= every col2 in the group,
              -- so >= would be true for every row
              HAVING MAX(y.col2) = x.col2)
Your col2 can never be greater than MAX(col2), so I suggest using col2 = MAX(col2) instead.
So here is the query:
-- Rows whose col2 equals the maximum col2 of their col1 group.
-- The comparison is made per row in WHERE: in a HAVING after GROUP BY col1,
-- a bare col2 is not tied to any particular row of the group.
SELECT t.*
FROM mytable AS t
WHERE t.col2 = (SELECT MAX(m.col2)
                FROM mytable AS m
                WHERE m.col1 = t.col1)