MySQL-condition on partition size - mysql

table1 has 3 columns in my database: id, category, timestamp. I need to query the newest 3 rows from each category:
WITH ranked_rows AS
(SELECT t.*, ROW_NUMBER() OVER (PARTITION BY category ORDER BY t.timestamp DESC) AS rn
FROM table1 AS t)
SELECT ranked_rows.* FROM ranked_rows WHERE rn<=3
now I need to add one more condition: select only from the partitions which have at least 3 rows. how to add this condition?

here is another way:
select * from (
SELECT t.*
, ROW_NUMBER() OVER (PARTITION BY category ORDER BY t.timestamp DESC) AS rn
, count(*) OVER (PARTITION BY category) AS cnt
FROM table1 AS t
) t
WHERE rn<=3 and cnt>= 3

You could make another CTE of only the categories matching your condition, then join to that:
WITH ranked_rows AS
(
SELECT t.*, ROW_NUMBER() OVER (PARTITION BY category ORDER BY t.timestamp DESC) AS rn
FROM table1 AS t
),
categories AS
(
SELECT category
FROM table1
GROUP BY category
HAVING COUNT(*) >= 3
)
SELECT r.* FROM ranked_rows AS r
JOIN categories AS c USING (category)
WHERE r.rn <= 3;

Related

mysql query bug

select id, s
from (
select o_user_id as id, sum(total_price) as s
from Orders o
group by o.o_user_id
) as t1
where s = (select max(t1.s) from t1)
it returns a bug said table t1 doesn't exist.
I want to find the id of the user who spends the most money among all of the orders
here is the table of order
That alias is out of scope for the subquery
select id, s
from (
select o_user_id as id, sum(total_price) as s
from Orders o
group by o.o_user_id
) as t1
where s = (select max(t1.s) from t1)
You can do
WITH T1 AS
(
select o_user_id as id, sum(total_price) as s
from Orders o
group by o.o_user_id
)
SELECT id, s
FROM T1
WHERE s = (select max(t1.s) from t1);
If you want only one row, you can use order by and limit:
select o_user_id as id, sum(total_price) as s
from Orders o
group by o.o_user_id
order by s desc
limit 1;
In MySQL 8+, you can use window functions. To get multiple rows in the event of ties, use rank():
select ou.*
from (select o_user_id as id, sum(total_price) as s,
rank() over (order by sum(total_price) desc) as seqnum
from Orders o
group by o.o_user_id
) ou
where seqnum = 1;

SQL - Difficult Query

I want to do a query where for each diagnostic code - ID (this is a column), select the name of the most common medication - p_name (another column) used to treat that condition i.e., the medication name that more often appears associated to prescriptions (table) for that diagnosis.
This is the structure of my prescription table:
| p_name | lab | doctor_VAT | date_timestamp | ID | dosage | prescription_description |
I started by making a query to count the tuple pairs p_name and ID:
SELECT DISTINCT p.ID,
p.p_name,
COUNT(*) OVER (PARTITION BY p.p_name, p.ID) AS Cnt
FROM prescription AS p
ORDER BY Cnt DESC
And then to this I tried to apply the "greatest_n_per_group" problem to this
(SQL select only rows with max value on a column):
FROM (SELECT DISTINCT p.ID,
p.p_name,
COUNT(*) OVER (PARTITION BY p.p_name, p.ID) AS Cnt
FROM prescription AS p
ORDER BY Cnt DESC) as Tabela
INNER JOIN(
SELECT Tabela2.ID, MAX(Tabela2.Cnt)
FROM (SELECT DISTINCT p.ID,
p.p_name,
COUNT(*) OVER (PARTITION BY p.p_name, p.ID) AS Cnt
FROM prescription AS p
ORDER BY Cnt DESC) as Tabela2
GROUP BY Tabela2.ID
)
But this produces errors, am I going at this the right the way? do you suggest a different method?
Since your MySQL supports Window function, You can simply use -
SELECT ID, p_name
FROM (SELECT ID, p_name, RANK() OVER(PARTITION BY ID ORDER BY CNT DESC) RNK
FROM (SELECT ID,
p_name,
COUNT(*) CNT
FROM prescription
GROUP BY ID,
p_name
) T1
) T2
WHERE RNK = 1
You need FROM ( ) T in this case T is the table name alias for the FROM subquery clause
FROM (SELECT DISTINCT p.ID,
p.p_name,
COUNT(*) OVER (PARTITION BY p.p_name, p.ID) AS Cnt
FROM prescription AS p
ORDER BY Cnt DESC) as Tabela
INNER JOIN(
SELECT Tabela2.ID, MAX(Tabela2.Cnt)
FROM (SELECT DISTINCT p.ID,
p.p_name,
COUNT(*) OVER (PARTITION BY p.p_name, p.ID) AS Cnt
FROM prescription AS p
ORDER BY Cnt DESC) as Tabela2
GROUP BY Tabela2.ID
) T
SELECT
p1.ID,
(SELECT TOP 1 p2.p_name
FROM prescription AS p2
WHERE p2.ID=p1.ID
GROUP BY p2.p_name
ORDER BY count(*) DESC) as MostUsed
FROM prescription AS p1
GROUP BY p1.ID
Above is for MSSQL, below is for MySQL
SELECT
p1.ID,
(SELECT p2.p_name
FROM prescription AS p2
WHERE p2.ID=p1.ID
GROUP BY p2.p_name
ORDER BY count(*) DESC
LIMIT 1) as MostUsed
FROM prescription AS p1
GROUP BY p1.ID
dbfiddle

getting same result without using "limit" (MySQL)

How do i get the same result but without using "limit" in mysql?
SELECT user_id
FROM user_interest
GROUP BY user_id
HAVING COUNT(user_id)
ORDER BY (COUNT(user_id)) DESC
LIMIT 2
Here's some suggestion if you don't want to use limit
select t2.user_id
from (
select row_number() over (order by t1.ct desc) as rn, t1.userid
from (
SELECT user_id, COUNT(user_id) as ct
FROM user_interest
GROUP BY user_id
HAVING COUNT(user_id)
)t1
) as t2 where t2.rn < 3

How to get the maximum and minimum values in each group as well as their times?

Let's say I have a table MyTable with the columns Id, NumericValue and UTCTimestamp. I'm trying to group the results of my table MyTable by the hour of their timestamp and return the maximum NumericValuefor each group with its associated timestamp as well as the minimum NumericValue for each group with its associated timestamp value.
For now, I'm able to achieve the first part of my problem with the following query:
SELECT
HOUR(t.UTCTimestamp) AS `Hour`,
t.NumericValue AS MaximumValue,
t.UTCTimestamp AS MaximumValueTime
FROM MyTable t
INNER JOIN (
SELECT HOUR(t2.UTCTimestamp) AS `Hour`, MAX(t2.NumericValue) AS NumericValue
FROM MyTable t2
GROUP BY HOUR(t2.UTCTimestamp)
) maxNumericValue ON HOUR(t.UTCTimestamp) = maxNumericValue.`Hour` AND t.NumericValue = maxNumericValue.NumericValue
GROUP BY HOUR(t.UTCTimestamp);
Which was inspired by this answer.
Here's an MVCE.
How could I also show the minimum value for each group as well as the timestamp associated to it?
Starting from MySQL 8.0 you could use ROW_NUMBER:
WITH cte AS (
SELECT *,ROW_NUMBER() OVER(PARTITION BY HOUR(UTCTimestamp)
ORDER BY UTCTimestamp ASC) AS rn
,ROW_NUMBER() OVER(PARTITION BY HOUR(UTCTimestamp)
ORDER BY UTCTimestamp DESC) AS rn2
FROM MyTable
)
SELECT HOUR(c1.UTCTimestamp),
c1.ID, c1.NumericValue, c1.UTCTimestamp, -- min row
c2.ID, c2.NumericValue, c2.UTCTimestamp -- max row
FROM cte c1
JOIN cte c2
ON HOUR(c1.UTCTimestamp) = HOUR(c2.UTCTimestamp)
AND c1.rn=1
AND c2.rn2=1
ORDER BY HOUR(c1.UTCTimestamp) ASC;
DBFiddle Demo
You can join to MyTable twice (and only use one aggregating subquery)
SELECT bounds.`Hour`
, minT.NumericValue AS MinValue
, minT.UTCTimestamp AS MinTime
, maxT.NumericValue AS MaximumValue
, maxT.UTCTimestamp AS MaximumValueTime
FROM (
SELECT HOUR(t2.UTCTimestamp) AS `Hour`
, MAX(t2.NumericValue) AS maxValue
, MIN(t2.NumericValue) AS minValue
FROM MyTable t2
GROUP BY HOUR(t2.UTCTimestamp)
) bounds
LEFT JOIN MyTable minT ON bounds.`Hour` = HOUR(minT.UTCTimestamp)
AND bounds.minValue = minT.NumericValue
LEFT JOIN MyTable maxT ON bounds.`Hour` = HOUR(maxT.UTCTimestamp)
AND bounds.maxValue = maxT.NumericValue
;
Apply the same technique but with minimum:
select a.*, b.MinimumValueTime from (
SELECT
HOUR(t.UTCTimestamp) AS `Hour`,
t.NumericValue AS MaximumValue,
t.UTCTimestamp AS MaximumValueTime
FROM MyTable t
INNER JOIN (
SELECT HOUR(t2.UTCTimestamp) AS `Hour`, MAX(t2.NumericValue) AS NumericValue
FROM MyTable t2
GROUP BY HOUR(t2.UTCTimestamp)
) maxNumericValue ON HOUR(t.UTCTimestamp) = maxNumericValue.`Hour` AND t.NumericValue = maxNumericValue.NumericValue
GROUP BY HOUR(t.UTCTimestamp))a
join
(
SELECT
HOUR(t.UTCTimestamp) AS `Hour`,
t.NumericValue AS MinimumValue,
t.UTCTimestamp AS MinimumValueTime
FROM MyTable t
INNER JOIN (
SELECT HOUR(t2.UTCTimestamp) AS `Hour`, MIN(t2.NumericValue) AS NumericValue
FROM MyTable t2
GROUP BY HOUR(t2.UTCTimestamp)
) minNumericValue ON HOUR(t.UTCTimestamp) = minNumericValue.`Hour` AND t.NumericValue = minNumericValue.NumericValue
GROUP BY HOUR(t.UTCTimestamp))b on a.hour=b.hour

SQL Server : how to avoid duplicate data?

I want to query above picture.
Left picture is original data, right picture is query data.
select distinct ID, Nickname, Revision
from test_table
This query do not show above picture.
How to avoid duplicate data?
If SQL Server, using window function ROW_NUMBER in subquery:
select t.id, t.nickname, t.revision
from (
select t.*, row_number() over (
partition by t.id order by t.revision desc
) rn
from your_table t
) t
where rn = 1;
Or using TOP with ties with ROW_NUMBER:
select top 1 with ties *
from your_table
order by row_number() over (
partition by id order by revision desc
)
If MySQL:
select t.*
from your_table t
inner join (
select id, MAX(revision) revision
from your_table
group by id
) t1 on t.id = t1.id
and t.revision = t1.revision;
Another trick using TOP 1 with TIES
SELECT Top 1 with ties *
FROM your_table t
Order by row_number() over (partition BY t.id order by t.revision DESC)
select distinct ID, Nickname, MAX(Revision)
from test_table
group by ID