MySQL: Select newest two rows per Group - mysql

I have a table like this:
-- Sample schema: one row per reported device position; a larger `id` means a newer row.
CREATE TABLE `data` (
    `id`         INT(11) NOT NULL,
    `deviceId`   INT(11) NOT NULL,
    `position_x` INT(11) NOT NULL,
    `position_y` INT(11) NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;
COMMIT;
id, deviceId, position_x, position_y
1 1 100 0
2 2 150 50
3 3 200 20
4 1 220 20
5 1 210 10
6 2 100 40
7 3 120 50
8 3 130 60
9 2 240 15
I need the "newest" two rows per DeviceID, where a bigger ID means newer.
Right now, I'm selecting the newest row per Device via this query:
-- Newest row per device: keep only the rows whose id is the per-device maximum.
SELECT
    d.id,
    d.deviceId,
    d.position_x,
    d.position_y
FROM data AS d
WHERE d.id IN (
        SELECT MAX(latest.id)
        FROM data AS latest
        GROUP BY latest.deviceId
    )
    AND d.deviceId > 0
And in a loop, where I output the data, I select the second latest row for every deviceId in an individual query, which is kinda slow/dirty:
-- Second-newest row for one device: the row with the highest id below :id.
-- :deviceId and :id are bound parameters (presumably taken from the current
-- row of the "newest row per device" query).
SELECT
    position_x,  -- the comma matters: without it, position_y is parsed as an alias of position_x
    position_y
FROM data
WHERE deviceId = :deviceId
    AND id < :id
ORDER BY id DESC
LIMIT 1
Is there a way to combine both queries or at least, in one query, select the second row for every deviceId from query 1?
Thanks

You can try using row_number()
-- Rank each device's rows from newest (rn = 1) to oldest, then keep the
-- second-newest row. Use `rn <= 2` instead to get the two newest rows.
-- Window functions such as ROW_NUMBER() require MySQL 8.0 or later.
SELECT
    ranked.id,
    ranked.deviceId,
    ranked.position_x,
    ranked.position_y,
    ranked.rn
FROM (
    SELECT
        id,
        deviceId,
        position_x,
        position_y,
        ROW_NUMBER() OVER (PARTITION BY deviceId ORDER BY id DESC) AS rn
    FROM data
    WHERE deviceId > 0
) AS ranked
WHERE ranked.rn = 2

You can use a correlated subquery for this as well:
-- For every device, return the row whose id is the second-highest for that device.
SELECT cur.*
FROM data AS cur
WHERE cur.deviceId > 0
    AND cur.id = (
        SELECT prev.id
        FROM data AS prev
        WHERE prev.deviceId = cur.deviceId
        ORDER BY prev.id DESC
        LIMIT 1 OFFSET 1  -- skip the newest id, take the next one
    );
With an index on data(deviceId, id desc), you might be impressed at the performance.

Related

Aggregate rows by id comparing column values

I have the following table that groups users by their permissions
userIds permissions
4,5,7,8 100,1600,500,501,502,400,401,1500,1501
The numbers in the permissions column are the sections ids.
Some of these sections may have other data associated which I retrieved and stored in another table.
sectionId userId resourceId
100 4 NULL
1600 4 NULL
500 4 NULL
501 4 NULL
502 4 NULL
400 4 NULL
401 4 1
1500 4 NULL
1501 4 NULL
100 5 NULL
1600 5 NULL
500 5 NULL
501 5 NULL
502 5 NULL
400 5 NULL
401 5 1,2
1500 5 NULL
1501 5 NULL
100 7 NULL
1600 7 NULL
500 7 NULL
501 7 NULL
502 7 NULL
400 7 NULL
401 7 2
1500 7 NULL
1501 7 NULL
100 8 NULL
1600 8 NULL
500 8 NULL
501 8 NULL
502 8 NULL
400 8 NULL
401 8 1
1500 8 NULL
1501 8 NULL
My goal is to compare, for each user in the userIds column of the first table (split by comma), every row of the second table in order to check whether each user has the same resourceId value for that specific sectionId.
If two or more users have the same resourceId value for each section, I want to keep them grouped together; otherwise they need to be on different rows.
This is the output I'm expecting from the sample data provided:
userIds permissions
4,8 100,1600,500,501,502,400,401,1500,1501
5 100,1600,500,501,502,400,401,1500,1501
7 100,1600,500,501,502,400,401,1500,1501
UPDATE
I managed to get the desired output in the following way:
-- Numbers table creation
DROP TEMPORARY TABLE IF EXISTS tally;
CREATE TEMPORARY TABLE tally (
    n INT NOT NULL AUTO_INCREMENT PRIMARY KEY
);
-- Two ten-row digit sets cross-joined give 100 rows; inserting NULL into the
-- AUTO_INCREMENT column lets MySQL assign the sequence values.
INSERT INTO tally (n)
SELECT NULL
FROM (
    SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
) AS a
CROSS JOIN (
    SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
    UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9
) AS b;
-- Split users by comma from first table
-- Produces one (userid, permissions) row per comma-separated id found in
-- tmppermissions.userids (tmppermissions itself is created outside this script).
DROP temporary TABLE IF EXISTS tmppermissions2;
CREATE temporary TABLE tmppermissions2
(
userid VARCHAR(255) NOT NULL,
permissions TEXT NOT NULL
);
INSERT INTO tmppermissions2
SELECT userid,
permissions
-- n-th list element: take the first n comma-separated items, then the last item of that prefix
FROM (SELECT Substring_index(Substring_index(t.userids, ',', tally.n), ',', -1
)
userId,
t.permissions
permissions
FROM tally
INNER JOIN tmppermissions t
-- (number of commas in userids) >= n - 1, i.e. n never exceeds the number of ids in the list
ON Char_length(t.userids) - Char_length(
REPLACE(t.userids, ',',
'')) >=
tally.n - 1
ORDER BY n) AS split;
-- Gets the users with the same permissions
-- Result: one row per distinct permissions value, holding the comma-joined ids
-- of the users that stay grouped together; profileid is left NULL.
DROP temporary TABLE IF EXISTS sharedprofiles;
CREATE temporary TABLE sharedprofiles
(
userids VARCHAR(255) NOT NULL,
permissions TEXT NOT NULL,
profileid INT(11)
);
INSERT INTO sharedprofiles
SELECT Group_concat(userid),
permissions,
NULL
FROM tmppermissions2
-- Exclude the users that must be split off: ids taken from a multi-user userids
-- list (split) that are the only user of some (resourceid, sectionid) group in
-- tmpcurrentresources2 (b).
WHERE userid NOT IN (SELECT split.userid
FROM (SELECT Substring_index(Substring_index(r.userids,
',',
t.n), ',', -1)
userId
FROM tally t
INNER JOIN tmppermissions r
ON Char_length(r.userids)
- Char_length(
REPLACE(r.userids, ',',
'')) >=
t.n - 1
-- only userids lists that contain more than one id
WHERE Position(',' IN r.userids) > 0
ORDER BY n) AS split
WHERE split.userid IN (SELECT *
FROM (SELECT Group_concat(userid
ORDER
BY userid ASC)
AS
users
FROM
tmpcurrentresources2
GROUP BY resourceid,
sectionid
ORDER BY users) b
-- keep only groups made of a single user (no comma in the joined list)
WHERE Position(',' IN b.users) =
0))
GROUP BY permissions
ORDER BY Group_concat(userid);
-- Gets the users with specific permissions
-- Result: one row per user that has to stand alone, i.e. the same set of users
-- that the sharedprofiles step filters out with NOT IN; profileid is left NULL.
DROP temporary TABLE IF EXISTS singleprofiles;
CREATE temporary TABLE singleprofiles
(
userid VARCHAR(255) NOT NULL,
permissions TEXT NOT NULL,
profileid INT(11)
);
INSERT INTO singleprofiles
SELECT userid,
permissions,
NULL
FROM tmppermissions2
-- Keep only ids taken from a multi-user userids list (split) that are the only
-- user of some (resourceid, sectionid) group in tmpcurrentresources2 (b).
WHERE userid IN (SELECT split.userid
FROM (SELECT Substring_index(Substring_index(r.userids, ',',
t.n),
',', -1)
userId
FROM tally t
INNER JOIN tmppermissions r
ON Char_length(r.userids) -
Char_length(
REPLACE(r.userids, ',',
'')) >=
t.n - 1
-- only userids lists that contain more than one id
WHERE Position(',' IN r.userids) > 0
ORDER BY n) AS split
WHERE split.userid IN (SELECT *
FROM (SELECT Group_concat(userid
ORDER BY
userid ASC)
AS
users
FROM tmpcurrentresources2
GROUP BY resourceid,
sectionid
ORDER BY users) b
-- keep only groups made of a single user (no comma in the joined list)
WHERE Position(',' IN b.users) = 0))
ORDER BY userid;
-- Merge the results
-- Grouped users first, then the users that stand alone; UNION drops exact
-- duplicate rows and the output columns take their names from the first SELECT.
SELECT
    userids,
    permissions,
    profileid
FROM sharedprofiles
UNION
SELECT
    userid,
    permissions,
    profileid
FROM singleprofiles;
I'm wondering if there is a more concise way to accomplish the same result.
The solution (as I suspect you already know) is to normalise your schema.
So instead of...
userIds permissions
4,5 100,1600,500
...you might have
userIds permissions
4 100
4 1600
4 500
5 100
5 1600
5 500

How to find duplicate and update column value of all but the most recent entry

I have a table items with columns item_id, lookup_id, date, archive. I need to be able to go through the lookup_id column and identify duplicates, changing the archive value to 1 on every duplicate EXCEPT the newest entry in the table.
item_id Lookup_id date archive
------------------------------------------------
1234 4 1-1-19 0
1235 4 1-1-19 0
1236 4 1-1-19 0
1237 2 1-1-19 0
1238 1 1-1-19 0
1239 1 1-1-19 0
I've so far managed to find the duplicates using the following statement, but I'm at a bit of a loss where to go with this to achieve my desired result.
-- List every row whose lookup_id occurs more than once.
-- Duplicates are detected on lookup_id: item_id is different on every row,
-- so grouping on it can never yield a count above 1.
SELECT `item_id`, `lookup_id`, `date`, `archive`
FROM items
WHERE `lookup_id` IN (
    SELECT `lookup_id`
    FROM items
    GROUP BY `lookup_id`
    HAVING COUNT(*) > 1
)
ORDER BY `lookup_id`, `item_id`;
You can do it in two steps.
First set all values in archive to 1:
-- Step 1: mark every row as archived (intentionally unfiltered; the next
-- statement resets the newest row of each Lookup_id).
UPDATE items
SET archive = 1
WHERE 1;
Then set archive = 0 for "newest" entries only:
-- Step 2: clear the archive flag on the row with the highest item_id of each Lookup_id.
UPDATE items AS i
INNER JOIN (
    SELECT MAX(item_id) AS item_id
    FROM items
    GROUP BY Lookup_id
) AS newest
    ON newest.item_id = i.item_id
SET i.archive = 0;
You will get the following result:
item_id Lookup_id date archive
1234 4 1-1-19 1
1235 4 1-1-19 1
1236 4 1-1-19 0
1237 2 1-1-19 0
1238 1 1-1-19 1
1239 1 1-1-19 0
This method should be quite efficient with an index on (Lookup_id, item_id).
Demo
Looks like item_id is sequential, assuming newest entry has the highest item_id you could filter for the highest item_id for each lookup_id and then update all records except these.
-- Archive every row except the one with the highest item_id per lookup_id.
-- MySQL rejects `UPDATE items ... WHERE item_id NOT IN (SELECT ... FROM items)`
-- with error 1093 (the target table cannot be selected from in a subquery), so
-- the per-group maxima are computed in a derived table and anti-joined instead.
UPDATE items AS i
LEFT JOIN (
    SELECT MAX(item_id) AS item_id
    FROM items
    GROUP BY lookup_id
) AS newest
    ON newest.item_id = i.item_id
SET i.archive = 1
WHERE newest.item_id IS NULL;  -- no match means this row is not the newest of its group
To archive all the items with the same lookup_id except the newest one, you can use this SQL statement:
-- Archive every item for which a newer item with the same lookup_id exists
-- (newer = later date, ties broken by the higher id).
-- A matching `newer` row can only exist when the lookup_id has more than one
-- item, so the "more than one item" check is implied by the join, and the
-- newest item of each lookup_id never matches and is left untouched.
-- Written as a MySQL self-join: MySQL has no `UPDATE ... FROM`, does not allow
-- LIMIT inside an IN subquery, and cannot select from the update target in a
-- subquery.
-- NOTE(review): assumes `date` is NOT NULL and stored in a type that compares
-- chronologically — confirm.
UPDATE table_name AS tn
INNER JOIN table_name AS newer
    ON newer.lookup_id = tn.lookup_id
   AND (
        newer.date > tn.date
        OR (newer.date = tn.date AND newer.id > tn.id)
   )
SET tn.archive = 1;
In the where conditions first we check if more than one item with the same lookup_id exists and then we check that the actual item is not the newer of all the items with the same lookup_id.
Looking at your example, I am assuming the latest entry is the one with the highest item id.
if that's the case you can create a CTE with a column and use row number / partition by
Something like this - the join will change depending on which columns are unique
-- Number the rows of each lookup_id from newest (rn = 1) to oldest, then archive
-- everything but the newest. The ordering must be DESC: with an ascending order,
-- rn = 1 is the oldest row and the newest one would be archived instead.
-- MySQL 8.0+ syntax (CTE + window function, multi-table UPDATE with JOIN ... SET).
WITH cte_test AS (
    SELECT
        item_id,
        lookup_id,
        ROW_NUMBER() OVER (PARTITION BY lookup_id ORDER BY item_id DESC) AS rn
    FROM items
)
UPDATE items AS it2
INNER JOIN cte_test AS ct
    ON ct.item_id = it2.item_id
   AND ct.lookup_id = it2.lookup_id
SET it2.archive = 1
WHERE ct.rn > 1;

MySQL - select rows with limit

How can I select a certain number of rows based on a condition (using IN or any other conditional) and, if the number of rows returned by the condition is less than LIMIT x, select from the remaining rows until LIMIT x is met?
So, if I have the following table:
id comment ord
1 ... null
2 ... 1
3 ... 2
4 ... null
then the result set should be, using LIMIT 3:
id comment ord
2 ... 1
3 ... 2
1 ... null
If the ord column is not null, then I want to select the respective row(s), if not, I want to select one or more from the rest of the rows, having ord NULL, until the LIMIT 3 condition is reached.
Another example, if I have the next table data:
id comment ord
1 ... 3
2 ... 1
3 ... 2
4 ... null
Then the result set should be
id comment ord
2 ... 1
3 ... 2
1 ... 3
I have tried this mysql code:
-- Attempted query (does not produce the desired result):
-- the first IN branch matches every row with a non-NULL ord and the second
-- matches every row with a NULL ord, so together they match all rows. The
-- ORDER BY clauses live only inside the IN subqueries and the outer query has
-- none, so LIMIT 3 keeps whichever three rows happen to come back first.
SELECT t.* FROM table t
WHERE
t.ord IN (SELECT t1.ord FROM table t1 WHERE t1.ord IS NOT NULL ORDER BY t1.ord ASC)
OR
t.id IN (SELECT t2.id FROM table t2 WHERE t2.ord IS NULL ORDER BY t2.id ASC)
LIMIT 3;
But I always get the rows that have ord NULL, even if I have some ord columns not null.
Any help please?
Put the check for whether ord is null in your ORDER BY:
-- Rows with a non-NULL ord come first (ord IS NULL evaluates to 0 for them),
-- sorted by ord; NULL-ord rows follow, sorted by id. LIMIT then keeps the first 3.
-- `table` is a reserved word in MySQL, so the placeholder name must be quoted.
SELECT *
FROM `table`
ORDER BY ord IS NULL, ord, id
LIMIT 3
ord IS NULL will be 0 for non-null ord, 1 for null ord, so this will put all the non-null rows first. Within those it will order by ord, then the remainder will be ordered by id.
Then LIMIT 3 will select the first 3 of all of these.

Get the Top records from each group in MYSQL

I've a table event_log with the following columns in MYSQL,
-- Event log: one row per customer event, keyed by an auto-incrementing id.
CREATE TABLE IF NOT EXISTS `event_log` (
    `id`          INT(11)     NOT NULL AUTO_INCREMENT,
    `customer_id` VARCHAR(50) DEFAULT NULL,
    `event_time`  DATETIME    DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = latin1
  AUTO_INCREMENT = 1;
The sample Data can be,
id, customer_id, event_time
1 100 '2015-03-22 23:54:37'
2 100 '2015-03-21 23:54:37'
3 100 '2015-03-20 23:54:37'
4 101 '2015-03-19 23:54:37'
5 102 '2015-03-19 23:54:37'
6 102 '2015-03-18 23:54:37'
7 103 '2015-03-17 23:54:37'
8 103 '2015-03-16 23:54:37'
9 103 '2015-03-15 23:54:37'
10 103 '2015-03-14 23:54:37'
I want to group on customer_id and then pick the top 2 records from each group using event_time column (whose time is greater)
Please, suggest
Thanks,
Faisal Nasir
Here is a version that doesn't use variables:
-- Keep a row when at most two rows of the same customer (itself included)
-- have an event_time at or after its own, i.e. it is among the two latest.
SELECT cur.*
FROM event_log AS cur
WHERE (
        SELECT COUNT(*)
        FROM event_log AS peer
        WHERE peer.customer_id = cur.customer_id
          AND peer.event_time >= cur.event_time
    ) <= 2;
This could even have reasonable performance with an index on event_log(customer_id, event_time).
One way to use user defined variables to pick 2 recent entries per customer_id
-- Number each customer's rows from latest to earliest with user-defined
-- variables, then keep the first two per customer.
--   @g remembers the customer_id of the previous row,
--   @r is the running row number within the current customer.
-- User variables are written with `@`; a `#` would start a comment in MySQL
-- and silently cut off the rest of the line.
SELECT `id`, `customer_id`, `event_time`, row_num
FROM (
    SELECT *,
        @r := CASE WHEN @g = `customer_id` THEN @r + 1 ELSE 1 END AS row_num,
        @g := `customer_id`
    FROM event_log
    CROSS JOIN (SELECT @g := NULL, @r := 0) a  -- initialise both variables once
    ORDER BY `customer_id`, `event_time` DESC
) t
WHERE row_num <= 2
DEMO

mysql groupwise max as second where condition

I have a working query that seems awfully inefficient; I'm wondering if I'm missing a simple way to improve it.
Simple table:
id date master_id
-------------------------
1 2015-02-01 0
2 2015-02-02 0
3 2015-02-03 0
4 2015-02-04 1
5 2015-02-02 1
6 2015-02-17 1
7 2015-02-27 1
8 2015-01-01 1
Objective: Get all rows where the master_id is zero, OR the master_id is not zero and no other rows of the same master_id have an earlier date. Order every result by date.
Current query, using a groupwise minimum subquery to create the second WHERE condition.
-- All rows with master_id = 0, plus, for every non-zero master_id, the row(s)
-- carrying that group's earliest date; the result is ordered by date.
SELECT *
FROM `test`
WHERE `master_id` = 0
   OR `id` IN (
        SELECT test.`id`
        FROM (
            -- earliest date per non-zero master_id
            SELECT `master_id`, MIN(`date`) AS mindate
            FROM `test`
            WHERE `master_id` <> 0  -- the comparison operator was missing here
            GROUP BY `master_id`
        ) AS x
        INNER JOIN `test`
            ON x.`master_id` = test.`master_id`
           AND x.mindate = test.`date`
   )
ORDER BY `date`
It works, but the EXPLAIN makes it seem inefficient:
id select_type table type possible_keys key key_len ref rows Extra
-------------------------------------------------------------------------------------------------------------
1 PRIMARY test ALL NULL NULL NULL NULL 8 Using where; Using filesort
2 DEPENDENT SUBQUERY derived3 system NULL NULL NULL NULL 1
2 DEPENDENT SUBQUERY test eq_ref PRIMARY PRIMARY 4 func 1 Using where
3 DERIVED test ALL NULL NULL NULL NULL 8 Using where; Using temporary; Using filesort
Can I improve this? Should I break it into two queries, one for ID=0 and one for the groupwise min? Thanks in advance.
Avoiding the inner join can improve the query:
-- All rows with master_id = 0, plus every row for which no other row of the same
-- master_id has an earlier date; ordered by date. No join is needed.
-- This replaces the "ORDER BY inside a derived table + GROUP BY master_id" trick:
-- selecting a non-aggregated id under GROUP BY returns an arbitrary row of the
-- group and is rejected when ONLY_FULL_GROUP_BY is enabled.
SELECT t.*
FROM `test` AS t
WHERE t.`master_id` = 0
   OR NOT EXISTS (
        SELECT 1
        FROM `test` AS earlier
        WHERE earlier.`master_id` = t.`master_id`
          AND earlier.`date` < t.`date`
   )
ORDER BY t.`date`;
How about this...
-- Branch 1: every row with master_id = 0.
-- Branch 2: for each non-zero master_id, the row(s) carrying its earliest date.
-- The branches cannot overlap (master_id = 0 vs. <> 0), so UNION ALL is enough;
-- the trailing ORDER BY sorts the combined result by date, as the question requires.
(
    SELECT *
    FROM test
    WHERE master_id = 0
)
UNION ALL
(
    SELECT x.*
    FROM test AS x
    INNER JOIN (
        SELECT master_id, MIN(date) AS min_date
        FROM test
        WHERE master_id <> 0
        GROUP BY master_id
    ) AS y
        ON y.master_id = x.master_id
       AND y.min_date = x.date
)
ORDER BY `date`;