mysql removing duplicates with where clause - mysql

I am trying to remove duplicate records that have the same hid value.
Here's the Fiddle:
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=86e8ed00cf0a496da490eae5d7aae093
Table product_match_unmatches:
ID hid flag
1 1 1
2 1 1
3 2 1
4 2 1
5 1 2
6 2 2
7 2 2
8 1 1
9 1 1
10 2 1
Now I want to remove rows with a duplicate hid from the table, but only for flag = 1.
This query will remove all duplicates except for the recent one, but irrespective of flag values:
-- Deletes every row that is not the highest-ID row of its hid.
-- The derived table pmu3 holds MAX(ID) per hid; rows of pmu1 that find no
-- match there (pmu3.ID IS NULL after the LEFT JOIN) are removed.
-- No condition on flag is applied, so rows of every flag value are affected.
DELETE pmu1
FROM dmf_product_match_unmatches as pmu1
LEFT JOIN ( SELECT MAX(ID) as ID, hid
FROM dmf_product_match_unmatches as pmu2
GROUP BY hid) pmu3 USING (ID, hid)
WHERE pmu3.ID IS NULL;
I tried adding the condition flag = 1 to the above query, but it does not produce the desired result.
-- Attempt at restricting the de-duplication to flag = 1.
-- pmu3 now holds MAX(ID) per hid among the flag = 1 rows only, and the
-- outer filter keeps the matching rows (IS NOT NULL), so this statement
-- deletes exactly those highest-ID flag = 1 rows instead of the older ones.
DELETE pmu1
FROM dmf_product_match_unmatches as pmu1
LEFT JOIN ( SELECT MAX(ID) as ID, hid
FROM dmf_product_match_unmatches as pmu2
where flag = 1
GROUP BY hid
) pmu3 USING (ID, hid)
WHERE pmu3.ID IS NOT NULL;
The required output is:
ID hid flag
5 1 2
6 2 2
7 2 2
9 1 1
10 2 1

Do you need this:
-- Remove every flag = 1 row for which another row with the same hid and
-- flag but a higher id exists; the highest-id row per hid is left untouched.
DELETE older
FROM dmf_product_match_unmatches AS older
INNER JOIN dmf_product_match_unmatches AS newer
    ON newer.hid = older.hid
   AND newer.flag = older.flag
WHERE older.flag = 1
  AND older.id < newer.id;
?
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=a5e9e95335573ebedd45cdcd577b5602

Using row_number
-- Number the flag = 1 rows of each hid from highest id (rn = 1) downwards,
-- then delete everything except the first row of each hid.
DELETE target
FROM dmf_product_match_unmatches AS target
INNER JOIN (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY hid ORDER BY id DESC) AS rn
    FROM dmf_product_match_unmatches
    WHERE flag = 1
) AS ranked
    ON ranked.id = target.id
WHERE ranked.rn > 1;

In sql server you can use the EXCEPT set operator:
-- T-SQL: local variables carry an @ prefix (a # prefix names a temporary table).
DECLARE @flag int = 1;

-- Delete all rows with the chosen flag except the highest id of each
-- (hid, flag) group: the EXCEPT removes those "keepers" from the id list.
DELETE FROM dmf_product_match_unmatches
WHERE id IN (
    SELECT id
    FROM dmf_product_match_unmatches
    WHERE flag = @flag
    EXCEPT
    SELECT MAX(id) AS id
    FROM dmf_product_match_unmatches
    WHERE flag = @flag
    GROUP BY hid, flag
);
In mysql you can use NOT EXISTS
-- MySQL: user variables are assigned with SET and prefixed with @
-- (DECLARE is only available inside stored programs).
SET @flag = 1;

-- Delete the rows with the chosen flag that are not the highest id of their
-- (hid, flag) group.
-- The per-group maxima are computed in a derived table so the subquery does
-- not read the DELETE target table directly, which MySQL restricts
-- (error 1093).
DELETE d1
FROM dmf_product_match_unmatches AS d1
WHERE d1.flag = @flag
  AND NOT EXISTS (
      SELECT 1
      FROM (
          SELECT MAX(id) AS id
          FROM dmf_product_match_unmatches
          GROUP BY hid, flag
      ) AS newest
      WHERE newest.id = d1.id
  );

Related

Updating table based on same table with a max value

Have a table data structure like below:
id
regid
docid
archived
1
1000
1
0
2
1000
2
0
3
1000
3
0
4
2000
1
0
5
2000
2
0
6
3000
1
0
7
3000
2
0
8
3000
3
0
9
3000
4
0
What I'm trying to do is update the archived column to 1 where the docid is less than the max docid within each regid group.
So I should end up with id's 3, 5 & 9 not being set to 1
Have tried:
-- Asker's attempt. The derived table t2 aggregates with MAX(docid) but has no
-- GROUP BY, so it collapses to a single row instead of one row per regid.
update table t1
join (select max(docid) as maxdocid, regid from table) t2 on t1.docid < t2.maxdocid and t1.regid = t2.regid
set t1.archived = 1
But doesn't work, only does the first regid group.
Here's a solution (in MySQL 8.0+) using a CTE:
-- Rank each regid's rows by docid, highest first; every row ranked after the
-- first is archived unless it is already flagged.
WITH ranked_docs AS (
    SELECT
        id,
        ROW_NUMBER() OVER (PARTITION BY regid ORDER BY docid DESC) AS doc_rank
    FROM mytable
)
UPDATE mytable AS m
INNER JOIN ranked_docs AS r
    ON r.id = m.id
SET m.archived = 1
WHERE r.doc_rank > 1
  AND m.archived = 0;
Second solution, if you use an older version of MySQL that doesn't support CTE syntax:
You don't really need to compute the max docid value. If you want to update all rows except for the row with the max docid value, then you can check if a row can be matched to any other row with a greater docid value.
-- Archive every not-yet-archived row for which the same regid has a row
-- with a greater docid.
UPDATE mytable AS older
INNER JOIN mytable AS newer
    ON newer.regid = older.regid
   AND newer.docid > older.docid
SET older.archived = 1
WHERE older.archived = 0;
This will be true for all rows except the row with the max value. That row will be excluded automatically by the join.
In steps:
Create a query with the MAX docid value, per regid:
-- List every row together with M, the highest docid found for its regid.
SELECT
    cur.ID,
    cur.regid,
    cur.docid,
    (
        SELECT MAX(grp.docid)
        FROM t1 AS grp
        WHERE grp.regid = cur.regid
    ) AS M
FROM t1 AS cur
Join the result, and update:
-- Join each row to its regid's highest docid (M) and archive the rows whose
-- own docid is below it, skipping rows that are already archived.
UPDATE t1
INNER JOIN (
    SELECT
        cur.ID,
        cur.regid,
        cur.docid,
        (
            SELECT MAX(grp.docid)
            FROM t1 AS grp
            WHERE grp.regid = cur.regid
        ) AS M
    FROM t1 AS cur
) AS x
    ON x.id = t1.id
SET t1.archived = 1
WHERE t1.archived = 0
  AND t1.docid < x.M;
see: DBFIDDLE
You could try:
-- Archive every not-yet-archived row whose docid is below the highest docid
-- of its regid.
-- The comparison is made on docid itself rather than on MAX(id), so the
-- result does not depend on id and docid increasing together.
UPDATE test_tbl AS t1
INNER JOIN (
    SELECT
        regid,
        MAX(docid) AS max_docid
    FROM test_tbl
    GROUP BY regid
) AS latest
    ON latest.regid = t1.regid
SET t1.archived = 1
WHERE t1.archived = 0
  AND t1.docid < latest.max_docid;
Result:
id regid docid archived
1 1000 1 1
2 1000 2 1
3 1000 3 0
4 2000 1 1
5 2000 2 0
6 3000 1 1
7 3000 2 1
8 3000 3 1
9 3000 4 0
Demo
Or you can use a LEFT JOIN
-- Anti-join form: `latest` holds the highest docid per regid; rows that do
-- not match it (latest.regid IS NULL after the LEFT JOIN) are archived.
-- Matching on (regid, docid) instead of MAX(id) keeps the result correct
-- even when id order and docid order differ.
UPDATE test_tbl AS t1
LEFT JOIN (
    SELECT
        regid,
        MAX(docid) AS docid
    FROM test_tbl
    GROUP BY regid
) AS latest
    ON latest.regid = t1.regid
   AND latest.docid = t1.docid
SET t1.archived = 1
WHERE t1.archived = 0
  AND latest.regid IS NULL;
Demo
Use a self join in the update statement:
-- Archive each row for which the same regid has a row with a higher docid.
UPDATE tablename AS older
INNER JOIN tablename AS newer
    ON newer.regid = older.regid
   AND newer.docid > older.docid
SET older.archived = 1;
See the demo.

MySQL Select first entry of GROUP after custom ORDER BY

I'm trying to fetch the first entry of each group after a custom ORDER BY, but I don't know how to select that first entry of each group. Within each group, rows should be ordered so that the row for @team comes first if it exists, otherwise a row for any other team, and otherwise the row whose team is NULL.
-- Asker's starting query: rows for the requested region, ordered so that the
-- requested @team sorts first, then any other team, then rows without a team.
-- MySQL user variables are written with @ (a # would start a comment).
SELECT t1.*
FROM tbl t1
INNER JOIN tbl2 t2 ON t2.group_id = t1.`group`
WHERE t2.region = @region
ORDER BY
    CASE
        WHEN team = @team THEN 1
        WHEN team IS NOT NULL THEN 2
        WHEN team IS NULL THEN 3
    END
Content of tbl
id group team
1 1 AA
2 1 BB
3 2 AA
4 2 CC
5 3 BB
6 3 NULL
7 4 NULL
Expected result when @team=AA
id group team
1 1 AA
3 2 AA
5 3 BB
7 4 NULL
Expected result when @team=BB
id group team
2 1 BB
3 2 AA
5 3 BB
7 4 NULL
Use your custom ordering logic with ROW_NUMBER:
-- Pick one row per `group`: the requested @team if present, otherwise a row
-- with any other team, otherwise the row without a team.
-- t1.id is the tie-breaker, so the choice among several "other team" rows is
-- deterministic (lowest id wins).
WITH cte AS (
    SELECT
        t1.id,
        t1.`group`,
        t1.team,
        ROW_NUMBER() OVER (
            PARTITION BY t1.`group`
            ORDER BY
                CASE
                    WHEN t1.team = @team THEN 1
                    WHEN t1.team IS NOT NULL THEN 2
                    ELSE 3
                END,
                t1.id
        ) AS rn
    FROM tbl AS t1
    INNER JOIN tbl2 AS t2
        ON t2.group_id = t1.`group`
    WHERE t2.region = @region
)
SELECT id, `group`, team
FROM cte
WHERE rn = 1;
Side note: Avoid naming your table columns GROUP, as this is a reserved MySQL keyword, and therefore must always be escaped in backticks.

Incrementing count ONLY for duplicates in MySQL

Here is my MySQL table. I updated the question by adding an 'id' column to it (as instructed in the comments by others).
id data_id
1 2355
2 2031
3 1232
4 9867
5 2355
6 4562
7 1232
8 2355
I want to add a new column called row_num to assign an incrementing number ONLY for duplicates, as shown below. Order of the results does not matter.
id data_id row_num
3 1232 1
7 1232 2
2 2031 null
1 2355 1
5 2355 2
8 2355 3
6 4562 null
4 9867 null
I followed this answer and came up with the code below. But following code adds a count of '1' to non-duplicate values too, how can I modify below code to add a count only for duplicates?
-- Asker's query: a running counter kept in user variables that restarts at 1
-- whenever data_id differs from the previous row's data_id.
-- MySQL user variables are written with @ (a # would start a comment).
select data_id,row_num
from (
    select data_id,
           @row:=if(@prev=data_id,@row,0) + 1 as row_num,
           @prev:=data_id
    from my_table
)t
If you are running MySQL 8.0, you can do this more efficiently with window functions only:
-- Number the rows of each data_id, but only when that data_id occurs more
-- than once; rows with a unique data_id get NULL (CASE without ELSE).
-- The alias goes after END; placed inside the CASE it is a syntax error.
select
    data_id,
    case
        when count(*) over(partition by data_id) > 1
            then row_number() over(partition by data_id order by data_id)
    end as row_num
from mytable
When the window count returns more than 1, you know that the current data_id has duplicates, in which case you can use row_number() to assign the incrementing number.
Note that, in the absence of an ordering column that uniquely identifies each record within the group sharing the same data_id, it is undefined which record will actually get each number.
I am assuming that id is the column that defines the order on the rows.
In MySQL 8 you can use row_number() to get the number of each data_id and a CASE with EXISTS to exclude the rows which have no duplicate.
-- Number rows per data_id in id order, but only for data_id values that have
-- at least one other row; otherwise row_num is NULL.
SELECT
    cur.data_id,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM my_table AS twin
            WHERE twin.data_id = cur.data_id
              AND twin.id <> cur.id
        )
            THEN ROW_NUMBER() OVER (PARTITION BY cur.data_id ORDER BY cur.id)
    END AS row_num
FROM my_table AS cur;
In older versions you can use a subquery counting the rows with the same data_id but smaller id. With an EXISTS in a HAVING clause you can exclude the rows that have no duplicate.
-- Without window functions: row_num = 1 + number of rows with the same
-- data_id and a smaller id. The HAVING EXISTS empties the subquery (yielding
-- NULL) when no other row shares the data_id.
SELECT
    cur.data_id,
    (
        SELECT COUNT(*)
        FROM my_table AS earlier
        WHERE earlier.data_id = cur.data_id
          AND earlier.id < cur.id
        HAVING EXISTS (
            SELECT 1
            FROM my_table AS twin
            WHERE twin.data_id = cur.data_id
              AND twin.id <> cur.id
        )
    ) + 1 AS row_num
FROM my_table AS cur;
db<>fiddle
Join with a query that returns the number of duplicates.
-- t1 numbers the rows per data_id with a user-variable counter; t2 counts
-- how often each data_id occurs. row_num is shown only for data_id values
-- occurring more than once and is NULL otherwise (CASE without ELSE).
-- MySQL user variables are written with @ (a # would start a comment).
-- NOTE(review): assigning user variables inside a SELECT relies on row
-- evaluation order, which MySQL does not guarantee — verify on the target
-- version or prefer ROW_NUMBER() on MySQL 8.0+.
SELECT
    t1.data_id,
    CASE WHEN t2.dups > 1 THEN t1.row_num END AS row_num
FROM (
    SELECT
        data_id,
        @row := IF(@prev = data_id, @row, 0) + 1 AS row_num,
        @prev := data_id AS prev_data_id
    FROM my_table
    ORDER BY data_id
) AS t1
INNER JOIN (
    SELECT
        data_id,
        COUNT(*) AS dups
    FROM my_table
    GROUP BY data_id
) AS t2
    ON t1.data_id = t2.data_id
If you want to have the old "order" of the old table, you need much more code
-- Same idea with the rows pre-sorted by data_id: the inner query keeps a
-- per-data_id counter in user variables and joins in cntid, the number of
-- rows per data_id; the outer IF blanks the counter for unique values.
-- MySQL user variables are written with @ (a # would start a comment).
SELECT
    data_id, IF (row_num = 1 AND cntid = 1, NULL,row_num)
FROM
    (SELECT
        @row:=IF(@prev = t1.data_id, @row, 0) + 1 AS row_num,
        cntid,
        @prev:=t1.data_id data_id
    FROM
        (SELECT
            *
        FROM
            my_table
        ORDER BY data_id) t1
    INNER JOIN (SELECT Count(*) cntid,data_id FROM my_table GROUP BY data_id)t2
    ON t1.data_id = t2.data_id) t2
data_id | IF (row_num = 1 AND cntid = 1, NULL,row_num)
------: | -------------------------------------------:
1232 | 1
1232 | 2
2031 | null
2355 | 1
2355 | 2
2355 | 3
4562 | null
9867 | null
db<>fiddle here

select rows with condition in other rows

For each ticket_Id that has a row with status_Id = 2, I want to select the row with the last (highest) status_Id.
ticketStatus_Id ticket_Id status_Id
======================================
1 1 1
2 1 2 -
3 1 3 *
4 2 1
5 3 1
6 3 2 - *
7 4 1
8 4 2 -
9 4 3
10 4 4 *
I want to select just rows 3, 6 and 10: for each of their ticket_Ids there is a row with status_Id = 2 (rows 2, 6 and 8).
In other words: how can I select rows 3, 6 and 10 (ticket_Id = 1, 3, 4), given that a row with the same ticket_Id and status_Id = 2 exists (rows 2, 6 and 8)?
If you want the complete row, then I would view this as exists:
-- Full rows holding the highest status_id of their ticket, restricted to
-- tickets that have at least one status_id = 2 row.
SELECT t.*
FROM t
WHERE t.status_Id = (
          SELECT MAX(peer.status_id)
          FROM t AS peer
          WHERE peer.ticket_id = t.ticket_id
      )
  AND EXISTS (
          SELECT 1
          FROM t AS resolved
          WHERE resolved.ticket_id = t.ticket_id
            AND resolved.status_id = 2
      );
If you just want the ticket_id and status_id (and not the whole row), I would recommend aggregation:
-- One row per ticket that has at least one status_id = 2 entry, reporting
-- that ticket's highest status_id.
select
    ticket_id,
    max(status_id)
from t
group by ticket_id
having sum(case when status_id = 2 then 1 else 0 end) > 0;
In your case, ticketStatus_Id seems to increase with status_id, so you can use:
-- Per ticket with at least one status_id = 2 entry: the highest
-- ticketStatus_Id and the highest status_id (computed independently).
select
    max(ticketStatus_Id) as ticketStatus_Id,
    ticket_id,
    max(status_id) as Status_Id
from t
group by ticket_id
having sum(case when status_id = 2 then 1 else 0 end) > 0;
First, for each ticket we get the row with the highest status. We can do this with a self-join. Each row is joined with the row with the next highest status. We select the rows which have no higher status, those will be the highest. Here's a more detailed explanation.
-- Highest-status row per ticket: keep the rows for which no row of the same
-- ticket with a greater status_Id can be joined.
SELECT cur.*
FROM ticket_statuses AS cur
LEFT JOIN ticket_statuses AS higher
    ON higher.ticket_Id = cur.ticket_Id
   AND higher.status_Id > cur.status_Id
WHERE higher.ticketStatus_Id IS NULL
3 1 3
4 2 1
6 3 2
10 4 4
11 5 3
Note that I've added a curve-ball of 11, 5, 3 to ensure we only select tickets with a status of 2, not greater than 2.
Then we can use that as a CTE (or subquery if you're not using MySQL 8) and select only those tickets who have a status of 2.
-- max_statuses: the highest-status row of every ticket (anti-join on a
-- higher status). The final join keeps only tickets that also have a
-- status_id = 2 row.
WITH max_statuses AS (
    SELECT cur.*
    FROM ticket_statuses AS cur
    LEFT JOIN ticket_statuses AS higher
        ON higher.ticket_Id = cur.ticket_Id
       AND higher.status_Id > cur.status_Id
    WHERE higher.ticketStatus_Id IS NULL
)
SELECT ms.*
FROM max_statuses AS ms
INNER JOIN ticket_statuses AS resolved
    ON resolved.ticket_id = ms.ticket_id
   AND resolved.status_id = 2;
3 1 3
6 3 2
10 4 4
This approach ensures we select the complete rows with the highest statuses and any extra data they may contain.
dbfiddle
This is basically a "last row per group" problem. You will find some solutions here. My preferred solution would be:
-- Last row per ticket: take the highest ticketStatus_Id of each ticket_Id
-- and join back to fetch the full row.
SELECT t.*
FROM (
    SELECT MAX(ticketStatus_Id) AS ticketStatus_Id
    FROM mytable
    GROUP BY ticket_Id
) AS tmax
INNER JOIN mytable AS t
    ON t.ticketStatus_Id = tmax.ticketStatus_Id
The difference in your question is that you have a condition requiring a specific value within the group. This can be solved with a JOIN within the subquery:
-- Start from the status_Id = 2 rows, fan out to every row of the same
-- ticket, and keep the highest ticketStatus_Id per ticket; then join back to
-- fetch the full row. Tickets without a status 2 row never enter the subquery.
SELECT t.*
FROM (
    SELECT MAX(every_row.ticketStatus_Id) AS ticketStatus_Id
    FROM mytable AS status2
    INNER JOIN mytable AS every_row
        ON every_row.ticket_Id = status2.ticket_Id
    WHERE status2.status_Id = 2
    GROUP BY status2.ticket_Id
) AS tmax
INNER JOIN mytable AS t
    ON t.ticketStatus_Id = tmax.ticketStatus_Id
Result:
| ticketStatus_Id | ticket_Id | status_Id |
| --------------- | --------- | --------- |
| 3 | 1 | 3 |
| 6 | 3 | 2 |
| 10 | 4 | 4 |
View on DB Fiddle
A solution using window functions could be:
-- rn = 1 marks the row with the highest ticketStatus_Id of each ticket;
-- has_status2 is true when any row of the ticket has status_Id = 2.
WITH flagged AS (
    SELECT
        ticketStatus_Id,
        ticket_Id,
        status_Id,
        ROW_NUMBER() OVER (PARTITION BY ticket_Id ORDER BY ticketStatus_Id DESC) AS rn,
        BIT_OR(status_Id = 2) OVER (PARTITION BY ticket_Id) > 0 AS has_status2
    FROM mytable
)
SELECT ticketStatus_Id, ticket_Id, status_Id
FROM flagged
WHERE has_status2
  AND rn = 1
A quite expressive way is to use EXISTS and NOT EXISTS subquery conditions:
-- Keep a row when its ticket has a status_Id = 2 row and no row of the same
-- ticket has a higher ticketStatus_Id.
SELECT t.*
FROM mytable AS t
WHERE NOT EXISTS (
          SELECT 1
          FROM mytable AS later
          WHERE later.ticket_Id = t.ticket_Id
            AND later.ticketStatus_Id > t.ticketStatus_Id
      )
  AND EXISTS (
          SELECT 1
          FROM mytable AS resolved
          WHERE resolved.ticket_Id = t.ticket_Id
            AND resolved.status_Id = 2
      )
-- Full row with the highest status_id per ticket, only for tickets that
-- actually have a status_id = 2 row.
-- The HAVING clause checks for status 2 explicitly; filtering on
-- status_id >= 2 would also admit tickets that skipped status 2
-- (e.g. a ticket whose only row has status 3).
SELECT a.*
FROM t AS a
INNER JOIN (
    SELECT
        ticket_id,
        MAX(status_id) AS max_status_id
    FROM t
    GROUP BY ticket_id
    HAVING SUM(status_id = 2) > 0
) AS b
    ON a.ticket_id = b.ticket_id
   AND a.status_id = b.max_status_id;
-- Per ticket that has a status_Id = 2 row: the highest ticketstatus_Id and
-- the highest status_Id (computed independently).
SELECT
    MAX(m1.ticketstatus_Id) AS ticket_status,
    m1.ticket_Id AS ticket,
    MAX(m1.status_Id) AS status
FROM mytable AS m1
WHERE EXISTS (
          SELECT 1
          FROM mytable AS m2
          WHERE m2.ticket_Id = m1.ticket_Id
            AND m2.status_Id = 2
      )
GROUP BY m1.ticket_Id

Can I select a row with a condition where the previous and next row have a condition?

I am using MySQL and am looking for a way to select only the rows whose previous and next rows both have a value of 3 in a field called fav_num.
For example I have 6 rows and two fields. The fields are ID and fav_number.
ID| fav_num
1 | 3
2 | 2
3 | 3
4 | 7
5 | 2
6 | 9
I'd like to find a way to return the ID from the table where the previous and next row have a fav_num of 3.
This query would then return ID 2.
I apologize if my question sounds confusing.
I think this could work..
-- Return the ids whose immediately preceding and immediately following rows
-- (by id) both have fav_num = 3.
-- Each subquery fetches the fav_num of the directly adjacent row. Looking up
-- the nearest row that already has fav_num = 3 would also match rows that
-- merely lie somewhere between two such rows.
SELECT t.id
FROM tab AS t
WHERE 3 = (
          SELECT prev_row.fav_num
          FROM tab AS prev_row
          WHERE prev_row.id < t.id
          ORDER BY prev_row.id DESC
          LIMIT 1
      )
  AND 3 = (
          SELECT next_row.fav_num
          FROM tab AS next_row
          WHERE next_row.id > t.id
          ORDER BY next_row.id ASC
          LIMIT 1
      )
Please try this:
-- Rows whose direct predecessor (greatest smaller id) and direct successor
-- (smallest greater id) both have fav_num = 3.
SELECT a.id
FROM test1 AS a
WHERE 3 = (
          SELECT p.fav_num
          FROM test1 AS p
          WHERE p.id = (SELECT MAX(lo.id) FROM test1 AS lo WHERE lo.id < a.id)
      )
  AND 3 = (
          SELECT n.fav_num
          FROM test1 AS n
          WHERE n.id = (SELECT MIN(hi.id) FROM test1 AS hi WHERE hi.id > a.id)
      );
-- EXISTS variant: the direct predecessor and the direct successor (by id)
-- must each exist with fav_num = 3.
SELECT a.id
FROM test1 AS a
WHERE EXISTS (
          SELECT 1
          FROM test1 AS p
          WHERE p.fav_num = 3
            AND p.id = (SELECT MAX(lo.id) FROM test1 AS lo WHERE lo.id < a.id)
      )
  AND EXISTS (
          SELECT 1
          FROM test1 AS n
          WHERE n.fav_num = 3
            AND n.id = (SELECT MIN(hi.id) FROM test1 AS hi WHERE hi.id > a.id)
      );
I am not sure about the performance.
-- NOTE(review): this filters on each row's own fav_num (range '1'..'3'); it
-- does not look at the previous or next row.
SELECT ID
FROM tbl
WHERE fav_num >= '1'
  AND fav_num <= '3';
http://sqlfiddle.com/#!9/2b6fb0/12
-- Sliding window over the rows in id order, kept in user variables: for each
-- row read, d3 is its fav_num, d2/d1 are values shifted down from earlier
-- rows, and prevID is the id of the row read just before it. The outer join
-- returns the row prevID when d1 = 3 and d3 = 3.
-- MySQL user variables are written with @ (a # would start a comment).
-- NOTE(review): relies on the left-to-right evaluation of user-variable
-- assignments, which MySQL does not guarantee — verify on the target version.
SELECT t1.*
FROM t1
INNER JOIN (SELECT
        @d1:=0+@d2 d1,
        @d2:=0+@d3 d2,
        @d3:=fav_num d3,
        @previd:=@id prevID,
        @id:=id
    FROM (
        SELECT *
        FROM t1
        ORDER BY id) t
    ) filter
    ON filter.d1 = 3
    AND filter.d3 = 3
    AND t1.id = filter.prevID
;