Delete duplicate data except two - mysql

How delete duplicate data except two row?
id 4 must deleted, because 'mangga' already have 3 row

This is a bit painful in MySQL. The following identifies the rows to be deleted:
select t.*
from (select t.*,
(#rn := if(#n = nama, #rn + 1,
if(#n := nama, 1, 1)
)
) as rn
from t cross join
(select #n := '', #rn := 0) params
order by nama, id
) t
where rn > 2;
You can then do the delete using a join:
delete t
from t join
(select t.*,
(#rn := if(#n = nama, #rn + 1,
if(#n := nama, 1, 1)
)
) as rn
from t cross join
(select #n := '', #rn := 0) params
order by nama, id
) tt
on t.id = tt.id
where tt.rn > 2;

Related

How to find median value with group by (MySQL)

Need to find median value of time difference between sent date and click date (in seconds) for each type of emails. I found solution just for all data:
SET #rowindex := -1;
SELECT g.type, g.time_diff
FROM
(SELECT #rowindex:=#rowindex + 1 AS rowindex,
TIMESTAMPDIFF(SECOND, emails_sent.date_sent, emails_clicks.date_click) AS time_diff,
emails_sent.id_type AS type
FROM emails_sent inner join emails_clicks on emails_sent.id = emails_clicks.id_email
ORDER BY time_diff) AS g
WHERE g.rowindex IN (FLOOR(#rowindex / 2) , CEIL(#rowindex / 2));
Is it possible to add group by id_type statement?
Thanks!
First, you need to enumerate the rows for each type. Using variables, this code looks like:
select sc.*,
(#rn := if(#t = id_type, #rn + 1,
if(#t := id_type, 1, 1)
)
) as seqnum
from (select timestampdiff(second, s.date_sent, c.date_click) as time_diff,
s.id_type,
from emails_sent s inner join
emails_clicks c
on s.id = c.id_email
order by time_diff
) sc cross join
(select #t := -1, #rn := 0) as params;
Then, you need to bring in the total number for each type and do the calculation for the median:
select sc.id_type, avg(time_diff)
from (select sc.*,
(#rn := if(#t = id_type, #rn + 1,
if(#t := id_type, 1, 1)
)
) as seqnum
from (select timestampdiff(second, s.date_sent, c.date_click) as time_diff,
s.id_type,
from emails_sent s inner join
emails_clicks c
on s.id = c.id_email
order by time_diff
) sc cross join
(select #t := -1, #rn := 0) as params
) sc join
(select id_type, count(*) as cnt
from emails_sent s inner join
emails_clicks c
on s.id = c.id_email
group by id_type
) n
where 2 * seqnum in (n.cnt, n.cnt, n.cnt + 1, n.cnt + 2)
group by sc.id_type;

mysql get last 10 records from each group

I have mysql table called ware_stock_transaction and it has order_no, order_type, created_date, item_no.
I want to get the last 10 record from each item, like this:
item A (10 records)
item B (10 records)
item C (10 records)
In MySQL, you can use variables:
select wst.*
from (select wst.*,
(#rn := if(#i = item_no, #rn + 1,
if(#i := item_no, 1, 1)
)
) as rn
from ware_stock_transaction wst cross join
(select #rn := 0, #i := '') params
order by item_no, created_date desc
) wst
where rn <= 10;

How to SELECT the total row number as a column

My first query
SELECT
id,
year_,
month_
FROM
(SELECT
tp.id,
YEAR(FROM_UNIXTIME(tp.visited_date)) as year_,
MONTH(FROM_UNIXTIME(tp.visited_date)) as month_,
#rn := IF(#prev = CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)),MONTH(FROM_UNIXTIME(tp.visited_date))), #rn + 1, 1) AS rn,
#prev := CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)),MONTH(FROM_UNIXTIME(tp.visited_date)))
FROM
tr_place tp
JOIN
(SELECT #prev := NULL, #rn := 0) AS vars
ORDER BY
YEAR(FROM_UNIXTIME(tp.visited_date)) DESC,
MONTH(FROM_UNIXTIME(tp.visited_date)) DESC) AS T1
WHERE
rn < 3;
Will returns the data set This is the result which I got from the query
The subquery in it
SELECT
tp.id,
YEAR(FROM_UNIXTIME(tp.visited_date)) as year_,
MONTH(FROM_UNIXTIME(tp.visited_date)) as month_,
#rn := IF(#prev = CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)),MONTH(FROM_UNIXTIME(tp.visited_date))), #rn + 1, 1) AS rn,
#prev := CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)),MONTH(FROM_UNIXTIME(tp.visited_date)))
FROM
tr_place tp
JOIN
(SELECT #prev := NULL, #rn := 0) AS vars
ORDER BY
YEAR(FROM_UNIXTIME(tp.visited_date)) DESC,
MONTH(FROM_UNIXTIME(tp.visited_date)) DESC;
Returns data The sub query will returns this data
I need the subquery's greatest rn as a column in the first query.
How can I achieve that?
You may be having a problem with variable assignment. You should not be referencing variables in multiple expressions in the select. So, the correct way to write the first query is:
SELECT id, year_, month_
FROM (SELECT tp.id,
YEAR(FROM_UNIXTIME(tp.visited_date)) as year_,
MONTH(FROM_UNIXTIME(tp.visited_date)) as month_,
(#rn := IF (#prev = CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)), MONTH(FROM_UNIXTIME(tp.visited_date))), #rn,
if(#prev := CONCAT(YEAR(FROM_UNIXTIME(tp.visited_date)), MONTH(FROM_UNIXTIME(tp.visited_date))), 1, 1
)
) as rn
FROM tr_place tp JOIN
(SELECT #prev := NULL, #rn := 0) AS vars
ORDER BY YEAR(FROM_UNIXTIME(tp.visited_date)) DESC,
MONTH(FROM_UNIXTIME(tp.visited_date)) DESC
) t1
WHERE rn < 3;
Note the variable assignments are all in a single expression.
This may fix your issue.

Update duplicate rows

I have a table:
id name
1 a
2 a
3 a
4 b
5 b
6 c
I am looking for an update statement that will update name column to:
id name
1 a
2 a-2
3 a-3
4 b
5 b-2
6 c
In SQL Server I would use:
;with cte as(select *, row_number() over(partition by name order by id) rn from table)
update cte set name = name + '-' + cast(rn as varchar(10))
where rn <> 1
I am not strong in MySQL nonstandard queries.
Can I do something like this in MySQL?
You can do this:
UPDATE YourTable p
JOIN(SELECT t.id,t.name,count(*) as rnk
FROM YourTable t
INNER JOIN YourTable s on(t.name = s.name and t.id <= s.id)
GROUP BY t.id,t.name) f
ON(p.id = f.id)
SET p.name = concat(p.name,'-',f.rnk)
WHERE rnk > 1
This will basically use join and count to get the same as ROW_NUMBER() , and update only those who have more then 1 result(meaning the second,third ETC excluding the first)
In MySQL you can use variables in order to simulate ROW_NUMBER window function:
SELECT id, CONCAT(name, IF(rn = 1, '', CONCAT('-', rn))) AS name
FROM (
SELECT id, name,
#rn := IF(name = #n, #rn + 1,
IF(#n := name, 1, 1)) AS rn
FROM mytable
CROSS JOIN (SELECT #rn := 0, #n := '') AS vars
ORDER BY name, id) AS t
To UPDATE you can use:
UPDATE mytable AS t1
SET name = (
SELECT CONCAT(name, IF(rn = 1, '', CONCAT('-', rn))) AS name
FROM (
SELECT id, name,
#rn := IF(name = #n, #rn + 1,
IF(#n := name, 1, 1)) AS rn
FROM mytable
CROSS JOIN (SELECT #rn := 0, #n := '') AS vars
ORDER BY name, id) AS t2
WHERE t1.id = t2.id)
Demo here
You can also use UPDATE with JOIN syntax:
UPDATE mytable AS t1
JOIN (
SELECT id, rn, CONCAT(name, IF(rn = 1, '', CONCAT('-', rn))) AS name
FROM (
SELECT id, name,
#rn := IF(name = #n, #rn + 1,
IF(#n := name, 1, 1)) AS rn
FROM mytable
CROSS JOIN (SELECT #rn := 0, #n := '') AS vars
ORDER BY name, id) AS x
) AS t2 ON t2.rn <> 1 AND t1.id = t2.id
SET t1.name = t2.name;
The latter is probably faster than the former because it performs less UPDATE operations.
The next query will do it with less effort for the database:
UPDATE
tab AS tu
INNER JOIN
-- result set containing only duplicate rows that must to be updated
(
SELECT
t.id,
COUNT(*) AS cnt
FROM
tab AS t
-- join the same table by smaller id and equal value. That way you will exclude rows that are not duplicated
INNER JOIN
tab AS tp
ON
tp.name = t.name
AND
tp.id < t.id
GROUP BY
t.id
) AS tc
ON
tu.id = tc.id
SET
tu.name = CONCAT(tu.name, '-', tc.cnt + 1)

Getting incorrect rank using WHERE clause in mySQL

http://sqlfiddle.com/#!9/083f5d/1
It's returning 6th rank instead of 1st rank in above SQL query. What changes I need to do in SQL query so it provides 1st rank because I have used WHERE condition to check mpkid and roundpkid (WHERE mpkid=37 AND roundpkid=3).
Here is the query:
SELECT mpkid, totalvote, rank
FROM (
SELECT mpkid, roundpkid, totalvote,
#n := IF(#g = totalvote, #n, #n + 1) rank,
#g := totalvote
FROM tr_msearch_vote_summary,
(SELECT #n := 0) i
ORDER BY totalvote DESC
) q
WHERE mpkid=37 AND roundpkid=3
What I want:
Please see sqlfiddle first. There is only one record for round #3, and it is 37 so I want that it should display rank #1 for mpkid #37 and round #3 but it is giving rank #6.
You are applying the WHERE condition after the ranking is already complete. I'm guessing you want to do the ranking after the WHERE is applied:
SELECT t.mpkid, t.roundpkid,
#n := IF(#g = t.totalvote, #n, #n + 1) rank,
#g := t.totalvote totalvote
FROM tr_msearch_vote_summary t, (SELECT #n := 0) i
WHERE t.mpkid=37 AND t.roundpkid=3
ORDER BY t.totalvote DESC
You should also initialise #g to make sure it isn't preset in another query:
SELECT t.mpkid, t.roundpkid,
#n := IF(#g = t.totalvote, #n, #n + 1) rank,
#g := t.totalvote totalvote
FROM tr_msearch_vote_summary t, (SELECT #n := 0, #g := NULL) i
WHERE t.mpkid=37 AND t.roundpkid=3
ORDER BY t.totalvote DESC
UPDATE
If you want the ranking grouped by round and then to simply SELECT by mpkid, this is a more powerful query:
SELECT mpkid,
roundpkid,
totalvote,
rank
FROM (
SELECT t.mpkid,
#n := CASE
WHEN #r = t.roundpkid AND #g = t.totalvote THEN #n
WHEN #r = t.roundpkid THEN #n + 1
ELSE 1
END rank,
#r := t.roundpkid roundpkid,
#g := t.totalvote totalvote
FROM tr_msearch_vote_summary t, (SELECT #n := 0, #g := NULL, #r := NULL) i
ORDER BY t.roundpkid, t.totalvote DESC
) r
WHERE mpkid = 37;
Note that you do not need to supply the roundpkid. See updated fiddle
Please try this query,
SELECT mpkid, totalvote, rank
FROM (
SELECT mpkid, roundpkid, totalvote,
#n := IF(#g = totalvote, #n, #n + 1) rank,
#g := totalvote
FROM tr_msearch_vote_summary,
(SELECT #n := 0) i
WHERE roundpkid=3
ORDER BY totalvote DESC
) q
WHERE mpkid=37
And here is your updated sqlfiddle. http://sqlfiddle.com/#!9/083f5d/9