Use MySQL variable to create a group by scoring - mysql

On MySQL 5.6 server I have this table:
-- One row per student per course, holding the vote that student received.
CREATE TABLE `student` (
`course` INT(5) NULL DEFAULT NULL,
`course_desc` VARCHAR(50) NULL DEFAULT NULL COLLATE 'utf8_unicode_ci',
`name` VARCHAR(50) NULL DEFAULT NULL COLLATE 'utf8_unicode_ci',
`vote` INT(2) NULL DEFAULT NULL,
-- A given name may appear at most once per course.
UNIQUE INDEX `course_name` (`course`, `name`)
)
COLLATE='utf8_unicode_ci'
ENGINE=InnoDB;
With data:
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (100, 'Math', 'Mario', 10);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (100, 'Math', 'Giovanna', 8);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (100, 'Math', 'Federico', 8);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (100, 'Math', 'Arianna', 5);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (200, 'History', 'Mario', 9);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (200, 'History', 'Giovanna', 7);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (200, 'History', 'Patrizio', 3);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (200, 'History', 'Teresa', 10);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (300, 'Literacy', 'Giovanna', 7);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (300, 'Literacy', 'Federico', 6);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (300, 'Literacy', 'Arianna', 10);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Giovanni', 9);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Giovanna', 7);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Maria', 9);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Teresa', 0);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Carlo', 7);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (400, 'Science', 'Federico', 6);
INSERT INTO `student` (`course`, `course_desc`, `name`, `vote`) VALUES (500, 'Philosophy', 'Maria', 10);
This query:
-- Question's first attempt: rank students by vote within each course using
-- MySQL user variables. As posted, this query yields the incorrect ranking
-- shown in the result table that follows; the logic is reproduced unchanged.
-- User variables are written '@name' ('#' would start a comment in MySQL).
SELECT
    (@id := @id + 1) AS "ID",
    t1.`course` AS "COURSE",
    t1.`course_desc` AS "COURSE_DESC",
    t1.`name` AS "NAME",
    t1.`vote` AS "VOTE",
    CASE
        WHEN @prev_course = t1.`course` THEN
        (
            CASE
                WHEN @prev_vote = t1.`vote` THEN @rank_count
                -- The assignment doubles as the WHEN condition, so a vote of 0
                -- is false and, with no ELSE, the CASE yields NULL.
                WHEN @prev_vote := t1.`vote` THEN @rank_count := @rank_count + 1
            END
        )
        -- First row of a new course: remember the course and restart the rank.
        -- @prev_vote is NOT updated on this branch, so it still holds the last
        -- vote of the previous course when the next row is compared.
        WHEN @prev_course := t1.`course` THEN
        (
            @rank_count := 1
        )
    END AS "RANK"
FROM
    (SELECT @id := 0) AS t0,
    `student` AS t1,
    (SELECT @prev_course := NULL) AS t2,
    (SELECT @prev_vote := NULL) AS t3,
    (SELECT @rank_count := 0) AS t4
ORDER BY
    t1.`course`,
    t1.`vote` DESC;
produces a WRONG result:
------------------------------------------------------------------
ID COURSE COURSE_DESC NAME VOTE RANK
------------------------------------------------------------------
1 100 Math Mario 10 1
2 100 Math Giovanna 8 2
3 100 Math Federico 8 2
4 100 Math Arianna 5 3
5 200 History Teresa 10 1
6 200 History Mario 9 2
7 200 History Giovanna 7 3
8 200 History Patrizio 3 4
9 300 Literacy Arianna 10 1
10 300 Literacy Giovanna 7 2
11 300 Literacy Federico 6 3
12 400 Science Giovanni 9 1
13 400 Science Maria 9 2
14 400 Science Giovanna 7 3
15 400 Science Carlo 7 3
16 400 Science Federico 6 4
17 400 Science Teresa 0 NULL
18 500 Philosophy Maria 10 1
------------------------------------------------------------------
This other query:
-- Question's second attempt: identical to the first except that the course is
-- tracked through the string column course_desc. As posted, it yields the
-- incorrect ranking shown in the result table that follows; the logic is
-- reproduced unchanged.
-- User variables are written '@name' ('#' would start a comment in MySQL).
SELECT
    (@id := @id + 1) AS "ID",
    t1.`course` AS "COURSE",
    t1.`course_desc` AS "COURSE_DESC",
    t1.`name` AS "NAME",
    t1.`vote` AS "VOTE",
    CASE
        WHEN @prev_course = t1.`course_desc` THEN
        (
            CASE
                WHEN @prev_vote = t1.`vote` THEN @rank_count
                -- The assignment doubles as the WHEN condition, so a vote of 0
                -- is false and, with no ELSE, the CASE yields NULL.
                WHEN @prev_vote := t1.`vote` THEN @rank_count := @rank_count + 1
            END
        )
        -- NOTE(review): the assigned string is used as a boolean here; a
        -- non-numeric string such as 'Math' converts to 0 (false), so this
        -- branch presumably never returns 1 -- consistent with the posted
        -- output, where the rank never restarts at a course change.
        WHEN @prev_course := t1.`course_desc` THEN
        (
            @rank_count := 1
        )
    END AS "RANK"
FROM
    (SELECT @id := 0) AS t0,
    `student` AS t1,
    (SELECT @prev_course := NULL) AS t2,
    (SELECT @prev_vote := NULL) AS t3,
    (SELECT @rank_count := 0) AS t4
ORDER BY
    t1.`course`,
    t1.`vote` DESC;
produces a VERY WRONG result
------------------------------------------------------------------
ID COURSE COURSE_DESC NAME VOTE RANK
------------------------------------------------------------------
1 100 Math Mario 10 NULL
2 100 Math Giovanna 8 1
3 100 Math Federico 8 1
4 100 Math Arianna 5 2
5 200 History Teresa 10 3
6 200 History Mario 9 4
7 200 History Giovanna 7 5
8 200 History Patrizio 3 6
9 300 Literacy Arianna 10 7
10 300 Literacy Giovanna 7 8
11 300 Literacy Federico 6 9
12 400 Science Giovanni 9 10
13 400 Science Maria 9 10
14 400 Science Giovanna 7 11
15 400 Science Carlo 7 11
16 400 Science Federico 6 12
17 400 Science Teresa 0 NULL
18 500 Philosophy Maria 10 13
------------------------------------------------------------------
The goal is ranking the table from top (1) to bottom (n) depending on vote value.
Same vote = same rank.
Grouping by course.
I need some help
Tks a lot
MR

Your question is a bit unclear on what you want. But your use of variables is wrong. You should not assign a variable in one expression and then refer to it in another. MySQL does not guarantee the order of evaluation of expressions in a select, so they might be evaluated in the wrong order.
I think you want something like this:
-- Sequential position (1, 2, 3, ...) of each student within a course, highest
-- vote first. The comparison with @c and its re-assignment live in ONE
-- expression, so the result does not depend on the order in which separate
-- select-list columns are evaluated.
-- User variables are written '@name' ('#' would start a comment in MySQL).
SELECT
    s.*,
    (@rn := IF(@c = course_desc,
               @rn + 1,
               -- New course: remember it and restart at 1. Both arms are 1
               -- because this IF exists only for the assignment side effect.
               IF(@c := course_desc, 1, 1)
              )
    ) AS `rank`  -- quoted: RANK is a reserved word in MySQL 8.0
FROM (
    SELECT s.*
    FROM student s
    ORDER BY s.course_desc, s.vote DESC
) s
CROSS JOIN (SELECT @c := '', @rn := 0) params;
If you want students with the same vote to have the same value:
-- Rank within a course where students with the same vote share a rank.
-- @cv remembers the previous row as the string 'course_desc:vote'; all reads
-- and writes of @cv and @rn happen inside one expression.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- NOTE(review): LIKE treats '%' and '_' inside course_desc as wildcards.
SELECT
    s.*,
    (@rn := IF(@cv = CONCAT_WS(':', course_desc, vote),
               -- Same course and same vote as the previous row: keep the rank.
               @rn,
               IF(@cv LIKE CONCAT(course_desc, ':%'),
                  -- Same course, different vote: next rank.
                  IF(@cv := CONCAT_WS(':', course_desc, vote), @rn + 1, @rn + 1),
                  -- Different course: restart at 1.
                  IF(@cv := CONCAT_WS(':', course_desc, vote), 1, 1)
                 )
              )
    ) AS `rank`  -- quoted: RANK is a reserved word in MySQL 8.0
FROM (
    SELECT s.*
    FROM student s
    ORDER BY s.course_desc, s.vote DESC
) s
CROSS JOIN (SELECT @cv := '', @rn := 0) params;

Here's another way using LEFT JOIN instead of variables:
-- Rank per course without user variables: a student's rank is one more than
-- the number of DISTINCT higher votes in the same course. Counting distinct
-- votes (rather than higher-voted students) keeps the ranking gap-free after
-- ties, e.g. votes 10, 8, 8, 5 -> ranks 1, 2, 2, 3.
SELECT
    s1.course,
    s1.course_desc,
    s1.name,
    s1.vote,
    COUNT(DISTINCT s2.vote) + 1 AS `rank`  -- quoted: reserved word in MySQL 8.0
FROM student AS s1
LEFT JOIN student AS s2
    ON s1.course = s2.course
   -- A strictly higher vote can never be the same row, so no separate
   -- name <> name guard is needed.
   AND s1.vote < s2.vote
GROUP BY s1.course, s1.course_desc, s1.name, s1.vote
ORDER BY s1.course, `rank`
The query assumes each student is uniquely identified by his/her name. In a real-world scenario you would substitute the name with an id.
Demo here

Related

How to find median given frequency of numbers?

The Numbers table keeps the value of number and its frequency.
+----------+-------------+
| Number | Frequency |
+----------+-------------|
| 0 | 7 |
| 1 | 1 |
| 2 | 3 |
| 3 | 1 |
+----------+-------------+
In this table, the numbers are 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 2, 3, so the median is (0 + 0) / 2 = 0. How to find median (output shown) given frequency of numbers?
+--------+
| median |
+--------|
| 0.0000 |
+--------+
I found the following solution here. However, I am unable to understand it. Can someone please explain the solution and/or post a different solution with explanation?
-- Median of a frequency table (one row per distinct Number with its Frequency).
-- n2 walks Numbers in ascending order keeping a running count:
--   prevNumber  = how many values come before this Number
--   countNumber = how many values come up to and including this Number
-- so each Number occupies sorted positions prevNumber+1 .. countNumber.
-- The WHERE keeps the Number(s) covering the two middle positions,
-- FLOOR((total+1)/2) and FLOOR((total+2)/2) (the same position when the total
-- is odd), and AVG averages them.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- NOTE: prevNumber must be read before @count is advanced, which relies on the
-- select-list expressions being evaluated left to right.
SELECT AVG(n.Number) AS median
FROM Numbers n
-- The WHERE below filters on n2 columns, so unmatched rows never survive and
-- this LEFT JOIN behaves like an inner join.
LEFT JOIN (
    SELECT
        Number,
        @prev := @count AS prevNumber,
        (@count := @count + Frequency) AS countNumber
    FROM Numbers,
        (SELECT @count := 0, @prev := 0, @total := (SELECT SUM(Frequency) FROM Numbers)) temp
    ORDER BY Number
) n2
    ON n.Number = n2.Number
WHERE
    (prevNumber < FLOOR((@total + 1) / 2) AND countNumber >= FLOOR((@total + 1) / 2))
    OR
    (prevNumber < FLOOR((@total + 2) / 2) AND countNumber >= FLOOR((@total + 2) / 2))
Here's the SQL script for reproducibility:
-- Reproduction fixture: each row is a number and how many times it occurs.
CREATE TABLE `Numbers` (
    `Number`    INT NULL,
    `Frequency` INT NULL
);

INSERT INTO `Numbers` (`Number`, `Frequency`)
VALUES
    ('0', '7'),
    ('1', '1'),
    ('2', '3'),
    ('3', '1');
Thanks!
You can use a cumulative sum and then take the midway point. I think the logic looks like this:
-- Median of a frequency table via a running (cumulative) frequency.
-- A number with running_frequency R and frequency F occupies sorted positions
-- R-F+1 .. R. It is (one of) the middle value(s) exactly when
--     R - F <= total/2  AND  R >= total/2
-- which keeps one row for an odd total and one or two rows for an even total;
-- AVG then yields the median.
-- Uses the question's table `Numbers`; user variables are written '@name'
-- ('#' would start a comment in MySQL).
-- NOTE(review): like the original, the running sum relies on the inner
-- ORDER BY being honoured before the variable expression is evaluated.
SELECT AVG(c.number) AS median
FROM (
    SELECT
        o.number,
        o.frequency,
        (@rf := @rf + o.frequency) AS running_frequency
    FROM (SELECT number, frequency FROM Numbers ORDER BY number) AS o
    CROSS JOIN (SELECT @rf := 0) AS params
) AS c
-- The total is computed independently instead of reading @rf in the outer
-- WHERE, so the filter does not depend on when the variable was last updated.
CROSS JOIN (SELECT SUM(frequency) AS total FROM Numbers) AS t
WHERE c.running_frequency - c.frequency <= t.total / 2
  AND c.running_frequency >= t.total / 2;

Group and rank top N rows by category

I have a table with the columns category and votes. I've tried multiple solutions before with very little success; usually what would happen is that instead of returning the top 3 items in each category, it returns all of the items available.
-- Question's attempt at "top 3 photos per category" (logic reproduced as
-- posted). The inner query has no WHERE clause, so category_rank is assigned
-- across every row of photo_contest; the year/month/votes filters are applied
-- only afterwards, to the already-ranked rows.
-- User variables are written '@name' ('#' would start a comment in MySQL).
SELECT
    `id`, `user_id`, `full_name`, `category`, `year`, `month`,
    `thumbnail_photo`, `photo_title`, `votes`
FROM
(
    SELECT
        `id`, `user_id`, `full_name`, `category`, `year`, `month`,
        `thumbnail_photo`, `photo_title`, `votes`,
        -- Continue counting within the same category, restart at 1 otherwise.
        @category_rank := IF(@current_category = category, @category_rank + 1, 1) AS category_rank,
        -- Must run after the rank expression above (left-to-right evaluation).
        @current_category := category
    FROM `photo_contest`
    ORDER BY
        `category`,
        `votes` DESC
) ranked
WHERE
    category_rank <= 3
    AND `year` = '2017'
    AND `month` = 'April'
    AND `votes` > 0
This particular solution was adapted from SQLines. What I ultimately want to do is to turn a table like this:
Name | Category | Votes
--------- | -------- | -----
Name Foo | CatFoo | 0
Name Bar | CatFoo | 1
Name Baz | CatFoo | 10
Name Quux | CatFoo | 200
Name ooF | CatBar | 50
Name raB | CatBar | 300
Name zaB | CatBar | 10
Name xuuQ | CatBar | 200
...to:
Name | Category | Votes
--------- | -------- | -----
Name Quux | CatFoo | 200
Name Baz | CatFoo | 10
Name Bar | CatFoo | 1
Name raB | CatBar | 300
Name xuuQ | CatBar | 200
Name ooF | CatBar | 50
...with the other WHERE statements included. Year, month, and minimum votes.
Your subquery tries to calculate ranking over the entire table. If you only want to rank for the selected year-month with votes > 0, you should copy those conditions into the subquery as its own WHERE conditions.
UPDATE:
Looks like it's the missing ORDER BY in the outer-query that causes the said problem. I've created the following DDL/SQL at sqlfiddle.
CREATE TABLE IF NOT EXISTS `votes` (
`id` INT NOT NULL,
`category` VARCHAR(10) NULL,
`year` VARCHAR(4) NULL,
`month` VARCHAR(2) NULL,
`votes` INT
)
ENGINE = InnoDB;
INSERT INTO `votes` VALUES
(10, 'cat1', '2016', '05', 300),
(10, 'cat1', '2016', '06', 200),
(10, 'cat2', '2016', '05', 500),
(11, 'cat1', '2016', '05', 200),
(11, 'cat2', '2016', '05', 0),
(11, 'cat2', '2016', '06', 100),
(12, 'cat1', '2016', '05', 400),
(12, 'cat2', '2016', '05', 150),
(13, 'cat1', '2016', '05', 350),
(13, 'cat2', '2016', '05', 100),
(13, 'cat2', '2016', '06', 150),
(14, 'cat1', '2016', '05', 0),
(14, 'cat2', '2016', '05', 450);
-- Top 3 rows per category for one year/month, ignoring rows without votes.
-- The filters sit INSIDE the ranked subquery so that only matching rows are
-- ranked; the outer query then keeps ranks 1..3 and re-sorts the result.
-- User variables are written '@name' ('#' would start a comment in MySQL).
SELECT `id`, `category`, `year`, `month`, `votes`
FROM (
    SELECT
        `id`, `category`, `year`, `month`, `votes`,
        -- Continue counting within the same category, restart at 1 otherwise.
        @category_rank := IF(@current_category = category, @category_rank + 1, 1) AS category_rank,
        -- Must run after the rank expression above (left-to-right evaluation).
        @current_category := category
    FROM `votes`
    -- Reset the variables on every run so values left over from an earlier
    -- statement in the same session cannot leak into the first row's rank.
    CROSS JOIN (SELECT @category_rank := 0, @current_category := '') AS init
    WHERE
        `year` = '2016'
        AND `month` = '05'
        AND `votes` > 0
    ORDER BY
        `category`,
        `votes` DESC
) ranked
WHERE
    category_rank <= 3
ORDER BY
    `category`,
    `votes` DESC;
I'm not an expert with MySQL, so I propose a different (standard SQL) approach: you can join the table with itself on Category and on Votes being less than or equal to the votes of the joined row.
-- For every photo, count the distinct names in the same category whose votes
-- are at least as high as its own; that count is the photo's rank, and only
-- ranks up to 3 are kept.
SELECT
    t1.Name,
    t1.Category,
    t1.Votes,
    COUNT(DISTINCT t2.Name) AS rank
FROM photo_contest t1
JOIN photo_contest t2
    ON  t1.Category = t2.Category
    AND t1.Votes <= t2.Votes
/*where whatever you want*/
GROUP BY
    t1.Name,
    t1.Category,
    t1.Votes
HAVING COUNT(DISTINCT t2.Name) <= 3
ORDER BY
    t1.Category,
    rank
I tested it here and it seems to do what you asked for
It sounds like your PHPMyAdmin needs an upgrade or a replacement. Meanwhile you might want to try @Stefano Zanini's non-MySQL specific SQL:
-- Top 3 per category via a self-join: a row's rank is the number of distinct
-- ids in the same category with at least as many votes.
SELECT
    t1.`id`, t1.`category`, t1.`year`, t1.`month`,
    t1.`votes`,
    COUNT(DISTINCT t2.`id`) AS `rank`  -- quoted: RANK is reserved in MySQL 8.0
FROM photo_contest t1
INNER JOIN photo_contest t2
    ON  t1.`category` = t2.`category`
    AND t1.`votes` <= t2.`votes`
WHERE t1.`votes` > 0
-- Every non-aggregated select column is listed, so the query is also valid
-- with ONLY_FULL_GROUP_BY enabled.
-- NOTE(review): assumes `id` identifies a photo; confirm against the schema.
GROUP BY t1.`id`, t1.`category`, t1.`year`, t1.`month`, t1.`votes`
HAVING COUNT(DISTINCT t2.`id`) <= 3
ORDER BY t1.`category`, `rank`;
It's available on sqlfiddle. If you think this solution suits you better please credit @Stefano Zanini's answer instead of this one.

Update oldest rows based on sum of column

We are having a following MySQL table to maintain user credits.
id user credits expiry status
-----------------------------------------
1 A 1.2 somedatetime 0
2 A 4.4 somedatetime 0
3 A 5.0 somedatetime 0
4 B 1.0 somedatetime 0
5 B 2.4 somedatetime 0
6 C 7.8 somedatetime 0
Whenever user makes a purchase, we deduct the amount from the available credits. To be fair to user, the credits with nearest expiry will be consumed first and so on. We will also update the status to mark row as consumed.
For example, if user A makes a purchase of $2, $1.2 will be debited from id 1 and remaining $0.8 from id 2 and so on. So Table will now look like
id user credits expiry status
-----------------------------------------
1 A 0.0 somedatetime 1
2 A 3.6 somedatetime 1
3 A 5.0 somedatetime 0
4 B 1.0 somedatetime 0
5 B 2.4 somedatetime 0
6 C 7.8 somedatetime 0
So far we have been doing it with a brute-force approach. Any ideas or suggestions on how to do it more efficiently, in a single query or as few queries as possible?
Update: since someone asked about our current brute-force approach, it's iterating each row from the oldest and updating till the purchase amount is covered, which is very inefficient.
Thanks
Using variables, you can calculate the cumulative total of credits per user. Run the inner query on its own first to see what is being calculated.
Fiddle Demo
-- Deduct each user's purchase (credits.credit_used) from that user's credit
-- rows, consuming the rows in order until the amount is covered.
-- The derived table t computes, per row, the running total of credits for the
-- row's user (cumulative_total). With prior = cumulative_total - credits:
--   cumulative_total <= credit_used : row fully consumed        -> 0
--   prior >= credit_used            : purchase already covered  -> unchanged
--   otherwise                       : row partly consumed       -> remainder
-- All decisions use values from t (a snapshot), never c.credits, so they do not
-- depend on the order in which the SET assignments are applied.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- NOTE(review): credits are DOUBLE in the test setup; exact comparisons at the
-- boundary are subject to floating-point rounding.
UPDATE customer AS c
INNER JOIN (
    SELECT
        cu.`id`,
        cu.`credits`,
        credits.`credit_used`,
        -- Running total per user; restarts when the user changes.
        @total := IF(@customer = cu.`user`, @total + cu.`credits`, cu.`credits`) AS cumulative_total,
        -- Must run after the running-total expression (left-to-right evaluation).
        @customer := cu.`user` AS user_current
    FROM customer AS cu
    CROSS JOIN (SELECT @customer := '', @total := 0) AS init
    INNER JOIN credits
        ON cu.`user` = credits.`user`
    -- Keep each user's rows together, nearest expiry first.
    ORDER BY cu.`user`, cu.`expiry`, cu.`id`
) AS t
    ON c.`id` = t.`id`
SET
    c.credits = CASE
        WHEN t.cumulative_total <= t.credit_used THEN 0
        WHEN t.cumulative_total - t.credits >= t.credit_used THEN t.credits
        ELSE t.cumulative_total - t.credit_used
    END,
    -- Mark every row that contributed to the purchase; leave the others as is.
    c.status = CASE
        WHEN t.cumulative_total - t.credits < t.credit_used THEN 1
        ELSE c.status
    END;
My test Setup:
CREATE TABLE customer
(`id` int, `user` varchar(1), `credits` double, `expiry` int, `status` int)
;
INSERT INTO customer
(`id`, `user`, `credits`, `expiry`, `status`)
VALUES
(1, 'A', 1.2, 1, 0),
(2, 'A', 4.4, 2, 0),
(3, 'A', 5.0, 3, 0),
(4, 'B', 1.0, 4, 0),
(5, 'B', 2.4, 5, 0),
(6, 'C', 7.8, 6, 0)
;
CREATE TABLE credits
(`id` int, `user` varchar(1), `credit_used` double)
;
INSERT INTO credits
(`id`, `user`, `credit_used`)
VALUES
(1, 'A', 2.0),
(2, 'B', 3.4)
;
http://sqlfiddle.com/#!9/485673/1
-- Deduct @amount from user 'A', consuming the rows with the nearest expiry
-- first. @amount holds the part of the purchase that is still uncovered.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- Only `credits` is written; `status` is not touched by this statement.
SET @amount = 2;
UPDATE t1
INNER JOIN (
    SELECT
        t2.id,
        -- New balance for this row, computed from the amount still uncovered.
        IF(@amount = 0, t2.credits, IF(@amount > t2.credits, 0, t2.credits - @amount)) AS credits,
        -- Carry the uncovered remainder to the next row. Once a row covers the
        -- rest of the purchase the remainder must drop to 0, otherwise the same
        -- leftover would be deducted again from every later row.
        -- Must run after the balance expression above (left-to-right evaluation).
        IF(@amount >= t2.credits, @amount := @amount - t2.credits, @amount := 0) AS remaining
    FROM (
        SELECT id, credits
        FROM t1
        WHERE credits > 0 AND `user` = 'A'
        ORDER BY expiry ASC
    ) t2
) t
    ON t1.id = t.id
SET t1.credits = t.credits
WHERE t1.user = 'A';

Finding in between time in a list of times

I have a table that looks like this
userid | eventid | description | date | starttime | endtime
1 1 Event 1 2016-02-02 09:30:00 11:00:00
1 2 Event 2 2016-02-02 13:30:00 15:00:00
1 3 Event 3 2016-02-02 17:30:00 21:00:00
2 4 Event 4 2016-02-03 13:00:00 14:00:00
2 5 Event 5 2016-02-03 15:00:00 16:00:00
I need to find what is the sum of time between the events on the same day by the user.
Like this:
userid | timeBetween
1 05:00:00
2 01:00:00
I should also assume that there may be overlapping times for example event1 starts at 11:00 ends 13:00 and event2 starts 12:00 and ends 14:00 by the same user on the same day. These cases are rare and I believe returning 00:00 here is the appropriate answer.
I solved a similar problem, finding the sum of the length of all events per day.
SELECT *,
SEC_TO_TIME( SUM( TIME_TO_SEC(TIMEDIFF(`endtime`,`starttime`)))) as sumtime
FROM `events`
group by userid, date
order by sumtime desc
Given this sample data:
CREATE TABLE t
(`userid` int, `eventid` int, `description` varchar(7), `date` date, `starttime` time, `endtime` time)
;
INSERT INTO t
(`userid`, `eventid`, `description`, `date`, `starttime`, `endtime`)
VALUES
(1, 1, 'Event 1', '2016-02-02', '09:30:00', '11:00:00'),
(1, 2, 'Event 2', '2016-02-02', '13:30:00', '15:00:00'),
(1, 3, 'Event 3', '2016-02-02', '17:30:00', '21:00:00'),
(2, 4, 'Event 4', '2016-02-03', '13:00:00', '14:00:00'),
(2, 5, 'Event 5', '2016-02-03', '15:00:00', '16:00:00')
;
this query
-- Total idle time between consecutive events, per user.
-- The inner query walks the events and, for each row, subtracts the previous
-- row's endtime from the current starttime; the outer query sums those gaps.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- NOTE(review): the "previous row" is reset only when userid changes, not when
-- the date changes.
SELECT userid, SEC_TO_TIME(SUM(TIME_TO_SEC(diff))) AS time_between
FROM (
    SELECT
        -- The variables still hold the previous row here. On the first row of
        -- a user there is no previous endtime, so COALESCE falls back to the
        -- row's own starttime and the gap is 0.
        TIMEDIFF(starttime, COALESCE(IF(userid != @prev_userid, NULL, @prev_endtime), starttime)) AS diff,
        @prev_endtime := endtime,
        @prev_userid := userid AS userid
    FROM
        t
        , (SELECT @prev_endtime := NULL, @prev_userid := NULL) var_init_subquery
    -- Events must be visited in chronological order within each user,
    -- otherwise the "previous" event is arbitrary.
    ORDER BY userid, `date`, starttime
) sq
GROUP BY userid;
will return
+--------+--------------+
| userid | time_between |
+--------+--------------+
| 1 | 05:00:00 |
| 2 | 01:00:00 |
+--------+--------------+
Explanation:
In this part
, (SELECT @prev_endtime := NULL, @prev_userid := NULL) var_init_subquery
ORDER BY userid
we initialize our variables. The ORDER BY is very important, since there's no order in a relational database unless you specify it. It is so important, because the SELECT clause processes the rows in this order.
In the SELECT clause the order is also very important. Here
@prev_endtime := endtime,
@prev_userid := userid AS userid
we assign the values of the current row to the variables. Since this happens after this line
TIMEDIFF(starttime, COALESCE(IF(userid != @prev_userid, NULL, @prev_endtime), starttime)) AS diff,
the variables still hold the values of the previous row in the timediff() function. Therefore we also have to use COALESCE(), because in the very first row and when the userid changes, there is no value to calculate the diff from. To get a diff of 0 there, COALESCE() exchanges the NULL value with the starttime.
The last part is obviously to simply sum the seconds of the "between times".
Here's one way you can get the timeBetween value in SECONDS
-- Total idle time between consecutive events, per user.
-- Both derived tables number each user's events from latest to earliest.
-- Joining firsttable.rank = secondtable.rank + 1 pairs every event
-- (firsttable) with the next later event (secondtable); the gap is the later
-- event's starttime minus the earlier event's endtime.
-- User variables are written '@name' ('#' would start a comment in MySQL).
-- The alias `secondtable` is spelled identically everywhere because table
-- aliases are case-sensitive on Unix; `rank` is quoted because it is a
-- reserved word in MySQL 8.0.
SELECT
    firsttable.userid,
    SEC_TO_TIME(SUM(TIME_TO_SEC(secondtable.starttime) - TIME_TO_SEC(firsttable.endtime))) timeBetween
FROM
(
    SELECT
        *,
        IF(@prev = userid, @rn1 := @rn1 + 1, @rn1 := 1) `rank`,
        @prev := userid
    FROM eventtable, (SELECT @prev := 0, @rn1 := 1) var
    ORDER BY userid, starttime DESC
) firsttable
INNER JOIN
(
    SELECT
        *,
        IF(@prev2 = userid, @rn2 := @rn2 + 1, @rn2 := 1) `rank`,
        @prev2 := userid
    FROM eventtable, (SELECT @prev2 := 0, @rn2 := 1) var
    ORDER BY userid, endtime DESC
) secondtable
    ON  firsttable.userid = secondtable.userid
    AND firsttable.`rank` = secondtable.`rank` + 1
    -- Only pair events that fall on the same day.
    AND firsttable.date = secondtable.date
GROUP BY firsttable.userid;
TEST:
Unable to add a fiddle.
So here's test data with schema:
DROP TABLE IF EXISTS `eventtable`;
CREATE TABLE `eventtable` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`userid` int(11) NOT NULL,
`eventid` int(11) NOT NULL,
`description` varchar(100) CHARACTER SET utf8 NOT NULL,
`date` date NOT NULL,
`starttime` time NOT NULL,
`endtime` time NOT NULL,
PRIMARY KEY (`id`)
) ;
INSERT INTO `eventtable` VALUES ('1', '1', '1', 'Event 1', '2016-02-02', '09:30:00', '11:00:00');
INSERT INTO `eventtable` VALUES ('2', '1', '2', 'Event 2', '2016-02-02', '13:30:00', '15:00:00');
INSERT INTO `eventtable` VALUES ('3', '1', '3', 'Event 3', '2016-02-02', '17:30:00', '21:00:00');
INSERT INTO `eventtable` VALUES ('4', '2', '4', 'Event 4', '2016-02-03', '13:00:00', '14:00:00');
INSERT INTO `eventtable` VALUES ('5', '2', '5', 'Event 5', '2016-02-03', '15:00:00', '16:00:00');
Result:
Executing the above query on the given test data you will get output like below:
userid timeBetween
1 05:00:00
2 01:00:00
Note:
For overlapping events the above query will give you negative timeBetween value.
You can replace the SEC_TO_TIME... line with the following:
SEC_TO_TIME(IF(SUM(TIME_TO_SEC(secondtable.starttime) - TIME_TO_SEC(firsttable.endtime)) < 0, 0,SUM(TIME_TO_SEC(secondtable.starttime) - TIME_TO_SEC(firsttable.endtime)))) timeBetween
If you take the TIMEDIFF of the MIN(starttime) and MAX(endtime) for each user/day and then subtract the sum of events as calculated earlier, this will give you the times in between.
try this on
-- Column names are written without quotes: quoted, they would be string
-- literals rather than column references. TIMEDIFF(a, b) returns a - b, so the
-- end time goes first to get a positive duration. `your_table` and the column
-- names are placeholders.
select TIMEDIFF(end_time, start_time) from your_table
hope this one help you

How to ignore next duplicated row?

I need your help!
I have a table:
CREATE TABLE `table` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`res` varchar(255) DEFAULT NULL,
`value` int(6) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=8 DEFAULT CHARSET=utf8;
-- Records of table
INSERT INTO `table` VALUES (1, 'gold', 44);
INSERT INTO `table` VALUES (2, 'gold', 44);
INSERT INTO `table` VALUES (3, 'gold', 45);
INSERT INTO `table` VALUES (4, 'gold', 46);
INSERT INTO `table` VALUES (5, 'gold', 44);
INSERT INTO `table` VALUES (6, 'gold', 44);
INSERT INTO `table` VALUES (7, 'gold', 44);
INSERT INTO `table` VALUES (8, 'gold', 47);
I need to write a SELECT query that ignores the next or previous duplicated rows, so that I receive data like this:
- gold:44 (ignored 1 record)
- gold:45
- gold:46
- gold:44 (ignored 2 records)
- gold:47
It does not matter which of the duplicated records is ignored (first, second, or last).
(i tried to use group by value or distinct but this way removes other records with same value)
You can solve this with a gaps and islands solution.
- Normally that involves ROW_NUMBER() which is not present in MySQL
- The solution below mimics ROW_NUMBER() with variables and ORDER BY
Link to example : http://sqlfiddle.com/#!9/32e72/12
-- Gaps-and-islands: collapse each run of consecutive rows (in id order, per
-- res) that share the same value into one row carrying the run's smallest id.
-- User variables stand in for ROW_NUMBER(); they are written '@name' ('#'
-- would start a comment in MySQL).
SELECT
    MIN(id) AS id,
    res,
    value
FROM
(
    -- Second pass: number the rows within each (res, value) pair.
    SELECT
        IF(@res = res AND @val = value, @row := @row + 1, @row := 1) AS val_ordinal,
        id            AS id,
        res_ordinal   AS res_ordinal,
        @res := res   AS res,
        @val := value AS value
    FROM
    (
        -- First pass: number the rows within each res, in id order.
        SELECT
            IF(@res = res, @row := @row + 1, @row := 1) AS res_ordinal,
            id            AS id,
            @res := res   AS res,
            @val := value AS value
        FROM
            `table`,
            (
                SELECT @row := 0, @res := '', @val := 0
            )
                AS initialiser
        ORDER BY
            res, id
    )
        AS sequenced_res_id,
    (
        -- Reset the same variables before the second numbering pass.
        SELECT @row := 0, @res := '', @val := 0
    )
        AS initialiser
    ORDER BY
        res, value, id
)
    AS sequenced_res_val_id
-- Within one run both ordinals advance together, so their difference stays
-- constant; a different run of the same value gets a different difference.
GROUP BY
    res,
    value,
    res_ordinal - val_ordinal
ORDER BY
    MIN(id)
;
If I add res_ordinal, val_ordinal and res_ordinal - val_ordinal to your data, it can be seen that you can now differentiate between the two sets of 44
GROUP
INSERT INTO `table` VALUES ('1', 'gold', '44'); 1 - 1 = 0 (Gold, 44, 0)
INSERT INTO `table` VALUES ('2', 'gold', '44'); 2 - 2 = 0
INSERT INTO `table` VALUES ('3', 'gold', '45'); 3 - 1 = 2 (Gold, 45, 2)
INSERT INTO `table` VALUES ('4', 'gold', '46'); 4 - 1 = 3 (Gold, 46, 3)
INSERT INTO `table` VALUES ('5', 'gold', '44'); 5 - 3 = 2 (Gold, 44, 2)
INSERT INTO `table` VALUES ('6', 'gold', '44'); 6 - 4 = 2
INSERT INTO `table` VALUES ('7', 'gold', '44'); 7 - 5 = 2
INSERT INTO `table` VALUES ('8', 'gold', '47'); 8 - 1 = 7 (Gold, 47, 7)
NOTE: According to your data I could use id instead of making my own res_ordinal. doing it this way, however, copes with gaps in the id sequence and having multiple different resources. This means that in the following example the two golds are considered to be duplicates of each other...
1 Gold 44 1 - 1 = 0 (Gold, 44, 0)
2 Poop 45 1 - 1 = 0 (Poop, 45, 0)
3 Gold 44 2 - 2 = 0 (Gold, 44, 0) -- Duplicate
4 Gold 45 3 - 1 = 2 (Gold, 45, 2)
-- Return a row unless the row whose id is exactly one less has the same res
-- and the same value.
SELECT cur.*
FROM `table` AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM `table` AS prev
    WHERE prev.id + 1 = cur.id
      AND prev.res = cur.res
      AND prev.value = cur.value
);
works fine
Use the DISTINCT clause to select unique rows like so:
-- `table` is a reserved word in MySQL and must be quoted with backticks.
SELECT DISTINCT res, value FROM `table`
Use Select DISTINCT res, value FROM table ... to avoid redundancy