how can i group by field value? - mysql

How can I group rows by one field so that each group starts at the value 0?
eg.
select * from t;
id, check_id, user_name
1, 0, user_a
2, 1, user_a
3, 2, user_a
1, 0, user_a
2, 1, user_a
3, 3, user_a
1, 0, user_b
2, 1, user_b
3, 3, user_b
Group by check_id, starting a new group at each value 0, per user:
user_name, check_info
user_a, 0-1-2
user_a, 0-1-3
user_b, 0-1-3
how can i group by?

Well, I read in the question: "group by one field start by value 0".
In that case, you can try this.
-- Concatenate check_id values for each run of rows that starts at check_id = 0.
-- User variables are written with '@'; a '#' would start a comment in MySQL.
-- NOTE: the run counter depends on the order in which rows are read, which
-- ORDER BY user_name alone does not fully determine.
select
    user_name,
    group_concat(distinct check_id order by check_id asc separator '-') check_info
from (
    select
        id,
        check_id,
        user_name,
        -- Start a new run (increment the counter) whenever check_id is 0.
        case
            when check_id = 0 then @rn := @rn + 1
            else @rn := @rn
        end as unique_id
    from t
    inner join (select @rn := 0) as tmp
    order by user_name
) as tbl
group by user_name, unique_id
This starts a new group at every record whose check_id is 0, with rows ordered by user_name.

This will give you what you want....maybe. It does work but is relying on the records coming back in the appropriate order when selected from the table (and that is NOT certain to occur).
-- Concatenate check_id values per user and per run, where a new run begins
-- whenever id does not increase relative to the previously read row.
-- User variables use '@' ('#' would start a comment in MySQL). The derived
-- column is aliased grp because GROUPING is a reserved word in MySQL 8.0.
-- NOTE: relies on rows being read from t in the intended order.
SELECT user_name, GROUP_CONCAT(check_id ORDER BY grp, check_id SEPARATOR '-')
FROM
(
    SELECT
        id,
        check_id,
        user_name,
        -- Keep the current run number while id grows; otherwise start a new run.
        @grouping := IF(id > @prev_id, @grouping, @grouping + 1) AS grp,
        @prev_id := id
    FROM t
    CROSS JOIN
    (
        SELECT @grouping := 0, @prev_id := 0
    ) sub0
) sub1
GROUP BY user_name, grp
It works by returning the rows and using variables to assign a grouping to them (so when the id gets smaller it adds one to the grouping value), then does a GROUP BY on the user name and the grouping value.
But really you need to have the grouping value somehow stored with your data in advance.

Provided that id is an auto-increment field, then you can use:
-- Concatenate check_id values for each slice of consecutive rows (ordered by id)
-- that starts at check_id = 0 within the same user_name.
-- User variables use '@' ('#' would start a comment in MySQL).
SELECT
    user_name,
    GROUP_CONCAT(check_id ORDER BY check_id SEPARATOR '-') AS check_info
FROM (
    SELECT
        id,
        check_id,
        user_name,
        -- Same user: bump the slice number only when a new slice starts (check_id = 0).
        -- New user: remember the user and bump the slice number; both branches of the
        -- inner IF are identical so the assignment's truthiness does not matter.
        @grp := IF(@uname = user_name,
                   IF(check_id = 0, @grp + 1, @grp),
                   IF(@uname := user_name, @grp + 1, @grp + 1)) AS grp
    FROM mytable
    CROSS JOIN (SELECT @grp := 0, @uname := '') AS vars
    ORDER BY id
) AS t
GROUP BY user_name, grp
Variables are used to identify slices of consecutive records, within each user_name partition, starting by 0.
Demo here

Related

get distinct values as array by user_id

I want to get a list of distinct values for each user limited by 3 values per user:
id, user_id, value
1, 1, a
2, 1, b
3, 2, c
4, 1, b
5, 1, d
6, 1, e
expected result:
user_id, values
1, [a,b,d]
2, [c]
is there some way to do this with GROUP BY user_id and DISTINCT?
Edit (based on comments):
We can use user-defined variables to assign a row number to each value within a partition of user_id. We then filter this result set to keep up to 3 rows per user_id only.
-- Return up to 3 distinct values per user_id, one row per (user_id, value).
-- User variables use '@' ('#' would start a comment in MySQL).
SELECT
    dt2.user_id,
    dt2.value
FROM (
    SELECT
        -- Row number within the current user_id; restarts at 1 on a new user.
        @rn := CASE
                   WHEN @ui = dt.user_id THEN @rn + 1
                   ELSE 1
               END AS row_no,
        -- Remember the current user_id. This must be an assignment (:=), not a
        -- comparison (=), otherwise @ui stays NULL and row_no is always 1.
        -- Aliased so the outer query can read dt2.user_id.
        @ui := dt.user_id AS user_id,
        dt.value
    FROM (
        SELECT DISTINCT
            user_id,
            value
        FROM your_table
        ORDER BY user_id
    ) AS dt
    CROSS JOIN (SELECT @rn := 0, @ui := NULL) AS user_init_vars
) AS dt2
WHERE dt2.row_no <= 3
Previous question's answer:
Group_Concat(Distinct ...) collects all the unique values for a user_id.
We can then use the Substring_Index() function to keep the string up to the 3rd comma, so that at most 3 values are considered.
At the end, we can use the Concat() function to enclose the resulting string in square brackets.
VALUES is a reserved keyword in MySQL, so consider giving the resulting column a different name.
Try the following:
-- One row per user_id: the first three distinct values, comma-separated and
-- wrapped in square brackets.
SELECT
    user_id,
    CONCAT(
        '[',
        SUBSTRING_INDEX(GROUP_CONCAT(DISTINCT value), ',', 3),
        ']'
    ) AS user_values
FROM your_table
GROUP BY user_id

Check if a user was "active" in multiple rows - MySQL

How would I go about creating group_ids in the following example based on the area(s) the users are active in?
group_id rep_id area datebegin dateend
1 1000 A 1/1/15 1/1/16
1 1000 B 1/1/15 1/1/16
2 1000 C 1/2/16 12/31/99
In the table you can see that rep 1000 was active in both A and B between 1/15 and 1/16. How would I go about coding the group_id field to group by datebegin & dateend?
Thanks for any help.
You can use variables in order to enumerate groups of records having identical rep_id, datebegin, dateend values:
-- Number the distinct (datebegin, dateend) ranges of each rep_id: rn restarts
-- at 1 for every new rep_id and increases by 1 for each further range.
-- User variables use '@' ('#' would start a comment in MySQL).
SELECT
    rep_id,
    datebegin,
    dateend,
    -- New rep: remember it and restart at 1 (both IF branches return 1 so the
    -- assignment's truthiness is irrelevant). Same rep: next number.
    @rn := IF(@rep_id <> rep_id,
              IF(@rep_id := rep_id, 1, 1),
              @rn + 1) AS rn
FROM (
    SELECT rep_id, datebegin, dateend
    FROM mytable
    GROUP BY rep_id, datebegin, dateend
) AS t
CROSS JOIN (SELECT @rep_id := 0, @rn := 0) AS v
ORDER BY rep_id, datebegin
Output:
rep_id, datebegin, dateend, rn
-----------------------------------
1000, 2015-01-01, 2016-01-01, 1
1000, 2016-02-01, 2099-12-03, 2
You can use the above query as a derived table and join back to the original table. rn field is the group_id field you are looking for.
You can use variables to assign groups. As you said, only if the date_begin and date_end exactly match for 2 rows, they would be in the same group. Else a new group starts.
-- Assign a group_id per rep_id: consecutive rows with identical
-- (date_begin, date_end) share a group_id; any other row starts a new group.
-- Fixes: user variables use '@' ('#' starts a comment in MySQL), the doubled
-- comma after date_end is removed, and '--' comments are followed by a space,
-- as MySQL requires.
select rep_id, area, date_begin, date_end
    ,case
        when @repid <> rep_id then @rn := 1  -- reset the group to 1 when rep_id changes
        when @repid = rep_id and @begin = date_begin and @end = date_end then @rn := @rn  -- same rep_id and dates: reuse the @rn previously assigned
        else @rn := @rn + 1  -- otherwise increment @rn by 1
     end as group_id
    ,@begin := date_begin
    ,@end := date_end
    ,@repid := rep_id
from t
cross join (select @rn := 0, @begin := '', @end := '', @repid := -1) r
order by rep_id, date_begin, date_end
The above query includes variables in the output. To only get the group_id use
-- Same group_id assignment as a derived table, so that only the real columns
-- and group_id are returned (the helper variable columns are dropped).
-- User variables use '@' ('#' would start a comment in MySQL).
select rep_id, area, date_begin, date_end, group_id
from (
    select rep_id, area, date_begin, date_end
        ,case
            when @repid <> rep_id then @rn := 1
            when @repid = rep_id and @begin = date_begin and @end = date_end then @rn := @rn
            else @rn := @rn + 1
         end as group_id
        ,@begin := date_begin
        ,@end := date_end
        ,@repid := rep_id
    from t
    cross join (select @rn := 0, @begin := '', @end := '', @repid := -1) r
    order by rep_id, date_begin, date_end
) x

MySql Get top n in each group

I wrote a query which is like :
-- Asker's attempt: rank rows per userId by streamTimeInSecond (descending)
-- and keep ranks 1..5. Only the variable sigil is restored here ('@' instead
-- of '#', which starts a comment in MySQL); the logic is left as asked.
select title, userId, created_at, deviceCode, streamTimeInSecond
from (
    select title, userId, created_at, deviceCode, streamTimeInSecond,
        @userId_rank := IF(@current_userId = userId, @userId_rank + 1, 1) as userId_rank,
        @current_userId := userId
    from ViewforfirstfiveMovies
    order by userId, streamTimeInSecond desc
) ranked
where userId_rank <= 5;
In this query I am trying to get 5 titles viewed by each userId present in database.
Problem: I am getting more than 5 records for few users.
Kindly help me with this problem.
MySQL does not guarantee the order of evaluation of expressions in the select. For that reason, you should only set inter-related variables in a single expression.
I would write this as:
-- Top 5 rows per userId by streamTimeInSecond (descending).
-- Both variables are read and written inside a single expression so the
-- result does not depend on the evaluation order of select-list items.
-- Fixes: '@' variable sigil ('#' starts a comment in MySQL) and the stray
-- trailing comma before FROM.
select title, userId, created_at, deviceCode, streamTimeInSecond
from (
    select title, userId, created_at, deviceCode, streamTimeInSecond,
        (@userId_rank := if(@current_userId = userId,
                            @userId_rank + 1,
                            -- New user: remember it and restart at 1; both
                            -- branches return 1 regardless of the assigned value.
                            if(@current_userId := userId, 1, 1)
                           )
        ) as userId_rank
    from ViewforfirstfiveMovies
    cross join (select @current_userId := 0, @userId_rank := 0) params
    order by userId, streamTimeInSecond desc
) ranked
where userId_rank <= 5;
In addition, you should set the variables in the same statement.

How to select certain numbers of groups in MySQL?

I have the table with data:
For this table I need to create pagination by the productId column. I know about LIMIT N,M, but it works with rows and not with groups. For example, for my table with pagination = 2 I expect to retrieve all 9 records with productId = 1 and 2 (the number of groups is 2).
So how can I paginate by number of groups?
I would be very thankful for answers with an example.
One way to do pagination by groups is to assign a product sequence to the query. Using variables, this requires a subquery:
-- Pagination by product group: rn is the sequence number of the productid
-- group (1 for the first productid, 2 for the second, ...), so
-- "rn between X and Y" returns every row of groups X..Y.
-- Fixes: '@' variable sigil ('#' starts a comment in MySQL); the inner IF now
-- records the current productid in @p (it used to overwrite @rn); the counter
-- advances once per group instead of once per row.
-- `table`, X and Y are placeholders for the real table name and page bounds.
select t.*
from (
    select t.*,
        (@rn := if(@p = productid,
                   @rn,
                   -- New group: remember its productid and advance the sequence;
                   -- both branches are identical so the assigned value's
                   -- truthiness does not matter.
                   if(@p := productid, @rn + 1, @rn + 1)
                  )
        ) as rn
    from table t
    cross join (select @rn := 0, @p := -1) vars
    order by t.productid
) t
where rn between X and Y;
With an index on t(productid), you can also do this with a subquery. The condition can then go in a having clause:
-- Pagination by product group without variables: pno is the number of distinct
-- productid values less than or equal to the row's productid.
-- Fix: removed an unbalanced closing parenthesis after the correlated WHERE.
-- X and Y are placeholders for the page bounds.
select
    t.*,
    (
        select count(distinct productid)
        from t t2
        where t2.productid <= t.productid
    ) as pno
from t
having pno between X and Y;
Try this:
-- Template: limit the number of groups by applying LIMIT to a grouped derived table.
-- The <...> parts and `number` are placeholders to fill in.
-- MySQL requires every derived table to have an alias, hence "as grouped".
select * from
(select * from <your table> where <your condition> group by <with your group>) as grouped
LIMIT number;

Per group, find first N users with SUM(x) >= N

Problem: Find the first 2 users who have at least 10 items in a category, per category.
Table structure:
-- Table used by the "first N users with SUM(x) >= N per category" question.
CREATE TABLE items(
-- Auto-incrementing primary key.
id INT AUTO_INCREMENT PRIMARY KEY,
-- Timestamp of the row; the queries use it to order rows and build running totals.
datetime datetime,
-- Category identifier.
category INT,
-- User identifier.
user INT,
-- Number of items in this row; summed per (category, user) by the queries.
items_count INT
);
Sample data:
-- Sample rows; each tuple is (datetime, category, user, items_count).
-- The last two rows share the same category and datetime.
INSERT INTO items (datetime, category, user, items_count) VALUES
('2013-01-01 00:00:00', 1, 1, 10),
('2013-01-01 00:00:01', 1, 2, 1),
('2013-01-01 00:00:02', 1, 3, 10),
('2013-01-01 00:00:03', 1, 2, 9),
('2013-01-01 00:00:00', 2, 4, 10),
('2013-01-01 00:00:01', 2, 1, 10),
('2013-01-01 00:00:01', 2, 5, 10);
Desired result:
category user
1 1
1 3
2 4
2 5
Note: As shown in the result, I need to be able to show preference towards a user when multiple users meet the requirements simultaneously.
SQL Fiddle:
http://sqlfiddle.com/#!2/58e60
This is what I have tried:
-- Asker's attempt: aggregate items per (category, user), keep users with at
-- least 10 items, then rank them per category and keep the first 2.
-- Only the variable sigil is restored here ('@' instead of '#', which starts
-- a comment in MySQL); the logic is left as asked.
SELECT
    Derived.*,
    IF (@category != Derived.category, @rank := 1, @rank := @rank + 1) AS rank,
    @category := category
FROM (
    SELECT
        category,
        user,
        SUM(items_count) AS items_count,
        MAX(datetime) AS datetime
    FROM items
    GROUP BY
        category,
        user
    HAVING
        SUM(items_count) >= 10
) AS Derived
JOIN (SELECT @rank := 0, @category := 0) AS r
HAVING
    rank <= 2
ORDER BY
    Derived.category,
    Derived.datetime
But it is faulty. Not only does it not take user precedence into account, it would produce the wrong result with data such as this:
('2013-01-01 00:00:00', 1, 1, 10),
('2013-01-01 00:00:01', 1, 2, 1),
('2013-01-01 00:00:02', 1, 3, 10),
('2013-01-01 00:00:03', 1, 2, 9),
('2013-01-01 00:00:10', 1, 3, 1);
Additional information: I do not know if procedures could make a difference in this scenario, but unfortunately it is not an option either. The user running this query only has SELECT privilege.
In order to find the users that meet your needs, you need the cumulative sum of the counts. The following query finds the occasions when a user first reaches 10 units. If the counts are never negative, then there is only one:
-- Return the item rows on which a user's running total within a category
-- first reaches 10.
select i.*
from (select i.*,
-- Running total of items_count for the same user and category, over all rows
-- up to and including this row's datetime.
(select sum(items_count)
from items i2
where i2.user = i.user and
i2.category = i.category and
i2.datetime <= i.datetime
) as cumsum
from items i
) i
-- Total before this row was below 10 and total including it is at least 10.
where cumsum - items_count < 10 and cumsum >= 10
order by datetime;
To get the first two, you need to use MySQL tricks for counting within a group. Here is an example that generally works:
-- For each category, keep the first two rows on which a user's running total
-- first reaches 10.
-- Fixes: '@' variable sigil ('#' starts a comment in MySQL); @prevc is now
-- initialised together with @rn so a value left over in the session cannot
-- affect the first row.
-- NOTE: rn relies on @prevc being assigned after rn is computed for each row,
-- which MySQL does not guarantee.
select i.*
from (
    select
        i.*,
        if(@prevc = category, @rn := @rn + 1, @rn := 1) as rn,
        @prevc := category
    from (
        select
            i.*,
            -- Running total of items_count for the same user and category,
            -- up to and including this row's datetime.
            (
                select sum(items_count)
                from items i2
                where i2.user = i.user and
                      i2.category = i.category and
                      i2.datetime <= i.datetime
            ) as cumsum
        from items i
    ) i
    cross join (select @rn := 0, @prevc := null) const
    -- Keep only the row on which the total crosses from below 10 to >= 10.
    where cumsum - items_count < 10 and cumsum >= 10
) i
where rn <= 2
order by category, datetime;
I have a problem with this approach, because nothing in MySQL says that the expression @prevc := category will actually be calculated after the calculation for rn. However, it seems to be the case, and this seems to work in practice.
I tried Gordon's query, but unfortunately it does not seem to work with large tables; after waiting 15 minutes for the result I decided to kill it.
However, the following query worked very well for me; it chewed its way through a table of ~6M rows in about 8 seconds.
-- Find, per category, the first @max_users users whose accumulated items_count
-- reaches @min_items, preferring @preferred_user when rows tie on datetime.
-- Fix: user variables use '@'; in MySQL a '#' starts a comment, so only the
-- two section labels below are comments.

-- Variable
SET @min_items = 10,
    @max_users = 2,
    @preferred_user = 5,
    -- Static
    @category = 0,
    @user = 0,
    @items = 0,
    @row_num = 1;
--
SELECT
    category,
    user,
    datetime
FROM (
    SELECT
        category,
        user,
        datetime,
        -- Position of the row within its category; restarts at 1 on a new category.
        IF (@category = category, @row_num := @row_num + 1, @row_num := 1) AS row_num,
        @category := category
    FROM (
        SELECT
            category,
            user,
            datetime,
            -- Reset the running total when the user changes.
            IF (@user != user, @items := 0, NULL),
            -- Accumulate only while the total is still below the threshold, so
            -- each user yields at most one non-NULL row at or above @min_items.
            IF (@items < @min_items, @items := @items + items_count, NULL) AS items_cumulative,
            @user := user
        FROM items
        ORDER BY
            category,
            user,
            datetime
    ) AS Derived
    WHERE items_cumulative >= @min_items
    ORDER BY
        category,
        datetime,
        -- FIELD returns 1 for the preferred user and 2 for any other user,
        -- so the preferred user sorts first among rows with equal datetime.
        FIELD(user, @preferred_user, user)
) AS Derived
WHERE row_num <= @max_users;