Longest Consecutive Days Count for BigQuery - mysql

Right now I just have an aggregate of how many days a user has worked. I'm trying to change this query to most continuous days worked.
Where u12345 would be 4 and u1 would be 2.
Is this possible to do with a BigQuery statement?
EDIT: I am close with the following query, but u1 is getting 3 instead of 2.
-- Attempted streak count: each row e is paired with the same user's row ee
-- dated exactly one day later.
-- COUNT(e.uid) + 1 therefore counts every day that has a following day, across
-- ALL of the user's runs, and adds 1 once. Separate runs are not distinguished:
-- u1 has two 2-day runs, i.e. two matched rows, so the result is 2 + 1 = 3.
SELECT MIN(e.timestamp) as date_created, e.uid, COUNT(e.uid) + 1 AS streak
FROM OnSite e
LEFT JOIN OnSite ee
ON e.uid = ee.uid
AND DATE(e.timestamp) = DATE(DATE_ADD(ee.timestamp, INTERVAL -1 DAY))
-- Filtering on ee.uid IS NOT NULL drops the unmatched rows, so the LEFT JOIN
-- behaves like an inner join here.
WHERE ee.uid IS NOT NULL
GROUP BY e.uid;
Schema (MySQL v5.7)
-- Sample fixture: one row per (user, worksite, day).
-- u12345 has a 4-day run (2019-01-01 .. 2019-01-04) followed by an isolated day;
-- u1 has two separate 2-day runs.
CREATE TABLE OnSite
(`uid` varchar(55), `worksite_id` varchar(55), `timestamp` datetime)
;
INSERT INTO OnSite
(`uid`, `worksite_id`, `timestamp`)
VALUES
("u12345", "worksite_1", '2019-01-01'),
("u12345", "worksite_1", '2019-01-02'),
("u12345", "worksite_1", '2019-01-03'),
("u12345", "worksite_1", '2019-01-04'),
("u12345", "worksite_1", '2019-01-06'),
("u1", "worksite_1", '2019-01-01'),
("u1", "worksite_1", '2019-01-02'),
("u1", "worksite_1", '2019-01-05'),
("u1", "worksite_1", '2019-01-06')
;
Query #1
-- Number of distinct timestamp values recorded for each user.
SELECT
    uid,
    COUNT(DISTINCT `timestamp`) AS Total
FROM OnSite
GROUP BY uid;
| uid | Total |
| ------ | ----- |
| u1 | 4 |
| u12345 | 5 |
View on DB Fiddle

Below is for BigQuery Standard SQL
If you are interested in the maximum number of consecutive days each user spent on the same worksite:
#standardSQL
-- Longest run of consecutive days per user, where a run must stay on one worksite.
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
  -- One row per run. worksite_id must be part of the key: grp is numbered
  -- independently inside each (uid, worksite_id) partition, so grouping by
  -- (uid, grp) alone would merge unrelated runs from different worksites.
  -- COUNT(DISTINCT ts) counts days, not rows, so a repeated day is not counted twice.
  SELECT uid, worksite_id, grp, COUNT(DISTINCT ts) consecuitive_days
  FROM (
    SELECT uid, worksite_id, ts,
      -- Running number of gaps seen so far; it stays constant within a run.
      COUNTIF(step > 1) OVER(PARTITION BY uid, worksite_id ORDER BY ts) grp
    FROM (
      SELECT uid, worksite_id, ts,
        -- Days since the previous row of the same user and worksite (NULL for the first row).
        DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid, worksite_id ORDER BY ts), DAY) step
      FROM `project.dataset.table`
    )
  ) GROUP BY uid, worksite_id, grp
) GROUP BY uid
If the worksite does not matter and you are just looking for the maximum number of consecutive days:
#standardSQL
-- Longest run of consecutive days per user, regardless of worksite.
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
  -- One row per run. COUNT(DISTINCT ts) counts days rather than rows, so a user
  -- with several rows on the same day (e.g. two worksites) is not over-counted.
  SELECT uid, grp, COUNT(DISTINCT ts) consecuitive_days
  FROM (
    SELECT uid, ts,
      -- Running number of gaps seen so far; it stays constant within a run.
      COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
    FROM (
      SELECT uid, ts,
        -- Days since the user's previous row (NULL for the first row, 0 for a repeated day).
        DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
      FROM `project.dataset.table`
    )
  ) GROUP BY uid, grp
) GROUP BY uid
You can test and play with any of the above using the sample data from your question, as in the example below:
#standardSQL
-- Self-contained example: the CTE stands in for the real table.
WITH `project.dataset.table` AS (
  SELECT 'u12345' uid, 'worksite_1' worksite_id, DATE '2019-01-01' ts UNION ALL
  SELECT 'u12345', 'worksite_1', '2019-01-02' UNION ALL
  SELECT 'u12345', 'worksite_1', '2019-01-03' UNION ALL
  SELECT 'u12345', 'worksite_1', '2019-01-04' UNION ALL
  SELECT 'u12345', 'worksite_1', '2019-01-06' UNION ALL
  SELECT 'u1', 'worksite_1', '2019-01-01' UNION ALL
  SELECT 'u1', 'worksite_1', '2019-01-02' UNION ALL
  SELECT 'u1', 'worksite_1', '2019-01-05' UNION ALL
  SELECT 'u1', 'worksite_1', '2019-01-06'
)
SELECT uid, MAX(consecuitive_days) max_consecuitive_days
FROM (
  -- One row per run; COUNT(DISTINCT ts) counts days, so repeated days are not counted twice.
  SELECT uid, grp, COUNT(DISTINCT ts) consecuitive_days
  FROM (
    SELECT uid, ts,
      -- Running number of gaps seen so far; it stays constant within a run.
      COUNTIF(step > 1) OVER(PARTITION BY uid ORDER BY ts) grp
    FROM (
      SELECT uid, ts,
        -- Days since the user's previous row (NULL for the first row).
        DATE_DIFF(ts, LAG(ts) OVER(PARTITION BY uid ORDER BY ts), DAY) step
      FROM `project.dataset.table`
    )
  ) GROUP BY uid, grp
) GROUP BY uid
with result:
Row uid max_consecuitive_days
1 u12345 4
2 u1 2

does this fit for you?
-- Longest run of consecutive days per user.
-- Requires MySQL 8.0+ (already needed for LAG()). The run number is computed
-- with a window SUM instead of a session variable, so the result does not
-- depend on the order in which the server evaluates variable assignments.
SELECT uid, MAX(cnt) AS max_cnt
FROM (
  -- One row per run of consecutive days.
  SELECT uid, grp, COUNT(*) AS cnt
  FROM (
    SELECT uid,
      -- Running count of "run breaks": a row starts a new run unless it is the
      -- user's first row or falls exactly one day after the previous row.
      SUM(CASE
            WHEN oldDate IS NULL OR DATE_ADD(oldDate, INTERVAL 1 DAY) = `timestamp` THEN 0
            ELSE 1
          END) OVER (PARTITION BY uid ORDER BY `timestamp`
                     ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS grp
    FROM (
      SELECT
        uid,
        `timestamp`,
        -- Previous timestamp of the same user (NULL for the first row).
        LAG(`timestamp`) OVER (PARTITION BY uid ORDER BY `timestamp` ASC) AS oldDate
      FROM OnSite
    ) t
  ) t2
  GROUP BY uid, grp
) t3
GROUP BY uid;
Result
| uid | max_cnt |
| ------ | ------- |
| u1 | 2 |
| u12345 | 4 |
DB Fiddle

Related

UNION query results in a new row instead of new column

The following query returns a result with 5 columns (
date ,lowest_hr_price ,max_hr_price ,min_price , max_price )
instead of
(date ,lowest_hr_price ,max_hr_price , min_price ,max_price , AvgPrice, AvgPieces ).
AvgPrice and AvgPieces are instead added as rows .
-- First branch: one row per day with five columns. m holds the price at the
-- earliest and latest date_time of each day; n holds the daily min and max price.
-- Second branch (after the final UNION): daily averages.
-- UNION appends the second branch's rows beneath the first, matching columns by
-- position, so the averages come back as additional rows under the first
-- branch's column names rather than as extra columns.
(select date(m.min_max_date) as date,
max(case when m.lbl='min_hr_price' then m.min_max_hr_price else null end) as lowest_hr_price,
max(case when m.lbl='max_hr_price' then m.min_max_hr_price else null end) as max_hr_price,
max(case when n.lbl='min_price' then n.min_max_price else null end) as min_price,
max(case when n.lbl='max_price' then n.min_max_price else null end) as max_price
from (select 'min_hr_price' as lbl, price as min_max_hr_price, date_time as min_max_date
from mytable
where date_time in (select min(date_time) as min_date from mytable group by date(date_time)) and symbol = 'dollar'
UNION
select 'max_hr_price', price, date_time
from mytable WHERE symbol = 'dollar'
AND date_time in (select max(date_time) as max_date from mytable WHERE symbol = 'dollar' group by date(date_time))) as m,
(
select 'min_price' as lbl,
min(date_time) as min_max_date,
min(price) as min_max_price
from mytable
WHERE symbol = 'dollar'
group by date(date_time)
UNION
select 'max_price' as lbl,
max(date_time) as min_max_date,
max(price) as min_max_price
from mytable
WHERE symbol = 'dollar'
group by date(date_time)
) n
where m.min_max_date=n.min_max_date
group by date(m.min_max_date)
order by m.min_max_date DESC
)
UNION
(SELECT null, null, date_time, avg (price) as AvgPrice, avg (pieces) as AvgPieces FROM mytable
WHERE symbol = 'dollar'
group by date(date_time))
Actual result:
date |lowest_hr_price | max_hr_price | min_price | max_price
------------------------------------------------------------------------------------
2018-03-06 | 1 | 2 | 0 | 10
NULL | NULL | {date} | {avgprice} | {avgpieces}
Expected result:
date |lowest_hr_price | max_hr_price | min_price | max_price | AvgPrice | AvgPieces
-------------------------------------------------------------------------------------------------------------
2018-03-06 | 1 | 2 | 0 | 10 | {avgprice}| {avgpieces}
A UNION appends the rows of one SELECT to those of another. If you instead need all the results on the same row,
you can join the two queries on the date, e.g.:
-- Per-day price summary with the daily averages added as columns.
-- T1 and T2 each produce one row per day, so the join is 1:1 and no DISTINCT is needed.
select
  T1.date,
  T1.lowest_hr_price,
  T1.max_hr_price,
  T1.min_price,
  T1.max_price,
  T2.AvgPrice,
  T2.AvgPieces
from (
  -- T1: price at the earliest/latest date_time of the day (m) and daily min/max price (n).
  select date(m.min_max_date) as date,
    max(case when m.lbl='min_hr_price' then m.min_max_hr_price else null end) as lowest_hr_price,
    max(case when m.lbl='max_hr_price' then m.min_max_hr_price else null end) as max_hr_price,
    max(case when n.lbl='min_price' then n.min_max_price else null end) as min_price,
    max(case when n.lbl='max_price' then n.min_max_price else null end) as max_price
  from (select 'min_hr_price' as lbl, price as min_max_hr_price, date_time as min_max_date
        from mytable
        where date_time in (select min(date_time) as min_date from mytable group by date(date_time)) and symbol = 'dollar'
        UNION
        select 'max_hr_price', price, date_time
        from mytable WHERE symbol = 'dollar'
        AND date_time in (select max(date_time) as max_date from mytable WHERE symbol = 'dollar' group by date(date_time))) as m,
       (
        select 'min_price' as lbl,
          min(date_time) as min_max_date,
          min(price) as min_max_price
        from mytable
        WHERE symbol = 'dollar'
        group by date(date_time)
        UNION
        select 'max_price' as lbl,
          max(date_time) as min_max_date,
          max(price) as min_max_price
        from mytable
        WHERE symbol = 'dollar'
        group by date(date_time)
       ) n
  where m.min_max_date=n.min_max_date
  group by date(m.min_max_date)
) T1
inner join (
  -- T2: daily averages. Every column is named (a derived table cannot expose
  -- two columns both called NULL) and the day is selected as the grouped
  -- expression rather than an arbitrary date_time from the group.
  select date(date_time) as avg_date, avg(price) as AvgPrice, avg(pieces) as AvgPieces
  from mytable
  where symbol = 'dollar'
  group by date(date_time)
) T2 on T1.date = T2.avg_date
-- Ordering belongs on the outermost query; ORDER BY inside a derived table is not guaranteed to survive.
order by T1.date desc

How to get most occurences of rows for every user in mysql

user_id category suburb dated walk_time
1 experience US 2016-04-09 5
1 discovery US 2016-04-09 5
1 experience UK 2016-04-09 5
1 experience AUS 2016-04-23 10
2 actions IND 2016-04-15 2
2 actions IND 2016-04-15 1
2 discovery US 2016-04-21 2
3 discovery FR 2016-04-12 3
3 Emotions IND 2016-04-23 3
3 discovery UK 2016-04-12 4
3 experience IND 2016-04-12 3
I am trying to get every users most used category,suburb,dated,walk_time
so resulting table would be
user_id category suburb dated walk_time
1 experience US 2016-04-09 5
2 actions IND 2016-04-15 2
3 discovery IND 2016-04-12 3
The query I am trying here is
-- Attempt: take the first element of each GROUP_CONCAT list, ordered by a count.
-- The inner query groups by user_id AND all four attributes together, so
-- cnt, cct, wct and nct are the same COUNT(*) of the full combination; they are
-- not per-attribute frequencies.
select user_id,
substring_index(group_concat(suburb order by cnt desc), ',', 1) as suburb_visited,
substring_index(group_concat(category order by cct desc), ',', 1) as category_used,
substring_index(group_concat(walk_time order by wct desc), ',', 1) as walked,
substring_index(group_concat(dated order by nct desc), ',', 1) as dated_at
from (select user_id, suburb, count(*) as cnt,category, count(*) cct, walk_time, count(*) wct, dated,count(*) nct
from temp_user_notes
group by user_id, suburb,category,walk_time,dated
) upv
group by user_id;
-- Most frequent category, suburb, dated and walk_time for each user.
-- Each attribute is ranked independently of the others. The second ORDER BY key
-- makes the pick deterministic when two values are equally frequent.
SELECT T.user_id,
  (SELECT t1.category FROM temp_user_notes t1
   WHERE t1.user_id = T.user_id
   GROUP BY t1.category ORDER BY COUNT(*) DESC, t1.category LIMIT 1) AS category,
  (SELECT t2.suburb FROM temp_user_notes t2
   WHERE t2.user_id = T.user_id
   GROUP BY t2.suburb ORDER BY COUNT(*) DESC, t2.suburb LIMIT 1) AS suburb,
  (SELECT t3.dated FROM temp_user_notes t3
   WHERE t3.user_id = T.user_id
   GROUP BY t3.dated ORDER BY COUNT(*) DESC, t3.dated LIMIT 1) AS dated,
  (SELECT t4.walk_time FROM temp_user_notes t4
   WHERE t4.user_id = T.user_id
   GROUP BY t4.walk_time ORDER BY COUNT(*) DESC, t4.walk_time LIMIT 1) AS walk_time
FROM (SELECT DISTINCT user_id FROM temp_user_notes) T
http://sqlfiddle.com/#!9/8aac6a/19
Try this; it looks a little complicated, but I hope it helps. ;)
Mysql Schema:
-- Sample fixture mirroring the data in the question: one row per user note.
CREATE TABLE table1
(`user_id` int, `category` varchar(10), `suburb` varchar(3), `dated` datetime, `walk_time` int)
;
INSERT INTO table1
(`user_id`, `category`, `suburb`, `dated`, `walk_time`)
VALUES
(1, 'experience', 'US', '2016-04-09 00:00:00', 5),
(1, 'discovery', 'US', '2016-04-09 00:00:00', 5),
(1, 'experience', 'UK', '2016-04-09 00:00:00', 5),
(1, 'experience', 'AUS', '2016-04-23 00:00:00', 10),
(2, 'actions', 'IND', '2016-04-15 00:00:00', 2),
(2, 'actions', 'IND', '2016-04-15 00:00:00', 1),
(2, 'discovery', 'US', '2016-04-21 00:00:00', 2),
(3, 'discovery', 'FR', '2016-04-12 00:00:00', 3),
(3, 'Emotions', 'IND', '2016-04-23 00:00:00', 3),
(3, 'discovery', 'UK', '2016-04-12 00:00:00', 4),
(3, 'experience', 'IND', '2016-04-12 00:00:00', 3)
;
Query SQL:
-- Most frequent category, suburb, dated and walk_time per user, each ranked independently.
-- For every attribute: count occurrences per (user, value), concatenate the
-- values most-frequent-first, and keep the first list element.
-- SUBSTRING_INDEX(list, ',', 1) returns the whole string when there is no comma,
-- so a user with a single distinct value still gets that value; LEFT(list,
-- LOCATE(',', list) - 1) would return an empty string in that case.
-- NOTE: values that themselves contain a comma would be cut at that comma.
select c.user_id, c.category, s.suburb, d.dated, w.walk_time
from (
  select user_id, substring_index(group_concat(category order by cnt desc), ',', 1) as category
  from (
    select user_id, category, count(1) as cnt
    from table1
    group by user_id, category
  ) t
  group by user_id
) c
inner join (
  select user_id, substring_index(group_concat(suburb order by cnt desc), ',', 1) as suburb
  from (
    select user_id, suburb, count(1) as cnt
    from table1
    group by user_id, suburb
  ) t
  group by user_id
) s on c.user_id = s.user_id
inner join (
  select user_id, substring_index(group_concat(dated order by cnt desc), ',', 1) as dated
  from (
    select user_id, dated, count(1) as cnt
    from table1
    group by user_id, dated
  ) t
  group by user_id
) d on c.user_id = d.user_id
inner join (
  select user_id, substring_index(group_concat(walk_time order by cnt desc), ',', 1) as walk_time
  from (
    select user_id, walk_time, count(1) as cnt
    from table1
    group by user_id, walk_time
  ) t
  group by user_id
) w on c.user_id = w.user_id
Result:
| user_id | category | suburb | dated | walk_time |
+---------+------------+--------+---------------------+-----------+
| 1 | experience | US | 2016-04-09 00:00:00 | 5 |
| 2 | actions | IND | 2016-04-15 00:00:00 | 2 |
| 3 | discovery | IND | 2016-04-12 00:00:00 | 3 |

Select from a select statement from a defined value

I've the following table structure:
id |name |date
1 a 2012-01-01
2 a 2011-01-01
3 a 2010-01-01
4 a 2014-01-01
5 a 2011-01-01
I'd like to perform a select order by date (desc), and after select the first 3 rows from the results by a condition which would be where id = 1. So the second part of the query would be "give me the first 3 rows starting from the row whose id equals to 1"
EDIT:
After the first "part" the result would be:
SELECT id, name, date FROM table ORDER BY date DESC
id |name |date
4 a 2014-01-01
1 a 2012-01-01
2 a 2011-01-01
5 a 2011-01-01
3 a 2010-01-01
After the second part it should look like this (so the first 3 after the row whose id is 1):
id |name |date
2 a 2011-01-01
5 a 2011-01-01
3 a 2010-01-01
I have no idea how to solve this; please help me.
EDIT:
This is the concrete code I'd like to re-write:
-- The ten rows of `questions` with the latest `date`, with the username and
-- category name attached. The LEFT JOINs keep a question even when no matching
-- `user` or `categories` row exists (those columns are then NULL).
SELECT `id`, `questions`.`userid`, `categories`.`name`, `user`.`username`, `title`,
`details`, `date` FROM `questions`
LEFT JOIN `user`
ON `questions`.`userid` = `user`.`userid`
LEFT JOIN `categories`
ON `questions`.`categoryid` = `categories`.`categoryid`
ORDER BY `date` DESC LIMIT 10
-- Up to three rows whose date is strictly earlier than that of the row with
-- id = 1, latest first.
-- `table` is a reserved word in MySQL and must be quoted when used as a name;
-- `date` is quoted for consistency. The id tie-breaker makes the order of rows
-- that share a date deterministic.
SELECT id, name, `date`
FROM `table`
WHERE `date` < (SELECT `date` FROM `table` WHERE id = 1)
ORDER BY `date` DESC, id
LIMIT 3
This isn't pretty because MySQL (before version 8.0) doesn't support row_number() or common table expressions, but it should work. Basically, get the row number ordered by the date, then select the rows whose row number is greater than that of the anchor row (in this case the row with id = 1). Finally, use LIMIT to select the number of records you want.
-- Emulated ROW_NUMBER() with user variables (for MySQL versions without window functions).
-- Rows are numbered in mydate DESC order; the outer query keeps the rows whose
-- number is greater than that of the anchor row (id = 1) and returns the first three.
-- The two numbering passes use separate variables so they cannot interfere.
-- NOTE(review): this relies on the variable being incremented in ORDER BY order,
-- which the server does not formally guarantee — verify on your version.
SELECT id, name, mydate
FROM (
  SELECT id, name, mydate, @rn_all := @rn_all + 1 rn
  FROM mytable, (select @rn_all := 0) t
  ORDER BY mydate DESC
) t2
WHERE rn > (
  select rn
  from (
    SELECT id, name, mydate, @rn_anchor := @rn_anchor + 1 rn
    FROM mytable, (select @rn_anchor := 0) t
    ORDER BY mydate DESC
  ) t2
  where id = 1
)
-- Without an explicit order, LIMIT could return any three of the qualifying rows.
ORDER BY rn
LIMIT 3
SQL Fiddle Demo
This is what you want to do: it finds the first row whose id equals 4 and selects the rows from there on, then uses the LIMIT offset to skip to the next row and pull out 3.
-- Walks the rows in m_date DESC order. @a holds the current row's id, and @b is
-- switched to 1 once the anchor row (id = 4) is reached and stays 1 afterwards,
-- so join_id = 1 marks the anchor row and everything after it.
-- LIMIT 1,3 skips the anchor row itself and returns the next three rows.
-- User variables are written with @ in MySQL; # would start a comment.
SELECT id, name, m_date from(
SELECT id, name, m_date, @a := id, if(@a = 4, @b := 1, @b) AS join_id
FROM test
join(SELECT @a := 0, @b := 0) t
ORDER BY m_date DESC
) AS tt
WHERE join_id = 1
LIMIT 1,3
-- Same flag technique applied to the questions query: rows are walked in
-- `date` DESC order, @b becomes 1 at the anchor row (id = 11) and stays 1, and
-- LIMIT 1,10 skips the anchor row and returns the following ten.
-- User variables are written with @ in MySQL; # would start a comment.
SELECT temp.`id`, temp.`userid`, `categories`.`name`, `user`.`username`, temp.`title`,
temp.`details`, temp.`date` FROM (
SELECT `id`, `categoryid`, `details`, `title`, `userid`, `date`, @a := id, if(@a = 11, @b := 1, @b) AS join_id
FROM `questions`
join(SELECT @a := 0, @b := 0) t
ORDER BY `date` DESC
) as temp
LEFT JOIN `user`
ON temp.`userid` = `user`.`userid`
LEFT JOIN `categories`
ON temp.`categoryid` = `categories`.`categoryid`
WHERE join_id = 1
LIMIT 1,10;
SEE FIDDLE for clarification

Difficult MySQL Query - Getting Max difference between dates

I have a MySQL table of the following form
account_id | call_date
1 2013-06-07
1 2013-06-09
1 2013-06-21
2 2012-05-01
2 2012-05-02
2 2012-05-06
I want to write a MySQL query that will get the maximum difference (in days) between successive dates in call_date for each account_id. So for the above example, the result of this query would be
account_id | max_diff
1 12
2 4
I'm not sure how to do this. Is this even possible to do in a MySQL query?
I can do datediff(max(call_date),min(call_date)) but this would ignore dates in between the first and last call dates. I need some way of getting the datediff() between each successive call_date for each account_id, then finding the maximum of those.
I'm sure fp's answer will be faster, but just for fun...
-- For every call, find the same account's next call (the earliest later
-- call_date) and the gap to it in days; then keep the largest gap per account.
SELECT gaps.account_id,
       MAX(gaps.diff) AS max_diff
FROM (
    SELECT cur.account_id,
           DATEDIFF(MIN(nxt.call_date), cur.call_date) AS diff
    FROM my_table AS cur
    INNER JOIN my_table AS nxt
        ON nxt.account_id = cur.account_id
       AND nxt.call_date > cur.call_date
    GROUP BY cur.account_id, cur.call_date
) AS gaps
GROUP BY gaps.account_id;
-- Sample fixture from the question: one row per call.
-- Largest gap between successive calls: 12 days for account 1, 4 days for account 2.
CREATE TABLE t
(`account_id` int, `call_date` date)
;
INSERT INTO t
(`account_id`, `call_date`)
VALUES
(1, '2013-06-07'),
(1, '2013-06-09'),
(1, '2013-06-21'),
(2, '2012-05-01'),
(2, '2012-05-02'),
(2, '2012-05-06')
;
-- Largest gap in days between successive calls per account, using user variables.
-- Rows are walked in (account_id, call_date) order. @prev holds the previous
-- row's call_date and @acct the previous row's account_id.
-- The gap is only measured when the previous row belongs to the same account;
-- otherwise an account's first call would be compared with the last call of the
-- preceding account and could produce a bogus maximum. First rows yield 0.
-- NOTE(review): relies on the select list being evaluated left to right in
-- ORDER BY order, which the server does not formally guarantee.
select account_id, max(diff) from (
  select
    account_id,
    if(@acct = account_id, timestampdiff(day, @prev, call_date), 0) diff,
    @prev := call_date,
    @acct := account_id
  from
    t
    , (select @prev := null, @acct := null) v
  order by account_id, call_date
) sq
group by account_id
| ACCOUNT_ID | MAX(DIFF) |
|------------|-----------|
| 1 | 12 |
| 2 | 4 |
see it working live in an sqlfiddle
If you have an index on account_id, call_date, then you can do this rather efficiently without variables:
-- Largest gap in days between successive calls per account, without variables.
-- The correlated subquery fetches the latest earlier call of the same account;
-- an index on (account_id, call_date) lets it be answered with a single seek.
-- DATEDIFF is required: subtracting two DATE values with "-" in MySQL compares
-- them as YYYYMMDD numbers, which is wrong across month or year boundaries.
-- `table` is a placeholder name and a reserved word, so it is quoted.
select account_id, max(datediff(call_date, prev_call_date)) as diff
from (select t.account_id,
             t.call_date,
             (select t2.call_date
              from `table` t2
              where t2.account_id = t.account_id and t2.call_date < t.call_date
              order by t2.call_date desc
              limit 1
             ) as prev_call_date
      from `table` t
     ) t
group by account_id;
Just for educational purposes, doing it with JOIN:
-- Largest gap in days between successive calls per account, using joins only.
-- nxt is any later call of the same account; mid is a call strictly between the
-- two. Keeping only rows with no such mid leaves pairs of adjacent calls (and
-- rows with no later call at all, whose gap is NULL).
SELECT cur.account_id,
       MAX(DATEDIFF(nxt.call_date, cur.call_date)) AS max_diff
FROM t AS cur
LEFT JOIN t AS nxt
    ON nxt.account_id = cur.account_id
   AND nxt.call_date > cur.call_date
LEFT JOIN t AS mid
    ON mid.account_id = cur.account_id
   AND mid.call_date > cur.call_date
   AND mid.call_date < nxt.call_date
WHERE mid.account_id IS NULL
GROUP BY cur.account_id
Since you didn't specify, this shows max_diff of NULL for accounts with only 1 call.
-- Largest gap between successive calls per account.
-- a2 is an earlier call and a1 a later call of the same account; NOT EXISTS
-- keeps the pair only when no call of THAT account lies strictly between them.
-- The account_id condition on a3 is essential: without it, a call made by any
-- other account between the two dates would wrongly discard the pair.
-- NOTE(review): "date - date" yields days in Oracle; on MySQL use
-- DATEDIFF(a1.call_date, a2.call_date) instead — confirm the target engine.
SELECT a1.account_id , max(a1.call_date - a2.call_date)
FROM account a2, account a1
WHERE a1.account_id = a2.account_id
AND a1.call_date > a2.call_date
AND NOT EXISTS
(SELECT 1 FROM account a3
 WHERE a3.account_id = a1.account_id
 AND a1.call_date > a3.call_date AND a2.call_date < a3.call_date)
GROUP BY a1.account_id
Which gives :
ACCOUNT_ID MAX(A1.CALL_DATE - A2.CALL_DATE)
1 12
2 4

Give a sequential number for each GROUP BY values

I have a temp table with many rows (#TümDATA). I INSERT its rows into another temp table (#GrupTOT) with a GROUP BY clause. But I'm stuck here: I need to give the rows a sequential number after they are grouped.
Here is my SQL:
-- Inserts one row per (AY, BLK, DRE) combination from #TümDATA into #GrupTOT,
-- with TOT set to the sum of BORÇ for that combination.
INSERT INTO #GrupTOT(AY, BLK, DRE, TOT)
SELECT J.AY, J.BLK, J.DRE, SUM(J.BORÇ)
FROM #TümDATA J
GROUP BY J.AY, J.BLK, J.DRE
You can try SELECT INTO using an IDENTITY column. This will create the new temporary table #GrupTOT.
Here is a Fiddle example.
-- Creates #GrupTOT from the grouped rows and numbers them with an identity
-- column that starts at 1 and increments by 1.
-- (The comma after the AY column was missing, which is a syntax error.)
SELECT SeqNo = identity(int,1,1), --Identity column
       AY = J.AY,
       BLK = J.BLK,
       DRE = J.DRE,
       TOT = SUM(J.BORÇ)
INTO #GrupTOT
FROM #TümDATA J
GROUP BY J.AY, J.BLK, J.DRE;
--SELECT * FROM #GrupTOT
You can use ROW_NUMBER to get a number based on an ORDER BY. Or you could add an IDENTITY column to autoincrement a number on insert if that is what you want.
The ROW_NUMBER approach:
-- Group first, then number the grouped rows while inserting them.
-- Numbering follows Col1, Col2, Col3 ascending and [COUNT] descending.
WITH grouped AS
(
    SELECT Col1, Col2, Col3, COUNT(*) AS [COUNT]
    FROM dbo.Table1
    GROUP BY Col1, Col2, Col3
)
INSERT INTO dbo.Table2
SELECT
    ROW_NUMBER() OVER (ORDER BY Col1, Col2, Col3, [COUNT] DESC) AS RowNum,
    Col1,
    Col2,
    Col3,
    [COUNT]
FROM grouped
Try this: (Works in Oracle)
-- Self-contained Oracle example. ORDERS is inline sample data; the inner query
-- groups it by calendar day (TRUNC) and name, and the outer query numbers the
-- grouped rows with ROWNUM. There is no ORDER BY, so the numbering follows
-- whatever order the grouped rows happen to be produced in.
WITH ORDERS AS (
    SELECT TO_DATE('2013-09-18 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-19 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'James' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:02', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:03', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:04', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE('2013-09-21 16:00:01', 'YYYY-MM-DD HH24:MI:SS') AS THE_DATE, 'Jennifer' AS NAME FROM DUAL
)
SELECT THE_DATE, NAME, ROWNUM
FROM (
    SELECT TRUNC(THE_DATE) THE_DATE, NAME, COUNT(1)
    FROM ORDERS
    GROUP BY TRUNC(THE_DATE), NAME
);
Original Data:
9/18/2013 12:00:01 AM John
9/19/2013 12:00:01 AM James
9/20/2013 12:00:01 AM John
9/20/2013 12:00:02 AM John
9/20/2013 12:00:03 AM John
9/20/2013 12:00:04 AM John
9/21/2013 4:00:01 PM Jennifer
Result:
9/21/2013 Jennifer 1
9/19/2013 James 2
9/20/2013 John 3
9/18/2013 John 4
You can use the ROW_NUMBER() function.
E.g.
-- Collapse SomeTable to its distinct field_a values, then number them in field_a order.
WITH distinct_values AS (
    SELECT field_a
    FROM SomeTable
    GROUP BY field_a
)
SELECT
    ROW_NUMBER() OVER (ORDER BY field_a) AS row_num,
    field_a
FROM distinct_values