Mysql union with aggregate functions - mysql

Please tell me a way to optimise the below query
Could someone please help me optimise this union query with aggregate functions? The results of every branch should come back under the same column names.
-- Hourly metric roll-up for 2015-03-12 built from four UNION ALL branches over raw_metrics.
-- Every branch shifts `interval` back 15 minutes before taking its date and hour, groups by
-- hour, id_sector and metric, and differs only in the aggregate used and in the sub-select
-- that decides which metric names qualify.
-- Branch 1: SUM(interval_count) for metrics whose counter type has status = 1 and counter_id in (1,2).
(SELECT `raw_id`, `enodeb`, hour( DATE_SUB(`interval`, INTERVAL 15 MINUTE) ) AS intervaltime,`id_sector`,`metric`, Sum(`interval_count`) as metric_value, `cuase_code`,`qci`, N.network_name as network_name FROM raw_metrics left join networks N on N.network_id = raw_metrics.network_id WHERE 1 AND DATE(DATE_SUB(`interval`, INTERVAL 15 MINUTE)) = "2015-03-12" and `metric` in(SELECT `measurement_name` FROM `c_xml_measurements` left join counter_types CT ON CT.counter_id = c_xml_measurements.counter_type WHERE CT.status = 1 and CT.counter_id in (1,2)) GROUP BY intervaltime, `id_sector`, metric)
UNION ALL
-- Branch 2: AVG(metric_value) for counter types with (status = 1 and counter_id not in (1,2)) or
-- (status = 0 and counter_id in (1,2)), where aggregator is 'AVG' or empty.
(SELECT `raw_id`, `enodeb`, hour( DATE_SUB(`interval`, INTERVAL 15 MINUTE) ) AS intervaltime,`id_sector`,`metric`, AVG(`metric_value`) as metric_value, `cuase_code`,`qci`, N.network_name as network_name FROM raw_metrics left join networks N on N.network_id = raw_metrics.network_id WHERE 1 AND DATE(DATE_SUB(`interval`, INTERVAL 15 MINUTE)) = "2015-03-12" and `metric` in(SELECT `measurement_name` FROM `c_xml_measurements` left join counter_types CT ON CT.counter_id = c_xml_measurements.counter_type WHERE ((CT.status = 1 and CT.counter_id not in (1,2)) OR (CT.status = 0 and CT.counter_id in (1,2))) and (aggregator= 'AVG' OR aggregator= '') ) GROUP BY intervaltime, `id_sector`, metric)
UNION ALL
-- Branch 3: same counter-type filter as branch 2, but MAX(metric_value) where aggregator = 'MAX'.
(SELECT `raw_id`, `enodeb`, hour( DATE_SUB(`interval`, INTERVAL 15 MINUTE) ) AS intervaltime,`id_sector`,`metric`, MAX(`metric_value`) as metric_value, `cuase_code`,`qci`, N.network_name as network_name FROM raw_metrics left join networks N on N.network_id = raw_metrics.network_id WHERE 1 AND DATE(DATE_SUB(`interval`, INTERVAL 15 MINUTE)) = "2015-03-12" and `metric` in(SELECT `measurement_name` FROM `c_xml_measurements` left join counter_types CT ON CT.counter_id = c_xml_measurements.counter_type WHERE ((CT.status = 1 and CT.counter_id not in (1,2)) OR (CT.status = 0 and CT.counter_id in (1,2))) and (aggregator= 'MAX') ) GROUP BY intervaltime, `id_sector`, metric)
UNION ALL
-- Branch 4: same counter-type filter, but MIN(metric_value) where aggregator = 'MIN'.
-- The trailing ORDER BY raw_id DESC comes after the last parenthesized branch.
-- NOTE(review): raw_id, enodeb, cuase_code, qci and network_name are selected without being
-- grouped or aggregated in any branch; confirm the server's GROUP BY mode allows this.
(SELECT `raw_id`, `enodeb`, hour( DATE_SUB(`interval`, INTERVAL 15 MINUTE) ) AS intervaltime,`id_sector`,`metric`, MIN(`metric_value`) as metric_value, `cuase_code`,`qci`, N.network_name as network_name FROM raw_metrics left join networks N on N.network_id = raw_metrics.network_id WHERE 1 AND DATE(DATE_SUB(`interval`, INTERVAL 15 MINUTE)) = "2015-03-12" and `metric` in(SELECT `measurement_name` FROM `c_xml_measurements` left join counter_types CT ON CT.counter_id = c_xml_measurements.counter_type WHERE ((CT.status = 1 and CT.counter_id not in (1,2)) OR (CT.status = 0 and CT.counter_id in (1,2))) and (aggregator= 'MIN') ) GROUP BY intervaltime, `id_sector`, metric) ORDER BY raw_id DESC

Related

provide solution to me to combine multiple query to one which is having little difference in where condition

I have the queries below and need to combine them into a single query, but I am having difficulty doing so. Please help.
query1
-- Query 1: row count per institution and year (first four characters of m.timestamp)
-- for streamid = 1 and access = 2.
select count(*), s.institutionid, substr(m.timestamp, 1, 4) as y
from stats_to_institution as s
join masterstats_innodb as m on m.id = s.statid
join articles as a on a.productid = coalesce(NULLIF(video, 0), article)
where s.institutionid in(181, 69877 ) and m.timestamp between '2019-01-01' and '2020-12-31' and a.streamid = 1 and a.productid >= 49 and m.access = 2 group by s.institutionid, y;
query 2
-- Query 2: same as query 1 but for streamid = 5 (still restricted to access = 2).
select count(*), s.institutionid, substr(m.timestamp, 1, 4) as y
from stats_to_institution as s
join masterstats_innodb as m on m.id = s.statid
join articles as a on a.productid = coalesce(NULLIF(video, 0), article)
where s.institutionid in(181, 69877 ) and m.timestamp between '2019-01-01' and '2020-12-31' and a.streamid = 5 and a.productid >= 49 and m.access = 2 group by s.institutionid, y;
query 3
-- Query 3: row count per institution and year for streamid = 1, with no filter on m.access.
select count(*), s.institutionid, substr(m.timestamp, 1, 4) as y
from stats_to_institution as s
join masterstats_innodb as m on m.id = s.statid
join articles as a on a.productid = coalesce(NULLIF(video, 0), article)
where s.institutionid in(181, 69877 ) and m.timestamp between '2019-01-01' and '2020-12-31' and a.streamid = 1 and a.productid >= 49 group by s.institutionid, y;
query 4
-- Query 4: row count per institution and year for streamid = 5, with no filter on m.access.
select count(*), s.institutionid, substr(m.timestamp, 1, 4) as y
from stats_to_institution as s
join masterstats_innodb as m on m.id = s.statid
join articles as a on a.productid = coalesce(NULLIF(video, 0), article)
where s.institutionid in(181, 69877 ) and m.timestamp between '2019-01-01' and '2020-12-31' and a.streamid = 5 and a.productid >= 49 group by s.institutionid, y;
I have created multiple queries, but I want to combine them into a single query for reporting purposes. Please help.
You should use the conditional aggregation as follows:
-- Combines the four original queries into one pass over the joined tables.
-- Each CASE carries the streamid/access filter of one original query; COUNT skips the
-- NULL that CASE yields when its condition is not met, so each column counts only the
-- rows that query would have counted.
select count(case when a.streamid = 1 and m.access = 2 then 1 end) as query1_count,
count(case when a.streamid = 5 and m.access = 2 then 1 end) as query2_count,
count(case when a.streamid = 1 then 1 end) as query3_count,
count(case when a.streamid = 5 then 1 end) as query4_count,
s.institutionid,
substr(m.timestamp, 1, 4) as y
from stats_to_institution as s
join masterstats_innodb as m on m.id = s.statid
join articles as a on a.productid = coalesce(NULLIF(video, 0), article)
-- Only the filters shared by all four queries stay in WHERE.
where s.institutionid in(181, 69877 )
and m.timestamp between '2019-01-01' and '2020-12-31'
and a.productid >= 49 and a.streamid in (1,5)
group by s.institutionid, y;
I believe that there are some better solutions but this may also help.
Here is also a link for UNION to choose if you want union or union all.
-- Two grouped counts stacked with UNION, both limited to streamid 1 or 5:
-- the first branch keeps only access = 2, the second has no access filter.
-- count(*) is left unaliased and in its original spelling so the result heading is unchanged.
SELECT
    count(*),
    s.institutionid,
    SUBSTR(m.timestamp, 1, 4) AS y
FROM stats_to_institution AS s
INNER JOIN masterstats_innodb AS m
    ON m.id = s.statid
INNER JOIN articles AS a
    ON a.productid = COALESCE(NULLIF(video, 0), article)
WHERE s.institutionid IN (181, 69877)
    AND m.timestamp BETWEEN '2019-01-01' AND '2020-12-31'
    AND a.streamid IN (1, 5)
    AND a.productid >= 49
    AND m.access = 2
GROUP BY s.institutionid, y
UNION
SELECT
    count(*),
    s.institutionid,
    SUBSTR(m.timestamp, 1, 4) AS y
FROM stats_to_institution AS s
INNER JOIN masterstats_innodb AS m
    ON m.id = s.statid
INNER JOIN articles AS a
    ON a.productid = COALESCE(NULLIF(video, 0), article)
WHERE s.institutionid IN (181, 69877)
    AND m.timestamp BETWEEN '2019-01-01' AND '2020-12-31'
    AND a.streamid IN (1, 5)
    AND a.productid >= 49
GROUP BY s.institutionid, y;

Script is freezing. Want to take the average count of "Women" for 1 month ago to 60 months ago. This portion of the code is freezing on me

/*This is the code Im using to count and take the Average?*/
/* Per-company board statistics (T-SQL). Each output column is its own correlated
   sub-select over People joined to LinkPeopleToCompanies, filtered on ToDate/SinceDate
   relative to GETDATE() minus a day offset and tied to the outer Companies row.
   The TotalWomenonBoardN columns return '9999999' when the company has no matching
   links at that offset, otherwise the count of matching people with Gender = 'F'. */
SELECT GETDATE() AS CurrentDate,
(SELECT COUNT(1) FROM People WITH(NOLOCK) LEFT JOIN LinkPeopleToCompanies WITH(NOLOCK) ON
People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE() OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE() - 2190
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) AS TotalDirectorsAddedin6years,
(SELECT AVG(DATEDIFF("yyyy",People.BirthDay,GETDATE())) FROM People WITH(NOLOCK) LEFT JOIN
LinkPeopleToCompanies WITH(NOLOCK) ON People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE() - 30 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE() - 30
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) AS AverageDirectorAge1MonthAgo,
/* I repeat this same code going back progressively by 30 days or 1 month. Need some more effective code */
(SELECT AVG(LinkPeopleToCompanies.CustomInt1) FROM People WITH(NOLOCK) LEFT JOIN
LinkPeopleToCompanies WITH(NOLOCK) ON People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE() - 30 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE() - 30
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) AS AverageDirectorTenure1MonthAgo,
/* 30-day offset */
TotalWomenonBoard1 = CASE WHEN ((SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN
LinkPeopleToCompanies WITH(NOLOCK) ON People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE()- 30 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE()- 30
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) = 0 )
THEN '9999999'
ELSE (SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN LinkPeopleToCompanies WITH(NOLOCK) ON
People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE() - 30 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE() - 30
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID
AND People.Gender = 'F')
END,
/* 60-day offset */
TotalWomenonBoard2 = CASE WHEN ((SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN
LinkPeopleToCompanies WITH(NOLOCK) ON People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE()- 60 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE()- 60
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) = 0 )
THEN '9999999'
ELSE (SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN LinkPeopleToCompanies WITH(NOLOCK) ON
People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE() - 60 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE() - 60
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID
AND People.Gender = 'F')
END,
/* 1825-day offset */
TotalWomenonBoard60 = CASE WHEN ((SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN
LinkPeopleToCompanies WITH(NOLOCK) ON People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE()- 1825 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE()- 1825
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID) = 0 )
THEN '9999999'
ELSE (SELECT COUNT(1) FROM People WITH(NOLOCK) INNER JOIN LinkPeopleToCompanies WITH(NOLOCK) ON
People.PeopleID = LinkPeopleToCompanies.PeopleID
WHERE (LinkPeopleToCompanies.ToDate >= GETDATE()- 1825 OR LinkPeopleToCompanies.ToDate IS NULL)
AND LinkPeopleToCompanies.SinceDate <= GETDATE()- 1825
AND LinkPeopleToCompanies.CompaniesID = Companies.CompaniesID
AND People.Gender = 'F')
END
FROM Companies WITH(NOLOCK)
ORDER BY Company
Syntax in the question appears to be Transact-SQL, as used by Microsoft SQL Server (which is different from MySQL and Oracle). This question seems to be unrelated to the actual client program; that detail might be important, but setting that aside:
For performance, I would opt to use conditional aggregation. I would use a single SELECT and one set of table references, and avoid multiple SELECT queries against the same tables with different WHERE clauses.
To get a count of "some" rows, I'd use a conditional in an expression to return a 1 or 0, and then total those up with SUM. (Or, I return a non-null and NULL, and use COUNT.)
Without digging into every SELECT ... FROM included in the question, it looks like the only form of statement we need is this:
-- Skeleton for the conditional-aggregation approach: Companies is outer-joined once to
-- LinkPeopleToCompanies and People, and rows are grouped per company.
-- The "..." inside CASE is a placeholder for a metric's condition, not runnable SQL.
SELECT GETDATE() AS CurrentDate
, SUM(CASE WHEN ... THEN 1 ELSE 0 END) AS stat_count_foo
FROM Companies c
LEFT
JOIN LinkPeopleToCompanies t
ON t.companiesid = c.companiesid
LEFT
JOIN People p
ON p.peopleid = t.peopleid
GROUP
BY c.companiesid
Note that there is not a WHERE clause in there. (We would only include conditions in the WHERE clause to exclude rows that we don't need for any metrics.)
Adding some example metrics, our statement would look something like this:
-- Per-company director metrics from one pass over Companies and its linked people.
-- Each metric is an aggregate over a CASE expression, so the date and gender filters
-- live inside the aggregate instead of in separate sub-selects.
SELECT GETDATE() AS CurrentDate
-- Counts links with SinceDate at least 2190 days back and ToDate open or not yet passed.
, SUM( CASE
WHEN (t.todate >= GETDATE() OR t.todate IS NULL)
AND t.sincedate <= GETDATE() - 2190
THEN 1
ELSE 0
END
) AS TotalDirectorsAddedin6years
-- AVG skips the NULL produced for rows that fail the condition.
-- DATEDIFF takes its datepart as a bare keyword; a single-quoted string is rejected.
, AVG( CASE
WHEN (t.todate >= GETDATE() - 30 OR t.todate IS NULL)
AND t.sincedate <= GETDATE() - 30
THEN DATEDIFF(year,p.birthDay,GETDATE())
ELSE NULL
END
) AS AverageDirectorAge1MonthAgo
, SUM( CASE
WHEN (t.todate >= GETDATE()- 1825 OR t.todate IS NULL)
AND t.sincedate <= GETDATE()- 1825
AND p.Gender = 'F'
THEN 1
ELSE 0
END
) AS TotalWomenonBoard60
FROM Companies c
LEFT
JOIN LinkPeopleToCompanies t
ON t.companiesid = c.companiesid
LEFT
JOIN People p
ON p.peopleid = t.peopleid
GROUP
BY c.companiesid
Extend this by adding additional expressions. Do NOT add any additional SELECT, FROM or JOIN keywords. We could wrap the aggregate expressions in another expression to replace a NULL value with a "0" (for a company that doesn't have any people linked to it).

Expressing formula within a SELECT query

I have this existing query:
-- Per-extension call totals: the inner query aggregates the session table by extension,
-- and the outer query adds the three call counters together as Total_Calls.
SELECT
extension
, Total_Outbound+Total_Missed+Total_Received AS Total_Calls
, Total_Missed
, Total_Talk_Time_minutes
FROM (
SELECT
extension
-- Any row whose Answered is not 1 counts as missed.
, sum(if(Answered = 1,0,1)) AS Total_Missed
, sum(CASE WHEN LEGTYPE1 = 2 AND ANSWERED = 1 THEN 1 ELSE 0 END) AS Total_Received
, sum(if(LEGTYPE1 = 1,1,0)) AS Total_Outbound
-- Summed Duration divided by 60, rounded to two decimals.
, round(sum(Duration) / 60,2) AS Total_Talk_Time_minutes
FROM session a
GROUP BY extension
) x;
It works great but I need to add a metric/formula to it called missed_call_score right under Total_Talk_Time_Minutes.
The formula for the missed call score is this:
(missed calls/total talk time) * (average calls per CSR/total calls) * 100 but one thing to note is that the average calls per csr needs to ignore the MAX and MIN, so the lowest and highest number of calls taken.
I'm not sure how I could construct this score within a single select variable or the syntax I would use for this given the fact that it has to throw out the max and min.
Here is an example of my needed output and the formulas it should be using:
extension | Total calls | missed calls | total talk time | missed call score
----------------------------------------------------------------------------
1234 8 4 15.5 5.7
4321 4 0 9.42 0.0
5678 5 2 6.78 6.5
9876 13 6 18.3 7.2
Total call sum = 30
Total call sum without high and low = 13
average calls per CSR = (13/2) = 6.5
extension 1 = (4/15.5) * (6.5/30) * 100 = 5.7
extension 2 = (0/9.42) * (6.5/30) * 100 = 0.0
extension 3 = (2/6.78) * (6.5/30) * 100 = 6.5
extension 4 = (6/18.3) * (6.5/30) * 100 = 7.2
The data above for extension, total calls, missed calls and talk time are taken from my sql fiddle, linked below. I simply added the score column to give example of my expected output.
The fiddle linked below shows my create and inserts so hopefully that gives everything needed to assist me with this.
**SQL Fiddle**
http://sqlfiddle.com/#!9/aa1f9/1
UPDATE
Full production query with joins
-- Production version: per-extension call statistics for a fixed list of extensions over
-- the last five days, plus the missed-call score built from the global totals.
-- The inner query x computes the per-extension sums and four scalar sub-selects
-- (Total_All_Calls, Max_Calls, Min_Calls, CSR_Count) that repeat the same joins and filters.
SELECT firstn ,
lastn ,
extension ,
Total_Outbound+Total_Missed+Total_Received AS Total_Calls ,
Total_Missed ,
Total_Talk_Time_minutes ,
Total_All_Calls ,
Max_Calls ,
Min_Calls ,
CSR_Count ,
-- (missed / talk time) * (average calls per CSR without the max and min / total calls) * 100
((Total_Missed/Total_Talk_Time_minutes) *
(((Total_All_Calls-Max_Calls-Min_Calls)/CSR_Count)/Total_All_Calls)) * 100 AS missed_call_score
FROM ( SELECT u.firstn ,
u.lastn ,
c.extension ,
sum(if(Answered = 1,0,1)) AS Total_Missed ,
sum(CASE WHEN LEGTYPE1 = 2 AND ANSWERED = 1 THEN 1 ELSE 0 END) AS Total_Received ,
sum(CASE WHEN LEGTYPE1 = 1 THEN 1 ELSE 0 END) AS Total_Outbound ,
round(sum(Duration) / 60,2) AS Total_Talk_Time_minutes ,
-- Total number of matching calls across all listed extensions.
(SELECT COUNT(1) FROM ambition.session a INNER JOIN ambition.callsummary b ON a.NOTABLECALLID = b.NOTABLECALLID
INNER join ambition.mxuser c ON a.RESPONSIBLEUSEREXTENSIONID = c.EXTENSIONID
INNER join jackson_id.users u ON c.extension = u.extension
WHERE b.ts between curdate() - interval 5 day and now()
AND c.extension IN (7276,7314,7295,7306,7357,7200,7218,7247,7331,7255,7330,7000,7215,7240,7358,7312)) Total_All_Calls ,
-- Highest per-extension call count.
(SELECT MAX(CNT) FROM (SELECT COUNT(1) CNT, c.extension
FROM ambition.SESSION a INNER JOIN ambition.callsummary b ON a.NOTABLECALLID = b.NOTABLECALLID
INNER join ambition.mxuser c ON a.RESPONSIBLEUSEREXTENSIONID = c.EXTENSIONID
INNER join jackson_id.users u ON c.extension = u.extension
WHERE b.ts between curdate() - interval 5 day and now()
AND c.extension IN (7276,7314,7295,7306,7357,7200,7218,7247,7331,7255,7330,7000,7215,7240,7358,7312) GROUP BY responsibleuserextensionid) y) Max_Calls ,
-- Lowest per-extension call count.
(SELECT MIN(CNT) FROM (SELECT COUNT(1) CNT, c.extension
FROM ambition.SESSION a
INNER JOIN ambition.callsummary b ON a.NOTABLECALLID = b.NOTABLECALLID
INNER join ambition.mxuser c ON a.RESPONSIBLEUSEREXTENSIONID = c.EXTENSIONID
INNER join jackson_id.users u ON c.extension = u.extension
WHERE b.ts between curdate() - interval 5 day and now()
AND c.extension IN (7276,7314,7295,7306,7357,7200,7218,7247,7331,7255,7330,7000,7215,7240,7358,7312)GROUP BY responsibleuserextensionid) y) Min_Calls ,
-- Number of distinct extensions minus the two dropped as max and min.
(SELECT COUNT(DISTINCT c.extension)-2
FROM ambition.SESSION a INNER JOIN ambition.callsummary b ON a.NOTABLECALLID = b.NOTABLECALLID
INNER join ambition.mxuser c ON a.RESPONSIBLEUSEREXTENSIONID = c.EXTENSIONID
INNER join jackson_id.users u ON c.extension = u.extension
WHERE b.ts between curdate() - interval 5 day and now()
AND c.extension IN (7276,7314,7295,7306,7357,7200,7218,7247,7331,7255,7330,7000,7215,7240,7358,7312)) CSR_Count
FROM ambition.session a
INNER JOIN ambition.callsummary b ON a.NOTABLECALLID = b.NOTABLECALLID
INNER join ambition.mxuser c ON a.RESPONSIBLEUSEREXTENSIONID = c.EXTENSIONID
INNER join jackson_id.users u ON c.extension = u.extension
-- NOTE(review): k is joined here but never referenced, and the scalar sub-selects above do not
-- include this join; if knownnumbers can hold several rows per phone_number the per-extension
-- sums would be inflated relative to the totals. Confirm.
LEFT JOIN ambition.knownnumbers k ON a.callingpartyno = k.phone_number
WHERE b.ts between curdate() - interval 5 day and now()
AND c.extension IN (7276,7314,7295,7306,7357,7200,7218,7247,7331,7255,7330,7000,7215,7240,7358,7312)
GROUP BY c.extension, u.firstn, u.lastn ) x
This should work for you:
-- Per-extension call statistics plus the missed-call score.
-- The inner query x adds four scalar sub-counts over the whole session table so the
-- outer query can combine them with each extension's own totals.
SELECT
extension
, Total_Outbound+Total_Missed+Total_Received AS Total_Calls
, Total_Missed
, Total_Talk_Time_minutes
, Total_All_Calls
, Max_Calls
, Min_Calls
, CSR_Count
-- (missed / talk time) * (average calls per CSR without the max and min / total calls) * 100
, ((Total_Missed/Total_Talk_Time_minutes) *
(((Total_All_Calls-Max_Calls-Min_Calls)/CSR_Count)/Total_All_Calls)) * 100 AS missed_call_score
FROM (
SELECT
extension
, sum(if(Answered = 1,0,1)) AS Total_Missed
, sum(CASE WHEN LEGTYPE1 = 2 AND ANSWERED = 1 THEN 1 ELSE 0 END) AS Total_Received
, sum(CASE WHEN ANSWERED = 1 AND LEGTYPE1 = 1 THEN 1 ELSE 0 END) AS Total_Outbound
, round(sum(Duration) / 60,2) AS Total_Talk_Time_minutes
-- Row count of the whole session table.
, (SELECT COUNT(1) FROM session) Total_All_Calls
-- Highest and lowest per-extension row counts.
, (SELECT MAX(CNT) FROM (SELECT COUNT(1) CNT, EXTENSION FROM SESSION GROUP BY EXTENSION) y) Max_Calls
, (SELECT MIN(CNT) FROM (SELECT COUNT(1) CNT, EXTENSION FROM SESSION GROUP BY EXTENSION) y) Min_Calls
-- Distinct extensions minus the two dropped as max and min.
, (SELECT COUNT(DISTINCT EXTENSION)-2 FROM SESSION) CSR_Count
FROM session a
GROUP BY extension
) x;
Here is the fiddle.
Basically I used sub-counts in your derived table x to get each of the variables needed for missed_call_score. One major thing worth noting is that the logic was off for Total_Outbound, so I tweaked that to a CASE statement instead of an IF(). I selected the count columns in the outer query just so you can see what is going on, you can remove those.
I've done something similar in the past and extracted this snippet from my code.
I think/hope that this might help you getting started (I left out most of the columns from your query and you'd have to adjust avg(amount) to match your formula.
-- Average amount per extension, leaving out the rows that hold each extension's
-- minimum or maximum amount. The window functions attach those two bounds to every row.
-- avg(amount) is left unaliased and in its original spelling so the result heading is unchanged.
SELECT
    extension,
    avg(amount)
FROM (
    SELECT
        src.*,
        MIN(amount) OVER (PARTITION BY extension) AS min_amt,
        MAX(amount) OVER (PARTITION BY extension) AS max_amt
    FROM your_table AS src
) AS bounded
WHERE amount > min_amt
    AND amount < max_amt
GROUP BY extension;

MySql COUNT datediff values in complex function

I have this SQL Query that returns datediff of number of week
-- One row per newsletter: the number of weeks (day difference / 7, rounded to 0 decimals)
-- between the group's earliest t1.date and the date returned by the correlated sub-select,
-- which picks the earliest later row with id_desc 5 or 7 for the same newsletter.
-- HAVING drops the groups where that difference is NULL.
-- NOTE(review): the sub-select reads t1.id_desc and t1.date, which are neither grouped nor
-- aggregated; which row's values are used depends on the server's GROUP BY handling. Confirm.
SELECT
ROUND(DATEDIFF((
SELECT t.date FROM actividad_newsletters t WHERE t.id_newsletter = t1.id_newsletter AND t1.id_desc = 3 AND (t.id_desc = 5 OR t.id_desc = 7) AND t.date > t1.date ORDER BY t.date LIMIT 1),
MIN(t1.date)
)/7, 0) as weeks
FROM actividad_newsletters t1
INNER JOIN newsletter t2 ON t1.id_newsletter = t2.id
GROUP BY id_newsletter
HAVING weeks IS NOT NULL
And gets this results:
0
0
0
0
0
1
1
1
1
1
1
2
2
3
3
3
3
3
3
3
3
3
3
3
.
.
.
117
117
118
119
119
I want do a count for same result like:
total week
----- ----
5 0
6 1
2 2
11 3
. .
. .
. .
2 117
1 118
2 119
How should I do it?
I tried with count(weeks) but isn't working
Try This
-- Wraps the per-newsletter weeks query in a derived table (t) and counts how many
-- newsletters fall on each weeks value.
Select weeks, count(*) from (SELECT
ROUND(DATEDIFF((
SELECT t.date FROM actividad_newsletters t WHERE t.id_newsletter = t1.id_newsletter AND t1.id_desc = 3 AND (t.id_desc = 5 OR t.id_desc = 7) AND t.date > t1.date ORDER BY t.date LIMIT 1),
MIN(t1.date)
)/7, 0) as weeks
FROM actividad_newsletters t1
INNER JOIN newsletter t2 ON t1.id_newsletter = t2.id
GROUP BY id_newsletter
-- Groups whose weeks value is NULL are removed before counting.
HAVING weeks IS NOT NULL) as t group by weeks
SOLVED:
-- Final version: the per-newsletter weeks query becomes a derived table (aliased "count"),
-- and the outer query returns each weeks value with the number of newsletters that have it.
SELECT weeks, COUNT(weeks) FROM (
SELECT
ROUND(DATEDIFF((
SELECT t.date FROM actividad_newsletters t WHERE t.id_newsletter = t1.id_newsletter AND t1.id_desc = 3 AND (t.id_desc = 5 OR t.id_desc = 7) AND t.date > t1.date ORDER BY t.date LIMIT 1),
MIN(t1.date)
)/7, 0) as weeks
FROM actividad_newsletters t1
INNER JOIN newsletter t2 ON t1.id_newsletter = t2.id
GROUP BY id_newsletter
-- NULL weeks are already filtered here, so COUNT(weeks) counts every remaining row.
HAVING weeks IS NOT NULL ) AS count GROUP BY weeks

Cohort analysis in SQL

Looking to do some cohort analysis on a userbase. We have 2 tables "users" and "sessions", where users and sessions both have a "created_at" field. I'm looking to formulate a query that yields a 7 by 7 table of numbers (with some blanks) that shows me: a count of users who were created on a particular day who also have a session created y = (0..6 days ago), indicating that he returned on that day.
created_at d2 d3 d4
today * * *
today-1 49 * *
today-2 45 30 *
today-3 47 48 18
...
In this case, 47 users who were created on today-3 returned on today-2.
Can I perform this in a single MySQL query? I can perform the queries individually like so, but it'd be really nice to have it all in one query.
-- Single-window query: users with os = 'ios' joined to their sessions whose updated_at lies
-- between the two timestamps; a user appears once per matching session.
SELECT `users`.* FROM `users` INNER JOIN `sessions` ON `sessions`.`user_id` = `users`.`id` WHERE `users`.`os` = 'ios' AND (`sessions`.`updated_at` BETWEEN '2013-01-16 08:00:00' AND '2013-01-17 08:00:00')
This seems a complex problem. Regardless of whether it also seems to you a difficult one or not, it is never a bad idea to start working it up from a smaller problem.
You could start, for instance, with a query returning all the users (just the users) that have been registered within the last week, i.e. starting from the day six days from now, as per your requirement:
-- Every user whose created_at is on or after the date six days before today.
SELECT u.*
FROM users AS u
WHERE u.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
The next step could be grouping the results by dates and counting rows in every group:
-- Number of recent users for each distinct created_at value.
SELECT
    u.created_at,
    COUNT(*) AS user_count
FROM users AS u
WHERE u.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
GROUP BY u.created_at
If created_at is a datetime or timestamp, use DATE(created_at) as the grouping criterion:
-- Number of recent users per calendar date; DATE() strips the time part before grouping.
SELECT
    DATE(u.created_at) AS created_at,
    COUNT(*) AS user_count
FROM users AS u
WHERE u.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
GROUP BY DATE(u.created_at)
However, you don't seem to want absolute dates in the output, but only relative ones, like today, today - 1 day etc. In that case, you could use the DATEDIFF() function, which returns the number of days between two dates, to produce (numeric) offsets from today and group by those values:
-- Number of recent users per day offset from today (0 = created today).
-- The query groups by the same expression it selects, so it stays valid under
-- ONLY_FULL_GROUP_BY; each calendar date maps to exactly one offset, so the groups
-- are the same as when grouping by DATE(created_at).
SELECT
DATEDIFF(CURDATE(), created_at) AS created_at,
COUNT(*) AS user_count
FROM users
WHERE created_at >= CURDATE() - INTERVAL 6 DAY
GROUP BY DATEDIFF(CURDATE(), created_at)
Your created_at column would contain "dates" like 0, 1 and so on till 6. Converting them into today, today-1 etc. is trivial and you will see that in the final query. So far, however, we've reached the point at which we need to take one step back (or, perhaps, it's rather a half step to the right), because we don't really need to count the users but rather their returns. So, the actual working dataset from users that is needed at the moment will be this:
-- Recent users with the number of days between their creation date and today.
SELECT
    u.id,
    DATEDIFF(CURDATE(), u.created_at) AS day_offset
FROM users AS u
WHERE u.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
We need user IDs to join this rowset to (the one that will be derived from) sessions and we need day_offset as the grouping criterion.
Moving on, a similar transformation will need to be performed on the sessions table, and I won't go into details on that. Suffice it to say that the resulting query will be nearly identical to the last one, with just two exceptions:
id gets replaced with user_id;
DISTINCT is applied to the entire subset.
The reason for DISTINCT is to return no more than one row per user & day: it is my understanding that however many sessions a user might have on a particular day, you want to count them as one return. So, here's what gets derived from sessions:
-- Recent sessions reduced to distinct (user, day offset) pairs, so several sessions by
-- one user on the same day produce a single row.
SELECT DISTINCT
    s.user_id,
    DATEDIFF(CURDATE(), s.created_at) AS day_offset
FROM sessions AS s
WHERE s.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
Now it only remains to join the two derived tables, apply grouping and use conditional aggregation to get the required results:
-- Cohort grid: one row per signup day offset, one column per session day offset (0-6).
-- Each SUM adds up a 0/1 comparison, giving the number of users in the row's cohort who
-- have at least one session on that column's day.
SELECT
    -- Renders offset 0 as 'today' and offset n as 'today-n'.
    CONCAT('today', IFNULL(CONCAT('-', NULLIF(signup.DayOffset, 0)), '')) AS created_at,
    SUM(visit.DayOffset = 0) AS d0,
    SUM(visit.DayOffset = 1) AS d1,
    SUM(visit.DayOffset = 2) AS d2,
    SUM(visit.DayOffset = 3) AS d3,
    SUM(visit.DayOffset = 4) AS d4,
    SUM(visit.DayOffset = 5) AS d5,
    SUM(visit.DayOffset = 6) AS d6
FROM (
    -- Users created within the window, with their day offset from today.
    SELECT
        id,
        DATEDIFF(CURDATE(), created_at) AS DayOffset
    FROM users
    WHERE created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
) AS signup
LEFT JOIN (
    -- Distinct (user, day offset) pairs for sessions within the window.
    SELECT DISTINCT
        user_id,
        DATEDIFF(CURDATE(), created_at) AS DayOffset
    FROM sessions
    WHERE created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
) AS visit
    ON signup.id = visit.user_id
GROUP BY signup.DayOffset
;
I must admit that I haven't tested/debugged this, but, if this be needed, I'll be happy to work with the data samples you will have provided, once you have provided them. :)
Example Of Month Wise Cohort:
First Let's Create Table Individual User Activity Flow (MONTH WISE):
-- Per-user activity flags: for each user created in the given range, m1..m12 are 1 when
-- the user has at least one order with a non-NULL order_date in that calendar month, else 0.
-- `order` is back-quoted because ORDER is a reserved word in MySQL.
-- NOTE(review): MONTH() ignores the year, so orders from any year feed the same column;
-- confirm this is intended given the two-year signup range.
SELECT
mu.created_timestamp AS cohort
, mu.id AS user_id
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 1 AND l.user_id = mu.id) AS m1
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 2 AND l.user_id = mu.id) AS m2
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 3 AND l.user_id = mu.id) AS m3
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 4 AND l.user_id = mu.id) AS m4
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 5 AND l.user_id = mu.id) AS m5
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 6 AND l.user_id = mu.id) AS m6
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 7 AND l.user_id = mu.id) AS m7
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 8 AND l.user_id = mu.id) AS m8
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 9 AND l.user_id = mu.id) AS m9
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 10 AND l.user_id = mu.id) AS m10
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 11 AND l.user_id = mu.id) AS m11
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 12 AND l.user_id = mu.id) AS m12
FROM user mu
WHERE mu.created_timestamp BETWEEN '2018-01-01 00:00:00' AND '2019-12-31 23:59:59'
Then After This Table Calculate the individual activity-sum of the user:
-- Monthly cohort summary: groups the per-user activity flags (derived table c) by signup
-- month, returning the number of signups and, per calendar month, how many of those users
-- placed at least one order.
-- `order` is back-quoted because ORDER is a reserved word in MySQL.
-- NOTE(review): both MONTH(cohort) and MONTH(order_date) ignore the year, so 2018 and 2019
-- signups and orders fall into the same buckets; confirm this is intended.
SELECT MONTH(c.cohort) AS cohort
,COUNT(c.user_id) AS signups
,SUM(c.m1) AS m1
,SUM(c.m2) AS m2
,SUM(c.m3) AS m3
,SUM(c.m4) AS m4
,SUM(c.m5) AS m5
,SUM(c.m6) AS m6
,SUM(c.m7) AS m7
,SUM(c.m8) AS m8
,SUM(c.m9) AS m9
,SUM(c.m10) AS m10
,SUM(c.m11) AS m11
,SUM(c.m12) AS m12
FROM (SELECT
mu.created_timestamp AS cohort
, mu.id AS user_id
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 1 AND l.user_id = mu.id) AS m1
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 2 AND l.user_id = mu.id) AS m2
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 3 AND l.user_id = mu.id) AS m3
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 4 AND l.user_id = mu.id) AS m4
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 5 AND l.user_id = mu.id) AS m5
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 6 AND l.user_id = mu.id) AS m6
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 7 AND l.user_id = mu.id) AS m7
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 8 AND l.user_id = mu.id) AS m8
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 9 AND l.user_id = mu.id) AS m9
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 10 AND l.user_id = mu.id) AS m10
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 11 AND l.user_id = mu.id) AS m11
,(SELECT IF(COUNT(l.order_date) = 0 , 0, 1) FROM `order` l WHERE MONTH(l.order_date) = 12 AND l.user_id = mu.id) AS m12
FROM user mu
WHERE mu.created_timestamp BETWEEN '2018-01-01 00:00:00' AND '2019-12-31 23:59:59') AS c GROUP BY MONTH(cohort)
You can use days instead of months, although cohort analysis is most commonly done by month.
This answer inverts the output table that @Newy wanted so the cohorts are the rows instead of the columns, and uses absolute dates instead of relative ones.
I was looking for a query that would give me something like this:
Date d0 d1 d2 d3 d4 d5 d6
2016-11-03 3 1 0 0 0 0 0
2016-11-04 4 2 0 1 0 0 *
2016-11-05 7 0 1 1 0 * *
2016-11-06 7 3 1 1 * * *
2016-11-07 13 5 1 * * * *
2016-11-08 4 0 * * * * *
2016-11-09 1 * * * * * *
I was looking for the number of users that signed up a certain date, then how many of those users returned 1 day later, 2 days later, etc. So on 2016-11-07 13 users signed up and had a session, then 5 of those users came back 1 day later, then one user came back 2 days later, etc.
I took the first subquery of @Andriy M's large query and modified it to give me the date a user signed up, not the days relative to the current date:
-- Recent users with their signup calendar date (exposed under the name DayOffset).
SELECT
    u.id,
    DATE(u.created_at) AS DayOffset
FROM users AS u
WHERE u.created_at >= DATE_SUB(CURDATE(), INTERVAL 6 DAY)
Then the LEFT JOIN subquery I modified to look like this:
-- Distinct (user, day offset) pairs for recent sessions, where the offset is the number
-- of days between the user's signup and the session.
-- The signup time comes from the joined users table, so it must be referenced as users.created_at.
SELECT DISTINCT
sessions.user_id,
DATEDIFF(sessions.created_at, users.created_at) AS DayOffset
FROM sessions
LEFT JOIN users ON (users.id = sessions.user_id)
WHERE sessions.created_at >= CURDATE() - INTERVAL 6 DAY
I wanted the day offset to be relative not to the current date, as in @Andriy M's answer, but to the date the user signed up. So I did a left join on the users table to get the time the user signed up and did a date diff on that.
So the final query looks something like this:
-- Cohort grid keyed by absolute signup date: one row per signup date, and columns d0..d6
-- counting the users of that cohort who have a session 0..6 days after signing up.
SELECT u.DayOffset as Date,
SUM(s.DayOffset = 0) AS d0,
SUM(s.DayOffset = 1) AS d1,
SUM(s.DayOffset = 2) AS d2,
SUM(s.DayOffset = 3) AS d3,
SUM(s.DayOffset = 4) AS d4,
SUM(s.DayOffset = 5) AS d5,
SUM(s.DayOffset = 6) AS d6
FROM (
-- Recent users with their signup calendar date.
SELECT
id,
DATE(created_at) AS DayOffset
FROM users
WHERE created_at >= CURDATE() - INTERVAL 6 DAY
) as u
LEFT JOIN (
-- Distinct (user, days since signup) pairs for recent sessions.
-- The joined table is users, so the signup time is users.created_at.
SELECT DISTINCT
sessions.user_id,
DATEDIFF(sessions.created_at, users.created_at) AS DayOffset
FROM sessions
LEFT JOIN users ON (users.id = sessions.user_id)
WHERE sessions.created_at >= CURDATE() - INTERVAL 6 DAY
) as s
-- The derived table s exposes user_id, so the join must use that column.
ON s.user_id = u.id
GROUP BY u.DayOffset
Monthly cohort based on @Newy's response:
-- Month-offset cohort grid: one row per signup month offset and one column per session
-- month offset (0-6), both counted in whole months back from the current date.
SELECT
    signup.MonthOffset AS MONTH,
    SUM(activity.MonthOffset = 0) AS m0,
    SUM(activity.MonthOffset = 1) AS m1,
    SUM(activity.MonthOffset = 2) AS m2,
    SUM(activity.MonthOffset = 3) AS m3,
    SUM(activity.MonthOffset = 4) AS m4,
    SUM(activity.MonthOffset = 5) AS m5,
    SUM(activity.MonthOffset = 6) AS m6
FROM (
    -- Users from the last six months with their month offset from today.
    SELECT
        id,
        TIMESTAMPDIFF(month, DATE(date), CURDATE()) AS MonthOffset
    FROM users
    WHERE date >= DATE_SUB(CURDATE(), INTERVAL 6 month)
) AS signup
LEFT JOIN (
    -- Distinct (user, month offset) pairs for sessions from the last six months.
    SELECT DISTINCT
        user_id,
        TIMESTAMPDIFF(month, DATE(date), CURDATE()) AS MonthOffset
    FROM sessions
    WHERE sessions.date >= DATE_SUB(CURDATE(), INTERVAL 6 month)
) AS activity
    ON activity.user_id = signup.id
GROUP BY signup.MonthOffset;