How to extract Cohort analysis in SQL ? or MySQL? - mysql

Looking at other posts was not working due to my lack of ability .
I am trying to extract a Cohort from the date of installation .
information TABLE
u_status
used : created_at , last_login
u_daily_login
SQL 
SET
#targetDate = '2020-07-01';
SELECT
#Datecount := TIMESTAMPDIFF(
DAY,
#targetDate,
DATE_FORMAT(NOW(), '%Y-%m-%d')) -1;
SELECT
dl.df,
COUNT(udl.uMasterId)
FROM
(
SELECT
DATE_FORMAT(
DATE_ADD(
#targetDate,
INTERVAL td.generate_series DAY
),
'%Y-%m-%d'
) AS df
FROM
(
SELECT
0 generate_series
FROM DUAL
WHERE
(#num := 1 -2) * 0
UNION ALL
SELECT
#num := #num +1
FROM
`information_schema`.COLUMNS
WHERE
#num <= #Datecount
) AS td
) AS dl
LEFT JOIN(
SELECT udlt.*
FROM
(
SELECT
*
FROM
u_daily_login
WHERE
DATE >= #targetDate
) AS udlt
INNER JOIN(
/*基準日に登録したユニークユーザー*/ SELECT
*
FROM
(
SELECT
DATE_FORMAT(us.`createdAt`, '%Y-%m-%d') AS cdate,
us.name,
uMasterId
FROM
u_status AS us
WHERE
DATE_FORMAT(us.`createdAt`, '%Y-%m-%d') = #targetDate
) AS noise
GROUP BY
noise.name
) AS uus
ON
uus.uMasterId = udlt.uMasterId
) AS udl
ON
dl.df = udl.date
GROUP BY
dl.df
LIMIT 100;
I got the desired result, As it is now, I'm typing targetDate manually.
ex. #targetDate = 2020-07-02 , "targetDate = 2020-07-03 ...etc
How can I efficiently extract daily cohorts?

Related

I am missing an alias for a derived table

I know, this question has been asked very often but I know my error, I know how I could fix it, but I can´t find the point where the error is. In my opinion, all the subqueries have different and unique names, I even gave the columns different names then the subqueries. Any help would be appreciated. Where is the point I am missing an alias?
Whenever I am trying to run this query I get the response "Every derived table must have its alias", which is an understandable error message, but I can't figure out where my error is located.
SELECT
mso.entity_id,
GROUP_CONCAT(msh.comment) AS comment,
msoa.lastname,
base_grand_total,
mso.created_at,
mso.status,
marketplace_order_id AS amazon_order_id,
clvData.recurrenceRate,
clvData.avgRepRate
FROM
mag_sales_flat_order AS mso
LEFT JOIN mag_sales_flat_order_status_history AS msh ON mso.entity_id = msh.parent_id
LEFT JOIN mag_sales_flat_order_address AS msoa ON mso.entity_id = msoa.parent_id
left join (
select
cast(((cet.cec - cnt.cnc) / cst.csc) AS decimal(6, 2)) as recurrenceRate,
avg(repRate.countedOrders) AS avgRepRate
from(
Select
*,
(
select
count(customer_email) AS csc
from
mag_sales_flat_order
where
created_at between '2017-01-01'
and '2017-12-31'
) AS cst,
(
select
count(customer_email) AS cec
from
mag_sales_flat_order
where
created_at between '2017-01-01'
and '2020-12-31'
) AS cet,
(
select
count(mso_new.customer_email) AS cnc
from
(
select
*
from
mag_sales_flat_order
where
created_at between '2018-01-01'
and current_date()
) AS mso_new
left join (
select
*
from
mag_sales_flat_order
where
created_at between '2017-01-01'
and '2017-12-31'
) AS mso_old on mso_new.customer_email = mso_old.customer_email
)) AS cnt
join (
select
customer_email,
count(grand_total) as countedOrders,
sum(grand_total) as summedOrders
from
mag_sales_flat_order
group by
customer_email
) AS repRate on cl.customer_email = repRate.customer_email
) AS clvData on mso.customer_email = clvData.customer_email
WHERE
store_id IN({$store['id']})
AND (
mso.status = 'complete'
OR mso.status = 'closed'
OR mso.status = 'processing'
OR mso.status = 'exported'
OR mso.status LIKE 'pending%'
)
AND (
DATE_FORMAT(mso.created_at, '%Y-%m-%d') >= '$begin_date'
)
AND (
DATE_FORMAT(mso.created_at, '%Y-%m-%d') <= '$end_date'
)
GROUP BY
entity_id;
SELECT
mso.entity_id,
GROUP_CONCAT(msh.comment) AS comment,
msoa.lastname,
base_grand_total,
mso.created_at,
mso.status,
marketplace_order_id AS amazon_order_id,
clvData.recurrenceRate,
clvData.avgRepRate
FROM mag_sales_flat_order AS mso
LEFT JOIN mag_sales_flat_order_status_history AS msh ON mso.entity_id = msh.parent_id
LEFT JOIN mag_sales_flat_order_address AS msoa ON mso.entity_id = msoa.parent_id
LEFT JOIN
(
SELECT cast(((cet.cec - cnt.cnc) / cst.csc) AS decimal(6, 2)) as recurrenceRate, avg(repRate.countedOrders) AS avgRepRate
FROM
(
SELECT *,
(
SELECT count(customer_email) AS csc
FROM mag_sales_flat_order
WHERE created_at BETWEEN '2017-01-01' AND '2017-12-31'
) AS cst,
(
SELECT count(customer_email) AS cec
FROM mag_sales_flat_order
WHERE created_at BETWEEN '2017-01-01' AND '2020-12-31'
) AS cet,
(
SELECT count(mso_new.customer_email) AS cnc
FROM
(
SELECT *
FROM mag_sales_flat_order
WHERE created_at BETWEEN '2018-01-01' AND getdate()
) AS mso_new
LEFT JOIN
(
SELECT *
FROM mag_sales_flat_order
WHERE created_at BETWEEN '2017-01-01' AND '2017-12-31'
) AS mso_old on mso_new.customer_email = mso_old.customer_email
) AS cnt
) as cl
JOIN
(
SELECT customer_email, count(grand_total) as countedOrders, sum(grand_total) as summedOrders
FROM mag_sales_flat_order
GROUP BY customer_email
) AS repRate on cl.customer_email = repRate.customer_email
) AS clvData on mso.customer_email = clvData.customer_email
WHERE store_id IN({ $ store ['id'] })
AND
(
mso.status = 'complete'
OR mso.status = 'closed'
OR mso.status = 'processing'
OR mso.status = 'exported'
OR mso.status LIKE 'pending%'
)
AND
(
DATE_FORMAT(mso.created_at, '%Y-%m-%d') >= '$begin_date'
)
AND
(
DATE_FORMAT(mso.created_at, '%Y-%m-%d') <= '$end_date'
)
GROUP BY entity_id;

why is time differences using alias not working?

SELECT
t.imei,
t.date,
t.time startTime,
t.ignition,
t.tripStartStop,
(
SELECT
min(time)
from
gps_data
where
time > t.time
and date >= t.date
and imei = '358480088853405'
and tripStartStop = 0
) ' stopTime',
SUBTIME('startTime', 'stopTime') AS diff
from
gps_data t
where
imei = '358480088853405'
and date >= '2020-03-08'
and date <= '2020-03-09'
and tripStartStop = 1
the above query returns startTime and stopTime as alias values but i can't get the difference of these two times
used both SUBTIME and TIMEDIFF
You cannot use aliases like that. The query:
SUBTIME('startTime','stopTime')
treats the startTime and stopTime as strings, hence the 00:00:00.
What you can do is following:
select q.imei, q.date, q.startTime, q.ignition,
q.tripStartStop, q.stopTime, subtime(q.startTime, q.stopTime)
from (
SELECT t.imei
, t.date
, t.time startTime
, t.ignition
, t.tripStartStop
, ( SELECT min(time) from gps_data where time>t.time and date>=t.date and imei='358480088853405' and tripStartStop=0 ) 'stopTime'
from gps_data t
where
imei='358480088853405'
and date between '2020-03-08' and '2020-03-09'
and tripStartStop=1
) as q
You need to select 'starTime' and 'stopTime' using select statement within the Substring. That would solve your problem.
SUBTIME(SELECT(startTime),SELECT(stopTime))
SELECT t.imei
, t.date
, t.time startTime
, t.ignition
, t.tripStartStop
, ( SELECT min(time) from gps_data where time>t.time and date>=t.date and imei='358480088853405' and tripStartStop=0 ) ' stopTime'
, SUBTIME(SELECT(startTime),SELECT(stopTime)) diff
from gps_data t
where imei='358480088853405'
and date>='2020-03-08'
and date<='2020-03-09'
and tripStartStop=1````

user defined variable to store ranking gives wrong values if order by

mysql table: work
|id|user_id|created_at|realization|
I have been working on a sql query which calculates performance (realisation today / realization on the first day of the month) and sortes records based on performance.
Expected result:
|ranking|performance|user|
|1|0.88|36|
|2|0.712444111|444|
|3|0.711|1|
|4|0.33333|9|
|5|0.1006|29|
returned result:
|ranking|performance|user|
|4|0.88|36|
|2|0.712444111|444|
|5|0.711|1|
|3|0.33333|9|
|1|0.1006|29|
Here is my query:
SET #ranking := 0;
SELECT
#ranking := #ranking + 1 as ranking,
w1.user_id,
IFNULL(ROUND(w2.realization / w1.realization), 4), 0) AS performance
FROM work w1
JOIN (
SELECT min(created_at) AS first_month, max(created_at) AS last_month, user_id
FROM work
WHERE (DATE_FOMAT(NOW(), '%Y-%m') = DATE_FORMAT(created_at, '%Y-%m')
GROUP BY user_id
ORDER BY user_id
) AS w ON w1.user_id = w.user_id AND w1.created_at = w.first_month
JOIN work AS w2 ON w1.user_id = w2.user_id AND w2.created_at = w.last_month
ORDER BY performance DESC
UPDATE
Even if I try to wrap it this way, the rankings are not right
SET #ranking := 0;
SELECT #ranking := #ranking + 1 as ranking, a.user_id, a.performance
FROM (
SELECT
w1.user_id,
IFNULL(ROUND(w2.realization / w1.realization), 4), 0) AS performance
FROM work w1
JOIN (
SELECT min(created_at) AS first_month, max(created_at) AS last_month, user_id
FROM work
WHERE (DATE_FOMAT(NOW(), '%Y-%m') = DATE_FORMAT(created_at, '%Y- %m')
GROUP BY user_id
ORDER BY user_id
) AS w ON w1.user_id = w.user_id AND w1.created_at = w.first_month
JOIN work AS w2 ON w1.user_id = w2.user_id AND w2.created_at = w.last_month
ORDER BY performance DESC
) AS a

Mysql - Access column from grandparent's joined table in WHERE in subquery inside parent's FROM

Inside a WHERE inside a subquery inside a FROM inside another subquery inside a SELECT that's joined to another table, I need to access a column from that joined table.
edited to add more complete example:
SELECT
field_one,
field_two,
field_three,
field_one-field_three AS field_five,
field_six
FROM (
SELECT
IFNULL(
(
SELECT
SUM(us.field_seven) AS field_one
FROM
table_one us
WHERE
us.rto_id = rto.relevant_field_one
AND
us.created >= (
SELECT
IF(
selected_date IS NULL,
MIN(created),
selected_date
)
FROM (
SELECT
IF(
latest_date < DATE_SUB(CURDATE(), INTERVAL rtt.relevant_field_two DAY),
CURDATE(),
MAX(prevdate)
) AS selected_date,
created
FROM (
SELECT
created,
#calc_prevdate as prevdate,
DATEDIFF(#calc_prevdate, created) AS diff,
#calc_prevdate := created
FROM (
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) AS x
CROSS JOIN (
SELECT
#calc_prevdate := NULL
) as vars
) AS z
CROSS JOIN (
SELECT
MAX(created) AS latest_date
FROM(
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) as z
) AS y
WHERE
diff > rtt.relevant_field_two
) as w
)
GROUP BY us.rto_id
),0
) AS field_one,
IFNULL(
(
SELECT
SUM(tt.field_seven) AS field_three
FROM
table_two tt
WHERE
tt.rto_id = rto.relevant_field_one
AND
tt.created >= (
SELECT
IF(
selected_date IS NULL,
MIN(created),
selected_date
)
FROM (
SELECT
IF(
latest_date < DATE_SUB(CURDATE(), INTERVAL rtt.relevant_field_two DAY),
CURDATE(),
MAX(prevdate)
) AS selected_date,
created
FROM (
SELECT
created,
#calc_prevdate as prevdate,
DATEDIFF(#calc_prevdate, created) AS diff,
#calc_prevdate := created
FROM (
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) AS x
CROSS JOIN (
SELECT
#calc_prevdate := NULL
) as vars
) AS z
CROSS JOIN (
SELECT
MAX(created) AS latest_date
FROM(
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) as z
) AS y
WHERE
diff > rtt.relevant_field_two
) as w
)
GROUP BY tt.rto_id
), 0
) AS field_three,
IFNULL(
(
SELECT
COUNT(*) AS field_two
FROM
table_two tt
WHERE
tt.rto_id = rto.relevant_field_one
GROUP BY tt.rto_id
), 0
) AS field_two,
IFNULL(
(
SELECT
GREATEST(
IFNULL(MAX(us.created), 0), IFNULL(MAX(tt.created), 0)
) AS field_six
FROM
table_one us
LEFT JOIN
table_two tt ON us.rto_id = tt.rto_id
WHERE
us.rto_id = rto.relevant_field_one
GROUP BY us.rto_id
), 0
) AS field_six
FROM
relevant_table_one rto
LEFT JOIN
relevant_table_two rtt ON rto.rtt_id = rtt.id
WHERE
rto.rtt_id = ?
GROUP BY rto.relevant_field_one
) v
ORDER BY id ASC;
given that query, I need to access relevant_table_one.relevant_field_one and relevant_table_two.relevant_field_two from inside the subqueries, but the restrictions on subqueries dictates that you cant access a parents table in a subquery inside a FROM
I managed to solve this (so far I think) by adding #rfo := relevant_field_one and #rft := relevant_field_two up in the select where they were accessable and then referring to the created variables instead of the columns down in the nested query where relevant.
It's possible I'm just getting false positives but so far the solution appears to be working.

Sum two rows (one previous row and one is current row in same table) in SQL

I have this table:
Date_on deposited withdrawal in_bank
2012-09-1 3000 2000 50000
2012-09-2/t 4000/t 0 54000
2013-09-3/t 3000 2000 55000
Now I want to execute a query to add the deposited amounts and subtract the withdrawals from the previous days entry in_bank. How can I do that? Can any one help me on that? This is my query:
select date_on, in_bank,((in_bank+deposited)-withdrawal)
from tablename where date_on > '2012-09-01' order by date_on
SELECT today.withdrawal, today.deposited, today.date_on,
(IFNULL(today.deposited, 0) - IFNULL(today.withdrawal, 0)) + IFNULL((SELECT in_bank FROM tablename AS yesterday WHERE yesterday.date_on = DATE_SUB(today.date_on, INTERVAL 1 DAY)), 0)
FROM tablename AS today
WHERE date_on BETWEEN '2012-09-01' AND '2012-09-02'
ORDER BY date_on ASC
The query will assume the balance was zero if no previous days date can be found.
Added a fiddle
http://sqlfiddle.com/#!2/d8261/14
Try this:
SELECT
t1.date_on,
t1.in_bank,
(
SELECT in_bank
FROM
(
SELECT *, (#rownum2 := #rownum2 +1) rank
FROM Table1,(SELECT #rownum2 :=0 ) t
ORDER BY date_on
) t2 WHERE t1.rank - t2.rank = 1
) - t1.withdrawal - deposited "total"
FROM
(
SELECT *, (#rownum := #rownum +1) rank
FROM Table1,(SELECT #rownum :=0 ) t
ORDER BY date_on
) t1;
SQL Fiddle Demo
SELECT
date_on,
in_bank,
(( ISNULL(in_bank, 0) + ISNULL(in_deposited,0)) - ISNULL(withdrawal,0))
FROM tablename
WHERE date_on > '2012-09-01'
ORDER BY date_on
I am not very familiar with mysql but this might help
select account.*,D.expectable from account left join
(
select deposited - withdrawal + in_bank as expectable,DATE_ADD(Date_on,INTERVAL 1 DAY) as nDate from account
)
D on account.Date_on = D.nDate