Why is MySQL cumulative sum producing wrong results - mysql

I need to find the cumulative sum for the following data:
Following query:
-- Daily transaction counts: one row per distinct `created` value.
SELECT
    created,
    COUNT( * )
FROM transactions
GROUP BY
    created
Gives me:
created COUNT( * )
2015-8-09 1
2015-8-15 1
2015-8-16 2
2015-8-17 1
2015-8-23 1
I tried to do the cumulative sum like:
-- Attempted running total: joins every transactions row (t1) to the per-date
-- counts (t2) and sums the matched counts for each t1.created.
SELECT t1.created, COUNT( * ) , SUM( t2.totalcount ) AS sum
FROM transactions t1
INNER JOIN (
-- NOTE(review): id is selected here but the grouping is on created only, so
-- this query does not determine which id represents each date.
SELECT id, created c, COUNT( * ) AS totalcount
FROM transactions
GROUP BY created
ORDER BY created
-- The join condition compares ids, not dates.
)t2 ON t1.id >= t2.id
GROUP BY t1.created
ORDER BY t1.created
but the results it gives aren't as expected:
created COUNT( * ) sum
2015-8-09 5 6
2015-8-15 3 4
2015-8-16 6 8
2015-8-17 1 1
2015-8-23 4 5
How do I produce the following result?
created COUNT( * ) sum
2015-8-09 1 1
2015-8-15 1 2
2015-8-16 2 4
2015-8-17 1 5
2015-8-23 1 6

-- Running total of the per-day counts, accumulated in the user variable @sum.
-- The derived table produces one row per day, ordered by date, and the outer
-- select adds each day's count to the accumulator.
select tmp.*, @sum := @sum + cnt as cum_sum
from
(
SELECT created, COUNT( * ) as cnt
FROM `transactions`
GROUP BY created
ORDER BY created
) tmp
-- Initializes the accumulator to 0 for this statement.
cross join (select @sum := 0) s

Your inner query is selecting id without grouping on it. Let's rework it in terms of the date.
-- Running total by date. Both sides of the join are reduced to one row per
-- date before joining: joining the raw transactions rows instead repeats every
-- matched date once per transaction on the current date, which inflates both
-- the day count and the running sum (e.g. a date with 2 rows would report
-- daycount 6 and sum 8 instead of 2 and 4 for the sample data).
SELECT t1.created, t1.daycount, SUM( t2.totalcount ) AS sum
FROM ( SELECT created, COUNT( * ) AS daycount
FROM transactions
GROUP BY created
) t1
INNER JOIN ( SELECT created, COUNT( * ) AS totalcount
FROM transactions
GROUP BY created
) t2 ON t1.created >= t2.created
GROUP BY t1.created, t1.daycount
ORDER BY t1.created;
Or you might want to put the totalcount inline:
-- Per-date count plus a running total obtained by counting, for each date,
-- every transaction created on or before it.
SELECT
    cur.created,
    COUNT(*) AS daycount,
    (
        SELECT COUNT(*)
        FROM transactions AS upto
        WHERE upto.created <= cur.created
    ) AS totalcount
FROM transactions AS cur
GROUP BY cur.created
ORDER BY cur.created;

Related

(2013, 'Lost connection to MySQL server during query ([WinError 10054] An existing connection was forcibly closed by the remote host)')

with posts as
(
-- One row per post (page, post id, retweet count) for the listed user_ids
-- with a created_time in October 2022.
select dpd.user_id as page_id, dpd.tweet_id as post_id, dpd.retweets as shares
from monthly_post_details dpd
where dpd.user_id in ('TheDeshBhakt', 'abhinavxarora', 'dhruv_rathee', 'shamsharmashow', 'TheWaliRahmani', 'AKTKbasics', 'mohakmangal', 'AnOpenLetter001', 'thekumarshyam', 'upword_', 'TheLallantop', 'thequint', 'QuintHindi', 'SatyaHindi', 'newslaundry', 'BrutIndia', 'nitishrajpute', 'ScoopWhoop', 'UFbySamdishh', 'Thepoliticspoi1', 'ndtv', 'aajtak', 'ZeeNews', 'ABPNews', 'kunalkamra88', 'PanickarS', 'SureshChavhanke', 'DaaruBaazMehta', 'ChargingHindi', 'ElvishYadav', 'StringReveals', 'OpIndiaHindi', 'MediaHarshVT', 'nshuklaindia', 'AtriNeeraj', 'atsshow7', 'SatyaSanatan01', 'Thakur312Manish', 'abhiandniyu', 'NationalDastak', 'thenews_intl', 'capitaltvindia', 'ajitanjum', 'dblive15', 'thelivetvnews', 'ppbajpai', 'abhisar_sharma', 'bstvlive', '_pyaraHindustan', 'JaipurDialogues', 'knockingNews', 'SushantBSinha', 'Article19_India', 'ultachasmauc', 'DOpolitics_in', 'sakshijoshii', 'VaadClips', 'TheSatyaShow', 'TV9Bharatvarsh', 'Republic_Bharat', 'indiatvnews', 'ndtvindia', 'news24tvchannel', 'News18India', 'NewsNationTV', 'ZeeHindustan_', 'IndiaNews_itv', 'goodnewstoday', 'DDNewslive', 'WIONews', 'cnnnews18', 'IndiaToday', 'TimesNow', 'theprintindia', 'republic', 'thewire_in', 'MirrorNow', 'NewsX', 'themojostory')
-- Half-open range on the bare column instead of DATE(created_time) BETWEEN:
-- same October window, but the column is not wrapped in a function, so an
-- index on created_time can be used for the range.
-- NOTE(review): assumes created_time is a DATE/DATETIME/TIMESTAMP column — confirm.
and dpd.created_time >= '2022-10-01' and dpd.created_time < '2022-11-01'
),
top_ten as
(
select page_id, shares from posts order by shares desc limit 30
),
top_posts as
(
select page_id, count(*) as num_posts_in_top from top_ten group by page_id
),
median_posts as
(
-- Median shares per page: number each page's posts by ascending shares and
-- average the middle row (odd count) or the two middle rows (even count).
-- Window functions replace the user-variable row counter and the two
-- correlated COUNT(*) subqueries, which re-read posts for every row.
select page_id, avg(shares) as median_shares
from
(
select page_id, shares,
row_number() over (partition by page_id order by shares) as seq,
count(*) over (partition by page_id) as ct
from posts
) ranked
-- floor((ct+1)/2) and floor((ct+2)/2) are the same position for odd ct and
-- the two middle positions for even ct.
where seq in (floor((ct + 1) / 2), floor((ct + 2) / 2))
group by page_id
),
metrics as
(
select p.page_id, count(*) as total_posts,
IFNULL(max(p.shares),0) as max_shares,
IFNULL(sum(p.shares), 0) as total_shares,
IFNULL(tp.num_posts_in_top, 0) as num_posts_in_top
from posts p left join top_posts tp
on p.page_id = tp.page_id
group by p.page_id order by total_shares desc
),
top_posts_score as
(
# remove in future: calculation to be done using pd.rank()
select page_id, sum(ran) as num_posts_in_top_score
from (
select page_id,
rank() over(
order by shares) as ran
from top_ten
) tpt
group by page_id
)
select metrics.*, median_posts.median_shares,
IFNULL(tps.num_posts_in_top_score,0) as num_posts_in_top_score
from metrics left join median_posts on metrics.page_id=median_posts.page_id
left join top_posts_score tps on metrics.page_id = tps.page_id;
The query works when the date range is 1 day or 1 week, but not when it is 1 month.

Mysql - Access column from grandparent's joined table in WHERE in subquery inside parent's FROM

Inside a WHERE inside a subquery inside a FROM inside another subquery inside a SELECT that's joined to another table, I need to access a column from that joined table.
edited to add more complete example:
SELECT
field_one,
field_two,
field_three,
field_one-field_three AS field_five,
field_six
FROM (
SELECT
IFNULL(
(
SELECT
SUM(us.field_seven) AS field_one
FROM
table_one us
WHERE
us.rto_id = rto.relevant_field_one
AND
us.created >= (
SELECT
IF(
selected_date IS NULL,
MIN(created),
selected_date
)
FROM (
SELECT
IF(
latest_date < DATE_SUB(CURDATE(), INTERVAL rtt.relevant_field_two DAY),
CURDATE(),
MAX(prevdate)
) AS selected_date,
created
FROM (
-- Walks the combined table_one/table_two dates (sorted newest first by the
-- inner ORDER BY) and, for each row, exposes the previously visited date and
-- the gap in days between it and the current date, carried in the user
-- variable @calc_prevdate.
-- NOTE(review): relies on the select-list expressions being evaluated in the
-- order written, so prevdate/diff read the variable before it is reassigned;
-- confirm this holds on the target MySQL version.
SELECT
created,
@calc_prevdate as prevdate,
DATEDIFF(@calc_prevdate, created) AS diff,
@calc_prevdate := created
FROM (
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) AS x
CROSS JOIN (
-- Resets the carried date so the first row has no previous date.
SELECT
@calc_prevdate := NULL
) as vars
) AS z
CROSS JOIN (
SELECT
MAX(created) AS latest_date
FROM(
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) as z
) AS y
WHERE
diff > rtt.relevant_field_two
) as w
)
GROUP BY us.rto_id
),0
) AS field_one,
IFNULL(
(
SELECT
SUM(tt.field_seven) AS field_three
FROM
table_two tt
WHERE
tt.rto_id = rto.relevant_field_one
AND
tt.created >= (
SELECT
IF(
selected_date IS NULL,
MIN(created),
selected_date
)
FROM (
SELECT
IF(
latest_date < DATE_SUB(CURDATE(), INTERVAL rtt.relevant_field_two DAY),
CURDATE(),
MAX(prevdate)
) AS selected_date,
created
FROM (
-- Walks the combined table_one/table_two dates (sorted newest first by the
-- inner ORDER BY) and, for each row, exposes the previously visited date and
-- the gap in days between it and the current date, carried in the user
-- variable @calc_prevdate.
-- NOTE(review): relies on the select-list expressions being evaluated in the
-- order written, so prevdate/diff read the variable before it is reassigned;
-- confirm this holds on the target MySQL version.
SELECT
created,
@calc_prevdate as prevdate,
DATEDIFF(@calc_prevdate, created) AS diff,
@calc_prevdate := created
FROM (
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) AS x
CROSS JOIN (
-- Resets the carried date so the first row has no previous date.
SELECT
@calc_prevdate := NULL
) as vars
) AS z
CROSS JOIN (
SELECT
MAX(created) AS latest_date
FROM(
SELECT
sto.created
FROM
table_one sto
WHERE
sto.rto_id = rto.relevant_field_one
UNION ALL
SELECT
stt.created
FROM
table_two stt
WHERE
stt.rto_id = rto.relevant_field_one
ORDER BY
created DESC
) as z
) AS y
WHERE
diff > rtt.relevant_field_two
) as w
)
GROUP BY tt.rto_id
), 0
) AS field_three,
IFNULL(
(
SELECT
COUNT(*) AS field_two
FROM
table_two tt
WHERE
tt.rto_id = rto.relevant_field_one
GROUP BY tt.rto_id
), 0
) AS field_two,
IFNULL(
(
SELECT
GREATEST(
IFNULL(MAX(us.created), 0), IFNULL(MAX(tt.created), 0)
) AS field_six
FROM
table_one us
LEFT JOIN
table_two tt ON us.rto_id = tt.rto_id
WHERE
us.rto_id = rto.relevant_field_one
GROUP BY us.rto_id
), 0
) AS field_six
FROM
relevant_table_one rto
LEFT JOIN
relevant_table_two rtt ON rto.rtt_id = rtt.id
WHERE
rto.rtt_id = ?
GROUP BY rto.relevant_field_one
) v
ORDER BY id ASC;
Given that query, I need to access relevant_table_one.relevant_field_one and relevant_table_two.relevant_field_two from inside the subqueries, but the restrictions on subqueries dictate that you can't access a parent's table in a subquery inside a FROM.
I managed to solve this (so far, I think) by adding @rfo := relevant_field_one and @rft := relevant_field_two up in the SELECT where they were accessible, and then referring to those variables instead of the columns down in the nested query where relevant.
It's possible I'm just getting false positives but so far the solution appears to be working.

Operand should contain 1 column(s) - in mysql query

I have the following complex query that is giving me this error:
Operand should contain 1 column(s)
Can anyone suggest what is wrong?
SELECT
t.user_id AS user_id,
t.organisation_id AS organisation_id,
t.firstname AS firstname,
t.surname AS surname,
t.username AS username,
t.year_id AS year_id,
t.form_name AS form_name,
t.House AS House,
rcPoints.total AS milestoneRedeemedCodesTotal,
rcFilteredPoints.total AS redeemedCodesTotalFiltered,
(
COALESCE (rcFilteredPoints.total, 0) - COALESCE (milestoneHistory.total, 0)
) AS redeemedCodesTotalAvailableFiltered,
ABS(
FLOOR(
(
COALESCE (rcFilteredPoints.total, 0) - COALESCE (milestoneHistory.total, 0)
) / 1000
) * 1000
) AS redeemedCodesTotalTowardsMilestone,
ABS(
FLOOR(
(
COALESCE (rcFilteredPoints.total, 0) - COALESCE (milestoneHistory.total, 0)
) / 1000
)
) AS redeemedCodesMilestoneTriggers,
COALESCE (milestoneHistory.total, 0) AS historyTotal
FROM
`myuser` `t`
LEFT JOIN (
SELECT
rc.user_id AS user_id,
SUM(rc.school_points) AS total
FROM
`redeemed_codes` `rc`
INNER JOIN myuser m ON (m.user_id = rc.user_id)
WHERE
(rc.date_redeemed >= 0)
AND (m.organisation_id = 58022)
GROUP BY
rc.user_id
) AS rcPoints ON (rcPoints.user_id = t.user_id)
LEFT JOIN (
SELECT
rc.user_id AS user_id,
SUM(rc.school_points) AS total
FROM
`redeemed_codes` `rc`
INNER JOIN myuser m ON (m.user_id = rc.user_id)
WHERE
(rc.date_redeemed >= 0)
AND (m.organisation_id = 58022)
GROUP BY
rc.user_id
) AS rcFilteredPoints ON (
rcFilteredPoints.user_id = t.user_id
)
LEFT JOIN (
SELECT
mh.user_id AS user_id,
mh.milestone_id AS milestone_id,
MAX(mh.points_when_triggered) AS total
FROM
`milestone_history` `mh`
WHERE
mh.milestone_id = 13
GROUP BY
mh.user_id
) AS milestoneHistory ON (
milestoneHistory.user_id = t.user_id
)
WHERE
(
(
SELECT
COALESCE (count(*), 0)
FROM
milestone_history mha
WHERE
mha.milestone_id = 13
AND mha.user_id = t.user_id
) = 0
)
AND (t.organisation_id = 58022)
AND
(
SELECT * FROM
redeemed_codes t1
WHERE
organisation_id = 1
AND
(
SELECT
sum(school_points)
FROM
redeemed_codes t2
WHERE
t2.redeemed_code_id <= t1.redeemed_code_id
) >= 1000
ORDER BY redeemed_code_id
LIMIT 1
)
GROUP BY
t.user_id
ORDER BY
redeemedCodesMilestoneTriggers DESC
LIMIT 1
Your query might have multiple errors, but this condition in the WHERE clause is definitely suspect and would lead to that error:
AND (SELECT *
FROM redeemed_codes t1
WHERE organisation_id = 1 AND
(SELECT sum(school_points)
FROM redeemed_codes t2
WHERE t2.redeemed_code_id <= t1.redeemed_code_id
) >= 1000
ORDER BY redeemed_code_id
LIMIT 1
)
I have no idea what you are trying to do. Sometimes, the solution is simply EXISTS:
EXISTS (SELECT *
FROM redeemed_codes t1
WHERE organisation_id = 1 AND
(SELECT sum(school_points)
FROM redeemed_codes t2
WHERE t2.redeemed_code_id <= t1.redeemed_code_id
) >= 1000
)

Select in select - Every derived table must have its own alias error

I'm trying to get from database information about orders grouped by date.
I have a table sales_flat_order, which holds each order's id, creation date, total_paid, and item count, and a table sales_flat_order_item, which holds the order items with their prices.
I created script to get order information by day:
-- Daily totals for orders with status 'complete' and payment method 'checkmo';
-- WITH ROLLUP appends a grand-total row.
SELECT
    DATE(o.created_at) AS date,
    SUM(o.total_paid) AS sales,
    SUM(o.total_item_count) AS items
FROM sales_flat_order AS o
INNER JOIN sales_flat_order_payment AS p
    ON p.parent_id = o.entity_id
WHERE o.status = 'complete'
    AND p.method = 'checkmo'
GROUP BY DATE(o.created_at) WITH ROLLUP
I get:
DATE SALES ITEMS
2013-03-05 72 3
2013-03-06 100 5
And I have script to count median price:
-- Median item price: number the rows by ascending price with the user variable
-- @rownum, then average the row(s) whose position lies in
-- [total_rows/2, total_rows/2 + 1] (one row for an odd count, two for even).
SELECT
avg(t1.price) as median_val
FROM
(
SELECT
@rownum:=@rownum+1 as `row_number`,
d.price
FROM
sales_flat_order_item d,
-- Initializes the row counter to 0 for this statement.
(SELECT @rownum:=0) r
WHERE 1
ORDER BY d.price
) as t1,
(
SELECT
count(*) as total_rows
FROM
sales_flat_order_item d
WHERE 1
) as t2
WHERE 1
AND t1.row_number>=total_rows/2
and t1.row_number<=total_rows/2+1;
Now I'm trying to combine these two scripts to get:
DATE SALES ITEMS median_item_price
2013-03-05 72 3 19
2013-03-06 100 5 10.5
Combined script:
SELECT
DATE( sales_flat_order.created_at ) AS date,
SUM( sales_flat_order.total_paid ) AS sales,
SUM( sales_flat_order.total_item_count ) AS items,
sales_flat_order_item.price as median_item_price
FROM
sales_flat_order,
sales_flat_order_payment,
(
-- Median price over all order items: rows are numbered by ascending price with
-- the user variable @rownum and the middle row(s) are averaged.
-- This derived table returns a single column, median_val.
SELECT
avg(t1.price) as median_val
FROM
(
SELECT
@rownum:=@rownum+1 as `row_number`,
d.price
FROM
sales_flat_order_item d,
-- Initializes the row counter to 0 for this statement.
(SELECT @rownum:=0) r
WHERE 1
ORDER BY d.price
) as t1,
(
SELECT
count(*) as total_rows
FROM
sales_flat_order_item d
WHERE 1
) as t2
WHERE 1
AND t1.row_number>=total_rows/2
and t1.row_number<=total_rows/2+1
) as sales_flat_order_item
WHERE
sales_flat_order.status = 'complete'
AND sales_flat_order.entity_id = sales_flat_order_payment.parent_id
AND sales_flat_order_payment.method = 'checkmo'
AND DATE(sales_flat_order_item.created_at) = DATE(sales_flat_order.created_at)
GROUP BY DATE( sales_flat_order.created_at )
WITH ROLLUP
and get error: #1248 - Every derived table must have its own alias
here is database: http://sqlfiddle.com/#!2/7dfec
Can anyone help?
Solution:
-- Daily sales, item counts and median item price for orders with status
-- 'complete' and payment method 'checkmo'; WITH ROLLUP appends a total row.
SELECT
DATE( sales_flat_order.created_at ) AS date,
SUM( sales_flat_order.total_paid ) AS sales,
SUM( sales_flat_order.total_item_count ) AS items,
MAX( median.median_val ) as median_item_price
FROM
sales_flat_order,
sales_flat_order_payment,
(
-- One row per calendar day: the average price of the middle row(s) of each
-- created_at group that falls on that day.
SELECT DATE(sq.created_at) as median_date, avg(sq.price) as median_val FROM (
SELECT t1.row_number, t1.price, t1.created_at FROM(
-- The counter restarts at 1 whenever created_at differs from the previous row,
-- so rows must arrive grouped by created_at and sorted by price inside each
-- group; ordering by price alone interleaves the groups and restarts the
-- counter mid-group.
-- NOTE(review): assumes the variables are evaluated in the sorted row order —
-- confirm on the target MySQL version.
SELECT IF(@prev!=d.created_at, @rownum:=1, @rownum:=@rownum+1) as `row_number`, d.price, @prev:=d.created_at AS created_at
FROM sales_flat_order_item d, (SELECT @rownum:=0, @prev:=NULL) r
ORDER BY d.created_at, d.price
) as t1 INNER JOIN
(
SELECT count(*) as total_rows, created_at
FROM sales_flat_order_item d
GROUP BY created_at
) as t2
ON t1.created_at = t2.created_at
WHERE 1=1
AND t1.row_number>=t2.total_rows/2 and t1.row_number<=t2.total_rows/2+1
)sq
group by DATE(sq.created_at)
) as median
WHERE
sales_flat_order.status = 'complete'
AND sales_flat_order.entity_id = sales_flat_order_payment.parent_id
AND sales_flat_order_payment.method = 'checkmo'
AND median.median_date = DATE( sales_flat_order.created_at )
GROUP BY DATE( sales_flat_order.created_at )
WITH ROLLUP

Union Select Column Mismatch

Here's my query:
-- Per-day row counts from four source tables. Each source row is tagged with
-- a table number (tb) and the tags are counted per formatted date.
SELECT
    FROM_UNIXTIME(date_added, '%m-%d-%Y') AS formatted_date,
    SUM(tb = 1) AS sum_users,
    SUM(tb = 2) AS sum_links,
    SUM(tb = 3) AS sum_ads,
    SUM(tb = 4) AS sum_actions
FROM (
    SELECT date_added, 1 AS tb FROM users_list
    UNION ALL
    SELECT date_added, 2 FROM users_links
    UNION ALL
    SELECT date_served, 3 FROM ads_served
    UNION ALL
    SELECT date_served, 4 FROM actions
) AS src
GROUP BY formatted_date
ORDER BY formatted_date DESC
Here's my table data:
users_list
id date_added
1 1234567890
2 1334567890
3 1434567890
users_links
id date_added
1 1244567890
2 1354567890
3 1464567890
ads_served
id date_served revenue
1 1234567891 0.01
2 1334567892 0.02
3 1434567893 0.02
actions
id date_served
1 1234561890
2 1334562890
3 1434563890
I am trying to sum the revenue for formatted_date in the ads_served table as a 6th column for the output query. I am lost as to where to start. If I add the sum(revenue) to the union select I get a "column mismatch" error.
Column revenue belongs to ads_served but you are selecting from a sub query where revenue is not present. Add it to the subquery:
-- Per-day row counts from four source tables plus summed ad revenue. Sources
-- without a revenue column contribute 0 so every UNION branch has three columns.
SELECT
    FROM_UNIXTIME(date_added, '%m-%d-%Y') AS formatted_date,
    SUM(tb = 1) AS sum_users,
    SUM(tb = 2) AS sum_links,
    SUM(tb = 3) AS sum_ads,
    SUM(tb = 4) AS sum_actions,
    SUM(revenue) AS sum_revenue
FROM (
    SELECT date_added, 1 AS tb, 0 AS revenue FROM users_list
    UNION ALL
    SELECT date_added, 2, 0 FROM users_links
    UNION ALL
    SELECT date_served, 3, revenue FROM ads_served
    UNION ALL
    SELECT date_served, 4, 0 FROM actions
) AS src
GROUP BY formatted_date
ORDER BY formatted_date DESC
Try it this way. Why do you use 1=1?
-- Per-day row counts from four source tables plus summed ad revenue.
-- Sources without revenue contribute a numeric 0 placeholder: an empty-string
-- placeholder would turn the UNION column into a string column and make SUM()
-- convert text to numbers. The no-op "WHERE 1=1" filters are dropped.
SELECT
FROM_UNIXTIME( date_added, '%m-%d-%Y' ) AS formatted_date,
SUM( tb =1 ) AS sum_users,
SUM( tb =2 ) AS sum_links,
SUM( tb =3 ) AS sum_ads,
SUM( tb =4 ) AS sum_actions,
sum(total) as tot_rev
FROM (
SELECT date_added, 0 as total, 1 AS tb
FROM users_list
UNION ALL
SELECT date_added, 0, 2
FROM users_links
UNION ALL
SELECT date_served, revenue, 3
FROM ads_served
UNION ALL
SELECT date_served, 0, 4
FROM actions
) AS t
GROUP BY formatted_date
ORDER BY formatted_date DESC