Cumulative count over time - mysql

I have a table orders like this:
customer_id order_date
10 2012-01-01
11 2012-01-02
10 2012-01-02
12 2012-01-03
11 2012-01-04
12 2012-02-01
11 2012-02-04
13 2012-02-05
14 2012-02-06
How can I get a cumulative average over time (per month) like this:
order date count orders count customers (customer_id)
2012-01 1 1 (12)
2012-01 2 2 (10,11)
2012-02 1 2 (13,14)
2012-02 2 2 (10,12)
2012-02 3 2 (11)
showing how the number of customers vs. number of orders per customer develops over time.
The following query gives me the wanted information - but not over time. How can I iterate the query over time?
-- Distribution over the whole table: for each per-customer order count,
-- how many customers have exactly that many orders.
SELECT
    per_customer.number_of_orders,
    COUNT(*) AS amount
FROM (
    -- one row per customer with that customer's total number of orders
    SELECT
        o.customer_id,
        COUNT(*) AS number_of_orders
    FROM orders AS o
    GROUP BY o.customer_id
) AS per_customer
GROUP BY per_customer.number_of_orders
Update:
I have now built the following PHP code to generate what I need. I wonder whether the same could be done using cumulative counts, as described at http://www.freeopenbook.com/mysqlcookbook/mysqlckbk-chp-12-sect-14.html
// For each month of $year, run the "orders per customer" distribution over
// the orders placed from January up to and including that month, and store
// the rows in $data keyed by month and by number of orders.
$year = 2011;
$cnt_months = 1;
while ($cnt_months <= 12) {
    // Months 1..9 get a leading zero ('01'..'09'); 10..12 are used as-is.
    $cnt_months_str = $cnt_months;
    if ($cnt_months < 10) {
        $cnt_months_str = '0'.$cnt_months;
    }
    $raw_query = "SELECT number_of_orders, count(*) as amount
FROM (
SELECT
o.customer_id,
count(*) as number_of_orders
FROM orders o
where Date_Format( o.order_date, '%Y%m' ) >= " . $year . "01 and Date_Format( o.order_date, '%Y%m' ) <= " . $year . $cnt_months_str . "
GROUP BY o.customer_id) as t1
GROUP BY number_of_orders";
    $query = db_query($raw_query);
    while ($row = db_fetch_array($query)) {
        $orders_per_customer = $row['number_of_orders'];
        // each entry is array(number of orders, number of customers with that many)
        $data[$cnt_months_str][$orders_per_customer] = array($orders_per_customer, (int)$row['amount']);
    }
    $cnt_months++;
}

A good starting point is
-- Per order date: number of orders, number of distinct customers, and the
-- comma-separated list of those customers' ids.
SELECT
    order_date,
    COUNT(*) AS distinctOrders,
    COUNT(DISTINCT customer_id) AS distinctCustomers,
    -- GROUP_CONCAT takes a sort direction only through its own ORDER BY clause
    GROUP_CONCAT(DISTINCT customer_id ORDER BY customer_id ASC) AS customerIDs
FROM orders
GROUP BY order_date
-- explicit ORDER BY: the "GROUP BY col ASC" extension was removed in MySQL 8.0
ORDER BY order_date ASC
This will give you the order_date, the number of orders on that date, the number of customers on that date, and the list of customer ids on that date.
Just looking at a way to tally up on a month by month basis. So taking this forward I've used a subquery to tally up as it goes
-- One row per order date with a running total of orders inside the current
-- month (ordersInThisMonth).
-- MySQL user variables are written @name; a leading # starts a comment.
-- NOTE: this relies on the rows of ordersPerDate being processed in
-- order_date order. MySQL does not guarantee the evaluation order of
-- user-variable expressions, and assigning variables inside SELECT is
-- deprecated in MySQL 8.0 (window functions are the replacement there).
-- NOTE: only the month number is compared, so two consecutive rows from the
-- same month of different years continue the same tally.
SELECT
    ordersPerDate.*,
    IF(
        MONTH(ordersPerDate.order_date) = @thisMonth,
        @runningTotal := @runningTotal + ordersPerDate.distinctOrders,
        -- first date of a new month: restart from this date's own orders
        -- (restarting from 0 would drop them from the tally)
        @runningTotal := ordersPerDate.distinctOrders
    ) AS ordersInThisMonth,
    @thisMonth := MONTH(ordersPerDate.order_date)
FROM
    (
        -- initialise the variables once, before any data row is read
        SELECT
            @thisMonth := 0,
            @runningTotal := 0
    ) AS variableInit
CROSS JOIN
    (
        SELECT
            order_date,
            COUNT(*) AS distinctOrders,
            COUNT(DISTINCT customer_id) AS distinctCustomers,
            GROUP_CONCAT(DISTINCT customer_id ORDER BY customer_id ASC) AS customerIDs
        FROM orders
        GROUP BY order_date
        ORDER BY order_date ASC
    ) AS ordersPerDate
And finally to clean it up, wrapped it in yet another subquery just to return the rows desired rather than the internal variables
Grouping on individual days
-- Same running total as the per-date query, wrapped once more so that only
-- the result columns are returned and the helper-variable columns are hidden.
-- MySQL user variables are written @name; a leading # starts a comment.
SELECT
    collatedData.order_date,
    collatedData.ordersInThisMonth AS count_orders,
    collatedData.distinctCustomers AS count_customers,
    collatedData.customerIDs AS customer_ids
FROM (
    -- NOTE: relies on ordersPerDate being read in order_date order; MySQL
    -- does not guarantee the evaluation order of user-variable expressions.
    SELECT
        ordersPerDate.*,
        IF(
            MONTH(ordersPerDate.order_date) = @thisMonth,
            @runningTotal := @runningTotal + ordersPerDate.distinctOrders,
            -- first date of a new month: restart from this date's own orders
            @runningTotal := ordersPerDate.distinctOrders
        ) AS ordersInThisMonth,
        @thisMonth := MONTH(ordersPerDate.order_date)
    FROM
        (
            SELECT
                @thisMonth := 0,
                @runningTotal := 0
        ) AS variableInit
    CROSS JOIN
        (
            SELECT
                order_date,
                COUNT(*) AS distinctOrders,
                COUNT(DISTINCT customer_id) AS distinctCustomers,
                GROUP_CONCAT(DISTINCT customer_id) AS customerIDs
            FROM orders
            GROUP BY order_date
            -- explicit ORDER BY: "GROUP BY col ASC" was removed in MySQL 8.0
            ORDER BY order_date ASC
        ) AS ordersPerDate
) AS collatedData
And now finally, following additional information from the OP, the end product
Grouping on calendar months
-- Per calendar month and per "number of orders per customer": how many
-- customers fall in that bucket, who they are, and a running total of the
-- orders seen so far within the month.
-- MySQL user variables are written @name (a leading # starts a comment) and
-- SQL line comments use "-- " rather than "//".

-- Top level will sanitise the output
SELECT
    collatedData.orderYear,
    collatedData.orderMonth,
    collatedData.distinctOrders,
    collatedData.ordersInThisMonth AS count_orders,
    collatedData.distinctCustomers AS count_customers,
    collatedData.customerIDs AS customer_ids
FROM (
    -- This level up will iterate through calculating running totals.
    -- NOTE: relies on ordersPerDate being read in its ORDER BY order; MySQL
    -- does not guarantee the evaluation order of user-variable expressions.
    SELECT
        ordersPerDate.*,
        IF(
            (ordersPerDate.orderYear, ordersPerDate.orderMonth) = (@thisYear, @thisMonth),
            @runningTotal := @runningTotal + ordersPerDate.distinctOrders * ordersPerDate.distinctCustomers,
            -- first bucket of a new month: restart from this bucket's own
            -- orders (restarting from 0 would drop them from the tally)
            @runningTotal := ordersPerDate.distinctOrders * ordersPerDate.distinctCustomers
        ) AS ordersInThisMonth,
        @thisMonth := ordersPerDate.orderMonth,
        @thisYear := ordersPerDate.orderYear
    FROM
        (
            SELECT
                @thisMonth := 0,
                @thisYear := 0,
                @runningTotal := 0
        ) AS variableInit
    CROSS JOIN
        (
            -- Next level up will collate this to get per year, month, and per number of orders
            SELECT
                ordersPerDatePerUser.orderYear,
                ordersPerDatePerUser.orderMonth,
                ordersPerDatePerUser.distinctOrders,
                COUNT(DISTINCT ordersPerDatePerUser.customer_id) AS distinctCustomers,
                GROUP_CONCAT(ordersPerDatePerUser.customer_id) AS customerIDs
            FROM (
                -- Inner query will get the number of orders for each year, month, and customer
                SELECT
                    YEAR(order_date) AS orderYear,
                    MONTH(order_date) AS orderMonth,
                    customer_id,
                    COUNT(*) AS distinctOrders
                FROM orders
                GROUP BY orderYear, orderMonth, customer_id
            ) AS ordersPerDatePerUser
            GROUP BY
                ordersPerDatePerUser.orderYear,
                ordersPerDatePerUser.orderMonth,
                ordersPerDatePerUser.distinctOrders
            -- explicit ORDER BY: "GROUP BY col ASC|DESC" was removed in MySQL 8.0
            ORDER BY
                ordersPerDatePerUser.orderYear ASC,
                ordersPerDatePerUser.orderMonth ASC,
                ordersPerDatePerUser.distinctOrders DESC
        ) AS ordersPerDate
) AS collatedData

-- Summary per period, where the period is the first 7 characters of
-- order_date (the 'YYYY-MM' part for dates written like the sample data):
-- orders, distinct customers, and the list of those customers' ids.
SELECT
    SUBSTR(o.order_date, 1, 7) AS order_period,
    COUNT(*) AS number_of_orders,
    COUNT(DISTINCT o.customer_id) AS number_of_customers,
    GROUP_CONCAT(DISTINCT o.customer_id) AS customers
FROM orders AS o
GROUP BY SUBSTR(o.order_date, 1, 7)

Related

How to get TopN query group by month MYSQL

There's a table like:
months contact COUNT
202007 asdas 45
202007 madhouse 1
202007 RORC YANG 1
202007 RORG 2
202007 ROR 5
202008 SARINA 1
202008 SMB 1
How can I get top 4 query result each month?
Expected result:
months contact COUNT
202007 asdas 45
202007 ROR 5
202007 RORG 2
202008 SARINA 1
202008 SMB 1
I'm working with mysql5.6
Here are 2 choices; both use window functions, which require MySQL 8.0 or later. The first uses rank() over(), which does not guarantee only 4 rows per month (ties can produce more); the second uses row_number() over(), which limits the result to at most 4 rows per month.
-- Top contacts per month by row count, using RANK(): tied counts share a
-- rank, so a month can return more than 4 rows.
WITH contact_counts AS (
    -- number of rows per (months, contact)
    SELECT
        months,
        contact,
        COUNT(*) AS c
    FROM mytable
    GROUP BY months, contact
),
ranked AS (
    SELECT
        months,
        contact,
        c,
        RANK() OVER (PARTITION BY months ORDER BY c DESC) AS cr
    FROM contact_counts
)
SELECT
    months,
    contact,
    c,
    cr
FROM ranked
WHERE cr <= 4
;
-- Top contacts per month by row count, using ROW_NUMBER(): every row gets a
-- distinct number within its month, so at most 4 rows per month are returned.
WITH contact_counts AS (
    -- number of rows per (months, contact)
    SELECT
        months,
        contact,
        COUNT(*) AS c
    FROM mytable
    GROUP BY months, contact
),
numbered AS (
    SELECT
        months,
        contact,
        c,
        ROW_NUMBER() OVER (PARTITION BY months ORDER BY c DESC) AS rn
    FROM contact_counts
)
SELECT
    months,
    contact,
    c,
    rn
FROM numbered
WHERE rn <= 4
;
see demo
For older MySQL versions (before 8.0, which have no window functions), try a row-number hack based on user variables:
-- Top 4 contacts per month without window functions, numbering rows with
-- user variables. MySQL user variables are written @name; a leading #
-- starts a comment.
-- The counts are sorted inside the derived table g (month, then highest
-- count first) so that the numbering follows the ranking; numbering rows in
-- contact-name order would not return the top rows.
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions, so this depends on g being read in its ORDER BY order.
SELECT
    *
FROM (
    SELECT
        -- restart at 1 whenever the month differs from the previous row's
        @row_num := IF(@prev_value = g.months, @row_num + 1, 1) AS RowNumber
        , g.months
        , g.contact
        , g.c
        , @prev_value := g.months
    FROM (
        SELECT months, contact, COUNT(*) AS c
        FROM mytable
        GROUP BY months, contact
        -- contact is only a tie-breaker to make equal counts deterministic
        ORDER BY months, c DESC, contact
    ) AS g
    CROSS JOIN (SELECT @row_num := 1, @prev_value := '') vars
) AS d
WHERE RowNumber <= 4
see that in demo
TOP5
-- Top 5 rows per month by count, numbering rows with user variables.
-- MySQL user variables are written @name; a leading # starts a comment.
-- The per-month counter is exposed as rnk because RANK is a reserved word
-- in MySQL 8.0, and the derived-table alias x is used with one consistent
-- spelling.
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions, so this depends on x being read in its ORDER BY order.
SELECT z.months, z.contact, z.count
FROM (
    SELECT
        x.*,
        @rownum := @rownum + 1,
        -- @part starts as NULL, so the first row takes the "new month" branch
        IF(@part = x.months, @r := @r + 1, @r := 1) AS rnk,
        @part := x.months
    FROM (
        SELECT
            *
        FROM
            my_table e
        ORDER BY
            e.months ASC, e.count DESC
    ) x
    CROSS JOIN (
        SELECT
            @rownum := 0,
            @part := NULL,
            @r := 0
    ) rt
) z
WHERE z.rnk <= 5

How to count rows until some value with ORDER BY

I need to get counts of some rows with ORDER BY.
How can I get the count of the rows
that come before orderId = 50 in this ordering?
(I see answer is 3, but I need to have mysql query how to get it)
I have mysql query like this:
-- Every order with its customer, grouped visually by customer (ascending)
-- with each customer's newest order id first.
SELECT
    c.id AS customerId,
    o.id AS orderId
FROM orders AS o
INNER JOIN customers AS c
    ON c.id = o.customerId
ORDER BY
    c.id ASC,
    o.id DESC
this query outputs:
customerId orderId
19 36
19 35
19 34
31 50
31 49
31 48
53 73
53 72
SQL DEMO
-- Count the rows that sort before orderId = 50.
-- MySQL user variables are written @name; a leading # starts a comment.
-- @row counts up by one per row; on the target row it is set to NULL, and
-- since NULL + 1 is NULL it stays NULL for every later row, so MAX() only
-- sees the numbers given to the earlier rows.
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions relative to ORDER BY in the same SELECT.
SELECT MAX(rn)
FROM (
    SELECT customerId,
           orderId,
           @row := IF(orderId = 50,
                      NULL,
                      @row + 1) AS rn
    FROM orders o
    CROSS JOIN (SELECT @row := 0) AS vars
    ORDER BY customerId ASC, orderId DESC
) t
If you can use row_number() (MySQL 8.0 or later):
-- Number all rows in the display order, then take the target row's number
-- minus one: that is how many rows come before orderId = 50.
SELECT MIN(numbered.rn) - 1 AS cnt
FROM (
    SELECT
        customerId,
        orderId,
        ROW_NUMBER() OVER (ORDER BY customerId ASC, orderId DESC) AS rn
    FROM orders
) AS numbered
WHERE numbered.orderId = 50;

How to get top selling item every month

I want to get top selling item/product for every month. I tried to using GROUP BY function, but my problem is how to get just 1 product in every month.
-- Total quantity sold per month name and product (all products, not just the top one).
SELECT
    MONTHNAME(date),
    product,
    SUM(quantity)
FROM mytable
GROUP BY
    MONTHNAME(date),
    product
Also, how to use row_number() over function in mysql? I think by using that I can get just 1 product per month?
This is what I want to get:
You can try below -
-- Top-selling product(s) per month name.
-- Step 1 (totals): total quantity per month name and product.
-- Step 2 (best):   the highest of those totals within each month name.
-- Step 3:          keep the products whose total equals their month's best;
--                  products tied for first place are all returned.
-- The per-month maximum has to be taken over the per-product sums, which
-- needs its own grouping level: aggregates cannot be nested directly.
SELECT
    totals.month,
    totals.product,
    totals.qty
FROM (
    SELECT MONTHNAME(`date`) AS month, product, SUM(quantity) AS qty
    FROM tablename
    GROUP BY MONTHNAME(`date`), product
) AS totals
INNER JOIN (
    SELECT per_product.month, MAX(per_product.qty) AS max_qty
    FROM (
        SELECT MONTHNAME(`date`) AS month, product, SUM(quantity) AS qty
        FROM tablename
        GROUP BY MONTHNAME(`date`), product
    ) AS per_product
    GROUP BY per_product.month
) AS best
    ON best.month = totals.month
   AND best.max_qty = totals.qty
-- Top-selling product per month name, numbering each month's products with
-- user variables. MySQL user variables are written @name; a leading #
-- starts a comment.
-- The totals are aggregated and sorted in their own derived table first, and
-- the counter is applied one level up, so the numbering is computed over the
-- sorted per-product totals rather than inside the GROUP BY / ORDER BY query.
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions, so this depends on the totals being read in their ORDER BY order.
SELECT
    month, n, product, quantity
FROM (
    SELECT
        -- restart at 1 whenever the month differs from the previous row's
        @n := IF(totals.month != @prev, 1, @n + 1) AS n,
        @prev := totals.month AS prev_month,
        totals.month,
        totals.product,
        totals.quantity
    FROM
        ( SELECT @prev := '', @n := 0 ) init
    CROSS JOIN
        ( SELECT MONTHNAME(date) AS month, product, SUM(quantity) AS quantity
          FROM mytable
          GROUP BY
              MONTHNAME(date), product
          ORDER BY
              MONTHNAME(date) ASC,
              SUM(quantity) DESC
        ) totals
) x
WHERE n <= 1
ORDER BY month, n

Offset with Quantity in SQL

Let us suppose we have following tables
product_id | quantity
1 | 250
2 | 150
3 | 120
4 | 300
5 | 301
How do we know that the item number of 401th in SQL? (the answer should be product_id : 3). The query should return the product_id
Let us assume also the row has been in order
You can use a correlated subquery to compute the cumulative sum for each row and then filter with BETWEEN to find the range that contains the required item:
-- Find the product whose item range contains item number 401.
-- For every product: cquant1 is the first item number it covers (sum of all
-- earlier products' quantities, plus one) and cquant2 is the last (sum up to
-- and including this product).
SELECT ranges.product_id
FROM (
    SELECT
        cur.*,
        COALESCE(
            (SELECT SUM(prev.quantity)
             FROM your_table prev
             WHERE prev.product_id < cur.product_id),
            0
        ) + 1 AS cquant1,
        (SELECT SUM(upto.quantity)
         FROM your_table upto
         WHERE upto.product_id <= cur.product_id) AS cquant2
    FROM your_table cur
) ranges
WHERE ranges.cquant1 <= 401
  AND 401 <= ranges.cquant2;
Demo
You can also use user variable for this:
-- Find the product whose item range contains item number 401, using running
-- sums kept in user variables. MySQL user variables are written @name; a
-- leading # starts a comment.
-- cquantity1 = items covered by all earlier products (running sum of the
--              previous row's quantity), cquantity2 = items covered up to and
--              including this product.
-- A product therefore holds items cquantity1 + 1 .. cquantity2, so the lower
-- bound is exclusive; an inclusive BETWEEN would match two products when the
-- item number falls exactly on a boundary.
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions relative to ORDER BY in the same SELECT.
SELECT *
FROM (
    SELECT product_id,
           @sum1 := @sum1 + COALESCE((
               SELECT quantity
               FROM your_table x
               WHERE x.product_id < t.product_id
               ORDER BY x.product_id DESC LIMIT 1
           ), 0) AS cquantity1,
           @sum2 := @sum2 + quantity AS cquantity2
    FROM your_table t
    CROSS JOIN (SELECT @sum1 := 0, @sum2 := 0) t2
    ORDER BY product_id
) t
WHERE 401 > cquantity1
  AND 401 <= cquantity2;
Demo
For Oracle (this query will not work as written on SQL Server):
This is by using LAG and SUM OVER() functions,
-- Find the product whose item range contains item number 401.
-- CUM_QUAN is the running total of QUANTITY in PRODUCT_ID order; START_QUAN
-- is the previous row's running total (0 for the first row), END_QUAN is this
-- row's. A product holds items START_QUAN + 1 .. END_QUAN, so the lower bound
-- is exclusive; an inclusive BETWEEN would match two products when the item
-- number equals a running total.
SELECT PRODUCT_ID FROM
(
    SELECT PRODUCT_ID
    , LAG(CUM_QUAN, 1, 0) OVER (ORDER BY PRODUCT_ID) AS START_QUAN
    , CUM_QUAN END_QUAN
    FROM
    (
        SELECT PRODUCT_ID
        , QUANTITY
        , SUM(QUANTITY) OVER (ORDER BY PRODUCT_ID) AS CUM_QUAN
        FROM YOUR_TABLE
    )
) WHERE 401 > START_QUAN AND 401 <= END_QUAN
You can do this with variables by getting a cumulative sum. However, Gurv's answer is way too complicated.
I think this is the simplest way:
-- Find the product whose item range contains item number 401 using a single
-- running sum. MySQL user variables are written @name; a leading # starts a
-- comment.
-- With items numbered from 1, a product holds items
-- (running_quantity - quantity) + 1 .. running_quantity, so the upper bound is
-- inclusive and the lower bound exclusive. (With the bounds the other way
-- round, item 400 in the sample data would resolve to product 3 instead of 2.)
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions relative to ORDER BY in the same SELECT.
select t.*
from (select t.*, (@s := @s + quantity) as running_quantity
      from t cross join
           (select @s := 0) params
      order by product_id
     ) t
where 401 <= running_quantity and
      401 > running_quantity - quantity;

Aggregate Functions in MySQL

I have been running into some issues with where/when I can use aggregate functions in MySQL. If I have the following two simple tables:
Campaign Table (campaign_id, campagin_name, account, country)
Revenue Table (campaign_id, revenue, date)
I want to write a query to find the top account by revenue for each week:
I tried the following
-- Asker's attempt at "top account by revenue for each week", kept as posted;
-- the notes below mark what the visible text gets wrong.
SELECT account, SUM(revenue) as sum_rev
FROM campaign
JOIN revenue
-- NOTE(review): aliases c and r are referenced here but never declared on the tables above
ON c.campaign_id = r.campaign_id
WHERE revenue =
( SELECT revenue
FROM campaign
JOIN revenue
ON c.campaign_id = r.campaign_id
-- NOTE(review): MAX(SUM(...)) nests one aggregate inside another, and it sits in WHERE, which filters rows before any grouping
WHERE revenue = MAX(SUM(revenue))
)
-- NOTE(review): account is selected but is not part of the GROUP BY
GROUP BY week(date)
I was told this isn't correct, is the issue just the nesting of the aggregate function max and sum?
In MySQL, I think variables are the simplest way:
-- Top account by revenue for each (year, week).
-- The inner query totals revenue per account and week, sorted by week with
-- the highest revenue first; the middle query numbers the rows inside each
-- week with user variables; the outer query keeps row number 1.
-- MySQL user variables are written @name; a leading # starts a comment.
-- @wy holds the 'year-week' key of the previous row: it is the variable that
-- is initialised, compared and updated. (Comparing a different, never-set
-- variable, or storing the key in @rn, would break the numbering.)
-- NOTE: MySQL does not guarantee the evaluation order of user-variable
-- expressions, so this depends on the inner rows being read in their ORDER BY order.
SELECT cr.*
FROM (SELECT cr.*,
             (@rn := if(@wy = concat_ws('-', yyyy, wk), @rn + 1,
                        -- new week: remember its key, then restart at 1
                        -- (both branches are 1; the if() only exists to run the assignment)
                        if(@wy := concat_ws('-', yyyy, wk), 1, 1)
                       )
             ) as rn
      FROM (SELECT c.account, year(r.date) as yyyy, week(r.date) as wk, SUM(r.revenue) as sum_rev
            FROM campaign c JOIN
                 revenue r
                 ON c.campaign_id = r.campaign_id
            GROUP BY c.account, year(r.date), week(r.date)
            ORDER BY yyyy, wk, sum_rev DESC
           ) cr CROSS JOIN
           (SELECT @wy := '', @rn := 0) params
     ) cr
WHERE rn = 1;