Improving the performance of a MySQL left join sub query - mysql

I have the following MySQL query which calculates the total number of orders for each month within a given date range e.g. a year. The query works correctly, but the performance is slow (around 250ms).
Any ideas on how to rewrite it to make it more efficient?
-- Monthly order totals for one shop and one country over a one-year window.
-- `dates` yields one row per calendar day so that every month in the range
-- appears in the output, including months without any order.
WITH RECURSIVE `dates` AS (
    SELECT '2019-11-28' AS item
    UNION ALL
    SELECT item + INTERVAL 1 DAY
    FROM `dates`
    WHERE item + INTERVAL 1 DAY <= '2020-11-27'
)
SELECT
    DATE_FORMAT(`dates`.`item`, '%b %y') AS `date`,
    COUNT(`matched`.`id`) AS `total`
FROM `dates`
LEFT JOIN (
    -- Orders of the shop whose location lies in the requested country.
    SELECT
        `o`.`id`,
        `o`.`created_at`
    FROM `orders` AS `o`
    INNER JOIN `locations` AS `l`
        ON `l`.`id` = `o`.`location_id`
    WHERE `o`.`shop_id` = 10379184
      AND `l`.`country_id` = 128
      AND `o`.`created_at` >= '2019-11-28 12:01:42'
      AND `o`.`created_at` <= '2020-11-27 12:01:42'
) AS `matched`
    ON DATE(`matched`.`created_at`) = `dates`.`item`
GROUP BY `date`
UPDATE: Some have suggested using two left joins, however if I do that then the country_id filter is not applied:
-- Variant with two chained LEFT JOINs.
-- Because `locations` is itself LEFT JOINed, an order whose location is not in
-- country 128 is kept with NULL location columns and is still counted by
-- COUNT(`orders`.`id`), so the country filter has no effect on the totals.
WITH RECURSIVE `dates` AS (
    SELECT '2019-11-28' AS item
    UNION ALL
    SELECT item + INTERVAL 1 DAY
    FROM `dates`
    WHERE item + INTERVAL 1 DAY <= '2020-11-27'
)
SELECT
    DATE_FORMAT(`dates`.`item`, '%b %y') AS `date`,
    COUNT(`orders`.`id`) AS `total`
FROM `dates`
LEFT JOIN `orders` USE INDEX (`orders_created_at_index`)
    ON DATE(`orders`.`created_at`) = `dates`.`item`
    AND `orders`.`shop_id` = 10379184
    AND `orders`.`created_at` >= '2019-11-28 12:22:43'
    AND `orders`.`created_at` <= '2020-11-27 12:22:43'
LEFT JOIN `locations`
    ON `locations`.`id` = `orders`.`location_id`
    AND `locations`.`country_id` = 128
GROUP BY `date`
Thanks!

I would suggest using a correlated subquery:
-- Correlated-subquery variant: for every generated day in `dates` (the
-- recursive CTE from the question) count that day's matching orders.
-- Returns one row per day; there is no outer GROUP BY.
SELECT
    DATE_FORMAT(d.item, '%b %y') AS `date`,
    (
        SELECT COUNT(*)
        FROM orders AS o
        INNER JOIN locations AS l
            ON o.location_id = l.id
        -- Columns are qualified so they can never resolve to the outer query.
        WHERE o.shop_id = 10379184
          AND l.country_id = 128
          -- Half-open day range: no function is applied to created_at.
          AND o.created_at >= d.item
          AND o.created_at < d.item + INTERVAL 1 DAY
    ) AS total
FROM dates AS d;
This avoids the outer aggregation, which is often a performance improvement.
In addition, indexes could probably help the query, but it is unclear where columns such as country_id and shop_id are coming from.

After much tinkering, I produced the following which operates in under 40ms, which is good enough for my needs. I still think it's not ideal and would welcome any improvements...
-- Monthly order totals for one shop and one country.
-- `orders` and `locations` are INNER JOINed first and the pair as a whole is
-- LEFT JOINed to the calendar, so an order outside the country never matches
-- and no NULL-pattern filtering is needed afterwards. (Filtering such rows in
-- an outer WHERE can remove every row of a month whose only orders were in
-- other countries, which drops that month from the result.)
SELECT
    `date`,
    COUNT(`order`)
FROM (
    WITH RECURSIVE `dates` AS (
        -- CAST makes `item` a real DATE so the range join below compares
        -- temporal values rather than strings.
        SELECT CAST('2019-11-28' AS DATE) AS item
        UNION ALL
        SELECT item + INTERVAL 1 DAY
        FROM `dates`
        WHERE item + INTERVAL 1 DAY <= '2020-11-27'
    )
    SELECT
        DATE_FORMAT(`dates`.`item`, '%b %y') AS `date`,
        `orders`.`id` AS `order`,
        `locations`.`id` AS `location`
    FROM `dates`
    LEFT JOIN (
        `orders`
        INNER JOIN `locations`
            ON `locations`.`id` = `orders`.`location_id`
            AND `locations`.`country_id` = 209
    )
        -- Half-open day range replaces DATE(created_at) = item, leaving
        -- created_at free of function calls.
        ON `orders`.`created_at` >= `dates`.`item`
        AND `orders`.`created_at` < `dates`.`item` + INTERVAL 1 DAY
        AND `orders`.`shop_id` = 10379184
        AND `orders`.`created_at` >= '2019-11-28 12:22:43'
        AND `orders`.`created_at` <= '2020-11-27 12:22:43'
) AS items
GROUP BY `date`

Related

Show all data in a date range using MYSQL recursive function

I'm trying to get a list of sales for the past 6 months and get 0 values if I have no data for a specific month. So I'm using recursive_all_dates to generate a date range for the past 6 months which works great:
-- One row per month for the last six months, formatted as YYYY-MM.
WITH RECURSIVE all_dates (dt) AS (
    -- anchor: the current date and time, six months back
    SELECT DATE_SUB(NOW(), INTERVAL 6 MONTH) AS dt
    UNION ALL
    -- recursion with stop condition: dt carries NOW()'s time of day, so the
    -- bound is NOW() itself; DATE(NOW()) is midnight and would usually cut
    -- off the current month.
    SELECT dt + INTERVAL 1 MONTH
    FROM all_dates
    WHERE dt + INTERVAL 1 MONTH <= NOW()
)
SELECT DATE_FORMAT(dt, '%Y-%m') AS ym
FROM all_dates
This will return:
ym
------
2019-10
2019-11
2019-12
2020-01
2020-02
2020-03
2020-04
Now I want to left join this with my real data:
-- Attempt to join the generated month list to the invoice data.
-- The INNER JOIN to the per-invoice totals and the WHERE filters on i.* only
-- keep months that have a matching invoice, so empty months are not returned.
WITH RECURSIVE all_dates (dt) AS (
    -- anchor
    SELECT DATE_SUB(NOW(), INTERVAL 6 MONTH) AS dt
    UNION ALL
    -- recursion with stop condition
    SELECT dt + INTERVAL 1 MONTH
    FROM all_dates
    WHERE dt + INTERVAL 1 MONTH <= NOW()
)
SELECT
    DATE_FORMAT(ad.dt, '%Y-%m') AS ym,
    SUM(isrv.profit) AS profit
FROM all_dates AS ad
LEFT JOIN organisation_invoices AS i
    ON DATE_FORMAT(i.issue_date, '%Y-%m') = DATE_FORMAT(ad.dt, '%Y-%m')
INNER JOIN (
    -- total value of the services on each invoice
    SELECT
        s.invoice_id,
        SUM(s.value) AS profit
    FROM organisation_invoice_services AS s
    GROUP BY s.invoice_id
) AS isrv
    ON isrv.invoice_id = i.id
WHERE i.organisation_id = '4b166dbe-d99d-5091-abdd-95b83330ed3a'
  AND i.issue_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH)
GROUP BY `ym`
ORDER BY `ym` ASC
But I still only get the populated months:
ym profit
------------------
2019-12 8791
2020-02 302
2020-04 10452
The desired result:
ym profit
------------------
2019-10 0
2019-11 0
2019-12 8791
2020-01 0
2020-02 302
2020-03 0
2020-04 10452
What am I missing?
Edit: Sample data set and fiddle:
-- Sample schema: invoices and the service lines billed on them.
CREATE TABLE `organisation_invoices` (
    `id`              varchar(255) NOT NULL,
    `organisation_id` varchar(255) NOT NULL,
    `issue_date`      date         NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE TABLE `organisation_invoice_services` (
    `id`              varchar(255) NOT NULL,
    `organisation_id` varchar(255) NOT NULL,
    `invoice_id`      varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
    `qty`             float        NOT NULL,
    `value`           float        NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Sample data: one invoice carrying a single service line.
INSERT INTO `organisation_invoices`
    (id, organisation_id, issue_date)
VALUES
    ('e11cec69-138f-4e20-88e5-5430b6c8d0a1', '4b166dbe-d99d-5091-abdd-95b83330ed3a', '2020-01-20');

INSERT INTO `organisation_invoice_services`
    (id, organisation_id, invoice_id, qty, `value`)
VALUES
    ('fe45dfd67-138f-4e20-88e5-5430b6c8d0a1', '4b166dbe-d99d-5091-abdd-95b83330ed3a', 'e11cec69-138f-4e20-88e5-5430b6c8d0a1', 1, 1000);
https://www.db-fiddle.com/f/dibyQi31CBtr2Cr8vjJA8i/0
You can use the following:
-- Profit per month for one organisation over the last six months, with 0 for
-- months that have no invoices.
-- The organisation/date filters sit in the LEFT JOIN's ON clause: a month
-- whose invoices all fail the filter (for example, invoices of other
-- organisations only) then still produces a NULL-extended row and is
-- reported with profit 0 instead of disappearing.
WITH RECURSIVE all_dates (dt) AS (
    -- anchor
    SELECT DATE_SUB(NOW(), INTERVAL 6 MONTH) AS dt
    UNION ALL
    -- recursion with stop condition
    SELECT dt + INTERVAL 1 MONTH
    FROM all_dates
    WHERE dt + INTERVAL 1 MONTH <= NOW()
)
SELECT
    DATE_FORMAT(ad.dt, '%Y-%m') AS ym,
    -- SUM over only NULLs is NULL; report it as 0
    COALESCE(SUM(isrv.profit), 0) AS profit
FROM all_dates AS ad
LEFT JOIN organisation_invoices AS i
    ON DATE_FORMAT(i.issue_date, '%Y-%m') = DATE_FORMAT(ad.dt, '%Y-%m')
    AND i.organisation_id = '4b166dbe-d99d-5091-abdd-95b83330ed3a'
    AND i.issue_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH)
LEFT JOIN (
    -- total value of the services on each invoice
    SELECT
        invoice_id,
        SUM(value) AS profit
    FROM organisation_invoice_services
    GROUP BY invoice_id
) AS isrv
    ON isrv.invoice_id = i.id
GROUP BY `ym`
ORDER BY `ym` ASC
demo on dbfiddle.uk
Changes:
The conditions in the WHERE clause change the behaviour of your LEFT JOIN. Since you check for a specific organisation_id, months without a matching invoice (where every i.* column is NULL) are filtered out, so you only get the matches between your month table and the data (the LEFT JOIN behaves like an INNER JOIN). You need the following WHERE clause instead:
-- Keep the rows of the wanted organisation plus the all-NULL rows that the
-- LEFT JOIN produces for months with no invoice at all.
-- Caveat: a month whose only invoices belong to other organisations is still
-- filtered out; putting these conditions in the LEFT JOIN's ON clause avoids that.
WHERE i.organisation_id IS NULL
   OR (
        i.organisation_id = '4b166dbe-d99d-5091-abdd-95b83330ed3a'
        AND i.issue_date >= DATE_SUB(NOW(), INTERVAL 6 MONTH)
   )
You also have to change the second JOIN to a LEFT JOIN.

MySQL - Display all dates including zero data nested join

I'm trying to display all dates in a month, and also in the reservation detail, I only have check_in_date and check_out_date, so I have to create left join inside a left join, below is my script
-- All dates of the month, left-joined to per-day reservation statistics.
-- User variables are written @name; a leading # starts a comment in MySQL.
SELECT
    *
FROM
    (
        -- JustDates: 29 consecutive days following 2020-01-31
        SELECT
            @dt := DATE_ADD(@dt, INTERVAL 1 DAY) myDate
        FROM
            (
                SELECT
                    @dt := '2020-01-31'
            ) vars, tb_dummy
        LIMIT 29
    ) JustDates
LEFT JOIN
    (
        -- Resv: room nights, revenue and average revenue per day
        SELECT
            DATE_FORMAT(d.myDate2, '%Y-%m-%d') AS `myDate2`,
            COALESCE(COUNT(rdt.reservation_detail_id), 0) AS `RNS`,
            FORMAT(SUM(rdt.subtotal_amount / COALESCE(DATEDIFF(DATE(DATE(rdt.check_out_date)), DATE(rdt.check_in_date)), 0)), 2) AS `REVENUE`,
            FORMAT(SUM(rdt.subtotal_amount / COALESCE(DATEDIFF(DATE(DATE(rdt.check_out_date)), DATE(rdt.check_in_date)), 0)) / COALESCE(COUNT(rdt.reservation_detail_id), 0), 2) AS `AVGREV`
        FROM
            (
                SELECT
                    @dt := DATE_ADD(@dt, INTERVAL 1 DAY) myDate2
                FROM
                    (
                        SELECT
                            @dt := '2020-01-31'
                    ) vars2, tb_dummy
                LIMIT 29
            ) d
        LEFT JOIN
            tb_reservation_detail rdt
            ON d.myDate2 BETWEEN DATE(rdt.check_in_date) AND DATE(DATE(rdt.check_out_date) - INTERVAL 1 DAY)
        INNER JOIN
            tb_reservation R
            ON rdt.reservation_id = R.reservation_id
        -- These filters reference the joined tables, so days without a
        -- matching reservation are removed from this derived table.
        WHERE
            rdt.reservation_status_id <> 3
            AND
            R.property_id = 57
        GROUP BY d.myDate2
        ORDER BY d.myDate2 ASC
    ) Resv
    ON
    JustDates.myDate = Resv.myDate2
ORDER BY
    JustDates.myDate ASC
When I run it, it only returns the dates from the left table, like this: Left join result
but when I change
SELECT
    *
FROM
    (
        -- User variables are written @name; a leading # starts a comment in MySQL.
        SELECT
            @dt := DATE_ADD(@dt, INTERVAL 1 DAY) myDate
        FROM
            (
                SELECT
                    @dt := '2020-01-31'
            ) vars, tb_dummy
        LIMIT 29
    ) JustDates
LEFT JOIN  -- the join type being changed
(
to
SELECT
    *
FROM
    (
        -- User variables are written @name; a leading # starts a comment in MySQL.
        SELECT
            @dt := DATE_ADD(@dt, INTERVAL 1 DAY) myDate
        FROM
            (
                SELECT
                    @dt := '2020-01-31'
            ) vars, tb_dummy
        LIMIT 29
    ) JustDates
RIGHT JOIN  -- the join type being changed
(
it returns data from the right table like this: Right join result
What is wrong with my code?
Welcome to StackOverflow. I think your problem is that you don't quite understand the difference between RIGHT JOIN and LEFT JOIN. Check out this StackOverflow post that goes over the differences.
As far as wanting to display all of the dates in a month, here's a link to an answer I posted that I believe does what you want it to. In my answer I provide an example query that contains a derived table you can select from and then LEFT JOIN your tables to so it will show all the days in the month regardless if there is data in your tables for a given day or not.
Hope this helps.

initialise all values in a query to 0 and top to NULL if there's no result

Hi all, I execute this query to get a table of statistics about some database information. I'd like to initialise the fields that don't exist (the query is executed on different dates, and sometimes there's a day with no data), so I'd like it to return 0 for the counts and NULL in the TOP column.
-- Pivot the labelled counts produced by tmp1 into a single row.
SELECT
    SUM(CASE WHEN `TOP` = 'one' THEN `Nb` ELSE 0 END) AS first_one,
    SUM(CASE WHEN `TOP` = 'two' THEN `Nb` ELSE 0 END) AS second_one,
    SUM(CASE WHEN `TOP` = 'three' THEN `Nb` ELSE 0 END) AS thrid_one,
    SUM(CASE WHEN `TOP` NOT IN ('one', 'two', 'three') THEN `Nb` ELSE 0 END) AS forth_one,
    GROUP_CONCAT(
        CASE WHEN `TOP` NOT IN ('one', 'two', 'three') THEN `TOP` ELSE '' END
        SEPARATOR ''
    ) AS `OR`
FROM (
    -- 'one': all MSS rows of the current month
    SELECT
        COUNT(*) AS Nb,
        'one' AS `TOP`
    FROM mytable
    WHERE TYPE = 'MSS'
      AND YEAR(date) = YEAR(CURDATE())
      AND MONTH(date) = MONTH(CURDATE())

    UNION ALL

    -- 'two': those with S = 0
    SELECT
        COUNT(*) AS Nb,
        'two' AS `TOP`
    FROM mytable
    WHERE TYPE = 'MSS'
      AND S = 0
      AND YEAR(date) = YEAR(CURDATE())
      AND MONTH(date) = MONTH(CURDATE())

    UNION ALL

    -- 'three': those with S <> 0
    SELECT
        COUNT(*) AS Nb,
        'three' AS `TOP`
    FROM mytable
    WHERE TYPE = 'MSS'
      AND S <> 0
      AND YEAR(date) = YEAR(CURDATE())
      AND MONTH(date) = MONTH(CURDATE())

    UNION ALL

    -- the single most frequent `OR` value of the month and its count
    SELECT
        `Nb`,
        `TOP`
    FROM (
        SELECT
            COUNT(*) AS Nb,
            `OR` AS `TOP`
        FROM mytable
        WHERE TYPE = 'MSS'
          AND YEAR(date) = YEAR(CURDATE())
          AND MONTH(date) = MONTH(CURDATE())
        GROUP BY `OR`
        ORDER BY Nb DESC
        LIMIT 1
    ) AS tmp
) AS tmp1
Assuming that tmp1 has the data you need but with "gaps" (days when there was no data at all), you could RIGHT JOIN tmp1 to a table tmp2 on the day (I assume you have such a column in tmp1). tmp2 would be just a list of days:
SELECT '2013-05-17' AS day UNION ALL SELECT '2013-05-18' UNION ALL SELECT ...
I could elaborate my answer if you'd like to provide your DB schema.
You can replace each subquery with:
-- Template: wrap a subquery so NULL results are replaced by defaults.
SELECT
    IFNULL(tmp.Nb, 0) AS Nb,
    IFNULL(tmp.`TOP`, 'value') AS `TOP`
FROM (
    -- subquery (MySQL needs whitespace after "--" for it to be a comment)
) AS tmp
Example for the first subquery:
-- The wrapper applied to the first subquery, with 'one' as the default label.
SELECT
    IFNULL(tmp.Nb, 0) AS Nb,
    IFNULL(tmp.`TOP`, 'one') AS `TOP`
FROM (
    SELECT
        COUNT(*) AS Nb,
        'one' AS `TOP`
    FROM mytable
    WHERE TYPE = 'MSS'
      AND YEAR(date) = YEAR(CURDATE())
      AND MONTH(date) = MONTH(CURDATE())
) AS tmp
SQL is good at grouping existing entities into categories, but bad at "creating" entities itself. I would advise either a generic number table (really just the numbers from 0 to a few hundred thousand), if you also have non-date categories, or, as Wiktor suggested, a date table which gets filled every now and then and covers the next few years as well as the time since your program has been in use.
With a date table
-- Calendar table: one row per date.
CREATE TABLE list_dates (
    id int(11) NOT NULL PRIMARY KEY AUTO_INCREMENT,
    dateval date NOT NULL
)
you could start your queries from that table (with a reasonable range, of course) and count every thing else:
-- Actions per day over a date range, including days without any action.
-- $fromDate / $toDate are placeholders; bind them as parameters rather than
-- interpolating user input into the SQL string.
SELECT
    list_dates.dateval AS date,
    -- Count a column of `actions`: COUNT(*) would also count the
    -- NULL-extended row of a day with no actions and report 1 instead of 0.
    COUNT(actions.actiontime) AS cnt
FROM list_dates
LEFT JOIN actions
    ON actions.actiontime >= CAST(list_dates.dateval AS DATETIME)
    AND actions.actiontime < CAST(list_dates.dateval + INTERVAL 1 DAY AS DATETIME)
WHERE list_dates.dateval BETWEEN '$fromDate' AND '$toDate'
GROUP BY list_dates.dateval
;
or starting with a number table numbers
-- Actions per day, generating the days from a number table
-- (number 0 is $fromDate, 1 the day after, and so on).
-- $fromDate / $countDates are placeholders; bind them as parameters rather
-- than interpolating user input into the SQL string.
SELECT
    '$fromDate' + INTERVAL numbers.number DAY AS date,
    -- Count a column of `actions`: COUNT(*) would also count the
    -- NULL-extended row of a day with no actions and report 1 instead of 0.
    COUNT(actions.actiontime) AS cnt
FROM numbers
LEFT JOIN actions
    ON actions.actiontime >= CAST('$fromDate' + INTERVAL numbers.number DAY AS DATETIME)
    AND actions.actiontime < CAST('$fromDate' + INTERVAL (numbers.number + 1) DAY AS DATETIME)
WHERE numbers.number >= 0
  AND numbers.number < $countDates
GROUP BY numbers.number
;
One Day
If you really want just that one day (today), then you can of course use an anonymous subselect table instead, so it becomes
-- Actions for a single day (today), using a one-row derived table in place
-- of the calendar table.
SELECT
    list_dates.dateval AS date,
    -- Count a column of `actions`: COUNT(*) would also count the
    -- NULL-extended row of a day with no actions and report 1 instead of 0.
    COUNT(actions.actiontime) AS cnt
FROM (SELECT CURDATE() AS dateval) AS list_dates
LEFT JOIN actions
    ON actions.actiontime >= CAST(list_dates.dateval AS DATETIME)
    AND actions.actiontime < CAST(list_dates.dateval + INTERVAL 1 DAY AS DATETIME)
WHERE list_dates.dateval BETWEEN '$fromDate' AND '$toDate'
GROUP BY list_dates.dateval
;

Retrieving an overall total of points and a weekly total within the same query

I'm trying to query my reward database to work out how many points members of staff have allocated in total and this week.
The query that I'm using to work out the total points a teacher has allocated is as follows:
-- All-time points allocated by each giver.
SELECT
    t.`Giver_ID`,
    SUM(t.`Points`) AS TotalPoints
FROM `transactions` AS t
GROUP BY t.`Giver_ID`
ORDER BY t.`Giver_ID` ASC
The query that I'm using to work out weekly allocations is very similar:
-- Points allocated by each giver during the week 2012-09-24 .. 2012-09-30.
SELECT
    `Giver_ID`,
    SUM(`Points`) AS WeeklyPoints
FROM `transactions`
-- Half-open range: includes every moment of 2012-09-30, whereas
-- `Datetime` <= '2012-09-30' stops at midnight at the start of that day.
WHERE `Datetime` >= '2012-09-24'
  AND `Datetime` < '2012-10-01'
GROUP BY `Giver_ID`
ORDER BY `Giver_ID` ASC
My question is this: is it possible to combine the queries to produce Giver_ID, TotalPoints and WeeklyPoints from a single query?
Thanks in advance,
Yes, it is possible -
-- Total and weekly points per giver in a single pass over `transactions`.
SELECT
    Giver_ID,
    SUM(Points) AS TotalPoints,
    -- Conditional aggregation: rows outside the week contribute NULL, which
    -- SUM ignores (WeeklyPoints is NULL for a giver with no rows in the week).
    -- Half-open range so the whole of 2012-09-30 is included.
    SUM(
        CASE
            WHEN Datetime >= '2012-09-24' AND Datetime < '2012-10-01' THEN Points
            ELSE NULL
        END
    ) AS WeeklyPoints
FROM transactions
GROUP BY Giver_ID
Try this:
-- Total and weekly points per giver by joining two pre-aggregated subqueries.
SELECT
    a.`Giver_ID`,
    MAX(b.`TotalPoints`) AS `TotalPoints`,
    MAX(c.`WeeklyPoints`) AS `WeeklyPoints`
FROM `transactions` AS a
LEFT JOIN (
    -- all-time total per giver
    SELECT
        `Giver_ID`,
        SUM(`Points`) AS TotalPoints
    FROM `transactions`
    GROUP BY `Giver_ID`
) AS b
    ON a.`Giver_ID` = b.`Giver_ID`
LEFT JOIN (
    -- total per giver for the week; half-open range so the whole of
    -- 2012-09-30 is included
    SELECT
        `Giver_ID`,
        SUM(`Points`) AS WeeklyPoints
    FROM `transactions`
    WHERE `Datetime` >= '2012-09-24'
      AND `Datetime` < '2012-10-01'
    GROUP BY `Giver_ID`
) AS c
    ON a.`Giver_ID` = c.`Giver_ID`
GROUP BY a.`Giver_ID`
ORDER BY a.`Giver_ID` ASC
-- Weekly points per giver, with the all-time total fetched by a correlated
-- subquery. The outer WHERE restricts the rows to the week, so a giver with
-- no transactions in that week does not appear in the result.
SELECT
    `Giver_ID`,
    SUM(`Points`) AS WeeklyPoints,
    (
        SELECT SUM(`Points`)
        FROM `transactions`
        WHERE `Giver_ID` = t.`Giver_ID`
        GROUP BY `Giver_ID`
    ) AS TotalPoints
FROM `transactions` t
-- Half-open range so the whole of 2012-09-30 is included.
WHERE `Datetime` >= '2012-09-24'
  AND `Datetime` < '2012-10-01'
GROUP BY `Giver_ID`
ORDER BY `Giver_ID` ASC

Setting user defined variables with joins

I have a query like below:
The result gives @sold_count:=SUM(I.quantity) = 10, but @sold_count = 0,
so the calculations are all 0.
What could be wrong here?
-- User variables are written @name; a leading # starts a comment in MySQL.
SET @sold_count := 0;
SELECT
    -- The variable is assigned and read within the same SELECT list.
    @sold_count := SUM(I.quantity),
    @sold_count,
    I.from_widget,
    COUNT(from_widget) AS order_count,
    (@sold_count * buy_price) AS ciro,
    (@sold_count * list_price) AS liste_ciro,
    (@sold_count * widget_price) AS vitrin_ciro,
    P.*
FROM
    tbl_products P
    LEFT JOIN tbl_order_items I ON I.product_id = P.id
WHERE
    P.publish_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 MONTH) AND DATE_SUB(CURDATE(), INTERVAL 0 MONTH)
GROUP BY I.from_widget, I.product_id
ORDER BY publish_date DESC
Don't use variables. Just:
-- Quantity sold and revenue figures per product and widget for products
-- published in the last three months, without user variables.
SELECT
    SUM(I.quantity),
    I.from_widget,
    COUNT(I.from_widget) AS order_count,
    SUM(I.quantity) * buy_price AS ciro,
    SUM(I.quantity) * list_price AS liste_ciro,
    SUM(I.quantity) * widget_price AS vitrin_ciro,
    P.*
FROM
    tbl_products P
    LEFT JOIN tbl_order_items I
        ON I.product_id = P.id
WHERE
    P.publish_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 MONTH)
                       AND DATE_SUB(CURDATE(), INTERVAL 0 MONTH)
-- Group by the product's own key: I.product_id is NULL for every product
-- without order items, which would merge all of them into a single group.
-- NOTE(review): assumes P.id is the primary key of tbl_products; confirm.
GROUP BY I.from_widget,
         P.id
ORDER BY publish_date DESC ;
You could also make the query a nested one, if you don't like using SUM(quantity) many times:
-- Nested form: aggregate once in the derived table, then derive the revenue
-- figures from sum_quantity in the outer query.
SELECT
    sum_quantity * buy_price AS ciro,
    sum_quantity * list_price AS liste_ciro,
    sum_quantity * widget_price AS vitrin_ciro,
    tmp.*
FROM
    ( SELECT
          SUM(I.quantity) AS sum_quantity,
          I.from_widget,
          COUNT(I.from_widget) AS order_count,
          -- NOTE(review): if these three columns belong to tbl_products they
          -- are also produced by P.* below, and a derived table cannot have
          -- duplicate column names; confirm which table owns them.
          buy_price,
          list_price,
          widget_price,
          P.*
      FROM
          tbl_products P
          LEFT JOIN tbl_order_items I
              ON I.product_id = P.id
      WHERE
          P.publish_date BETWEEN DATE_SUB(CURDATE(), INTERVAL 3 MONTH)
                             AND DATE_SUB(CURDATE(), INTERVAL 0 MONTH)
      -- Group by the product's own key: I.product_id is NULL for every
      -- product without order items, which would merge them into one group.
      -- NOTE(review): assumes P.id is the primary key of tbl_products; confirm.
      GROUP BY I.from_widget,
               P.id
    ) AS tmp
ORDER BY publish_date DESC ;