single sql query to perform some group by - mysql

CREATE TABLE IF NOT EXISTS `projects` (
`idproject` int(10) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
`date` datetime NOT NULL,
`status` enum('new','active','closed') NOT NULL,
`priority` enum('low','medium','high') NOT NULL,
PRIMARY KEY (`idproject`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=20
Here are some data:
INSERT INTO `projects` (`idproject`, `name`, `date`, `status`, `priority`) VALUES
(1, 'iCompany', '2011-03-23 11:41:44', 'new', 'medium'),
(2, 'John Doe & Co.', '2011-04-09 14:38:04', 'closed', 'low'),
(3, 'ACME, Inc.', '2011-05-21 11:43:11', 'active', 'high'),
(4, 'John Doe & Co.', '2011-03-28 15:19:45', 'active', 'low'),
(5, 'John Doe & Co.', '2011-03-08 15:16:32', 'new', 'low'),
(6, 'ACME, Inc.', '2011-04-05 20:58:42', 'active', 'low'),
(7, 'Mega Corp', '2011-04-21 08:08:53', 'new', 'low'),
(8, 'iCompany', '2011-04-17 08:40:36', 'active', 'medium'),
(9, 'iCompany', '2011-05-18 14:36:48', 'active', 'low'),
(10, 'John Doe & Co.', '2011-04-18 19:08:25', 'new', 'medium'),
(11, 'ACME, Inc.', '2011-05-19 13:11:04', 'active', 'low'),
(12, 'Foo Bars', '2011-03-03 17:19:29', 'new', 'high'),
(13, 'ACME, Inc.', '2011-04-23 20:42:33', 'active', 'medium'),
(14, 'Foo Bars', '2011-05-13 09:18:15', 'active', 'medium'),
(15, 'ACME, Inc.', '2011-03-20 14:37:18', 'new', 'low'),
(16, 'Foo Bars', '2011-04-18 13:46:23', 'active', 'high'),
(17, 'iCompany', '2011-05-31 07:13:32', 'closed', 'low'),
(18, 'Foo Bars', '2011-05-31 15:43:39', 'active', 'low'),
(19, 'John Doe & Co.', '2011-05-28 11:28:32', 'active', 'medium')
I'd like to have the list of all projects:
- with their latest (chronologically) status and priority,
- with the number of days between the first and latest entry (0 if there is
only one entry), you may ignore the hours,
- sorted by priority ('high' first), then by name,
- without the projects where the latest status is 'closed' (omitted from
result).
Output should be:
+---------------+-----------+---------------+----------------+
¦name ¦total_days ¦latest_status ¦latest_priority ¦
+---------------+-----------+---------------+----------------+
¦ACME, Inc. ¦62 ¦active ¦high ¦
¦John Doe & Co. ¦81 ¦active ¦medium ¦
¦Foo Bars ¦89 ¦active ¦low ¦
¦Mega Corp ¦0 ¦new ¦low ¦
+---------------+-----------+---------------+----------------+
So far i got to write this:
SELECT name,status FROM projects group by name order by priority desc,name
please help?

-- One row per project name: the day span between its first and latest entry,
-- plus the status and priority stored on the entry with the latest date.
-- Projects whose latest status is 'closed' are dropped; 'high' sorts first.
SELECT
    summary.name,
    summary.total_days,
    summary.latest_status,
    summary.latest_priority
FROM (
    SELECT
        grp.name,
        DATEDIFF(MAX(grp.date), MIN(grp.date)) AS total_days,
        -- correlated lookup: the row of this name dated at the group's MAX(date)
        (SELECT latest.status
           FROM projects AS latest
          WHERE latest.name = grp.name
            AND latest.date = MAX(grp.date)) AS latest_status,
        (SELECT latest.priority
           FROM projects AS latest
          WHERE latest.name = grp.name
            AND latest.date = MAX(grp.date)) AS latest_priority
    FROM projects AS grp
    GROUP BY grp.name
) AS summary
WHERE summary.latest_status <> 'closed'
ORDER BY
    -- map the priority label to a rank so that 'high' comes first
    CASE summary.latest_priority
        WHEN 'high' THEN 0
        WHEN 'medium' THEN 1
        WHEN 'low' THEN 2
    END,
    summary.name;
total days: take the DATEDIFF of the MAX and MIN date, which will give you the number of days in between;
latest status: fetch the status for which the row's date is equal to the MAX date;
latest priority: fetch the priority for which the row's date is equal to the MAX date;
order by: translate each priority string to a numerical value and order by it.
Here's an sqlfiddle.

Try this: http://www.sqlfiddle.com/#!2/b3962/1
-- Aggregate per name (latest date and day span), then join back to the single
-- row that carries that latest date to read its status and priority.
select
    p.name,
    recent.total_days,
    p.status   as latest_status,
    p.priority as latest_priority
from projects as p
inner join (
    select
        name,
        max(date) as recent_date,
        datediff(max(date), min(date)) as total_days
    from projects
    group by name
) as recent
    on  recent.name = p.name
    and recent.recent_date = p.date
where p.status <> 'closed'
order by
    -- rank the priority labels so that 'high' sorts first
    case p.priority
        when 'high' then 0
        when 'medium' then 1
        when 'low' then 2
    end,
    p.name
Output:
| NAME | TOTAL_DAYS | LATEST_STATUS | LATEST_PRIORITY |
-----------------------------------------------------------------
| ACME, Inc. | 62 | active | high |
| John Doe & Co. | 81 | active | medium |
| Foo Bars | 89 | active | low |
| Mega Corp | 0 | new | low |
If you want to make it shorter (by using USING), alias recent_date as date: http://www.sqlfiddle.com/#!2/b3962/2
select
p.name, r.total_days, p.status as latest_status, p.priority as latest_priority
from projects p
join
(
select name, max(date) as date, datediff(max(date),min(date)) as total_days
from projects
group by name
) r using(name,date)
where p.status not in ('closed')
order by
(case latest_priority
when 'high' then 0
when 'medium' then 1
when 'low' then 2
end), p.name
How it works
Work it from inside out. First step, find the latest:
select name, max(date) as recent_date, datediff(max(date),min(date)) as total_days
from projects
group by name
Output:
| NAME | DATE | TOTAL_DAYS |
--------------------------------------------------------------
| ACME, Inc. | May, 21 2011 11:43:11-0700 | 62 |
| Foo Bars | May, 31 2011 15:43:39-0700 | 89 |
| iCompany | May, 31 2011 07:13:32-0700 | 69 |
| John Doe & Co. | May, 28 2011 11:28:32-0700 | 81 |
| Mega Corp | April, 21 2011 08:08:53-0700 | 0 |
Final step, join the above results to main table:
select
p.name, r.total_days, p.status as latest_status, p.priority as latest_priority
from projects p
join
(
select name, max(date) as date, datediff(max(date),min(date)) as total_days
from projects
group by name
) r using(name,date)
where p.status not in ('closed')
order by
(case latest_priority
when 'high' then 0
when 'medium' then 1
when 'low' then 2
end), p.name
For overriding the ordering, use CASE statement on ORDER BY clause.
Output:
| NAME | TOTAL_DAYS | LATEST_STATUS | LATEST_PRIORITY |
-----------------------------------------------------------------
| ACME, Inc. | 62 | active | high |
| John Doe & Co. | 81 | active | medium |
| Foo Bars | 89 | active | low |
| Mega Corp | 0 | new | low |

Related

SQL date difference using the same column based on conditions

I'm trying to calculate the days since the last different order so for example let's say I have the following table:
cust_id|Product_id|Order_date|
1 |a |10/02/2020|
2 |b |10/01/2020|
3 |c |09/07/2020|
4 |d |09/02/2020|
1 |a |08/29/2020|
1 |f |08/02/2020|
2 |g |07/01/2020|
3 |t |06/06/2020|
4 |j |05/08/2020|
1 |w |04/20/2020|
I want to find the difference between the most recent date and the previous date that has a product ID that doesn't match the most recent product ID.
So the output should be something like this:
cust_id|latest_Product_id|time_since_last_diff_order_days|
1 |a |30 |
2 |b |92 |
3 |c |91 |
4 |d |123 |
Here's the query that I tried to use but got an error (error code 1064)
SELECT a.cust_id, a.Product_ID as latest_Product_id, DATEDIFF(MAX(a.Order_date),MAX(b.Order_date)) as time_since_last_diff_order_days
FROM database_customers.cust_orders a
INNER JOIN
database_customers.cust_orders b
on
a.cust_id = b.cust_id
WHERE a.product_id =! b.prodcut_id;
Thank you for any help!
It isn't pretty, but it will do the job:
CREATE TABLE tab1
(`cust_id` int, `Product_id` varchar(1), `Order_date` datetime)
;
INSERT INTO tab1
(`cust_id`, `Product_id`, `Order_date`)
VALUES
(1, 'a', '2020-10-02 02:00:00'),
(2, 'b', '2020-10-01 02:00:00'),
(3, 'c', '2020-09-07 02:00:00'),
(4, 'd', '2020-09-02 02:00:00'),
(1, 'a', '2020-08-29 02:00:00'),
(1, 'f', '2020-08-02 02:00:00'),
(2, 'g', '2020-07-01 02:00:00'),
(3, 't', '2020-06-06 02:00:00'),
(4, 'j', '2020-05-08 02:00:00'),
(1, 'w', '2020-04-20 02:00:00')
;
-- CTE: number each customer's orders from newest (rn = 1) to oldest.
WITH CTE AS (SELECT `cust_id`, `Product_id`,`Order_date`,ROW_NUMBER() OVER(PARTITION BY `cust_id` ORDER BY `Order_date` DESC) rn
FROM tab1)
SELECT t1.`cust_id`, t1.`Product_id`, t2.time_since_last_diff_order_days
FROM
-- t1: the product of each customer's most recent order
(SELECT
`cust_id`, `Product_id`
FROM
CTE
WHERE rn = 1 ) t1
JOIN
-- t2: days between the two most recent orders (0 when the customer has only one order)
-- NOTE(review): rn in (1,2) takes the two newest orders regardless of Product_id, so this
-- is the time since the previous order, not necessarily since the previous order
-- of a *different* product -- confirm this matches the requirement.
( SELECT `cust_id`,DATEDIFF(MAX(`Order_date`), MIN(`Order_date`)) time_since_last_diff_order_days
FROM CTE WHERE rn in (1,2) GROUP BY `cust_id`) t2 ON t1.cust_id = t2.cust_id
cust_id | Product_id | time_since_last_diff_order_days
------: | :--------- | ------------------------------:
1 | a | 34
2 | b | 92
3 | c | 93
4 | d | 117
db<>fiddle here
I want to find the difference between the most recent date and the previous date that has a product ID that doesn't match the most recent product ID.
You can use first_value() to get the last product and then aggregate:
-- Per customer: the most recently ordered product, the latest order date, and the
-- number of days between that latest order and the most recent order of a
-- different product (NULL when the customer never ordered anything else).
select cust_id, last_product_id, max(order_date),
       datediff(max(order_date),
                max(case when product_id <> last_product_id then order_date end)) as diff_from_last_product
from (select co.*,
             -- order by order_date DESC so that first_value() picks the newest order's
             -- product; ascending order would return the customer's first product
             first_value(product_id) over (partition by cust_id order by order_date desc) as last_product_id
      from cust_orders co
     ) co
group by cust_id, last_product_id;

SQL: How to display data with row values as column names and then count number of times ID per row value?

This is my database and fiddle setup:
CREATE TABLE PRODUCTS (
NAME varchar(20),
PRODUCT_ID int(3)
);
CREATE TABLE PAYMENT (
NAME varchar(20),
PAYMENT_ID int(4)
);
INSERT INTO PRODUCTS (NAME, PRODUCT_ID)
VALUES
("Apple", 1),
("Banana", 2),
("Watermelon", 3),
("Bread", 4),
("Milk", 5),
("Cake", 6),
("Candy", 7),
("Butter", 8),
("Carrot", 9),
("Tomato", 10);
INSERT INTO PAYMENT (NAME, PAYMENT_ID)
VALUES
("Banana", 85),
("Apple", 94),
("Banana", 94),
("Candy", 85),
("Banana", 105);
https://www.db-fiddle.com/f/5jsGYfWZ6pnzgf62Z5FA8L/0
I'm thinking I have to do some Count(CASE statement within here).
I saw something about pivots, but not sure about that?
Yes you are right, that is a possibility
I use IF when there is only one option.
SELECT
p.NAME
,SUM(IF(PAYMENT_ID = 85,1,0)) '85'
,SUM(IF(PAYMENT_ID = 94,1,0)) '94'
,SUM(IF(PAYMENT_ID = 105,1,0)) '105'
FROM PRODUCTS p LEFT JOIN PAYMENT pa ON p.NAME = pa.NAME
GROUP BY p.NAME;
The reference between the two tables should be the id and not the name; the name may be unique, but numbers need less space than text.
CREATE TABLE PRODUCTS (
NAME varchar(20),
PRODUCT_ID int(3)
);
CREATE TABLE PAYMENT (
NAME varchar(20),
PAYMENT_ID int(4)
);
INSERT INTO PRODUCTS (NAME, PRODUCT_ID)
VALUES
("Apple", 1),
("Banana", 2),
("Watermelon", 3),
("Bread", 4),
("Milk", 5),
("Cake", 6),
("Candy", 7),
("Butter", 8),
("Carrot", 9),
("Tomato", 10);
INSERT INTO PAYMENT (NAME, PAYMENT_ID)
VALUES
("Banana", 85),
("Apple", 94),
("Banana", 94),
("Candy", 85),
("Banana", 105);
✓
✓
✓
✓
SELECT
p.NAME
,SUM(IF(PAYMENT_ID = 85,1,0)) '85'
,SUM(IF(PAYMENT_ID = 94,1,0)) '94'
,SUM(IF(PAYMENT_ID = 105,1,0)) '105'
FROM PRODUCTS p LEFT JOIN PAYMENT pa ON p.NAME = pa.NAME
GROUP BY p.NAME;
NAME | 85 | 94 | 105
:--------- | -: | -: | --:
Apple | 0 | 1 | 0
Banana | 1 | 1 | 1
Bread | 0 | 0 | 0
Butter | 0 | 0 | 0
Cake | 0 | 0 | 0
Candy | 1 | 0 | 0
Carrot | 0 | 0 | 0
Milk | 0 | 0 | 0
Tomato | 0 | 0 | 0
Watermelon | 0 | 0 | 0
db<>fiddle here
MySQL has a convenient shortcut. I recommend:
SELECT p.NAME,
SUM(pa.PAYMENT_ID = 85) as payment_85,
SUM(pa.PAYMENT_ID = 94) as payment_94,
SUM(pa.PAYMENT_ID = 105) as payment_105
FROM PRODUCTS p LEFT JOIN
PAYMENT pa
ON p.NAME = pa.NAME
GROUP BY p.NAME;
Of course, if you don't care about products with no payments, you only need one table:
SELECT pa.NAME,
SUM(pa.PAYMENT_ID = 85) as payment_85,
SUM(pa.PAYMENT_ID = 94) as payment_94,
SUM(pa.PAYMENT_ID = 105) as payment_105
FROM PAYMENT pa
GROUP BY pa.NAME;

Adding all months in past year in mysql query result, even if no rows

I am looking for a way to retrieve all subscriptions to vacancies over the past 12 months. Since not every vacancy will have a subscription in each month, some rows will have to return 0, but right now it's just not in the result set. How do I add the missing rows?
The tables I use are the following:
CREATE TABLE `calendar_months` (
`month_id` int(8) DEFAULT NULL,
`en_abbr` varchar(255) NOT NULL,
`en_long` varchar(255) NOT NULL,
`nl_abbr` varchar(255) NOT NULL,
`nl_long` varchar(255) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO `calendar_months` (`month_id`, `en_abbr`, `en_long`, `nl_abbr`, `nl_long`) VALUES
(1, 'jan', 'January', 'jan', 'Januari'),
(2, 'feb', 'February', 'feb', 'Februari'),
(3, 'mar', 'March', 'mrt', 'Maart'),
(4, 'apr', 'April', 'apr', 'April'),
(5, 'may', 'May', 'mei', 'Mei'),
(6, 'jun', 'June', 'jun', 'Juni'),
(7, 'jul', 'July', 'jul', 'Juli'),
(8, 'aug', 'August', 'aug', 'Augustus'),
(9, 'sep', 'September', 'sep', 'September'),
(10, 'oct', 'October', 'okt', 'Oktober'),
(11, 'nov', 'November', 'nov', 'November'),
(12, 'dec', 'December', 'dec', 'December');
-- Vacancies that users can subscribe to.
CREATE TABLE `vacancies` (
`vacancy_id` int(11) NOT NULL,
`org_id` int(11) NOT NULL,
`name` varchar(255) NOT NULL COMMENT 'title',
`description` varchar(255) DEFAULT NULL,
`create_time` datetime DEFAULT NULL,
`is_deleted` tinyint(4) NOT NULL DEFAULT '0',
-- the original definition ended in a bare COMMENT keyword with no text, which is a syntax error
`status` int(11) NOT NULL DEFAULT '0'
) ;
CREATE TABLE `vacancy_subscriptions` (
`subscription_id` int(11) NOT NULL,
`vacancy_id` int(11) DEFAULT NULL,
`user_id` int(10) DEFAULT NULL,
`subscription_date` datetime NOT NULL,
`message` text
)
I use the following query:
SELECT
CONCAT(cm.nl_long, ' ', YEAR(v.create_time)) as label, YEAR(v.create_time) as vacyear, MONTH(v.create_time) as vacmonth, COUNT(*) as totalsubscriptions
FROM `calendar_months` as cm
LEFT JOIN `vacancies` as v on cm.month_id = month(v.create_time)
LEFT JOIN `vacancy_subscriptions` as vs on v.vacancy_id = vs.vacancy_id
WHERE
(v.create_time >= (DATE_ADD(NOW(),INTERVAL -12 MONTH)))
AND v.is_deleted = 0
AND v.org_id = 1
GROUP BY vacyear, vacmonth
ORDER BY vacyear ASC, vacmonth ASC
And this gives me the result I am looking for.
+---------------+---------+----------+--------------------+
| label | vacyear | vacmonth | totalsubscriptions |
+---------------+---------+----------+--------------------+
| Oktober 2017 | 2017 | 10 | 5 |
| November 2017 | 2017 | 11 | 1 |
| December 2017 | 2017 | 12 | 13 |
| Maart 2018 | 2018 | 3 | 4 |
| April 2018 | 2018 | 4 | 5 |
+---------------+---------+----------+--------------------+
But how do I add the months with no subscriptions in the past 12 months with the last column just being "0"? Like this:
+----------------+---------+----------+--------------------+
| label | vacyear | vacmonth | totalsubscriptions |
+----------------+---------+----------+--------------------+
| Mei 2017 | 2017 | 5 | 0 |
| Juni 2017 | 2017 | 6 | 0 |
| Juli 2017 | 2017 | 7 | 0 |
| Augustus 2017 | 2017 | 8 | 0 |
| September 2017 | 2017 | 9 | 0 |
| Oktober 2017 | 2017 | 10 | 5 |
| November 2017 | 2017 | 11 | 1 |
| December 2017 | 2017 | 12 | 13 |
| Januari 2018 | 2018 | 1 | 0 |
| Februari 2018 | 2018 | 2 | 0 |
| Maart 2018 | 2018 | 3 | 4 |
| April 2018 | 2018 | 4 | 5 |
+----------------+---------+----------+--------------------+
UPDATE: I have created an SQL fiddle containing some data! http://www.sqlfiddle.com/#!9/75db76
Your logic is broken. Your calendar table needs to include both the month and the year -- and the first date of the month (for convenience).
Then you can do:
-- Subscriptions per calendar month over the trailing 12 months, including months with none.
-- Requires calendar_months to hold one row per (year, month) with the extra
-- columns `year` and `month_start_date`, as described above.
SELECT CONCAT(cm.nl_long, ' ', cm.year) as label,
       -- count a column of the outer-joined table: COUNT(*) would report 1 for empty months
       COUNT(vs.subscription_id) as totalsubscriptions
FROM calendar_months cm LEFT JOIN
     vacancies v
     ON cm.month_id = month(v.create_time) AND
        cm.year = year(v.create_time) AND
        v.is_deleted = 0 AND
        v.org_id = 1 LEFT JOIN
     `vacancy_subscriptions` vs
     on v.vacancy_id = vs.vacancy_id
WHERE cm.month_start_date >= DATE_ADD(NOW(), INTERVAL -12 MONTH)
GROUP BY label
ORDER BY MIN(cm.month_start_date);
Notes:
The filtering on all but the first table should be in the ON clauses (for LEFT JOIN).
The GROUP BY needs to be on fields in the first table.
If you have a real calendar table (rather than just a list of months), then you can filter for the past year on that table.
You probably don't want to filter on NOW() -- you'll get 13 months of data, with partial first and last months.
Move your where condition related to left join table in the on clause
-- Subscriptions per month with the vacancy filters moved into the ON clause, so
-- that months without a matching vacancy are kept by the LEFT JOIN.
SELECT
    -- CONCAT_WS skips NULLs: months without vacancies still get a label (month name only)
    CONCAT_WS(' ', cm.nl_long, YEAR(v.create_time)) AS label,
    YEAR(v.create_time) AS vacyear,            -- NULL when the month has no vacancy in range
    cm.month_id AS vacmonth,                   -- from the calendar, so empty months keep their number
    COUNT(vs.subscription_id) AS totalsubscriptions  -- 0 for empty months; COUNT(*) would give 1
FROM `calendar_months` AS cm
LEFT JOIN `vacancies` AS v
    ON cm.month_id = MONTH(v.create_time)
    AND v.create_time >= DATE_ADD(NOW(), INTERVAL -12 MONTH)
    AND v.is_deleted = 0
    AND v.org_id = 1
LEFT JOIN `vacancy_subscriptions` AS vs
    ON v.vacancy_id = vs.vacancy_id
GROUP BY vacyear, vacmonth, label
ORDER BY vacyear ASC, vacmonth ASC
Otherwise the WHERE condition works like an inner join and returns no rows when the value doesn't match.
Because you don't have the year for every month, you could use a cross join on a union to get this value:
-- Subscriptions per (year, month): every calendar month is paired with every
-- listed year, then vacancies are attached only to their own year and month.
-- Returns one row per month for each year in the list.
SELECT
    CONCAT(cm.nl_long, ' ', y.my_year) AS label,
    y.my_year AS vacyear,                      -- taken from the generated years, never NULL
    cm.month_id AS vacmonth,                   -- taken from the calendar, never NULL/0
    COUNT(vs.subscription_id) AS totalsubscriptions  -- 0 for empty months; COUNT(*) would give 1
FROM `calendar_months` AS cm
CROSS JOIN (
    SELECT 2018 AS my_year
    UNION ALL
    SELECT 2017
) AS y
LEFT JOIN `vacancies` AS v
    ON cm.month_id = MONTH(v.create_time)
    -- match the year too; without it each vacancy is counted under every listed year
    AND y.my_year = YEAR(v.create_time)
    AND v.create_time >= DATE_ADD(NOW(), INTERVAL -12 MONTH)
    AND v.is_deleted = 0
    AND v.org_id = 1
LEFT JOIN `vacancy_subscriptions` AS vs
    ON v.vacancy_id = vs.vacancy_id
GROUP BY y.my_year, cm.month_id, cm.nl_long
ORDER BY y.my_year ASC, cm.month_id ASC

mysql running difference with group by

Dataset I am experimenting has the structure as given in this SQLFiddle.
create table readings_tab (id int, site varchar(15), logged_at datetime, reading smallint);
insert into readings_tab values (1, 'A', '2017-08-21 13:22:00', 2500);
insert into readings_tab values (2, 'B', '2017-08-21 13:22:00', 1210);
insert into readings_tab values (3, 'C', '2017-08-21 13:22:00', 3500);
insert into readings_tab values (4, 'A', '2017-08-22 13:22:00', 2630);
insert into readings_tab values (5, 'B', '2017-08-22 13:22:00', 1400);
insert into readings_tab values (6, 'C', '2017-08-22 13:22:00', 3800);
insert into readings_tab values (7, 'A', '2017-08-23 13:22:00', 2700);
insert into readings_tab values (8, 'B', '2017-08-23 13:22:00', 1630);
insert into readings_tab values (9, 'C', '2017-08-23 13:22:00', 3950);
insert into readings_tab values (10, 'A', '2017-08-24 13:22:00', 2850);
insert into readings_tab values (11, 'B', '2017-08-24 13:22:00', 1700);
insert into readings_tab values (12, 'C', '2017-08-24 13:22:00', 4200);
insert into readings_tab values (13, 'A', '2017-08-25 13:22:00', 3500);
insert into readings_tab values (14, 'B', '2017-08-25 13:22:00', 2300);
insert into readings_tab values (15, 'C', '2017-08-25 13:22:00', 4700);
Current Query:
-- Daily totals joined to the neighbouring day's totals by row number.
-- User variables are written with '@'; a '#' would start a comment in MySQL.
-- NOTE: both derived tables increment the same variable @rn -- this is the
-- behaviour the question is asking about and is intentionally left as posted.
select t.rownum, t.logged_on, t.tot_reading, coalesce(t.tot_reading - t3.tot_reading, 0) AS daily_generation
from
(
select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
from readings_tab t, (SELECT @rn:=0) t2
group by date(t.logged_at)
order by date(t.logged_at) desc
) t
left join
(
select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
from readings_tab t, (SELECT @rn:=0) t2
group by date(t.logged_at)
order by date(t.logged_at) desc
) t3 on t.rownum = t3.rownum + 1
order by t.logged_on desc;
I am expecting below output. I don't need the formula (3500+2300+4700, etc...) in the result set. Just included it to make it understandable.
-----------------------------------------------------------------
| logged_on | tot_reading | daily_generation |
-----------------------------------------------------------------
| 2017-08-25 | (3500+2300+4700) = 10500 | (10500 - 8750) = 1750 |
| 2017-08-24 | (2850+1700+4200) = 8750 | (8750-8280) = 470 |
| 2017-08-23 | (2700+1630+3950) = 8280 | (8280-7830) = 450 |
| 2017-08-22 | (2630+1400+3800) = 7830 | (7830-7210) = 620 |
| 2017-08-21 | (2500+1210+3500) = 7210 | 0 |
-----------------------------------------------------------------
I cannot figure out why it doesn't produce expected output. Can someone please help?
If using variables make sure they are unique to each subquery else you can get incorrect results. I suggest the following adjusted query (which has some added columns to help follow what is happening):
-- Daily totals with the difference to the neighbouring row's total.
-- Each derived table numbers its rows with its own user variable (@rn / @rn2) so the
-- two counters cannot interfere. Variables use '@'; '#' would start a comment in MySQL.
select
      t.rownum, t.logged_on, t.tot_reading
    , coalesce(t.tot_reading - t3.tot_reading, 0) AS daily_generation
    -- the columns below are only here to make the join visible while debugging
    , t3.rownum t3_rownum
    , t3.tot_reading t3_to_read
    , t.tot_reading t_tot_read
from
    (
    select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t
    cross join (SELECT @rn:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
    ) t
left join
    (
    select @rn2:=@rn2+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t
    cross join (SELECT @rn2:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
    -- pair each row of t with the t3 row numbered one lower
    ) t3 on t.rownum = t3.rownum + 1
order by t.logged_on desc
;
Note I also recommend using explicit CROSS JOIN syntax as it leads to easier comprehension for anyone who needs to maintain this query.
Here is the result (& also see http://sqlfiddle.com/#!9/dcb5e2/1 )
| rownum | logged_on | tot_reading | daily_generation | t3_rownum | t3_to_read | t_tot_read |
|--------|------------|-------------|------------------|-----------|------------|------------|
| 5 | 2017-08-25 | 10500 | 1750 | 4 | 8750 | 10500 |
| 4 | 2017-08-24 | 8750 | 470 | 3 | 8280 | 8750 |
| 3 | 2017-08-23 | 8280 | 450 | 2 | 7830 | 8280 |
| 2 | 2017-08-22 | 7830 | 620 | 1 | 7210 | 7830 |
| 1 | 2017-08-21 | 7210 | 0 | (null) | (null) | 7210 |

Filter out rows with date range gaps in mysql

In mysql I have a table similar to the following one:
--------------------------------------------
| id | parent_id | date_start | date_end |
--------------------------------------------
| 1 | | 2017-05-01 | 2017-05-10 |
| 2 | 1 | 2017-05-01 | 2017-05-10 |
| 3 | | 2017-06-01 | 2017-06-10 |
| 4 | 3 | 2017-06-01 | 2017-06-03 |
| 5 | 3 | 2017-06-04 | 2017-06-06 |
| 6 | 3 | 2017-06-07 | 2017-06-10 |
| 7 | | 2017-07-01 | 2017-07-10 |
| 8 | 7 | 2017-07-01 | 2017-07-03 |
| 9 | 7 | 2017-07-04 | 2017-07-06 |
| 10 | 7 | 2017-07-08 | 2017-07-10 |
rows without parent id are "pricelists" while rows with parent are pricelist periods.
I'd want to filter out pricelist ids with periods that have time gaps, so ideally my query should return 1 and 3.
So far I've written a simple query which correctly returns 3:
SELECT distinct period1.parent_id
FROM pricelist period1
INNER JOIN pricelist period2
ON period1.parent_id = period2.parent_id
AND period2.date_start = DATE_ADD(period1.date_end,INTERVAL 1 DAY);
but unfortunately it doesn't take into account pricelists with a single period, which have no gaps by definition!
So I was wondering if it could be possible to modify such a query to return pricelists with either single periods or multiple periods without time gaps, possibly without a UNION.
I had difficulty finding a MySQL workspace so initially I presented a T-SQL solution, but have subsequently learned how to use MySQL at rextester which has helped a lot. The syntax for the DATEDIFF() function in MySQL is the opposite logic to T-SQL which became complex without an ability to test it. Hopefully now resolved.
The basic logic of this approach is to calculate the overall duration of the parents. Then to calculate the sum of duration of all the children. Then compare these durations (in a join) and if they are the same you have no gaps.
Note this logic isn't tested for overlaps in children, but I would expect these to also fail at the join where durations are compared.
Data:
-- Sample data. Rows with a NULL parent_id are pricelists; rows with a parent_id
-- are periods of that pricelist.
-- The table name is lowercase to match the queries that read from `pricelist`;
-- MySQL table names are case-sensitive on case-sensitive file systems.
-- drop table if exists `pricelist`;
create table `pricelist`
    (`id` int, `parent_id` int, `date_start` datetime, `date_end` datetime)
;
INSERT INTO `pricelist`
    (`id`, `parent_id`, `date_start`, `date_end`)
VALUES
    (1, NULL, '2017-05-01 00:00:00', '2017-05-10 00:00:00'),
    (2, 1, '2017-05-01 00:00:00', '2017-05-10 00:00:00'),
    (3, NULL, '2017-06-01 00:00:00', '2017-06-10 00:00:00'),
    (4, 3, '2017-06-01 00:00:00', '2017-06-03 00:00:00'),
    (5, 3, '2017-06-04 00:00:00', '2017-06-06 00:00:00'),
    (6, 3, '2017-06-07 00:00:00', '2017-06-10 00:00:00'),
    (7, NULL, '2017-07-01 00:00:00', '2017-07-10 00:00:00'),
    (8, 7, '2017-07-01 00:00:00', '2017-07-03 00:00:00'),
    (9, 7, '2017-07-04 00:00:00', '2017-07-06 00:00:00'),
    (10, 7, '2017-07-08 00:00:00', '2017-07-10 00:00:00')
;
MySQL:
-- Pricelists whose periods cover them without gaps: the parent's own duration
-- must equal the summed duration of its periods plus the one-day steps between
-- consecutive periods.
select
    parent.id,
    datediff(parent.date_end, parent.date_start) as pdu,
    covered.du
from pricelist as parent
inner join (
    select
        cur.parent_id,
        sum(
            datediff(cur.date_end, cur.date_start)
            + coalesce(datediff(nxt.date_start, cur.date_end), 0)
        ) as du
    from pricelist as cur
    -- candidate following periods of the same pricelist
    left join pricelist as nxt
        on  nxt.parent_id = cur.parent_id
        and nxt.date_start > cur.date_end
    where cur.parent_id is not null
      -- keep a period only with its direct successor (starts the next day),
      -- or alone when no later period exists
      and (nxt.parent_id is null or datediff(nxt.date_start, cur.date_end) = 1)
    group by cur.parent_id
) as covered
    on  covered.parent_id = parent.id
    and covered.du = datediff(parent.date_end, parent.date_start)
where parent.parent_id is null
;
see it working at: http://rextester.com/JRD47056
T-SQL:
Whilst I could not find a MySQL environment that worked, I used MSSQL but avoided any analytic functions etc that can't be used in MySQL. However I have relied on datediff() which is slightly different to the same function in MySQL.
TSQL query
select p.id, datediff(day,p.date_start,p.date_end) du
from #pricelist p
inner join (
select
p1.parent_id, sum(datediff(day,p1.date_start,p1.date_end) + coalesce(datediff(day,p1.date_end,p2.date_start),0)) du
from (
select parent_id,date_start,date_end
from #pricelist
where parent_id IS NOT NULL
) p1
left join (
select parent_id,date_start,date_end
from #pricelist
where parent_id IS NOT NULL
) p2 on p1.parent_id = p2.parent_id and p1.date_end < p2.date_start
where datediff(day,p1.date_end,p2.date_start) = 1 or p2.parent_id is null
group by p1.parent_id
) x on p.id = x.parent_id and datediff(day,p.date_start,p.date_end) = x.du
where p.parent_id IS NULL
see it working at: http://rextester.com/KUK2410
In MySQL, datediff() expects just 2 parameters and you need to swap the field references (i.e. latest date first).
Perhaps there are easier ways. Best I could come up with for now.