Combine rows/columns from two tables - JOIN clause - mysql

See my SQL Fiddle at the link, and the schema below:
-- Projects: l_id holds the Locations.l_id value the project belongs to
-- (no foreign key is declared).
CREATE TABLE Projects (
    p_id          INT,
    project_title VARCHAR(9),
    l_id          INT
);

INSERT INTO Projects (p_id, project_title, l_id) VALUES
    (1, 'A', 6),
    (2, 'B', 6),
    (3, 'C', 7),
    (4, 'D', 8),
    (5, 'E', 9),
    (6, 'F', 10);
-- Locations: a self-referencing hierarchy. parent_id points at the l_id of the
-- parent row; the root row ('Country') uses parent_id = 0.
CREATE TABLE Locations (
    l_id      INT,
    title     VARCHAR(9),
    parent_id INT
);

INSERT INTO Locations (l_id, title, parent_id) VALUES
    -- root
    (1, 'Country', 0),
    -- districts (children of the root)
    (2, 'District1', 1),
    (3, 'District2', 1),
    (4, 'District3', 1),
    (5, 'District4', 1),
    -- locations (children of the districts)
    (6, 'Loc 5', 2),
    (7, 'Loc 6', 3),
    (8, 'Loc 7', 3),
    (9, 'Loc 8', 4),
    (10, 'Loc 9', 4),
    (11, 'Loc 10', 4),
    (12, 'Loc 11', 5);
I would like to achieve this:
+------+-----------+-------------+
| L_ID | Title | count P_ID |
+------+-----------+-------------+
| 2 | District1 | 2 |
| 3 | District2 | 2 |
| 4 | District3 | 2 |
| 5 | District4 | 0 |
+------+-----------+-------------+
I have tried INNER JOIN and LEFT OUTER JOIN. All I can achieve is the result below, which doesn't help me:
+------+-----------+-----------+---------+
| L_ID | Title | parent_id | counted |
+------+-----------+-----------+---------+
| 6 | Loc 5 | 2 | 2 |
| 7 | Loc 6 | 3 | 2 |
| 9 | Loc 8 | 4 | 2 |
+------+-----------+-----------+---------+
Locations table is a nested one, if this matters. I need to count projects that are in each District and also to get District name.
I tried:
-- Attempt 1: count projects per location row, grouped by the location's parent.
-- The inner join keeps only locations that have at least one project.
-- Selecting loc.* while grouping by parent_id alone only runs when
-- ONLY_FULL_GROUP_BY is disabled; the non-grouped columns then come from an
-- arbitrary row of each group.
select
    loc.*,
    count(prj.l_id) as thecounted
from locations as loc
inner join projects as prj
    on prj.l_id = loc.l_id
group by loc.parent_id
and
-- Attempt 2: same as attempt 1 but with a left join, so locations without any
-- project are kept and contribute 0 to count(prj.l_id).
-- Still groups by parent_id while selecting loc.*, so it needs
-- ONLY_FULL_GROUP_BY disabled and shows an arbitrary row per group.
select
    loc.*,
    count(prj.l_id) as thecounted
from locations as loc
left join projects as prj
    on loc.l_id = prj.l_id
group by loc.parent_id

Consider two joins:
-- Count the projects of each district.
--   d = districts: the direct children of the root 'Country' row (l_id = 1).
--       The filter is therefore parent_id = 1; parent_id = 0 would select only
--       the Country row itself and return a single row with a count of 0.
--   l = locations one level below each district.
--   p = projects attached to those locations.
-- count(p.l_id) skips the NULLs produced by the left joins, so a district
-- without any project (District4 in the sample data) is reported with 0.
select
    d.l_id,
    d.title,
    count(p.l_id) as count_p_id
from locations as d
left join locations as l
    on l.parent_id = d.l_id
left join projects as p
    on p.l_id = l.l_id
where d.parent_id = 1
group by d.l_id, d.title
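The query starts from the list of districts (d), which are selected by the filter on parent_id (in the sample data the districts are the children of the Country row, l_id = 1). Then it goes down one level to the locations (l), and looks up the corresponding projects (p). The final step is aggregation.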

The solution of GMB returns this 1 row
l_id title count_p_id
1 Country 0
using this script version
-- Version under discussion, kept exactly as quoted.
-- The filter parent_id = 0 matches only the root row (1, 'Country', 0); its
-- children are the districts, which have no projects attached directly, so the
-- result is the single row "1 Country 0".
select d.l_id, d.title, count(p.l_id) count_p_id
from locations d
left join locations l on l.parent_id = d.l_id
left join projects p on p.l_id = l.l_id
where d.parent_id = 0
group by d.l_id, d.title
We get the desired result with the slightly corrected condition
where d.parent_id = 1
Result:
l_id title count_p_id
2 District1 2
3 District2 2
4 District3 2
5 District4 0
Sorry for posting the answer, instead of a simple comment, which would be sufficient, but don't have enough reputation credits yet.

Related

SQL date difference using the same column based on conditions

I'm trying to calculate the days since the last different order so for example let's say I have the following table:
cust_id|Product_id|Order_date|
1 |a |10/02/2020|
2 |b |10/01/2020|
3 |c |09/07/2020|
4 |d |09/02/2020|
1 |a |08/29/2020|
1 |f |08/02/2020|
2 |g |07/01/2020|
3 |t |06/06/2020|
4 |j |05/08/2020|
1 |w |04/20/2020|
I want to find the difference between the most recent date and the previous date that has a product ID that doesn't match the most recent product ID.
So the output should be something like this:
cust_id|latest_Product_id|time_since_last_diff_order_days|
1 |a |30 |
2 |b |92 |
3 |c |91 |
4 |d |123 |
Here's the query that I tried to use but got an error (error code 1064)
-- Attempted self-join: a and b are two copies of cust_orders matched on the
-- same customer; the intent is to compare a's dates with the dates of rows in b
-- that carry a different product.
-- NOTE(review): `=!` is not the inequality operator (MySQL reads it as
-- `= !expr`); use `<>` or `!=` instead.
-- NOTE(review): `b.prodcut_id` is misspelled; the table shown above names the
-- column Product_id.
-- NOTE(review): MAX() is combined with the non-aggregated columns a.cust_id and
-- a.Product_ID without any GROUP BY.
SELECT a.cust_id, a.Product_ID as latest_Product_id, DATEDIFF(MAX(a.Order_date),MAX(b.Order_date)) as time_since_last_diff_order_days
FROM database_customers.cust_orders a
INNER JOIN
database_customers.cust_orders b
on
a.cust_id = b.cust_id
WHERE a.product_id =! b.prodcut_id;
Thank you for any help!
It isn't pretty, but it will do the job:
-- Sample orders from the question, with the dates stored as DATETIME values.
CREATE TABLE tab1 (
    cust_id    INT,
    Product_id VARCHAR(1),
    Order_date DATETIME
)
;

INSERT INTO tab1 (cust_id, Product_id, Order_date) VALUES
    (1, 'a', '2020-10-02 02:00:00'),
    (2, 'b', '2020-10-01 02:00:00'),
    (3, 'c', '2020-09-07 02:00:00'),
    (4, 'd', '2020-09-02 02:00:00'),
    (1, 'a', '2020-08-29 02:00:00'),
    (1, 'f', '2020-08-02 02:00:00'),
    (2, 'g', '2020-07-01 02:00:00'),
    (3, 't', '2020-06-06 02:00:00'),
    (4, 'j', '2020-05-08 02:00:00'),
    (1, 'w', '2020-04-20 02:00:00')
;
-- ranked: every order numbered per customer, newest first (rn = 1 is the
-- customer's latest order).
WITH ranked AS (
    SELECT
        cust_id,
        Product_id,
        Order_date,
        ROW_NUMBER() OVER (PARTITION BY cust_id ORDER BY Order_date DESC) AS rn
    FROM tab1
),
-- latest: the product of each customer's most recent order.
latest AS (
    SELECT
        cust_id,
        Product_id
    FROM ranked
    WHERE rn = 1
),
-- gap: days between each customer's two most recent orders. The product ids of
-- those two orders are not compared, so the second order may carry the same
-- product as the latest one.
gap AS (
    SELECT
        cust_id,
        DATEDIFF(MAX(Order_date), MIN(Order_date)) AS time_since_last_diff_order_days
    FROM ranked
    WHERE rn <= 2
    GROUP BY cust_id
)
SELECT
    latest.cust_id,
    latest.Product_id,
    gap.time_since_last_diff_order_days
FROM latest
INNER JOIN gap
    ON latest.cust_id = gap.cust_id
cust_id | Product_id | time_since_last_diff_order_days
------: | :--------- | ------------------------------:
1 | a | 34
2 | b | 92
3 | c | 93
4 | d | 117
db<>fiddle here
I want to find the difference between the most recent date and the previous date that has a product ID that doesn't match the most recent product ID.
You can use first_value() to get the last product and then aggregate:
-- Per customer: the product of the latest order, the latest order date, and the
-- number of days between that date and the most recent order of a *different*
-- product. diff_from_last_product is NULL when the customer never ordered any
-- other product.
select
    cust_id,
    last_product_id,
    max(order_date),
    datediff(
        max(order_date),
        max(case when product_id <> last_product_id then order_date end)
    ) as diff_from_last_product
from (
    select
        co.*,
        -- The window must be ordered newest-first: with an ascending order
        -- first_value() would return the customer's earliest product instead
        -- of the latest one.
        first_value(product_id) over (
            partition by cust_id
            order by order_date desc
        ) as last_product_id
    from cust_orders co
) co
group by cust_id, last_product_id;

How to advanced query based on join

My schema looks like this:
-- Remove ONLY_FULL_GROUP_BY from the server-wide SQL mode.
-- System variables are read with the @@ prefix.
SET GLOBAL sql_mode=(SELECT REPLACE(@@sql_mode,'ONLY_FULL_GROUP_BY',''));
-- Ads.
CREATE TABLE ads (
    ad_id   INT,
    ad_name VARCHAR(10)
);

-- Clicks per ad and day.
CREATE TABLE ads_insight (
    id        INT,
    ad_id     INT,
    date      DATE,
    ad_clicks INT
);

-- Products.
CREATE TABLE products (
    id   INT,
    name VARCHAR(10)
);

-- Sales per product and day.
CREATE TABLE products_insight (
    id         INT,
    product_id INT,
    sale       INT,
    date       DATE
);

-- Many-to-many link between ads and products.
CREATE TABLE ads_products (
    ad_id      INT,
    product_id INT
);

INSERT INTO ads (ad_id, ad_name) VALUES
    (1, 'ad1'),
    (2, 'ad2'),
    (3, 'ad3');

INSERT INTO ads_insight (id, ad_id, date, ad_clicks) VALUES
    (1, 1, '2021-04-25', 1),
    (3, 1, '2021-04-23', 2),
    (4, 1, '2021-04-22', 8),
    (5, 2, '2021-04-25', 6),
    (6, 2, '2021-03-03', 7);

INSERT INTO products (id, name) VALUES
    (1, 'prod1'),
    (2, 'prod2'),
    (3, 'prod3'),
    (4, 'prod4'),
    (5, 'prod5');

INSERT INTO products_insight (id, product_id, sale, date) VALUES
    (1, 1, 10, '2021-04-25'),
    (2, 1, 13, '2021-04-24'),
    (3, 1, 15, '2021-04-23'),
    (4, 1, 14, '2021-04-22'),
    (5, 1, 17, '2021-04-21'),
    (6, 1, 15, '2021-04-20'),
    (7, 1, 13, '2021-04-19'),
    (8, 2, 15, '2021-04-25');

INSERT INTO ads_products (ad_id, product_id) VALUES
    (1, 1),
    (1, 2),
    (2, 3),
    (2, 4),
    (2, 2),
    (3, 1);
Here you have fiddle
A quick explanation of schema:
I have ads:
each ad has insights, which tell us when a certain ad was active(=> ad_clicks has to be > 0).
each ad has products(many2many - ads_products table). Each product has products_insight which tells us how many sales that product generated on a certain day.
Now I want to get all ads from the time range 2021-04-20 - 2021-04-25 which had ad_clicks > 0 (which I have done) AND count how many sales each ad has generated when it was active. So count sale only if the ad has ad_insight and ad_clicks > 0.
My query looks like this:
-- Reporting window, inclusive on both ends. MySQL user variables are written
-- @name; a leading # would start a comment and break the statement.
SET @from_date = '2021-04-20';
SET @to_date = '2021-04-25';
-- One row per ad that has clicks inside the window, with its total clicks, the
-- summed sales of its products and the list of its product names.
SELECT
    ads.ad_name,
    IFNULL(ad_clicks, 0) AS clicks,
    IFNULL(product_sale, 0) AS product_sale,
    IFNULL(products, '') AS products
FROM ads
-- ai: clicks per ad inside the window.
LEFT JOIN (
    SELECT ad_id, SUM(ad_clicks) AS ad_clicks
    FROM ads_insight
    WHERE date BETWEEN @from_date AND @to_date
    GROUP BY ad_id
) AS ai ON ai.ad_id = ads.ad_id
-- pi: sales of the ad's products inside the window.
-- NOTE: every sale inside the window is summed, including sales on days for
-- which the ad has no ads_insight row.
LEFT JOIN (
    SELECT ad_id, SUM(sale) AS product_sale
    FROM ads_products AS ap
    LEFT JOIN products_insight AS pi ON pi.product_id = ap.product_id
    WHERE date BETWEEN @from_date AND @to_date
    GROUP BY ad_id
) AS pi ON pi.ad_id = ads.ad_id
-- p: comma-separated product names per ad.
LEFT JOIN (
    SELECT ap.ad_id, GROUP_CONCAT(DISTINCT p.name) AS products
    FROM ads_products AS ap
    JOIN products AS p ON ap.product_id = p.id
    GROUP BY ap.ad_id
) AS p ON ads.ad_id = p.ad_id
-- Drops ads without clicks in the window (ad_clicks is NULL for them).
WHERE ad_clicks>0;
And it generates the following result:
| ad_name | clicks | product_sale | products |
| ------- | ------ | ------------ | ----------------- |
| ad1 | 11 | 99 | prod1,prod2 |
| ad2 | 6 | 15 | prod2,prod3,prod4 |
But I want this(there is a difference in the product_sale column)
| ad_name | clicks | product_sale | products |
| ------- | ------ | ------------ | ----------------- |
| ad1 | 11 | 54 | prod1,prod2 |
| ad2 | 6 | 15 | prod2,prod3,prod4 |
54, because for ad1 it counts only the products_insight rows with id 1, 3 and 4 (prod1) plus row 8 (prod2): those fall on the days on which the ad with id 1 was active (active means that there is a row in the ads_insight table for that day).

SQL - get distinct values and its frequency count of occurring in a group

I have a table data as:
-- Search results: one row per (s_product_id, search_product_result) hit.
-- The same result may appear more than once within one s_product_id.
CREATE TABLE SERP (
    id                    INT(6) UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    s_product_id          INT,
    search_product_result VARCHAR(255)
);

INSERT INTO SERP (s_product_id, search_product_result) VALUES
    (0, 'A'), (0, 'B'), (0, 'C'), (0, 'D'),
    (1, 'A'), (1, 'E'),
    (2, 'A'), (2, 'B'),
    (3, 'D'), (3, 'E'), (3, 'D');
The data set is as follows:
s_product_id | search_product_result
___________________________________________
0 | A
0 | B
0 | C
0 | D
-------------------------------------------
1 | A
1 | E
-------------------------------------------
2 | A
2 | B
-------------------------------------------
3 | D
3 | E
3 | D
I need to list all distinct search_product_result values and count frequencies of these values occurring in s_product_id.
Required Output result-set:
DISTINCT_SEARCH_PRODUCT | s_product_id_frequency_count
------------------------------------------------------------
A | 3
B | 2
C | 1
D | 2 [occurred twice in 3, but counted only once.]
E | 2
Here, A occurs in three s_product_id : 0, 1, 2, B in two : 0, 2, and so on.
D occurred twice in the same group 3, but is counted only once for that group.
I tried grouping by search_product_result, but this counts D twice in the same group.
-- Row count per search result. A result that occurs twice within the same
-- s_product_id is counted twice.
select
    search_product_result,
    count(*) as Total
from serp
group by search_product_result
Output:
search_product_result | Total
------------------------------------
A | 3
B | 2
C | 1
D | 3 <---
E | 2
You can try below - use count(distinct s_product_id)
-- Number of different s_product_id values in which each search result appears.
select
    search_product_result,
    count(distinct s_product_id) as Total
from serp
group by search_product_result
Use count(distinct ...):
-- Counts the distinct (s_product_id, search_product_result) pairs per search
-- result, using MySQL's multi-expression form of count(distinct ...).
select
    search_product_result,
    count(distinct s_product_id, search_product_result) as Total
from SERP
group by search_product_result
see dbfiddle

mysql running difference with group by

Dataset I am experimenting has the structure as given in this SQLFiddle.
-- Sample readings: three sites (A, B, C), one reading per site per day from
-- 2017-08-21 to 2017-08-25.
create table readings_tab (
    id        int,
    site      varchar(15),
    logged_at datetime,
    reading   smallint
);

insert into readings_tab (id, site, logged_at, reading) values
    (1, 'A', '2017-08-21 13:22:00', 2500),
    (2, 'B', '2017-08-21 13:22:00', 1210),
    (3, 'C', '2017-08-21 13:22:00', 3500),
    (4, 'A', '2017-08-22 13:22:00', 2630),
    (5, 'B', '2017-08-22 13:22:00', 1400),
    (6, 'C', '2017-08-22 13:22:00', 3800),
    (7, 'A', '2017-08-23 13:22:00', 2700),
    (8, 'B', '2017-08-23 13:22:00', 1630),
    (9, 'C', '2017-08-23 13:22:00', 3950),
    (10, 'A', '2017-08-24 13:22:00', 2850),
    (11, 'B', '2017-08-24 13:22:00', 1700),
    (12, 'C', '2017-08-24 13:22:00', 4200),
    (13, 'A', '2017-08-25 13:22:00', 3500),
    (14, 'B', '2017-08-25 13:22:00', 2300),
    (15, 'C', '2017-08-25 13:22:00', 4700);
Current Query:
-- Daily totals (t) joined to the daily totals of the preceding row number (t3)
-- to compute the day-over-day difference.
-- User variables are written @rn in MySQL (a leading # would start a comment).
-- NOTE: both derived tables number their rows with the same variable @rn,
-- exactly as in the question; the two counters can interfere with each other.
select t.rownum, t.logged_on, t.tot_reading, coalesce(t.tot_reading - t3.tot_reading, 0) AS daily_generation
from
(
    select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t, (SELECT @rn:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
) t
left join
(
    select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t, (SELECT @rn:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
) t3 on t.rownum = t3.rownum + 1
order by t.logged_on desc;
I am expecting below output. I don't need the formula (3500+2300+4700, etc...) in the result set. Just included it to make it understandable.
-----------------------------------------------------------------
| logged_on | tot_reading | daily_generation |
-----------------------------------------------------------------
| 2017-08-25 | (3500+2300+4700) = 10500 | (10500 - 8750) = 1750 |
| 2017-08-24 | (2850+1700+4200) = 8750 | (8750-8280) = 470 |
| 2017-08-23 | (2700+1630+3950) = 8280 | (8280-7830) = 450 |
| 2017-08-22 | (2630+1400+3800) = 7830 | (7830-7210) = 620 |
| 2017-08-21 | (2500+1210+3500) = 7210 | 0 |
-----------------------------------------------------------------
I cannot figure out why it doesn't produce expected output. Can someone please help?
If using variables make sure they are unique to each subquery else you can get incorrect results. I suggest the following adjusted query (which has some added columns to help follow what is happening):
-- Daily totals (t) joined to the daily totals of the preceding row number (t3)
-- to compute the day-over-day difference. Each derived table uses its own row
-- counter (@rn and @rn2) so that the two numberings stay independent.
-- User variables are written @name in MySQL (a leading # would start a comment).
select
    t.rownum, t.logged_on, t.tot_reading
    , coalesce(t.tot_reading - t3.tot_reading, 0) AS daily_generation
    -- helper columns that expose which t3 row was matched
    , t3.rownum t3_rownum
    , t3.tot_reading t3_to_read
    , t.tot_reading t_tot_read
from
(
    select @rn:=@rn+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t
    cross join (SELECT @rn:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
) t
left join
(
    select @rn2:=@rn2+1 AS rownum, date(t.logged_at) AS logged_on, sum(t.reading) AS tot_reading
    from readings_tab t
    cross join (SELECT @rn2:=0) t2
    group by date(t.logged_at)
    order by date(t.logged_at) desc
) t3 on t.rownum = t3.rownum + 1
order by t.logged_on desc
;
Note I also recommend using explicit CROSS JOIN syntax as it leads to easier comprehension for anyone who needs to maintain this query.
Here is the result (& also see http://sqlfiddle.com/#!9/dcb5e2/1 )
| rownum | logged_on | tot_reading | daily_generation | t3_rownum | t3_to_read | t_tot_read |
|--------|------------|-------------|------------------|-----------|------------|------------|
| 5 | 2017-08-25 | 10500 | 1750 | 4 | 8750 | 10500 |
| 4 | 2017-08-24 | 8750 | 470 | 3 | 8280 | 8750 |
| 3 | 2017-08-23 | 8280 | 450 | 2 | 7830 | 8280 |
| 2 | 2017-08-22 | 7830 | 620 | 1 | 7210 | 7830 |
| 1 | 2017-08-21 | 7210 | 0 | (null) | (null) | 7210 |

Left join that will exclude certain rows

When I left join the following tables, I get the results for all the id's. I need to exclude the results where there is no single id present in sms table.
So the expected output is as follows:
+-----------+-----------+
| messageid | mobilenos |
+-----------+-----------+
| a | 12 |
| c | 31 |
+-----------+-----------+
2 rows in set (0.00 sec)
The messageid "d" should not be displayed in the output because there is not a single entry for "d" in the sms table.
I would like to know if the following query is correct or if there is a better way:
-- Rows of splitvalues that have no exact (batchid, destination) match in sms,
-- excluding every messageid that has no sms row at all.
select sv.*
from splitvalues as sv
-- hit: the exact match in sms, if any
left join sms as hit
    on sv.messageid = hit.batchid
   and sv.mobilenos = hit.destination
-- absent: messageids for which sms contains no row
left join (
    select s.messageid
    from splitvalues as s
    left join sms as m
        on s.messageid = m.batchid
    where m.batchid is null
) as absent
    on absent.messageid = sv.messageid
where absent.messageid is null
  and hit.destination is null;
Following are the table details:
splitvalues
messageid mobilenos
a 10
a 11
a 12
b 20
b 21
b 22
b 23
b 24
c 30
c 31
d 40
d 41
d 42
d 43
sms
batchid destination
a 10
a 11
b 20
b 21
b 22
b 23
b 24
c 30
-- Reset and recreate the two sample tables.
DROP TABLE IF EXISTS splitvalues;
DROP TABLE IF EXISTS sms;

CREATE TABLE IF NOT EXISTS splitvalues (
    messageid VARCHAR(255),
    mobilenos INT
);

CREATE TABLE IF NOT EXISTS sms (
    batchid     VARCHAR(255),
    destination INT
);

INSERT INTO splitvalues (messageid, mobilenos) VALUES
    ('a', 10), ('a', 11), ('a', 12),
    ('b', 20), ('b', 21), ('b', 22), ('b', 23), ('b', 24),
    ('c', 30), ('c', 31),
    ('d', 40), ('d', 41), ('d', 42), ('d', 43);

INSERT INTO sms (batchid, destination) VALUES
    ('a', 10), ('a', 11),
    ('b', 20), ('b', 21), ('b', 22), ('b', 23), ('b', 24),
    ('c', 30);
mysql> select a.* from splitvalues as a left join sms as b on a.messageid = b.batchid and a.mobilenos = b.destination where b.destination is null;
+-----------+-----------+
| messageid | mobilenos |
+-----------+-----------+
| a | 12 |
| c | 31 |
| d | 40 |
| d | 41 |
| d | 42 |
| d | 43 |
+-----------+-----------+
6 rows in set (0.00 sec)
Try This...
-- Rows of splitvalues whose batch is present in sms but whose own
-- (messageid, mobilenos) pair is not. Written with plain MySQL identifiers;
-- the [dbo].[name] form is SQL Server syntax.
select a.messageid, a.mobilenos
from splitvalues as a
where exists (
          select 1
          from sms as b
          where b.batchid = a.messageid
      )
  and not exists (
          select 1
          from sms as b
          where b.batchid = a.messageid
            and b.destination = a.mobilenos
      );
Or
-- Anti-join form: keep the splitvalues rows without an exact match in sms
-- (b.batchid is null after the left join), but only for messageids that occur
-- in sms at least once. Written with plain MySQL identifiers and an explicit
-- join instead of the SQL Server [dbo].[name] comma join.
select a.messageid, a.mobilenos
from splitvalues as a
left join sms as b
    on b.batchid = a.messageid
   and b.destination = a.mobilenos
where b.batchid is null
  and a.messageid in (select s.batchid from sms as s);
Try an inner join; it will produce the rows which exist in both tables:
-- Returns the splitvalues/sms pairs that match on both the message id and the
-- number, i.e. the rows present in both tables.
select *
from splitvalues as sv
inner join sms as s
    on sv.messageid = s.batchid
   and sv.mobilenos = s.destination
-- Rows of splitvalues not yet present in sms, for batches that sms knows.
-- The batch filter replaces the former "limit 2", which only produced the
-- expected rows for this particular sample data and had no ORDER BY.
-- NOT EXISTS is used instead of NOT IN so that a NULL destination in sms
-- cannot empty the result, and the match is made per batch.
select sv.messageid, sv.mobilenos
from splitvalues as sv
where sv.messageid in (select s.batchid from sms as s)
  and not exists (
          select 1
          from sms as s
          where s.batchid = sv.messageid
            and s.destination = sv.mobilenos
      );