MySQL cohort query result structure - mysql

I have following MySQL query:
SELECT results.YEAR , results.cohort_date , results.actives AS
active_users , user_totals.total AS cohort_size
FROM (SELECT YEAR( user.date_added) AS cohort_date ,
COUNT(DISTINCT user_log.user_id) AS actives,
ROUND( DATEDIFF( user_log.login_date ,user.date_added)) AS YEAR
FROM user
JOIN user_log ON user_log.user_id = user.id
GROUP BY cohort_date, YEAR) AS results
JOIN (SELECT YEAR( user.date_added) AS cohort_date, COUNT( user.id) AS total
FROM user
GROUP BY cohort_date) AS user_totals
ON user_totals.cohort_date = results.cohort_date
Which gives result in following structure:
YEAR cohort_date cohort_size active_users
[1, 1973, 1000, 750],
[2, 1973, 1000, 300],
[3, 1973, 1000, 400],
[1, 1268, 549, 336],
[2, 1268, 549, 221],
How can I rewrite query so that it returns data in this format?
[1973, 1000, 750,300,400],
[1268, 549, 336,221],

Use group_concat() function to generate comma separated list of values by group:
SELECT results.cohort_date , user_totals.total AS cohort_size, group_concat(results.actives) AS
active_users
FROM (SELECT YEAR( user.date_added) AS cohort_date ,
COUNT(DISTINCT user_log.user_id) AS actives,
ROUND( DATEDIFF( user_log.login_date ,user.date_added)) AS YEAR
FROM user
JOIN user_log ON user_log.user_id = user.id
GROUP BY cohort_date, YEAR) AS results
JOIN (SELECT YEAR( user.date_added) AS cohort_date, COUNT( user.id) AS total
FROM user
GROUP BY cohort_date) AS user_totals
ON user_totals.cohort_date = results.cohort_date
GROUP BY results.cohort_date , user_totals.total

Related

MySQL - Get users who have NO orders in current month but don have in previous, in one request

I need to get users count, grouped by user type (A,B,C) and every month (that exist in db) in current year - only for users who don't have paid orders (with total > 0) in every month (every row returned by SQL), but have orders (with total > 0) in any previous months (in any year, not just current). In other words this is inactive users, who placed some paid order before, but don't placed any new orders in current SQL request row month returned.
What I expect to get in results (values are just examples):
label user_type data
Nov B 2
Nov A 1
Nov C 3
Dec C 1
.... other months
This means that in December there are 5 users with user type A and 3 users with user type B and 0 users with user type C, who DON'T placed orders in December 2021, but placed orders sometime before December in any year.
Sample DB (two tables - users and orders) with SQL that show number users, by every user type, in every month, who placed orders in this month. Instead of just this simple results, I need to get users counts that DON'T placed orders in this month, but placed paid orders somewhere before.
https://dbfiddle.uk/?rdbms=mysql_5.6&fiddle=4c4fadf67bcdc7cc3443f46c387173df
I need SQL that will work with MySQL 5.7
Try this query to generate counts for all months x user type
SELECT
DATE_FORMAT(DATE(CONCAT_WS('-', YEAR(CURDATE()), months.mm, '01')), "%b") as label,
users.user_type,
SUM(
EXISTS (
SELECT 1
FROM orders
WHERE orders.user_id = users.userid
AND orders.`date` < DATE(CONCAT_WS('-', YEAR(CURDATE()), months.mm, '01'))
) AND NOT EXISTS (
SELECT 1
FROM orders
WHERE orders.user_id = users.userid
AND orders.`date` BETWEEN DATE(CONCAT_WS('-', YEAR(CURDATE()), months.mm, '01')) AND LAST_DAY(DATE(CONCAT_WS('-', YEAR(CURDATE()), months.mm, '01')))
)
) counts
FROM (
SELECT '01' mm
UNION SELECT '02' UNION SELECT '03' UNION SELECT '04' UNION SELECT '05'
UNION SELECT '06' UNION SELECT '07' UNION SELECT '08' UNION SELECT '09'
UNION SELECT '10' UNION SELECT '11' UNION SELECT '12'
) months
CROSS JOIN users
GROUP BY months.mm, users.user_type
demo
Test this query if it fits your needs
SELECT DATE_FORMAT(o.date, "%b") as label,
UPPER(u.user_type) as user_type,
COUNT(distinct o.user_id) as data FROM orders o
JOIN users u ON o.user_id = u.userid
WHERE DATE_FORMAT(o.date, "%Y") = "2021"
AND o.user_id NOT IN
(SELECT DISTINCT o1.user_id FROM orders o1 WHERE DATE_FORMAT(o1.date, "%b") = DATE_FORMAT(now(), "%b") AND YEAR(o1.date) = YEAR(now()) )
AND o.user_id IN
(SELECT DISTINCT o1.user_id FROM orders o1 WHERE (DATE_FORMAT(o1.date, "%c") < DATE_FORMAT(now(), "%c") OR YEAR(o1.date) < YEAR(now())))
GROUP BY DATE_FORMAT(o.date, "%Y %b"),
u.user_type HAVING SUM(o.total) > 0 ORDER BY o.date ASC
EDIT
The query below returns every month of the year
SELECT months.MONTH as label,
ifnull(UPPER(u.user_type), '-') as user_type,
COUNT(distinct o.user_id) as data
FROM (
SELECT 1 AS MONTH
UNION SELECT 2 AS MONTH
UNION SELECT 3 AS MONTH
UNION SELECT 4 AS MONTH
UNION SELECT 5 AS MONTH
UNION SELECT 6 AS MONTH
UNION SELECT 7 AS MONTH
UNION SELECT 8 AS MONTH
UNION SELECT 9 AS MONTH
UNION SELECT 10 AS MONTH
UNION SELECT 11 AS MONTH
UNION SELECT 12 AS MONTH
) as months
LEFT JOIN orders o
ON DATE_FORMAT(o.date, "%c") = months.MONTH
LEFT JOIN users u ON o.user_id = u.userid
WHERE (DATE_FORMAT(o.date, "%Y") = "2021" OR o.date IS NULL)
AND (
(
NOT EXISTS
(SELECT DISTINCT o1.user_id
FROM orders o1
WHERE
DATE_FORMAT(o1.date, "%b") = DATE_FORMAT(now(), "%b")
AND YEAR(o1.date) = YEAR(now())
AND o1.user_id = o.user_id
)
AND EXISTS
(SELECT DISTINCT o1.user_id
FROM orders o1
WHERE
(DATE_FORMAT(o1.date, "%c") < DATE_FORMAT(now(), "%c") OR YEAR(o1.date) < YEAR(now())) AND o1.user_id = o.user_id
)
)
OR o.user_id IS null OR u.userid IS NULL
)
GROUP BY months.MONTH, u.user_type ORDER BY months.MONTH ASC
This uses a similar approach to VeteranSlayer but it starts with the cross join between months and users followed by the left join to orders. It also uses ranges for the date comparisons instead of the functions. It may perform really badly but it should give the correct result -
SELECT
months.month AS `label`,
u.user_type,
COUNT(u.userid) AS `data`
FROM (
SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
SELECT 'Dec', '2021-12-01', '2021-12-31'
) months
INNER JOIN users u
LEFT JOIN orders o
ON o.date BETWEEN months.month_start AND months.month_end
AND o.user_id = u.userid
WHERE o.user_id IS NULL
AND EXISTS (
SELECT DISTINCT o1.user_id
FROM orders o1
WHERE o1.date < months.month_start
AND o1.user_id = u.userid
)
GROUP BY months.month, u.user_type
ORDER BY months.month_start ASC, u.user_type ASC;
EDIT
The performance of these queries varies dramatically based on the scale of the dataset, the distribution of data and the indices. I have done some tests with many different index variations and the following test datasets. Note the random data created in the two tables can lead to wildly different performance. The dummy table referenced in the SELECTs of the INSERTs is just a random table with 1M rows.
CREATE TABLE `users` (
`id` int unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY,
`user_type` char(1) NOT NULL,
KEY `IDX_user_type` (`user_type`)
);
INSERT INTO users (user_type)
SELECT
CASE (FLOOR(RAND() * 3) + 1) WHEN 1 THEN 'A' WHEN 2 THEN 'B' ELSE 'C' END AS `user_type`
FROM dummy
LIMIT 1000;
CREATE TABLE orders (
`id` int UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
`user_id` int,
`date` DATE,
`total` DECIMAL(6,2),
KEY `IDX_user_id_date` (`user_id`,`date`)
);
INSERT INTO orders (user_id, date, total)
SELECT
(FLOOR(RAND() * 1000) + 1) AS `user_id`,
('2020-01-01' + INTERVAL FLOOR(RAND() * 685) + 1 DAY) AS `date`,
( (FLOOR(RAND() * 10) + 1) * 5) AS `total`
FROM dummy
LIMIT 100000;
The most significant performance difference across the queries came from adding -
KEY `IDX_user_id_date` (`user_id`,`date`)
and adding the user_type index gave a small but consistent improvement -
KEY `IDX_user_type` (`user_type`)
ProGu's query executed consistently with an average time of 1.466 sec. And my query was similarly consistent at 0.922 sec. Your mileage will vary!
I haven't included time's for VeteranSlayer's query as it returned radically different results.
EDIT 2
Repopulated the two tables with 50k users and 1M orders
TRUNCATE TABLE orders;
TRUNCATE TABLE users;
INSERT INTO users (user_type)
SELECT
CASE (FLOOR(RAND() * 3) + 1) WHEN 1 THEN 'A' WHEN 2 THEN 'B' ELSE 'C' END AS `user_type`
FROM (SELECT 1 FROM dummy LIMIT 50000) t;
INSERT INTO orders (user_id, date, total)
SELECT
(FLOOR(RAND() * 50000) + 1),
TIMESTAMPADD(SECOND, FLOOR(RAND() * TIMESTAMPDIFF(SECOND, '2016-01-01', '2021-12-13')), '2016-01-01'),
((FLOOR(RAND() * 50) + 1) * 5)
FROM (SELECT 1 FROM dummy LIMIT 1000000) t
ORDER BY date;
The resulting distribution of orders, by time and user_id, is quite even which is unlikely to be realistic so this test dataset grossly exacerbates any performance issues, I think.
I was surprised that by using my months table, ProGu's query was significantly faster, dropping from 21.062sec to 9.703sec, and using one less temporary table (two instead of three).
SELECT
months.month as label,
users.user_type,
SUM(
EXISTS (
SELECT 1
FROM orders
WHERE orders.user_id = users.id
AND orders.`date` < months.month_start
) AND NOT EXISTS (
SELECT 1
FROM orders
WHERE orders.user_id = users.id
AND orders.`date` BETWEEN months.month_start AND months.month_end
)
) counts
FROM (
SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
SELECT 'Dec', '2021-12-01', '2021-12-31'
) months
CROSS JOIN users
GROUP BY months.month, users.user_type
ORDER BY months.month_start ASC, users.user_type ASC
My query above can be significantly improved by pre grouping the orders data for the current year (your mileage will vary but worth considering) -
SELECT
months.month AS `label`,
u.user_type,
COUNT(u.id) AS `data`
FROM (
SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
SELECT 'Dec', '2021-12-01', '2021-12-31'
) months
INNER JOIN users u
LEFT JOIN (
SELECT `user_id`, DATE_FORMAT(`date`, '%Y-%m-01') AS `m`
FROM `orders`
WHERE `date` >= '2021-01-01'
GROUP BY `user_id`, `m`
) o
ON o.m = months.month_start
AND o.user_id = u.id
WHERE o.user_id IS NULL
AND EXISTS (
SELECT 1
FROM orders o1
WHERE o1.date < months.month_start
AND o1.user_id = u.id
)
GROUP BY months.month, u.user_type
ORDER BY months.month_start ASC, u.user_type ASC
Execution time dropped from 12.422sec to 6.497sec
And the final test I tried was de-normalising by adding first_order_date to the users table -
ALTER TABLE `users` ADD COLUMN `first_order_date` DATE NULL AFTER `user_type`;
UPDATE users u
INNER JOIN (SELECT o.user_id, MIN(date) AS `first_o`, MAX(date) AS `last_o` FROM orders o GROUP BY o.user_id) t ON u.id = t.user_id
SET `u`.`first_order_date` = `t`.`first_o`, `u`.`last_order_date` = `t`.`last_o`;
I then modified my query to use this instead of the EXISTS sub-query -
SELECT
`months`.`month` AS `label`,
`u`.`user_type`,
COUNT(`u`.`id`) AS `data`
FROM (
SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
SELECT 'Dec', '2021-12-01', '2021-12-31'
) `months`
INNER JOIN `users` `u`
LEFT JOIN (
SELECT `user_id`, DATE_FORMAT(`date`, '%Y-%m-01') AS `m`
FROM `orders`
WHERE `date` >= '2021-01-01'
GROUP BY `user_id`, `m`
) o
ON `o`.`m` = `months`.`month_start`
AND `o`.`user_id` = `u`.`id`
WHERE `o`.`user_id` IS NULL
AND `u`.`first_order_date` < `months`.`month_start`
GROUP BY `months`.`month`, `u`.`user_type`
ORDER BY `months`.`month_start` ASC, `u`.`user_type` ASC;
This returns the same result in 1.447sec. Obviously, de-normalising like this should be avoided but I included it here as it shows the performance benefit for this one scenario.

Count if avg is below/above X

I am trying to get the number of 'critics' and 'promoters' from average of ratings from a joined table on a specific group of questions
SELECT category
, SUM( IF( round(avg(items.value) ) <= 6, 1, 0) ) AS critics
, SUM( IF( round(avg(items.value) ) >= 9, 1, 0) ) AS promoters
FROM reviews
INNER JOIN items
ON reviews.id = items.review_id
AND items.question_id in (1, 2, 4)
GROUP BY category
However I get the error:
General error: 1111 Invalid use of group function
I think you should try with using having with it, something like below:
SELECT
category,
COUNT(items.id) AS critics
FROM reviews
INNER JOIN items ON reviews.id = items.review_id AND
items.question_id IN (1, 2, 4)
GROUP BY category
HAVING ROUND(AVG(items.value)) <= 6
First retrieve category wise rounded average value and then apply condition either it is critics and promoters.
-- MySQL
SELECT t.category
, CASE WHEN t.avg_value <= 6
THEN 1
ELSE 0
END critics
, CASE WHEN t.avg_value >= 9
THEN 1
ELSE 0
END promoters
FROM (SELECT category
, ROUND(AVG(items.value)) avg_value
FROM reviews
INNER JOIN items
ON reviews.id = items.review_id
AND items.question_id IN (1, 2, 4)
GROUP BY category) t
Please check this url for finding out pseudocode https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=2679b2be50c3059c73ab9754c612179c
First retrieve category and review_id wise rounded average value and then apply condition either it is critics and promoters.
SELECT t.category
, SUM(CASE WHEN t.avg_value <= 6
THEN 1
ELSE 0
END) critics
, SUM(CASE WHEN t.avg_value >= 9
THEN 1
ELSE 0
END) promoters
FROM (SELECT category
, items.review_id
, ROUND(AVG(items.value)) avg_value
FROM reviews
INNER JOIN items
ON reviews.id = items.review_id
AND items.question_id IN (1, 2, 4)
GROUP BY category
, items.review_id) t
GROUP BY t.category

How to SELECT TOP 3 Values in to 3 Columns with specific rule?

So I would like to find out the top 3 sellers(by Amount) in each sales month for each customer.
Original Table:
create table q2(Sales_Date date, Customer_ID varchar(256), Item varchar(256), Amount float);
INSERT INTO q2
VALUES
('2018/8/1' ,'AAA' , 'Apple' , 5600),
('2018/8/8' ,'AAA' , 'Nike' , 500),
('2018/8/9' ,'AAA' , 'Pear' , 600),
('2018/8/10' ,'AAA' , 'Kindle', 900),
('2018/8/1' ,'BBB' , 'Cola', 20),
('2018/9/12' ,'BBB' , 'LEGO' , 240),
('2018/9/13' ,'CCC' , 'Apple' , 2500),
('2018/9/14' ,'CCC' , 'Kindle' , 5000),
('2018/7/4' ,'CCC' , 'Nike' , 1000),
('2018/9/7' ,'CCC' , 'Pear' , 300),
('2018/9/7' ,'CCC' , 'LEGO' , 50);
Expect Output
I have try different methods but none of them work. How can I achieve this?
So far, I can only retrieve the Top seller with the code, but this is not enough.
SELECT
m.Sales_month,
m.Customer_ID,
m.Item
FROM(
SELECT
Month(Sales_Date) as Sales_month,
Customer_ID,
Amount,
Item,
DENSE_RANK() OVER (PARTITION BY Month(Sales_Date), Customer_ID ORDER BY Amount Desc) AS 'rank'
FROM q2
) as m
WHERE m.rank = 1;
Many Thanks!!
I understand that you want the top 3 sales per customer and per month, in columns.
Ranking records per customer and per month is a good start. We can then filter on the top three records per group, and pivot with conditional aggregation:
select sales_month, customer_id,
max(case when rn = 1 then item end) as item_1,
max(case when rn = 2 then item end) as item_2,
max(case when rn = 3 then item end) as item_3
from (
select q.*,
date_format(sales_date, '%Y-%m-01') as sales_month,
row_number() over (partition by customer_id, date_format(sales_date, '%Y-%m-01') order by amount desc) as rn
from q2
) q
where rn <= 3
group by sales_month, customer_id
Note that this includes the sales year in the grouping, as opposed to your original code, that only used the month. This is handy if your data spreads over more than one year.

MySQL help making query

I need to display the consumption of products for 6 months.
My table Consumption is made as:
int ID_Conso
int ID_Product
int Quantity_Conso
int Month_Conso
int Year_COnso
there is 1 record/1 product/1 month as shown in picture below
the query I want to write for my view must show the result as :
Here is my query :
SELECT c.id_Product,
t1.conso1,
t2.conso2,
t3.conso3,
t4.conso4,
t5.conso5,
t6.conso6
FROM Consumption c,
(SELECT quantity_conso as "conso1"
FROM `consumption`
WHERE year= Year(Now()) and month = 1) t1,
(SELECT quantity_conso as "conso2"
FROM `consumption`
WHERE year= Year(Now()) and month = 2) t2,
(SELECT quantity_conso as "conso3"
FROM `consumption`
WHERE year= Year(Now()) and month = 3) t3,
(SELECT quantity_conso as "conso4"
FROM `consumption`
WHERE year= Year(Now()) and month = 4) t4,
(SELECT quantity_conso as "conso5"
FROM `consumption`
WHERE year= Year(Now()) and month = 5) t5,
(SELECT quantity_conso as "conso6"
FROM `consumption`
WHERE year= Year(Now()) and month = 6) t6
this query displays all the records for all the id_product (doesn't display null if one record is missing)
I have tried to use if exist but without success.
Use aggregation function SUM with IF, and GROUP BY on Id_Product. Try the following query:
SELECT
Id_Product,
SUM(IF(Month_Conso = 1, Quantity_Conso, 0)) AS QtyConso_Month1,
SUM(IF(Month_Conso = 2, Quantity_Conso, 0)) AS QtyConso_Month2,
SUM(IF(Month_Conso = 3, Quantity_Conso, 0)) AS QtyConso_Month3,
SUM(IF(Month_Conso = 4, Quantity_Conso, 0)) AS QtyConso_Month4,
SUM(IF(Month_Conso = 5, Quantity_Conso, 0)) AS QtyConso_Month5,
SUM(IF(Month_Conso = 6, Quantity_Conso, 0)) AS QtyConso_Month6
FROM Consumption
GROUP BY Id_Product
ORDER BY Id_Product ASC

MYSQL Select Query with UNION to mege data from three tables which shares the same FKey

my query is built like this, but I am not getting the expected result. I have tables like
company_group- group_company_id, group_company_name
store - store_id,store_name
sales_report- sales_id, group_company_id,store_id, sales_4,vat_4
warehouse_sales - warehouse_sales_id, warehouse_sales_id(FK-group_company_id), store_name(FK-store_id),date, sales_four_percent
Stock_recpt- Stock_recpt_id, group_company_id, receipt_date,input_vat_4
This the query to get the result like
company_group store_name Month sales_4 vat_4 input_vat
SELECT firm, store_name, MONTH, sales_4, vat_4, input_vat_4
FROM (select `company_group`.`group_company_name`as firm,`store`.`store_name`as store_name, MONTHNAME( receipt_date ) AS MONTH,0 AS sales_4, 0 as vat_4,sum(`input_vat_4_percent`)as input_vat_4 from
company_group,store, Stock_recpt
WHERE `company_group`.`group_company_id` = `Stock_recpt`.`group_company_id`and `store`.`store_id` = `Stock_recpt`.`store_id` and `Stock_recpt`.`purchase_type` = 'purchase_4_percent'
UNION
SELECT `company_group`.`group_company_name`as firm , `store`.`store_name`as store_name , MONTHNAME(date) AS MONTH , sum( `warehouse_sales`.`sales_four_percent`)as sales_4, sum( `warehouse_sales`.`vat_four_percent` ) AS vat_4, 0 as input_vat_4
from company_group,store, warehouse_sales
WHERE `company_group`.`group_company_id` = `warehouse_sales`.`firm_name`and `store`.`store_id` = `warehouse_sales`.`store_name`
UNION
SELECT `company_group`.`group_company_name` as firm, `store`.`store_name`as store_name,MONTHNAME( sale_date ) AS MONTH, sum( `sales_report`.`sales_four_percent`)as sales_4, sum( `sales_report`.`vat_four_percent` ) AS vat_4,0 as input_vat_4
FROM company_group,store, sales_report
WHERE `company_group`.`group_company_id` = `sales_report`.`group_company_id`and `store`.`store_id` = `sales_report`.`store_id`)a
GROUP BY firm , store_name,`MONTH`