MySQL: Join distinct rows of two tables in a certain order? - mysql

I have a list of inventory units and sale transactions that I want to, (1) join by unit SKU, and (2) associate ONE transaction to ONE inventory unit in first-in-first-out order by date. I'm having trouble with the second part.
The best I can come up with is:
SELECT `units`.`unit_date`, `units`.`unit_id`, `trans`.`tran_date`, `trans`.`tran_id`, `units`.`unit_sku` FROM `units`
INNER JOIN `trans`
ON `trans`.`unit_sku` = `units`.`unit_sku`
GROUP BY `trans`.`tran_id`, `trans`.`unit_sku`
ORDER BY `units`.`unit_date` asc, `trans`.`tran_date` asc
;
units table:
unit_date | unit_id | unit_sku
2015-06-01 | 1 | U1KLM
2015-06-02 | 2 | U1KLM
2015-06-03 | 3 | U2QRS
2015-06-04 | 4 | U2QRS
2015-06-05 | 5 | U1KLM
trans table:
tran_date | tran_id | unit_sku
2015-06-11 | A | U2QRS
2015-06-12 | B | U1KLM
2015-06-13 | C | U1KLM
2015-06-14 | D | U2QRS
2015-06-15 | E | U1KLM
The desired result is one tran_id to be joined to one unit_id of the unit_sku by earliest-to-latest order of unit_date:
unit_date | unit_id | tran_date | tran_id | unit_sku
2015-06-01 | 1 | 2015-06-12 | B | U1KLM
2015-06-02 | 2 | 2015-06-13 | C | U1KLM
2015-06-03 | 3 | 2015-06-11 | A | U2QRS
2015-06-04 | 4 | 2015-06-14 | D | U2QRS
2015-06-05 | 5 | 2015-06-15 | E | U1KLM
The query (undesired) result joins tran_id only to the unit_id of the earliest occurrence of unit_sku:
unit_date | unit_id | tran_date | tran_id | unit_sku
2015-06-01 | 1 | 2015-06-12 | B | U1KLM
2015-06-01 | 1 | 2015-06-13 | C | U1KLM
2015-06-01 | 1 | 2015-06-15 | E | U1KLM
2015-06-03 | 3 | 2015-06-11 | A | U2QRS
2015-06-03 | 3 | 2015-06-14 | D | U2QRS
Any ideas on how to get the desired result? In this setup, only unit_date and tran_date are sortable; the other columns are randomly generated.
Repro script:
DROP TEMPORARY TABLE IF EXISTS `units`;
DROP TEMPORARY TABLE IF EXISTS `trans`;
CREATE TEMPORARY TABLE `units` (`unit_date` date, `unit_id` char(1) , `unit_sku` char(5), PRIMARY KEY(`unit_id`));
CREATE TEMPORARY TABLE `trans` (`tran_date` date, `tran_id` char(1) , `unit_sku` char(5), PRIMARY KEY(`tran_id`));
INSERT INTO `units` (`unit_date`, `unit_id`, `unit_sku`) VALUES
('2015-06-01', '1', 'U1KLM')
, ('2015-06-02', '2', 'U1KLM')
, ('2015-06-03', '3', 'U2QRS')
, ('2015-06-04', '4', 'U2QRS')
, ('2015-06-05', '5', 'U1KLM')
;
INSERT INTO `trans` (`tran_date`, `tran_id`, `unit_sku`) VALUES
('2015-06-11', 'A', 'U2QRS')
, ('2015-06-12', 'B', 'U1KLM')
, ('2015-06-13', 'C', 'U1KLM')
, ('2015-06-14', 'D', 'U2QRS')
, ('2015-06-15', 'E', 'U1KLM')
;
SELECT `units`.`unit_date`, `units`.`unit_id`, `trans`.`tran_date`, `trans`.`tran_id`, `units`.`unit_sku` FROM `units`
INNER JOIN `trans`
ON `trans`.`unit_sku` = `units`.`unit_sku`
GROUP BY `trans`.`tran_id`, `trans`.`unit_sku`
ORDER BY `units`.`unit_date` asc, `trans`.`tran_date` asc
;

I believe this is what you're looking for (this assumes a 1-to-1 relationship between units and transactions of the same SKU):
-- Pair each inventory unit with one sale transaction of the same SKU in
-- first-in-first-out order: the n-th oldest unit of a SKU is matched with the
-- n-th oldest transaction of that SKU.
--
-- Requires MySQL 8.0+ (window functions). The sequence number restarts for
-- every SKU, so the pairing does not depend on how rows of different SKUs
-- interleave. Units without a matching transaction (and vice versa) are
-- dropped by the inner join.
SELECT
    u.`unit_date`,
    u.`unit_id`,
    t.`tran_date`,
    t.`tran_id`,
    u.`unit_sku`
FROM (
    SELECT
        `unit_date`,
        `unit_id`,
        `unit_sku`,
        -- unit_id breaks ties so that equal dates still number deterministically
        ROW_NUMBER() OVER (PARTITION BY `unit_sku` ORDER BY `unit_date`, `unit_id`) AS sku_seq
    FROM `units`
) AS u
INNER JOIN (
    SELECT
        `tran_date`,
        `tran_id`,
        `unit_sku`,
        ROW_NUMBER() OVER (PARTITION BY `unit_sku` ORDER BY `tran_date`, `tran_id`) AS sku_seq
    FROM `trans`
) AS t
    ON t.`unit_sku` = u.`unit_sku`
    AND t.sku_seq = u.sku_seq
ORDER BY u.`unit_date` ASC;

Related

MYSQL: Exclude duplicate scan logs within same day

I'm trying to select rows excluding duplicates in a day.
Criteria for duplicate is: SAME USER AND SAME PRODUCT_UPC AND SAME DATE(SCANNED_ON)
So, from the below table, if SCAN_ID = 100 is selected, exclude SCAN_ID = 101 since they belong to same user_id AND same product_upc AND have same DATE(scanned_on).
Here's the table structure:
SCAN_ID USER_ID PRODUCT_UPC SCANNED_ON
100 1 0767914767 2020-08-01 03:49:11
101 1 0767914767 2020-08-01 03:58:28
102 2 0064432050 2020-08-02 04:01:31
103 3 0804169977 2020-08-10 04:08:48
104 4 0875523846 2020-08-10 05:21:32
105 4 0007850492 2020-08-12 07:10:05
Query I've come up so far is:
-- Asker's attempt: remember the previous row's user, UPC and scan date in
-- session variables and keep a row only when at least one of them differs.
-- User variables are written with '@'; a '#' would start a comment in MySQL.
-- NOTE(review): each row is compared with the row read just before it, so a
-- duplicate is only detected when the two rows are adjacent in read order.
SET @last_user='', @last_upc='', @last_date='';
SELECT *,
@last_user as last_user , @last_user:=user_id as this_user,
@last_upc as last_upc , @last_upc:=product_upc as this_upc,
@last_date as last_date , @last_date:=DATE(scanned_on) as this_date
FROM scansv2
HAVING this_user != last_user OR this_upc != last_upc OR this_date != last_date
In MySQL 8 you can use ROW_NUMBER() for this:
CREATE TABLE scansv2 (
`SCAN_ID` INTEGER,
`USER_ID` INTEGER,
`PRODUCT_UPC` INTEGER,
`SCANNED_ON` DATETIME
);
INSERT INTO scansv2
(`SCAN_ID`, `USER_ID`, `PRODUCT_UPC`, `SCANNED_ON`)
VALUES
('100', '1', '0767914767', '2020-08-01 03:49:11'),
('101', '1', '0767914767', '2020-08-01 03:58:28'),
('102', '2', '0064432050', '2020-08-02 04:01:31'),
('103', '3', '0804169977', '2020-08-10 04:08:48'),
('104', '4', '0875523846', '2020-08-10 05:21:32'),
('105', '4', '0007850492', '2020-08-12 07:10:05');
-- Keep one scan per (user, product, calendar day).
-- The partition mirrors the duplicate rule: same USER_ID, same PRODUCT_UPC and
-- same DATE(SCANNED_ON). Rows are ranked newest-first, so the latest scan of
-- each group survives; switch both sort keys to ASC to keep the earliest one.
WITH rownum AS (
    SELECT
        `SCAN_ID`,
        `USER_ID`,
        `PRODUCT_UPC`,
        `SCANNED_ON`,
        ROW_NUMBER() OVER (
            PARTITION BY `USER_ID`, `PRODUCT_UPC`, DATE(`SCANNED_ON`)
            -- SCAN_ID breaks ties between scans with identical timestamps
            ORDER BY `SCANNED_ON` DESC, `SCAN_ID` DESC
        ) AS row_num
    FROM scansv2
)
SELECT `SCAN_ID`, `USER_ID`, `PRODUCT_UPC`, `SCANNED_ON`
FROM rownum
WHERE row_num = 1
ORDER BY `SCAN_ID`;
SCAN_ID | USER_ID | PRODUCT_UPC | SCANNED_ON
------: | ------: | ----------: | :------------------
101 | 1 | 767914767 | 2020-08-01 03:58:28
102 | 2 | 64432050 | 2020-08-02 04:01:31
103 | 3 | 804169977 | 2020-08-10 04:08:48
104 | 4 | 875523846 | 2020-08-10 05:21:32
105 | 4 | 7850492 | 2020-08-12 07:10:05
db<>fiddle here
In MySQL 5.x you need user-defined variables for the same purpose:
-- MySQL 5.x variant: number the scans inside each (user, product, calendar
-- day) group with session variables and keep the first row of every group.
-- User variables are written with '@'; a '#' would start a comment in MySQL.
-- NOTE(review): this relies on the derived table's ORDER BY being honoured and
-- on left-to-right evaluation of the select list, neither of which MySQL
-- guarantees; prefer ROW_NUMBER() on 8.0+.
SELECT `SCAN_ID`, `USER_ID`, `PRODUCT_UPC`, `SCANNED_ON`
FROM (
    SELECT
        c.`SCAN_ID`,
        c.`USER_ID`,
        c.`PRODUCT_UPC`,
        c.`SCANNED_ON`,
        -- restart the counter whenever the group key changes
        IF(@grp = CONCAT_WS('|', c.`USER_ID`, c.`PRODUCT_UPC`, DATE(c.`SCANNED_ON`)),
           @row_num := @row_num + 1,
           @row_num := 1) AS row_num,
        -- remember this row's group key for the comparison on the next row
        @grp := CONCAT_WS('|', c.`USER_ID`, c.`PRODUCT_UPC`, DATE(c.`SCANNED_ON`)) AS grp
    FROM (
        SELECT * FROM scansv2 ORDER BY `USER_ID`, `PRODUCT_UPC`, `SCANNED_ON`
    ) AS c
    CROSS JOIN (SELECT @row_num := 0, @grp := '') AS a
) AS b
WHERE row_num = 1
ORDER BY `SCAN_ID`;
SCAN_ID | USER_ID | PRODUCT_UPC | SCANNED_ON
------: | ------: | ----------: | :------------------
100 | 1 | 767914767 | 2020-08-01 03:49:11
102 | 2 | 64432050 | 2020-08-02 04:01:31
103 | 3 | 804169977 | 2020-08-10 04:08:48
104 | 4 | 875523846 | 2020-08-10 05:21:32
105 | 4 | 7850492 | 2020-08-12 07:10:05
db<>fiddle here
In most databases (including MySQL pre-8.0), filtering with a correlated subquery is a supported and efficient option:
-- Keep a scan only when its timestamp equals the earliest timestamp recorded
-- for the same user and product within that scan's calendar day.
SELECT scan.*
FROM scansv2 AS scan
WHERE scan.scanned_on = (
    SELECT MIN(same_day.scanned_on)
    FROM scansv2 AS same_day
    WHERE same_day.product_upc = scan.product_upc
      AND same_day.user_id = scan.user_id
      -- half-open range: from midnight of the scan's day up to the next midnight
      AND same_day.scanned_on < DATE(scan.scanned_on) + INTERVAL 1 DAY
      AND same_day.scanned_on >= DATE(scan.scanned_on)
)
This gives you the first row per user_id, product_upc and day, and filters out the other ones, if any.

Get total monthly transaction from sub queries in sql

I'm having a challenge to get the monthly total sum of amount_tendered from both shop1 table and shop2 table, and the monthly total sum of payment_amount from payments table.
and if payments don't have a value for the month it should show zero.
shop1
--------------------------------------------------------
| trans_id | amount_tendered | trans_date |
--------------------------------------------------------
| 1 | 10.00 | 2020-09-03 06:09:55 |
| 2 | 15.00 | 2020-08-01 10:19:01 |
--------------------------------------------------------
shop2
--------------------------------------------------------
| trans_id | amount_tendered | trans_date |
--------------------------------------------------------
| 1 | 30.00 | 2020-09-01 16:09:55 |
| 2 | 15.00 | 2020-09-11 11:19:01 |
--------------------------------------------------------
Payments
------------------------------------------------------------
| payments_id | payment_amount | payment_date |
------------------------------------------------------------
| 1 | 100.00 | 2020-09-01 16:09:55 |
| 2 | 105.00 | 2020-09-11 11:19:01 |
------------------------------------------------------------
SELECT t1.yr, t1.mnth, ifnull(t2.total_trans,0), ifnull(t3.payments,0) FROM
(SELECT YEAR(trans_date) as yr,
MONTHNAME(trans_date) as mnth,
FROM shop1
GROUP BY YEAR(trans_date), MONTHNAME(trans_date)
ORDER BY YEAR(trans_date), MONTHNAME(trans_date)) as t1
LEFT JOIN (
SELECT(
(SUM(amount_tendered) FROM shop1 GROUP BY YEAR(trans_date), MONTHNAME(trans_date)+
(SUM(amount_tendered) FROM shop2 GROUP BY YEAR(trans_date), MONTHNAME(trans_date)
) as 'total_trans'
)as t2
LEFT JOIN (
SELECT SUM(payment_amount ) FROM transactions GROUP BY YEAR(payment_date), MONTHNAME(payment_date) as payments
)as t3
The expected result
------------------------------------------------------------
| yr | mnth | total_trans | payments |
------------------------------------------------------------
| 2020 | August | 15.00 | 0.00 |
| 2020 | September| 55.00 | 105.00 |
------------------------------------------------------------
Error : Syntax error near 'FROM transactions GROUP BY YEAR(transaction_date), MONTHNAME(transaction_date) ' at line 4
You would typically compute the monthly sum in two separate subqueries, and then join the results.
I am not a fan of having one table per shop: having several tables with the same columns usually indicates a design problem. Here, we use union all to collect data from both tables before aggregating.
-- Monthly totals: amount tendered across both shops, plus payments received.
-- Months that have shop transactions but no payments report 0 rather than
-- NULL, which is what the left join alone would produce.
select
    t.yr,
    t.mnth,
    t.total_trans,
    coalesce(p.payments, 0) as payments
from (
    -- one row per calendar month, summed over shop1 and shop2 together
    select
        year(trans_date) as yr,
        monthname(trans_date) as mnth,
        sum(amount_tendered) as total_trans
    from (
        select trans_date, amount_tendered from shop1
        union all
        select trans_date, amount_tendered from shop2
    ) as all_trans
    group by year(trans_date), monthname(trans_date)
) as t
left join (
    -- one row per calendar month that has at least one payment
    select
        year(payment_date) as yr,
        monthname(payment_date) as mnth,
        sum(payment_amount) as payments
    from payments
    group by year(payment_date), monthname(payment_date)
) as p
    on p.yr = t.yr
    and p.mnth = t.mnth
The left join avoids filtering out months that have no payments.

MySQL: retrieving the last record from each group and combining with the data of another table

There is a purchase table that holds information regarding the products bought. Each purchase generates one or more rows in the tStockMP table, one for each individual product bought.
Now, I need to display the table information for each product in stock. Since the purchase table contains the history of the changes, that information is in the highest keyid when grouped by purchase_id in the tPurchases table.
I've provided a complete script, here with example data describing my problem.
DROP TABLE IF EXISTS tPurchases;
DROP TABLE IF EXISTS tStockMP;
-- The purchase table
CREATE TABLE tPurchases (
keyid INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
brand VARCHAR(255),
model VARCHAR(255),
purchase_id INT
);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("Hp","note1",23);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("Lg","IPSLED",45);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("Hp","notE1",23);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("Bx","BOX",56);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("LG","IPSLED",45);
INSERT INTO tPurchases (brand,model,purchase_id) VALUES ("HP","NOTE1",23);
-- The Stock MP Table
CREATE TABLE tStockMP (
keyid INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
purchase_id INT,
status TINYINT
);
INSERT INTO tStockMP (purchase_id,status) VALUES (23,1);
INSERT INTO tStockMP (purchase_id,status) VALUES (23,1);
INSERT INTO tStockMP (purchase_id,status) VALUES (23,0);
INSERT INTO tStockMP (purchase_id,status) VALUES (45,0);
INSERT INTO tStockMP (purchase_id,status) VALUES (56,1);
INSERT INTO tStockMP (purchase_id,status) VALUES (56,1);
INSERT INTO tStockMP (purchase_id,status) VALUES (56,0);
-- Example data
--
-- tPurchases table
-- keyid brand model purchase_id
-- 1 Hp note1 23
-- 2 Lg IPSLED 45
-- 3 Hp notE1 23
-- 4 Bx BOX 56
-- 5 LG IPSLED 45
-- 6 HP NOTE1 23
--
--
-- tStockMP table.
-- purchase_id status
-- 23 1
-- 23 1
-- 23 0
-- 45 0
-- 56 1
-- 56 1
-- 56 0
--
--
-- Expected result
--
-- purchase_id status brand model
-- 23 1 HP NOTE1
-- 23 1 HP NOTE1
-- 23 0 HP NOTE1
-- 45 0 LG IPSLED
-- 56 1 Bx BOX
-- 56 1 Bx BOX
-- 56 0 Bx BOX
SELECT s.keyid, s.purchase_id, s.status, p.brand, p.model, p.keyid AS purkeyid
FROM tStockMP AS s, tPurchases AS p
WHERE s.purchase_id = p.purchase_id;
-- +-------+-------------+--------+-------+--------+----------+
-- | keyid | purchase_id | status | brand | model | purkeyid |
-- +-------+-------------+--------+-------+--------+----------+
-- | 1 | 23 | 1 | Hp | note1 | 1 |
-- | 1 | 23 | 1 | Hp | notE1 | 3 |
-- | 1 | 23 | 1 | HP | NOTE1 | 6 |-> *
-- | 2 | 23 | 1 | Hp | note1 | 1 |
-- | 2 | 23 | 1 | Hp | notE1 | 3 |
-- | 2 | 23 | 1 | HP | NOTE1 | 6 |-> *
-- | 3 | 23 | 0 | Hp | note1 | 1 |
-- | 3 | 23 | 0 | Hp | notE1 | 3 |
-- | 3 | 23 | 0 | HP | NOTE1 | 6 |-> *
-- | 4 | 45 | 0 | Lg | IPSLED | 2 |
-- | 4 | 45 | 0 | LG | IPSLED | 5 |-> *
-- | 5 | 56 | 1 | Bx | BOX | 4 |-> *
-- | 6 | 56 | 1 | Bx | BOX | 4 |-> *
-- | 7 | 56 | 0 | Bx | BOX | 4 |-> *
-- +-------+-------------+--------+-------+--------+----------+
Then I would need to "filter" the results so that ONLY the * rows remain in the final query, so that I don't have to do it by hand. But I don't know how to modify my query to make this happen.
Never use commas in the FROM clause. A typical solution is to use a correlated subquery:
-- Attach to every stock row the purchase record with the highest keyid among
-- the records that share its purchase_id.
SELECT
    stock.keyid,
    stock.purchase_id,
    stock.status,
    pur.brand,
    pur.model,
    pur.keyid AS purkeyid
FROM tStockMP AS stock
JOIN tPurchases AS pur
    ON pur.purchase_id = stock.purchase_id
WHERE pur.keyid = (
    -- highest keyid recorded for this purchase_id
    SELECT MAX(newest.keyid)
    FROM tPurchases AS newest
    WHERE newest.purchase_id = pur.purchase_id
);
With an index on tPurchases(purchase_id, keyid), this often has the best performance.
If I approached this with window functions, I would phrase it as:
SELECT s.keyid, s.purchase_id, s.status, p.brand, p.model, p.keyid AS purkeyid
FROM tStockMP s JOIN
(SELECT p.*,
ROW_NUMBER() OVER (PARTITION BY purchase_id ORDER BY keyid DESC) as seqnum
FROM tPurchases p
) p
ON s.purchase_id = p.purchase_id
WHERE seqnum = 1;
GMB has an alternative approach. If you have lots of data, it would be interesting to compare the performance of the two methods. I would advise the same index as above for all comparisons.
If your database supports window functions, you can use ROW_NUMBER() to identify the "latest" record per group, and use that information to filter the dataset:
SELECT *
FROM (
SELECT
s.keyid,
s.purchase_id,
s.status,
p.brand,
p.model,
p.keyid AS purkeyid,
ROW_NUMBER() OVER(PARTITION BY s.keyid ORDER BY p.keyid DESC) rn
FROM tStockMP AS s
INNER JOIN tPurchases AS p ON p.purchase_id = s.purchase_id
) t
WHERE rn = 1
First use NOT EXISTS in tPurchases to get only the rows with the max keyid and then join to tStockMP:
-- Reduce tPurchases to the rows that have no sibling with a higher keyid for
-- the same purchase_id, then join that reduced set to the stock rows.
SELECT
    stock.keyid,
    stock.purchase_id,
    stock.status,
    latest.brand,
    latest.model,
    latest.keyid AS purkeyid
FROM tStockMP AS stock
INNER JOIN (
    SELECT cur.*
    FROM tPurchases AS cur
    WHERE NOT EXISTS (
        -- any later record for the same purchase disqualifies the current row
        SELECT 1
        FROM tPurchases AS later
        WHERE later.keyid > cur.keyid
          AND later.purchase_id = cur.purchase_id
    )
) AS latest
    ON latest.purchase_id = stock.purchase_id
See the demo.
Results:
| keyid | purchase_id | status | brand | model | purkeyid |
| ----- | ----------- | ------ | ----- | ------ | -------- |
| 1 | 23 | 1 | HP | NOTE1 | 6 |
| 2 | 23 | 1 | HP | NOTE1 | 6 |
| 3 | 23 | 0 | HP | NOTE1 | 6 |
| 4 | 45 | 0 | LG | IPSLED | 5 |
| 5 | 56 | 1 | Bx | BOX | 4 |
| 6 | 56 | 1 | Bx | BOX | 4 |
| 7 | 56 | 0 | Bx | BOX | 4 |

MySQL find Count grouped by per month

For my problem the general structure of the tables is:
the Workers are located in different Branches (Branch table).
Prospective customer register (Registration table) as a Customer (Customer table)
and can order the products to buy (Order table).
Branch Table:
+------------+--------------+-----------------+
| 'branchId' | 'street' | 'city' |
+------------+--------------+-----------------+
| 'B002' | 'Clover Dr' | 'London' |
| 'B003' | 'Main St' | 'Glagsow' |
| 'B004' | 'Manse Rd' | 'Bristol' |
| 'B005' | 'Deer Rd' | 'London' |
| 'B007' | 'Argyll St' | 'Los Angeles' |
| 'B008' | 'Mission St' | 'San Francisco' |
| 'B009' | 'SOMA' | 'San Francisco' |
+------------+--------------+-----------------+
Customer Table:
+--------------+----------+-----------+-----------------+
| 'customerId' | 'fName' | 'lName' | 'telNo' |
+--------------+----------+-----------+-----------------+
| 'CR56' | 'Aline' | 'Stewart' | '0141-848-1825' |
| 'CR58' | 'Jacky' | 'Ho' | '0123-1325434' |
| 'CR62' | 'Mary' | 'Tregar' | '01224-196720' |
| 'CR74' | 'Mike' | 'Ritchie' | '01475-392178' |
| 'CR76' | 'John' | 'Kay' | '0207-774-5632' |
+--------------+----------+-----------+-----------------+
Registration Table:
+--------------+------------+------------+-----------------------+
| 'customerId' | 'branchId' | 'workerId' | 'joiningDate' |
+--------------+------------+------------+-----------------------+
| 'CR56' | 'B003' | 'SG37' | '2004-05-02 12:00:00' |
| 'CR58' | 'B003' | 'SA9' | '2004-05-03 12:00:00' |
| 'CR62' | 'B007' | 'SA9' | '2004-05-01 12:00:00' |
| 'CR74' | 'B004' | 'SG37' | '2004-04-04 12:00:00' |
| 'CR76' | 'B005' | 'SL41' | '2004-03-03 12:00:00' |
+--------------+------------+------------+-----------------------+
Order Table:
+--------------+---------------+-----------------------+
| 'customerId' | 'productId' | 'orderDate' |
+--------------+---------------+-----------------------+
| 'CR56' | 'PA14' | '2004-05-04 11:30:00' |
| 'CR62' | 'PA14' | '2004-05-04 14:00:00' |
| 'CR56' | 'PG36' | '2004-06-07 11:00:00' |
| 'CR56' | 'PG4' | '2004-04-14 12:05:00' |
| 'CR76' | 'PG4' | '2004-04-04 10:15:00' |
+--------------+---------------+-----------------------+
I am trying to form a query to find the number of orders per Branch within 1, 2, and 3 months of client Registration.
Let's say for example
+----------+------------+-----------------+
| 'months' | 'branchId' | 'numberOfOrder' |
+----------+------------+-----------------+
| 1 | 'B003' | 2 |
| 2 | 'B004' | 1 |
+----------+------------+-----------------+
I tried to group the table by month and date but I am stuck and not able to proceed forward.
Does anyone have any ideas that could help unblock me?
I started doing something like this, but I am completely lost at the moment.
SELECT
COUNT(DISTINCT o.orderDate) AS 'count'
FROM
Order o, Registration r
WHERE
o.orderDate BETWEEN DATE('2001-01-01') AND DATE('2005-01-31')
GROUP BY YEAR(o.orderDate), MONTH(o.orderDate);
But this seems I am pretty far from what I am trying to achieve.
I'm not entirely sure of what your desired result is, but with this query you can get the count of orders, per branch, within 3 months after registration.
-- Count orders per branch, considering only orders placed between the
-- customer's joining date and three months after it (both ends inclusive).
-- The date filter must test the ORDER date: comparing reg.joiningDate with a
-- range that starts at reg.joiningDate is always true and filters nothing.
SELECT
    reg.branchId,
    COUNT(reg.branchId) AS orderCount
FROM `order` AS ord
INNER JOIN `registration` AS reg
    ON ord.customerId = reg.customerId
WHERE ord.orderDate BETWEEN reg.joiningDate AND DATE_ADD(reg.joiningDate, INTERVAL 3 MONTH)
GROUP BY reg.branchId
Result
Is this what you wanted to do?
Your desired result has nothing in common with your data.
So i assume you want the Order count for every branch.
I added the year also, because it is usually needed and doesn't bother if your data don't go over one year
Update:
Now it only selects orders that were placed within 3 months after the customer joined.
This is enforced by the DATE_ADD in the WHERE clause.
CREATE TABLE registration
(`customerId` varchar(4), `branchId` varchar(4), `workerId` varchar(4), `joiningDate` datetime)
;
INSERT INTO registration
(`customerId`, `branchId`, `workerId`, `joiningDate`)
VALUES
('CR56', 'B003', 'SG37', '2004-05-02 12:00:00'),
('CR58', 'B003', 'SA9', '2004-05-03 12:00:00'),
('CR62', 'B007', 'SA9', '2004-05-01 12:00:00'),
('CR74', 'B004', 'SG37', '2004-04-04 12:00:00'),
('CR76', 'B005', 'SL41', '2004-03-03 12:00:00')
;
✓
✓
CREATE TABLE `order`
(`customerId` varchar(4), `productId` varchar(4), `orderDate` datetime)
;
INSERT INTO `order`
(`customerId`, `productId`, `orderDate`)
VALUES
('CR56', 'PA14', '2004-05-04 11:30:00'),
('CR62', 'PA14', '2004-05-04 14:00:00'),
('CR56', 'PG36', '2004-06-07 11:00:00'),
('CR56', 'PG4', '2004-04-14 12:05:00'),
('CR76', 'PG4', '2004-04-04 10:15:00')
;
✓
✓
SELECT MONTH(o.`orderDate`),r.branchId, COUNT(*) numberOfOrder
FROM registration r inner join `order` o ON r.`customerId` = o.`customerId`
WHERE o.`orderDate` BETWEEN r.`joiningDate` AND DATE_ADD(r.`joiningDate`, INTERVAL 3 MONTH)
GROUP BY YEAR(o.`orderDate`),MONTH(o.`orderDate`),r.branchId
MONTH(o.`orderDate`) | branchId | numberOfOrder
-------------------: | :------- | ------------:
4 | B005 | 1
5 | B003 | 1
5 | B007 | 1
6 | B003 | 1
db<>fiddle here
You may try the query below, I guess, which calculates the months from the difference between orderDate and joiningDate:
SELECT abs(ceil(datediff(o.`orderDate`, r.`joiningDate`)/30)) months_join,r.branchId, COUNT(*) numberOfOrder
FROM registration r inner join `order` o ON r.`customerId` = o.`customerId`
GROUP BY YEAR(o.`orderDate`), abs(ceil(datediff(o.`orderDate`, r.`joiningDate`)/30)),r.branchId

Selecting max(date) twice from the same table, for different conditions

This is a small snippet of my table, which currently contains ~10,000,000 rows
+---------+---------------------+-----------+----------------+
| card_id | date | avg_price | foil_avg_price |
+---------+---------------------+-----------+----------------+
| 10000 | 2014-06-28 09:05:56 | 5.02 | 10.22 |
| 20000 | 2014-06-28 09:05:54 | 14.58 | 25.10 |
| 10000 | 2014-06-29 09:05:56 | 0.00 | 19.62 |
| 20000 | 2014-06-29 09:05:54 | 14.58 | 0.00 |
| 10000 | 2014-07-01 09:05:56 | 0.00 | 19.62 |
| 20000 | 2014-07-01 09:05:54 | 0.00 | 25.10 |
+---------+---------------------+-----------+----------------+
It is a price history for cards, including what the avg_price and what the foil_avg_price was for each day or so.
I'd like to select, for a group of card id's the most recent date when the foil_avg_price was > 0, what that price was, and the most recent date that the avg_price was > 0, and what that price was. My resulting data set for the above would look something like this:
+---------+---------------------+-----------+---------------------+----------------+
| card_id | avg_date | avg_price | foil_date | foil_avg_price |
+---------+---------------------+-----------+---------------------+----------------+
| 10000 | 2014-06-28 09:05:56 | 5.02 | 2014-07-01 09:05:56 | 19.62 |
| 20000 | 2014-06-29 09:05:54 | 14.58 | 2014-07-01 09:05:54 | 25.10 |
+---------+---------------------+-----------+---------------------+----------------+
I'm sure that this involves an INNER JOIN on the same table but I can't quite get my head around it. Any help would be much appreciated.
Three steps:
Find last price date
Find last foil price date
resolve prices on these dates
So,
-- For each card: the most recent date with avg_price > 0, the most recent
-- date with foil_avg_price > 0, and the prices recorded on those dates.
--
-- Each price lookup joins on card_id AND the resolved date; joining card_id
-- to `date` can never match. A card that never had a positive price of one
-- kind gets NULL for that date and price instead of being dropped.
-- NOTE(review): assumes at most one row per (card_id, date) — otherwise the
-- joins return one output row per matching price row; confirm against the
-- table's key.
SELECT
    dates.card_id,
    dates.avg_date,
    dates.foil_avg_date,
    price.avg_price,
    foilprice.foil_avg_price
FROM (
    SELECT
        card_id,
        -- CASE without ELSE yields NULL, which MAX() ignores
        MAX(CASE WHEN avg_price > 0 THEN `date` END) AS avg_date,
        MAX(CASE WHEN foil_avg_price > 0 THEN `date` END) AS foil_avg_date
    FROM card_price
    GROUP BY card_id
) AS dates
LEFT JOIN card_price AS price
    ON price.card_id = dates.card_id
    AND price.`date` = dates.avg_date
LEFT JOIN card_price AS foilprice
    ON foilprice.card_id = dates.card_id
    AND foilprice.`date` = dates.foil_avg_date
Try this query
SELECT A.card_id,max(date),MAX(avg_price), (SELECT MAX(date) FROM test WHERE card_id = A.card_id AND foil_avg_price = MAX(A.foil_avg_price)) AS date,MAX(foil_avg_price) FROM test A
GROUP BY A.card_id
How about if you had 20,000,000 rows...
DROP TABLE IF EXISTS my_table;
CREATE TABLE my_table
(card_id INT NOT NULL
,date DATETIME NOT NULL
,price_type VARCHAR(20) NOT NULL
,price_value DECIMAL(5,2) NOT NULL
,PRIMARY KEY(card_id,date,price_type)
);
INSERT INTO my_table VALUES
(10000,'2014-06-28 09:05:56','avg_price',5.02),
(20000,'2014-06-28 09:05:54','avg_price',14.58),
(10000,'2014-06-29 09:05:56','avg_price',0.00),
(20000,'2014-06-29 09:05:54','avg_price',14.58),
(10000,'2014-07-01 09:05:56','avg_price',0.00),
(20000,'2014-07-01 09:05:54','avg_price',0.00),
(10000,'2014-06-28 09:05:56','foil_avg_price',10.22),
(20000,'2014-06-28 09:05:54','foil_avg_price',25.10),
(10000,'2014-06-29 09:05:56','foil_avg_price',19.62),
(20000,'2014-06-29 09:05:54','foil_avg_price',0.00),
(10000,'2014-07-01 09:05:56','foil_avg_price',19.62),
(20000,'2014-07-01 09:05:54','foil_avg_price',25.10);
-- Return, for every card and price type, the row carrying the most recent
-- date on which that price type had a value above zero.
SELECT cur.*
FROM my_table AS cur
JOIN (
    -- latest date with a positive value, per card and price type
    SELECT
        card_id,
        price_type,
        MAX(date) AS max_date
    FROM my_table
    WHERE price_value > 0
    GROUP BY card_id, price_type
) AS latest
    ON latest.max_date = cur.date
    AND latest.price_type = cur.price_type
    AND latest.card_id = cur.card_id;
+---------+---------------------+----------------+-------------+
| card_id | date | price_type | price_value |
+---------+---------------------+----------------+-------------+
| 10000 | 2014-06-28 09:05:56 | avg_price | 5.02 |
| 10000 | 2014-07-01 09:05:56 | foil_avg_price | 19.62 |
| 20000 | 2014-06-29 09:05:54 | avg_price | 14.58 |
| 20000 | 2014-07-01 09:05:54 | foil_avg_price | 25.10 |
+---------+---------------------+----------------+-------------+
Try this:
SELECT a.card_id, a.avg_date, a.avg_price, b.foil_date, b.foil_avg_price
FROM (SELECT c.card_id, c.date AS avg_date, c.avg_price
FROM cards c
INNER JOIN (SELECT c.card_id, MAX(IF(c.avg_price > 0, c.date, NULL)) avg_date
FROM cards c GROUP BY c.card_id
) a ON c.card_id = a.card_id AND c.date = a.avg_date
) AS a
LEFT JOIN (SELECT c.card_id, c.date AS foil_date, c.foil_avg_price
FROM cards c
INNER JOIN (SELECT c.card_id, MAX(IF(c.foil_avg_price > 0, c.date, NULL)) foil_date
FROM cards c GROUP BY c.card_id
) a ON c.card_id = a.card_id AND c.date = a.foil_date
) AS b ON a.card_id = b.card_id ;
OR
-- For each card: the most recent row with avg_price > 0, left-joined to the
-- most recent row with foil_avg_price > 0.
--
-- Requires MySQL 8.0+ (window functions). Rows are ranked per card, newest
-- first, and only rank 1 is kept. This avoids relying on an ORDER BY inside a
-- derived table together with a partial GROUP BY. The derived tables expose
-- avg_date / foil_date explicitly because the outer select list reads them.
-- Cards with no positive avg_price do not appear in the result.
SELECT a.card_id, a.avg_date, a.avg_price, b.foil_date, b.foil_avg_price
FROM (
    SELECT ranked.card_id, ranked.date AS avg_date, ranked.avg_price
    FROM (
        SELECT
            c.card_id,
            c.date,
            c.avg_price,
            ROW_NUMBER() OVER (PARTITION BY c.card_id ORDER BY c.date DESC) AS rn
        FROM cards c
        WHERE c.avg_price > 0
    ) AS ranked
    WHERE ranked.rn = 1
) AS a
LEFT JOIN (
    SELECT ranked.card_id, ranked.date AS foil_date, ranked.foil_avg_price
    FROM (
        SELECT
            c.card_id,
            c.date,
            c.foil_avg_price,
            ROW_NUMBER() OVER (PARTITION BY c.card_id ORDER BY c.date DESC) AS rn
        FROM cards c
        WHERE c.foil_avg_price > 0
    ) AS ranked
    WHERE ranked.rn = 1
) AS b ON a.card_id = b.card_id;