How to get biggest differences in column compared to last month? - mysql

I have two tables - Products and Prices
Every month table Prices is populated with new prices for each products. How can I get 5 products whose prices have the biggest incremental difference from last month prices?
table Products
id | name
1 | apples
2 | pears
3 | bananas
table Prices
id | price | product_id | created_at
1 | 10 | 1 | 2017-02-07 07:00:00
2 | 10 | 2 | 2017-02-07 07:00:00
3 | 15 | 3 | 2017-02-07 07:00:00
5 | 15 | 1 | 2017-03-07 07:00:00
6 | 20 | 2 | 2017-03-07 07:00:00
7 | 25 | 3 | 2017-03-07 07:00:00
The result would be to find out that
1. Bananas has prices by 15 higher (lastMonth: 15, now: 25)
2. Pears 2 has prices by 10 higher (lastMonth: 10, now: 20)
3. Apples has prices by 5 higher (lastMonth: 10, now: 15)
I was thinking something like this (uff I know this is terrible)
SELECT products.id, products.name, prices.beforePrice, prices.afterPrice, prices.difference
FROM products
INNER JOIN prices ON products.id = prices.product_id
WHERE
(
SELECT *biggest-difference*
FROM prices
WHERE *difference_between_last_2_months*
GROUP BY product_id
LIMIT 5
)

Create table/insert data
CREATE TABLE Products
(`id` INT, `name` VARCHAR(7))
;
INSERT INTO Products
(`id`, `name`)
VALUES
(1, 'apples'),
(2, 'pears'),
(3, 'bananas')
;
CREATE TABLE Prices
(`id` INT, `price` INT, `product_id` INT, `created_at` DATETIME)
;
INSERT INTO Prices
(`id`, `price`, `product_id`, `created_at`)
VALUES
(1, 10, 1, '2017-02-07 07:00:00'),
(2, 10, 2, '2017-02-07 07:00:00'),
(3, 15, 3, '2017-02-07 07:00:00'),
(5, 15, 1, '2017-03-07 07:00:00'),
(6, 20, 2, '2017-03-07 07:00:00'),
(7, 25, 3, '2017-03-07 07:00:00')
;
Query
SELECT
Products.id
, Products.name
, (current_month.price - last_month.price) AS difference
, (
CASE
WHEN last_month.price > current_month.price
THEN 'lower'
WHEN last_month.price < current_month.price
THEN 'higher'
END
) AS incremental
, last_month.price 'lastMonth'
, current_month.price 'now'
FROM (
SELECT
*
FROM
Prices
WHERE
MONTH(created_at) = MONTH((CURDATE() - INTERVAL 1 MONTH))
)
AS
last_month
INNER JOIN (
SELECT
*
FROM
Prices
WHERE
MONTH(created_at) = MONTH((CURDATE()))
)
AS
current_month
ON
last_month.product_id = current_month.product_id
INNER JOIN
Products
ON
last_month.product_id = Products.id
WHERE
last_month.price < current_month.price #incremental should be higher
ORDER BY
difference DESC
LIMIT 5
Result
id name difference incremental lastMonth now
------ ------- ---------- ----------- --------- --------
2 pears 10 higher 10 20
3 bananas 10 higher 15 25
1 apples 5 higher 10 15

You can use a proper joins based on fliterd select by month.
This should return the value you need ( you can add the literal string you need )
select p.name, m1.price as this_month, m2.price as prev_month, m2.price-m1.price as diff
from product
left join (
select price, product_id
from Prices
where month(created_at) = month(NOW())
and year(created_at) = year(NOW())
) m1 on m1.product_id = p.id
left join (
select price, product_id
from Prices
where month(created_at) = MONT(DATE_SUB(NOW(), INTERVAL 1 MONTH)
and year(created_at) = year(DATE_SUB(NOW(), INTERVAL 1 MONTH)
) m2 on m2.product_id = p.id
order by diff desc
limit 5

Related

Select from tablue where value is repeated on same table on another colum

I have a table like this
idGoal | idMatch | minute
1 | 1 | 30
2 | 1 | 40
3 | 2 | 30
4 | 3 | 45
I want to get only the goals where the minute are the same on distinct matches.
So it shows idGoal 1 and 3.
I would use exists:
select g.*
from goals g
where exists (select 1
from goals g2
where g2.minute = g.minute and
g2.idMatch <> g.idMatch
);
In particular, exists can take advantage of an index on (minute, idMatch).
Your table:
CREATE TABLE goals
(idGoal INT, idMatch INT, minute TINYINT);
Your data:
INSERT INTO goals
(idGoal, idMatch, minute)
VALUES
(1, 1, 30),
(2, 1, 40),
(3, 2, 30),
(4, 3, 45);
Your query:
SELECT idGoal
FROM goals
WHERE minute IN
(
SELECT minute
FROM goals
GROUP BY minute
HAVING COUNT(*) > 1
);
Your result:
idGoal
------
1
3

Most recent date with maximum score

I have the table definition below.
CREATE TABLE IF NOT EXISTS ranking (
user_id int(11) unsigned NOT NULL,
create_date date NOT NULL,
score double(8,2),
PRIMARY KEY (user_id, create_date)
)
insert into ranking (user_id, create_date, score) values
(1, '2017-03-01', 100),
(1, '2017-03-02', 90),
(1, '2017-03-03', 80),
(1, '2017-03-04', 100),
(1, '2017-03-05', 90),
(2, '2017-03-01', 90),
(2, '2017-03-02', 80),
(2, '2017-03-03', 100),
(2, '2017-03-5', 100),
(3, '2017-03-01', 80),
(3, '2017-03-02', 100),
(3, '2017-03-03', 90),
(3, '2017-03-6', 100);
select * from ranking;
user_id | create_date | score
1 | 2017-03-01 | 100
1 | 2017-03-02 | 90
1 | 2017-03-03 | 80
1 | 2017-03-04 | 100
1 | 2017-03-05 | 90
2 | 2017-03-01 | 90
2 | 2017-03-02 | 80
2 | 2017-03-03 | 100
2 | 2017-03-05 | 100
3 | 2017-03-01 | 80
3 | 2017-03-02 | 100
3 | 2017-03-03 | 90
3 | 2017-03-06 | 100
What I want is for each user_id, get the most recent create_date on which the score is maximum. For example, in the example above, for user_id = 1, when create_date = 2017-03-01 and create_date = 2017-03-04, the maximum score is 100, but I just want the most recent date with the maximum score, i.e., create_date = 2017-03-04 and score = 100. The query result is as follows:
user_id | create_date | score
1 | 2017-03-04 | 100
2 | 2017-03-05 | 100
3 | 2017-03-06 | 100
Below is my solution, which returns the expected result but I believe there exist better solutions.
SELECT a.* from
(
SELECT s1.user_id , s1.create_date, s1.score FROM ranking AS s1
INNER JOIN
(SELECT user_id , FORMAT(max(score), 0) as best_score FROM ranking GROUP BY user_id ) AS s2
ON s1.user_id = s2.user_id AND s1.score = s2.best_score
) a
NATURAL LEFT JOIN
(
SELECT s1.user_id , s1.create_date, s1.score FROM ranking AS s1
INNER JOIN
(
SELECT user_id , create_date, score FROM ranking
) s2
WHERE s1.user_id = s2.user_id AND s1.score = s2.score AND s1.create_date < s2.create_date
) b
WHERE b.user_id IS NULL;
Can someone provide better solutions? Thanks.
SELECT t1.user_id,
MAX(t1.create_date) AS max_date,
t2.max_score
FROM ranking t1
INNER JOIN
(
SELECT user_id, MAX(score) AS max_score
FROM ranking
GROUP BY user_id
) t2
ON t1.user_id = t2.user_id AND
t1.score = t2.max_score
GROUP BY t1.user_id
Output:
Demo here:
Rextester
Try this:
select user_id, max(create_date),max(score) from ranking GROUP BY user_id
result:
1 2017-03-04 100.00
2 2017-03-05 100.00
3 2017-03-06 100.00
or
select user_id, max(create_date),cast(max(score) as UNSIGNED) as maxscore from ranking GROUP BY user_id
result:
1 2017-03-04 100
2 2017-03-05 100
3 2017-03-06 100
Try this query -
SELECT r1.* FROM ranking r1
JOIN (SELECT user_id, MAX(score) max_score FROM ranking GROUP BY user_id) r2
ON r1.user_id = r2.user_id AND r1.score = r2.max_score
JOIN (SELECT user_id, score, MAX(create_date) max_create_date FROM ranking GROUP BY user_id, score) r3
ON r1.user_id = r3.user_id AND r1.score = r3.score AND r1.create_date = r3.max_create_date;
1 04-Mar-17 100
2 05-Mar-17 100
3 06-Mar-17 100

Return last matched row in mysql

I have this table
id | qty_from | qty_to | p_id | price
--------------------------------------
1 | 1 | 10 | 4 | 1000
--------------------------------------
2 | 11 | 20 | 4 | 2000
--------------------------------------
3 | 1 | 10 | 5 | 500
--------------------------------------
4 | 11 | 20 | 5 | 1000
--------------------------------------
5 | 1 | 10 | 6 | 1000
--------------------------------------
6 | 10 | 15 | 6 | 2000
And i tried to get rows by qty_from AND qty_to AND p_id using below code
SELECT * FROM table WHERE p_id IN ('4', '5', '6') AND qty_from <= 8 AND qty_to >= 8
It returns 1st, 3rd and 5th rows.
And when i use this code, it return only 1st and 3rd rows.
SELECT * FROM table WHERE p_id IN ('4', '5', '6') AND qty_from <= 16 AND qty_to >= 16
I want to return 6th row too. because it's the biggest qty in p_id = 6
How can i achieve this?
After discuss here is what you need (first and not tested solution) :
SELECT *
FROM (SELECT *, MAX(qty_to) as max_qty_to FROM `limit`) T
WHERE p_id IN ('4', '5', '6')
AND ( ( qty_from <= 16 AND qty_to >= 16 ) OR qty_to = T.max_qty_to )
You should update your where condition to achieve your desired result. The updated query will be -
SELECT * FROM table WHERE p_id IN ('4', '5', '6') AND qty_from <= 10 AND qty_to >= 8
I just solve this.
Thanks to #Meloman for giving me the clue.
SELECT * FROM table WHERE p_id IN ('4', '5', '6') AND ((qty_from <= 16 AND qty_to >= 16) OR (qty_to, p_id) IN (SELECT dd.qty_to, dd.p_id FROM table dd INNER JOIN (SELECT MAX(qty_to) AS mm, p_id FROM table GROUP BY p_id) ddd ON dd.p_id = ddd.p_id AND dd.qty_to = ddd.mm WHERE dd.p_id IN ('4', '5', '6'))) GROUP BY p_id
I test this multiple times and i think it's the answer
This solution works:
SELECT DISTINCT t3.*
FROM `table` t1
LEFT JOIN `table` t2 ON t2.id=IFNULL(
(SELECT id FROM `table` t3
WHERE t3.p_id=t1.p_id AND 16 BETWEEN t3.qty_from AND t3.qty_to
ORDER BY qty_to DESC LIMIT 1),
(SELECT id FROM `table` t4
WHERE t4.p_id=t1.p_id
ORDER BY qty_to DESC LIMIT 1))
WHERE t1.p_id IN ('4', '5', '6')
Basically, first a table with the given p_ids is fetched and then joined with the rows of the most desired ids per p_id (if 16 is not in range of qty_from and qty_to, the one with the biggest qty_to is taken).
One flaw: If multiple rows match the range condition, only the one with the biggest qty_to is selected, so there is only one result per p_id. I hope that's sufficient for you!

SQL: Grouped average-if / case SELECT statement

I have a database that looks like this SQL Fiddle: http://sqlfiddle.com/#!9/aa02e/1
CREATE TABLE Table1
(`Store` varchar(1), `Date` date, `Product` varchar(2), `Weekday` int, `Month` int, `Revenue` float)
;
INSERT INTO Table1
(`Store`, `Date`, `Product`, `Weekday`, `Month`, `Revenue`)
VALUES
('a', '20160101', 'aa', 5, 1, 1.5),
('a', '20160101', 'bb', 5, 1, 4),
('a', '20160101', 'cc', 5, 1, 3.5),
('a', '20160108', 'dd', 5, 1, 2.5),
('a', '20160108', 'ee', 5, 1, 5),
('b', '20160204', 'aa', 4, 2, 9.5),
('b', '20160204', 'bb', 4, 2, 4),
('b', '20160204', 'cc', 4, 2, 3),
('b', '20160211', 'dd', 4, 2, 1.5),
('b', '20160211', 'ee', 4, 2, 2.5)
;
SELECT * FROM table1;
+-------+------------+---------+---------+-------+---------+
| Store | Date | Product | Weekday | Month | Revenue |
+-------+------------+---------+---------+-------+---------+
| a | 2016-01-01 | aa | 5 | 1 | 1.5 |
| a | 2016-01-01 | bb | 5 | 1 | 4 |
| a | 2016-01-01 | cc | 5 | 1 | 3.5 |
| a | 2016-01-08 | dd | 5 | 1 | 2.5 |
| a | 2016-01-08 | ee | 5 | 1 | 5 |
| b | 2016-02-04 | aa | 4 | 2 | 9.5 |
| b | 2016-02-04 | bb | 4 | 2 | 4 |
| b | 2016-02-04 | cc | 4 | 2 | 3 |
| b | 2016-02-11 | dd | 4 | 2 | 1.5 |
| b | 2016-02-11 | ee | 4 | 2 | 2.5 |
+-------+------------+---------+---------+-------+---------+
It shows revenue data for stores incl. products, date and the respective day/month.
I want to select the following:
Store
Monthly revenue totals (i.e. what is the total revenue for store a in Jan?)
Weekday revenue averages (i.e. what is the avg revenue for store a on Thu?)
The first and second bullet are straightforward, but I'm having problems with the last one.
Currently, it takes the average over all products and all dates (assuming the weekday matches). What I need are the following steps:
Sum up all revenues for a store and a particular date (e.g. for store b: 9.5+4+3=16.5 for Feb 4th, and 1.5+2.5=4 for Feb 11th) if that date has the same weekday (here Thursday)
Take the average of the two values (e.g. avg(16.5,4)=10.25)
How can I accomplish that?
Thank you
Here is the query:
SELECT
Store,
SUM(CASE WHEN Month = 1 THEN Revenue ELSE NULL END) AS REVENUE_JAN,
SUM(CASE WHEN Month = 2 THEN Revenue ELSE NULL END) AS REVENUE_FEB,
AVG(CASE WHEN Weekday = 4 THEN Revenue ELSE NULL END) AS REVENUE_THU,
AVG(CASE WHEN Weekday = 5 THEN Revenue ELSE NULL END) AS REVENUE_FRI
FROM Table1
GROUP BY
Store
;
The weekday average is tricky. Your query is getting the average "order size" per weekday. But you want the total revenue.
One method is to first aggregate by weekday, but that is a bit of a mess. Instead, you can use this trick of calculating the average by dividing the total revenue by the number of days:
SELECT Store,
SUM(CASE WHEN Month = 1 THEN Revenue ELSE NULL END) AS REVENUE_JAN,
SUM(CASE WHEN Month = 2 THEN Revenue ELSE NULL END) AS REVENUE_FEB,
(SUM(CASE WHEN Weekday = 4 THEN Revenue END) /
COUNT(DISTINCT CASE WHEN Weekday = 4 THEN Date END)
) AS REVENUE_THU,
(SUM(CASE WHEN Weekday = 5 THEN Revenue END) /
COUNT(DISTINCT CASE WHEN Weekday = 5 THEN Date END)
) AS REVENUE_FRI
FROM Table1
GROUP BY Store;
SELECT
t1.store,
SUM(CASE WHEN Month = 1 THEN Revenue ELSE NULL END) AS REVENUE_JAN,
SUM(CASE WHEN Month = 2 THEN Revenue ELSE NULL END) AS REVENUE_FEB,
daily.REVENUE_THU,
daily.REVENUE_FRI
FROM Table1 t1
JOIN (
SELECT
Store,
weekday,
avg(CASE WHEN weekday = 4 THEN sum_rev END) as REVENUE_THU,
avg(CASE WHEN weekday = 5 THEN sum_rev END) as REVENUE_FRI
FROM (
SELECT
Store, date, weekday,
SUM(revenue) AS sum_rev
FROM Table1
GROUP BY
Store, date, weekday
) AS foo
GROUP BY Store, weekday
) AS daily ON daily.store = t1.store
GROUP BY
t1.store
How about this solution it return average for chosen day of chosen store
CREATE PROCEDURE sumForDayStore(IN vday INTEGER, IN vStore VARCHAR(50))
BEGIN
DECLARE totalDays INTEGER;
DECLARE totalRevenu INTEGER;
SET totalDays = (SELECT count(*) FROM Table1 WHERE WeekDay = vDay AND store = vStore);
SET totalRevenu = (SELECT sum(Revenue) FROM Table1 WHERE WeekDay = vDay AND store = vStore);
SELECT totalRevenu/totalDays;
END;
CALL sumForDayStore(5,'a');
How about this one:
SELECT mnth.Store, REVENUE_JAN, REVENUE_FEB, avg(rthu) REVENUE_THU, avg(rfri) REVENUE_FRI
FROM
(Select Store, sum(case when Month = 1 then Revenue else NULL END) REVENUE_JAN,
sum(case when Month = 2 then Revenue else NULL END) REVENUE_FEB
From Table1 group by Store) as mnth
join
(Select Store, sum(case when Weekday = 4 then Revenue end) rThu,
sum(case when Weekday = 5 then Revenue end) rFri from Table1 group by Store, Date) as dys
on mnth.Store = dys.Store
group by mnth.Store, REVENUE_JAN, REVENUE_FEB
I compared the performance of this with the query in the first answer and it shows better performance according to SQL server execution plan (1.6 times faster). Maybe this would be helpful on a larger data set.

Counting records with related records which appear first in a given date

I have two tables, players and games, created as follows:
CREATE TABLE IF NOT EXISTS `players` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
`created_at` datetime NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;
CREATE TABLE IF NOT EXISTS `games` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`player` int(11) NOT NULL,
`played_at` datetime NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;
I wish to extract 3 values for each day:
The number of players created at that day
The number of players played at that day
The number of players having played for the first time at that day
So, suppose for example that the players table looks as follows:
+----+--------+---------------------+
| id | name | created_at |
+----+--------+---------------------+
| 1 | Alan | 2016-02-01 00:00:00 |
| 2 | Benny | 2016-02-01 06:00:00 |
| 3 | Calvin | 2016-02-02 00:00:00 |
| 4 | Dan | 2016-02-03 00:00:00 |
+----+--------+---------------------+
And the games table looks as follows:
+----+--------+---------------------+
| id | player | played_at |
+----+--------+---------------------+
| 1 | 1 | 2016-02-01 01:00:00 |
| 2 | 3 | 2016-02-02 02:00:00 |
| 3 | 2 | 2016-02-03 14:00:00 |
| 4 | 3 | 2016-02-03 17:00:00 |
| 5 | 3 | 2016-02-03 18:00:00 |
+----+--------+---------------------+
Then the query should return something like
+------------+-----+--------+-------+
| day | new | played | first |
+------------+-----+--------+-------+
| 2016-02-01 | 2 | 1 | 1 |
| 2016-02-02 | 1 | 1 | 1 |
| 2016-02-03 | 1 | 2 | 1 |
+------------+-----+--------+-------+
I have a solution for 1 (new):
SELECT Date(created_at) AS day,
Count(*) AS new
FROM players
GROUP BY day;
That's easy. I think I also have a solution for 2 (played), thanks to MySQL COUNT DISTINCT:
select Date(played_at) AS day,
Count(Distinct player) AS played
FROM games
GROUP BY day;
But I have no idea how to get the needed result for 3 (first). I also don't know how to put everything in a single query, to save execution time (the games table may include millions of records).
In case you need it, here's a query which inserts the example data:
INSERT INTO `players` (`id`, `name`, `created_at`) VALUES
(1, 'Alan', '2016-02-01 00:00:00'),
(2, 'Benny', '2016-02-01 06:00:00'),
(3, 'Calvin', '2016-02-02 00:00:00'),
(4, 'Dan', '2016-02-03 00:00:00');
INSERT INTO `games` (`id`, `player`, `played_at`) VALUES
(1, 1, '2016-02-01 01:00:00'),
(2, 3, '2016-02-02 02:00:00'),
(3, 2, '2016-02-03 14:00:00'),
(4, 3, '2016-02-03 17:00:00'),
(5, 3, '2016-02-03 18:00:00');
One version is to get all relevant data into a union and do the analysis from there;
SELECT SUM(type='P') new,
COUNT(DISTINCT CASE WHEN type='G' THEN pid END) played,
SUM(type='F') first
FROM (
SELECT id pid, DATE(created_at) date, 'P' type FROM players
UNION ALL
SELECT player, DATE(played_at) date, 'G' FROM games
UNION ALL
SELECT player, MIN(DATE(played_at)), 'F' FROM games GROUP BY player
) z
GROUP BY date;
In the union;
Records with type P is player creation statistics.
Records with type G is player related game statistics.
Records with type F is statistics for when players played their first game.
You can count the result of a temp table based on min(played_at) and filterd by having
select count(player) from
( select player, min(played_at)
from games
group by player
having min(played_at) = YOUR_GIVEN_DATE ) as t;
this query will give you the result:
select day,( select count(distinct(id)) from players where Date(created_at) = temp.day ) as no_created_at ,
( select count(distinct(player)) from games where Date(played_at) = temp.day) as no_played_at,
( select count(distinct(player)) from games where Date(played_at) =
(select min(Date(played_at)) from games internal_games
where internal_games.player =games.player and Date(games.played_at) = temp.day )) as no_first_played_at
from (
SELECT Date(created_at) AS day
FROM players
GROUP BY day
union
select Date(played_at) AS day
FROM games
GROUP BY day) temp
and the output:
Here's a solution with a bunch of subqueries, which accounts for the possibility that players may have been created on days with no games, and vice versa:
select
all_dates.date as day,
ifnull(new.num, 0) as new,
ifnull(players.num, 0) as players,
ifnull(first.num, 0) as first
from (
select date(created_at) as date from players
union
select date(played_at) from games
) as all_dates
left join (
select date(created_at) as created_at_date, count(*) as num
from players
group by created_at_date
) as new on all_dates.date = new.created_at_date
left join (
select date(played_at) as played_at_date, count(distinct player) as num
from games
group by played_at_date
) as players on all_dates.date = players.played_at_date
left join (
select min_date, count(*) num
from (
select player, date(min(played_at)) as min_date
from games
group by player
) as players_first
group by min_date
) as first on all_dates.date = first.min_date
order by day;