optimize mysql query: medal standings - mysql

I have 2 tables:
olympic_medalists with columns gold_country, silver_country, bronze_country
flags with country column
I want to list the olympic medal table accordingly. I have this query, it works, but it seems to kill mysql. Hope someone can help me with an optimized query.
-- Medal table as originally written by the asker (the slow version).
-- For every output row, three correlated scalar subqueries re-scan
-- olympic_medalists, each matching one medal column against the alias sc.
SELECT DISTINCT country AS sc,
-- Silver count for this country; blank names are excluded, and IFNULL turns
-- "no matching rows" into 0.
IFNULL(
(SELECT COUNT(silver_country)
FROM olympic_medalists
WHERE silver_country = sc AND silver_country != ''
GROUP BY silver_country),0) AS silver_medals,
-- Gold count, same pattern.
IFNULL(
(SELECT COUNT(gold_country)
FROM olympic_medalists
WHERE gold_country = sc AND gold_country != ''
GROUP BY gold_country),0) AS gold_medals,
-- Bronze count, same pattern.
IFNULL(
(SELECT COUNT(bronze_country)
FROM olympic_medalists
WHERE bronze_country = sc AND bronze_country != ''
GROUP BY bronze_country),0) AS bronze_medals
-- Comma join with no join condition: every medalist row is paired with every
-- flags row, and the subqueries above run against that cross product.
FROM olympic_medalists, flags
-- NOTE(review): the GROUP BY list mixes the column silver_country with the
-- aliases gold_medals and bronze_medals; silver_medals was probably intended.
-- The HAVING keeps countries with at least one medal ('||' is presumably
-- logical OR here, i.e. PIPES_AS_CONCAT is off -- confirm).
GROUP BY country, gold_medals, silver_country, bronze_medals HAVING (
silver_medals >= 1 || gold_medals >= 1 || bronze_medals >= 1)
ORDER BY gold_medals DESC, silver_medals DESC, bronze_medals DESC,
SUM(gold_medals+silver_medals+bronze_medals)
result will be like:
country | g | s | b | tot
---------------------------------
country1 | 9 | 5 | 2 | 16
country2 | 5 | 5 | 5 | 15
and so on
Thanks!
olympic medalists:
`id` int(8) NOT NULL auto_increment,
`gold_country` varchar(64) collate utf8_unicode_ci default NULL,
`silver_country` varchar(64) collate utf8_unicode_ci default NULL,
`bronze_country` varchar(64) collate utf8_unicode_ci default NULL, PRIMARY KEY (`id`)
flags
`id` int(11) NOT NULL auto_increment,
`country` varchar(128) default NULL,
PRIMARY KEY (`id`)

This will be much more efficient than your current solution of executing three different SELECT subqueries for each row in a cross-joined relation (and you wonder why it stalls out!):
-- Medal table: one row per country that won at least one medal, ranked by
-- gold, then silver, then bronze (the filter and ordering the question asks for).
-- Each medal column is aggregated once in its own derived table, so
-- olympic_medalists is scanned three times in total instead of per output row.
SELECT
    f.country,
    COALESCE(gold.cnt, 0)   AS g,
    COALESCE(silver.cnt, 0) AS s,
    COALESCE(bronze.cnt, 0) AS b,
    COALESCE(gold.cnt, 0)
        + COALESCE(silver.cnt, 0)
        + COALESCE(bronze.cnt, 0) AS tot
FROM flags AS f
LEFT JOIN (
    SELECT gold_country, COUNT(*) AS cnt
    FROM olympic_medalists
    GROUP BY gold_country
) AS gold ON f.country = gold.gold_country
LEFT JOIN (
    SELECT silver_country, COUNT(*) AS cnt
    FROM olympic_medalists
    GROUP BY silver_country
) AS silver ON f.country = silver.silver_country
LEFT JOIN (
    SELECT bronze_country, COUNT(*) AS cnt
    FROM olympic_medalists
    GROUP BY bronze_country
) AS bronze ON f.country = bronze.bronze_country
-- A NULL cnt means the LEFT JOIN found no medal of that colour; drop countries
-- that matched none of the three.
WHERE gold.cnt IS NOT NULL
   OR silver.cnt IS NOT NULL
   OR bronze.cnt IS NOT NULL
ORDER BY g DESC, s DESC, b DESC, f.country
What would be even faster is instead of storing the actual textual country name in each of the gold, silver, and bronze columns, just store the integer-based country id. Comparisons on integers are always going to be faster than comparisons on strings.
Moreover, once you replace each country name in the olympic_medalists table with the corresponding ids, you'll want to create an index on each column (gold, silver, and bronze).
Updating the textual names to their corresponding ids is a simple task and could be done with a single UPDATE statement in conjunction with some ALTER TABLE commands.

try this:
-- Medal table built from a single "unpivoted" list of (country, medal) pairs.
-- Table names are written in lower case to match the schema: MySQL table names
-- are case-sensitive on case-sensitive file systems.
-- The INNER JOIN keeps only countries that appear in the medal list, i.e.
-- countries with at least one medal, so no NULL handling is needed.
SELECT
    f.country AS COUNTRY,
    m.G,
    m.S,
    m.B,
    m.G + m.S + m.B AS TOTAL
FROM flags AS f
INNER JOIN (
    -- Count each medal colour per country with conditional sums.
    SELECT
        a.country,
        SUM(CASE WHEN a.medal = 'G' THEN 1 ELSE 0 END) AS G,
        SUM(CASE WHEN a.medal = 'S' THEN 1 ELSE 0 END) AS S,
        SUM(CASE WHEN a.medal = 'B' THEN 1 ELSE 0 END) AS B
    FROM (
        -- One row per medal awarded, tagged with its colour.
        SELECT gold_country AS country, 'G' AS medal
        FROM olympic_medalists
        WHERE gold_country IS NOT NULL
        UNION ALL
        SELECT silver_country AS country, 'S' AS medal
        FROM olympic_medalists
        WHERE silver_country IS NOT NULL
        UNION ALL
        SELECT bronze_country AS country, 'B' AS medal
        FROM olympic_medalists
        WHERE bronze_country IS NOT NULL
    ) AS a
    GROUP BY a.country
) AS m ON f.country = m.country
ORDER BY m.G DESC, m.S DESC, m.B DESC, m.G + m.S + m.B DESC, f.country

Related

MySQL GROUP_CONCAT and JOIN

Media table:
-- Media items; $media_table is a table-name placeholder filled in by the application.
CREATE TABLE $media_table (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`title` varchar(300) DEFAULT NULL,
`options` longtext DEFAULT NULL,
PRIMARY KEY (`id`)
)
Table example:
id title options
--------------------------
1 video ...
2 video ...
3 audio ...
Category table:
-- Taxonomy terms attached to media items: one row per (media item, term).
-- $media_taxonomy_table is a table-name placeholder filled in by the application.
CREATE TABLE $media_taxonomy_table (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`type` varchar(15) DEFAULT NULL,
`title` varchar(300) DEFAULT NULL,
`media_id` int(11) unsigned DEFAULT NULL,
PRIMARY KEY (`id`),
INDEX `media_id` (`media_id`)
)
type can be 'category' or 'tag' (I guess this could be enum)
Table example:
id type title media_id
---------------------------------------
1 category rock 1
2 category trance 1
3 category trance 2
4 category rock 3
5 tag silent 1
5 tag loud 1
6 tag foo 2
I am trying to GROUP_CONCAT on both category and tag from $media_taxonomy_table.
This query will return GROUP_CONCAT with only category
-- One row per media item with its category titles joined into a single
-- comma-separated string, sorted alphabetically. The type filter lives in the
-- ON clause so that media without any category still appear.
SELECT
    mt.id,
    mt.title,
    mt.options,
    GROUP_CONCAT(mtt.title ORDER BY mtt.title SEPARATOR ', ') AS category
FROM $media_table AS mt
LEFT JOIN $media_taxonomy_table AS mtt
    ON mtt.media_id = mt.id
   AND mtt.type = 'category'
WHERE playlist_id = %d
GROUP BY mt.id
Results, received:
id title options category
----------------------------------------
1 video ... rock, trance
2 video ... trance
3 audio ... rock
expected (I need tag as well):
id title options category tag
--------------------------------------------------------------
1 video ... rock, trance silent, loud
2 video ... trance foo
3 audio ... rock
You can do conditional aggregation:
-- Categories and tags in one pass: a single join to the taxonomy table, with
-- each GROUP_CONCAT keeping only the titles of its own type (the CASE yields
-- NULL for the other type, and GROUP_CONCAT skips NULLs).
SELECT
    mt.id,
    mt.title,
    mt.options,
    GROUP_CONCAT(
        CASE mtt.type WHEN 'category' THEN mtt.title END
        ORDER BY mtt.title
        SEPARATOR ', '
    ) AS categories,
    GROUP_CONCAT(
        CASE mtt.type WHEN 'tag' THEN mtt.title END
        ORDER BY mtt.title
        SEPARATOR ', '
    ) AS tags
FROM $media_table AS mt
LEFT JOIN $media_taxonomy_table AS mtt
    ON mtt.media_id = mt.id
WHERE mt.playlist_id = %d
GROUP BY
    mt.id,
    mt.title,
    mt.options
Seems you need another group_concat so you need another join
-- Categories and tags via two separate joins to the taxonomy table.
-- The second join must use its own alias (mtt2) in both the ON clause and the
-- ORDER BY. Joining twice pairs every category row with every tag row of the
-- same media item, so DISTINCT is needed to avoid repeated titles.
SELECT mt.id
, mt.title
, mt.options
, GROUP_CONCAT(DISTINCT mtt.title ORDER BY mtt.title ASC SEPARATOR ', ') as category
, GROUP_CONCAT(DISTINCT mtt2.title ORDER BY mtt2.title ASC SEPARATOR ', ') as tag
FROM $media_table as mt
LEFT JOIN $media_taxonomy_table as mtt
ON mt.id = mtt.media_id AND mtt.type='category'
LEFT JOIN $media_taxonomy_table as mtt2
ON mt.id = mtt2.media_id AND mtt2.type='tag'
WHERE mt.playlist_id = %d
GROUP BY mt.id, mt.title, mt.options

MySQL table "pivot" without creating tables/views: unique column values as header

I have this 'occupations' two-column table, holding the name and occupation of multiple persons. The occupations are known and can only be 'Developer', 'Engineer','Doctor','Musician'.
Name | Occupation
Dan | Developer
Martin | Doctor
Sam | Engineer
Andre | Musician
Tom | Engineer
The aim is to obtain something like the following:
Doctor | Engineer | Developer | Musician
Martin | Sam | Dan | Andre
NULL | Tom | NULL | NULL
All columns should be alphabetically ordered.
Do you guys have any suggestions regarding how this can be achieved (without creating tables or views) using MySQL?
Thanks a lot!
This is a pain, but you can do it using variables and aggregation:
-- Pivot the occupations table into one column per occupation.
-- Each branch numbers the people of one occupation with its own user variable
-- (@rnd, @rne, @rnv, @rnm); grouping by that row number then lines up the
-- n-th doctor, n-th engineer, ... on the same output row.
-- NOTE: the branches have no ORDER BY, so the order of names within a column
-- follows whatever order the rows are read in.
select max(doctor) as doctor,
       max(engineer) as engineer,
       max(developer) as developer,
       max(musician) as musician
from ((select name as doctor, null as engineer, null as developer, null as musician,
              (@rnd := @rnd + 1) as rn
       from occupations cross join
            (select @rnd := 0) as params
       where occupation = 'doctor'
      ) union all
      (select null as doctor, name as engineer, null as developer, null as musician,
              (@rne := @rne + 1) as rn
       from occupations cross join
            (select @rne := 0) as params
       where occupation = 'engineer'
      ) union all
      (select null as doctor, null as engineer, name as developer, null as musician,
              (@rnv := @rnv + 1) as rn
       from occupations cross join
            (select @rnv := 0) as params
       where occupation = 'developer'
      ) union all
      (select null as doctor, null as engineer, null as developer, name as musician,
              (@rnm := @rnm + 1) as rn
       from occupations cross join
            (select @rnm := 0) as params
       where occupation = 'musician'
      )
     ) o
group by rn
order by rn;
This will work in MySql 8.0:
-- Pivot the occupations table into one column per occupation, using CTEs and
-- window functions.
with occup as (
    -- One row per person: the name sits in the column of that person's
    -- occupation and every other column is NULL.
    select
        case when o.occupation = 'Doctor' then o.Name end as Doctor,
        case when o.occupation = 'Engineer' then o.Name end as Engineer,
        case when o.occupation = 'Developer' then o.Name end as Developer,
        case when o.occupation = 'Musician' then o.Name end as Musician
    from occupations o
),
-- Each CTE below numbers the rows of one column so that real names come first
-- (NULLs last) and, among the names, in alphabetical order.
doctors as (
    select ROW_NUMBER() OVER (
        ORDER BY case when occup.Doctor is null then 1 else 0 end, occup.Doctor
    ) as rn, occup.Doctor from occup
),
engineers as (
    select ROW_NUMBER() OVER (
        ORDER BY case when occup.Engineer is null then 1 else 0 end, occup.Engineer
    ) as rn, occup.Engineer from occup
),
developers as (
    select ROW_NUMBER() OVER (
        ORDER BY case when occup.Developer is null then 1 else 0 end, occup.Developer
    ) as rn, occup.Developer from occup
),
musicians as (
    select ROW_NUMBER() OVER (
        ORDER BY case when occup.Musician is null then 1 else 0 end, occup.Musician
    ) as rn, occup.Musician from occup
)
-- Line up the n-th name of every column on the same row, drop rows where all
-- four columns are NULL, and return the rows in row-number order.
select doctors.Doctor, engineers.Engineer, developers.Developer, musicians.Musician
from doctors
inner join engineers on doctors.rn = engineers.rn
inner join developers on engineers.rn = developers.rn
inner join musicians on musicians.rn = developers.rn
WHERE coalesce(doctors.Doctor, engineers.Engineer, developers.Developer, musicians.Musician) IS NOT NULL
ORDER BY doctors.rn;
See the demo

How to select rows where for same `userid` other field has specific values?

I have this kind of table (simplified):
orders sample data below
---------------------------------------------
id INT: 1 2 3 4 5
userid INT 10 10 10 20 20
status CHAR(1) A A B A C
and want to select all orders where for each userid status is IN ('A','B') but have no orders at all IN ('C','D').
So output for above data would give orders with ID=1, 2 and 3. User ID=10 have orders A and B, but no C or D.
In other words: Select orders for customers who have orders with status A or B, but none of statuses C or D.
I started with this:
-- Attempt to list orders of users that have A/B orders and a zero count of
-- C/D orders. Both per-user counts come from inner-joined derived tables.
SELECT
    xcart_orders.orderid,
    xcart_orders.*
FROM xcart_orders
INNER JOIN (
    -- Number of C/D orders per user (only users that have some).
    SELECT userid, COUNT(*) AS bad_statuses
    FROM xcart_orders
    WHERE status IN ('C', 'D')
    GROUP BY userid
) AS bad
    ON bad.userid = xcart_orders.userid
INNER JOIN (
    -- Number of A/B orders per user (only users that have some).
    SELECT userid, COUNT(*) AS good_statuses
    FROM xcart_orders
    WHERE status IN ('A', 'B')
    GROUP BY userid
) AS good
    ON good.userid = xcart_orders.userid
WHERE good.good_statuses > 0
  AND bad.bad_statuses = 0
but think count(*) won't return zero for 'bad' statuses, so I get no results.
You have an aggregation without GROUP BY, and to check the aggregated result you need to use HAVING instead of WHERE:
-- Orders of users that have at least one A/B order and no C/D order.
-- The status counts are aggregated per user (not per order): grouping by
-- orderid would judge each order on its own and could never exclude a user
-- because of one of their other orders.
SELECT
    o.orderid,
    o.*,
    u.bad_statuses,
    u.good_statuses
FROM xcart_orders AS o
INNER JOIN (
    SELECT
        userid,
        SUM(CASE WHEN status IN ('C','D') THEN 1 ELSE 0 END) AS bad_statuses,
        SUM(CASE WHEN status IN ('A','B') THEN 1 ELSE 0 END) AS good_statuses
    FROM xcart_orders
    GROUP BY userid
    -- HAVING filters on the aggregated per-user counts.
    HAVING SUM(CASE WHEN status IN ('C','D') THEN 1 ELSE 0 END) = 0
       AND SUM(CASE WHEN status IN ('A','B') THEN 1 ELSE 0 END) > 0
) AS u
    ON u.userid = o.userid
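Please be aware that when a query uses GROUP BY, any field selected through xcart_orders.* that is neither aggregated nor determined by the grouping column is non-deterministic: MySQL returns the value from an arbitrary row of the group (or rejects the query when ONLY_FULL_GROUP_BY is enabled). If you need a particular value, select it explicitly, for example with an aggregate function.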
First you GROUP BY user_id to check whether the user has any status other than 'A' or 'B'.
Then you select orders from those user_id:
SQL DEMO
-- Step 1: users whose orders are all in status A or B, i.e. the number of
-- A/B orders equals the total number of orders.
SELECT o.`user_id`
FROM orders1 AS o
GROUP BY o.`user_id`
HAVING SUM(CASE WHEN o.`status` IN ('A', 'B') THEN 1 ELSE 0 END) = COUNT(*);

-- Step 2: every order belonging to one of those users.
SELECT o.*
FROM orders1 AS o
INNER JOIN (
    SELECT i.`user_id`
    FROM orders1 AS i
    GROUP BY i.`user_id`
    HAVING SUM(CASE WHEN i.`status` IN ('A', 'B') THEN 1 ELSE 0 END) = COUNT(*)
) AS ab_only
    ON ab_only.`user_id` = o.`user_id`;
OUTPUT

Why is my INNER JOIN so slow? Server times out

My relatively simple query is taking so long to execute that the server times out. Incidentally, if I run the sub-query on its own, it executes very quickly.
Essentially I'm trying to get the first date for each game_id, then get the corresponding score and duration...
My query:
-- For each (uid, lesson_id, level, game_id) find the earliest date, then join
-- back to the same table to read the score and duration recorded on that date.
SELECT
sq.*,
up.score AS score,
up.duration AS duration
FROM (
-- Earliest date per (uid, lesson_id, level, game_id) among rows with score >= 0.
SELECT
up.uid AS uid,
up.lesson_id AS lesson_id,
up.level AS level,
up.game_id AS game_id,
MIN(up.date) AS first_date
FROM cdu_user_progress up
-- score is an int column but is compared with the string literal '0'.
WHERE (up.score >= '0')
GROUP BY up.uid, up.lesson_id, up.level, up.game_id
) sq
-- Self-join on all four key columns plus the earliest date.
INNER JOIN cdu_user_progress up ON up.uid = sq.uid AND up.lesson_id = sq.lesson_id AND up.level = sq.level AND up.game_id = sq.game_id AND up.date = sq.first_date
-- Outer GROUP BY collapses rows that share the same key; up.score and
-- up.duration are not aggregated, so with several rows on the same first date
-- the values returned come from an unspecified one of them.
GROUP BY sq.uid, sq.lesson_id, sq.level, sq.game_id
cdu_user_progress is:
------------------------------------------------------------------------------------
|id |uid |lesson_id |game_id |level |score |duration |date |
------------------------------------------------------------------------------------
Explain:
Field Type Null Key Default Extra
--------------------------------------------------------------
id int(11) NO PRI NULL auto_increment
uid int(11) NO NULL
lesson_id int(11) NO NULL
game_id int(11) NO NULL
level int(11) NO NULL
score int(11) NO NULL
duration int(11) NO NULL
date int(11) NO NULL
First, I don't think you need the outer group by, unless you have duplicates by date. If so, perhaps you can use id instead of or in addition to date:
-- Score and duration of the first attempt per (uid, lesson_id, level, game_id).
-- The derived table must be aliased sq, because that is the name used in the
-- select list and in the join condition.
SELECT sq.*, up.score AS score, up.duration AS duration
FROM (SELECT up.uid, up.lesson_id, up.level, up.game_id,
             MIN(up.date) AS first_date
      FROM cdu_user_progress up
      WHERE up.score >= 0
      GROUP BY up.uid, up.lesson_id, up.level, up.game_id
     ) sq INNER JOIN
     cdu_user_progress up
     ON up.uid = sq.uid AND up.lesson_id = sq.lesson_id AND
        up.level = sq.level AND up.game_id = sq.game_id AND
        up.date = sq.first_date;
The best indexes for this query are cdu_user_progress(score) and cdu_user_progress(uid, lesson_id, level, game_id, date). If the first query runs fast, the second index should be a big help.

SQL query joining a few tables (MySQL)

I need a "little" help with an SQL query (MySQL).
I have the following tables:
COURIERS table:
+------------+
| COURIER_ID |
+------------+
DELIVERIES table:
+-------------+------------+------------+
| DELIVERY_ID | COURIER_ID | START_DATE |
+-------------+------------+------------+
ORDERS table:
+----------+-------------+-------------+
| ORDER_ID | DELIVERY_ID | FINISH_DATE |
+----------+-------------+-------------+
COORDINATES table:
+-------------+-----+-----+------+
| DELIVERY_ID | LAT | LNG | DATE |
+-------------+-----+-----+------+
In the real database I have more columns in each table, but for this example the above columns are enough.
What do I need?
An SQL query that returns all couriers [COURIER_ID], their last
delivery [DELIVERY_ID] (based on last START_DATE), the
delivery's last coordinate [LAT and LNG] (based on last DATE) and the remaining orders count (total of orders of the last delivery that have no FINISH_DATE).
A courier can have no deliveries, in this case I want DELIVERY_ID =
NULL, LAT = NULL and LNG = NULL in the result.
A delivery can have no coordinates, in this case I want LAT = NULL
and LNG = NULL in the result.
What was I able to do?
-- Asker's attempt: per courier, the latest delivery, that delivery's latest
-- coordinate, and the number of its orders without a FINISH_DATE.
SELECT c.`COURIER_ID`,
d.`DELIVERY_ID`,
r.`LAT`,
r.`LNG`,
-- Correlated subquery: unfinished orders of the joined delivery.
(SELECT COUNT(DISTINCT `ORDER_ID`)
FROM `ORDERS`
WHERE `DELIVERY_ID` = d.`DELIVERY_ID`
AND `FINISH_DATE` IS NULL) AS REMAINING_ORDERS
FROM `COURIERS` AS c
LEFT JOIN `DELIVERIES` AS d USING (`COURIER_ID`)
LEFT JOIN `COORDINATES` AS r ON r.`DELIVERY_ID` = d.`DELIVERY_ID`
-- Keep only the delivery whose START_DATE equals the courier's MAX(START_DATE);
-- when the courier has no delivery (MAX is NULL) keep the NULL-extended row.
-- The same correlated MAX subquery is written twice.
WHERE (CASE WHEN
(SELECT MAX(`START_DATE`)
FROM `DELIVERIES`
WHERE `COURIER_ID` = c.`COURIER_ID`) IS NULL THEN d.`START_DATE` IS NULL ELSE d.`START_DATE` =
(SELECT MAX(`START_DATE`)
FROM `DELIVERIES`
WHERE `COURIER_ID` = c.`COURIER_ID`) END)
-- Same pattern for the coordinate: keep the row whose DATE equals the
-- delivery's MAX(DATE), or the NULL-extended row when there is none.
AND (CASE WHEN
(SELECT MAX(`DATE`)
FROM `COORDINATES`
WHERE `DELIVERY_ID` = d.`DELIVERY_ID`) IS NULL THEN r.`DATE` IS NULL ELSE r.`DATE` =
(SELECT MAX(`DATE`)
FROM `COORDINATES`
WHERE `DELIVERY_ID` = d.`DELIVERY_ID`) END)
-- One row per courier; the d.* and r.* columns are not aggregated.
GROUP BY c.`COURIER_ID`
ORDER BY d.`START_DATE` DESC
The problem is that this query is very slow (from 5 to 20 seconds) when I have over 5k COORDINATES, and sometimes it does not return all couriers.
Thank you so much for any solution.
Try this:
-- All couriers with their latest delivery, that delivery's latest coordinate
-- and the number of its unfinished orders.
-- "Latest" rows are found by joining each table to its own MAX(...) per group,
-- which is deterministic (unlike ORDER BY inside a subquery followed by GROUP BY).
-- Every derived table has a distinct alias. FINISH_DATE is not selected from
-- DELIVERIES because the shown schema only has it on ORDERS.
SELECT
    c.COURIER_ID,
    d.DELIVERY_ID,
    d.START_DATE,
    r.LAT,
    r.LNG,
    r.`DATE`,
    COALESCE(o.NoOfOrders, 0) AS NoOfOrders
FROM COURIERS AS c
-- Latest START_DATE per courier ...
LEFT JOIN (
    SELECT COURIER_ID, MAX(START_DATE) AS START_DATE
    FROM DELIVERIES
    GROUP BY COURIER_ID
) AS last_d
    ON last_d.COURIER_ID = c.COURIER_ID
-- ... and the delivery row carrying that date.
LEFT JOIN DELIVERIES AS d
    ON d.COURIER_ID = last_d.COURIER_ID
   AND d.START_DATE = last_d.START_DATE
-- Latest coordinate DATE per delivery ...
LEFT JOIN (
    SELECT DELIVERY_ID, MAX(`DATE`) AS `DATE`
    FROM COORDINATES
    GROUP BY DELIVERY_ID
) AS last_r
    ON last_r.DELIVERY_ID = d.DELIVERY_ID
-- ... and the coordinate row carrying that date.
LEFT JOIN COORDINATES AS r
    ON r.DELIVERY_ID = last_r.DELIVERY_ID
   AND r.`DATE` = last_r.`DATE`
-- Unfinished orders per delivery; COALESCE above turns "none" into 0.
LEFT JOIN (
    SELECT DELIVERY_ID, COUNT(*) AS NoOfOrders
    FROM ORDERS
    WHERE FINISH_DATE IS NULL
    GROUP BY DELIVERY_ID
) AS o
    ON o.DELIVERY_ID = d.DELIVERY_ID;
I haven't been able to test this query since I don't have a mysql database set up right now, much less with this schema and sample data. But I think this will work for you:
-- All couriers with their latest delivery, that delivery's latest coordinate
-- and the number of its unfinished orders.
-- Table names are upper case to match the question's schema (MySQL table names
-- are case-sensitive on case-sensitive file systems).
select
    c.courier_id
  , d.delivery_id
  , co.lat
  , co.lng
  , coalesce(oc.cnt, 0) as remaining_orders
from
  COURIERS c
  left join (
    -- Latest delivery per courier: the max start_date must be computed per
    -- courier_id (grouping by delivery_id would return every delivery).
    select
        dl.delivery_id
      , dl.courier_id
    from
      DELIVERIES dl
      inner join (
        select
            dm.courier_id
          , max(dm.start_date) as start_date
        from
          DELIVERIES dm
        group by
          dm.courier_id
      ) dmax on dmax.courier_id = dl.courier_id and dmax.start_date = dl.start_date
  ) d on d.courier_id = c.courier_id
  left join (
    -- Latest coordinate per delivery.
    select
        cr.delivery_id
      , cr.lat
      , cr.lng
    from
      COORDINATES cr
      inner join (
        select
            cm.delivery_id
          , max(cm.date) as date
        from
          COORDINATES cm
        group by
          cm.delivery_id
      ) cmax on cmax.delivery_id = cr.delivery_id and cmax.date = cr.date
  ) co on co.delivery_id = d.delivery_id
  left join (
    -- Orders without a finish date, counted per delivery.
    select
        o.delivery_id
      , count(o.order_id) as cnt
    from
      ORDERS o
    where
      o.finish_date is null
    group by
      o.delivery_id
  ) oc on oc.delivery_id = d.delivery_id