Complex query extremely slow when grouping or ordering - index suggestions? - mysql

So, I've been given the task of replicating, within MySQL, functionality we currently handle in application code.
The query below works beautifully, bringing back 245,000 rows in 40ms; however, as soon as you touch it with a GROUP BY or ORDER BY, it takes over 6s.
Does anyone have suggestions on what changes need to be made to the indexes, or how the query could be modified to improve it?
Thanks
Without any grouping or ordering
select
s.id as sensorid,
s.sensortypeid,
COALESCE(s.pulserate, 1) as pulserate,
COALESCE(s.correctionFactor, 1) as correctionFactor,
ur.id as unitrateid,
COALESCE(ur.priceperkwh, 0) as priceperkwh,
COALESCE(ur.duosCharges, 0) as duosCharges,
IF(t.blnnonunitratecharges, t.nonunitratecharge/48, 0) as nonunitratecost,
IF(t.blnFeedIn, COALESCE(t.feedInRate, 0), 0) as feedInRate,
IF(t.blnRoc, COALESCE(t.rocRate, 0), 0) as rocRate,
from_unixtime(FLOOR(UNIX_TIMESTAMP(srs.dateTimeStamp)/(30*60))*(30*60)) as timeKey
from sensorreadings srs
inner join sensorpoints sp on (sp.id = srs.sensorpointid)
inner join sensors s on (s.id = sp.sensorid)
left join unitrates ur on ur.id = (
select
ur.id
from unitrates ur, tariffs t, companyhubs ch
where
ur.tariffid = t.id and
t.companyid = ch.companyid and
ch.hubid = s.hubid and
t.utilitytypeid = s.utilitytypeid and
(srs.dateTimeStamp between t.startdate and t.enddate) and
((time(srs.dateTimeStamp) between ur.starttime and ur.endtime) and
(ur.dayMask & POW(2, WEEKDAY(srs.dateTimeStamp)) <> 0) and
(ur.monthMask & POW(2, MONTH(srs.dateTimeStamp) - 1) <> 0))
order by
t.startdate desc,
ur.starttime desc
limit 0, 1
)
left join tariffs t on (t.id = ur.tariffid)
where
s.id = 5289
With grouping and ordering
select
s.id as sensorid,
s.sensortypeid,
COALESCE(s.pulserate, 1) as pulserate,
COALESCE(s.correctionFactor, 1) as correctionFactor,
ur.id as unitrateid,
COALESCE(ur.priceperkwh, 0) as priceperkwh,
COALESCE(ur.duosCharges, 0) as duosCharges,
IF(t.blnnonunitratecharges, t.nonunitratecharge/48, 0) as nonunitratecost,
IF(t.blnFeedIn, COALESCE(t.feedInRate, 0), 0) as feedInRate,
IF(t.blnRoc, COALESCE(t.rocRate, 0), 0) as rocRate,
min(srs.reading) as minReading,
avg(srs.reading) as avgReading,
from_unixtime(FLOOR(UNIX_TIMESTAMP(srs.dateTimeStamp)/(30*60))*(30*60)) as timeKey
from sensorreadings srs
inner join sensorpoints sp on (sp.id = srs.sensorpointid)
inner join sensors s on (s.id = sp.sensorid)
left join unitrates ur on ur.id = (
select
ur.id
from unitrates ur, tariffs t, companyhubs ch
where
ur.tariffid = t.id and
t.companyid = ch.companyid and
ch.hubid = s.hubid and
t.utilitytypeid = s.utilitytypeid and
(srs.dateTimeStamp between t.startdate and t.enddate) and
((time(srs.dateTimeStamp) between ur.starttime and ur.endtime) and
(ur.dayMask & POW(2, WEEKDAY(srs.dateTimeStamp)) <> 0) and
(ur.monthMask & POW(2, MONTH(srs.dateTimeStamp) - 1) <> 0))
order by
t.startdate desc,
ur.starttime desc
limit 0, 1
)
left join tariffs t on (t.id = ur.tariffid)
where
s.id = 5289
group by timeKey
order by timeKey desc
Schemas
CREATE TABLE `sensorreadings` (
`sensorpointid` int(11) NOT NULL DEFAULT '0',
`reading` decimal(15,5) NOT NULL,
`dateTimeStamp` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
PRIMARY KEY (`sensorpointid`,`dateTimeStamp`),
KEY `sensormetricid` (`sensormetricid`),
KEY `sensorreadings_timestamp` (`dateTimeStamp`,`sensorpointid`),
KEY `sensorpointid` (`sensorpointid`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `sensorpoints` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`sensorid` int(11) DEFAULT NULL,
`hubpointid` int(11) DEFAULT NULL,
`pointlabel` varchar(255) NOT NULL,
`pointhash` varchar(255) NOT NULL,
`target` decimal(10,0) DEFAULT NULL,
`tolerance` decimal(10,0) DEFAULT '0',
`blnlivepoint` int(1) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
KEY `FK_sensorpoints_sensors` (`sensorid`),
CONSTRAINT `FK_sensorpoints_sensors` FOREIGN KEY (`sensorid`) REFERENCES `sensors` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=8824 DEFAULT CHARSET=latin1;
CREATE TABLE `sensors` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`hubid` int(11) DEFAULT NULL,
`sensortypeid` int(11) NOT NULL DEFAULT '5',
`pulserate` decimal(10,6) DEFAULT NULL,
`utilitytypeid` int(11) NOT NULL DEFAULT '1',
`correctionfactor` decimal(10,3) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `FK_sensors_sensortypes` (`sensortypeid`),
KEY `FK_sensors_hubs` (`hubid`),
KEY `FK_sensors_utilitytypes` (`utilitytypeid`),
CONSTRAINT `FK_sensors_hubs` FOREIGN KEY (`hubid`) REFERENCES `hubs` (`id`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `FK_sensors_sensortypes` FOREIGN KEY (`sensortypeid`) REFERENCES `sensortypes` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=5503 DEFAULT CHARSET=latin1;
CREATE TABLE `tariffs` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`companyid` int(11) DEFAULT NULL,
`utilitytypeid` int(11) DEFAULT NULL,
`startdate` date NOT NULL,
`enddate` date NOT NULL,
`blnnonunitratecharges` int(1) DEFAULT '0',
`nonunitratecharge` decimal(16,8) DEFAULT '0.00000000',
`blnFeedIn` int(1) DEFAULT '0',
`blnRoc` int(1) DEFAULT '0',
`rocRate` decimal(16,8) DEFAULT '0.00000000',
`feedInRate` decimal(16,8) DEFAULT '0.00000000',
PRIMARY KEY (`id`),
KEY `companyid` (`companyid`,`utilitytypeid`,`startdate`,`enddate`),
KEY `startdate` (`startdate`,`enddate`)
) ENGINE=InnoDB AUTO_INCREMENT=1107 DEFAULT CHARSET=latin1;
CREATE TABLE `unitrates` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`tariffid` int(11) NOT NULL,
`priceperkwh` decimal(16,8) NOT NULL,
`starttime` time NOT NULL,
`endtime` time NOT NULL,
`duoscharges` decimal(10,5) DEFAULT NULL,
`dayMask` int(11) DEFAULT '127',
`monthMask` int(11) DEFAULT '4095',
PRIMARY KEY (`id`),
KEY `FK_unitrates_tariffs` (`tariffid`),
KEY `times` (`starttime`,`endtime`),
KEY `masks` (`dayMask`,`monthMask`),
CONSTRAINT `FK_unitrates_tariffs` FOREIGN KEY (`tariffid`) REFERENCES `tariffs` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=3104 DEFAULT CHARSET=latin1;
Explains
Without groups/ordering
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|----|--------------------|-------|--------|---------------------------------|-------------------------|---------|-------------------------------|------|----------------------------------------------|
| 1 | PRIMARY | s | const | PRIMARY | PRIMARY | 4 | const | 1 | NULL |
| 1 | PRIMARY | sp | ref | PRIMARY,FK_sensorpoints_sensors | FK_sensorpoints_sensors | 5 | const | 1 | Using index |
| 1 | PRIMARY | srs | ref | PRIMARY,sensorpointid | PRIMARY | 4 | dbnameprod.sp.id | 211 | Using index |
| 1 | PRIMARY | ur | eq_ref | PRIMARY | PRIMARY | 4 | func | 1 | Using where |
| 1 | PRIMARY | t | eq_ref | PRIMARY | PRIMARY | 4 | dbnameprod.ur.tariffid | 1 | NULL |
| 2 | DEPENDENT SUBQUERY | ch | ref | hubid | hubid | 5 | const | 1 | Using where; Using temporary; Using filesort |
| 2 | DEPENDENT SUBQUERY | t | ref | PRIMARY,companyid,startdate | companyid | 10 | dbnameprod.ch.companyid,const | 1 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | ur | ref | FK_unitrates_tariffs,times | FK_unitrates_tariffs | 4 | dbnameprod.t.id | 1 | Using where |
With ordering/grouping
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|----|--------------------|-------|--------|---------------------------------------------------------------|-------------------------|---------|-------------------------------|------|----------------------------------------------|
| 1 | PRIMARY | s | const | PRIMARY | PRIMARY | 4 | const | 1 | Using temporary; Using filesort |
| 1 | PRIMARY | sp | ref | PRIMARY,FK_sensorpoints_sensors | FK_sensorpoints_sensors | 5 | const | 1 | Using index |
| 1 | PRIMARY | srs | ref | PRIMARY,sensormetricid,sensorreadings_timestamp,sensorpointid | PRIMARY | 4 | dbnameprod.sp.id | 211 | Using index |
| 1 | PRIMARY | ur | eq_ref | PRIMARY | PRIMARY | 4 | func | 1 | Using where |
| 1 | PRIMARY | t | eq_ref | PRIMARY | PRIMARY | 4 | dbnameprod.ur.tariffid | 1 | NULL |
| 2 | DEPENDENT SUBQUERY | ch | ref | hubid | hubid | 5 | const | 1 | Using where; Using temporary; Using filesort |
| 2 | DEPENDENT SUBQUERY | t | ref | PRIMARY,companyid,startdate | companyid | 10 | dbnameprod.ch.companyid,const | 1 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | ur | ref | FK_unitrates_tariffs,times | FK_unitrates_tariffs | 4 | dbnameprod.t.id | 1 | Using where |

Well, you are grouping and ordering by a calculated field, timeKey, and the database has no index on that field.
So the database has to compute that value for every row before it can group, and then sort the result; without an index it cannot speed up those steps.
Suggestion: create a real time column in the table and add an index on that field.
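A minimal sketch of that suggestion (the column, index, and trigger names here are my own, and whether you populate the column with a trigger or from the application is up to you):
-- Materialise the half-hour bucket so GROUP BY / ORDER BY can use an index.
ALTER TABLE sensorreadings
  ADD COLUMN timeKey DATETIME NULL,
  ADD INDEX idx_srs_point_timekey (sensorpointid, timeKey);
-- Backfill existing rows with the same expression the query already uses.
UPDATE sensorreadings
SET timeKey = FROM_UNIXTIME(FLOOR(UNIX_TIMESTAMP(dateTimeStamp) / (30*60)) * (30*60));
-- Keep it populated for new rows (or do this in the application instead).
CREATE TRIGGER trg_srs_timekey BEFORE INSERT ON sensorreadings
FOR EACH ROW
SET NEW.timeKey = FROM_UNIXTIME(FLOOR(UNIX_TIMESTAMP(NEW.dateTimeStamp) / (30*60)) * (30*60));
The outer query can then GROUP BY / ORDER BY srs.timeKey instead of the calculated expression.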

Before looking into the performance, let's discuss the likelihood that the query is broken.
When doing a GROUP BY, all the non-aggregated SELECT expressions should be included in the GROUP BY. Otherwise, an arbitrary value from each group can be delivered.
Furthermore, this pattern:
SELECT ..., AVG(a.x)
FROM a
JOIN b ON ...
GROUP BY a.id
usually leads to an inflation of the number of rows (due to the JOIN), followed by computing the aggregates over the inflated number of rows. Add COUNT(*) to see if I am right for your case. For COUNT, the answer can be blatantly wrong; for AVG it can be subtly wrong; for MIN it is probably correct. And finally the GROUP BY deflates the number of rows.
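For example, here is that check applied to the generic pattern above (a sketch only; the tables a and b and the join column a_id are placeholders, not your schema):
SELECT a.id, COUNT(*) AS joined_rows, AVG(a.x) AS avg_x
FROM a
JOIN b ON b.a_id = a.id      -- hypothetical join condition
GROUP BY a.id;
-- joined_rows counts rows after the JOIN; if it is bigger than the number of
-- rows you expected per group, the aggregates are being fed inflated data
-- (COUNT is then blatantly wrong, and AVG is subtly wrong whenever the
-- averaged value varies within the group).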
The usual cure is to compute the aggregates without the JOINs (I am not sure if it is possible in your case). Maybe something like...
...
JOIN (
SELECT min(srs.reading) as minReading,
avg(srs.reading) as avgReading,
from_unixtime(FLOOR(UNIX_TIMESTAMP(srs.dateTimeStamp)/(30*60))*(30*60)) as timeKey
FROM sensorreadings AS srs
GROUP BY timeKey
) AS r
JOIN ...
It is usually a 'bad' idea to have date and time in separate columns. A DATETIME or TIMESTAMP is easier to compare against, etc. (I am unclear on what you are doing with your separate date and time.) This can also be a performance issue.
The 3 tables lead to a bunch of JOINing, making the WHERE s.id = 5289 hard to transfer to srs. You may need to rethink the schema as another performance issue.
I realize the values are different, but could
order by
t.startdate desc,
ur.starttime desc
be replaced by
order by srs.dateTimeStamp
That might make it possible to use an index.
I'm surprised you are using DECIMAL(m,n) instead of FLOAT for sensor readings.
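If you decide to switch, the change would be something like this (note that FLOAT trades exactness for smaller rows and faster comparisons, which may or may not be acceptable for metering data):
ALTER TABLE sensorreadings MODIFY reading FLOAT NOT NULL;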

Related

How to use correct indexes with a double inner join query?

I have a query with 2 INNER JOIN statements that only fetches a few columns, but it is very slow even though I have indexes on all the required columns.
My query
SELECT
dysfonctionnement,
montant,
listRembArticles,
case when dys.reimputation is not null then dys.reimputation else dys.responsable end as responsable_final
FROM
db.commandes AS com
INNER JOIN db.dysfonctionnements AS dys ON com.id_commande = dys.id_commande
INNER JOIN db.pe AS pe ON com.code_pe = pe.pe_id
WHERE
com.prestataireLAD REGEXP '.*'
AND pe_nom REGEXP 'bordeaux|chambéry-annecy|grenoble|lyon|marseille|metz|montpellier|nancy|nice|nimes|rouen|strasbourg|toulon|toulouse|vitry|vitry bis 1|vitry bis 2|vlg'
AND com.date_livraison BETWEEN '2022-06-11 00:00:00'
AND '2022-07-08 00:00:00';
It takes around 20 seconds to compute and fetch 4123 rows.
The problem
In order to find out what's wrong and why it is so slow, I used the EXPLAIN statement; here is the output:
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|----|-------------|-------|------------|--------|----------------------------|-------------|---------|------------------------|--------|----------|-------------|
| 1 | SIMPLE | dys | | ALL | id_commande,id_commande_2 | | | | 878588 | 100.00 | Using where |
| 1 | SIMPLE | com | | eq_ref | id_commande,date_livraison | id_commande | 110 | db.dys.id_commande | 1 | 7.14 | Using where |
| 1 | SIMPLE | pe | | ref | pe_id | pe_id | 5 | db.com.code_pe | 1 | 100.00 | Using where |
I can see that the join on dysfonctionnements goes wrong and doesn't use a key, even though it could...
Table definitions
commandes (included relevant columns only)
CREATE TABLE `commandes` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`id_commande` varchar(36) NOT NULL DEFAULT '',
`date_commande` datetime NOT NULL,
`date_livraison` datetime NOT NULL,
`code_pe` int(11) NOT NULL,
`traitement_dysfonctionnement` tinyint(4) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `id_commande` (`id_commande`),
KEY `date_livraison` (`date_livraison`),
KEY `traitement_dysfonctionnement` (`traitement_dysfonctionnement`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
dysfonctionnements (again, relevant columns only)
CREATE TABLE `dysfonctionnements` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`id_commande` varchar(36) DEFAULT NULL,
`dysfonctionnement` varchar(150) DEFAULT NULL,
`responsable` varchar(50) DEFAULT NULL,
`reimputation` varchar(50) DEFAULT NULL,
`montant` float DEFAULT NULL,
`listRembArticles` text,
PRIMARY KEY (`id`),
UNIQUE KEY `id_commande` (`id_commande`,`dysfonctionnement`),
KEY `id_commande_2` (`id_commande`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
pe (again, relevant columns only)
CREATE TABLE `pe` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`pe_id` int(11) DEFAULT NULL,
`pe_nom` varchar(30) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `pe_nom` (`pe_nom`),
KEY `pe_id` (`pe_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
Investigation
If I remove the db.pe table from the query and the WHERE clause on pe_nom, the query takes 1.7 seconds to fetch 7k rows, and with the EXPLAIN statement, I can see it is using keys as I expect it to do:
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
|----|-------------|-------|------------|-------|----------------------------|----------------|---------|------------------------|--------|----------|-----------------------------------------------|
| 1 | SIMPLE | com | | range | id_commande,date_livraison | date_livraison | 5 | | 389558 | 100.00 | Using index condition; Using where; Using MRR |
| 1 | SIMPLE | dys | | ref | id_commande,id_commande_2 | id_commande_2 | 111 | ooshop.com.id_commande | 1 | 100.00 | |
I'm open to any suggestions; I see no reason for it not to use the key, since it does on a very similar query, and it definitely makes the query faster...
I had a similar experience when the MySQL optimiser chose a join order that was far from optimal. At that time I used the MySQL-specific STRAIGHT_JOIN operator to override the default optimiser behaviour. In your case I would try this:
SELECT
dysfonctionnement,
montant,
listRembArticles,
case when dys.reimputation is not null then dys.reimputation else dys.responsable end as responsable_final
FROM
db.commandes AS com
STRAIGHT_JOIN db.dysfonctionnements AS dys ON com.id_commande = dys.id_commande
INNER JOIN db.pe AS pe ON com.code_pe = pe.pe_id
WHERE
com.prestataireLAD REGEXP '.*'
AND pe_nom REGEXP 'bordeaux|chambéry-annecy|grenoble|lyon|marseille|metz|montpellier|nancy|nice|nimes|rouen|strasbourg|toulon|toulouse|vitry|vitry bis 1|vitry bis 2|vlg'
AND com.date_livraison BETWEEN '2022-06-11 00:00:00'
AND '2022-07-08 00:00:00';
Also, in your WHERE clause, the REGEXP on pe_nom could probably be changed to an IN list, which I assume can use an index.
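For instance, that predicate would become something like the following (a sketch; the list is just the alternatives pulled out of your REGEXP, so double-check they match the exact pe_nom values stored):
AND pe_nom IN ('bordeaux', 'chambéry-annecy', 'grenoble', 'lyon', 'marseille',
               'metz', 'montpellier', 'nancy', 'nice', 'nimes', 'rouen',
               'strasbourg', 'toulon', 'toulouse', 'vitry', 'vitry bis 1',
               'vitry bis 2', 'vlg')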
Remove com.prestataireLAD REGEXP '.*'. The optimizer probably won't realize that this has no impact on the result set. If you are dynamically building the WHERE clause, then eliminate anything else you can.
id_commande_2 is redundant; in queries where it might be useful, the UNIQUE key can take care of it.
These indexes might help:
com: INDEX(date_livraison, id_commande, code_pe)
pe: INDEX(pe_nom, pe_id)
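In DDL terms, that would look something like this (index names are my own; the DROP follows from the redundancy noted above):
ALTER TABLE commandes ADD INDEX idx_com_livraison_cmd_pe (date_livraison, id_commande, code_pe);
ALTER TABLE pe ADD INDEX idx_pe_nom_id (pe_nom, pe_id);
ALTER TABLE dysfonctionnements DROP INDEX id_commande_2;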

Horrible MySQL index behavior with a simplest IN statement

I have found that MySQL (Win 7 64-bit, 5.6.14) does not use the index properly if I feed the IN clause from a table output. The USER table contains 900k records.
If I use the IN (_SOME_TABLE_OUTPUT_) syntax, I get a full scan of all 900k users and the query runs forever.
If I use the IN ('CONCRETE','VALUES') syntax, I get correct index usage.
How can I make MySQL finally USE the index?
1st case:
explain SELECT gu.id FROM USER gu WHERE gu.uuid in
(select '11b6a540-0dc5-44e0-877d-b3b83f331231' union
select '11b6a540-0dc5-44e0-877d-b3b83f331232');
+----+--------------------+------------+-------+---------------+------+---------+------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+--------------------+------------+-------+---------------+------+---------+------+--------+--------------------------+
| 1 | PRIMARY | gu | index | NULL | uuid | 257 | NULL | 829930 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | NULL | NULL | NULL | NULL | NULL | NULL | NULL | No tables used |
| 3 | DEPENDENT UNION | NULL | NULL | NULL | NULL | NULL | NULL | NULL | No tables used |
| NULL | UNION RESULT | <union2,3> | ALL | NULL | NULL | NULL | NULL | NULL | Using temporary |
+----+--------------------+------------+-------+---------------+------+---------+------+--------+--------------------------+
2nd case:
explain SELECT gu.id FROM USER gu WHERE gu.uuid in
('11b6a540-0dc5-44e0-877d-b3b83f331231');
+----+-------------+-------+------+---------------+------+---------+-------+------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+------+---------+-------+------+--------------------------+
| 1 | SIMPLE | gu | ref | uuid | uuid | 257 | const | 1 | Using where; Using index |
+----+-------------+-------+------+---------------+------+---------+-------+------+--------------------------+
Table structure:
CREATE TABLE `USER` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`version` bigint(20) NOT NULL,
`email` varchar(255) DEFAULT NULL,
`uuid` varchar(255) NOT NULL,
`partner_id` bigint(20) NOT NULL,
`password` varchar(255) DEFAULT NULL,
`date_created` datetime DEFAULT NULL,
`last_updated` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `unique-email` (`partner_id`,`email`),
KEY `uuid` (`uuid`),
CONSTRAINT `fk_USER_partner` FOREIGN KEY (`partner_id`) REFERENCES `partner` (`id`) ON DELETE CASCADE,
CONSTRAINT `FKB2D9FEBE725C505E` FOREIGN KEY (`partner_id`) REFERENCES `partner` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3315452 DEFAULT CHARSET=latin1
FORCE INDEX and USE INDEX statements don't change anything.
Demonstration SQLfiddle: http://sqlfiddle.com/#!2/c607e1/2
In fact, I faced such a problem before, and it turned out that I had one table with a single column set to UTF-8 while the other tables were latin1. No matter what I did, MySQL insisted on using no indexes. The problem is quite well described in this blog post: Slow queries in MySQL due to collation problems. Once you manage to fix the character set, I believe either of the queries will work.
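A sketch of that kind of fix (the table and column names here are placeholders for whichever side has the mismatched character set; convert it to match the rest of the schema, latin1 in this case):
-- Convert a single offending column...
ALTER TABLE some_table MODIFY some_column VARCHAR(255) CHARACTER SET latin1 NOT NULL;
-- ...or convert the whole table at once.
ALTER TABLE some_table CONVERT TO CHARACTER SET latin1;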
An inner join on your virtual table might give you better performance. Try something along these lines.
SELECT gu.id
FROM USER gu
INNER JOIN (
select '11b6a540-0dc5-44e0-877d-b3b83f331231' uuid
union all
select '11b6a540-0dc5-44e0-877d-b3b83f331232') ids
on gu.uuid = ids.uuid;

Limit With order by takes long time in mysql for only 10 records

Here is my query, which takes more than 5 seconds to fetch 10 records, and the time gets longer as the offset in the LIMIT clause grows.
The table contains 12 million records.
SELECT device_id
,media_id
,limit1.play_date
,limit1.start_time
,limit1.end_time
,SUBTIME(limit1.end_time, limit1.start_time) AS playback_duration
FROM device_media_log
INNER JOIN (
SELECT play_date
,start_time
,end_time
,device_media_id
FROM device_media_log
ORDER BY play_date DESC
,start_time DESC
,end_time DESC limit 0
,10
) AS limit1 ON device_media_log.device_media_id = limit1.device_media_id;
explain plan::
+----+-------------+------------------+--------+---------------+---------+---------+------------------------+---------+----------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+------------------+--------+---------------+---------+---------+------------------------+---------+----------------+
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 10 | |
| 1 | PRIMARY | device_media_log | eq_ref | PRIMARY | PRIMARY | 8 | limit1.device_media_id | 1 | |
| 2 | DERIVED | device_media_log | ALL | NULL | NULL | NULL | NULL | 8345645 | Using filesort |
+----+-------------+------------------+--------+---------------+---------+---------+------------------------+---------+----------------+
here is create table::
CREATE TABLE `device_media_log` (
`device_media_id` bigint(20) NOT NULL AUTO_INCREMENT,
`device_id` int(11) NOT NULL DEFAULT '0',
`media_id` bigint(20) NOT NULL DEFAULT '0',
`playback_type_id` tinyint(4) NOT NULL DEFAULT '0',
`playback_id` int(11) NOT NULL DEFAULT '0',
`play_date` date DEFAULT NULL,
`start_time` time DEFAULT NULL,
`end_time` time DEFAULT NULL,
`client_id` bigint(20) DEFAULT NULL,
PRIMARY KEY (`device_media_id`),
KEY `Index_media_id` (`media_id`),
KEY `Index_device_id` (`device_id`),
KEY `Index_play_date` (`play_date`),
KEY `Index_start_time` (`start_time`),
KEY `Index_end_time` (`end_time`),
KEY `Index_client_id` (`client_id`)
)
ENGINE=InnoDB AUTO_INCREMENT=8366229 DEFAULT CHARSET=latin1
Describe after adding compound index
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
|----|-------------|------------------|-------|---------------|-----------------|---------|------|------|-------|
| 1 | SIMPLE | device_media_log | index | NULL | index_composite | 12 | NULL | 10 | |
Try this query
SELECT device_id
,media_id
,play_date
,start_time
,end_time
,SUBTIME(end_time, start_time) AS playback_duration
FROM
device_media_log
ORDER BY
play_date DESC
,start_time DESC
,end_time DESC
limit 0, 10;
There is no need for the subquery, since you are using its result directly.
Also, the EXPLAIN output shows that none of your existing indexes is used.
Create a compound index on the columns play_date, start_time, end_time:
ALTER TABLE device_media_log ADD INDEX device_media_log_date_time(play_date, start_time, end_time);
hope this helps...

Excluding large sets of objects from a query on a table with fast changing order

I have a table of products with a score column, which has a B-Tree index on it. I have a query which returns products that have not been shown to the user in the current session. I can't use simple pagination with LIMIT for it, because the result should be ordered by the score column, which can change between query calls.
My current solution works like this:
SELECT *
FROM products p
LEFT JOIN product_seen ps
ON (ps.session_id = ? AND p.product_id = ps.product_id )
WHERE ps.product_id is null
ORDER BY p.score DESC
LIMIT 30;
This works fine for the first few pages, but the response time grows linearly with the number of products already shown in the session, and passes the one-second mark by the time this number reaches ~300. Is there a way to speed this up in MySQL? Or should I solve this problem in an entirely different way?
Edit:
These are the two tables:
CREATE TABLE `products` (
`product_id` int(15) NOT NULL AUTO_INCREMENT,
`shop` varchar(15) NOT NULL,
`shop_id` varchar(25) NOT NULL,
`shop_category_id` varchar(20) DEFAULT NULL,
`shop_subcategory_id` varchar(20) DEFAULT NULL,
`shop_designer_id` varchar(20) DEFAULT NULL,
`shop_designer_name` varchar(40) NOT NULL,
`created_at` timestamp NULL DEFAULT NULL,
`product_url` varchar(255) NOT NULL,
`name` varchar(255) NOT NULL,
`description` mediumtext NOT NULL,
`price_cents` int(10) NOT NULL,
`list_image_url` varchar(255) NOT NULL,
`list_image_height` int(4) NOT NULL,
`ending` timestamp NULL DEFAULT NULL,
`category_id` int(5) NOT NULL,
`last_update` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`included_at` timestamp NULL DEFAULT NULL,
`hearts` int(5) NOT NULL,
`score` decimal(10,5) NOT NULL,
`rand_field` decimal(16,15) NOT NULL,
`last_score_update` timestamp NULL DEFAULT NULL,
`active` tinyint(1) NOT NULL DEFAULT '0',
PRIMARY KEY (`product_id`),
UNIQUE KEY `unique_shop_id` (`shop`,`shop_id`),
KEY `score_index` (`active`,`score`),
KEY `included_at_index` (`included_at`),
KEY `active_category_score` (`active`,`category_id`,`score`),
KEY `active_category` (`active`,`category_id`,`product_id`),
KEY `active_products` (`active`,`product_id`),
KEY `active_rand` (`active`,`rand_field`),
KEY `active_category_rand` (`active`,`category_id`,`rand_field`)
) ENGINE=InnoDB AUTO_INCREMENT=55985 DEFAULT CHARSET=utf8
CREATE TABLE `product_seen` (
`seenby_id` int(20) NOT NULL AUTO_INCREMENT,
`session_id` varchar(25) NOT NULL,
`product_id` int(15) NOT NULL,
`last_seen` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`sorting` varchar(10) NOT NULL,
`in_category` int(3) DEFAULT NULL,
PRIMARY KEY (`seenby_id`),
KEY `last_seen_index` (`last_seen`),
KEY `session_id` (`session_id`,`seenby_id`),
KEY `session_id_2` (`session_id`,`sorting`,`seenby_id`)
) ENGINE=InnoDB AUTO_INCREMENT=17431 DEFAULT CHARSET=utf8
Edit 2:
The query above is a simplification, this is the real query with EXPLAIN:
EXPLAIN SELECT
DISTINCT p.product_id AS id,
p.list_image_url AS image,
p.list_image_height AS list_height,
hearts,
active AS available,
(UNIX_TIMESTAMP( ) - ulp.last_action) AS last_loved
FROM `looksandgoods`.`products` p
LEFT JOIN `looksandgoods`.`user_likes_products` ulp
ON ( p.product_id = ulp.product_id AND ulp.user_id =1 )
LEFT JOIN `looksandgoods`.`product_seen` sb
ON (sb.session_id = 'y7lWunZKKABgMoDgzjwDjZw1'
AND sb.sorting = 'trend'
AND p.product_id = sb.product_id )
WHERE p.active =1
AND sb.product_id IS NULL
ORDER BY p.score DESC
LIMIT 30 ;
Explain output, there is still a temp table and filesort, although the keys for the join exist:
+----+-------------+-------+-------+----------------------------------------------------------------------------------------------------+------------------+---------+----------------------------------+------+----------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+----------------------------------------------------------------------------------------------------+------------------+---------+----------------------------------+------+----------------------------------------------+
| 1 | SIMPLE | p | range | score_index,active_category_score,active_category,active_products,active_rand,active_category_rand | score_index | 1 | NULL | 2299 | Using where; Using temporary; Using filesort |
| 1 | SIMPLE | ulp | ref | love_count_index,user_to_product_index,product_id | love_count_index | 9 | looksandgoods.p.product_id,const | 1 | |
| 1 | SIMPLE | sb | ref | session_id,session_id_2 | session_id | 77 | const | 711 | Using where; Not exists; Distinct |
+----+-------------+-------+-------+----------------------------------------------------------------------------------------------------+------------------+---------+----------------------------------+------+----------------------------------------------+
New answer
I think the problem with the real query is the DISTINCT clause. The implication is that either or both of the product_seen and user_likes_products tables can join multiple rows for each product_id which could potentially appear in the result set (given the somewhat disturbing lack of UNIQUE KEYs on the product_seen table), and this is the reason you've included the DISTINCT clause. Unfortunately, it also means MySQL will have to create a temp table to process the query.
Before I go any further, if it's possible to do...
ALTER TABLE product_seen ADD UNIQUE KEY (session_id, product_id, sorting);
...and...
ALTER TABLE user_likes_products ADD UNIQUE KEY (user_id, product_id);
...then the DISTINCT clause is redundant, and removing it should eliminate the problem. N.B. I'm not suggesting you necessarily need to add these keys, but rather just to confirm that these fields are always unique.
If it's not possible, then there may be another solution, but I'd need to know a lot more about the tables involved in the joins.
Old answer
An EXPLAIN for your query yields...
+----+-------------+-------+------+---------------+------------+---------+-------+------+-------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+------------+---------+-------+------+-------------------------+
| 1 | SIMPLE | p | ALL | NULL | NULL | NULL | NULL | 10 | Using filesort |
| 1 | SIMPLE | ps | ref | session_id | session_id | 27 | const | 1 | Using where; Not exists |
+----+-------------+-------+------+---------------+------------+---------+-------+------+-------------------------+
...which shows it's not using an index on the products table, so it's having to do a table scan and a filesort, which is why it's slow.
I noticed there's an index on (active, score) which you could use by changing the query to only show active products...
SELECT *
FROM products p
LEFT JOIN product_seen ps
ON (ps.session_id = ? AND p.product_id = ps.product_id )
WHERE p.active=TRUE AND ps.product_id is null
ORDER BY p.score DESC
LIMIT 30;
...which changes the EXPLAIN to...
+----+-------------+-------+-------+-----------------------------+-------------+---------+-------+------+-------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+-------+-----------------------------+-------------+---------+-------+------+-------------------------+
| 1 | SIMPLE | p | range | score_index,active_products | score_index | 1 | NULL | 10 | Using where |
| 1 | SIMPLE | ps | ref | session_id | session_id | 27 | const | 1 | Using where; Not exists |
+----+-------------+-------+-------+-----------------------------+-------------+---------+-------+------+-------------------------+
...which is now doing a range scan and no filesort, which should be much faster.
Or if you want it to also return inactive products, then you'll need to add an index on score only, with...
ALTER TABLE products ADD KEY (score);

Improve performance or redesign 'greatest-n-per-group' mysql query

I'm using MySQL5 and I currently have a query that gets me the info I need but I feel like it could be improved in terms of performance.
Here's the query I built (roughly following this guide) :
SELECT d.*, dc.date_change, dc.cwd, h.name as hub
FROM livedata_dom AS d
LEFT JOIN ( SELECT dc1.*
FROM livedata_domcabling as dc1
LEFT JOIN livedata_domcabling AS dc2
ON dc1.dom_id = dc2.dom_id AND dc1.date_change < dc2.date_change
WHERE dc2.dom_id IS NULL
ORDER BY dc1.date_change desc) AS dc ON (d.id = dc.dom_id)
LEFT JOIN livedata_hub AS h ON (d.id = dc.dom_id AND dc.hub_id = h.id)
WHERE d.cluster = 'localhost'
GROUP BY d.id;
EDIT: Using ORDER BY + GROUP BY to avoid getting multiple dom entries in case 'domcabling' has an entry with null date_change and another one with a date for the same 'dom'.
I feel like I'm killing a mouse with a bazooka. This query takes more than 3 seconds with only about 5k entries in 'livedata_dom' and 'livedata_domcabling'. Also, EXPLAIN tells me that 2 filesorts are used:
+----+-------------+------------+--------+-----------------------------+-----------------------------+---------+-----------------+------+----------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+------------+--------+-----------------------------+-----------------------------+---------+-----------------+------+----------------------------------------------+
| 1 | PRIMARY | d | ALL | NULL | NULL | NULL | NULL | 3 | Using where; Using temporary; Using filesort |
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 3 | |
| 1 | PRIMARY | h | eq_ref | PRIMARY | PRIMARY | 4 | dc.hub_id | 1 | |
| 2 | DERIVED | dc1 | ALL | NULL | NULL | NULL | NULL | 4 | Using filesort |
| 2 | DERIVED | dc2 | ref | livedata_domcabling_dc592d9 | livedata_domcabling_dc592d9 | 4 | live.dc1.dom_id | 2 | Using where; Not exists |
+----+-------------+------------+--------+-----------------------------+-----------------------------+---------+-----------------+------+----------------------------------------------+
How could I change this query to make it more efficient?
Using the dummy data (provided below), this is the expected result:
+-----+-------+---------+--------+----------+------------+-----------+---------------------+------+-----------+
| id | mb_id | prod_id | string | position | name | cluster | date_change | cwd | hub |
+-----+-------+---------+--------+----------+------------+-----------+---------------------+------+-----------+
| 249 | 47 | 47 | 47 | 47 | SuperDOM47 | localhost | NULL | NULL | NULL |
| 250 | 48 | 48 | 48 | 48 | SuperDOM48 | localhost | 2014-04-16 05:23:00 | 32A | megahub01 |
| 251 | 49 | 49 | 49 | 49 | SuperDOM49 | localhost | NULL | 22B | megahub01 |
+-----+-------+---------+--------+----------+------------+-----------+---------------------+------+-----------+
Basically I need 1 row for every 'dom' entry, with:
the 'domcabling' record with the highest date_change
null fields if no such record exists
at most ONE entry per dom may have a null date_change (a null datetime is considered older than any other datetime)
the name of the 'hub' when a 'domcabling' entry is found, null otherwise
CREATE TABLE + dummy INSERT for the 3 tables:
livedata_dom (about 5000 entries)
CREATE TABLE `livedata_dom` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`mb_id` varchar(12) NOT NULL,
`prod_id` varchar(8) NOT NULL,
`string` int(11) NOT NULL,
`position` int(11) NOT NULL,
`name` varchar(30) NOT NULL,
`cluster` varchar(9) NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `mb_id` (`mb_id`),
UNIQUE KEY `prod_id` (`prod_id`),
UNIQUE KEY `name` (`name`),
UNIQUE KEY `livedata_domgood_string_7bff074107b0e5a0_uniq` (`string`,`position`,`cluster`)
) ENGINE=InnoDB AUTO_INCREMENT=5485 DEFAULT CHARSET=latin1;
INSERT INTO `livedata_dom` VALUES (251,'49','49',49,49,'SuperDOM49','localhost'),(250,'48','48',48,48,'SuperDOM48','localhost'),(249,'47','47',47,47,'SuperDOM47','localhost');
livedata_domcabling (about 10000 entries and growing slowly)
CREATE TABLE `livedata_domcabling` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`dom_id` int(11) NOT NULL,
`hub_id` int(11) NOT NULL,
`cwd` varchar(3) NOT NULL,
`date_change` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `livedata_domcabling_dc592d9` (`dom_id`),
KEY `livedata_domcabling_4366aa6e` (`hub_id`),
CONSTRAINT `dom_id_refs_id_73e89ce0c50bf0a6` FOREIGN KEY (`dom_id`) REFERENCES `livedata_dom` (`id`),
CONSTRAINT `hub_id_refs_id_179c89d8bfd74cdf` FOREIGN KEY (`hub_id`) REFERENCES `livedata_hub` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=5397 DEFAULT CHARSET=latin1;
INSERT INTO `livedata_domcabling` VALUES (1,251,1,'22B',NULL),(2,250,1,'33A',NULL),(6,250,1,'32A','2014-04-16 05:23:00'),(5,250,1,'22B','2013-05-22 00:00:00');
livedata_hub (about 100 entries)
CREATE TABLE `livedata_hub` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(14) NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `name` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=98 DEFAULT CHARSET=latin1;
INSERT INTO `livedata_hub` VALUES (1,'megahub01');
Try this rewrite (tested in SQL Fiddle):
SELECT
d.*, dc.date_change, dc.cwd, h.name as hub
FROM
livedata_dom AS d
LEFT JOIN
livedata_domcabling as dc
ON dc.id =
( SELECT id
FROM livedata_domcabling AS dcc
WHERE dcc.dom_id = d.id
ORDER BY date_change DESC
LIMIT 1
)
LEFT JOIN
livedata_hub AS h
ON dc.hub_id = h.id
WHERE
d.cluster = 'localhost' ;
An index on (dom_id, date_change) would help efficiency.
I'm not sure about the selectivity of d.cluster = 'localhost' (how many rows of the livedata_dom table match this condition?), but adding an index on (cluster) might help as well.
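In DDL terms, those two suggestions would look something like this (index names are my own):
ALTER TABLE livedata_domcabling ADD INDEX idx_domcabling_dom_date (dom_id, date_change);
ALTER TABLE livedata_dom ADD INDEX idx_dom_cluster (cluster);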
set @rn := 0, @dom_id := 0;
select d.*, dc.date_change, dc.cwd, h.name as hub
from
livedata_dom d
left join (
select
hub_id, date_change, cwd, dom_id,
if(@dom_id = dom_id, @rn := @rn + 1, @rn := 1) as rn,
@dom_id := dom_id as dm_id
from
livedata_domcabling
order by dom_id, date_change desc
) dc on d.id = dc.dom_id
left join
livedata_hub h on h.id = dc.hub_id
where rn = 1 or rn is null
order by dom_id
The data you posted does not have dom_id 249 in livedata_domcabling. And #250 has one null date, so it comes first. So your result does not reflect what I understand from your question.