How to improve performance of query with two inner joins? - mysql

I have 3 tables :
CREATE TABLE `t_event` (
`id` int(10) NOT NULL auto_increment,
`title` varchar(100) NOT NULL,
`kind` int(10) NOT NULL,
`type` int(10) NOT NULL,
`short_desc` varchar(500) default NULL,
`long_desc` varchar(1500) default NULL,
`location` int(10) NOT NULL,
`price` decimal(11,0) NOT NULL,
`currency` int(11) NOT NULL default '1',
`remark_price` varchar(250) default NULL,
`remark_prerequisite` varchar(250) default NULL,
`date_start` date NOT NULL,
`date_end` date default NULL,
`date_remark` varchar(300) default NULL,
`time_start` time default NULL,
`time_end` time default NULL,
`remark_time` varchar(50) default NULL,
`leader` int(50) NOT NULL,
`leader2` int(100) NOT NULL,
`eve_contact_name` varchar(50) default NULL,
`eve_contact_phone` varchar(50) default NULL,
`eve_contact_email` varchar(50) default NULL,
`eve_contact_url` varchar(150) default NULL,
`eve_image_path` varchar(250) default NULL,
`provider` int(10) default NULL,
`timestamp` datetime NOT NULL,
`last_change` datetime NOT NULL default '0000-00-00 00:00:00',
`quality` int(10) default NULL,
`min_number` int(10) NOT NULL,
`max_number` int(10) NOT NULL,
`active_for_reservation` tinyint(1) NOT NULL,
`cancellation_day1` int(10) NOT NULL,
`cancellation_day2` int(10) NOT NULL,
`cancellation_fee1` varchar(255) NOT NULL,
`cancellation_fee2` varchar(255) NOT NULL,
PRIMARY KEY (`id`),
KEY `FK_t_event_t_event_kind` (`kind`),
KEY `FK_t_event_t_event_type` (`type`),
KEY `FK_t_event_t_location` (`location`),
KEY `FK_t_event_t_currency` (`currency`),
KEY `FK_t_event_t_leader` (`leader`),
KEY `FK_t_event_t_provider` (`provider`),
KEY `FK_t_event_t_quality` (`quality`),
CONSTRAINT `FK_t_event_t_currency` FOREIGN KEY (`currency`) REFERENCES `t_currency` (`id`),
CONSTRAINT `FK_t_event_t_event_kind` FOREIGN KEY (`kind`) REFERENCES `t_event_kind` (`id`),
CONSTRAINT `FK_t_event_t_event_type` FOREIGN KEY (`type`) REFERENCES `t_event_type` (`id`),
CONSTRAINT `FK_t_event_t_leader` FOREIGN KEY (`leader`) REFERENCES `t_leader` (`id`),
CONSTRAINT `FK_t_event_t_location` FOREIGN KEY (`location`) REFERENCES `t_location` (`id`),
CONSTRAINT `FK_t_event_t_provider` FOREIGN KEY (`provider`) REFERENCES `t_provider` (`id`),
CONSTRAINT `FK_t_event_t_quality` FOREIGN KEY (`quality`) REFERENCES `t_quality` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=8432 DEFAULT CHARSET=latin1
CREATE TABLE `t_location` (
`id` int(10) NOT NULL auto_increment,
`loc_name` varchar(50) NOT NULL,
`loc_detail` varchar(50) default NULL,
`loc_adress1` varchar(50) NOT NULL,
`loc_adress2` varchar(50) default NULL,
`loc_country` int(50) NOT NULL default '1',
`loc_zip` varchar(50) NOT NULL,
`loc_loc` varchar(50) NOT NULL,
`loc_shortdesc` varchar(250) default NULL,
`loc_contact_name` varchar(250) default NULL,
`loc_contact_gender` int(10) default NULL,
`loc_contact_phone` varchar(250) default NULL,
`loc_contact_email` varchar(250) default NULL,
`loc_contact_url` varchar(250) default NULL,
`loc_image_path` varchar(250) default NULL,
`latitude` varchar(100) default NULL,
`longitude` varchar(100) default NULL,
`created` datetime NOT NULL,
`last_change` datetime NOT NULL default '0000-00-00 00:00:00',
`provider` int(10) NOT NULL default '1',
PRIMARY KEY (`id`),
UNIQUE KEY `id` USING BTREE (`id`),
KEY `FK_t_location_t_country` (`loc_country`),
KEY `FK_t_location_t_gender` (`loc_contact_gender`),
KEY `FK_t_location_t_provider` (`provider`),
CONSTRAINT `FK_t_location_t_country` FOREIGN KEY (`loc_country`) REFERENCES `t_country`(`id`),
CONSTRAINT `FK_t_location_t_provider` FOREIGN KEY (`provider`) REFERENCES `t_provider` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1287 DEFAULT CHARSET=latin1
CREATE TABLE `t_dates` (
`id` int(10) NOT NULL auto_increment,
`events_id` int(10) NOT NULL,
`events_start_date` date NOT NULL,
`events_end_date` date NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `IND_id` (`id`),
KEY `IND_events_id` (`events_id`),
CONSTRAINT `t_dates_ibfk_1` FOREIGN KEY (`events_id`) REFERENCES `t_event` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=32048 DEFAULT CHARSET=latin1
My Query is :
SELECT e.*,I.* ,d.*
FROM t_event AS e
INNER JOIN t_location AS I ON I.id = e.location
INNER JOIN t_dates AS d ON d.events_id = e.id
;
this query take 90s to be executed and return = 27727
The PROFILE command show that section "sending data" take almost the time of execution.
The EXPLAIN command is the following :
+----+------------+------+------+----------------------------+--------------------+---------+-----------+-------+-------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+------------+------+------+----------------------------+--------------------+---------+-----------+-------+-------+
| 1 | SIMPLE | I | ALL | PRIMARY,id | NULL | NULL | NULL | 1143 | |
| 1 | SIMPLE | e | ref | PRIMARY,FK_t_event_t_location | FK_t_event_t_location | 4 | wu_db.I.id | 4 | |
| 1 | SIMPLE | d | ref | IND_events_id | IND_events_id | 4 | wu_db.e.id | 3 | |
+----+------------+------+------+----------------------------+--------------------+---------+-----------+-------+-------+
My point of view is that the big number of column is responsible of this slowdown but even when I write "SELECT e.id, I.events_id, d.id" it still take 16 s.
I think that I have to rewrite the query with LIMIT and OFFSET clause, what do you think?
number of records for each tables :
t_event = 7991
t_location = 1086
t_dates = 27727

Broadly speaking, MySQL can only filter records using one index from each table in a query.
That is, whilst your t_event table has indexes defined on both id and location, only one of those indexes can be used to satisfy your query. You can see this in your EXPLAIN output, which indicates that both the PRIMARY and FK_t_event_t_location keys were identified as possibly useful (with the latter actually selected for use).
Therefore, your join with t_dates, which involves a test on the id column, is being fulfilled with a table scan rather than an index lookup. Again, you can see this from the first row in the EXPLAIN output which shows type = ALL (table scan) and key = NULL (no index being used).
You should create a composite index on (id, location) for your t_event table:
ALTER TABLE t_event ADD INDEX (id, location);

My point of view is that the big number of column is responsible of this slowdown but even > when I write "SELECT e.id, I.events_id, d.id" it still take 16 s.
I think that I have to rewrite the query with LIMIT and OFFSET clause, what do you think?
I think you're right.
If you could speed up the JOIN by a factor of infinite, you would decrease to zero the "select" phase, and would leave the "sending data" part untouched - that's the other 74 seconds.
In other words, an infinite effort of query optimization would give you an advantage of 16 seconds out of 90 - around 18% overall.
If this is a batch query, then the time isn't so important; if it is not, as I believe, then I think it's really unlikely that someone is going to want a display, or even a synopsis, of some 27 thousands items.
Apart from a "paging" approach, or if a paging approach turned out not to be practical, or even in addition to a paging approach, you could see whether your application could use some kind of "filter" query (date ranges, location ranges).
So I'd study what WHERE conditions might be used to make that selection leaner.
If it is a Web application, you could SELECT only the IDs (the query you already tried, the one taking only 16 s; and with a WHERE, maybe even less), or as few columns as possible. Let's imagine that now you're displaying a very long page with lots of "forms" holding all the information, e.g.
...
+----------------------------------------+
| Date: .... Place: ...... Notes: .... |
| Leader: .... Cost: .... |
... and so on and so forth ...
+----------------------------------------+
| Date: .... Place: ...... Notes: .... |
... and so on and so forth ...
+----------------------------------------+
...
You could, instead, display only a very basic, minimal set of information, corresponding to the columns you have fetched:
...
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
...
At this point, the user will be able to quickly browse the list, but almost certainly won't open all those forms, but only two or three. When the user clicks on OPEN, a jQuery function could issue a very fast AJAX call to the server, supplying the data with the IDs; then three separate queries would retrieve all the relevant data in milliseconds.
The data would be json_encode()d and sent back to jQuery, and the form would "open" displaying all the information in "accordion" fashion:
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
+----------------------------------------+
| Date: .... Place: ...... <CLOSE>|
| large form with all the information
| ...
| ...
+----------------------------------------+
| Date: .... Place: ...... <OPEN>|
This way you would not need to immediately retrieve all the columns, especially those largish columns such as short_desc and long_desc, which can reach two whole Kb between them, and yet the user would experience very fast response.

Related

MySQL shows index in execution plan, but doesn't really use it

I have a table with 20M rows and a query which takes 10 seconds.
select id from entity
where (entity.class_id = 67 and entity.name like '%321%' )
order by id desc
In execution plan there is an index, but it's not really used.
explain extended select id from entity
where (entity.class_id = 67 and entity.name like '%321%' )
order by id desc
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
| 1 | SIMPLE | entity | ref | constraint_unique_class_legacy_id,entity_tag,entity_class_modification_date_int_idx,entity_name_idx | entity_class_modification_date_int_idx | 8 | const | 288440 | 100.00 | Using where; Using filesort |
If I flush status and run this query, handlers show that there was a full scan
Handler_read_next: 20318800
But if I give a hint to use index which was in 'explain extended', then there is no full scan and query finishes in 250ms.
select id from entity
use index (entity_class_modification_date_int_idx)
where (entity.class_id = 67 and entity.name like '%321%' )
order by id desc
Only 166K of entities was scanned
Handler_read_next: 165894
Why do I have to give hint to use index which is already in execution plan?
If I add + 0 to order by, query finishes in 250ms as well.
select id from entity
where (entity.class_id = 67 and entity.name like '%321%' )
order by id + 0 desc
'explain extended' shows the same execution plan in every case, 'analyze' doesn't help.
Table 'entity':
CREATE TABLE `entity` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`name` varchar(4096) COLLATE utf8_bin DEFAULT NULL,
`tag` varchar(255) COLLATE utf8_bin DEFAULT NULL,
`revision` int(11) NOT NULL,
`class_id` bigint(20) NOT NULL,
`legacy_id` bigint(20) DEFAULT NULL,
`description` varchar(255) COLLATE utf8_bin DEFAULT NULL,
`last_modified_by` varchar(64) COLLATE utf8_bin DEFAULT NULL,
`removed` tinyint(1) NOT NULL DEFAULT '0',
`modification_date_int` bigint(20) DEFAULT NULL,
`creation_date_int` bigint(20) DEFAULT NULL,
`created_by` varchar(64) COLLATE utf8_bin DEFAULT NULL,
`ancestor_class_id` bigint(20) NOT NULL,
`acu_id` bigint(20) DEFAULT NULL,
`secured` tinyint(1) DEFAULT '1',
`system_modification_date` bigint(20) DEFAULT NULL,
`archived` tinyint(1) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
UNIQUE KEY `constraint_unique_class_legacy_id` (`class_id`,`legacy_id`),
UNIQUE KEY `entity_tag` (`class_id`,`tag`),
UNIQUE KEY `class_hierarchy_tag` (`tag`,`ancestor_class_id`),
KEY `entity_legacy_id_idx` (`legacy_id`),
KEY `entity_modification_date_int_idx` (`modification_date_int`),
KEY `entity_class_modification_date_int_idx` (`class_id`,`removed`,`modification_date_int`),
KEY `ancestor_class_id` (`ancestor_class_id`),
KEY `acu_id` (`acu_id`),
KEY `entity_name_idx` (`class_id`,`name`(255)),
KEY `entity_archived_idx` (`archived`),
CONSTRAINT `entity_ibfk_1` FOREIGN KEY (`class_id`) REFERENCES `class` (`id`),
CONSTRAINT `entity_ibfk_2` FOREIGN KEY (`ancestor_class_id`) REFERENCES `class` (`id`),
CONSTRAINT `entity_ibfk_3` FOREIGN KEY (`acu_id`) REFERENCES `acu` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=60382455 DEFAULT CHARSET=utf8 COLLATE=utf8_bin
MySQL version:
SELECT ##version;
+--------------------+
| ##version |
+--------------------+
| 5.6.30-76.3-56-log |
+--------------------+
OK, I partially found an answer: MySQL Workbench which I'm using is implicitly adding 'limit 1000' to queries and it drastically reduce performance even though there are much less rows in response. With limit 'explain extended' shows PRIMARY as a key and this is not a question anymore. If I increase limit to 10000, then query finishes in 250ms. Looks like there is some heuristics in MySQL optimizer which forces it to use PRIMARY index in case of low 'limit'.

Is it realistic to get a SQL statement with multiple JOINs and no WHERE clause retrieving 8000 rows under 1s?

I'm working on page that summarizes the decks, win rates and other data regarding Magic cards at https://pennydreadfulmagic.com/seasons/all/cards/
The main SQL query frequently takes more than 10s on production (less on my laptop but still >5s). The server is a standard Linode box running openSUSE and the database is MariaDB (MySQL).
I'm trying to work out if running this query on user time when the page is requested is a foolish idea or if I just need the right optimizations to make it work in less than a second.
A naïve version of the query is:
SELECT
card,
COUNT(*) AS num_decks,
SUM(CASE WHEN dm.games > IFNULL(odm.games, 0) THEN 1 ELSE 0 END) AS wins,
SUM(CASE WHEN dm.games < odm.games THEN 1 ELSE 0 END) AS losses,
SUM(CASE WHEN dm.games = odm.games THEN 1 ELSE 0 END) AS draws
FROM
deck_card AS dc
INNER JOIN
deck AS d ON dc.deck_id = d.id
INNER JOIN
deck_match AS dm ON d.id = dm.deck_id
INNER JOIN
deck_match AS odm ON dm.match_id = odm.match_id AND dm.deck_id <> odm.deck_id
GROUP BY
dc.card
ORDER BY
num_decks DESC,
card
There are approximately this many rows in each table:
deck_card - 470,000 (DISTINCT card = 8,500)
deck - 20,000
match - 35,000
deck_match - 70,000
These don't seem like very big numbers for SQL to deal with which is why I want to know if it's realistic to make this query run significantly faster.
EXPLAIN says:
+--------+-----+-------+------------------------+-------------------------------+-----+----------------------+-------+----------------------------------------------+
| id | type | tbl | type | possible_keys | key | len | ref | rows | Extra |
+------+--------+-----+-------+------------------------+------------------------+-----+----------------------+-------+----------------------------------------------+
| 1 | SIMPLE | d | index | PRIMARY | person_id | 4 | NULL | 18888 | Using index; Using temporary; Using filesort |
| 1 | SIMPLE | dm | ref | match_id,deck_id | deck_id | 4 | decksite.d.id | 1 | |
| 1 | SIMPLE | odm | ref | match_id | match_id | 4 | decksite.dm.match_id | 1 | Using where |
| 1 | SIMPLE | dc | ref | deck_id_card_sideboard | deck_id_card_sideboard | 4 | decksite.d.id | 10 | Using index |
+------+--------+-----+-------+------------------------+------------------------+-----+----------------------+-------+----------------------------------------------+
4 rows in set (0.00 sec)
I have a somewhat faster version of the query (that is still too slow) where I pull the match/deck_card stuff out into a subquery that is then joined to the main query which I've left out here as it's harder to understand. This does make things incrementally faster but nothing close to the ideal speed.
I'm not necessarily looking to be spoon-fed the optimizations required here (although that would be nice too!) so much as to understand if it is realistic to run queries like this on user time with the right optimizations? Or should I put my time into finding the right caching strategy or denormalizing the database?
CREATE TABLEs are as follows:
mysql> SHOW CREATE TABLE deck_card;
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| deck_card | CREATE TABLE `deck_card` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`deck_id` int(11) NOT NULL,
`card` varchar(100) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`n` int(11) NOT NULL,
`sideboard` int(11) NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `deck_card_deck_id_card_sideboard` (`deck_id`,`card`,`sideboard`),
KEY `idx_card` (`card`),
KEY `idx_card_deck_id_sideboard_n` (`card`,`deck_id`,`sideboard`,`n`),
CONSTRAINT `deck_card_ibfk_1` FOREIGN KEY (`deck_id`) REFERENCES `deck` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=39407094 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci |
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
mysql> SHOW CREATE TABLE deck;
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| deck | CREATE TABLE `deck` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`person_id` int(11) NOT NULL,
`source_id` int(11) NOT NULL,
`identifier` varchar(190) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`name` text COLLATE utf8mb4_unicode_ci,
`created_date` int(11) NOT NULL,
`updated_date` int(11) NOT NULL,
`competition_id` int(11) DEFAULT NULL,
`url` text COLLATE utf8mb4_unicode_ci,
`archetype_id` int(11) DEFAULT NULL,
`resource_uri` text COLLATE utf8mb4_unicode_ci,
`featured_card` text COLLATE utf8mb4_unicode_ci,
`score` int(11) DEFAULT NULL,
`thumbnail_url` text COLLATE utf8mb4_unicode_ci,
`small_thumbnail_url` text COLLATE utf8mb4_unicode_ci,
`finish` int(11) DEFAULT NULL,
`decklist_hash` char(40) COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`retired` tinyint(1) DEFAULT '0',
`reviewed` tinyint(1) DEFAULT '0',
PRIMARY KEY (`id`),
UNIQUE KEY `deck_source_id_identifier` (`source_id`,`identifier`),
KEY `person_id` (`person_id`),
KEY `competition_id` (`competition_id`),
KEY `archetype_id` (`archetype_id`),
KEY `deck_hash` (`decklist_hash`),
CONSTRAINT `deck_ibfk_1` FOREIGN KEY (`person_id`) REFERENCES `person` (`id`),
CONSTRAINT `deck_ibfk_2` FOREIGN KEY (`source_id`) REFERENCES `source` (`id`),
CONSTRAINT `deck_ibfk_3` FOREIGN KEY (`competition_id`) REFERENCES `competition` (`id`),
CONSTRAINT `deck_ibfk_4` FOREIGN KEY (`archetype_id`) REFERENCES `archetype` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=21460 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci |
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
mysql> SHOW CREATE TABLE deck_match;
+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| deck_match | CREATE TABLE `deck_match` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`match_id` int(11) NOT NULL,
`deck_id` int(11) NOT NULL,
`games` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `match_id` (`match_id`),
KEY `deck_id` (`deck_id`),
CONSTRAINT `deck_match_ibfk_2` FOREIGN KEY (`deck_id`) REFERENCES `deck` (`id`),
CONSTRAINT `deck_match_ibfk_3` FOREIGN KEY (`match_id`) REFERENCES `match` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=73857 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci |
+------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
Do you get the "right" answers? Or are the numbers bigger than they should be? The latter happens with the "explode-implode" that occurs when doing JOIN, then GROUP BY.
See what happens if you change
FROM deck_card AS dc
to
FROM ( SELECT DISTINCT card FROM deck_card ) AS dc
I didn't exactly fix this but I did improve the performance of this part of my application from about 10s to less than 1s.
I used a query like the above but "bucketed" by day (GROUP BY card, DATE(d.created_date)) to create a _card_stats table offline which I regenerate on a regular basis. When I want this data I can now much more quickly aggregate for last week/season/all time from the bucketed table.
Not what I was dreaming of but fairly workable at the cost of a little complexity/lag in updates.

MySQL doesn't use entire index when combining join and range

I'm trying to optimize a simple query that joins between two tables and applies a range condition.
From the explain plan below, you can see that the index inv_quantity_on_hand is only used partially (4 bytes, only for the first column - inv_item_sk). I would expect the entire index to be used, as the second part (inv_quantity_on_hand) of the index is used in the WHERE clause in a range condition.
Please note that this happens only with a join and a range condition. Replacing the range condition to a constant equality comparison (inv_quantity_on_hand = 5) will change the explain plan and MySQL will use the entire index.
It seems to be an instance of this bug: https://bugs.mysql.com/bug.php?id=8569.
I checked it with MySQL 5.7 and it still happens. Anyone can think of a good workaround please?
Schema structure:
CREATE TABLE `inventory` (
`inv_date_sk` INT(11) NOT NULL,
`inv_item_sk` INT(11) NOT NULL,
`inv_warehouse_sk` INT(11) NOT NULL,
`inv_quantity_on_hand` INT(11) DEFAULT NULL,
PRIMARY KEY (`inv_date_sk` , `inv_item_sk` , `inv_warehouse_sk`),
KEY `inv_w` (`inv_warehouse_sk`),
KEY `inv_i` (`inv_item_sk`),
KEY `inv_quantity_on_hand_index` (`inv_item_sk` , `inv_quantity_on_hand`),
CONSTRAINT `inv_d` FOREIGN KEY (`inv_date_sk`)
REFERENCES `date_dim` (`d_date_sk`)
ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `inv_i` FOREIGN KEY (`inv_item_sk`)
REFERENCES `item` (`i_item_sk`)
ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `inv_w` FOREIGN KEY (`inv_warehouse_sk`)
REFERENCES `warehouse` (`w_warehouse_sk`)
ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=INNODB DEFAULT CHARSET=UTF8
CREATE TABLE `item` (
`i_item_sk` INT(11) NOT NULL,
`i_item_id` CHAR(16) NOT NULL,
`i_rec_start_date` DATE DEFAULT NULL,
`i_rec_end_date` DATE DEFAULT NULL,
`i_item_desc` VARCHAR(200) DEFAULT NULL,
`i_current_price` DECIMAL(7 , 2 ) DEFAULT NULL,
`i_wholesale_cost` DECIMAL(7 , 2 ) DEFAULT NULL,
`i_brand_id` INT(11) DEFAULT NULL,
`i_brand` CHAR(50) DEFAULT NULL,
`i_class_id` INT(11) DEFAULT NULL,
`i_class` CHAR(50) DEFAULT NULL,
`i_category_id` INT(11) DEFAULT NULL,
`i_category` CHAR(50) DEFAULT NULL,
`i_manufact_id` INT(11) DEFAULT NULL,
`i_manufact` CHAR(50) DEFAULT NULL,
`i_size` CHAR(20) DEFAULT NULL,
`i_formulation` CHAR(20) DEFAULT NULL,
`i_color` CHAR(20) DEFAULT NULL,
`i_units` CHAR(10) DEFAULT NULL,
`i_container` CHAR(10) DEFAULT NULL,
`i_manager_id` INT(11) DEFAULT NULL,
`i_product_name` CHAR(50) DEFAULT NULL,
PRIMARY KEY (`i_item_sk`),
KEY `item_color_index` (`i_color`)
) ENGINE=INNODB DEFAULT CHARSET=UTF8
Query:
SELECT
*
FROM
inventory
INNER JOIN
item ON inventory.inv_item_sk = item.i_item_sk
WHERE
inventory.inv_quantity_on_hand > 100
AND item.i_color = 'red';
Execution plan:
# id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra
-----+-------------+-----------+------------+------+----------------------------------+----------------------------+---------+----------------------+-----------------+-------------------------
1 | SIMPLE | item | | ref | PRIMARY,item_color_index | item_color_index | 61 | const | 384 | 100.00 |
1 | SIMPLE | inventory | | ref | inv_i,inv_quantity_on_hand_index | inv_quantity_on_hand_index | 4 | tpcds.item.i_item_sk | 615 | 33.33 | Using where; Using index
The multi column indexes are stored as concats of the different columns.
I think MySQL will not evaluate a substring within a multi column index for comparison. When you use inv_quantity_on_hand = 5 (or in(1,2,3,4,5)) MySQL will build the strings from your input for comparison so it can use the full index. Using between or > basically provides an unlimited number of possible substrings to compare (before checking data type). Building all those strings and comparing them would take much more time than using the index for the first column (on-clause) and then check the inv_quantity_on_hand "using where".
use BETWEEN condition instead of conditional operator

Slow Query on Rails joins

The following rails query throws back in slow query log:
Class ParserRun
scope :active, -> {
where(completed_at: nil)
.joins('LEFT JOIN system_events ON parser_runs.id = system_events.parser_run_id')
.where("system_events.created_at > '#{active_system_events_threshold}' OR parser_runs.created_at > '#{1.minute.ago.to_s(:db)}'")
}
How can I optimize this?
Slow querylog:
SELECT `parser_runs`.*
FROM `parser_runs`
INNER JOIN `system_events` ON `system_events`.`parser_run_id` = `parser_runs`.`id`
WHERE `parser_runs`.`type` IN ('DatasetParserRun')
AND `parser_runs`.`completed_at` IS NULL
AND (system_events.created_at <= '2017-08-05 04:03:09');
# Time: 170805 5:03:43
Output of 'show create table parser_runs;'
| parser_runs | CREATE TABLE `parser_runs` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`type` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`customer_id` int(11) DEFAULT NULL,
`options` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`completed_at` datetime DEFAULT NULL,
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `index_parser_runs_on_customer_id` (`customer_id`)
) ENGINE=InnoDB AUTO_INCREMENT=143327 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci |
output of 'show create table system_events;'
| system_events | CREATE TABLE `system_events` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`log_level` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL,
`customer_id` int(11) DEFAULT NULL,
`classification` int(11) DEFAULT NULL,
`information` text COLLATE utf8_unicode_ci,
`created_at` datetime DEFAULT NULL,
`updated_at` datetime DEFAULT NULL,
`parser_run_id` int(11) DEFAULT NULL,
`notified` tinyint(1) DEFAULT '0',
`dataset_log_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `index_system_events_on_classification` (`classification`),
KEY `index_system_events_on_customer_id` (`customer_id`),
KEY `index_system_events_on_parser_run_id` (`parser_run_id`),
KEY `index_system_events_on_dataset_log_id` (`dataset_log_id`)
) ENGINE=InnoDB AUTO_INCREMENT=730539 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci |
Output of EXPLAIN:
EXPLAIN for: SELECT `parser_runs`.* FROM `parser_runs` LEFT JOIN system_events ON parser_runs.id = system_events.parser_run_id WHERE `parser_runs`.`completed_at` IS NULL AND (system_events.created_at > '2017-08-07 10:09:03')
+----+-------------+---------------+--------+------------------------- -------------+---------+---------+--------------------------------------+- -------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+---------------+--------+--------------------------------------+---------+---------+--------------------------------------+--------+-------------+
| 1 | SIMPLE | system_events | ALL | index_system_events_on_parser_run_id | NULL | NULL | NULL | 655946 | Using where |
| 1 | SIMPLE | parser_runs | eq_ref | PRIMARY | PRIMARY | 4 | ashblood.system_events.parser_run_id | 1 | Using where |
+----+-------------+---------------+--------+--------------------------------------+---------+---------+--------------------------------------+--------+-------------+
2 rows in set (0.00 sec)
The first step in the query execution plan (the output of EXPLAIN SELECT ...) indicates that the whole system_events table is being scanned in order to check which rows in the system_events table will be used in the join with the parser_runs table.
Please, add an index on the created_at column in the system_events and repeat the query. Please, check the new execution path to verify whether the whole table is being scanned, or if the new index is being used.
In addition, although probably not the root of the problem, you could add an index on the type and completed_at columns of the table parser_runs. Please, note that I mean an index on both columns (in the given order) instead of an index on each column.
INDEX(type, completed_at)
INDEX(completed_at, type)
INDEX(created_at, parser_run_id)
INDEX(parser_run_id, created_at)
It is not obvious which indexes the Optimizer will prefer; add all of those.
Don't use joins. Instead break the join queries in separate queries and store those data in variables. And later get your desired results from those data.

Query with LEFT JOIN on large table really slow

The following query takes approximately 12 seconds to execute. I have tried optimizing but was not able to. The table to be joined is pretty large (> 8.000.000 records).
SELECT
p0_.id AS id_0,
p0_.ean AS ean_1,
p0_.brand AS brand_2,
p0_.type AS type_3,
p0_.retail_price AS retail_price_4,
p0_.target_price AS target_price_5,
min(NULLIF(c1_.delivery_price, 0)) AS sclr_6,
COALESCE(((p0_.target_price - min(NULLIF(c1_.delivery_price, 0))) / p0_.target_price * -100), 0) AS sclr_7
FROM product p0_
LEFT JOIN crawl c1_ ON (
c1_.product_ean = p0_.ean AND (
c1_.crawl_date = p0_.last_crawl_date OR
p0_.last_crawl_date IS NULL
)
AND c1_.source_id IN (
SELECT o2_.source_id AS sclr_8
FROM organisation_source o2_
WHERE o2_.organisation_id = 5
)
)
WHERE p0_.organisation_id = 5 GROUP BY p0_.ean
I already tried writing the query in a lot of different ways, but unfortunately did not give me any performance win. If I remove the subquery in the last AND it does not help either.
See below the output of the EXPLAIN statement:
+------+--------------+-------+------+---------------------------------------------------+------------------+---------+------------------------+--------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+------+--------------+-------+------+---------------------------------------------------+------------------+---------+------------------------+--------+-------------+
| 1 | PRIMARY | p0_ | ref | uniqueConstraint,IDX_D34A04AD9E6B1585 | uniqueConstraint | 5 | const | 69 | Using where |
| 1 | PRIMARY | c1_ | ref | IDX_product_ean,IDX_crawl_date | IDX_product_ean | 62 | admin_pricev-p.p0_.ean | 468459 | Using where |
| 2 | MATERIALIZED | o2_ | ref | PRIMARY,IDX_DD91A56E9E6B1585,IDX_DD91A56E953C1C61 | PRIMARY | 4 | const | 1 | Using index |
+------+--------------+-------+------+---------------------------------------------------+------------------+---------+------------------------+--------+-------------+
See below the CREATE TABLE statements of the product and crawl tabel:
CREATE TABLE `product` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`organisation_id` int(11) DEFAULT NULL,
`ean` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
`brand` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
`type` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
`retail_price` decimal(10,2) NOT NULL,
`target_price` decimal(10,2) NOT NULL,
`last_crawl_date` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uniqueConstraint` (`organisation_id`,`ean`),
KEY `IDX_D34A04AD9E6B1585` (`organisation_id`),
KEY `IDX_target_price` (`target_price`),
KEY `IDX_ean` (`ean`),
KEY `IDX_type` (`type`),
KEY `IDX_last_crawl_date` (`last_crawl_date`),
CONSTRAINT `FK_D34A04AD9E6B1585` FOREIGN KEY (`organisation_id`) REFERENCES `organisation` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=927 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
CREATE TABLE `crawl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`source_id` int(11) DEFAULT NULL,
`store_id` int(11) DEFAULT NULL,
`product_ean` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
`crawl_date` datetime NOT NULL,
`takeaway_price` decimal(10,2) DEFAULT NULL,
`delivery_price` decimal(10,2) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `IDX_B4E9F1C2953C1C61` (`source_id`),
KEY `IDX_B4E9F1C2B092A811` (`store_id`),
KEY `IDX_product_ean` (`product_ean`),
KEY `IDX_takeaway_price` (`takeaway_price`),
KEY `IDX_crawl_date` (`crawl_date`),
CONSTRAINT `FK_B4E9F1C2953C1C61` FOREIGN KEY (`source_id`) REFERENCES `source` (`id`),
CONSTRAINT `FK_B4E9F1C2B092A811` FOREIGN KEY (`store_id`) REFERENCES `store` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=8606874 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Anyone has any idea how to improve the performance of this query? Many thanks! If more information is needed please let me know!
You can probably simplify the query to:
SELECT . . .
FROM product p0_ LEFT JOIN
crawl c1_
ON c1_.product_ean = p0_.ean AND
c1_.crawl_date = p0_.last_crawl_date AND
EXISTS (SELECT 1
FROM organisation_source o2_
WHERE o2_.organisation_id = 5 AND c1_.source_id = o2_.source_id
)
WHERE p0_.organisation_id = 5
GROUP BY p0_.ean;
The p0_.last_crawl_date IS NULL is presumably unnecessary. A LEFT JOIN will keep all rows in the first table even when there is a NULL in a comparison. Your logic matches all rows in the second table (that meet the other conditions). That may be what you want, but I am guessing not.
In MySQL, exists is sometimes faster than in, which is why I've rewritten that portion.
For this query, you can speed it up using indexes: product(organisation_id, ean, last_crawl_date), crawl(product_ean, crawl_date, source_id) and organisation_source(source_id, organisation_id).
Try with composite indexes on your LEFT JOINs
CREATE TABLE `product` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`organisation_id` int(11) DEFAULT NULL,
`ean` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
`brand` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
`type` varchar(50) COLLATE utf8_unicode_ci NOT NULL,
`retail_price` decimal(10,2) NOT NULL,
`target_price` decimal(10,2) NOT NULL,
`last_crawl_date` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `uniqueConstraint` (`organisation_id`,`ean`),
KEY `IDX_D34A04AD9E6B1585` (`organisation_id`),
KEY `IDX_target_price` (`target_price`),
KEY `IDX_ean` (`ean`),
KEY `IDX_type` (`type`),
KEY `IDX_last_crawl_date` (`last_crawl_date`),
INDEX `IDX_testing1` (`ean`,`last_crawl_date`),
CONSTRAINT `FK_D34A04AD9E6B1585` FOREIGN KEY (`organisation_id`) REFERENCES `organisation` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=927 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
CREATE TABLE `crawl` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`source_id` int(11) DEFAULT NULL,
`store_id` int(11) DEFAULT NULL,
`product_ean` varchar(20) COLLATE utf8_unicode_ci NOT NULL,
`crawl_date` datetime NOT NULL,
`takeaway_price` decimal(10,2) DEFAULT NULL,
`delivery_price` decimal(10,2) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `IDX_B4E9F1C2953C1C61` (`source_id`),
KEY `IDX_B4E9F1C2B092A811` (`store_id`),
KEY `IDX_product_ean` (`product_ean`),
KEY `IDX_takeaway_price` (`takeaway_price`),
KEY `IDX_crawl_date` (`crawl_date`),
INDEX `IDX_testing2` ( `source_id`,`product_ean`,`crawl_date`),
CONSTRAINT `FK_B4E9F1C2953C1C61` FOREIGN KEY (`source_id`) REFERENCES `source` (`id`),
CONSTRAINT `FK_B4E9F1C2B092A811` FOREIGN KEY (`store_id`) REFERENCES `store` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=8606874 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci