mysql selects for perf tuning - mysql

Any idea why the indexes on cli.clitype and c.cli are not being used, even when I add a USE INDEX / FORCE INDEX hint to the query? The query below takes 4 seconds to return only 1634 rows. I'm using MySQL 5.5.25-log. Please suggest.
mysql> explain SELECT DATE(sr.`date`), v.company_name, c.cli, COUNT(*), c.charge, SUM(c.`charge`) FROM subscriptionrequest AS sr, cli AS c , vendor AS v WHERE sr.cli = c.cli AND sr.secretkey = v.secretkey AND sr.`date` BETWEEN'2012-03-12 00:00:00' AND '2012-10-13 00:00:00' and c.clitype = 'chargemo' GROUP BY DATE(sr.`date`), sr.secretkey,c.cli;
+----+-------------+-------+------+-----------------------------------------------+----------------+---------+----------------------------+------+----------------------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+-----------------------------------------------+----------------+---------+----------------------------+------+----------------------------------------------+
| 1 | SIMPLE | c | ALL | idx_cli | NULL | NULL | NULL | 115 | Using where; Using temporary; Using filesort |
| 1 | SIMPLE | sr | ref | idx_subreq_key,idx_subreq_cli,idx_subreq_date | idx_subreq_cli | 53 | crystal_du_sm.c.cli | 869 | Using where |
| 1 | SIMPLE | v | ref | secretkey_idx | secretkey_idx | 52 | crystal_du_sm.sr.secretkey | 1 | Using where |
+----+-------------+-------+------+-----------------------------------------------+----------------+---------+----------------------------+------+----------------------------------------------+
3 rows in set (0.00 sec)
mysql> show indexes from cli;
+-------+------------+--------------+--------------+-------------+-----------+--------- ----+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+-------+------------+--------------+--------------+-------------+-----------+-------- -----+----------+--------+------+------------+---------+---------------+
| cli | 0 | PRIMARY | 1 | idcli | A | 115 | NULL | NULL | | BTREE | | |
| cli | 1 | idx_cli | 1 | cli | A | 115 | NULL | NULL | | BTREE | | |
| cli | 1 | cli_type_idx | 1 | clitype | A | 115 | NULL | NULL | YES | BTREE | | |
+-------+------------+--------------+--------------+-------------+-----------+--------- ----+----------+--------+------+------------+---------+---------------+
3 rows in set (0.00 sec)
mysql> show create table cli;
| cli | CREATE TABLE `cli` (
`idcli` bigint(255) NOT NULL AUTO_INCREMENT,
`cli` varchar(256) NOT NULL,
`type` enum('SDMF','MDMF') NOT NULL DEFAULT 'SDMF',
`priority` enum('realtime','high','normal','low','ignore') NOT NULL DEFAULT 'normal',
`status` enum('active','inactive','suspended','deleted') NOT NULL DEFAULT 'active',
`date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`description` text,
`charge` float DEFAULT '0',
`clitype` enum('chargemo','freemo') DEFAULT 'freemo',
PRIMARY KEY (`idcli`),
KEY `idx_cli` (`cli`),
KEY `cli_type_idx` (`clitype`)
) ENGINE=InnoDB AUTO_INCREMENT=117 DEFAULT CHARSET=latin1 |
1 row in set (0.00 sec)
mysql> show create table vendor;
| vendor | CREATE TABLE `vendor` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(256) NOT NULL,
`company_name` varchar(256) DEFAULT NULL,
`phone_no` varchar(256) DEFAULT NULL,
`status` enum('active','inactive','suspended','deleted') DEFAULT 'active',
`mo` bigint(255) NOT NULL,
`mt` bigint(255) NOT NULL,
`used_mo` bigint(255) DEFAULT '0',
`used_mt` bigint(255) DEFAULT '0',
`start_time` timestamp NULL DEFAULT '0000-00-00 00:00:00',
`end_time` timestamp NULL DEFAULT '0000-00-00 00:00:00',
`secretkey` varchar(50) NOT NULL,
`callback_url` text,
`payment_callback_url` text,
`date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`userid` int(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `secretkey_idx` (`secretkey`)
) ENGINE=InnoDB AUTO_INCREMENT=10 DEFAULT CHARSET=latin1 |
1 row in set (0.00 sec)
| subscriptionrequest | CREATE TABLE `subscriptionrequest` (
`id` bigint(255) unsigned NOT NULL AUTO_INCREMENT,
`ipaddress` varchar(256) CHARACTER SET latin1 NOT NULL DEFAULT '0.0.0.0',
`message` text,
`msisdn` varchar(50) CHARACTER SET latin1 DEFAULT NULL,
`mode` varchar(50) CHARACTER SET latin1 DEFAULT NULL,
`cli` varchar(50) CHARACTER SET latin1 DEFAULT NULL,
`transactionid` varchar(100) DEFAULT NULL,
`secretkey` varchar(100) CHARACTER SET latin1 DEFAULT NULL,
`date` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`error_code` int(10) DEFAULT NULL,
`success` int(10) NOT NULL DEFAULT '0',
`status` enum('waiting','processing','completed','moexceeds','reject') DEFAULT 'waiting',
PRIMARY KEY (`id`),
KEY `idx_subreq_key` (`secretkey`),
KEY `idx_subreq_status` (`status`),
KEY `idx_subreq_transid` (`transactionid`),
KEY `idx_subreq_cli` (`cli`),
KEY `idx_subreq_date` (`date`)
) ENGINE=InnoDB AUTO_INCREMENT=1594161 DEFAULT CHARSET=utf8 |
FOR SETSUNA ---
mysql> explain SELECT DATE(sr.`date`) AS sr_date, v.company_name, c.cli,
-> COUNT(*) AS cnt, c.charge,
-> SUM(c.`charge`) AS charge_sum
-> FROM
-> subscriptionrequest AS sr
-> JOIN cli AS c ON sr.cli = c.cli
-> JOIN vendor AS v ON sr.secretkey = v.secretkey
-> WHERE
-> sr.`date` >= '2012-03-12' AND sr.`date` <= '2012-10-13'
-> AND c.clitype = 'chargemo'
-> GROUP BY DATE(sr.`date`), sr.secretkey, c.cli;
+----+-------------+-------+------+-----------------------------+----------------+--------- +---------------------------+-------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+-----------------------------+----------------+--------- +---------------------------+-------+---------------------------------+
| 1 | SIMPLE | v | ALL | secretkey_idx | NULL | NULL | NULL | 9 | Using temporary; Using filesort |
| 1 | SIMPLE | sr | ref | idx_subreq_key,cli_date_idx | idx_subreq_key | 103 | crystal_du_sm.v.secretkey | 88746 | Using where |
| 1 | SIMPLE | c | ref | idx_cli,cli_type_idx | idx_cli | 258 | crystal_du_sm.sr.cli | 1 | Using where |
+----+-------------+-------+------+-----------------------------+----------------+--------- +---------------------------+-------+---------------------------------+
3 rows in set (0.00 sec)
--- 23/8/2012 ---
+----+-------------+-------+------+---------------------------------------+------------ -----+---------+----------------------------+-------+-----------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------------------------------+-----------------+---------+----------------------------+-------+-----------------+
| 1 | SIMPLE | v | ALL | secretkey_idx | NULL | NULL | NULL | 9 | Using temporary |
| 1 | SIMPLE | sr | ref | idx_subreq_key,idx_date_cli_secretkey | idx_subreq_key | 103 | crystal_du_sm.v.secretkey | 88608 | Using where |
| 1 | SIMPLE | c | ref | idx_cli_clitype | idx_cli_clitype | 260 | crystal_du_sm.sr.cli,const | 1 | Using where |
+----+-------------+-------+------+---------------------------------------+---------------- -+---------+----------------------------+-------+-----------------+
3 rows in set (0.00 sec)

A couple of general remarks:
Avoid using SQL keywords as column names (e.g. subscriptionrequest.date).
Use aliases for selected columns, especially when they are the result of a function.
I think this version is more readable:
-- Per-day request count and summed charge for each (vendor secret key, CLI)
-- pair, limited to CLIs of type 'chargemo' inside the given date window.
-- NOTE(review): v.company_name and c.charge are selected but not grouped;
-- this relies on MySQL accepting non-aggregated columns outside GROUP BY.
SELECT
    DATE(sr.`date`) AS sr_date,
    v.company_name,
    c.cli,
    COUNT(*) AS cnt,
    c.charge,
    SUM(c.`charge`) AS charge_sum
FROM subscriptionrequest AS sr
INNER JOIN cli AS c
    ON sr.cli = c.cli
INNER JOIN vendor AS v
    ON sr.secretkey = v.secretkey
WHERE sr.`date` >= '2012-03-12'
  AND sr.`date` <= '2012-10-13'
  AND c.clitype = 'chargemo'
GROUP BY
    DATE(sr.`date`),
    sr.secretkey,
    c.cli;
You will probably need to modify the subscriptionrequest table:
-- Replace the two single-column indexes on cli and date with one composite
-- index whose leading column is the date used in the range filter.
ALTER TABLE subscriptionrequest
    DROP INDEX `idx_subreq_cli`,
    DROP INDEX `idx_subreq_date`,
    ADD INDEX `cli_date` (`date`, `cli`);
This will help fetch the relevant subset of records based on the date field, thus reducing the number of rows read from the subscriptionrequest table.
Edit #1
Schema Modifications & (Slight) Query Optimization:
-- Swap the (date, cli) index for one that also contains secretkey, so every
-- subscriptionrequest column the query touches is available from the index.
-- NOTE(review): the column order is (date, secretkey, cli) even though the
-- index name reads date_cli_secretkey.
ALTER TABLE subscriptionrequest
    DROP INDEX `cli_date`,
    ADD INDEX `idx_date_cli_secretkey` (`date`, `secretkey`, `cli`);

-- On cli, merge the separate cli and clitype indexes into one composite index
-- that serves both the join column and the clitype filter.
ALTER TABLE `cli`
    DROP INDEX idx_cli,
    DROP INDEX cli_type_idx,
    ADD INDEX `idx_cli_clitype` (cli, clitype);

-- Same report query as before; ORDER BY NULL tells MySQL not to sort the
-- grouped result, which GROUP BY would otherwise do implicitly.
EXPLAIN
SELECT
    DATE(sr.`date`) AS sr_date,
    v.company_name,
    c.cli,
    COUNT(*) AS cnt,
    c.charge,
    SUM(c.`charge`) AS charge_sum
FROM subscriptionrequest AS sr
INNER JOIN cli AS c
    ON sr.cli = c.cli
INNER JOIN vendor AS v
    ON sr.secretkey = v.secretkey
WHERE sr.`date` >= '2012-03-12'
  AND sr.`date` <= '2012-10-13'
  AND c.clitype = 'chargemo'
GROUP BY
    DATE(sr.`date`),
    sr.secretkey,
    c.cli
ORDER BY NULL\G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: sr
type: index
possible_keys: idx_subreq_key,idx_date_cli_secretkey
key: idx_date_cli_secretkey
key_len: 160
ref: NULL
rows: 1
Extra: Using where; Using index; Using temporary
*************************** 2. row ***************************
id: 1
select_type: SIMPLE
table: c
type: ref
possible_keys: idx_cli_clitype
key: idx_cli_clitype
key_len: 260
ref: so_12055859.sr.cli,const
rows: 1
Extra: Using where
*************************** 3. row ***************************
id: 1
select_type: SIMPLE
table: v
type: ref
possible_keys: secretkey_idx
key: secretkey_idx
key_len: 52
ref: so_12055859.sr.secretkey
rows: 1
Extra: Using where
3 rows in set (0.01 sec)

Related

Update query not making full use of index in MySQL 8

Given this table:
CREATE TABLE `queue` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`type` int(10) unsigned NOT NULL,
`posted_on` timestamp(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
`status` enum('pending','complete','error') NOT NULL DEFAULT 'pending',
`body` blob NOT NULL,
`process_id` int(10) unsigned DEFAULT NULL,
`acquired_on` datetime(6) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `acquiredon` (`acquired_on`),
KEY `type_status_processid_postedon` (`type`,`status`,`process_id`,`posted_on`) USING BTREE
);
When I do a select on this table, it makes proper/full use of the index:
EXPLAIN SELECT *
FROM `queue`
FORCE INDEX (`type_status_processid_postedon`)
WHERE type = 1
AND `status` = 'pending'
AND `process_id` IS NULL
ORDER BY `posted_on` ASC
LIMIT 1;
+----+-------------+-------+------------+------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------+
| 1 | SIMPLE | queue | NULL | ref | type_status_processid_postedon | type_status_processid_postedon | 10 | const,const,const | 1 | 100.00 | Using index condition |
+----+-------------+-------+------------+------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------+
And yet, when I do the same query as an UPDATE, the index is not fully used.
EXPLAIN UPDATE `queue`
FORCE INDEX(`type_status_processid_postedon`)
SET `process_id` = 1
WHERE `type` = 1
AND `status` = 'pending'
AND `process_id` IS NULL
ORDER BY `posted_on` ASC
LIMIT 1;
+----+-------------+-------+------------+-------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------------+
| 1 | UPDATE | queue | NULL | range | type_status_processid_postedon | type_status_processid_postedon | 10 | const,const,const | 1 | 100.00 | Using where; Using filesort |
+----+-------------+-------+------------+-------+--------------------------------+--------------------------------+---------+-------------------+------+----------+-----------------------------+
The update does a filesort. What's going on here?

MySQL not using primary index when it had compare with subquery result

I have a table with the following schema:
CREATE TABLE `stock` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`currency` varchar(3) COLLATE utf8_unicode_ci NOT NULL,
`against` varchar(3) COLLATE utf8_unicode_ci NOT NULL,
`date` date NOT NULL,
`time` time NOT NULL,
`rate` double(8,4) NOT NULL,
`ask` double(8,4) NOT NULL,
`bid` double(8,4) NOT NULL,
`created_at` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
`updated_at` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
PRIMARY KEY (`id`),
KEY `stock_currency_index` (`currency`),
KEY `stock_against_index` (`against`),
KEY `stock_date_index` (`date`),
KEY `stock_time_index` (`time`),
KEY `created_at_index` (`created_at`)
) ENGINE=InnoDB AUTO_INCREMENT=244221 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
When I execute this query, MySQL uses the index:
mysql> explain select max(id) from stock group by currency;
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+------+----------+--------------------------+
| 1 | SIMPLE | stock | NULL | range | stock_currency_index | stock_currency_index | 11 | NULL | 2 | 100.00 | Using index for group-by |
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+------+----------+--------------------------+
1 row in set, 1 warning (0.00 sec)
Also, when I execute this query, MySQL uses the PRIMARY index too:
mysql> explain select * from stock where id in (244221, 244222);
+----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+-------------+
| 1 | SIMPLE | stock | NULL | range | PRIMARY | PRIMARY | 4 | NULL | 2 | 100.00 | Using where |
+----+-------------+-------+------------+-------+---------------+---------+---------+------+------+----------+-------------+
1 row in set, 1 warning (0.00 sec)
BUT when I combine these two queries, the PRIMARY index is not used. I am confused — what am I doing wrong?
mysql> explain select * from stock where id in (select max(id) from stock group by currency);
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+--------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+--------+----------+--------------------------+
| 1 | PRIMARY | stock | NULL | ALL | NULL | NULL | NULL | NULL | 221800 | 100.00 | Using where |
| 2 | SUBQUERY | stock | NULL | range | stock_currency_index | stock_currency_index | 11 | NULL | 2 | 100.00 | Using index for group-by |
+----+-------------+-------+------------+-------+----------------------+----------------------+---------+------+--------+----------+--------------------------+
2 rows in set, 1 warning (0.00 sec)
First, try rewriting the query as:
-- Return the stock row holding the highest id for each currency by joining
-- the table to a derived table of per-currency MAX(id) values.
SELECT s.*
FROM stock AS s
INNER JOIN (
    SELECT MAX(id) AS maxid
    FROM stock
    GROUP BY currency
) AS ss
    ON ss.maxid = s.id;
Second, I would be tempted to put an index on stock(currency, id) and to use:
-- Keep a stock row only when its id equals the highest id recorded for the
-- same currency (correlated scalar subquery).
SELECT s.*
FROM stock AS s
WHERE s.id = (
    SELECT MAX(s2.id)
    FROM stock AS s2
    WHERE s2.currency = s.currency
);
Do either of these perform better?

accelerate MySQL query

This is the short version of my query:
SELECT product.* FROM product_list product
LEFT JOIN language_item language ON (product.title=language.languageVariable)
WHERE language.languageID = 1
ORDER BY language.languageValue ASC
With the ORDER BY, the query takes 3 seconds. When I remove the ORDER BY, it takes 0.3 seconds. Can you recommend a change to speed it up?
product.title and language.languageVariable both hold a language variable such as global.product.title1, and languageValue holds the translated title, such as "car" or "doll".
-- Translation table: each row maps a language variable to its text for one
-- language (languageVariable + languageID is unique, see the keys below).
-- Fix: removed the trailing comma after the last column definition, which is
-- a syntax error in MySQL.
CREATE TABLE `language_item` (
`languageItemID` int(10) UNSIGNED NOT NULL,
`languageID` int(10) UNSIGNED NOT NULL DEFAULT '0',
`languageVariable` varchar(255) NOT NULL DEFAULT '',
`languageValue` mediumtext NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- Keys are added separately; the languageValue key is a prefix index covering
-- only the first 300 characters of the column.
ALTER TABLE `language_item`
ADD PRIMARY KEY (`languageItemID`),
ADD UNIQUE KEY `languageVariable` (`languageVariable`,`languageID`),
ADD KEY `languageValue` (`languageValue`(300));
id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra
1 | SIMPLE | product | NULL | ALL | PRIMARY,inactive,archive,productCategoryID | NULL | NULL | NULL | 1475 | 88.27 | Using where; Using temporary; Using filesort
1 | SIMPLE | language | NULL | ref | languageVariable | languageVariable | 767 | db.product.title | 136 | 1.00 | Using index condition
Here is the structure of language_item, including its indexes:
-- Translation table: each row maps a language variable to its text for one
-- language (languageVariable + languageID is unique, see the keys below).
-- Fix: removed the trailing comma after the last column definition, which is
-- a syntax error in MySQL.
CREATE TABLE `language_item` (
`languageItemID` int(10) UNSIGNED NOT NULL,
`languageID` int(10) UNSIGNED NOT NULL DEFAULT '0',
`languageVariable` varchar(255) NOT NULL DEFAULT '',
`languageValue` mediumtext NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
-- Keys are added separately; the languageValue key is a prefix index covering
-- only the first 300 characters of the column.
ALTER TABLE `language_item`
ADD PRIMARY KEY (`languageItemID`),
ADD UNIQUE KEY `languageVariable` (`languageVariable`,`languageID`),
ADD KEY `languageValue` (`languageValue`(300));
The Explain:
id | select_type | table | partitions | type | possible_keys | key |
key_len | ref | rows | filtered | Extra 1 | SIMPLE | product | NULL |
ALL | PRIMARY,inactive,archive,productCategoryID | NULL | NULL | NULL
| 1475 | 88.27 | Using where; Using temporary; Using filesort 1 |
SIMPLE | language | NULL | ref | languageVariable | languageVariable |
767 | db.product.title | 136 | 1.00 | Using index condition
Try this:
-- Build the product/translation join for languageID 1 in a derived table,
-- exposing the translated text as lv, then sort the outer result by lv.
SELECT d.*
FROM (
    SELECT
        product.*,
        language.languageValue AS lv
    FROM product_list AS product
    INNER JOIN language_item AS language
        ON (product.title = language.languageVariable)
    WHERE language.languageID = 1
) AS d
ORDER BY d.lv ASC

Query takes over 30 minutes -- How can I speed up? (Explain and schema included)

Goal: Display ethnicity data by district for each race and year as a percent of total.
Problem: Query takes over 30 minutes to run. I would like to get this down to under 10 seconds. Is there another strategy to solve this problem that I am not thinking of?
Query:
SELECT field_data_field_district_id.field_district_id_value as district_id, year, race, ROUND(SUM(count)/(
SELECT SUM(count)
FROM school_data_race_ethnicity_raw as school_data_race_ethnicity_raw_inner
INNER JOIN field_data_field_school_id as field_data_field_school_id_inner ON field_data_field_school_id_inner.field_school_id_value = school_data_race_ethnicity_raw_inner.school_id
INNER JOIN field_data_field_district as field_data_field_district_inner ON field_data_field_district_inner.entity_id = field_data_field_school_id_inner.entity_id
INNER JOIN field_data_field_district_id as field_data_field_district_id_inner ON field_data_field_district_inner.field_district_nid = field_data_field_district_id_inner.entity_id
WHERE field_data_field_district_id_inner.field_district_id_value = field_data_field_district_id.field_district_id_value and
school_data_race_ethnicity_raw_inner.year = school_data_race_ethnicity_raw.year
) * 100 ,2) as percent
FROM school_data_race_ethnicity_raw
INNER JOIN field_data_field_school_id ON field_data_field_school_id.field_school_id_value = school_data_race_ethnicity_raw.school_id
INNER JOIN field_data_field_district ON field_data_field_district.entity_id = field_data_field_school_id.entity_id
INNER JOIN field_data_field_district_id ON field_data_field_district.field_district_nid = field_data_field_district_id.entity_id
GROUP BY district_id, year, race
Explain:
+----+--------------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+--------------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| 1 | PRIMARY | field_data_field_district_id | ALL | entity_id | NULL | NULL | NULL | 685 | Using temporary; Using filesort |
| 1 | PRIMARY | field_data_field_district | ref | entity_id,field_district_nid | field_district_nid | 5 | rocdocs_main_drupal_7.field_data_field_district_id.entity_id | 3 | Using where; Using index |
| 1 | PRIMARY | field_data_field_school_id | ref | entity_id | entity_id | 4 | rocdocs_main_drupal_7.field_data_field_district.entity_id | 1 | |
| 1 | PRIMARY | school_data_race_ethnicity_raw | ALL | NULL | NULL | NULL | NULL | 97098 | Using where; Using join buffer |
| 2 | DEPENDENT SUBQUERY | field_data_field_district_id_inner | ALL | entity_id | NULL | NULL | NULL | 685 | Using where |
| 2 | DEPENDENT SUBQUERY | field_data_field_district_inner | ref | entity_id,field_district_nid | field_district_nid | 5 | rocdocs_main_drupal_7.field_data_field_district_id_inner.entity_id | 3 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | field_data_field_school_id_inner | ref | entity_id | entity_id | 4 | rocdocs_main_drupal_7.field_data_field_district_inner.entity_id | 1 | |
| 2 | DEPENDENT SUBQUERY | school_data_race_ethnicity_raw_inner | ref | year | year | 4 | func | 32366 | Using where |
+----+--------------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
Tables:
school_data_race_ethnicity_raw - (Approx. 100,000 rows)
mysql> show create table school_data_race_ethnicity_raw;
+--------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+--------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| school_data_race_ethnicity_raw | CREATE TABLE `school_data_race_ethnicity_raw` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`school_id` varchar(255) NOT NULL,
`year` int(11) NOT NULL,
`race` varchar(255) NOT NULL,
`count` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `school_id` (`school_id`,`year`),
KEY `year` (`year`,`race`),
KEY `school_id_2` (`school_id`)
) ENGINE=MyISAM AUTO_INCREMENT=97099 DEFAULT CHARSET=latin1 |
+--------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
field_data_field_school_id - Approx. 5000 rows
mysql> show create table field_data_field_school_id;
+----------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+----------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| field_data_field_school_id | CREATE TABLE `field_data_field_school_id` (
`entity_type` varchar(128) NOT NULL DEFAULT '' COMMENT 'The entity type this data is attached to',
`bundle` varchar(128) NOT NULL DEFAULT '' COMMENT 'The field instance bundle to which this row belongs, used when deleting a field instance',
`deleted` tinyint(4) NOT NULL DEFAULT '0' COMMENT 'A boolean indicating whether this data item has been deleted',
`entity_id` int(10) unsigned NOT NULL COMMENT 'The entity id this data is attached to',
`revision_id` int(10) unsigned DEFAULT NULL COMMENT 'The entity revision id this data is attached to, or NULL if the entity type is not versioned',
`language` varchar(32) NOT NULL DEFAULT '' COMMENT 'The language for this data item.',
`delta` int(10) unsigned NOT NULL COMMENT 'The sequence number for this data item, used for multi-value fields',
`field_school_id_value` varchar(255) DEFAULT NULL,
`field_school_id_format` varchar(255) DEFAULT NULL,
PRIMARY KEY (`entity_type`,`entity_id`,`deleted`,`delta`,`language`),
KEY `entity_type` (`entity_type`),
KEY `bundle` (`bundle`),
KEY `deleted` (`deleted`),
KEY `entity_id` (`entity_id`),
KEY `revision_id` (`revision_id`),
KEY `language` (`language`),
KEY `field_school_id_format` (`field_school_id_format`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='Data storage for field 234 (field_school_id)' |
+----------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
field_data_field_district - Approx. 5000 rows
mysql> show create table field_data_field_district;
+---------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+---------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| field_data_field_district | CREATE TABLE `field_data_field_district` (
`entity_type` varchar(128) NOT NULL DEFAULT '' COMMENT 'The entity type this data is attached to',
`bundle` varchar(128) NOT NULL DEFAULT '' COMMENT 'The field instance bundle to which this row belongs, used when deleting a field instance',
`deleted` tinyint(4) NOT NULL DEFAULT '0' COMMENT 'A boolean indicating whether this data item has been deleted',
`entity_id` int(10) unsigned NOT NULL COMMENT 'The entity id this data is attached to',
`revision_id` int(10) unsigned DEFAULT NULL COMMENT 'The entity revision id this data is attached to, or NULL if the entity type is not versioned',
`language` varchar(32) NOT NULL DEFAULT '' COMMENT 'The language for this data item.',
`delta` int(10) unsigned NOT NULL COMMENT 'The sequence number for this data item, used for multi-value fields',
`field_district_nid` int(10) unsigned DEFAULT NULL,
PRIMARY KEY (`entity_type`,`entity_id`,`deleted`,`delta`,`language`),
KEY `entity_type` (`entity_type`),
KEY `bundle` (`bundle`),
KEY `deleted` (`deleted`),
KEY `entity_id` (`entity_id`),
KEY `revision_id` (`revision_id`),
KEY `language` (`language`),
KEY `field_district_nid` (`field_district_nid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='Data storage for field 221 (field_district)' |
+---------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)
field_data_field_district_id - Approx: 1000 rows
mysql> show create table field_data_field_district_id;
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| field_data_field_district_id | CREATE TABLE `field_data_field_district_id` (
`entity_type` varchar(128) NOT NULL DEFAULT '' COMMENT 'The entity type this data is attached to',
`bundle` varchar(128) NOT NULL DEFAULT '' COMMENT 'The field instance bundle to which this row belongs, used when deleting a field instance',
`deleted` tinyint(4) NOT NULL DEFAULT '0' COMMENT 'A boolean indicating whether this data item has been deleted',
`entity_id` int(10) unsigned NOT NULL COMMENT 'The entity id this data is attached to',
`revision_id` int(10) unsigned DEFAULT NULL COMMENT 'The entity revision id this data is attached to, or NULL if the entity type is not versioned',
`language` varchar(32) NOT NULL DEFAULT '' COMMENT 'The language for this data item.',
`delta` int(10) unsigned NOT NULL COMMENT 'The sequence number for this data item, used for multi-value fields',
`field_district_id_value` varchar(255) DEFAULT NULL,
`field_district_id_format` varchar(255) DEFAULT NULL,
PRIMARY KEY (`entity_type`,`entity_id`,`deleted`,`delta`,`language`),
KEY `entity_type` (`entity_type`),
KEY `bundle` (`bundle`),
KEY `deleted` (`deleted`),
KEY `entity_id` (`entity_id`),
KEY `revision_id` (`revision_id`),
KEY `language` (`language`),
KEY `field_district_id_format` (`field_district_id_format`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='Data storage for field 218 (field_district_id)' |
+------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
NOTE: The only table I can change is school_data_race_ethnicity_raw, as the others are generated by Drupal.
mysql> explain SELECT field_data_field_district_id_inner.field_district_id_value,
-> school_data_race_ethnicity_raw_inner.year,
-> SUM(count) as total
-> FROM school_data_race_ethnicity_raw as school_data_race_ethnicity_raw_inner
-> INNER JOIN field_data_field_school_id as field_data_field_school_id_inner
-> ON field_data_field_school_id_inner.field_school_id_value
-> = school_data_race_ethnicity_raw_inner.school_id
-> INNER JOIN field_data_field_district as field_data_field_district_inner
-> ON field_data_field_district_inner.entity_id
-> = field_data_field_school_id_inner.entity_id
-> INNER JOIN field_data_field_district_id as field_data_field_district_id_inner
-> ON field_data_field_district_inner.field_district_nid
-> = field_data_field_district_id_inner.entity_id
-> GROUP BY field_district_id_value, year
-> ;
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| 1 | SIMPLE | field_data_field_district_id_inner | ALL | entity_id | NULL | NULL | NULL | 773 | Using temporary; Using filesort |
| 1 | SIMPLE | field_data_field_district_inner | ref | entity_id,field_district_nid | field_district_nid | 5 | rocdocs_main_drupal_7.field_data_field_district_id_inner.entity_id | 3 | Using where; Using index |
| 1 | SIMPLE | field_data_field_school_id_inner | ref | entity_id | entity_id | 4 | rocdocs_main_drupal_7.field_data_field_district_inner.entity_id | 1 | |
| 1 | SIMPLE | school_data_race_ethnicity_raw_inner | ALL | NULL | NULL | NULL | NULL | 97098 | Using where; Using join buffer |
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
4 rows in set (0.00 sec)
How about this:
-- Percentage that each race contributes to a district's count for a year.
-- The derived table x supplies the per-district, per-year total; the outer
-- query sums the count per race and divides it by that total.
SELECT
    field_data_field_district_id.field_district_id_value AS district_id,
    x.year,
    race,
    ROUND(SUM(count) / x.total * 100, 2) AS percent
FROM school_data_race_ethnicity_raw
    INNER JOIN field_data_field_school_id
        ON school_data_race_ethnicity_raw.school_id
         = field_data_field_school_id.field_school_id_value
    INNER JOIN field_data_field_district
        ON field_data_field_school_id.entity_id
         = field_data_field_district.entity_id
    INNER JOIN field_data_field_district_id
        ON field_data_field_district_id.entity_id
         = field_data_field_district.field_district_nid
    INNER JOIN (
        -- Total count per district id and year, computed once.
        SELECT
            field_data_field_district_id_inner.field_district_id_value,
            school_data_race_ethnicity_raw_inner.year,
            SUM(count) AS total
        FROM school_data_race_ethnicity_raw AS school_data_race_ethnicity_raw_inner
            INNER JOIN field_data_field_school_id AS field_data_field_school_id_inner
                ON school_data_race_ethnicity_raw_inner.school_id
                 = field_data_field_school_id_inner.field_school_id_value
            INNER JOIN field_data_field_district AS field_data_field_district_inner
                ON field_data_field_school_id_inner.entity_id
                 = field_data_field_district_inner.entity_id
            INNER JOIN field_data_field_district_id AS field_data_field_district_id_inner
                ON field_data_field_district_id_inner.entity_id
                 = field_data_field_district_inner.field_district_nid
        GROUP BY
            field_data_field_district_id_inner.field_district_id_value,
            school_data_race_ethnicity_raw_inner.year
    ) AS x
        ON  field_data_field_district_id.field_district_id_value = x.field_district_id_value
        AND school_data_race_ethnicity_raw.year = x.year
GROUP BY
    district_id,
    x.year,
    race
I basically moved your query that calculates the total count for each district/year out of the SELECT and into another JOIN.
Explain:
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
| 1 | PRIMARY | field_data_field_district_id | ALL | entity_id | NULL | NULL | NULL | 773 | Using temporary; Using filesort |
| 1 | PRIMARY | field_data_field_district | ref | entity_id,field_district_nid | field_district_nid | 5 | rocdocs_main_drupal_7.field_data_field_district_id.entity_id | 3 | Using where; Using index |
| 1 | PRIMARY | field_data_field_school_id | ref | entity_id | entity_id | 4 | rocdocs_main_drupal_7.field_data_field_district.entity_id | 1 | |
| 1 | PRIMARY | <derived2> | ALL | NULL | NULL | NULL | NULL | 1902 | Using where; Using join buffer |
| 1 | PRIMARY | school_data_race_ethnicity_raw | ref | year | year | 4 | x.year | 32366 | Using where |
| 2 | DERIVED | field_data_field_district_id_inner | ALL | entity_id | NULL | NULL | NULL | 773 | Using temporary; Using filesort |
| 2 | DERIVED | field_data_field_district_inner | ref | entity_id,field_district_nid | field_district_nid | 5 | rocdocs_main_drupal_7.field_data_field_district_id_inner.entity_id | 3 | Using where; Using index |
| 2 | DERIVED | field_data_field_school_id_inner | ref | entity_id | entity_id | 4 | rocdocs_main_drupal_7.field_data_field_district_inner.entity_id | 1 | |
| 2 | DERIVED | school_data_race_ethnicity_raw_inner | ALL | NULL | NULL | NULL | NULL | 97098 | Using where; Using join buffer |
+----+-------------+--------------------------------------+------+------------------------------+--------------------+---------+--------------------------------------------------------------------+-------+---------------------------------+
9 rows in set (4 min 0.59 sec)

Efficient MySQL query to find entries in A where not matched in B

I have a couple of tables (products and suppliers) and want to find out which items are no longer listed in the suppliers table.
Table uc_products has the products. Table uc_supplier_csv has supplier stocks. uc_products.model joins against uc_supplier_csv.sku.
I am seeing very long-running queries when trying to identify the stock in the products table which is not referred to in the suppliers table. I only want to extract the nid of the products that have no matching supplier row; the sid IS NULL condition is there just so I can identify which items don't have a supplier.
For the first of the queries below, it takes the DB server (4GB ram / 2x 2.4GHz intel) an hour to get a result (507 rows). I didn't wait for the second query to finish.
How can I make this query more efficient? Is the slowness due to the mismatched character sets?
I was thinking that the following would be the most efficient SQL to use:
-- Anti-join: list the products for which the outer join found no supplier
-- row. uc_supplier_csv.sid is declared NOT NULL, so c.sid can only be NULL
-- when no row of uc_supplier_csv matched the product's model.
SELECT
    p.nid,
    c.sid
FROM uc_products AS p
    LEFT JOIN uc_supplier_csv AS c
        ON c.sku = p.model
WHERE c.sid IS NULL;
For this query, I get the following EXPLAIN result:
mysql> EXPLAIN SELECT nid, sid FROM uc_products p LEFT OUTER JOIN uc_supplier_csv c ON p.model = c.sku WHERE sid IS NULL;
+----+-------------+-------+------+---------------+------+---------+------+--------+-------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------+------+---------------+------+---------+------+--------+-------------------------+
| 1 | SIMPLE | p | ALL | NULL | NULL | NULL | NULL | 6526 | |
| 1 | SIMPLE | c | ALL | NULL | NULL | NULL | NULL | 126639 | Using where; Not exists |
+----+-------------+-------+------+---------------+------+---------+------+--------+-------------------------+
2 rows in set (0.00 sec)
I would have thought that the keys idx_sku and idx_model would be valid for use here, but they aren't. Is that because the tables' default charsets do not match? One is UTF-8 and one is latin1.
I also considered this form:
-- List the nid of every product whose model does not appear as a sku in
-- uc_supplier_csv.
--
-- uc_supplier_csv.sku is nullable. If the subquery returns even one NULL,
-- `model NOT IN (...)` can never evaluate to TRUE (it is FALSE for matches
-- and UNKNOWN for everything else), so the query would silently return no
-- rows. Filtering out NULL skus keeps the intended anti-join semantics.
SELECT nid
FROM uc_products
WHERE model NOT IN (
    SELECT DISTINCT sku
    FROM uc_supplier_csv
    WHERE sku IS NOT NULL
);
EXPLAIN shows the following results for that query:
mysql> explain select nid from uc_products where model not in ( select sku from uc_supplier_csv ) ;
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
| 1 | PRIMARY | uc_products | ALL | NULL | NULL | NULL | NULL | 6520 | Using where |
| 2 | DEPENDENT SUBQUERY | uc_supplier_csv | index | idx_sku,idx_sku_stock | idx_sku | 258 | NULL | 126639 | Using where; Using index |
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
2 rows in set (0.00 sec)
And just so I don't miss anything out, here are a few more exciting details: the table sizes and stats, and the table structure :)
mysql> show table status where Name in ( 'uc_supplier_csv', 'uc_products' ) ;
+-----------------+--------+---------+------------+--------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------+
| Name | Engine | Version | Row_format | Rows | Avg_row_length | Data_length | Max_data_length | Index_length | Data_free | Auto_increment | Create_time | Update_time | Check_time | Collation | Checksum | Create_options | Comment |
+-----------------+--------+---------+------------+--------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------+
| uc_products | MyISAM | 10 | Dynamic | 6520 | 89 | 585796 | 281474976710655 | 232448 | 912 | NULL | 2009-04-24 11:03:15 | 2009-10-12 14:23:43 | 2009-04-24 11:03:16 | utf8_general_ci | NULL | | |
| uc_supplier_csv | MyISAM | 10 | Dynamic | 126639 | 26 | 3399704 | 281474976710655 | 5864448 | 0 | NULL | 2009-10-12 14:28:25 | 2009-10-12 14:28:25 | 2009-10-12 14:28:27 | latin1_swedish_ci | NULL | | |
+-----------------+--------+---------+------------+--------+----------------+-------------+-----------------+--------------+-----------+----------------+---------------------+---------------------+---------------------+-------------------+----------+----------------+---------+
and
-- Product table (MyISAM, utf8). `model` is the column compared against
-- uc_supplier_csv.sku in the queries above and is indexed by idx_model.
CREATE TABLE `uc_products` (
    `vid`          MEDIUMINT(9)         NOT NULL DEFAULT '0',
    `nid`          MEDIUMINT(9)         NOT NULL DEFAULT '0',
    `model`        VARCHAR(255)         NOT NULL DEFAULT '',
    `list_price`   DECIMAL(10,2)        NOT NULL DEFAULT '0.00',
    `cost`         DECIMAL(10,2)        NOT NULL DEFAULT '0.00',
    `sell_price`   DECIMAL(10,2)        NOT NULL DEFAULT '0.00',
    `weight`       FLOAT                NOT NULL DEFAULT '0',
    `weight_units` VARCHAR(255)         NOT NULL DEFAULT 'lb',
    `length`       FLOAT UNSIGNED       NOT NULL DEFAULT '0',
    `width`        FLOAT UNSIGNED       NOT NULL DEFAULT '0',
    `height`       FLOAT UNSIGNED       NOT NULL DEFAULT '0',
    `length_units` VARCHAR(255)         NOT NULL DEFAULT 'in',
    `pkg_qty`      SMALLINT(5) UNSIGNED NOT NULL DEFAULT '1',
    `default_qty`  SMALLINT(5) UNSIGNED NOT NULL DEFAULT '1',
    `unique_hash`  VARCHAR(32)          NOT NULL,
    `ordering`     TINYINT(2)           NOT NULL DEFAULT '0',
    `shippable`    TINYINT(2)           NOT NULL DEFAULT '1',
    PRIMARY KEY (`vid`),
    KEY `idx_model` (`model`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8
-- Supplier stock table (MyISAM, latin1 -- a different default character set
-- from uc_products, which is utf8). `sku` is nullable and has no uniqueness
-- constraint. idx_sku indexes the same leading column as idx_sku_stock.
CREATE TABLE `uc_supplier_csv` (
    `sid`        INT(10) UNSIGNED NOT NULL DEFAULT '0',
    `sku`        VARCHAR(255)              DEFAULT NULL,
    `stock`      INT(10) UNSIGNED NOT NULL DEFAULT '0',
    `list_price` DECIMAL(8,2)              DEFAULT '0.00',
    KEY `idx_sku`       (`sku`),
    KEY `idx_stock`     (`stock`),
    KEY `idx_sku_stock` (`sku`, `stock`),
    KEY `idx_sid`       (`sid`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
EDIT: Adding query plans for a couple of suggested queries from Martin below:
mysql> explain SELECT nid FROM uc_products p WHERE NOT EXISTS ( SELECT 1 FROM uc_supplier_csv c WHERE p.model = c.sku ) ;
+----+--------------------+-------+-------+---------------+---------+---------+------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+--------------------+-------+-------+---------------+---------+---------+------+--------+--------------------------+
| 1 | PRIMARY | p | ALL | NULL | NULL | NULL | NULL | 6526 | Using where |
| 2 | DEPENDENT SUBQUERY | c | index | NULL | idx_sku | 258 | NULL | 126639 | Using where; Using index |
+----+--------------------+-------+-------+---------------+---------+---------+------+--------+--------------------------+
2 rows in set (0.00 sec)
mysql> explain SELECT nid FROM uc_products WHERE model NOT IN ( SELECT sku FROM uc_supplier_csv ) ;
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
| 1 | PRIMARY | uc_products | ALL | NULL | NULL | NULL | NULL | 6526 | Using where |
| 2 | DEPENDENT SUBQUERY | uc_supplier_csv | index | idx_sku,idx_sku_stock | idx_sku | 258 | NULL | 126639 | Using where; Using index |
+----+--------------------+-----------------+-------+-----------------------+---------+---------+------+--------+--------------------------+
2 rows in set (0.00 sec)
Perhaps try using NOT EXISTS rather than counts? For example:
-- Return the nid of each product for which no uc_supplier_csv row has a sku
-- equal to the product's model.
SELECT p.nid
FROM uc_products AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM uc_supplier_csv AS c
    WHERE c.sku = p.model
)
SO user Quassnoi has a short article outlining some tests that suggest that this might also be worth a try:
-- Return the nid of each product whose model is not present as a sku in
-- uc_supplier_csv.
--
-- uc_supplier_csv.sku is nullable; a single NULL returned by the subquery
-- would stop `model NOT IN (...)` from ever being TRUE and the query would
-- return no rows at all. The IS NOT NULL filter guards against that.
SELECT nid
FROM uc_products
WHERE model NOT IN (
    SELECT sku
    FROM uc_supplier_csv
    WHERE sku IS NOT NULL
)
basically as per your original query, without the DISTINCTion.
Another one for you Chris, this time with help for the cross-encoding join:
-- Same anti-join as the plain NOT EXISTS form, but the utf8 product model is
-- converted to latin1 before the comparison so that both sides use the
-- character set of uc_supplier_csv.sku.
-- NOTE(review): presumably this lets idx_sku be used for the lookup --
-- confirm with EXPLAIN.
SELECT p.nid
FROM uc_products AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM uc_supplier_csv AS c
    WHERE c.sku = CONVERT(p.model USING latin1)
)