how to rewrite a join query with CTE? - mysql

I tried to rewrite the query below using CTEs for better readability, but the rewritten query returns mismatched data. How can I solve this problem?
Query;
-- Counts the joined rows for funnel rows with seq_no = 1 created inside the
-- window, restricted to user_tasks rows flagged is_ragetap = 1.
SELECT COUNT(1) AS rage_tap
FROM ue_summary.summary_funnel_1066 AS s
INNER JOIN user_tasks_metadata AS utm
    ON s.asi = utm.asi
INNER JOIN user_tasks AS ut
    ON utm.user_task_id = ut.id
WHERE ut.is_ragetap = 1
  AND s.seq_no = 1
  AND s.created_at BETWEEN '2022-09-27 00:00:00' AND '2022-10-27 00:00:00'
Explain plan ;
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: ut
partitions: NULL
type: ref
possible_keys: PRIMARY,idx_ir
key: idx_ir
key_len: 1
ref: const
rows: 8413412
filtered: 100.00
Extra: Using index
*************************** 2. row ***************************
id: 1
select_type: SIMPLE
table: utm
partitions: NULL
type: ref
possible_keys: id_asi,asi
key: id_asi
key_len: 8
ref: ue_stage.ut.id
rows: 1
filtered: 100.00
Extra: Using index
*************************** 3. row ***************************
id: 1
select_type: SIMPLE
table: s
partitions: NULL
type: eq_ref
possible_keys: PRIMARY,unique_asi_seq_no,seq_no_date,created_at,idx_combo,idx_seq_created_asi
key: unique_asi_seq_no
key_len: 12
ref: ue_stage.utm.asi,const
rows: 1
filtered: 50.00
Extra: Using where; Using index
Table structure;
-- Definition of summary_funnel_1066, the table aliased as s in the query.
Create Table: CREATE TABLE `summary_funnel_1066` (
`funnel_id` int DEFAULT NULL,
`app_id` int DEFAULT NULL,
`platform` int DEFAULT NULL,
`app_version_id` int NOT NULL,
`seq_no` int NOT NULL,
`property_id` bigint DEFAULT NULL,
`property_name` varchar(255) DEFAULT NULL,
`property_type` varchar(50) DEFAULT NULL,
-- join column: matched against user_tasks_metadata.asi
`asi` bigint NOT NULL,
-- datetime, so range filters compare full timestamps rather than calendar days
`created_at` datetime NOT NULL,
`capture_time_relative` decimal(15,4) DEFAULT NULL,
`last_event_id` bigint DEFAULT NULL,
`last_event_name` varchar(100) DEFAULT NULL,
`last_message_id` bigint DEFAULT NULL,
`last_message_name` varchar(100) DEFAULT NULL,
`last_tag_id` bigint DEFAULT NULL,
`last_tag_name` varchar(100) DEFAULT NULL,
`is_crash` tinyint DEFAULT NULL,
`is_anr` tinyint DEFAULT NULL,
`is_ragetap` tinyint DEFAULT NULL,
`last_error_type_id` bigint DEFAULT NULL,
`last_error_type` varchar(100) DEFAULT NULL,
`screen_id` bigint DEFAULT NULL,
`screen_name` varchar(100) DEFAULT NULL,
`last_screen_id` bigint DEFAULT NULL,
`last_screen_name` varchar(100) DEFAULT NULL,
`user_task_id` bigint DEFAULT NULL,
`ue_id` bigint DEFAULT NULL,
PRIMARY KEY (`asi`,`seq_no`,`created_at`,`app_version_id`),
-- the index chosen for table s in the EXPLAIN output (eq_ref on asi + constant seq_no)
UNIQUE KEY `unique_asi_seq_no` (`asi`,`seq_no`),
-- NOTE(review): leftmost prefix of idx_seq_created_asi, so this index looks redundant
KEY `seq_no_date` (`seq_no`,`created_at`),
KEY `last_ids` (`last_screen_id`,`last_event_id`),
-- covers the seq_no / created_at filter plus the asi join column
KEY `idx_seq_created_asi`(seq_no,created_at,asi),
KEY `created_at` (`created_at`),
KEY `idx_combo` (`seq_no`,`property_id`,`property_name`,`created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
Table: user_tasks_metadata
-- Definition of user_tasks_metadata (alias utm), which links asi values to user_tasks ids.
Create Table: CREATE TABLE `user_tasks_metadata` (
`id` bigint NOT NULL AUTO_INCREMENT,
-- join column: matched against user_tasks.id
`user_task_id` bigint NOT NULL,
`device_id` bigint NOT NULL,
`custom_user_id` bigint DEFAULT NULL,
-- join column: matched against summary_funnel_1066.asi
`asi` bigint NOT NULL DEFAULT '0',
`session_id` varchar(300) DEFAULT NULL,
`model` bigint DEFAULT NULL,
`api_level` varchar(300) DEFAULT NULL,
`app_version_id` bigint NOT NULL DEFAULT '0',
`os_version` bigint DEFAULT NULL,
`location` bigint DEFAULT NULL,
`connection_speed` varchar(10) DEFAULT NULL,
`network_operator` varchar(100) CHARACTER SET utf8mb3 COLLATE utf8_general_ci DEFAULT NULL,
`config_response` tinyint DEFAULT '1',
`total_internal_memory` double(12,5) DEFAULT NULL,
`available_internal_memory` double(12,5) DEFAULT NULL,
`total_ram` double(12,5) DEFAULT NULL,
`available_ram` double(12,5) DEFAULT NULL,
`framework` varchar(45) DEFAULT '',
`ue_sdk_version` mediumint DEFAULT NULL,
`crash_type` bigint DEFAULT NULL,
`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`user_profile_id` bigint DEFAULT NULL,
`associated_custom_user_id` bigint DEFAULT NULL,
`first_usr_interaction` bigint DEFAULT NULL,
`app_launch_type` varchar(45) DEFAULT '',
`app_launch_time` bigint DEFAULT '0',
PRIMARY KEY (`id`),
KEY `session_metadata_filter_idx` (`custom_user_id`,`device_id`),
-- NOTE(review): leftmost prefix of id_asi, so this index looks redundant
KEY `usertask_fk_idx` (`user_task_id`),
KEY `idx_app_version` (`app_version_id`),
-- NOTE(review): asi_idx and asi (last key) index the same single column
KEY `asi_idx` (`asi`),
KEY `device_id` (`device_id`),
KEY `user_profile_id` (`user_profile_id`),
-- the index chosen for table utm in the EXPLAIN output
KEY `id_asi` (`user_task_id`,`asi`),
KEY `asi` (`asi`)
) ENGINE=InnoDB AUTO_INCREMENT=2252872743 DEFAULT CHARSET=latin1
Table: user_tasks
-- Definition of user_tasks (alias ut), the table filtered on is_ragetap.
Create Table: CREATE TABLE `user_tasks` (
`id` bigint NOT NULL AUTO_INCREMENT,
`app_id` bigint NOT NULL,
`status` tinyint NOT NULL DEFAULT '0',
`app_version` varchar(100) DEFAULT NULL,
`platform` tinyint NOT NULL DEFAULT '1',
`exception_type` tinyint NOT NULL DEFAULT '0',
`error_count` smallint NOT NULL DEFAULT '0',
`crash_type` varchar(300) DEFAULT NULL,
`crash_log` varchar(300) DEFAULT NULL,
`avg_signal_level` int DEFAULT '0',
`is_read` tinyint(1) NOT NULL DEFAULT '0',
`is_important` tinyint(1) NOT NULL DEFAULT '0',
`is_video_available` tinyint(1) NOT NULL DEFAULT '0',
`is_video_played` tinyint(1) NOT NULL DEFAULT '0',
`is_ex` tinyint(1) NOT NULL DEFAULT '0',
-- flag column used in the WHERE clause (is_ragetap = 1)
`is_ragetap` tinyint(1) NOT NULL DEFAULT '0',
`session_start_time` datetime DEFAULT NULL,
`network_type` tinyint NOT NULL DEFAULT '0',
`s3_video_url` varchar(255) DEFAULT NULL,
`image_format` tinyint DEFAULT '0',
`ue_release_version` smallint NOT NULL DEFAULT '0',
`created_at` datetime NOT NULL,
`updated_at` datetime DEFAULT NULL,
`batch_created_at` datetime DEFAULT NULL,
`sys_creation_date` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
KEY `session_filter_idx_2` (`app_id`,`platform`,`created_at`,`exception_type`,`app_version`),
KEY `batch_created_idx` (`app_id`,`platform`,`batch_created_at`),
KEY `app_id_created_at` (`app_id`,`created_at`),
-- NOTE(review): app_id is already the leading column of the three indexes above
KEY `id_app_id` (`app_id`),
-- the index chosen for table ut in the EXPLAIN output (about 8.4M estimated rows)
KEY `idx_ir` (`is_ragetap`)
) ENGINE=InnoDB AUTO_INCREMENT=1648177712 DEFAULT CHARSET=latin1
rewritten query;
-- NOTE(review): this attempt does not run as written; the problems are marked inline.
with cte1 as (
-- NOTE(review): asi is selected next to count(1) without GROUP BY, which collapses
-- the CTE to a single row (or is rejected when ONLY_FULL_GROUP_BY is enabled).
select asi,count(1) as rage_tap
from ue_summary.summary_funnel_1066
-- NOTE(review): alias s is never declared; the FROM clause above has no alias.
where s.seq_no = 1
and s.created_at between '2022-09-27 00:00:00' and '2022-10-27 00:00:00'
),
cte2 as (
-- NOTE(review): same aggregate-without-GROUP-BY pattern as in cte1.
select id, count(*) 'rage_tap1'
from user_tasks ut where is_ragetap = 1
)
-- NOTE(review): the counts are computed before the joins here, whereas the
-- original query counts the rows produced after the joins.
select cte1.*,cte2.* from cte1
inner join user_tasks_metadata utm on utm.asi = cte1.asi
-- NOTE(review): alias b is not defined anywhere; cte2.id was presumably intended.
inner join cte2 on b.id = utm.user_task_id
I need like below output;
+----------+
| rage_tap |
+----------+
| 1812564 |
+----------+
The original query is slow, so I chose a CTE. I also tried a subquery, but it did not help; the query still takes around 30 sec - 1.14 min.
As suggested in "slow performance of query and scanning many rows", I have indexed the column, but the query still takes a long time.
Is there any other way to optimize it?

This is your query with two CTEs. The aggregation takes place after the joins, just as in the original query.
-- funnel rows for seq_no = 1 inside the half-open window [2022-09-27, 2022-10-27);
-- only the join key is projected because the outer query merely counts rows
with s as
(
  select asi
  from ue_summary.summary_funnel_1066
  where seq_no = 1
    and created_at >= date '2022-09-27'
    and created_at <  date '2022-10-27'
)
-- ids of the user_tasks rows flagged is_ragetap = 1
, ut as
(
  select id
  from user_tasks
  where is_ragetap = 1
)
-- the count is taken after the joins, exactly as in the original query
select count(*) as rage_tap
from s
inner join user_tasks_metadata utm on utm.asi = s.asi
inner join ut on ut.id = utm.user_task_id;
As created_at is a datetime, you should not use BETWEEN, but >= and <. Please check if the date range that I put in my query matches your requirements. It excludes 2022-10-27. If you want to include it, change this to and created_at < date '2022-10-28'.

select id, count(*) 'rage_tap1'
from user_tasks ut where is_ragetap = 1
does not make sense. There is an aggregate (COUNT(*)) but no GROUP BY. Were you hoping to get one row? If so, which row? GROUP BY id does not make sense either, since id is unique.
DOUBLE(m,n) is worse than simply DOUBLE. In fact, the (m,n) syntax for FLOAT and DOUBLE is deprecated as of MySQL 8.0.17 and is expected to be removed in a future version.
When you have INDEX(a,b), you don't need INDEX(a).
count(1) as rage_tap is done after the JOINs, so it may have an inflated value. Did you do a sanity check?
utm has two indexes on asi. Toss both and add INDEX(asi, user_task_id)
As for turning the Joins into Ctes, go back to when you envisioned the query. You probably said "I need this stuff from this table", then "that stuff from that table", and finally "put things together this way". If you can go back to that thought process, you have the CTEs. (I don't have any idea what the data means or what the goal is, so I cannot reproduce that thought process.)

Related

slow performance of query and scanning many rows

The query below takes 6.18 min to execute and returns one row. The cardinality value for exception_type (filtered here with = 1) is 3. I don't know how to improve the performance.
Query;
-- Counts the joined rows for funnel rows with seq_no = 1 created inside the
-- window, restricted to user_tasks rows with exception_type = 1.
SELECT COUNT(1) AS rage_tap
FROM summary_funnel_1066 AS s
INNER JOIN user_tasks_metadata AS utm
    ON s.asi = utm.asi
INNER JOIN user_tasks AS ut
    ON utm.user_task_id = ut.id
WHERE ut.exception_type = 1
  AND s.seq_no = 1
  AND s.created_at BETWEEN '2022-09-27 00:00:00' AND '2022-10-27 00:00:00'
explain plan;
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: ut
partitions: NULL
type: ALL
possible_keys: PRIMARY
key: NULL
key_len: NULL
ref: NULL
rows: 129554700
filtered: 10.00
Extra: Using where
*************************** 2. row ***************************
id: 1
select_type: SIMPLE
table: utm
partitions: NULL
type: ref
possible_keys: usertask_fk_idx,asi_idx,id_asi,asi
key: usertask_fk_idx
key_len: 8
ref: ue_stage.ut.id
rows: 1
filtered: 100.00
Extra: NULL
*************************** 3. row ***************************
id: 1
select_type: SIMPLE
table: s
partitions: NULL
type: eq_ref
possible_keys: PRIMARY,unique_asi_seq_no,seq_no_date,created_at,idx_combo
key: unique_asi_seq_no
key_len: 12
ref: ue_stage.utm.asi,const
rows: 1
filtered: 50.00
Extra: Using where; Using index
table structure;
-- Definition of summary_funnel_1066, the table aliased as s in the query.
Create Table: CREATE TABLE `summary_funnel_1066` (
`funnel_id` int DEFAULT NULL,
`app_id` int DEFAULT NULL,
`platform` int DEFAULT NULL,
`app_version_id` int NOT NULL,
`seq_no` int NOT NULL,
`property_id` bigint DEFAULT NULL,
`property_name` varchar(255) DEFAULT NULL,
`property_type` varchar(50) DEFAULT NULL,
-- join column: matched against user_tasks_metadata.asi
`asi` bigint NOT NULL,
-- datetime, so range filters compare full timestamps rather than calendar days
`created_at` datetime NOT NULL,
`capture_time_relative` decimal(15,4) DEFAULT NULL,
`last_event_id` bigint DEFAULT NULL,
`last_event_name` varchar(100) DEFAULT NULL,
`last_message_id` bigint DEFAULT NULL,
`last_message_name` varchar(100) DEFAULT NULL,
`last_tag_id` bigint DEFAULT NULL,
`last_tag_name` varchar(100) DEFAULT NULL,
`is_crash` tinyint DEFAULT NULL,
`is_anr` tinyint DEFAULT NULL,
`is_ragetap` tinyint DEFAULT NULL,
`last_error_type_id` bigint DEFAULT NULL,
`last_error_type` varchar(100) DEFAULT NULL,
`screen_id` bigint DEFAULT NULL,
`screen_name` varchar(100) DEFAULT NULL,
`last_screen_id` bigint DEFAULT NULL,
`last_screen_name` varchar(100) DEFAULT NULL,
`user_task_id` bigint DEFAULT NULL,
`ue_id` bigint DEFAULT NULL,
PRIMARY KEY (`asi`,`seq_no`,`created_at`,`app_version_id`),
-- the index chosen for table s in the EXPLAIN output (eq_ref on asi + constant seq_no)
UNIQUE KEY `unique_asi_seq_no` (`asi`,`seq_no`),
-- matches the seq_no / created_at filter but does not include the asi join column
KEY `seq_no_date` (`seq_no`,`created_at`),
KEY `last_ids` (`last_screen_id`,`last_event_id`),
KEY `created_at` (`created_at`),
KEY `idx_combo` (`seq_no`,`property_id`,`property_name`,`created_at`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
Table: user_tasks_metadata
-- Definition of user_tasks_metadata (alias utm), which links asi values to user_tasks ids.
Create Table: CREATE TABLE `user_tasks_metadata` (
`id` bigint NOT NULL AUTO_INCREMENT,
-- join column: matched against user_tasks.id
`user_task_id` bigint NOT NULL,
`device_id` bigint NOT NULL,
`custom_user_id` bigint DEFAULT NULL,
-- join column: matched against summary_funnel_1066.asi
`asi` bigint NOT NULL DEFAULT '0',
`session_id` varchar(300) DEFAULT NULL,
`model` bigint DEFAULT NULL,
`api_level` varchar(300) DEFAULT NULL,
`app_version_id` bigint NOT NULL DEFAULT '0',
`os_version` bigint DEFAULT NULL,
`location` bigint DEFAULT NULL,
`connection_speed` varchar(10) DEFAULT NULL,
`network_operator` varchar(100) CHARACTER SET utf8mb3 COLLATE utf8_general_ci DEFAULT NULL,
`config_response` tinyint DEFAULT '1',
`total_internal_memory` double(12,5) DEFAULT NULL,
`available_internal_memory` double(12,5) DEFAULT NULL,
`total_ram` double(12,5) DEFAULT NULL,
`available_ram` double(12,5) DEFAULT NULL,
`framework` varchar(45) DEFAULT '',
`ue_sdk_version` mediumint DEFAULT NULL,
`crash_type` bigint DEFAULT NULL,
`created_at` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`user_profile_id` bigint DEFAULT NULL,
`associated_custom_user_id` bigint DEFAULT NULL,
`first_usr_interaction` bigint DEFAULT NULL,
`app_launch_type` varchar(45) DEFAULT '',
`app_launch_time` bigint DEFAULT '0',
PRIMARY KEY (`id`),
KEY `session_metadata_filter_idx` (`custom_user_id`,`device_id`),
-- the index chosen for table utm in the EXPLAIN output;
-- NOTE(review): it is also the leftmost prefix of id_asi
KEY `usertask_fk_idx` (`user_task_id`),
KEY `idx_app_version` (`app_version_id`),
-- NOTE(review): asi_idx and asi (last key) index the same single column
KEY `asi_idx` (`asi`),
KEY `device_id` (`device_id`),
KEY `user_profile_id` (`user_profile_id`),
KEY `id_asi` (`user_task_id`,`asi`),
KEY `asi` (`asi`)
) ENGINE=InnoDB AUTO_INCREMENT=2252872743 DEFAULT CHARSET=latin1
Table: user_tasks
-- Definition of user_tasks (alias ut), the table filtered on exception_type.
Create Table: CREATE TABLE `user_tasks` (
`id` bigint NOT NULL AUTO_INCREMENT,
`app_id` bigint NOT NULL,
`status` tinyint NOT NULL DEFAULT '0',
`app_version` varchar(100) DEFAULT NULL,
`platform` tinyint NOT NULL DEFAULT '1',
-- column used in the WHERE clause (exception_type = 1)
`exception_type` tinyint NOT NULL DEFAULT '0',
`error_count` smallint NOT NULL DEFAULT '0',
`crash_type` varchar(300) DEFAULT NULL,
`crash_log` varchar(300) DEFAULT NULL,
`avg_signal_level` int DEFAULT '0',
`is_read` tinyint(1) NOT NULL DEFAULT '0',
`is_important` tinyint(1) NOT NULL DEFAULT '0',
`is_video_available` tinyint(1) NOT NULL DEFAULT '0',
`is_video_played` tinyint(1) NOT NULL DEFAULT '0',
`is_ex` tinyint(1) NOT NULL DEFAULT '0',
`is_ragetap` tinyint(1) NOT NULL DEFAULT '0',
`session_start_time` datetime DEFAULT NULL,
`network_type` tinyint NOT NULL DEFAULT '0',
`s3_video_url` varchar(255) DEFAULT NULL,
`image_format` tinyint DEFAULT '0',
`ue_release_version` smallint NOT NULL DEFAULT '0',
`created_at` datetime NOT NULL,
`updated_at` datetime DEFAULT NULL,
`batch_created_at` datetime DEFAULT NULL,
`sys_creation_date` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`),
-- NOTE(review): exception_type appears only as the fourth column here, and no index
-- leads with it; the EXPLAIN output shows a full scan (type: ALL) of ut
KEY `session_filter_idx_2` (`app_id`,`platform`,`created_at`,`exception_type`,`app_version`),
KEY `batch_created_idx` (`app_id`,`platform`,`batch_created_at`),
KEY `app_id_created_at` (`app_id`,`created_at`),
KEY `id_app_id` (`app_id`,`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1636224717 DEFAULT CHARSET=latin1
is there any way possible to improve the performance of the query?
fdsfdfjdnfkjdkjfnkjdnjkfndkjnfjkndjkfnkjdnsfkjndsjknf
Indexes:
utm: INDEX(asi, user_task_id)
s: INDEX(seq_no, created_at, asi)

how to optimize the below MYSQL query

The query below scans many rows: the table has an index on the column, but the query is not using that index.
Query;
-- Fetch the request-log rows whose snd_transno belongs to a transaction
-- entered between 13:00:00 and 13:30:00 on 2022-09-29 (both bounds inclusive).
SELECT
    *
FROM
    st_aepsrequest_log
WHERE
    `snd_transno` IN (
        SELECT
            pwcashout_transno
        FROM
            st_aeps_transaction_master a
        WHERE
            -- lower bound literal corrected: it read '2022-09:29 ...' (colon for hyphen)
            a.`entry_date` >= '2022-09-29 13:00:00'
            AND a.entry_date <= '2022-09-29 13:30:00'
    )
row scans;
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: st_aepsrequest_log
partitions: NULL
type: ALL
possible_keys: NULL
key: NULL
key_len: NULL
ref: NULL
rows: 7355201
filtered: 100.00
Extra: NULL
*************************** 2. row ***************************
id: 1
select_type: SIMPLE
table: a
partitions: NULL
type: eq_ref
possible_keys: snd_unique,pwaeps_transno,entry_date
key: snd_unique
key_len: 92
ref: func
rows: 1
filtered: 5.00
Extra: Using index condition; Using where
table structure;
*************************** 1. row ***************************
Table: st_aepsrequest_log
-- Definition of st_aepsrequest_log, the outer table of the query.
Create Table: CREATE TABLE `st_aepsrequest_log` (
`serno` int(11) NOT NULL AUTO_INCREMENT,
`db_serno` char(2) NOT NULL DEFAULT '',
`brand` char(2) NOT NULL DEFAULT '',
`counter_code` char(20) NOT NULL DEFAULT '',
`transno` char(30) NOT NULL DEFAULT '',
-- column compared against st_aeps_transaction_master.pwcashout_transno
`snd_transno` char(30) NOT NULL DEFAULT '',
`latlog` char(100) NOT NULL DEFAULT '',
`trans_mode` char(20) NOT NULL DEFAULT '',
`amount` double NOT NULL DEFAULT '0',
`intime` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`outtime` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`user_agent` text NOT NULL,
`remote_ip` text NOT NULL,
`request` text NOT NULL,
`response` text NOT NULL,
`url` text NOT NULL,
PRIMARY KEY (`serno`),
-- index on the lookup column; the EXPLAIN output lists no possible_keys for this table
KEY `sndtransno` (`snd_transno`)
-- NOTE(review): latin1 here, while st_aeps_transaction_master uses utf8
) ENGINE=InnoDB AUTO_INCREMENT=16912804 DEFAULT CHARSET=latin1
*************************** 1. row ***************************
Table: st_aeps_transaction_master
-- Definition of st_aeps_transaction_master (alias a), the table read by the subquery.
Create Table: CREATE TABLE `st_aeps_transaction_master` (
`serno` bigint(20) NOT NULL AUTO_INCREMENT,
-- column used for the time-window filter
`entry_date` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
`mode_id` varchar(20) NOT NULL DEFAULT '',
`db_serno` char(2) NOT NULL DEFAULT '',
`merchant_id` int(10) unsigned NOT NULL DEFAULT '0',
`service_group_id` int(10) unsigned NOT NULL,
`merchant_channel` enum('RETAIL','B2C') DEFAULT NULL,
`merchant_users_id` varchar(20) NOT NULL,
`provider_user_id` varchar(100) NOT NULL DEFAULT '',
`merchant_transno` varchar(50) DEFAULT '',
-- column compared against st_aepsrequest_log.snd_transno
`pwcashout_transno` varchar(30) NOT NULL,
`rrn` varchar(100) DEFAULT NULL,
`pw_stan` varchar(50) NOT NULL DEFAULT '',
`provider_id` int(10) unsigned NOT NULL DEFAULT '0',
`service_id` int(10) unsigned NOT NULL,
`bank_id` int(11) NOT NULL DEFAULT '0',
`amount` double DEFAULT '0',
`total_comm` double NOT NULL DEFAULT '0',
`provider_comm` double NOT NULL DEFAULT '0',
`gst` enum('INCLUSIVE','EXCLUSIVE') NOT NULL DEFAULT 'INCLUSIVE',
`gst_value` double NOT NULL DEFAULT '0',
`aadhar_no` char(12) NOT NULL DEFAULT '',
`aeps_identifier` char(50) NOT NULL DEFAULT '',
`provider_rate_mode` enum('PERCENT','AMOUNT','CUSTOM') DEFAULT 'PERCENT',
`device_info` varchar(200) NOT NULL DEFAULT '',
`device` varchar(20) DEFAULT NULL,
`device_serno` varchar(20) NOT NULL DEFAULT '',
`client_ip` varchar(20) NOT NULL DEFAULT '',
`refund_date` datetime DEFAULT '0000-00-00 00:00:00',
`requery_date` datetime DEFAULT '0000-00-00 00:00:00',
`provider_response_message` text,
`provider_response_code` varchar(20) NOT NULL DEFAULT '',
`response` text NOT NULL,
`trans_settle_date` date NOT NULL DEFAULT '0000-00-00',
`trans_settle_datetime` datetime DEFAULT '0000-00-00 00:00:00',
`trans_settle_status` char(1) NOT NULL DEFAULT 'N',
`status` enum('INITIATED','SUCCESS','FAILED') NOT NULL,
PRIMARY KEY (`serno`),
-- the index chosen for table a in the EXPLAIN output (eq_ref, ref: func)
UNIQUE KEY `snd_unique` (`pwcashout_transno`),
-- NOTE(review): same single column as snd_unique, so this index looks redundant
KEY `pwaeps_transno` (`pwcashout_transno`),
KEY `merchant_transno` (`merchant_transno`),
KEY `entry_date` (`entry_date`),
KEY `provider_id` (`provider_id`),
KEY `trans_settle_datetime` (`trans_settle_datetime`),
KEY `idx_ent_merc` (`merchant_users_id`,`entry_date`)
-- NOTE(review): utf8 here, while st_aepsrequest_log uses latin1
) ENGINE=InnoDB AUTO_INCREMENT=87032220 DEFAULT CHARSET=utf8
is there any way to optimize the above query?
i have added the format of the databases
dfsdfdsfdsfdsfdsfdsfdsfdsfsdfsdfdsfdsfdsf
fdshfjsdhjkfhkjdshkfhsjkdfhkdfjhdsjhfgdhjgfjhdjfhgdshjfgjdsf
Character sets and collations are baked into indexes on columns with data types like the CHAR(30) and VARCHAR(30) you use for st_aepsrequest_log.snd_transno and st_aeps_transaction_master.pwcashout_transno. So, like @BillKarwin mentioned, if the character sets and collations vary it defeats the use of indexes.
Now, it looks like your subquery SELECT pwcashout_transno ... produces a modest number of rows in its result set. And, the character set for st_aepsrequest_log.snd_transno is latin1. So if you convert the output of the subquery to latin1, it should be possible for your IN() clause to use the index on that column. SELECT CONVERT(pwcashout_transno USING latin1) should do the trick. Try this version of your query:
-- Same lookup as the original query, but the subquery output is converted to
-- latin1, the character set of st_aepsrequest_log.snd_transno.
SELECT
    *
FROM
    st_aepsrequest_log
WHERE
    `snd_transno` IN (
        SELECT
            CONVERT(pwcashout_transno USING latin1)
        FROM
            st_aeps_transaction_master a
        WHERE
            -- lower bound literal corrected: it read '2022-09:29 ...' (colon for hyphen)
            a.`entry_date` >= '2022-09-29 13:00:00'
            AND a.entry_date <= '2022-09-29 13:30:00'
    )
But this is a bit of a hack. It's always better when doing your table design to make the character sets and collations of CHAR() and VARCHAR() columns match. This is especially true if you JOIN on them or use them on IN() or = clauses.
Of course, redefining the tables may not be possible for your application.
If the query from OJones does not work, then avoid IN ( SELECT ... ) by doing this:
-- Derived-table variant: collect the transaction numbers first, then join the log.
SELECT log.*
FROM
    -- window is half-open, [13:00:00, 13:30:00): unlike the original <= bound,
    -- a row stamped exactly 13:30:00 is excluded
    ( SELECT CONVERT(a.pwcashout_transno USING latin1) AS pt
      FROM st_aeps_transaction_master a
      -- literal corrected: it read '2022-09:29 ...' (colon for hyphen)
      WHERE a.entry_date >= '2022-09-29 13:00:00'
        AND a.entry_date <  '2022-09-29 13:00:00'
                            + INTERVAL 30 MINUTE
    ) AS x
JOIN st_aepsrequest_log AS log ON log.snd_transno = x.pt
Then, this index may help: INDEX(entry_date, pwcashout_transno)
Note: If there could be multiple rows with the same pwcashout_transno, then the inner (derived) query may need DISTINCT.

Optimizing MySQL query to reduced runtime

Below is the query that is going to run on two tables with 60+ million and 400+ million records. Only the table name differs; otherwise the query is the same for both tables.
-- For each listed CUSIP, keep the three rows with the most recent MktCloseDate
-- among rows where PrimaryExchgCD equals ExchgCD and CloseType is 'CC'.
SELECT *
FROM (
    SELECT
        A.CUSIP,
        A.ISIN,
        A.SEDOL,
        A.LocalCode,
        A.MIC,
        A.ExchgCD,
        A.PrimaryExchgCD,
        A.Currency,
        A.Open,
        A.High,
        A.Low,
        A.Close,
        A.Mid,
        A.Ask,
        A.Last,
        A.Bid,
        A.Bidsize,
        A.Asksize,
        A.TradedVolume,
        A.SecID,
        A.PriceDate,
        A.MktCloseDate,
        A.VolFlag,
        A.IssuerName,
        A.TotalTrades,
        A.CloseType,
        A.SectyCD,
        -- numbers the rows of each CUSIP from newest to oldest close date
        ROW_NUMBER() OVER (PARTITION BY A.CUSIP ORDER BY A.MktCloseDate DESC) AS 'rank'
    FROM EDI_Price04 A
    WHERE A.CloseType = 'CC'
      AND (A.PrimaryExchgCD = A.ExchgCD)
      AND A.CUSIP IN (
          "91879Q109", "583840509", "583840608", "59001A102", "552848103")
) t
WHERE t.rank <= 3;
When the A.CUSIP IN () condition has 10-15 values, the query completes in 2-3 sec. With 400 values it took 28 sec. But I want A.CUSIP IN () to take 2k-3k values at a time.
This is my table structure.
-- Price table queried above; per the question, the same query also runs on a second
-- table that differs only in name.
CREATE TABLE `EDI_Price04` (
`MIC` varchar(6) NOT NULL DEFAULT '',
`LocalCode` varchar(60) NOT NULL DEFAULT '' COMMENT 'PricefileSymbol',
`ISIN` varchar(12) DEFAULT NULL,
`Currency` varchar(3) NOT NULL DEFAULT '',
`PriceDate` date DEFAULT NULL,
`Open` double DEFAULT NULL,
`High` double DEFAULT NULL,
`Low` double DEFAULT NULL,
`Close` double DEFAULT NULL,
`Mid` double DEFAULT NULL,
`Ask` double DEFAULT NULL,
`Last` double DEFAULT NULL,
`Bid` double DEFAULT NULL,
`BidSize` int(11) DEFAULT NULL,
`AskSize` int(11) DEFAULT NULL,
`TradedVolume` bigint(20) DEFAULT NULL,
`SecID` int(11) NOT NULL DEFAULT '0',
-- ordering column of the ROW_NUMBER() window in the query
`MktCloseDate` date NOT NULL DEFAULT '0000-00-00',
`Volflag` char(1) DEFAULT NULL,
`IssuerName` varchar(255) DEFAULT NULL,
`SectyCD` varchar(3) DEFAULT NULL,
`SecurityDesc` varchar(255) DEFAULT NULL,
`SEDOL` varchar(7) DEFAULT NULL,
-- partition column of the window and target of the IN (...) filter
`CUSIP` varchar(9) DEFAULT NULL COMMENT 'USCode',
-- compared column-to-column with ExchgCD in the query
`PrimaryExchgCD` varchar(6) DEFAULT NULL,
`ExchgCD` varchar(6) NOT NULL DEFAULT '',
`TradedValue` double DEFAULT NULL,
`TotalTrades` int(11) DEFAULT NULL,
`Comment` varchar(255) DEFAULT NULL,
`Repush` tinyint(4) NOT NULL DEFAULT '0',
`CloseType` varchar(2) NOT NULL DEFAULT '',
-- eight-column primary key
PRIMARY KEY (`MIC`,`LocalCode`,`Currency`,`SecID`,`MktCloseDate`,`ExchgCD`,`Repush`,`CloseType`),
-- NOTE(review): MIC is already the leading column of the primary key
KEY `idx_EDI_Price04_0` (`MIC`),
KEY `idx_EDI_Price04_1` (`LocalCode`),
KEY `idx_EDI_Price04_2` (`ISIN`),
KEY `idx_EDI_Price04_3` (`PriceDate`),
KEY `idx_EDI_Price04_4` (`SEDOL`),
-- NOTE(review): CUSIP is already the leading column of the composite index at the end
KEY `idx_EDI_Price04_5` (`CUSIP`),
KEY `idx_EDI_Price04_6` (`PrimaryExchgCD`),
KEY `idx_EDI_Price04_7` (`ExchgCD`),
KEY `idx_EDI_Price04_8` (`CloseType`),
KEY `idx_EDI_Price04_9` (`MktCloseDate`),
KEY `idx_EDI_Price04_CUSIP_ExchgCD_CloseType_MktCloseDate` (`CUSIP`,`ExchgCD`,`CloseType`,`MktCloseDate`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
For this query:
-- Keep the three most recent rows (by MktCloseDate) for each listed CUSIP.
SELECT *
FROM (SELECT A.*,
             -- `rank` is backtick-quoted because RANK is a reserved word in MySQL 8.0
             ROW_NUMBER() OVER (PARTITION BY A.CUSIP ORDER BY A.MktCloseDate DESC) AS `rank`
      FROM EDI_Price04 A
      WHERE A.CUSIP IN ('91879Q109', '583840509', '583840608', '59001A102', '552848103') AND
            A.PrimaryExchgCD = A.ExchgCD AND
            A.CloseType = 'CC'
     ) t
WHERE t.`rank` <= 3;
The place to start is with an index. For this query, you want an index on EDI_Price04(CloseType, CUSIP, ExchgCD, MktCloseDate).
Unfortunately, the condition A.PrimaryExchgCD = A.ExchgCD prevents index seeks. If you were to make changes to the query/data, then one approach would be to add a flag when these are the same, rather than looking at the values separately. That would allow an index on:
EDI_Price04(CloseType, IsPrimary, CUSIP, PrimaryExchgCD, ExchgCD, MktCloseDate)
-- Suggested key list for EDI_Price04 (fragment of the CREATE TABLE body).
-- New surrogate primary key; the id column has to be AUTO_INCREMENT.
PRIMARY KEY (id),
-- The former eight-column primary key, kept as a uniqueness constraint.
UNIQUE(`MIC`,`LocalCode`,`Currency`,`SecID`,`MktCloseDate`,
`ExchgCD`,`Repush`,`CloseType`),
-- KEY `idx_EDI_Price04_0` (`MIC`),
KEY `idx_EDI_Price04_1` (`LocalCode`),
KEY `idx_EDI_Price04_2` (`ISIN`),
KEY `idx_EDI_Price04_3` (`PriceDate`),
KEY `idx_EDI_Price04_4` (`SEDOL`),
-- KEY `idx_EDI_Price04_5` (`CUSIP`),
KEY `idx_EDI_Price04_6` (`PrimaryExchgCD`),
KEY `idx_EDI_Price04_7` (`ExchgCD`),
KEY `idx_EDI_Price04_8` (`CloseType`),
KEY `idx_EDI_Price04_9` (`MktCloseDate`),
KEY `idx_EDI_Price04_CUSIP_ExchgCD_CloseType_MktCloseDate` (`CUSIP`,
`ExchgCD`, `CloseType`, `MktCloseDate`),
-- Added for the window: partition column first, then the ordering column.
KEY (CUSIP, MktCloseDate)
Having so many columns in the PK costs in space and insert time. So, I added an id, which needs to be AUTO_INCREMENT.
Keys 0 and 5 are redundant because of the rule "If you have INDEX(a,b), INDEX(a) is redundant."
I added (CUSIP, MktCloseDate) in hopes that it will optimize the ROW_NUMBER() window expression (the one aliased as rank).

Improve a query from Explain results

I have a complex query that is dynamically assembled based upon search criteria. However, in its simplest form, it is still very slow. The main table it runs against has ~10M records. I ran an explain against a 'base' query and the first row of the explain looks bad (at least to a novice dba like me). I have read a couple tutorials about EXPLAIN, but I still am unsure how to fix the query. So, the first row of the results seems to indicate the problem, but I don't know what to do with it. I couldn't make a composite key that long even if I wanted to and some of the field names in that possible_keys column are not even in the patients table. Any help will be greatly appreciated.
id,select_type,table,type,possible_keys,key,key_len,ref,rows,Extra
1,SIMPLE,patients,range,"PRIMARY,location,appt_date,status,radiologist,contract,lastname,paperwork,images_archived,hash,created,document_attached,all_images_archived,last_image_archived,modality,study_uid,company,second_access,firstname,report_delivered,ssn,order_entry_status,dob,tech,doctor,mobile_facility,accession,location_appt_date,location_created,location_lastname,ref,person_seq",location_appt_date,55,NULL,573534,"Using index condition; Using where; Using temporary; Using filesort"
1,SIMPLE,receivable_transactions,ref,patient_seq,patient_seq,4,ris-dev.patients.seq,1,NULL
1,SIMPLE,patients_dispatch,ref,patient_seq,patient_seq,4,ris-dev.patients.seq,1,NULL
1,SIMPLE,mobile_facility,ref,"unique_index,name,location",unique_index,115,"ris-dev.patients.mobile_facility,const",1,"Using where"
1,SIMPLE,mobile_facility_service_areas,eq_ref,PRIMARY,PRIMARY,4,ris-dev.mobile_facility.service_area,1,NULL
Edit: same EXPLAIN, but reformatted to be easier to read:
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE patients range PRIMARY location_appt_date 55 NULL 573534 Using index condition; Using where; Using temporary; Using filesort
location
appt_date
status
radiologist
contract
lastname
paperwork
images_archived
hash
created
document_attached
all_images_archived
last_image_archived
modality
study_uid
company
second_access
firstname
report_delivered
ssn
order_entry_status
dob
tech
doctor
mobile_facility
accession
location_appt_date
location_created
location_lastname
ref
person_seq
1 SIMPLE receivable_transactions ref patient_seq patient_seq 4 ris-dev.patients.seq 1 NULL
1 SIMPLE patients_dispatch ref patient_seq patient_seq 4 ris-dev.patients.seq 1 NULL
1 SIMPLE mobile_facility ref unique_index unique_index 115 ris-dev.patients.mobile_facility,const 1 Using where
name
location
1 SIMPLE mobile_facility_service_areas eq_ref PRIMARY PRIMARY 4 ris-dev.mobile_facility.service_area 1 NULL
The explain is setup against the following query and table structures.
-- Base search query: patients of one location, left-joined to receivable_transactions,
-- patients_dispatch, mobile_facility and mobile_facility_service_areas, one row per patient.
SELECT
    -- columns taken from patients
    patients.fax_in_queue, patients.modality, patients.stat, patients.created, patients.seq, patients.lastname,
    patients.firstname, patients.appt_date, patients.status, patients.contract, patients.location, patients.unique_hash,
    patients.images_archived, patients.report_delivered, patients.doctor, patients.mobile_facility, patients.history,
    patients.dob, patients.all_images_archived, patients.order_entry_status, patients.tech, patients.radiologist,
    patients.last_image_archived, patients.state, patients.ss_comments, patients.completed, patients.report_status,
    patients.have_paperwork, patients.facility_room_number, patients.facility_station_name, patients.facility_bed,
    patients.findings_level, patients.document_attached, patients.study_start, patients.company, patients.accession,
    patients.number_images, patients.client_number_images, patients.sex, patients.threshhold,
    -- all "modifier description" pairs of the patient, joined with ", "
    GROUP_CONCAT(CONCAT(CONCAT(receivable_transactions.modifier, ' '),
        receivable_transactions.description) SEPARATOR ', ') AS rt_desc,
    -- columns taken from patients_dispatch
    patients_dispatch.seq AS doc_seq, patients_dispatch.requisition_last_sent,
    patients_dispatch.requisition_signed_by_file_seq, patients_dispatch.requisition_signed, patients_dispatch.order_reason, patients_dispatch.order_comments,
    patients_dispatch.order_taken, patients_dispatch.order_tech_last_notified, patients_dispatch.order_tech_in_transit, patients_dispatch.order_tech_in,
    patients_dispatch.order_tech_out, patients_dispatch.order_tech_ack, patients_dispatch.addr1 AS d_addr1, patients_dispatch.addr2 AS d_addr2,
    patients_dispatch.city AS d_city, patients_dispatch.state AS d_state, patients_dispatch.zip AS d_zip,
    -- sort helper built from the status and the dispatch timestamps
    CONCAT(patients.status, order_tech_out, order_tech_in, order_tech_in_transit) AS pseudo_status,
    -- columns taken from mobile_facility and its service area
    mobile_facility.requisition_fax, mobile_facility.station_list, mobile_facility.address1 AS mf_addr1,
    mobile_facility.address2 AS mf_addr2, mobile_facility.city AS mf_city, mobile_facility.state AS mf_state, mobile_facility.zip AS mf_zip,
    mobile_facility.phone AS mf_phone, mobile_facility.phone2 AS mf_phone2, mobile_facility_service_areas.name AS mf_service_area
FROM patients
LEFT JOIN receivable_transactions ON patients.seq = receivable_transactions.patient_seq
LEFT JOIN patients_dispatch ON patients.seq = patients_dispatch.patient_seq
LEFT JOIN mobile_facility ON patients.location = mobile_facility.location AND patients.mobile_facility = mobile_facility.name
LEFT JOIN mobile_facility_service_areas ON mobile_facility.service_area = mobile_facility_service_areas.seq
WHERE patients.location = 'XYZCompany'
  AND ((patients.appt_date >= '2020-03-19' AND patients.appt_date <= '2020-03-19 23:59:59')
       -- NOTE(review): status is declared tinyint in the patients table, so the
       -- string 'X' is compared numerically; confirm this is intended
       OR (patients.appt_date <= '2020-03-19' AND patients.status < 'X'))
-- GROUP BY ... DESC is dropped: MySQL 8.0 no longer accepts it, and the explicit
-- ORDER BY below determines the final order anyway
GROUP BY patients.seq
-- the duplicated patients.order_entry_status sort key is removed (no effect on order)
ORDER BY patients.status, patients.order_entry_status, pseudo_status, patients.lastname;
-- Table `patients` as posted: primary key `seq`, many single-column secondary
-- indexes, and three composite indexes that start with `location`.
CREATE TABLE `patients` (
    `seq` int(11) NOT NULL AUTO_INCREMENT,
    `person_seq` int(11) NOT NULL,
    `firstname` varchar(20) NOT NULL DEFAULT '',
    `lastname` varchar(30) NOT NULL DEFAULT '',
    `middlename` varchar(20) NOT NULL DEFAULT '',
    `ref` varchar(50) NOT NULL DEFAULT '',
    `location` varchar(50) NOT NULL DEFAULT '',
    `doctor` varchar(50) NOT NULL,
    `radiologist` varchar(20) NOT NULL DEFAULT '',
    `contract` varchar(50) NOT NULL,
    `history` mediumtext NOT NULL,
    `dob` varchar(15) NOT NULL DEFAULT '0000-00-00',
    `appt_date` date NOT NULL DEFAULT '0000-00-00',
    `status` tinyint(4) NOT NULL DEFAULT '0',
    `tech` varchar(50) NOT NULL DEFAULT '',
    `created` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
    `ss_comments` mediumtext NOT NULL,
    `mobile_facility` varchar(60) NOT NULL DEFAULT '',
    `facility_room_number` varchar(50) NOT NULL,
    `facility_bed` varchar(20) NOT NULL,
    `facility_station_name` varchar(50) NOT NULL,
    `stat` tinyint(4) NOT NULL DEFAULT '0',
    `have_paperwork` tinyint(4) NOT NULL DEFAULT '0',
    `completed` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
    `sex` char(1) NOT NULL DEFAULT '',
    `unique_hash` varchar(100) NOT NULL DEFAULT '',
    `number_images` int(11) NOT NULL DEFAULT '0',
    `client_number_images` int(11) NOT NULL,
    `images_archived` tinyint(4) NOT NULL DEFAULT '0',
    `completed_fax` varchar(10) NOT NULL DEFAULT '0' COMMENT 'This is the number the completed report is faxed to.',
    `report_delivered` tinyint(4) NOT NULL DEFAULT '0',
    `report_delivered_time` datetime NOT NULL,
    `document_attached` tinyint(4) NOT NULL DEFAULT '0',
    `modality` varchar(3) NOT NULL,
    `last_image_archived` datetime NOT NULL,
    `all_images_archived` tinyint(4) NOT NULL DEFAULT '0',
    `fax_in_queue` varchar(12) NOT NULL,
    `accession` varchar(100) NOT NULL,
    `study_uid` varchar(100) NOT NULL,
    `order_entry_status` tinyint(4) NOT NULL,
    `compare_to` varchar(15) NOT NULL,
    `state` varchar(3) NOT NULL,
    `company` int(11) NOT NULL,
    `second_access` varchar(50) NOT NULL,
    `threshhold` datetime NOT NULL,
    `report_status` tinyint(4) NOT NULL,
    `second_id` varchar(50) NOT NULL,
    `rad_alerted` tinyint(4) NOT NULL,
    `assigned` datetime NOT NULL,
    `findings_level` tinyint(4) NOT NULL,
    `report_viewed` tinyint(4) NOT NULL,
    `study_received` datetime NOT NULL,
    `study_start` datetime NOT NULL,
    `study_end` datetime NOT NULL,
    `completed_email` varchar(50) NOT NULL,
    `completed_send` varchar(255) NOT NULL,
    `ssn` varchar(12) NOT NULL,
    `exorder_number` varchar(30) NOT NULL,
    `exvisit_number` varchar(30) NOT NULL,
    `row_updated` timestamp NOT NULL ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`seq`),
    KEY `location` (`location`),
    KEY `appt_date` (`appt_date`),
    KEY `status` (`status`),
    KEY `radiologist` (`radiologist`),
    KEY `contract` (`contract`),
    KEY `lastname` (`lastname`),
    KEY `paperwork` (`have_paperwork`),
    KEY `images_archived` (`images_archived`),
    KEY `hash` (`unique_hash`),
    KEY `created` (`created`),
    KEY `document_attached` (`document_attached`),
    KEY `all_images_archived` (`all_images_archived`),
    KEY `last_image_archived` (`last_image_archived`),
    KEY `modality` (`modality`),
    KEY `study_uid` (`study_uid`),
    KEY `company` (`company`),
    KEY `second_access` (`second_access`),
    KEY `firstname` (`firstname`),
    KEY `report_delivered` (`report_delivered`),
    KEY `ssn` (`ssn`),
    KEY `order_entry_status` (`order_entry_status`),
    KEY `dob` (`dob`),
    KEY `tech` (`tech`),
    KEY `doctor` (`doctor`),
    KEY `mobile_facility` (`mobile_facility`),
    KEY `accession` (`accession`),
    KEY `location_appt_date` (`location`,`appt_date`),
    KEY `location_created` (`location`,`created`),
    KEY `location_lastname` (`location`,`lastname`),
    KEY `ref` (`ref`),
    KEY `person_seq` (`person_seq`)
) ENGINE=InnoDB AUTO_INCREMENT=10242952 DEFAULT CHARSET=latin1;
-- Table `receivable_transactions` as posted: primary key `seq`; joined to
-- patients through the indexed `patient_seq` column.
CREATE TABLE `receivable_transactions` (
    `seq` int(11) NOT NULL AUTO_INCREMENT,
    `patient_seq` int(11) NOT NULL DEFAULT '0',
    `cptcode` varchar(15) NOT NULL DEFAULT '',
    `modifier` char(2) NOT NULL DEFAULT '',
    `description` varchar(100) NOT NULL DEFAULT '',
    `amount` decimal(6,2) NOT NULL DEFAULT '0.00',
    `type` char(2) NOT NULL DEFAULT '',
    `transaction` varchar(10) NOT NULL DEFAULT '',
    `radiologist` varchar(20) NOT NULL DEFAULT '',
    `status` tinyint(4) NOT NULL DEFAULT '0',
    `completed` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
    `created` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
    `report_meta_seq` int(11) NOT NULL DEFAULT '0',
    `report_header` varchar(255) NOT NULL,
    `report_body` blob NOT NULL,
    `report_impression` mediumtext NOT NULL,
    `report_hide` tinyint(4) NOT NULL,
    `radiologist_group` varchar(50) NOT NULL,
    `addendum` int(4) NOT NULL DEFAULT '0',
    `addendum_type` varchar(20) NOT NULL,
    `peer_review` int(4) NOT NULL DEFAULT '0',
    `qa_reason` varchar(255) NOT NULL DEFAULT '',
    `qa_agree` decimal(2,1) NOT NULL DEFAULT '0.0',
    `findings` tinyint(4) NOT NULL,
    `comments` mediumtext NOT NULL,
    `company` int(11) NOT NULL,
    `row_updated` timestamp NOT NULL ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (`seq`),
    KEY `patient_seq` (`patient_seq`),
    KEY `cptcode` (`cptcode`),
    KEY `transaction` (`transaction`),
    KEY `type` (`type`),
    KEY `created` (`created`),
    KEY `radiologist` (`radiologist`),
    KEY `status` (`status`),
    KEY `report_meta_seq` (`report_meta_seq`),
    KEY `Billing Check Dropdown` (`status`,`completed`),
    KEY `qa_agree` (`qa_agree`),
    KEY `peer_review` (`peer_review`),
    KEY `addendum` (`addendum`),
    KEY `company` (`company`),
    KEY `completed` (`completed`)
) ENGINE=InnoDB AUTO_INCREMENT=9380351 DEFAULT CHARSET=latin1;
-- Table `patients_dispatch` as posted: primary key `seq`; joined to patients
-- through the indexed `patient_seq` column.
CREATE TABLE `patients_dispatch` (
    `seq` int(11) NOT NULL AUTO_INCREMENT,
    `patient_seq` int(11) NOT NULL,
    `order_taken` datetime NOT NULL,
    `order_taken_by` varchar(50) NOT NULL,
    `order_person_calling` varchar(50) NOT NULL,
    `order_supervising_physician` varchar(50) NOT NULL,
    `order_trip_count` tinyint(4) NOT NULL,
    `order_trip_count_max` tinyint(4) NOT NULL,
    `order_trip_visit` tinyint(4) NOT NULL,
    `order_tech_in` datetime NOT NULL,
    `order_tech_out` datetime NOT NULL,
    `order_ssn` varchar(12) NOT NULL,
    `order_service_request_time` datetime NOT NULL,
    `order_reason` varchar(255) NOT NULL,
    `order_tech_ack` datetime NOT NULL,
    `order_tech_assigned` datetime NOT NULL,
    `order_tech_last_notified` datetime NOT NULL,
    `requisition_last_sent` datetime NOT NULL,
    `requisition_signed` datetime NOT NULL,
    `requisition_signed_by` varchar(50) NOT NULL,
    `requisition_signed_by_text` varchar(75) NOT NULL,
    `requisition_signed_by_file_seq` int(11) NOT NULL,
    `order_comments` mediumtext NOT NULL,
    `order_tech_in_transit` datetime NOT NULL,
    `fasting` tinyint(1) NOT NULL,
    `collection_time` time DEFAULT NULL,
    `addr1` varchar(100) NOT NULL,
    `addr2` varchar(100) NOT NULL,
    `city` varchar(30) NOT NULL,
    `state` varchar(3) NOT NULL,
    `zip` varchar(12) NOT NULL,
    `phone` varchar(15) NOT NULL,
    `mileage_start` int(11) NOT NULL,
    `mileage_end` int(11) NOT NULL,
    PRIMARY KEY (`seq`),
    KEY `patient_seq` (`patient_seq`)
) ENGINE=InnoDB AUTO_INCREMENT=2261091 DEFAULT CHARSET=latin1;
-- Table `mobile_facility` as posted: primary key `seq`, with a unique index on
-- (`name`, `location`), the pair the patients query joins on.
CREATE TABLE `mobile_facility` (
    `seq` int(11) NOT NULL AUTO_INCREMENT,
    `name` varchar(60) NOT NULL,
    `location` varchar(50) DEFAULT NULL,
    `address1` varchar(50) NOT NULL,
    `address2` varchar(50) NOT NULL,
    `city` varchar(50) NOT NULL,
    `state` varchar(2) NOT NULL,
    `zip` varchar(10) NOT NULL,
    `phone` varchar(15) NOT NULL,
    `phone2` varchar(15) NOT NULL,
    `fax` varchar(110) NOT NULL,
    `rads_can_read` text NOT NULL,
    `rads_cant_read` text NOT NULL,
    `only_techs` text NOT NULL,
    `never_modalities` varchar(255) NOT NULL COMMENT 'A serialized list of modalities a facility may not use.',
    `station_list` mediumtext NOT NULL,
    `email` varchar(255) NOT NULL,
    `misc1` varchar(255) NOT NULL,
    `latitude` float NOT NULL DEFAULT '0',
    `longitude` float NOT NULL DEFAULT '0',
    `affiliation` int(11) NOT NULL COMMENT 'mobile_facility_affiliations seq',
    `branch` int(11) NOT NULL COMMENT 'mobile_facility_branches seq',
    `service_area` int(11) NOT NULL COMMENT 'mobile_facility_service_areas seq',
    `other_id` varchar(50) NOT NULL COMMENT 'Usually used for HL7',
    `facility_type` varchar(2) DEFAULT NULL,
    `no_stat` tinyint(1) NOT NULL DEFAULT '0' COMMENT 'Should the facility allow stat priority on patients?',
    `facility_notes` varchar(512) DEFAULT NULL,
    `requisition_fax` varchar(110) NOT NULL,
    `report_template` text NOT NULL,
    `all_orders_stat` tinyint(1) NOT NULL,
    `sms_notification` varchar(15) NOT NULL,
    `tat` varchar(10) NOT NULL,
    `npi` varchar(15) NOT NULL,
    `NMXR` tinyint(4) NOT NULL DEFAULT '0',
    `billing_type` varchar(10) NOT NULL,
    `salesman` varchar(75) NOT NULL,
    `created_at` datetime NOT NULL,
    `updated_at` datetime NOT NULL,
    `default_bill_to` tinyint(4) NOT NULL DEFAULT '0',
    PRIMARY KEY (`seq`),
    UNIQUE KEY `unique_index` (`name`,`location`),
    KEY `name` (`name`),
    KEY `location` (`location`)
) ENGINE=InnoDB AUTO_INCREMENT=155104 DEFAULT CHARSET=latin1;
-- Table `mobile_facility_service_areas` as posted: its primary key `seq` is
-- what mobile_facility.service_area is joined against.
CREATE TABLE `mobile_facility_service_areas` (
    `seq` int(11) NOT NULL AUTO_INCREMENT,
    `name` varchar(50) NOT NULL,
    `location` varchar(50) NOT NULL,
    PRIMARY KEY (`seq`)
) ENGINE=InnoDB AUTO_INCREMENT=841 DEFAULT CHARSET=latin1;
It's only using the index on location, but that only narrows down the search to about half a million rows. You'd like it to use an index to further narrow down by the appt_date.
However, the use of OR in your WHERE clause is causing a problem. It can't decide how to use the index.
Here's what I suggest:
Drop the index on location because it's redundant with the other indexes that have location as their first column.
Replace the index on location_appt_date with an index on location_appt_date_status.
-- Drop the `location` and `location_appt_date` indexes and add one composite
-- index on (location, appt_date, status) in their place.
ALTER TABLE patients
    DROP INDEX location,
    DROP INDEX location_appt_date,
    ADD INDEX location_appt_date_status (location, appt_date, status);
Refactor the query to use UNION instead of OR:
-- OR-to-UNION refactor of the original query: each branch of the derived table
-- is a simple range on (location, appt_date[, status]), so each can use the
-- location_appt_date_status index.
-- Plain UNION (not UNION ALL) is required here: rows dated 2020-03-19 with
-- status < 'X' satisfy both branches and must be de-duplicated.
SELECT ... (all the columns you have) ...
FROM (
    SELECT * FROM patients USE INDEX (location_appt_date_status)
    WHERE location = 'XYZCompany' AND appt_date >= '2020-03-19' AND appt_date < '2020-03-20'
    UNION
    SELECT * FROM patients USE INDEX (location_appt_date_status)
    WHERE location = 'XYZCompany' AND appt_date <= '2020-03-19' AND status < 'X'
) AS p
LEFT JOIN receivable_transactions FORCE INDEX (patient_seq)
    ON p.seq = receivable_transactions.patient_seq
LEFT JOIN patients_dispatch FORCE INDEX (patient_seq)
    ON p.seq = patients_dispatch.patient_seq
-- Kept as LEFT JOINs, exactly as in the original query, so that patients with
-- no matching facility / service area are still returned.
LEFT JOIN mobile_facility FORCE INDEX (unique_index)
    ON p.location = mobile_facility.location AND p.mobile_facility = mobile_facility.name
LEFT JOIN mobile_facility_service_areas FORCE INDEX (PRIMARY)
    ON mobile_facility.service_area = mobile_facility_service_areas.seq
GROUP BY p.seq
ORDER BY p.status, p.order_entry_status, pseudo_status, p.order_entry_status, p.lastname
You might not need all the USE INDEX() / FORCE INDEX() optimizer hints I used. I did those because I was testing with empty tables, and that can confuse the optimizer.
Let me focus on the part that affects optimization the most:
-- The original query's FROM / WHERE / GROUP BY / ORDER BY clauses, restated
-- with short table aliases (p, rt, pd, mf, sa).
FROM patients AS p
LEFT JOIN receivable_transactions AS rt ON p.seq = rt.patient_seq
LEFT JOIN patients_dispatch AS pd ON p.seq = pd.patient_seq
LEFT JOIN mobile_facility AS mf ON p.location = mf.location
    AND p.mobile_facility = mf.name
LEFT JOIN mobile_facility_service_areas AS sa ON mf.service_area = sa.seq
WHERE p.location = "XYZCompany"
    AND ((p.appt_date >= '2020-03-19'
            AND p.appt_date <= '2020-03-19 23:59:59')
        OR (p.appt_date <= '2020-03-19'
            AND p.status < 'X')
    )
GROUP BY p.seq DESC
ORDER BY p.status, p.order_entry_status, pseudo_status, p.order_entry_status,
    p.lastname);
The biggest issue is the OR. It often prevents most optimizations. The usual fix is to turn it into a UNION:
-- Branch 1: every row for the location on the day itself.
( SELECT ...
    FROM .. JOIN ..
    WHERE p.location = "XYZCompany"
      AND p.appt_date >= '2020-03-19'
      AND p.appt_date < '2020-03-19' + INTERVAL 1 DAY
    ...
)
UNION ALL
-- Branch 2: strictly earlier days only. Rows dated 2020-03-19 are already
-- returned by branch 1, so using "<" keeps the branches disjoint and
-- UNION ALL cannot return the same patient row twice.
( SELECT ...
    FROM .. JOIN ..
    WHERE p.location = "XYZCompany"
      AND p.appt_date < '2020-03-19'
      AND p.status < 'X'
    ...
)
Each select can benefit from this composite index on patients:
(location, appt_date, status)
The < 'X' is problematic because two ranges (appt_date and status) cannot both be used effectively. What are the possible values of status? If there is only one value before 'X', say 'M', then this would be much better: p.status = 'M' together with another index: (location, status, appt_date)
SELECT lots of stuff, then GROUP BY p.seq -- This will probably create strange results (search for ONLY_FULL_GROUP_BY for more discussion). It may be better to first get the patients.seq values (since that is all you are filtering on), then join to the other tables. This would eliminate the GROUP BY, or at least force you to deal with which row to fetch from each of the other tables.
range location_appt_date 55 573534 Using index condition; Using where; Using temporary; Using filesort -- says
55 = 2+50 (for varchar(50)) + 3 (for date) -- neither is NULL.
Based on the 55, I wonder if it is so well optimized that the OR->UNION is not needed.
"Using index condition" is internally called ICP (Index Condition Pushdown) if you want further understanding.
"Using filesort" may be an understatement -- There are probably two sorts, one for GROUP BY, one for ORDER BY. EXPLAIN FORMAT=JSON SELECT ... would make it clear. (And hence my hint that the GROUP BY should be avoided.
You have some redundant indexes (not relevant to much other than disk space): INDEX(a,b), INDEX(a) --> toss INDEX(a).
patients has an awful number of indexes.
The other tables seem to have adequate indexes for your query.

MySQL use separate indices for JOIN and GROUP BY

I am trying to execute following query
-- Joins each transaction to the PURCHASE actions of its session and groups
-- the result by transaction number.
SELECT
    a.sessionID AS `sessionID`,
    firstSeen,
    birthday,
    gender,
    isAnonymous,
    LanguageCode
FROM transactions AS trx
INNER JOIN actions AS a
    ON trx.SessionID = a.sessionID
WHERE a.ActionType = 'PURCHASE'
GROUP BY trx.TransactionNumber
Explain provides the following output
1 SIMPLE trx ALL TransactionNumber,SessionID NULL NULL NULL 225036 Using temporary; Using filesort
1 SIMPLE a ref sessionID sessionID 98 infinitiExport.trx.SessionID 1 Using index
The problem is that I am trying to use one field for join and different field for GROUP BY.
How can I tell MySQL to use different indices for same table?
-- Table `transactions` as posted: primary key `id`, with separate
-- single-column indexes on `TransactionNumber` and `SessionID`.
CREATE TABLE `transactions` (
    `SessionID` varchar(32) NOT NULL DEFAULT '',
    `date` datetime DEFAULT NULL,
    `TransactionNumber` varchar(32) NOT NULL DEFAULT '',
    `CustomerECommerceTrackID` int(11) DEFAULT NULL,
    `SKU` varchar(45) DEFAULT NULL,
    `AmountPaid` double DEFAULT NULL,
    `Currency` varchar(10) DEFAULT NULL,
    `Quantity` int(11) DEFAULT NULL,
    `Name` tinytext NOT NULL,
    `Category` varchar(45) NOT NULL DEFAULT '',
    `customerInfoXML` text,
    `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
    PRIMARY KEY (`id`),
    KEY `TransactionNumber` (`TransactionNumber`),
    KEY `SessionID` (`SessionID`)
) ENGINE=InnoDB AUTO_INCREMENT=212007 DEFAULT CHARSET=utf8;
-- Table `actions` as posted: primary key `id`, with single-column indexes on
-- `ActionType`, `CustomerDeviceID` and `sessionID`.
CREATE TABLE `actions` (
    `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
    `sessionActionDate` datetime DEFAULT NULL,
    `actionURL` varchar(255) DEFAULT NULL,
    `sessionID` varchar(32) NOT NULL DEFAULT '',
    `ActionType` varchar(64) DEFAULT NULL,
    `CustomerID` int(11) DEFAULT NULL,
    `IPAddressID` int(11) DEFAULT NULL,
    `CustomerDeviceID` int(11) DEFAULT NULL,
    `customerInfoXML` text,
    PRIMARY KEY (`id`),
    KEY `ActionType` (`ActionType`),
    KEY `CustomerDeviceID` (`CustomerDeviceID`),
    KEY `sessionID` (`sessionID`)
) ENGINE=InnoDB AUTO_INCREMENT=15042833 DEFAULT CHARSET=utf8;
Thanks
EDIT 1: My indexes were broken; I had to add a (SessionID, TransactionNumber) index to the transactions table. However, now when I try to include the trx.customerInfoXML column, MySQL stops using the index.
EDIT 2: The other answer did not really solve my problem, because index hints are not standard SQL syntax and forcing indices is generally not a good idea.
For ORM users such syntax is an unattainable luxury.
EDIT 3 I updated my indices and it solved the problem, see EDIT 1