Optimization of a query using covering indices - MySQL

I have the following query with a subquery and self join:
SELECT bucket.patient_sid AS sid
FROM
(SELECT clinical_data.patient_sid,
clinical_data.lft,
clinical_data.rgt
FROM clinical_data INNER JOIN
(SELECT clinical_data.patient_sid,
clinical_data.lft,
clinical_data.rgt,
clinical_data.attribute_id
FROM clinical_data
WHERE clinical_data.attribute_id = '33' AND clinical_data.string_value = '2160-0') AS attribute
ON clinical_data.patient_sid = attribute.patient_sid
AND clinical_data.lft >= attribute.lft
AND clinical_data.rgt <= attribute.rgt
WHERE clinical_data.attribute_id = '36') AS bucket;
I have the following indices defined on this:
KEY `idx_bucket` (`attribute_id`,`string_value`)
KEY `idx_self_join` (`patient_sid`,`attribute_id`,`lft`,`rgt`)
When I look at the query using EXPLAIN, the subquery using the covering index idx_bucket is definitely optimized, but the self join and WHERE clause are not. Furthermore, why does it report that only patient_sid and attribute_id are used in used_key_parts, while an attached_condition is shown for lft and rgt (what does this mean?)? Both `lft` and `rgt` are just defined as integers with no special properties, so why aren't they being used in my covering index?
Even stranger, when I define
KEY `idx_self_join` (`patient_sid`,`lft`,`rgt`,`attribute_id`)
only patient_sid is registered in used_key_parts. Furthermore, filtered drops from 11.00% to 1.60%!
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "645186.71"
},
"nested_loop": [
{
"table": {
"table_name": "clinical_data",
"access_type": "ref",
"possible_keys": [
"fk_attribute_idx",
"idx_value_string",
"idx_value_double",
"idx_bucket",
"idx_self_join_idx"
],
"key": "idx_bucket",
"used_key_parts": [
"attribute_id",
"string_value"
],
"key_length": "308",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 126402,
"rows_produced_per_join": 126402,
"filtered": "100.00",
"cost_info": {
"read_cost": "126402.00",
"eval_cost": "25280.40",
"prefix_cost": "151682.40",
"data_read_per_join": "46M"
},
"used_columns": [
"patient_sid",
"string_value",
"attribute_id",
"lft",
"rgt"
],
"attached_condition": "(`ns_large2`.`clinical_data`.`patient_sid` is not null)"
}
},
{
"table": {
"table_name": "clinical_data",
"access_type": "ref",
"possible_keys": [
"fk_attribute_idx",
"idx_value_string",
"idx_value_double",
"idx_bucket",
"idx_self_join_idx"
],
"key": "idx_self_join_idx",
"used_key_parts": [
"attribute_id",
"patient_sid"
],
"key_length": "10",
"ref": [
"const",
"ns_large2.clinical_data.patient_sid"
],
"rows_examined_per_scan": 14,
"rows_produced_per_join": 201169,
"filtered": "11.11",
"using_index": true,
"cost_info": {
"read_cost": "131327.39",
"eval_cost": "40233.83",
"prefix_cost": "645186.71",
"data_read_per_join": "73M"
},
"used_columns": [
"patient_sid",
"attribute_id",
"lft",
"rgt"
],
"attached_condition": "((`ns_large2`.`clinical_data`.`lft` >= `ns_large2`.`clinical_data`.`lft`) and (`ns_large2`.`clinical_data`.`rgt` <= `ns_large2`.`clinical_data`.`rgt`))"
}
}
]
}
}

Here's your basic JOIN:
SELECT cd1.patient_sid AS sid
FROM clinical_data cd1
JOIN clinical_data cd2
ON cd1.patient_sid = cd2.patient_sid
AND cd2.attribute_id = '33'
WHERE cd1.attribute_id = '36'

Here's what I finally came up with:
SELECT
cd1.patient_sid as sid
FROM clinical_data cd1
JOIN clinical_data cd2
ON cd1.patient_sid = cd2.patient_sid
AND cd1.lft >= cd2.lft
AND cd1.rgt <= cd2.rgt
WHERE cd1.attribute_id = '36'
AND cd2.attribute_id = '33'
AND cd2.string_value = '2160-0'

"Used_columns" says that it is 'covering'. The final "used key parts" are not all used as a "key" because they are needed in a "range", not '='.
Get rid of the outer query:
SELECT clinical_data.patient_sid, clinical_data.lft, clinical_data.rgt
FROM clinical_data
INNER JOIN
( SELECT clinical_data.patient_sid, clinical_data.lft, clinical_data.rgt,
clinical_data.attribute_id
FROM clinical_data
WHERE clinical_data.attribute_id = '33'
AND clinical_data.string_value = '2160-0'
) AS attribute ON clinical_data.patient_sid = attribute.patient_sid
AND clinical_data.lft >= attribute.lft
AND clinical_data.rgt <= attribute.rgt
WHERE clinical_data.attribute_id = '36'
Sorry, but the lft-rgt schema is not very efficient.

Related

Why does selecting one more column make MySQL use index condition pushdown?

In the test database employees, the employees table has 300,000+ rows.
CREATE TABLE employees (
emp_no INT NOT NULL,
birth_date DATE NOT NULL,
first_name VARCHAR(14) NOT NULL,
last_name VARCHAR(16) NOT NULL,
gender ENUM ('M','F') NOT NULL,
hire_date DATE NOT NULL,
PRIMARY KEY (emp_no)
);
alter table employees add index IDX_LAST_NAME(last_name);
explain
select last_name, emp_no
from employees
where last_name in ('Aamodt', 'Aaaa', 'Ab', 'Ac')
order by last_name asc, emp_no asc
limit 100;
shows that it uses the covering index for both the WHERE filter and the ORDER BY:
explain format=JSON
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "43.97"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "employees",
"access_type": "range",
"possible_keys": [
"IDX_LAST_NAME"
],
"key": "IDX_LAST_NAME",
"used_key_parts": [
"last_name"
],
"key_length": "66",
"rows_examined_per_scan": 208,
"rows_produced_per_join": 208,
"filtered": "100.00",
"using_index": true,
"cost_info": {
"read_cost": "23.17",
"eval_cost": "20.80",
"prefix_cost": "43.97",
"data_read_per_join": "27K"
},
"used_columns": [
"emp_no",
"last_name"
],
"attached_condition": "(`employees`.`employees`.`last_name` in ('Aamodt','Aaaa','Ab','Ac'))"
}
}
}
}
While
explain
select last_name, emp_no, first_name
from employees
where last_name in ('Aamodt', 'Aaaa', 'Ab', 'Ac')
order by last_name asc, emp_no asc
limit 100;
shows
explain format=JSON
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "168.10"
},
"ordering_operation": {
"using_filesort": false,
"table": {
"table_name": "employees",
"access_type": "range",
"possible_keys": [
"IDX_LAST_NAME"
],
"key": "IDX_LAST_NAME",
"used_key_parts": [
"last_name"
],
"key_length": "66",
"rows_examined_per_scan": 208,
"rows_produced_per_join": 208,
"filtered": "100.00",
"index_condition": "(`employees`.`employees`.`last_name` in ('Aamodt','Aaaa','Ab','Ac'))",
"cost_info": {
"read_cost": "147.30",
"eval_cost": "20.80",
"prefix_cost": "168.10",
"data_read_per_join": "27K"
},
"used_columns": [
"emp_no",
"first_name",
"last_name"
]
}
}
}
}
I think it uses the IDX_LAST_NAME index for the WHERE filter and the ORDER BY, but it also needs to retrieve the full table rows.
But why does the second SQL statement use index condition pushdown? Considering performance, does the second statement just cost more than the first because it has to retrieve the full rows?
These must be InnoDB tables. Indexes in that storage engine always have the PK appended to them, so your first query can be covered by the index on last_name.
Your second query probably uses your index on last_name for two things: finding the right rows and handling the ORDER BY last_name ... LIMIT 100 operation without sorting the entire table. But it still must hit the clustered index implementing the primary key to retrieve the first_name.
To see more details use EXPLAIN ANALYZE FORMAT=TREE on MySQL or ANALYZE FORMAT=JSON on MariaDB to retrieve your execution plan.
To know whether your query is unacceptably slow, you need to test it. If your second query is unacceptably slow you can create an index to cover it.
CREATE INDEX l_e_f ON employees (last_name, emp_no, first_name);
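To check whether the new index actually covers the query, re-run the plan, for example (EXPLAIN ANALYZE requires MySQL 8.0.18+; on MariaDB use ANALYZE FORMAT=JSON as noted above):
-- After creating l_e_f, the plan should indicate a covering index scan
-- instead of lookups into the clustered primary key.
EXPLAIN ANALYZE
SELECT last_name, emp_no, first_name
FROM employees
WHERE last_name IN ('Aamodt', 'Aaaa', 'Ab', 'Ac')
ORDER BY last_name ASC, emp_no ASC
LIMIT 100;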

Why does the query below scan all rows even though indexes have been created, when it uses the "NOT IN" operator to check values against a subquery?

SELECT col1
FROM table1
WHERE col2 BETWEEN '2023-01-15 00:00:00' AND '2023-01-15 07:31:09'
AND col5 NOT IN('lit#yopmail.com','sneh#yopmail.com','snehah#yopmial.com','vishm#yopmail.com','aneha#yopmail.com','sneha10#yopmail.com','mukesh.sh#qa.team','ssw#yopmail.com','vish#yopmail.com','neuro#yopmail.com','delete#yopmail.com','sms#yopmail.com','krk#yopmail.com','samarora916#gmail.com','testing221#yopmail.com','om#yopmail.com','muskes#yopmail.com','putruuui#yopmail.com','rajuu#yopmail.com','priyan123#yopmail.com','prep28#yopmail.com','kewq#yopmail.com','ER.KUNALKHATRubbbhbh#GMAIL.COM')
AND col4 IN (value1,value2)
AND col3 > 2
AND col1 IN (val1,val2,val3,val4,val5)
AND orderID NOT IN (
SELECT col6
FROM table1
WHERE col1 = value61
AND col4 IN (value1,value2)
AND col3 > 0
AND col2 BETWEEN '2023-01-15 00:00:00' AND '2023-01-15 07:31:09'
);
And this is the EXPLAIN FORMAT=JSON output of the query:
"query_cost": "376074.60"
"table_name": "table1",
"access_type": "ALL",
"possible_keys": [
"col5",
"col1_col7_col3"
],
"rows_examined_per_scan": 1663413,
"rows_produced_per_join": 9300,
"filtered": "0.56",
"cost_info": {
"read_cost": "374214.59",
"eval_cost": "1860.01",
"prefix_cost": "376074.60",
"data_read_per_join": "99M"
"table": {
"table_name": "table1",
"access_type": "ref",
"possible_keys": [
"col6",
"col1_col7_col3"
],
"key": "col1_col6_col3",
"used_key_parts": [
"col1"
],
"key_length": "4",
"ref": [
"const"
],
"rows_examined_per_scan": 63,
"rows_produced_per_join": 0,
"filtered": "0.74",
I created a composite index for this query but it doesn't show any effect. I want to optimise this query by reducing the rows scanned and the query cost, so please suggest something.
I think the problem occurs because of the NOT IN in the WHERE clause.
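One rewrite worth trying (a sketch, not from the original post) is to express the NOT IN (SELECT ...) as NOT EXISTS, which MySQL can usually execute as an anti-join; the two forms are only equivalent if col6 is never NULL. The column and placeholder names are exactly those used in the question.
-- Sketch: anti-join form of the same filter (assumes col6 is never NULL).
SELECT t.col1
FROM table1 AS t
WHERE t.col2 BETWEEN '2023-01-15 00:00:00' AND '2023-01-15 07:31:09'
  AND t.col4 IN (value1, value2)
  AND t.col3 > 2
  AND t.col1 IN (val1, val2, val3, val4, val5)
  AND t.col5 NOT IN ( /* same e-mail list as above */ )
  AND NOT EXISTS (
        SELECT 1
        FROM table1 AS x
        WHERE x.col6 = t.orderID
          AND x.col1 = value61
          AND x.col4 IN (value1, value2)
          AND x.col3 > 0
          AND x.col2 BETWEEN '2023-01-15 00:00:00' AND '2023-01-15 07:31:09'
      );
Whether this avoids the full scan still depends on having an index whose leading columns match the outer filters (for example one starting with col1 or col2), which is an assumption without knowing the data.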

order by slowing down query with multiple joins and limit/offset on larger result sets

I am having trouble with the following query taking quite a long time to process when the result set is large. The limit and offset can change, as this is used with pagination. The range on capture_timestamp can also change, but in this example it is finding ALL results (between 0 and 9999999999; this field is an int holding a UTC timestamp). The issue seems to be the ORDER BY taking up most of the processing time. It looks like it uses user_id for the table join, but then never uses anything for the ordering.
On the logs table I have the following indexes :
PRIMARY : activity_id
user_id : (user_id, capture_timestamp)
capture_timestamp : capture_timestamp (added this to see if it would make a difference by itself - it did not)
There are keys setup for all the ON joins.
This particular query, for example, has 2,440,801 results (the logs table itself currently holds 18,332,067 rows), but I am only showing the first 10 sorted by capture_timestamp, and it takes roughly 7 seconds to return the results.
SELECT
logs.activity_id,
users.username,
computers.computer_name,
computers.os,
logs.event_title,
logs.event_target,
logs.capture_timestamp
FROM computers
INNER JOIN users
ON users.computer_id = computers.computer_id
INNER JOIN logs
ON logs.user_id = users.user_id AND logs.capture_timestamp BETWEEN :cw_date_start AND :cw_date_end
WHERE computers.account_id = :cw_account_id AND computers.status = 1
ORDER BY logs.capture_timestamp DESC
LIMIT 0,10
analyze :
Array
(
[0] => Array
(
[ANALYZE] => {
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": 6848.2,
"filesort": {
"sort_key": "logs.capture_timestamp desc",
"r_loops": 1,
"r_total_time_ms": 431.25,
"r_limit": 10,
"r_used_priority_queue": true,
"r_output_rows": 11,
"temporary_table": {
"table": {
"table_name": "computers",
"access_type": "ref",
"possible_keys": ["PRIMARY", "account_id_2", "account_id"],
"key": "account_id_2",
"key_length": "4",
"used_key_parts": ["account_id"],
"ref": ["const"],
"r_loops": 1,
"rows": 294,
"r_rows": 294,
"r_total_time_ms": 0.4544,
"filtered": 100,
"r_filtered": 100,
"attached_condition": "computers.`status` = 1"
},
"table": {
"table_name": "users",
"access_type": "ref",
"possible_keys": ["PRIMARY", "unique_filter"],
"key": "unique_filter",
"key_length": "4",
"used_key_parts": ["computer_id"],
"ref": ["db.computers.computer_id"],
"r_loops": 294,
"rows": 1,
"r_rows": 3.415,
"r_total_time_ms": 0.7054,
"filtered": 100,
"r_filtered": 100,
"using_index": true
},
"table": {
"table_name": "logs",
"access_type": "ref",
"possible_keys": ["user_id", "capture_timestamp"],
"key": "user_id",
"key_length": "4",
"used_key_parts": ["user_id"],
"ref": ["db.users.user_id"],
"r_loops": 1004,
"rows": 424,
"r_rows": 2431.1,
"r_total_time_ms": 4745.3,
"filtered": 100,
"r_filtered": 100,
"index_condition": "logs.capture_timestamp between '0' and '9999999999'"
}
}
}
}
}
)
)
Is there anything I can do here to speed these up? When the result set is smaller everything is pretty much immediate although I guess that is because there isn't as much sorting to do.
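One rewrite that sometimes helps this ORDER BY ... LIMIT pattern (a sketch only, not a tested answer for this schema) is to drive the join from logs so that the existing capture_timestamp index can be read newest-first and the scan can stop after ten matching rows; whether the optimizer actually does that here needs to be confirmed with ANALYZE.
SELECT STRAIGHT_JOIN              -- force the join order as written (logs first)
    logs.activity_id,
    users.username,
    computers.computer_name,
    computers.os,
    logs.event_title,
    logs.event_target,
    logs.capture_timestamp
FROM logs
INNER JOIN users
    ON users.user_id = logs.user_id
INNER JOIN computers
    ON computers.computer_id = users.computer_id
WHERE computers.account_id = :cw_account_id
  AND computers.status = 1
  AND logs.capture_timestamp BETWEEN :cw_date_start AND :cw_date_end
ORDER BY logs.capture_timestamp DESC
LIMIT 0, 10;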

Avoid table scan and use index instead in query

I am designing a new database and have noticed my queries are not scaling as well as they should be. When my aggregations involve hundreds of records I am seeing significant increases in response times. I am wondering if my query is deeply flawed or if I am just not using the right index.
I have done a lot of tweaking to my query but have not come up with a way to eliminate doing a full table scan and instead use an index. When I use a tool similar to EXPLAIN on my query I see the following:
Full table scans are generally inefficient, avoid using them.
Your query uses MySQL's 'filesort' operation. This tends to slow down queries.
Your query uses MySQL's temporary tables. This can require extra I/O and tends to slow down queries.
Table:
CREATE TABLE `indexTable` (
`id` int(10) unsigned NOT NULL,
`userId` int(10) unsigned NOT NULL,
`col1` varbinary(320) NOT NULL,
`col2` tinyint(3) unsigned NOT NULL,
`col3` tinyint(3) unsigned NOT NULL,
`createdAt` bigint(20) unsigned NOT NULL,
`updatedAt` bigint(20) unsigned NOT NULL,
`metadata` json NOT NULL,
PRIMARY KEY (`id`,`userId`,`col1`,`col2`,`col3`),
KEY `createdAt` (`createdAt`),
KEY `id_userId_col1_col2_createdAt` (`id`,`userId`,`col1`,`col2`,`createdAt`),
KEY `col1_col2_createdAt` (`col1`,`col2`,`createdAt`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8
Query:
SELECT t1.id, t1.userId, t1.col1, t1.col2, t1.col3, t1.metadata
FROM indexTable as t1
INNER JOIN(
SELECT col1, col2, MAX(createdAt) AS maxCreatedAt
FROM indexTable
WHERE id = ? AND userId = ?
GROUP BY col1, col2
ORDER BY maxCreatedAt
LIMIT 10 OFFSET 0) AS sub
ON t1.col1 = sub.col1
AND t1.col2 = sub.col2
AND t1.createdAt = sub.maxCreatedAt
WHERE t1.id = ? AND t1.userId = ?
ORDER BY t1.createdAt;
PK: id, userId, col1, col2, col3
Index: createdAt
Explain:
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "34.50"
},
"ordering_operation": {
"using_temporary_table": true,
"using_filesort": true,
"cost_info": {
"sort_cost": "10.00"
},
"nested_loop": [
{
"table": {
"table_name": "sub",
"access_type": "ALL",
"rows_examined_per_scan": 10,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.50",
"eval_cost": "2.00",
"prefix_cost": "12.50",
"data_read_per_join": "3K"
},
"used_columns": [
"col1",
"col2",
"maxCreatedAt"
],
"attached_condition": "(`sub`.`maxCreatedAt` is not null)",
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "10.27"
},
"ordering_operation": {
"using_filesort": true,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "indexTable",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"createdAt",
"id_userId_col1_col2_createdAt",
"col1_col2_createdAt"
],
"key": "PRIMARY",
"used_key_parts": [
"id",
"userId"
],
"key_length": "8",
"ref": [
"const",
"const"
],
"rows_examined_per_scan": 46,
"rows_produced_per_join": 46,
"filtered": "100.00",
"cost_info": {
"read_cost": "1.07",
"eval_cost": "9.20",
"prefix_cost": "10.27",
"data_read_per_join": "16K"
},
"used_columns": [
"id",
"userId",
"createdAt",
"col1",
"col2",
"col3"
],
"attached_condition": "((`MyDB`.`indexTable`.`id` <=> 53) and (`MyDB`.`indexTable`.`userId` <=> 549814))"
}
}
}
}
}
}
},
{
"table": {
"table_name": "t1",
"access_type": "ref",
"possible_keys": [
"PRIMARY",
"createdAt",
"id_userId_col1_col2_createdAt",
"col1_col2_createdAt"
],
"key": "id_userId_col1_col2_createdAt",
"used_key_parts": [
"id",
"userId",
"col1",
"col2",
"createdAt"
],
"key_length": "339",
"ref": [
"const",
"const",
"sub.col1",
"sub.col2",
"sub.maxCreatedAt"
],
"rows_examined_per_scan": 1,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.00",
"eval_cost": "2.00",
"prefix_cost": "24.50",
"data_read_per_join": "3K"
},
"used_columns": [
"id",
"userId",
"createdAt",
"updatedAt",
"col1",
"col2",
"col3",
"metadata",
]
}
}
]
}
}
}
This query finds the most recent record in the grouping of col1 and col2, orders by createdAt, and limits the entries to 10.
The "derived" table (subquery) needs this composite index:
INDEX(id, userid, -- in either order
col1, col2, -- in this order
createdAt) -- to make it "covering"
With that index, it probably will not do a full table scan. However, it will involve a filesort, because the ORDER BY is not the same as the GROUP BY and it orders by an aggregate.
t1 needs
INDEX(col1, col2, -- in either order
createdAt)
sub,maxCreatedAt -- typo??
ORDER BY t1.createdAt -- another necessary filesort.
Do not be afraid of filesorts, especially when there are only 10 rows (as in the second case).
Without seeing SHOW CREATE TABLE, I cannot say whether the "filesort" and the "temporary table" touched the disk at all or were done in RAM.
FORCE INDEX is almost always a bad idea -- even if it helps today, it may hurt tomorrow.
The Optimizer will deliberately (and rightly) use a table scan if too much of the table needs to be looked at -- it is faster than bouncing between the index and the data.
I was able to solve this issue by updating my query to include id and userId in the GROUP BY. I was then able to join on the two additional columns and for some reason that made MySQL use the right index.
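For reference, the reworked query probably looked something like this (a reconstruction of the description above, not the poster's exact SQL):
-- Grouping by id and userId as well lets the join back to t1 supply all five
-- columns of id_userId_col1_col2_createdAt.
SELECT t1.id, t1.userId, t1.col1, t1.col2, t1.col3, t1.metadata
FROM indexTable AS t1
INNER JOIN (
    SELECT id, userId, col1, col2, MAX(createdAt) AS maxCreatedAt
    FROM indexTable
    WHERE id = ? AND userId = ?
    GROUP BY id, userId, col1, col2
    ORDER BY maxCreatedAt
    LIMIT 10 OFFSET 0
) AS sub
    ON t1.id = sub.id
   AND t1.userId = sub.userId
   AND t1.col1 = sub.col1
   AND t1.col2 = sub.col2
   AND t1.createdAt = sub.maxCreatedAt
ORDER BY t1.createdAt;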

MySQL slow performance with GROUP BY and ORDER BY

I am using MySQL 5.7. I have a table with 7,006,500 rows. My query performs a GROUP BY and fetches the row with the maximum count within each group, on a column that is already indexed, but it still takes a long time to execute. Below are my query, execution plan, and table schema.
Table Schema
CREATE TABLE templog (
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
userid bigint(12) unsigned NOT NULL,
type tinyint(3) NOT NULL DEFAULT '0',
os tinyint(4) NOT NULL DEFAULT '0',
day date DEFAULT NULL,
activetime smallint(5) unsigned NOT NULL DEFAULT '0',
createdat datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
timegroupid tinyint(4) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
KEY templog_type_IDX (`type`,`day`,`userid`,`timegroupid`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=7006500 DEFAULT CHARSET=utf8;
My query:
SELECT SQL_NO_CACHE y.userid, y.timegroupid as besttime,y.cnt
FROM (
SELECT @row_number := CASE WHEN @userid = x.userid THEN @row_number+1 ELSE 1 END AS row_number,
@userid := x.userid AS userid, x.cnt, x.timegroupid
FROM (
SELECT userid, timegroupid ,COUNT(userid) as cnt
from templog
where type = 3
AND day BETWEEN '2020-01-01' AND '2020-01-20'
AND userid < 771267
GROUP by userid, timegroupid
ORDER by userid DESC ,cnt DESC
) x,
( SELECT @row_number:=0, @userid:='') AS t
) y
where y.row_number = 1
ORDER by y.userid DESC
LIMIT 1000;
Query Explain format:
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "12.00"
},
"ordering_operation": {
"using_filesort": true,
"table": {
"table_name": "y",
"access_type": "ref",
"possible_keys": [
"<auto_key0>"
],
"key": "<auto_key0>",
"used_key_parts": [
"row_number"
],
"key_length": "9",
"ref": [
"const"
],
"rows_examined_per_scan": 10,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.00",
"eval_cost": "2.00",
"prefix_cost": "12.00",
"data_read_per_join": "320"
},
"used_columns": [
"row_number",
"userid",
"cnt",
"timegroupid"
],
"attached_condition": "((`y`.`row_number` <=> 1))",
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "6441.25"
},
"nested_loop": [
{
"table": {
"table_name": "t",
"access_type": "system",
"rows_examined_per_scan": 1,
"rows_produced_per_join": 1,
"filtered": "100.00",
"cost_info": {
"read_cost": "0.00",
"eval_cost": "0.20",
"prefix_cost": "0.00",
"data_read_per_join": "16"
},
"used_columns": [
"#row_number:=0",
"#userid:=''"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 4,
"message": "No tables used"
}
}
}
},
{
"table": {
"table_name": "x",
"access_type": "ALL",
"rows_examined_per_scan": 25725,
"rows_produced_per_join": 25725,
"filtered": "100.00",
"cost_info": {
"read_cost": "1296.25",
"eval_cost": "5145.00",
"prefix_cost": "6441.25",
"data_read_per_join": "602K"
},
"used_columns": [
"userid",
"timegroupid",
"cnt"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "140807.11"
},
"ordering_operation": {
"using_filesort": true,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "templog",
"access_type": "range",
"possible_keys": [
"templog_type_IDX"
],
"key": "templog_type_IDX",
"used_key_parts": [
"type",
"day"
],
"key_length": "13",
"rows_examined_per_scan": 694718,
"rows_pr
oduced_per_join": 25725,
"filtered": "33.33",
"using_index": true,
"cost_info": {
"read_cost": "1863.51",
"eval_cost": "5145.03",
"prefix_cost": "140807.11",
"data_read_per_join": "803K"
},
"used_columns": [
"id",
"userid",
"type",
"day",
"timegroupid"
],
"attached_condition": "((`templog`.`type` = 3) and (`templog`.`day` between '2020-01-01' and '2020-01-20') and (`templog`.`userid` < 771267))"
}
}
}
}
}
}
}
]
}
}
}
}
}
}
Is there any other way to optimize the query, change the index order, or rewrite the query in another way for better performance?
Do not count on @variables working like you would expect them to. I think the next version is beginning to disallow them.
The optimizer is free to throw away the ORDER BY in the derived table. This will lead to wrong results. Tacking on a large LIMIT to the subquery may prevent that.
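If an upgrade to MySQL 8.0 is possible, the whole @variable construction can be replaced with a window function, which sidesteps both of the concerns above. A sketch (not part of the original answer; requires 8.0, while the poster is on 5.7):
-- Groupwise max via ROW_NUMBER(): one row per userid, keeping the timegroupid
-- with the highest count.
SELECT userid, timegroupid AS besttime, cnt
FROM (
    SELECT userid, timegroupid, COUNT(*) AS cnt,
           ROW_NUMBER() OVER (PARTITION BY userid ORDER BY COUNT(*) DESC) AS rn
    FROM templog
    WHERE type = 3
      AND day BETWEEN '2020-01-01' AND '2020-01-20'
      AND userid < 771267
    GROUP BY userid, timegroupid
) AS x
WHERE rn = 1
ORDER BY userid DESC
LIMIT 1000;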
Build and maintain a "summary table". This can significantly speed up this and similar queries.
CREATE TABLE Summary (
userid ...,
timegroupid ...,
type ...,
day ...,
cnt SMALLINT UNSIGNED NOT NULL, -- COUNT(*)
tottime INT UNSIGNED NOT NULL, -- SUM(activetime)
PRIMARY KEY(timegroupid, userid, type, day)
);
However, without understanding the data better, I cannot predict whether this table will be noticeably smaller than the original. If it is not significantly smaller, the summary table will not be practical.
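A minimal sketch of keeping such a summary table current, assuming it is refreshed once per day after that day's rows have arrived (column types mirroring templog):
-- Fold yesterday's templog rows into the summary; cnt = COUNT(*) and
-- tottime = SUM(activetime), matching the column comments above.
INSERT INTO Summary (userid, timegroupid, type, day, cnt, tottime)
SELECT userid, timegroupid, type, day, COUNT(*), SUM(activetime)
FROM templog
WHERE day = CURRENT_DATE - INTERVAL 1 DAY
GROUP BY userid, timegroupid, type, day;
The groupwise-max query can then aggregate Summary (summing cnt) instead of re-scanning millions of templog rows.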
I added another tag -- follow it for more discussion of groupwise-max.