Reduce MySQL Query Runtime - mysql

I have a query like this:
SELECT DISTINCT `cr`.`idCustomer`, `rbase`.`id`
FROM `customers` `t`
JOIN `customersregion` `cr` ON t.idCustomer = cr.idCustomer
and cr.isDeleted = 0
JOIN `calendaritems` `rbase` ON rbase.idAgentsRegion = cr.idRegion
and rbase.isDeleted = 0
where (
(rbase.startDate <= '2020-07-06 00:00:00' and rbase.endDate >= '2020-07-06 00:00:00') or
(rbase.startDate <= '2020-07-28 00:00:00' and rbase.endDate >= '2020-07-28 00:00:00') or
(rbase.startDate >= '2020-07-06 00:00:00' and rbase.startDate <= '2020-07-28 23:59:59') or
(rbase.endDate >= '2020-07-06 00:00:00' and rbase.endDate <= '2020-07-28 23:59:59')
)
Database: MySQL
Customers: 132,000 rows
CustomersRegion: 1,754,000 rows
CalendarItems: 3,838,000 rows (with conditions reduced to 555,000 rows)
t.idCustomer & cr.idCustomer & cr.isDeleted & rbase.idAgentsRegion & cr.idRegion & rbase.isDeleted are indexes
This query's runtime is about 100 seconds and I want to reduce the runtime of this query.
I can't have limit on rows or have another condition in tables
Can you help me?
Thank you
Explain Query:
Customers DDL:
create table customers
(
idCustomer int auto_increment
primary key,
CustomerName varchar(255) not null comment 'نام فروشگاه',
FirstName varchar(60) null comment 'نام رابط',
LastName varchar(60) null comment 'نام مشتري',
idUser int null comment '!#dont show',
idPayment int null,
idCompany int default 0 not null,
LatitudePoint decimal(18, 12) default 0.000000000000 null comment 'gpslat',
LongitudePoint decimal(18, 12) default 0.000000000000 null comment 'gpslongs',
LastOrderDate datetime default '0000-00-00 00:00:00' null comment 'lastorderdate',
VisitPeriod int default 0 null comment 'visitperiod',
LastVisit datetime default '0000-00-00 00:00:00' null comment 'LastVisitDate',
LastNoOrderDate datetime default '0000-00-00 00:00:00' null,
Credit decimal(20, 4) default 0.0000 null comment 'credit',
RemainCredit decimal(20, 4) default 0.0000 null comment 'remaincredit',
Balance decimal(20, 4) default 0.0000 null comment '!#dont show',
RFID varchar(60) null comment 'rfid',
ReturnCheck tinyint(1) default 0 null comment '!#dont show',
AccountStatus tinyint(1) default 0 null comment 'accountstatus',
FaxNumber varchar(20) null,
LiquidationDate date default '0000-00-00' null comment '!#dont show',
EldestDue date default '0000-00-00' null comment '!#dont show',
MaturityDate date default '0000-00-00' null comment '!#dont show',
PriceKind int null,
isDefault tinyint(1) default 0 not null comment '!#dont show',
TimeStamp timestamp default current_timestamp() not null on update current_timestamp(),
isDeleted tinyint(1) default 0 not null,
Address varchar(255) null,
PhoneNumber varchar(60) null,
MobileNumber varchar(60) null,
CustomerErpCode varchar(60) null comment '!#dont show',
StoreType int null,
country varchar(255) null,
state varchar(255) null,
City varchar(30) null,
Region varchar(30) null,
idUserCreator int null,
idBranche int null,
idTagsinfo int null,
shop_id int null,
shop_id_address int null,
lastActivityDate datetime null,
lastActivityType tinyint(1) null,
duplicateOf int null,
isConfirmed tinyint(1) default 2 not null comment '0:rejected - 1:confirmed - 2:notChecked',
Status tinyint(1) default 1 not null,
createDate datetime null,
idProcess int null comment 'نیازی نیست به اینکه حتما پروسه داشته باشد',
idUserConfirmer int null comment 'this is refered to agents table',
nextDate datetime null,
prevDate datetime null,
idImage int null,
idColor int null,
idRate int null,
LastImageDate datetime null,
LastOrderAgentName varchar(255) null,
LastVisitAgentName varchar(255) null,
LastNoOrderAgentName varchar(255) null,
LastImageAgentName varchar(255) null,
LastOrderIdAgent int null,
LastVisitIdAgent int null,
LastNoOrderIdAgent int null,
LastImageIdAgent int null,
isSaleActive tinyint(1) default 1 null,
isReturnActive tinyint(1) default 1 null,
alley varchar(256) null,
street varchar(256) null,
plaque varchar(256) null,
secondAddress varchar(255) null,
description varchar(255) null,
appType varchar(50) default 'iorder' not null,
idPipeline varchar(255) default '0' null,
constraint shop_id
unique (shop_id),
constraint shop_id_address
unique (shop_id_address),
constraint ux_customererp
unique (CustomerErpCode),
constraint customers_ibfk_1
foreign key (idBranche) references branches (idBranche)
on update set null on delete set null,
constraint customers_ibfk_2
foreign key (idTagsinfo) references tagsinfo (idTag)
on update set null on delete set null,
constraint customers_ibfk_3
foreign key (idRate) references rates (idRate)
on update set null on delete set null,
constraint customers_ibfk_4
foreign key (idColor) references colors (idColor)
on update set null on delete set null,
constraint customers_ibfk_5
foreign key (idRate) references rates (idRate)
on update set null on delete set null,
constraint customers_ibfk_6
foreign key (idColor) references colors (idColor)
on update set null on delete set null,
constraint fk_customer_agents
foreign key (idUser) references agents (idAgents)
on update set null on delete set null,
constraint fk_customer_paymant
foreign key (idPayment) references payment (idPayment),
constraint fk_customer_pricelist
foreign key (PriceKind) references pricelist (idPriceList),
constraint fk_customer_storeinfo
foreign key (StoreType) references storesinfo (idStore)
)
charset = utf8;
create index fk_customer_agents_idx
on customers (idUser);
create index fk_customer_paymant_idx
on customers (idPayment);
create index fk_customer_pricelist_idx
on customers (PriceKind);
create index fk_customer_storeinfo_idx
on customers (StoreType);
create index idBranche
on customers (idBranche);
create index idColor
on customers (idColor);
create index idProcess
on customers (idProcess);
create index idRate
on customers (idRate);
create index idTagsinfo
on customers (idTagsinfo);
create index idx_isdeleted_customername
on customers (isDeleted, CustomerName);
create index isdeleted_lat_lng
on customers (isDeleted, LatitudePoint, LongitudePoint);
create index isdeleted_status_isconfirmed
on customers (isDeleted, Status, isConfirmed);
create index lat_lng
on customers (LatitudePoint, LongitudePoint);
CalendarItems DDL:
create table calendaritems
(
id int auto_increment
primary key,
TimeStamp timestamp default current_timestamp() not null on update current_timestamp(),
isDone tinyint(1) null,
isDeleted tinyint(1) default 0 not null,
subject varchar(255) null,
startDate datetime not null,
endDate datetime not null,
isAllDayEvent tinyint(1) default 1 null,
message varchar(255) null,
color varchar(200) null,
rMessage varchar(255) null,
rTime datetime null,
rLocationLat decimal(18, 12) null,
rLocationLong decimal(18, 12) null,
idAgent int not null,
idCustomer int null,
idVisitPath int null,
isFinal tinyint(1) null,
idUserCreator int not null,
idAgentsRegion int null,
type int(5) default 1 not null,
systemFill tinyint(1) default 0 not null,
createDate datetime null,
reqUp tinyint(1) default 0 not null,
dependOn int null,
idPlan int null comment 'to keep track of customer types of a region inside a plan',
idPlanTour int null,
startTime time null,
endTime time null,
constraint calendaritems_ibfk_agents
foreign key (idAgent) references agents (idAgents),
constraint calendaritems_ibfk_agents2
foreign key (idUserCreator) references agents (idAgents),
constraint calendaritems_ibfk_customers
foreign key (idCustomer) references customers (idCustomer)
on delete set null
)
charset = utf8;
create index `Index 10`
on calendaritems (isDeleted, idAgent, startDate, idCustomer);
create index `Index 14`
on calendaritems (isDeleted, idAgent, idAgentsRegion, idPlan, startDate, endDate);
create index `Index 7`
on calendaritems (startDate);
create index `Index 8`
on calendaritems (isDeleted, idAgent, startDate, idVisitPath);
create index `Index 9`
on calendaritems (isDeleted, idAgent, startDate, idAgentsRegion);
create index createDate
on calendaritems (createDate);
create index idAgent
on calendaritems (idAgent);
create index idAgentsRegion
on calendaritems (idAgentsRegion);
create index idCustomer
on calendaritems (idCustomer);
create index idUserCreator
on calendaritems (idUserCreator);
create index idVisitPath
on calendaritems (idVisitPath);
create index reqUp
on calendaritems (reqUp);
create index `systemFill-startDate-idAgent-idPlan`
on calendaritems (systemFill, startDate, idAgent, idPlan);
CustomersRegion DDL:
create table customersregion
(
idCustomer int not null,
idRegion int not null,
idCompany int default 0 null,
isDeleted tinyint(1) default 0 null,
TimeStamp timestamp default current_timestamp() null on update current_timestamp(),
ERPCode varchar(255) default '' null,
createDate datetime null,
primary key (idCustomer, idRegion),
constraint customersregion_ibfk_1
foreign key (idCustomer) references customers (idCustomer)
on update cascade on delete cascade,
constraint customersregion_ibfk_2
foreign key (idRegion) references region (idRegion)
on update cascade on delete cascade
)
charset = utf8;
create index idRegion
on customersregion (idRegion);
create index isdeleted_idregion_idcustomer
on customersregion (isDeleted, idRegion, idCustomer);

The EXPLAIN plan shows that the first step taken is to scan the calendaritems table ("rbase"), scanning a total of estimated 1.6 million rows.
There is an index being used, but it's not really fitting as it has too many extra columns not really used. A better index would be one consisting of (isDeleted, startDate, endDate, idAgentsRegion), in that order, the first three columns of that would be perfect for the first three OR parts of the WHERE condition, but unfortunately not for the last one.
The idAgentsRegion column is not needed for the WHERE or JOIN conditions at all, by adding it you make the index a "covering" one though, so that all data needed can be retrieved from the index alone, without extra lookup steps needed for actual table rows.
What I would do in this case would be to have two indexes, one on (isDeleted, startDate, endDate, idAgentsRegion) and one on (isDeleted, endDate, startDate, idAgentsRegion), and then split the query into two separate ones combined by UNION:
SELECT DISTINCT `cr`.`idCustomer`, `rbase`.`id`
FROM `customers` `t`
JOIN `customersregion` `cr` ON t.idCustomer = cr.idCustomer and cr.isDeleted = 0
JOIN `calendaritems` `rbase` ON rbase.idAgentsRegion = cr.idRegion and rbase.isDeleted = 0
where (
(rbase.startDate <= '2020-07-06 00:00:00' and rbase.endDate >= '2020-07-06 00:00:00') or
(rbase.startDate <= '2020-07-28 00:00:00' and rbase.endDate >= '2020-07-28 00:00:00') or
(rbase.startDate >= '2020-07-06 00:00:00' and rbase.startDate <= '2020-07-28 23:59:59')
)
UNION
SELECT DISTINCT `cr`.`idCustomer`, `rbase`.`id`
FROM `customers` `t`
JOIN `customersregion` `cr` ON t.idCustomer = cr.idCustomer and cr.isDeleted = 0
JOIN `calendaritems` `rbase` ON rbase.idAgentsRegion = cr.idRegion and rbase.isDeleted = 0
where (rbase.endDate >= '2020-07-06 00:00:00' and rbase.endDate <= '2020-07-28 23:59:59')
For the first part the first index is perfect, for the 2nd part the 2nd index is perfect, leading to much smaller index range scans, and in the end the results just need to be combined and duplicates removed.

First of all, there is a foreign key relation from customersregion to customers, so you don't need the customers table in your query. You don't select anything from it and the foreign key relation already ensures that you won't select any customerid's that are not in the customers table. This doesn't reduce your 100 seconds significantly, but every bit helps.
To get full gain of indexes, you will need two extra indexes:
CREATE INDEX firstindextoadd ON calendaritems(idAgentsRegion, isDeleted, startDate, endDate);
CREATE INDEX secondindextoadd ON calendaritems(idAgentsRegion, isDeleted, endDate);
The first index will be used for your first 3 conditions:
(rbase.startDate <= '2020-07-06 00:00:00' and rbase.endDate >= '2020-07-06 00:00:00') or
(rbase.startDate <= '2020-07-28 00:00:00' and rbase.endDate >= '2020-07-28 00:00:00') or
(rbase.startDate >= '2020-07-06 00:00:00' and rbase.startDate <= '2020-07-28 23:59:59')
The second will be used for the fourth condition:
(rbase.endDate >= '2020-07-06 00:00:00' and rbase.endDate <= '2020-07-28 23:59:59')
It depends on the number of deleted records if you should include the isDeleted, but I added them 'just in case'.
I didn't test it on a huge dataset, so you need to tell me if this worked for you.
In addition you can simplify your conditions to:
SELECT DISTINCT `cr`.`idCustomer`, `rbase`.`id`
FROM `customersregion` `cr`
JOIN `calendaritems` `rbase` ON rbase.idAgentsRegion = cr.idRegion and rbase.isDeleted = 0 and cr.isDeleted = 0
where
rbase.startDate <= '2020-07-06 00:00:00' and rbase.endDate >= '2020-07-28 00:00:00' OR
rbase.startDate BETWEEN '2020-07-06 00:00:00' and '2020-07-28 23:59:59' OR
rbase.endDate BETWEEN '2020-07-06 00:00:00' and '2020-07-28 23:59:59'

rbase: INDEX(isDeleted, startDate, endDate, idAgentsRegion, id)
rbase: INDEX(isDeleted, endDate, startDate, idAgentsRegion, id)
Those have these qualities:
First two columns are useful in ON and WHERE.
Optimizer will pick between them based on whether startDate or endDate is more selective.
Covering
That assumes that the Optimizer will start with rbase. If, instead, it starts with cr, then have both of these for the Optimizer to choose between:
rbase: INDEX(idAgentsRegion, isDeleted, startDate, endDate, id)
rbase: INDEX(idAgentsRegion, isDeleted, endDate, startDate, id)
cr is the only other table that the Optimizer might start with. (There is a WHERE clause to filter by.)
cr: INDEX(isDeleted, idRegion, -- first, (in either order)
idCustomer) -- last
Assuming that start <= end, the range test can probably be simplified to only this:
WHERE rbase.startDate < '2020-07-28'
AND rbase.endDate >= '2020-07-06'
(I don't recognize the funnybusiness with '2020-07-28' versus '2020-07-28 23:59:59'.)
I recommend using "< midnight" and ">= midnight" consistently. A plain date is equivalent to midnight for that morning. Another way to specify '2020-07-28' is '2020-07-06' + INTERVAL 22 DAY. The latter is convenient when you know the span (22 days) and don't want to fuss with leap days, etc.
It is "proper" for the ON to specify how the tables are 'related', and the WHERE to be used for filtering. That is, the isDeleted tests belong in the WHERE clause. (The execution is unaffected for JOIN, but important for LEFT JOIN.)
The Last...Id... and Last...Name columns seem to be redundant? Somewhere else there is a mapping from id to name?
Rates and Colors -- Those seem like things that are not worth normalizing? If you ever need to search on either, undoing this normalization will help performance, possibly a lot.
This combo seems 'wrong'; is there a reason for it:
startDate DATETIME
startTime TIME
When you have both of these,
INDEX(a) -- drop
INDEX(a,b) -- keep (it takes care of the other case)
LatitudePoint decimal(18, 12) takes 9 bytes; gross overkill. Suggested alternatives: http://mysql.rjweb.org/doc.php/latlng#representation_choices

Unless you can assume startDate is no more than e.g. 30 days before endDate, there is no practical way to index for this query to avoid having to check all rows. You can try a composite index on (startDate,endDate) and that may help some.
You can try doing a union with some of your where conditions using a start date index and some using an end date index, but if you really are expecting half a million of your 3.8 million rows to get selected, it may not help at all.

Related

MySQL 8.0.26 Slow Query When Counting Results on Three Tables with Indexes

I have a statistics page on my internal admin site to show some traffic information on individual sites. However, the query is taking nearly 80 seconds to run, even with Indexes placed on the keys for each of the tables.
I'm typically running this query searching for session status within 7 days of the date ran.
SELECT
*,
(
SELECT
COUNT(`session_id`)
FROM
`my-db`.`sessions`
WHERE
`my-db`.`sessions`.`site_id` = `my-db`.`sites`.`site_id`
AND `session_datetime` > '2021-10-17 00:00:00'
) as session_count,
(
SELECT
`session_datetime`
FROM
`my-db`.`sessions`
WHERE
`my-db`.`sessions`.`site_id` = `my-db`.`sites`.`site_id`
AND `session_datetime` > '2021-10-17 00:00:00'
ORDER BY
`session_id` ASC
LIMIT
1
) as first_session,
(
SELECT
`session_datetime`
FROM
`my-db`.`sessions`
WHERE
`my-db`.`sessions`.`site_id` = `my-db`.`sites`.`site_id`
AND `session_datetime` > '2021-10-17 00:00:00'
ORDER BY
`session_id` DESC
LIMIT
1
) as last_session,
(
SELECT
COUNT(`site_profiles_id`)
FROM
`my-db`.`sites_profiles`
WHERE
`my-db`.`sites_profiles`.`site_id` = `my-db`.`sites`.`site_id`
AND `origin` = 1
AND `date_added` > '2021-10-17 00:00:00'
) as profiles_originated,
(
SELECT
COUNT(`site_profiles_id`)
FROM
`my-db`.`sites_profiles`
WHERE
`my-db`.`sites_profiles`.`site_id` = `my-db`.`sites`.`site_id`
AND `scanned` = 1
AND `date_added` > '2021-10-17 00:00:00'
) as profiles_scanned,
(
SELECT
COUNT(`site_profiles_id`)
FROM
`my-db`.`sites_profiles`
WHERE
`my-db`.`sites_profiles`.`site_id` = `my-db`.`sites`.`site_id`
AND `date_added` > '2021-10-17 00:00:00'
) as profiles_collected
FROM
`my-db`.`sites`
WHERE
`site_id` in (
SELECT
DISTINCT(`site_id`)
FROM
`my-db`.`sessions`
WHERE
`session_datetime` > '2021-10-17 00:00:00'
)
ORDER BY
`session_count` DESC
LIMIT
25;
I'm trying to understand the results of EXPLAIN, but I believe the issue is because of the RANGE type of the index used on the datetime.
It's worth noting, I'm dynamically changing the ORDER BY clause depending on a sort dropdown selected by the admin user to sort the results by - site_id ASC/DESC, session_count ASC/DESC and profiles_collected ASC/DESC.
The performance of the profiles_collected DESC is significantly impacted when compared to the others.
network_sites
CREATE TABLE `sites` (
`site_id` bigint NOT NULL AUTO_INCREMENT,
`account_id` bigint NOT NULL,
`site_hash` varchar(128) CHARACTER SET utf8 NOT NULL,
`site_address` varchar(255) CHARACTER SET utf8 NOT NULL,
`site_status` int NOT NULL,
`site_created` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`site_updated` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`site_id`),
UNIQUE KEY `site_id_UNIQUE` (`site_id`),
UNIQUE KEY `site_hash_UNIQUE` (`site_hash`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
network_profiles_sessions
CREATE TABLE `sessions` (
`session_id` bigint NOT NULL AUTO_INCREMENT,
`site_id` bigint NOT NULL,
`profile_id` bigint DEFAULT NULL,
`session_hash` varchar(128) CHARACTER SET utf8 NOT NULL,
`session_ip_address` varchar(45) CHARACTER SET utf8 DEFAULT NULL,
`session_useragent` text CHARACTER SET utf8,
`session_page_uri` text CHARACTER SET utf8,
`session_datetime` datetime DEFAULT CURRENT_TIMESTAMP,
`session_has_data` tinyint DEFAULT '0',
`session_processed` tinyint DEFAULT '0',
`session_queued` tinyint DEFAULT '0',
PRIMARY KEY (`session_id`),
UNIQUE KEY `session_id_UNIQUE` (`session_id`),
KEY `session_has_data` (`session_has_data`,`session_id`),
KEY `session_processed` (`session_processed`,`session_id`),
KEY `session_queued` (`session_queued`,`session_id`),
KEY `session_datetime` (`session_datetime`,`session_id`),
KEY `session_hash` (`session_hash`,`session_id`),
KEY `site_id` (`site_id`,`session_id`),
FULLTEXT KEY `session_page_uri` (`session_page_uri`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
network_sites_profiles
CREATE TABLE `sites_profiles` (
`site_profiles_id` bigint NOT NULL AUTO_INCREMENT,
`site_id` bigint NOT NULL,
`profile_id` bigint NOT NULL,
`origin` int DEFAULT NULL,
`scanned` int DEFAULT NULL,
`date_added` datetime DEFAULT CURRENT_TIMESTAMP,
`date_lastseen` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`site_profiles_id`),
UNIQUE KEY `site_users_id_UNIQUE` (`site_profiles_id`),
KEY `site_id` (`site_id`,`site_profiles_id`),
KEY `date_added` (`date_added` DESC,`site_profiles_id`),
KEY `origin` (`origin`,`site_profiles_id`),
KEY `scanned` (`scanned`,`site_profiles_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
PRIMARY KEY(a)
UNIQUE KEY(a) -- redundant, DROP it
A PK is a UNIQUE key is an INDEX.
The last 3 subqueries can be combined:
SELECT SUM(origin = 1) AS profiles_originated,
SUM(scanned = 1) AS profiles_scanned,
COUNT(*) AS profiles_collected
FROM profiles
WHERE date_added >= '2021-10-17'
And then JOIN to that. However, there are some potential problems...
How do session.datetime and date_added compare? I'm assuming that a session is added before it happens?
I assume you want to include midnight of the morning of Oct 17?
The first 3 subqueries can perhaps be similarly simplified. Note that MAX(session_datetime) is sufficient for last_session.

Mysql Queries in big data table

I have problem with my mysql database table. I have more then 20 millions rows in table. Table structure shown below. Main problem is that queries take really long time to execute (some queries take more then 20 second). I use indexes where i can, however many queries use date range and with date range my indexes don't work. Also in queries i use almost every column. What i need to change to my data table, to improve efficiency?
`history` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`barcode` varchar(100) DEFAULT NULL,
`bag` varchar(100) DEFAULT NULL,
`action` int(10) unsigned DEFAULT NULL,
`place` int(10) unsigned DEFAULT NULL,
`price` decimal(10,2) DEFAULT NULL,
`old_price` decimal(10,2) DEFAULT NULL,
`user` int(11) DEFAULT NULL,
`amount` int(10) DEFAULT NULL,
`rotation` int(10) unsigned DEFAULT NULL,
`discount` decimal(10,2) DEFAULT NULL,
`discount_type` tinyint(2) unsigned DEFAULT NULL,
`original` int(10) unsigned DEFAULT NULL,
`was_in_shop` int(10) unsigned DEFAULT NULL,
`cate` int(10) unsigned DEFAULT NULL COMMENT 'grupe',
`sub_cate` int(10) unsigned DEFAULT NULL,
`comment` varchar(255) DEFAULT NULL,
`helper` varchar(255) DEFAULT NULL,
`ywd` varchar(255) DEFAULT NULL,
`created_at` timestamp NULL DEFAULT NULL,
`updated_at` timestamp NULL DEFAULT NULL ON UPDATE CURRENT_TIMESTAMP,
`deleted_at` timestamp NULL DEFAULT NULL
)
PRIMARY KEY (`id`),
KEY `barcode` (`barcode`) USING BTREE,
KEY `action` (`action`) USING BTREE,
KEY `original` (`original`) USING BTREE,
KEY `created_at` (`created_at`) USING BTREE,
KEY `bag` (`bag`) USING BTREE
ENGINE=InnoDB
Some of my queries:
select SUM(amount) as amount,
SUM(comment) as price,
cate
from `history`
where ( `action` = '4'
and `place` = '28'
and `created_at` >= '2018-04-01 00:00:00'
and `created_at` <= '2018-04-30 23:59:59'
)
and `history`.`deleted_at` is null
group by `cate`;
select cate,
SUM(amount) AS kiekis,
SUM(IF(discount>0,(price*amount)-discount,(price*amount))) AS suma,
SUM(IF(discount>0,IF(discount_type=1,(discount*price)/100,discount),0)) AS nuolaida
from `history`
where ( `history`.`action` = '4'
and `history`.`created_at` >= '2018-01-01 00:00:00'
and `history`.`created_at` <= '2018-01-23 23:59:59'
)
and LENGTH(barcode) > 7
and `history`.`deleted_at` is null
group by `cate`;
Your first query is better written as:
select SUM(h.amount) as amount,
SUM(h.comment) as price,
h.cate
from history h
where h.action = 4 and
h.place = 28 and
h.created_at >= '2018-04-01' and
h.created_at < '2018-05-01' and
h.deleted_at is null
group by h.cate;
Why?
place and action are numbers. The comparison should be to a number. Mixing types can prevent the use of indexes.
The time component is not useful for the date comparison.
Qualifying all columns names is just a good idea.
Then, for this query, a reasonable index is history(action, place, created_at, deleted_at).
So, I would start with multi-column indexes.
If you continue to have performance issues, you should then consider partitioning the data based on the created_at date.
INDEX(a), INDEX(b) serves some purposes, but the "composite" INDEX(a,b) better serves some queries.
where ( `action` = '4'
and `place` = '28'
and `created_at` >= '2018-04-01 00:00:00'
and `created_at` <= '2018-04-30 23:59:59'
)
and `history`.`deleted_at` is null
Needs
INDEX(action, place, -- first, but in either order
deleted_at,
created_at) -- last
I prefer to write the date range thus:
and `history`.`created_at` >= '2018-04-01'
and `history`.`created_at` < '2018-04-01' + INTERVAL 1 MONTH
It's a lot easier than dealing with leap year, end of year, etc. And it works 'correctly' for DATE, DATETIME, DATETIME(6), TIMESTAMP, and TIMESTAMP(6).
For this
where ( `history`.`action` = '4'
and `history`.`created_at` >= '2018-01-01 00:00:00'
and `history`.`created_at` <= '2018-01-23 23:59:59'
)
and LENGTH(barcode) > 7
and `history`.`deleted_at` is null
I would try this as the most likely:
INDEX(action, deleted_at, created_at) -- in this order
Do not have separate tables for separate years. If you will be deleting old data, then consider PARTITION BY RANGE(TO_DAYS(...)) in order to get the speed of DROP PARTITION. (But that is another discussion.)
If I was in your situation I would consider a paged database name. By this I mean have multiple history_X tables where X is an int related to the content.
Since this is a history table is it possible to include part of the date in the name?
You said that you use ranges to search for the data, so if you were to use year in the table name you could have
history_2014
history_2015
history_2016
history_2017
history_2018
etc.
Then you could search with the table that applies to your date range.
If you need data from a range that spans two tables then you could use a UNION query to bridge the 2 result sets into one.

False Positives outside date range in this mySQL JOIN

I am getting historical count data together in an automated report. The two main tables' schemas are below. The third table referenced is person, which has its ids as foreign keys in email_list_subscription. That table's primary key consists of the two foreign keys email_list and person.
SQLFIDDLE HERE
The query below is coming up with a count which is outside the date ranges allowed in the query and I can't figure out why. It has rows for an email list that definitely has no rows in 2014 at all.
CREATE TABLE `email_list` (
`id` smallint(5) unsigned NOT NULL AUTO_INCREMENT,
`handle` varchar(50) NOT NULL DEFAULT '',
`title` varchar(255) DEFAULT NULL,
`operator` varchar(255) DEFAULT NULL,
`operator_contact_name` varchar(255) DEFAULT NULL,
`operator_contact_email` varchar(150) DEFAULT NULL,
`operator_contact_phone` varchar(20) DEFAULT NULL,
`operator_listid` int(10) unsigned DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `handle` (`handle`),
KEY `handle_2` (`handle`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
CREATE TABLE `email_list_subscription` (
`email_list` smallint(5) unsigned NOT NULL DEFAULT '0',
`person` int(10) unsigned NOT NULL DEFAULT '0',
`as_email_address` varchar(150) DEFAULT NULL,
`datetime_synced_to_operator` datetime DEFAULT NULL,
`opted_in` datetime DEFAULT NULL,
`opted_out` datetime NOT NULL,
`undeliverable` datetime NOT NULL,
PRIMARY KEY (`email_list`,`person`),
KEY `email_list` (`email_list`),
KEY `person` (`person`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
Here is a query dumped from the script and it's results checked directly in mySQL monitor:
SELECT
el.id, el.handle,
els.`email_list` ,
COUNT( els.person ) AS c
FROM
`email_list` el,
`email_list_subscription` els
WHERE
el.id = els.email_list
AND (
DATE( els.`datetime_synced_to_operator` ) >= '2014-04-01'
OR
DATE( els.`opted_in` ) >= '2014-04-01'
)
AND (
DATE( els.`datetime_synced_to_operator` ) <= '2014-05-18'
OR
DATE( els.`opted_in` ) <= '2014-05-18'
)
GROUP BY els.`email_list`
How is this capturing els rows whose dates are not in the range?
Those DATE() calls are going to kill your performance, much better to do
els.`datetime_synced_to_operator` >= '2014-04-01 00:00:00'
(for example).
Also, it is not clear your date ranges are going to work as intended; this seems more clear (but may have different results depending on data):
WHERE el.id = els.email_list
AND (
( els.`datetime_synced_to_operator` BETWEEN '2014-04-01 00:00:00' AND '2014-05-18 23:59:59')
OR
( els.`opted_in` BETWEEN '2014-04-01 00:00:00' AND '2014-05-18 23:59:59')
)
;
Also: What was wrong with the original where (below)?
AND (
DATE( els.`datetime_synced_to_operator` ) >= '2014-04-01'
OR
DATE( els.`opted_in` ) >= '2014-04-01'
)
AND (
DATE( els.`datetime_synced_to_operator` ) <= '2014-05-18'
OR
DATE( els.`opted_in` ) <= '2014-05-18'
)
Best illustrated with an example... any row with datetime_synced_to_operator any time after the start date (even after the end date) and an opted_in any time before the end date (even before the start date) gives true for this clause; and vice versa.

Between a MIN and MAX date in joined table

I have a table called Booking and a table called FacilityBooking. A booking is a composition of facility bookings, a one to many relation. The date and time of the booking is determined by the lowest start date, and the highest end date of the facility bookings that belongs to it.
I want to pull some statistics of how many private and how many business bookings there has been between two dates.
CREATE TABLE `Booking` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`comments` varchar(255) DEFAULT NULL,
`createdBy` varchar(255) DEFAULT NULL,
`customerName` varchar(255) DEFAULT NULL,
`email` varchar(255) DEFAULT NULL,
`isPaid` bit(1) DEFAULT NULL,
`isPrivateClient` bit(1) DEFAULT NULL,
`needsPermission` bit(1) DEFAULT NULL,
`phoneNumber` varchar(255) DEFAULT NULL,
`referenceNumber` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
# Dump of table FacilityBooking
# ------------------------------------------------------------
CREATE TABLE `FacilityBooking` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`comments` varchar(2000) DEFAULT NULL,
`from` datetime DEFAULT NULL,
`to` datetime DEFAULT NULL,
`bookablePlace_id` int(11) DEFAULT NULL,
`booking_id` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `FK_2tv9w7g5vyx9po8vs6ceogldb` (`bookablePlace_id`),
KEY `FK_n17h188ecbdos5lsva51b8j29` (`booking_id`),
CONSTRAINT `FK_n17h188ecbdos5lsva51b8j29` FOREIGN KEY (`booking_id`) REFERENCES `Booking` (`id`),
CONSTRAINT `FK_2tv9w7g5vyx9po8vs6ceogldb` FOREIGN KEY (`bookablePlace_id`) REFERENCES `BookablePlace` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
I have created an sqlfiddle: http://sqlfiddle.com/#!9/7ae95/2
And this is what i have so far:
SELECT
CASE isPrivateClient
WHEN 0 THEN "business"
WHEN 1 THEN "private"
END AS clientType,
count(isPrivateClient) as count
FROM
Booking
GROUP BY
isPrivateClient
So what i need from here is to join the facilitybookings and search between the lowest from date and the highest to date.
Hope someone can help me :)
Join the FacilityBooking table and filter using WHERE:
SELECT
CASE isPrivateClient
WHEN 0 THEN "business"
WHEN 1 THEN "private"
END AS clientType,
count(FacilityBooking.id) as count
FROM
Booking INNER JOIN FacilityBooking ON
Booking.id = FacilityBooking.booking_id
-- Between 2015-05-01 AND 2015-06-01 INCLUSSIVE'
WHERE FacilityBooking.from <= '2015-06-01' AND FacilityBooking.to >= '2015-05-01'
GROUP BY
isPrivateClient
fixed fiddle
If you want to only include "full" Booking in which all FacilityBooking are between 2 date, something like this should do the trick :
SELECT clientType, count(bookId)
FROM (
SELECT
b.id as bookId,
CASE b.isPrivateClient
WHEN 0 THEN "business"
WHEN 1 THEN "private"
END AS clientType,
Min(fb.from) as minFrom,
Max(fb.to) as maxTo
FROM
Booking b
INNER JOIN FacilityBooking fb ON b.id = fb.booking_id
GROUP BY bookId
) tbl
WHERE minFrom >= '2015-05-22' -- Min Date
AND maxTo <= '2015-05-24' -- Max Date
GROUP BY
clientType

Mysql foreign key check taking too much time

SELECT
*
FROM products
WHERE products.deleted_at is null
AND products.section_id = '1'
AND products.status = '2'
ORDER BY products.created_at DESC
LIMIT 10 OFFSET 0
Time: 3 seconds
SELECT
*
FROM products
WHERE products.deleted_at is null
AND products.status = '2'
ORDER BY products.created_at DESC
LIMIT 10 OFFSET 0
Time:0.2 seconds
Why is this foreign key check (where section_id='1') taking so much time?
section_id refers to id of sections table.
The table structure is:
CREATE TABLE IF NOT EXISTS `bishal`.`products` (
......
`section_id` INT(10) UNSIGNED NOT NULL,
......
`status` TINYINT(4) NOT NULL,
......
`deleted_at` TIMESTAMP NULL DEFAULT NULL,
`created_at` TIMESTAMP NOT NULL DEFAULT '0000-00-00 00:00:00',
......
INDEX `products_section_id_index` (`section_id` ASC),
......
CONSTRAINT `products_section_id_foreign`
FOREIGN KEY (`section_id`)
REFERENCES `bishal`.`sections` (`id`),
......
ENGINE = InnoDB
AUTO_INCREMENT = 225532
DEFAULT CHARACTER SET = utf8
COLLATE = utf8_unicode_ci;