Query WHERE NOT IN (SELECT) INNER JOIN - mysql

SELECT
SUM(m_out) AS totalOut
FROM
m_detal
WHERE
opers = '25'
AND (m_type = 'Out'
OR m_type = 'Merged')
AND m_date <= '2018-11-28 07:30:00'
AND mark_delete IS NULL
AND m_ids NOT IN (SELECT
m.m_ids
FROM
(SELECT
m_ids
FROM
m_detal
WHERE
opers = '25'
AND (m_type = 'Out'
OR m_type = 'Merged')
AND (m_onhold != 'onhold'
OR m_onhold IS NULL)
AND mark_delete IS NULL
AND m_date <= '2018-11-28 07:30:00') AS m
INNER JOIN
n_combine_tbl AS t ON (t.comb_id1 = m.m_ids
OR t.comb_id2 = m.m_ids)
AND t.time <= '2018-11-28 07:30:00');
This query took me more than 30sec or more! The query inside the NOT IN is little bit huge around 7-9k of ids. Is there a more efficient way of doing this? I think the inner join part is make it slow where the checking the two column of n_combine_tables which is (comb_id1 or comb_id2).
Is there a more efficient way of doing this?

I'd remove duplicated conditions in sub-queries and do something like this (two NOT in could be replaced with UNION):
SELECT
SUM(m.m_out) AS totalOut
FROM
m_detal AS m
WHERE
m.opers = '25'
AND m.m_type IN ('Out', 'Merged')
AND m.m_date <= '2018-11-28 07:30:00'
AND m.mark_delete IS NULL
AND m.m_onhold = 'onhold'
AND m.m_ids NOT IN (
SELECT
t1.comb_id1
FROM
n_combine_tbl AS t1
WHERE
t1.time <= '2018-11-28 07:30:00'
)
AND m.m_ids NOT IN (
SELECT
t2.comb_id2
FROM
n_combine_tbl AS t2
WHERE
t2.time <= '2018-11-28 07:30:00'
)
;
OR with NOT EXISTS:
SELECT
SUM(m.m_out) AS totalOut
FROM
m_detal AS m
WHERE
m.opers = '25'
AND m.m_type IN ('Out', 'Merged')
AND m.m_date <= '2018-11-28 07:30:00'
AND m.mark_delete IS NULL
AND m.m_onhold = 'onhold'
AND NOT EXISTS (
SELECT
*
FROM
n_combine_tbl AS t
WHERE
t.time <= '2018-11-28 07:30:00'
AND (
t.comb_id1 = m.m_ids
OR t.comb_id2 = m.m_ids
)
)
;
You should check the 'onhold' logic.
You should try to add indexes on n_combine_tbl: (time, comb_id1), (time, comb_id2), (comb_id1, time), (comb_id2, time) and check what is good for your data.
Multi-column indexes on m_detal table should also be considered.

Related

Query optimization needed because of too many sub queries and sub query dependency in where conditions

I'm writing code for the production report.
I had written this query
SELECT
P.*,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
create_date < '2019-11-01' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS before_prod,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
(
dispatched = '0' OR disp_bunch = '0'
) AND dispatch_date < '2019-11-01' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS before_dispatched,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
create_date BETWEEN '2019-11-01' AND '2019-11-06' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS production,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
(
dispatched = '0' OR disp_bunch = '0'
) AND dispatch_date BETWEEN '2019-11-01' AND '2019-11-06' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS production_dispatched,
C.name AS category_name
FROM
products P
INNER JOIN category C ON
C.id = P.category
This query is working but as I have too many records in all tables it takes too much time.
also, I need only records where before_prod, before_dispatched, production, production_dispatched all these subquery results should be greater than 0.
I tried to use having clause but it also takes too much time.
I have also tried php for loop, * LOGIC: first all products than in for loop its production. but it was much slower.*
How can I optimize my query?
You can use join instead and select case to sum your data that matches your conditions.
select p.*, t.*
from products p
inner join (
select t2.id, sum(case when create_date < '2019-11-01' then 1 else 0 end) as before_prod
, sum(case when (dispatched = '0' or disp_bunch = '0') and create_date < '2019-11-01' then 1 else 0 end) as before_dispatched
, sum(case when create_date between '2019-11-01' and '2019-11-06' then 1 else 0 end) as production
, sum(case when (dispatched = '0' or disp_bunch = '0') and create_date between '2019-11-01' and '2019-11-06' then 1 else 0 end) as production_dispatched
from bales t1
inner join product t2 on t2.id= t1.product_id
inner join category t3 on t3.id = t2.category
where t1.TYPE in ('bale', 'bag')
group by t2.id) t
on t.id = p.id

MySQL: Grouped by hour, need to show all hours, null where no data

Here's the query:
SELECT h.idhour, h.`hour`, outnumber, count(*) as `count`, sum(talktime) as `duration`
FROM (
SELECT
`cdrs`.`dcustomer` AS `dcustomer`,
(CASE
WHEN (`cdrs`.`cnumber` like "02%") THEN '02'
WHEN (`cdrs`.`cnumber` like "05%") THEN '05'
END) AS `outnumber`,
FROM_UNIXTIME(`cdrs`.`start`) AS `start`,
(`cdrs`.`end` - `cdrs`.`start`) AS `duration`,
`cdrs`.`talktime` AS `talktime`
FROM `cdrs`
WHERE `cdrs`.`start` >= #_START and `cdrs`.`start` < #_END
AND `cdrs`.`dtype` = _LATIN1'external'
GROUP BY callid
) cdr
JOIN customers c ON c.id = cdr.dcustomer
LEFT JOIN hub.hours h ON HOUR(cdr.`start`) = h.idhour
WHERE (c.parent = _ID or cdr.dcustomer = _ID or c.parent IN
(SELECT id FROM customers WHERE parent = _ID))
GROUP BY h.idhour, cdr.outnumber
ORDER BY h.idhour;
The above query results skips the hours where there is no data, but I need to show all hours (00:00 to 23:00) with null or 0 values. How can I do this?
SELECT h.idhour
, h.hour
,IFNULL(outnumber,'') AS outnumber
,IFNULL(cdr2.duration,0) AS duration
,IFNULL(output_count,0) AS output_count
FROM hub.hours h
LEFT JOIN (
SELECT HOUR(start) AS start,outnumber, SUM(talktime) as duration ,COUNT(1) AS output_count
FROM
(
SELECT cdrs.dcustomer AS dcustomer
, (CASE WHEN (cdrs.cnumber like "02%") THEN '02' WHEN (cdrs.cnumber like "05%") THEN '05' END) AS outnumber
, FROM_UNIXTIME(cdrs.start) AS start
, (cdrs.end - cdrs.start) AS duration
, cdrs.talktime AS talktime
FROM cdrs cdrs
INNER JOIN customers c ON c.id = cdrs.dcustomer
WHERE cdrs.start >= #_START and cdrs.start < #_END AND cdrs.dtype = _LATIN1'external'
AND
(c.parent = _ID or cdrs.dcustomer = _ID or c.parent IN (SELECT id FROM customers WHERE parent = _ID))
GROUP BY callid
) cdr
GROUP BY HOUR(start),outnumber
) cdr2
ON cdr2.start = h.idhour
ORDER BY h.idhour
You need a table with all hours, nothing else.
Then use LEFT JOIN with the hours table on the "left" and your current query on the "right":
SELECT b.*
FROM hours h
LEFT JOIN ( ... ) b ON b.hr = h.hr
WHERE h.hr BETWEEN ... AND ...
ORDER BY hr;
Any missing hours will be NULLs in b.*.

How to optimize Mysql Query that has 2 Inner Joins with Distinct? ( InnoDB )

I have a query which I use InnoDB storage engine.
I want to optimize it. It takes too much time to execute. I have 5 million data in my database. Now it takes 250 seconds to execute.
INSERT INTO dynamicgroups (adressid)
SELECT SQL_NO_CACHE DISTINCT(addressid) FROM (
SELECT cluster_0.addressid FROM (
SELECT DISTINCT addressid FROM (
SELECT group_all.addressid FROM (
SELECT g.addressid FROM table2.635_emadresmgroups g
INNER JOIN table2.emaildata f_0
ON f_0.addressid = g.addressid
WHERE (f_0.birthday > date(DATE_SUB(NOW(),INTERVAL 18 MONTH))
AND f_0.birthday < CURDATE() )
) group_all
) AS groups
) AS cluster_0
INNER JOIN(
SELECT DISTINCT addressid FROM (
SELECT group_all.addressid FROM (
SELECT g.addressid FROM table2.635_emadresmgroups g
INNER JOIN table2.emaildata f_0
ON f_0.addressid = g.addressid
WHERE (marriage_date = ''
OR marriage_date = '1900-01-01'
OR marriage_date = '0000-00-00' )
) group_all
) AS groups
) AS cluster_1 ON cluster_1.addressid = cluster_0.addressid
INNER JOIN(
SELECT DISTINCT addressid FROM (
SELECT group_all.addressid FROM (
SELECT g.addressid FROM table2.635_emadresmgroups g
INNER JOIN table2.emaildata f_0
ON f_0.addressid = g.addressid
WHERE (f_0.city = '34' )
) group_all
) AS groups
) AS cluster_2 ON cluster_2.addressid = cluster_1.addressid
) AS t
Your queries all seem to be variations of this query:
SELECT g.addressid
FROM table2.635_emadresmgroups g INNER JOIN
table2.emaildata f_0
ON f_0.addressid = g.addressid
WHERE (f_0.birthday > date(DATE_SUB(NOW(),INTERVAL 18 MONTH)) AND f_0.birthday < CURDATE() )
I would suggest approaching this using group by and having:
SELECT g.addressid
FROM table2.635_emadresmgroups g INNER JOIN
table2.emaildata f_0
ON f_0.addressid = g.addressid
GROUP BY g.addressid
HAVING SUM(f_0.birthday > date(DATE_SUB(NOW(), INTERVAL 18 MONTH)) AND f_0.birthday < CURDATE() ) > 0 AND
SUM(marriage_date = '' OR marriage_date = '1900-01-01' OR marriage_date = '0000-00-00' ) > 0 AND
SUM(f_0.city = '34' ) > 0;
Depending on the volume of data, filtering before the group by can also help:
SELECT g.addressid
FROM table2.635_emadresmgroups g INNER JOIN
table2.emaildata f_0
ON f_0.addressid = g.addressid
WHERE (f_0.birthday > date(DATE_SUB(NOW(), INTERVAL 18 MONTH)) AND f_0.birthday < CURDATE() ) OR
(marriage_date = '' OR marriage_date = '1900-01-01' OR marriage_date = '0000-00-00' ) OR
(f_0.city = '34' )
GROUP BY g.addressid
HAVING SUM(f_0.birthday > date(DATE_SUB(NOW(), INTERVAL 18 MONTH)) AND f_0.birthday < CURDATE() ) > 0 AND
SUM(marriage_date = '' OR marriage_date = '1900-01-01' OR marriage_date = '0000-00-00' ) > 0 AND
SUM(f_0.city = '34' ) > 0;
Even though the EXPLAIN operator isn't implemented as well as others.. I'd suggest you use it for your query.
After that you can analyse what the result that EXPLAIN give and decide which columns should be indexed.
For more information I'd suggest viewing these sources:
MySQL syntax: EXPLAIN
MySQL using: EXPLAIN
Furthermore, the last 2 selects appear to be very similar, maybe you can make a temporary table or a view out of these, so that you don't have to run the entire select twice?
marriage_date -- Make it NULLable and use NULL instead of '', etc. That will avoid an inefficient OR and might lead to usability of an INDEX.
Please provide SHOW CREATE TABLE so we can assess the current indexes.
What version are you running? Until very recently this construct was very inefficient:
FROM ( SELECT ... )
JOIN ( SELECT ... )
The workaround was to put the subqueries into tmp tables and add an INDEX.
This may help in your case, since you seem to be using the JOINs for filtering: Turn JOIN ( SELECT ... ) into WHERE EXISTS ( SELECT * ... ).
Please describe, in English, what the query is trying to do.
Another approach, building on Gordon's suggestion of having a common SELECT: Put that common SELECT into a TEMPORARY table; add index(es), then query from it.

How to optimize for speed a sql multiple select with SUM

I have a really long select from my database with many joins. The problem is with counting SUM: without sum, select time is about 3s, but with SUM is about 15s.
Is it possible to optimize my select to obtain a shorter select time?
Here is my code:
SELECT
accomodation.id,
accomodation.aid,
accomodation.title_en,
accomodation.title_url_en,
accomodation.address,
accomodation.zip,
accomodation.stars,
accomodation.picture,
accomodation.valid_from,
accomodation.valid_to,
accomodation.latitude,
accomodation.longitude,
accomodation.city_id AS
accomodation_city_id,
db_cities.id AS city_id,
db_cities.title_en AS city,
db_cities.title_url AS city_url,
db_countries.title_en AS country_title,
db_countries.title_url_en AS country_url,
accomodation_type.class AS accomodation_type_class,
accomodation_review_value_total.value AS review_total,
MIN(accomodation_price.price) AS price_from,
accomodation_rooms.total_persons
FROM
(SELECT aid, MAX(info_date_add) AS max_info_date_add FROM accomodation GROUP BY aid) accomodation_max
INNER JOIN accomodation
ON
accomodation_max.aid = accomodation.aid AND
accomodation_max.max_info_date_add = accomodation.info_date_add
LEFT JOIN db_cities
ON (
db_cities.id = accomodation.city_id OR
(((acos(sin((db_cities.latitude*pi()/180)) * sin((accomodation.latitude*pi()/180)) + cos((db_cities.latitude*pi()/180)) * cos((accomodation.latitude*pi()/180)) * cos(((db_cities.longitude - accomodation.longitude)*pi()/180))))*180/pi())*60*1.1515*1.609344) < '20')
JOIN db_countries
ON db_countries.id = accomodation.country_id
LEFT JOIN accomodation_review_value_total
ON accomodation_review_value_total.accomodation_aid = accomodation.aid
LEFT JOIN accomodation_type_value
ON accomodation_type_value.accomodation_id = accomodation.id
LEFT JOIN accomodation_type
ON accomodation_type.id = accomodation_type_value.accomodation_type_id
JOIN accomodation_season
ON (
accomodation_season.accomodation_aid = accomodation.aid AND
( '2013-11-04' BETWEEN accomodation_season.start_date AND accomodation_season.end_date OR '2013-11-05' BETWEEN accomodation_season.start_date AND accomodation_season.end_date ) )
JOIN accomodation_price
ON
accomodation_price.accomodation_aid = accomodation.aid AND
accomodation_price.accomodation_price_type_id = '1' AND
accomodation_price.accomodation_price_cat_id = '1' AND
accomodation_price.price BETWEEN '20' AND '250' AND
accomodation_price.accomodation_season_id = accomodation_season.id
JOIN accomodation_theme_value
ON accomodation_theme_value.accomodation_id = accomodation.id
INNER JOIN
(SELECT
accomodation_id,
SUM(accomodation_rooms.rooms) AS total_rooms,
SUM(accomodation_rooms.beds * accomodation_rooms.rooms) AS total_persons
FROM accomodation_rooms
GROUP BY accomodation_id) accomodation_rooms
ON
accomodation_rooms.accomodation_id = accomodation.id AND
accomodation_rooms.total_persons >= '4'
WHERE
db_countries.title_url_en LIKE '%spain%' AND
db_cities.title_url LIKE '%barcelona%' AND
accomodation_type_value.accomodation_type_id IN (5,10) AND
total_rooms >= '2' AND
accomodation_theme_value.accomodation_theme_id IN (11,12,13) AND
accomodation.stars IN (3,4,5) AND
( accomodation_review_value_total.value >= '4.5' ) AND
db_cities.id = '2416'
GROUP BY accomodation.aid
ORDER BY
CASE
WHEN accomodation.valid_to>=NOW() AND accomodation.valid_from<=NOW() AND MIN(accomodation_price.price) IS NOT NULL THEN 0
WHEN NOW()>accomodation.valid_to AND accomodation.valid_to>'0000-00-00' AND MIN(accomodation_price.price) IS NOT NULL THEN 1
WHEN accomodation.valid_to>=NOW() AND accomodation.valid_from<=NOW() THEN 2
WHEN NOW()>accomodation.valid_to AND accomodation.valid_to>'0000-00-00' THEN 3
ELSE 4 END,
review_total DESC,
accomodation.title_en
LIMIT 10

MySQL 500 million rows table in select query with join

I'm concerned about the performance of the query below once the tables are fully populated. So far it's under development and performs well with dummy data.
The table "adress_zoo" will contain about 500 million records once fully populated. "adress_zoo" table looks like this:
CREATE TABLE `adress_zoo`
( `adress_id` int(11) NOT NULL, `zoo_id` int(11) NOT NULL,
UNIQUE KEY `pk` (`adress_id`,`zoo_id`),
KEY `adress_id` (`adress_id`) )
ENGINE=InnoDB DEFAULT CHARSET=latin1;
The other tables will contain maximum 500 records each.
The full query looks like this:
SELECT a.* FROM jos_zoo_item AS a
JOIN jos_zoo_search_index AS zsi2 ON zsi2.item_id = a.id
WHERE a.id IN (
SELECT r.id FROM (
SELECT zi.id AS id, Max(zi.priority) as prio
FROM jos_zoo_item AS zi
JOIN jos_zoo_search_index AS zsi ON zsi.item_id = zi.id
LEFT JOIN jos_zoo_tag AS zt ON zt.item_id = zi.id
JOIN jos_zoo_category_item AS zci ON zci.item_id = zi.id
**JOIN adress_zoo AS az ON az.zoo_id = zi.id**
WHERE 1=1
AND ( (zci.category_id != 0 AND ( zt.name != 'prolong' OR zt.name is NULL))
OR (zci.category_id = 0 AND zt.name = 'prolong') )
AND zi.type = 'telefoni'
AND zsi.element_id = '44d3b1fd-40f6-4fd7-9444-7e11643e2cef'
AND zsi.value = 'Small'
AND zci.category_id > 15
**AND az.adress_id = 5**
GROUP BY zci.category_id ) AS r
)
AND a.application_id = 6
AND a.access IN (1,1)
AND a.state = 1
AND (a.publish_up = '0000-00-00 00:00:00' OR a.publish_up <= '2012-06-07 07:51:26')
AND (a.publish_down = '0000-00-00 00:00:00' OR a.publish_down >= '2012-06-07 07:51:26')
AND zsi2.element_id = '1c3cd26e-666d-4f8f-a465-b74fffb4cb14'
GROUP BY a.id
ORDER BY zsi2.value ASC
The query will usually return about 25 records.
Based on your experience, will this query perform acceptable (respond within say 3 seconds)?
What can I do to optimise this?
As adviced by #Jack I ran the query with EXPLAIN and got this:
This part is an important limiter:
az.adress_id = 5
MySQL will limit the table to only those records where adress_id matches before joining it with the rest of the statement, so it will depend on how big you think that result set might be.
Btw, you have a UNIQUE(adress_id, zoo_id) and a separate INDEX. Is there a particular reason? Because the first part of a spanning key can be used by MySQL to select with as well.
What's also important is to use EXPLAIN to understand how MySQL will "attack" your query and return the results. See also: http://dev.mysql.com/doc/refman/5.5/en/execution-plan-information.html
To avoid subquery you can try to rewrite your query as:
SELECT a.* FROM jos_zoo_item AS a
JOIN jos_zoo_search_index AS zsi2 ON zsi2.item_id = a.id
INNER JOIN
(
SELECT ** distinct ** r.id FROM (
SELECT zi.id AS id, Max(zi.priority) as prio
FROM jos_zoo_item AS zi
JOIN jos_zoo_search_index AS zsi ON zsi.item_id = zi.id
LEFT JOIN jos_zoo_tag AS zt ON zt.item_id = zi.id
JOIN jos_zoo_category_item AS zci ON zci.item_id = zi.id
**JOIN adress_zoo AS az ON az.zoo_id = zi.id**
WHERE 1=1
AND ( (zci.category_id != 0 AND ( zt.name != 'prolong' OR zt.name is NULL))
OR (zci.category_id = 0 AND zt.name = 'prolong') )
AND zi.type = 'telefoni'
AND zsi.element_id = '44d3b1fd-40f6-4fd7-9444-7e11643e2cef'
AND zsi.value = 'Small'
AND zci.category_id > 15
**AND az.adress_id = 5**
GROUP BY zci.category_id ) AS r
) T
on a.id = T.id
where
AND a.application_id = 6
AND a.access IN (1,1)
AND a.state = 1
AND (a.publish_up = '0000-00-00 00:00:00' OR a.publish_up <= '2012-06-07 07:51:26')
AND (a.publish_down = '0000-00-00 00:00:00' OR a.publish_down >= '2012-06-07 07:51:26')
AND zsi2.element_id = '1c3cd26e-666d-4f8f-a465-b74fffb4cb14'
GROUP BY a.id
ORDER BY zsi2.value ASC
This approach don't perform subquery for each candidate row. Performance may be increased only if T is calculated in few milliseconds.