Faceted search mysql (EAV). How to increase speed of facet counting? - mysql

I built faceted search using MySQL and it worked well on test data. Then I uploaded 7000 items and 20000 filter-item relations. Now some of the queries that count the number of items for each filter take a long time.
Each category has own filters. Each filter has own filter values (filter_v_id).
For simplicity, I will show two tables:
CREATE TABLE `item_cat` (
`item_id` int(11) NOT NULL,
`category_id` int(11) NOT NULL,
`publicate` int(1) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `item_cat`
ADD PRIMARY KEY (`item_id`,`category_id`),
ADD KEY `item_id` (`item_id`),
ADD KEY `category_id` (`category_id`);
CREATE TABLE `filter_item` (
`item_id` int(11) NOT NULL,
`filter_id` int(11) NOT NULL,
`filter_v_id` int(11) NOT NULL,
`filter_v` decimal(10,2) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `filter_item`
ADD PRIMARY KEY (`item_id`,`filter_id`,`filter_v_id`),
ADD KEY `item_id` (`item_id`),
ADD KEY `item_id_2` (`item_id`,`filter_id`),
ADD KEY `item_id_3` (`item_id`,`filter_id`,`filter_v_id`),
ADD KEY `filter_id` (`filter_id`,`filter_v_id`),
ADD KEY `filter_id_2` (`filter_id`);
DB Fiddle - fast query
Category_id=70 has 46 items, and any set of filters is fast (100-150ms)
SELECT `filter_id`,`filter_v_id`, count(`item_id`) as `cnt` FROM
(SELECT * FROM `item_cat` WHERE `category_id`='70') as `ic`
JOIN `filter_item` as `fi` USING(`item_id`)
WHERE `item_id` IN(
SELECT `ic`.`item_id` FROM
(SELECT * FROM `item_cat` WHERE `category_id`='70') as `ic`
JOIN `filter_item` as `fi` USING(`item_id`)
GROUP BY `ic`.`item_id`
HAVING (
(`fi`.`filter_id` = '1' OR MAX(`fi`.`filter_v_id` = '932')=1 OR MAX(`fi`.`filter_v_id` = '938')=1 OR MAX(`fi`.`filter_v_id` = '1006')=1 )
AND (`fi`.`filter_id` = '17' OR MAX(`fi`.`filter_v_id` = '563')=1 )
AND (`fi`.`filter_id` = '28' OR MAX(`fi`.`filter_v` >= '2' AND `fi`.`filter_v` <= '22500')=1)
AND (`fi`.`filter_id` = '46' OR MAX(`fi`.`filter_v_id` = '580')=1 OR MAX(`fi`.`filter_v_id` = '595')=1 OR MAX(`fi`.`filter_v_id` = '559')=1 )
AND (`fi`.`filter_id` = '70' OR MAX(`fi`.`filter_v_id` = '564')=1 )
AND (`fi`.`filter_id` = '73' OR MAX(`fi`.`filter_v_id` = '562')=1 )
AND (`fi`.`filter_id` = '74' OR MAX(`fi`.`filter_v_id` = '561')=1 )
)
)
GROUP BY `filter_id`,`filter_v_id`
DB Fiddle - slow query
Category_id=52 has 651 items, and one applied filter took 3000ms+
SELECT `filter_id`,`filter_v_id`, count(`item_id`) as `cnt` FROM
(SELECT * FROM `item_cat` WHERE `category_id`='52') as `ic`
JOIN `filter_item` as `fi` USING(`item_id`)
WHERE `item_id` IN(
SELECT `item_id` FROM
(SELECT * FROM `item_cat` WHERE `category_id`='52') as `ic`
JOIN `filter_item` as `fi` USING(`item_id`)
GROUP BY `ic`.`item_id`
HAVING (
(`fi`.`filter_id` = '47' OR MAX(`fi`.`filter_v_id` = '474')=1 )
)
)
GROUP BY `filter_id`,`filter_v_id`
Please tell me how to increase the speed of this query.

Related

MariaDB query issue | query works when index is specified or ignored but fails to return expected results otherwise| Why would this occur?

I'm trying to get results of a query from schools that were in a particular region over time. I've created the queries that I think should do this -
SELECT *
FROM team t1
JOIN district_team t2 ON t1.school_name = t2.team_name
join district t3 ON t2.district_num = t3.districut_number
JOIN region_district t4 ON (t2.season = t4.season AND t3.districut_number = t4.district_num AND t4.region_num = 3);
SELECT result.school_name, result.region_num, result.season
FROM (SELECT * from team t1
JOIN district_team t2 ON t1.school_name = t2.team_name
join district t3 ON t2.district_num = t3.districut_number
NATURAL JOIN region_district t4)result
WHERE result.region_num = 3;
but these queries give no results even though the first query gives results that are in region 3.
--- Schema of tables in query ---
CREATE TABLE `team` (
`school_name` VARCHAR(50) NOT NULL,
`mascot_name` VARCHAR(50) NULL DEFAULT NULL,
PRIMARY KEY (`school_name`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
CREATE TABLE `district_team` (
`district_num` INT(11) NOT NULL,
`team_name` VARCHAR(50) NOT NULL DEFAULT '',
`season` INT(11) NOT NULL,
INDEX `district_fk` (`district_num`),
INDEX `team_district_fk` (`team_name`),
CONSTRAINT `district_fk` FOREIGN KEY (`district_num`) REFERENCES `district` (`districut_number`),
CONSTRAINT `team_district_fk` FOREIGN KEY (`team_name`) REFERENCES `team` (`school_name`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
CREATE TABLE `district` (
`districut_number` INT(11) NOT NULL,
PRIMARY KEY (`districut_number`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
CREATE TABLE `region_district` (
`district_num` INT(11) NOT NULL,
`region_num` INT(11) NOT NULL,
`season` INT(11) NULL DEFAULT NULL,
INDEX `region_fk` (`region_num`),
INDEX `district_region_fk` (`district_num`),
CONSTRAINT `district_region_fk` FOREIGN KEY (`district_num`) REFERENCES `district` (`districut_number`),
CONSTRAINT `region_fk` FOREIGN KEY (`region_num`) REFERENCES `region` (`region_number`)
)
COLLATE='utf8_general_ci'
ENGINE=InnoDB
;
Does anyone know why this would be the case or what's going on here? Could this be a character encoding issue?
UPDATE---
The Query works when I tell the table which index to use or to ignore the index.
So these queries give the expected output --
SELECT *
FROM team t1
JOIN district_team t2 ON t1.school_name = t2.team_name
join district t3 ON t2.district_num = t3.districut_number
JOIN region_district t4 IGNORE INDEX(region_fk) ON (t2.season = t4.season AND t3.districut_number = t4.district_num AND t4.region_num = 3);
SELECT *
FROM team t1
JOIN district_team t2 ON t1.school_name = t2.team_name
join district t3 ON t2.district_num = t3.districut_number
JOIN region_district t4 use INDEX(region_fk) ON (t2.season = t4.season AND t3.districut_number = t4.district_num AND t4.region_num = 3);
But this query doesn't
SELECT *
FROM team t1
JOIN district_team t2 ON t1.school_name = t2.team_name
join district t3 ON t2.district_num = t3.districut_number
JOIN region_district t4 ON (t2.season = t4.season AND t3.districut_number = t4.district_num AND t4.region_num = 3);
Does anyone know why this would occur?
image of above query execution

Mysql query with multiple selects results in high CPU load

I'm trying to do a link exchange script and run into a bit of trouble.
Each link can be visited by an IP address a number of x times (frequency in links table). Each visit costs a number of credits (spend limit given in limit in links table)
I've got the following tables:
CREATE TABLE IF NOT EXISTS `contor` (
`key` varchar(25) NOT NULL,
`uniqueHandler` varchar(30) DEFAULT NULL,
`uniqueLink` varchar(30) DEFAULT NULL,
`uniqueUser` varchar(30) DEFAULT NULL,
`owner` varchar(50) NOT NULL,
`ip` varchar(15) DEFAULT NULL,
`credits` float NOT NULL,
`tstamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`key`),
KEY `uniqueLink` (`uniqueLink`),
KEY `uniqueHandler` (`uniqueHandler`),
KEY `uniqueUser` (`uniqueUser`),
KEY `owner` (`owner`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE IF NOT EXISTS `links` (
`unique` varchar(30) NOT NULL DEFAULT '',
`url` varchar(1000) DEFAULT NULL,
`frequency` varchar(5) DEFAULT NULL,
`limit` float NOT NULL DEFAULT '0',
PRIMARY KEY (`unique`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
I've got the following query:
$link = MYSQL_QUERY("
SELECT *
FROM `links`
WHERE (SELECT count(key) FROM contor WHERE ip = '$ip' AND contor.uniqueLink = links.unique) <= `frequency`
AND (SELECT sum(credits) as cost FROM contor WHERE contor.uniqueLink = links.unique) <= `limit`")
There are 20 rows in the table links.
The problem is that whenever there are about 200k rows in the table contor the CPU load is huge.
After applying the solution provided by #Barmar:
Added a composite index on (uniqueLink, ip) and dropped all other indexes except PRIMARY, EXPLAIN gives me this:
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY l ALL NULL NULL NULL NULL 18
1 PRIMARY <derived2> ALL NULL NULL NULL NULL 15
2 DERIVED pop_contor index NULL contor_IX1 141 NULL 206122
Try using a join rather than a correlated subquery.
SELECT l.*
FROM links AS l
LEFT JOIN (
SELECT uniqueLink, SUM(ip = '$ip') AS ip_visits, SUM(credits) AS total_credits
FROM contor
GROUP BY uniqueLink
) AS c
ON c.uniqueLink = l.unique AND ip_visits <= frequency AND total_credits <= limit
If this doesn't help, try adding an index on contor.ip.
The current query is of the form:
SELECT l.*
FROM `links` l
WHERE l.frequency >= ( SELECT COUNT(ck.key)
FROM contor ck
WHERE ck.uniqueLink = l.unique
AND ck.ip = '$ip'
)
AND l.limit >= ( SELECT SUM(sc.credits)
FROM contor sc
WHERE sc.uniqueLink = l.unique
)
Those correlated subqueries are going to eat your lunch. And your lunchbox too.
I'd suggest testing an inline view that performs both of the aggregations from contor in one pass, and then join the result from that to the links table.
Something like this:
SELECT l.*
FROM ( SELECT c.uniqueLink
, SUM(c.ip = '$ip' AND c.key IS NOT NULL) AS count_key
, SUM(c.credits) AS sum_credits
FROM `contor` c
GROUP
BY c.uniqueLink
) d
JOIN `links` l
ON l.unique = d.uniqueLink
AND l.frequency >= d.count_key
AND l.limit >= d.sum_credits
For optimal performance of the aggregation inline view query, provide a covering index that MySQL can use to optimize the GROUP BY (avoiding a Using filesort operation)
CREATE INDEX `contor_IX1` ON `contor` (`uniqueLink`, `credits`, `ip`) ;
Adding that index renders the uniqueLink index redundant, so also...
DROP INDEX `uniqueLink` ON `contor` ;
EDIT
Since we have a guarantee that contor.key column is non-NULL (i.e. the NOT NULL constraint), this part of the query above is unneeded AND c.key IS NOT NULL, and can be removed. (I also removed the key column from the covering index definition above.)
SELECT l.*
FROM ( SELECT c.uniqueLink
, SUM(c.ip = '$ip') AS count_key
, SUM(c.credits) AS sum_credits
FROM `contor` c
GROUP
BY c.uniqueLink
) d
JOIN `links` l
ON l.unique = d.uniqueLink
AND l.frequency >= d.count_key
AND l.limit >= d.sum_credits

Improve speed of MySQL query with 5 left joins

Working on a support ticketing system with not a lot of tickets (~3,000). To get a summary grid of ticket information, there are five LEFT JOIN statements on custom field table (j25_field_value) containing about 10,000 records. The query runs too long (~10 seconds) and in cases with a WHERE clause, it runs even longer (up to ~30 seconds or more).
Any suggestions for improving the query to reduce the time to run?
Four tables:
j25_support_tickets
CREATE TABLE `j25_support_tickets` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`category_id` int(11) NOT NULL DEFAULT '0',
`user_id` int(11) DEFAULT NULL,
`email` varchar(50) DEFAULT NULL,
`subject` varchar(255) DEFAULT NULL,
`message` text,
`modified_date` datetime DEFAULT NULL,
`priority_id` tinyint(3) unsigned DEFAULT NULL,
`status_id` tinyint(3) unsigned DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `id` (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=3868 DEFAULT CHARSET=utf8
j25_support_priorities
CREATE TABLE `j25_support_priorities` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`title` varchar(100) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `id` (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=14 DEFAULT CHARSET=utf8
j25_support_statuses
CREATE TABLE `j25_support_statuses` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`title` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `id` (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=7 DEFAULT CHARSET=utf8
j25_field_value (id, ticket_id, field_id, field_value)
CREATE TABLE `j25_support_field_value` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`ticket_id` int(11) DEFAULT NULL,
`field_id` int(11) DEFAULT NULL,
`field_value` tinytext,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=10889 DEFAULT CHARSET=utf8
Also, ran this:
SELECT LENGTH(field_value) len FROM j25_support_field_value ORDER BY len DESC LIMIT 1
note: the result = 38
The query:
SELECT DISTINCT t.id as ID
, (select p.title from j25_support_priorities p where p.id = t.priority_id) as Priority
, (select s.title from j25_support_statuses s where s.id = t.status_id) as Status
, t.subject as Subject
, t.email as SubmittedByEmail
, type.field_value AS IssueType
, ver.field_value AS Version
, utype.field_value AS UserType
, cust.field_value AS Company
, refno.field_value AS RefNo
, t.modified_date as Modified
FROM j25_support_tickets AS t
LEFT JOIN j25_support_field_value AS type ON t.id = type.ticket_id AND type.field_id =1
LEFT JOIN j25_support_field_value AS ver ON t.id = ver.ticket_id AND ver.field_id =2
LEFT JOIN j25_support_field_value AS utype ON t.id = utype.ticket_id AND utype.field_id =3
LEFT JOIN j25_support_field_value AS cust ON t.id = cust.ticket_id AND cust.field_id =4
LEFT JOIN j25_support_field_value AS refno ON t.id = refno.ticket_id AND refno.field_id =5
ALTER TABLE j25_support_field_value
ADD INDEX (`ticket_id`,`field_id`,`field_value`(50))
This index will work as a covering index for your query. It will allow the joins to use only this index to look up the values. It should perform massively faster than without this index, since currently your query would have to read every row in the table to find what matches each combination of ticket_id and field_id.
I would also suggest converting your tables to InnoDB engine, unless you have a very explicit reason for using MyISAM.
ALTER TABLE tablename ENGINE=InnoDB
As above - a better index would help. You could probably then simplify your query into something like this too (join to the table only once):
SELECT t.id as ID
, p.title as Priority
, s.title as Status
, t.subject as Subject
, t.email as SubmittedByEmail
, case when v.field_id=1 then v.field_value else null end as IssueType
, case when v.field_id=2 then v.field_value else null end as Version
, case when v.field_id=3 then v.field_value else null end as UserType
, case when v.field_id=4 then v.field_value else null end as Company
, case when v.field_id=5 then v.field_value else null end as RefNo
, t.modified_date as Modified
FROM j25_support_tickets AS t
LEFT JOIN j25_support_field_value v ON t.id = v.ticket_id
LEFT JOIN j25_support_priorities p ON p.id = t.priority_id
LEFT JOIN j25_support_statuses s ON s.id = t.status_id;
You can do away with the subqueries for starters and just get them from another join. You can add an index to j25_support_field_value
alter table j25_support_field_value add key(id, field_type);
I assume there is an index on id in j25_support_tickets - if not and if they are unique, add a unique index alter table j25_support_tickets add unique key(id); If they're not unique, remove the word unique from that statement.
In MySQL, a join usually requires an index on the field(s) that you are using to join on. This will hold up and produce very reasonable results with huge tables (100m+), if you follow that rule, you will not go wrong.
Are the ids in j25_support_tickets unique? If they are, you can do away with the distinct - if not, or if you are getting exact duplicates in each row, still do away with the distinct and add a group by t.id to the end of this:
SELECT t.id as ID
, p.title as Priority
, s.title as Status
, t.subject as Subject
, t.email as SubmittedByEmail
, type.field_value AS IssueType
, ver.field_value AS Version
, utype.field_value AS UserType
, cust.field_value AS Company
, refno.field_value AS RefNo
, t.modified_date as Modified
FROM j25_support_tickets AS t
LEFT JOIN j25_support_field_value AS type ON t.id = type.ticket_id AND type.field_id =1
LEFT JOIN j25_support_field_value AS ver ON t.id = ver.ticket_id AND ver.field_id =2
LEFT JOIN j25_support_field_value AS utype ON t.id = utype.ticket_id AND utype.field_id =3
LEFT JOIN j25_support_field_value AS cust ON t.id = cust.ticket_id AND cust.field_id =4
LEFT JOIN j25_support_field_value AS refno ON t.id = refno.ticket_id AND refno.field_id =5
LEFT JOIN j25_support_priorities p ON p.id = t.priority_id
LEFT JOIN j25_support_statuses s ON s.id = t.status_id;
Switch to InnoDB.
After switching to InnoDB, make the PRIMARY KEY for j25_support_field_value be (ticket_id, field_id) (and get rid if id). (Tacking on field_value(50) will hurt, not help.)
A PRIMARY KEY is a UNIQUE KEY, so don't have both.
Use VARCHAR(255) instead of the nearly-equivalent TINYTEXT.
EAV schema sucks. My rant on EAV.

How to improve sub query performance in MySQL

I have a CRM system that generates attributes using an EAV model. The problem, as you may well be aware, is that the EAV model requires complex queries to pull the data. Each attribute has to be returned in a separate column.
When using sub queries, MySQL performance sucks. I have to find a better way to write my queries by analyzing them using the given where clause, sort order and the limit "if any"!
By sub query I am referring to a query that looks like this
SELECT a.account_name, a.account_type, a.status, a.account_id, s.fieldValue, s2.last_training_on, s3.fieldValue
FROM accounts AS a
INNER JOIN clients AS c ON c.client_id = a.client_id
LEFT JOIN (
SELECT p.related_to AS account_id, decimal_value AS fieldValue
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
WHERE p.field_id = '19' AND r.client_id = '7';
) AS s ON s.account_id = a.account_id
LEFT JOIN (
SELECT p.related_to AS account_id, datetime_value AS last_training_on
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
WHERE p.field_id = '10' AND r.client_id = '7';
) AS s2 ON s2.account_id = a.account_id
LEFT JOIN (
SELECT
p.related_to
, CAST(GROUP_CONCAT(o.label SEPARATOR " | ") AS CHAR(255)) AS fieldValue
FROM df_answer_predefined AS p
INNER JOIN df_fields_options AS o ON o.option_id = p.option_id
INNER JOIN df_field_to_client_relation AS r ON r.field_id = o.field_id
WHERE o.is_place_holder = 0 AND o.field_id = '16' AND r.field_id = '16' AND r.client_id = '7'
GROUP BY p.related_to;
) AS s3 ON s3.related_to = a.account_id
WHERE c.client_id = '7' AND c.status = 'Active' AND ( a.account_type = 'TEST' OR a.account_type = 'VALUE' OR s2.last_training_on > '2015-01-01 00:00:00') AND (s.fieldValue = 'Medium' OR s.fieldValue = 'Low' OR a.expType = 'Very High')
ORDER BY a.account_name
LIMIT 500;
I thought about creating a temporary table using MEMORY engine with the content of the sub query like this
CREATE TEMPORARY TABLE s (KEY(account_id, fieldValue)) ENGINE = MEMORY
SELECT p.related_to AS account_id, decimal_value AS fieldValue
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
WHERE p.field_id = '19' AND r.client_id = '7';
CREATE TEMPORARY TABLE s2 (KEY(account_id, INDEX USING BTREE last_training_on)) ENGINE = MEMORY
SELECT p.related_to AS account_id, datetime_value AS last_training_on
FROM df_answers_text AS p
INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
WHERE p.field_id = '10' AND r.client_id = '7';
CREATE TEMPORARY TABLE s3 (KEY(related_to, fieldValue)) ENGINE = MEMORY
SELECT
p.related_to
, CAST(GROUP_CONCAT(o.label SEPARATOR " | ") AS CHAR(255)) AS fieldValue
FROM df_answer_predefined AS p
INNER JOIN df_fields_options AS o ON o.option_id = p.option_id
INNER JOIN df_field_to_client_relation AS r ON r.field_id = o.field_id
WHERE o.is_place_holder = 0 AND o.field_id = '16' AND r.field_id = '16' AND r.client_id = '7'
GROUP BY p.related_to;
CREATE TEMPORARY TABLE s3 (KEY(related_to)) ENGINE = MEMORY
SELECT
p.related_to
, CAST(GROUP_CONCAT(o.label SEPARATOR " | ") AS CHAR(255)) AS fieldValue
FROM df_answer_predefined AS p
INNER JOIN df_fields_options AS o ON o.option_id = p.option_id
INNER JOIN df_field_to_client_relation AS r ON r.field_id = o.field_id
WHERE o.is_place_holder = 0 AND o.field_id = '16' AND r.field_id = '16' AND r.client_id = '7'
GROUP BY p.related_to;
Then my new query will look like this
SELECT a.account_name, a.account_type, a.status, a.account_id, s.fieldValue, s2.last_training_on, s3.fieldValue
FROM accounts AS a
INNER JOIN clients AS c ON c.client_id = a.client_id
LEFT JOIN s ON s.account_id = a.account_id
LEFT JOIN s2 ON s2.account_id = a.account_id
LEFT JOIN s3 ON s2.related_to = a.account_id
WHERE c.client_id = '7' AND c.status = 'Active' AND ( a.account_type = 'TEST' OR a.account_type = 'VALUE' OR s2.last_training_on > '2015-01-01 00:00:00') AND (s.fieldValue = 'Medium' OR s.fieldValue = 'Low' OR a.expType = 'Very High')
ORDER BY a.account_name
LIMIT 500;
DROP TEMPORARY TABLE s, s2;
The problem that I am facing now is that the temporary table will be created from the entire matching data set in the database, which consumes a lot of time. But my outer query is only looking for 500 records sorted by a.account_name. If the temporary table has 1 million records, that is a waste of time and obviously gives me bad performance.
I am looking to find a better way to pass on the clause to the sub query so that way I would only create a temporary table with the needed data for the outer query
Note: these queries are generated dynamic using a GUI. I am unable to figure out how to extract the logic/clause and properly pass them to the sub query.
QUESTIONS
How can I look at the where clause, parse it and pass it down to the sub queries to reduce the amount of data in them? If all the clauses were joined with "AND" then my life would be easier, but since I have a mix of "AND" and "OR" it is very complex.
Is there a better approach to this problem rather than using Temporary tables.
EDITED
Here are my table definitions
CREATE TABLE df_answer_predefined (
answer_id int(11) unsigned NOT NULL AUTO_INCREMENT,
field_id int(11) unsigned DEFAULT NULL,
related_to int(11) unsigned DEFAULT NULL,
option_id int(11) unsigned DEFAULT NULL,
created_by int(11) unsigned NOT NULL,
created_on datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (answer_id),
UNIQUE KEY un_row (field_id,option_id,related_to),
KEY field_id (field_id),
KEY related_to (related_to),
KEY to_delete (field_id,related_to),
KEY outter_view (field_id,option_id,related_to)
) ENGINE=InnoDB AUTO_INCREMENT=4946214 DEFAULT CHARSET=utf8;
`CREATE TABLE df_fields_options (
option_id int(11) unsigned NOT NULL AUTO_INCREMENT,
field_id int(11) unsigned NOT NULL,
label varchar(255) DEFAULT NULL,
is_place_holder tinyint(1) NOT NULL DEFAULT '0',
is_default tinyint(1) NOT NULL DEFAULT '0',
sort smallint(3) NOT NULL DEFAULT '1',
status tinyint(1) NOT NULL DEFAULT '1',
PRIMARY KEY (option_id),
KEY i (field_id),
KEY d (option_id,field_id,is_place_holder)
) ENGINE=InnoDB AUTO_INCREMENT=155 DEFAULT CHARSET=utf8;`
`CREATE TABLE df_field_to_client_relation (
relation_id int(11) unsigned NOT NULL AUTO_INCREMENT,
client_id int(11) unsigned DEFAULT NULL,
field_id int(11) unsigned DEFAULT NULL,
PRIMARY KEY (relation_id),
UNIQUE KEY unique_row (field_id,client_id),
KEY client_id (client_id),
KEY flient_id (field_id)
) ENGINE=InnoDB AUTO_INCREMENT=26 DEFAULT CHARSET=utf8;`
`CREATE TABLE df_answers_text (
answer_id int(11) unsigned NOT NULL AUTO_INCREMENT,
notes varchar(20000) DEFAULT NULL,
datetime_value datetime DEFAULT NULL,
date_value date DEFAULT NULL,
us_phone_number char(10) DEFAULT NULL,
field_id int(11) unsigned DEFAULT NULL,
related_to int(11) unsigned DEFAULT NULL,
created_by int(11) unsigned NOT NULL,
created_on datetime DEFAULT CURRENT_TIMESTAMP,
modified_by int(11) DEFAULT NULL,
modified_on datetime DEFAULT NULL,
big_unsigned_value bigint(20) DEFAULT NULL,
big_signed_value bigint(19) DEFAULT NULL,
unsigned_value int(11) DEFAULT NULL,
signed_value int(10) DEFAULT NULL,
decimal_value decimal(18,4) DEFAULT NULL,
PRIMARY KEY (answer_id),
UNIQUE KEY unique_answer (field_id,related_to),
KEY field_id (field_id),
KEY related_to (related_to),
KEY big_unsigned_value (big_unsigned_value),
KEY big_signed_value (big_signed_value),
KEY unsigned_value (unsigned_value),
KEY signed_value (signed_value),
KEY decimal_Value (decimal_value)
) ENGINE=InnoDB AUTO_INCREMENT=2458748 DEFAULT CHARSET=utf8;`
The query that takes the most time is the third sub query with the alias s3
Here is the execution plan for the query that is taking a long time "2 seconds"
UNIQUE(a,b,c)
INDEX (a)
DROP the INDEX, since the UNIQUE key is an INDEX and the INDEX is a prefix of the UNIQUE.
PRIMARY KEY(d)
UNIQUE(a,b,c)
Why have d at all? Simply say PRIMARY KEY(a,b,c).
FROM ( SELECT ... )
JOIN ( SELECT ... ) ON ...
optimizes poorly (until 5.6.6). Whenever possible turn JOIN ( SELECT ) into a JOIN with the table. As you suggested, using tmp tables may be better, if you can add a suitable index to the tmp table. Best is to try to avoid more than one "table" that is a subquery.
In a many-to-many relation table, don't include an id for the table, instead have only
PRIMARY KEY (a,b), -- for enforcing uniqueness, providing a PK, and going one direction
INDEX (b,a) -- for going the other way.
The EXPLAIN does not seem to match the SELECT you provided. Each is useless without the other.
Another approach that might help... Instead of
SELECT ..., s2.foo, ...
...
JOIN ( SELECT ... FROM x WHERE ... ) AS s2 ON s2.account_id = a.account_id
see if you can reformulate it as:
SELECT ...,
( SELECT foo FROM x WHERE ... AND related = a.account_id) AS foo, ...
...
That is, replace the JOIN subquery with a correlated subquery for the one value you need.
The bottom line is that the EAV model sucks.
Hmmm... I don't see the need for this at all, since r is not used elsewhere in the query...
INNER JOIN df_field_to_client_relation AS r ON r.field_id = p.field_id
WHERE p.field_id = '19' AND r.client_id = '7'
It seems to be equivalent to
WHERE EXISTS ( SELECT * FROM df_field_to_client_relation
WHERE field_id = '19' AND client_id = '7' )
but why bother checking for existence?

MySQL: UPDATE with SUM and JOIN

I'm trying to do a SUM and store it in another table. The SUM is simple :
SELECT award.alias_id,
SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
I now want to store that. I figured out how to do it on a row-by-row basis :
UPDATE aliaspoint
SET points = (SELECT SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
WHERE achiever.alias_id = 2000)
WHERE alias_id = 2000;
I thought something like this might work but I get:
ERROR 1111 (HY000): Invalid use of group function
UPDATE aliaspoint
INNER JOIN achiever ON aliaspoint.alias_id = achiever.alias_id
INNER JOIN award ON achiever.award_id = award.id
SET aliaspoint.points = SUM(award.points)
And some table definitions to help :
mysql> show create table aliaspoint;
| metaward_aliaspoint | CREATE TABLE `aliaspoint` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`alias_id` int(11) NOT NULL,
`points` double DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `alias_id` (`alias_id`),
KEY `aliaspoint_points` (`points`)
) ENGINE=MyISAM AUTO_INCREMENT=932081 DEFAULT CHARSET=latin1 |
mysql> show create table achiever;
| metaward_achiever | CREATE TABLE `achiever` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`modified` datetime NOT NULL,
`created` datetime NOT NULL,
`award_id` int(11) NOT NULL,
`alias_id` int(11) NOT NULL,
`count` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `achiever_award_id` (`award_id`),
KEY `achiever_alias_id` (`alias_id`)
) ENGINE=MyISAM AUTO_INCREMENT=87784996 DEFAULT CHARSET=utf8 |
mysql> show create table award;
| metaward_award | CREATE TABLE `award` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`points` double DEFAULT NULL,
PRIMARY KEY (`id`),
) ENGINE=MyISAM AUTO_INCREMENT=131398 DEFAULT CHARSET=utf8 |
You're missing the GROUP BY clause in:
SET points = (SELECT SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
WHERE achiever.alias_id = 2000)
There isn't enough information on the AWARD and ACHIEVER tables, so I recommend testing this before updating the UPDATE statement:
SELECT t.id, -- omit once confirmed data is correct
a.alias_id, -- omit once confirmed data is correct
SUM(t.points) AS points
FROM AWARD t
JOIN ACHIEVER a ON a.award_id = t.id
GROUP BY t.id, a.alias_id
Once you know the summing is correct, update the UPDATE statement:
SET points = (SELECT SUM(t.points)
FROM AWARD t
JOIN ACHIEVER a ON a.award_id = t.id
WHERE a.alias_id = 2000 --don't include if you don't need it
GROUP BY t.id, a.alias_id)