View with join between field that can be null - mysql

I have 2 tables with more than 50,000 records each, each one of these.
CREATE TABLE IF NOT EXISTS `call_attempt` (
`csvleads_id_fk` int(11) NOT NULL,
`users_id_fk` int(11) NOT NULL,
`status` int(11) NOT NULL,
`call_id` varchar(50) default NULL,
KEY `csvlead` (`csvleads_id_fk`),
KEY `user_id` (`users_id_fk`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE IF NOT EXISTS `recorded` (
`record_id` int(11) NOT NULL auto_increment,
`agent_id_fk` int(11) NOT NULL,
`lead_id_fk` int(11) NOT NULL,
`duration` int(11) NOT NULL,
`recording_url` text collate utf8_unicode_ci NOT NULL,
`recorded_at` timestamp NULL default NULL,
`call_id` varchar(50) character set utf8 default NULL,
PRIMARY KEY (`record_id`),
UNIQUE KEY `filterme` (`recording_url`(100))
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci AUTO_INCREMENT=72158 ;
I added a call_id field to make the link. This means that new records must have these fields filled, at least in table call_attempt.
call_attempt recorded
------------------- ----------------
users_id_fk call_id call_id dutarion
------------------- ----------------
1 NULL NULL 10
2 NULL NULL 8
3 NULL NULL 5
4 NULL ca12 19
50000 ca12 ca14 9
50001 ca13
50002 ca14
I need to throw me a view that all records in the table call_attempt, and if you find records in table recorded the duration is brought.
I tried LEFT JOIN but takes more than 50 seconds.
SELECT
`cal`.`csvleads_id_fk` AS `Id`,
`rc`.`duration` AS `duration`
FROM `call_attempt` AS cal
LEFT JOIN recorded AS rc ON (cal.call_id = rc.call_id AND (rc.call_id IS NOT NULL))
84.625 sec / 0.000 sec
Next, I tried doing a LEFT JOIN to a SELECT table recorded with only the records that call_id are NOT NULL but don't let me create the view.
CREATE
ALGORITHM = UNDEFINED
DEFINER = `benscott`#`%`
SQL SECURITY DEFINER
VIEW `callHistory4` AS
SELECT
`cal`.`csvleads_id_fk` AS `Id`,
`rc`.`duration` AS `duration`
FROM `call_attempt` AS cal
LEFT JOIN (SELECT call_id , duration FROM `recorded` WHERE call_id IS NOT NULL) AS rc ON (rc.call_id = cal.call_id)
Error Code: 1349. View's SELECT contains a subquery in the FROM clause
Then I create another view with just the results NOT NULL from call_id.
CREATE
ALGORITHM = UNDEFINED
DEFINER = `benscott`#`%`
SQL SECURITY DEFINER
VIEW `view_recorded` AS
SELECT call_id , duration FROM `recorded` WHERE call_id IS NOT NULL
And in the new view I join with view_recorded but also takes more than 40 seconds.
SELECT
`cal`.`csvleads_id_fk` AS `Id`,
`rc`.`duration` AS `duration`
FROM `call_attempt` AS cal
LEFT JOIN view_recorded AS rc ON (rc.call_id = cal.call_id)
92.094 sec / 0.000 sec
So, what options Do I have? Thank you!!

For this query:
SELECT `cal`.`csvleads_id_fk` AS `Id`, `rc`.`duration` AS `duration`
FROM `call_attempt_log` cal LEFT JOIN
recorded_calls rc
ON cal.call_id = rc.call_id AND rc.call_id IS NOT NULL;
The best index is recorded_calls(call_id, duration).

Related

How to use helper functions to create a stored procedure that updates two fields mySQL

So I have a few tables on mySQL:
CREATE TABLE IF NOT EXISTS `salarygrade` (
`GRADE` INT(11) NOT NULL,
`HOURLYRATE` FLOAT NOT NULL,
PRIMARY KEY (`GRADE`));
===========================================================================
CREATE TABLE IF NOT EXISTS `staffongrade` (
`STAFFNO` INT(11) NOT NULL,
`GRADE` INT(11) NOT NULL,
`STARTDATE` DATE NULL DEFAULT NULL,
`FINISHDATE` DATE NULL DEFAULT NULL,
INDEX `STAFFONGRADE_FK` (`STAFFNO` ASC),
INDEX `STAFFONGRADE2_FK` (`GRADE` ASC),
PRIMARY KEY (`GRADE`, `STAFFNO`),
CONSTRAINT `FK_STAFFONG_STAFFONGR_SALARYGR`
FOREIGN KEY (`GRADE`)
REFERENCES `salarygrade` (`GRADE`),
CONSTRAINT `FK_STAFFONG_STAFFONGR_STAFF`
FOREIGN KEY (`STAFFNO`)
REFERENCES `staff` (`STAFFNO`));
===========================================================================
CREATE TABLE IF NOT EXISTS `campaign` (
`CAMPAIGN_NO` INT(11) NOT NULL,
`TITLE` VARCHAR(30) NOT NULL,
`CUSTOMER_ID` INT(11) NOT NULL,
`THEME` VARCHAR(40) NULL DEFAULT NULL,
`CAMPAIGNSTARTDATE` DATE NULL DEFAULT NULL,
`CAMPAIGNFINISHDATE` DATE NULL DEFAULT NULL,
`ESTIMATEDCOST` INT(11) NULL DEFAULT NULL,
`ACTUALCOST` FLOAT NULL DEFAULT NULL,
PRIMARY KEY (`CAMPAIGN_NO`),
INDEX `OWNS_FK` (`CUSTOMER_ID` ASC),
CONSTRAINT `FK_CAMPAIGN_OWNS_CUSTOMER`
FOREIGN KEY (`CUSTOMER_ID`)
REFERENCES `customer` (`CUSTOMER_ID`)
ON DELETE RESTRICT
ON UPDATE RESTRICT);
===========================================================================
CREATE TABLE IF NOT EXISTS `workson` (
`STAFFNO` INT(11) NOT NULL,
`CAMPAIGN_NO` INT(11) NOT NULL,
`WDATE` DATE NOT NULL,
`HOUR` FLOAT NULL DEFAULT NULL,
PRIMARY KEY (`STAFFNO`, `CAMPAIGN_NO`, `WDATE`),
INDEX `WORKSON_FK` (`STAFFNO` ASC),
INDEX `FK_WORKSON_WORKSON2_CAMPAIGN_idx` (`CAMPAIGN_NO` ASC),
CONSTRAINT `FK_WORKSON_WORKSON2_CAMPAIGN`
FOREIGN KEY (`CAMPAIGN_NO`)
REFERENCES `campaign` (`CAMPAIGN_NO`)
ON DELETE RESTRICT
ON UPDATE RESTRICT,
CONSTRAINT `FK_WORKSON_WORKSON_STAFF`
FOREIGN KEY (`STAFFNO`)
REFERENCES `staff` (`STAFFNO`));
And I want to create a stored procedure called sp_finish_campaign (in c_title varchar(30)) that takes a title of a campaign and finishes the campaign by updating the CAMPAIGNFINISHDATE to the current date and ACTUALCOST to the cost of the campaign, which is calculated from the number of hours different staff put into it on different dates, and the salary grade (this changes based on staffID and the timeframe based on the STARTDATE and FINISHDATE of the staffongrade table.
To calculate the ACTUALCOST, I created a helper function:
DELIMITER //
CREATE FUNCTION rate_on_date(staff_id int, given_date date)
RETURNS int
DETERMINISTIC
BEGIN
DECLARE salaryGrade int;
SET salaryGrade = (select grade from staffongrade
where staffno = staff_id AND (given_date BETWEEN STARTDATE AND FINISHDATE));
RETURN salaryGrade;
END //
DELIMITER ;
Which returns the pay grade based on the staff_id and given_date parameters I give it:
select rate_on_date(1, "2018-02-02") as Grade_On_Date;
For the parameters of this I assume I would have to get it from the workson table which looks like this:
I have tried using a select statement to get the paygrade:
select hourlyrate as 'grade' from salarygrade
where rate_on_date(1, "2018-02-02") = grade;
To calculate ACTUALCOST I assume I would have to do a calculation by multiplying HOUR column with the grade costs, and use the WDATE and STAFFNO columns in the workson table as parameters for my stored procedure that will calculate and update the CAMPAIGNFINISHDATE and ACTUALCOST of the campaign by inputting the campaign title into it.
But how would I go about doing this?
I'm just confused as to how to go about creating this procedure, and also confused about how to properly use these helper functions in my stored procedure.
I feel like this question is quite long but I don't really know what to ask or what direction I should take to solve this problem.
You don't really need a function. mysql can do multi-table updates (see https://dev.mysql.com/doc/refman/8.0/en/update.html) in your case it could look like this
update campaign c
join
(select c.campaign_no,
sum(hour * hourlyrate) cost
from campaign c
join workson w on w.campaign_no = c.campaign_no
join staffongrade s on s .staffno = w.staffno and w.wdate between s.startdate and s.finishdate
join salarygrade g on g.grade = s.grade
group by c.campaign_no
) s
on s.campaign_no = c.campaign_no
set actualcost = s.cost
where c.campaign_no = 1
;
Where the sub query does the needful
if you simplify your data this should be easy to prove;
drop table if exists salarygrade,campaign,workson,staffongrade;
CREATE TABLE `salarygrade`
( GRADE INT NOT NULL,
hOURLYRATE decimal(10,2) NOT NULL
);
insert into salarygrade values(1,10),(2,20);
cREATE TABLE IF NOT EXISTS `staffongrade` (
`STAFFNO` INT(11) NOT NULL,
`GRADE` INT(11) NOT NULL,
`STARTDATE` DATE NULL DEFAULT NULL,
`FINISHDATE` DATE NULL DEFAULT NULL
);
insert into staffongrade values
(1,1,'2019-01-01','2019-06-30'),(1,2,'2019-06-01','2019-12-31'),(2,1,'2019-01-01','2019-01-31');
CREATE TABLE IF NOT EXISTS `campaign` (
`CAMPAIGN_NO` INT(11) NOT NULL,
`CAMPAIGNSTARTDATE` DATE NULL DEFAULT NULL,
`CAMPAIGNFINISHDATE` DATE NULL DEFAULT NULL,
`ESTIMATEDCOST` INT(11) NULL DEFAULT NULL,
`ACTUALCOST` FLOAT NULL DEFAULT NULL
);
insert into campaign values (1,'2019-01-01','2019-12-31',null,null);
CREATE TABLE IF NOT EXISTS `workson` (
`STAFFNO` INT(11) NOT NULL,
`CAMPAIGN_NO` INT(11) NOT NULL,
`WDATE` DATE NOT NULL,
`HOUR` FLOAT NULL DEFAULT NULL
);
insert into workson values
(1,1,'2019-01-01',1),(1,1,'2019-12-01',1),(2,1,'2019-01-01',1);
select * from campaign;
+-------------+-------------------+--------------------+---------------+------------+
| CAMPAIGN_NO | CAMPAIGNSTARTDATE | CAMPAIGNFINISHDATE | ESTIMATEDCOST | ACTUALCOST |
+-------------+-------------------+--------------------+---------------+------------+
| 1 | 2019-01-01 | 2019-12-31 | NULL | 40 |
+-------------+-------------------+--------------------+---------------+------------+
1 row in set (0.00 sec)
Got to dash so I'll leave you to drop the update into a procedure.
IF staffongrade has NULL for finishdate then a bit of data cleansing is required. for simplicity I would create a temporary table to fill in gaps and change the update statement to use the projectfinishdate (if that's not known then substitute a suitable future date). This code would be inserted in your procedure prior to the update
so
insert into staffongrade values
(1,1,'2019-01-01',null),(1,2,'2019-07-01',null),(2,1,'2019-01-01',null);
drop temporary table if exists staffongradetemp;
create temporary table staffongradetemp like staffongrade;
insert into staffongradetemp
select s.STAFFNO,s.GRADE,s.STARTDATE,
case when s.FINISHDATE is not null then s.finishdate
else date_sub((select s1.startdate
from staffongrade s1
where s1.STAFFNO = s.STAFFNO and s1.startdate > s.STARTDATE
order by startdate limit 1), interval 1 day)
end
from staffongrade s
;
select * from staffongradetemp;
+---------+-------+------------+------------+
| STAFFNO | GRADE | STARTDATE | FINISHDATE |
+---------+-------+------------+------------+
| 1 | 1 | 2019-01-01 | 2019-06-30 |
| 1 | 2 | 2019-07-01 | NULL |
| 2 | 1 | 2019-01-01 | NULL |
+---------+-------+------------+------------+
3 rows in set (0.00 sec)
Which leaves all the last finshdates as null which we can trap in the update statement using coalesce
update campaign c
join
(select c.campaign_no,
sum(hour * hourlyrate) cost
from campaign c
join workson w on w.campaign_no = c.campaign_no
join **staffongradetemp s** on s .staffno = w.staffno and w.wdate between s.startdate and **coalesce(s.finishdate,c.CAMPAIGNFINISHDATE)**
join salarygrade g on g.grade = s.grade
where c.campaign_no = 1
group by c.campaign_no
) s
on s.campaign_no = c.campaign_no
set actualcost = s.cost
where 1 = 1;

Optimize tables MySQL

I have a query that is executed in 35s, which is waaaaay too long.
Here are the 3 tables concerned by the query (each table is approx. 13000 lines long, and should be much longer in the future) :
Table 1 : Domains
CREATE TABLE IF NOT EXISTS `domain` (
`id_domain` int(11) NOT NULL AUTO_INCREMENT,
`domain_domain` varchar(255) NOT NULL,
`projet_domain` int(11) NOT NULL,
`date_crea_domain` int(11) NOT NULL,
`date_expi_domain` int(11) NOT NULL,
`active_domain` tinyint(1) NOT NULL,
`remarques_domain` text NOT NULL,
PRIMARY KEY (`id_domain`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
Table 2 : Keywords
CREATE TABLE IF NOT EXISTS `kw` (
`id_kw` int(11) NOT NULL AUTO_INCREMENT,
`kw_kw` varchar(255) NOT NULL,
`clics_kw` int(11) NOT NULL,
`cpc_kw` float(11,3) NOT NULL,
`date_kw` int(11) NOT NULL,
PRIMARY KEY (`id_kw`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
Table 3 : Linking between domain and keyword
CREATE TABLE IF NOT EXISTS `kw_domain` (
`id_kd` int(11) NOT NULL AUTO_INCREMENT,
`kw_kd` int(11) NOT NULL,
`domain_kd` int(11) NOT NULL,
`selected_kd` tinyint(1) NOT NULL,
PRIMARY KEY (`id_kd`),
KEY `kw_to_domain` (`kw_kd`,`domain_kd`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
The query is as follows :
SELECT ng.*, kd.*, kg.*
FROM domain ng
LEFT JOIN kw_domain kd ON kd.domain_kd = ng.id_domain
LEFT JOIN kw kg ON kg.id_kw = kd.kw_kd
GROUP BY ng.id_domain
ORDER BY kd.selected_kd DESC, kd.id_kd DESC
Basically, it selects all domains, with, for each one of these domains, the last associated keyword.
Does anyone have an idea on how to optimize the tables or the query ?
The following will get the last keyword, according to your logic:
select ng.*,
(select kw_kd
from kw_domain kd
where kd.domain_kd = ng.id_domain and kd.selected_kd = 1
order by kd.id_kd desc
limit 1
) as kw_kd
from domain ng;
For performance, you want an index on kw_domain(domain_kd, selected_kd, kw_kd). In this case, the order of the fields matters.
You can use this as a subquery to get more information about the keyword:
select ng.*, kg.*
from (select ng.*,
(select kw_kd
from kw_domain kd
where kd.domain_kd = ng.id_domain and kd.selected_kd = 1
order by kd.id_kd desc
limit 1
) as kw_kd
from domain ng
) ng left join
kw kg
on kg.id_kw = ng.kw_kd;
In MySQL, group by can have poor performance, so this might work better, particularly with the right indexes.

How to INNER JOIN around a loop of tables

I have four tables as follows:
CREATE TABLE IF NOT EXISTS `categories` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
PRIMARY KEY (`id`),
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
CREATE TABLE IF NOT EXISTS `categories_friends` (
`category_id` int(10) unsigned NOT NULL,
`friend_id` int(10) unsigned NOT NULL,
UNIQUE KEY `category_id` (`friend_id`,`category_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
CREATE TABLE IF NOT EXISTS `friends` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`user_id` int(10) unsigned NOT NULL,
`friend_id` int(10) unsigned NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `user_id` (`user_id`,`friend_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
CREATE TABLE IF NOT EXISTS `ratings` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`user_id` int(10) unsigned NOT NULL,
`category_id` int(10) unsigned NOT NULL,
`title` varchar(255) NOT NULL,
`description` text NOT NULL,
`rating` tinyint(2) unsigned NOT NULL,
`public` tinyint(1) NOT NULL DEFAULT '0',
`created` datetime NOT NULL,
PRIMARY KEY (`id`),
) ENGINE=MyISAM DEFAULT CHARSET=utf8 ;
I am trying to perform the following query on those tables:
SELECT *
FROM `favred`.`ratings` AS `Rating`
INNER JOIN `favred`.`friends` AS `JFriend`
ON (`JFriend`.`friend_id` = `Rating`.`user_id`)
INNER JOIN `favred`.`categories_friends` AS `JCategoriesFriend`
ON (`JCategoriesFriend`.`category_id` = `Rating`.`category_id`
AND `JCategoriesFriend`.`friend_id` = `JFriend`.`id`)
INNER JOIN `favred`.`categories` AS `JCategory`
ON (`JCategory`.`id` = `Rating`.`category_id`
AND `JCategory`.`id` = `JCategoriesFriend`.`category_id`)
WHERE `JFriend`.`user_id` = 1
AND `Rating`.`user_id` <> 1
AND `JCategory`.`id` IN (4, 14)
GROUP BY `Rating`.`id`
The query above is not working, as it returns no results (although there is data in the tables that should return), what I'm trying to do is to find all the Ratings that were not authored by me (ID:1), but were authored by my Friends, but only if I've selected to view a specific Category for that Friend, with the resulting set being filtered by a given set of specific Categories.
The INNER JOINs loop around through Rating --> Friend --> CategoriesFreind --> Category --> back to Rating.
If I remove the additional portion of the INNER JOIN's ON clauses as follows:
SELECT *
FROM `favred`.`ratings` AS `Rating`
INNER JOIN `favred`.`friends` AS `JFriend`
ON (`JFriend`.`friend_id` = `Rating`.`user_id`)
INNER JOIN `favred`.`categories_friends` AS `JCategoriesFriend`
ON (`JCategoriesFriend`.`friend_id` = `JFriend`.`id`)
INNER JOIN `favred`.`categories` AS `JCategory`
ON (`JCategory`.`id` = `JCategoriesFriend`.`category_id`)
WHERE `JFriend`.`user_id` = 1
AND `Rating`.`user_id` <> 1
AND `JCategory`.`id` IN (4, 14)
GROUP BY `Rating`.`id`
then the query will return results, but because the INNER JOIN joining the CategoriesFriend to the Rating is not being filtered by the 'JCategory'.'id' IN (4, 14) clause, it returns all Ratings by that friend instead of filtered as it should be.
Any suggestions on how to modify my query to get it to pull the filtered results?
And I'm using CakePHP, so a query that would fit into it's unique query format would be preferred although not required.
first ,why are you use the JFriend.id, does it mean something,or is it as the same as user_id?
try this one,the same logic but it's from top to bottom ,I feel:
SELECT * FROM categories as JCategory
INNER JOIN categories_friends as JCategoriesFriend ON JCategoriesFriend.category_id = JCategory.id
INNER JOIN friends AS JFriend ON JFriend.friend_id = JCategoriesFriend.friend_id
INNER JOIN ratings AS Rating ON Rating.user_id = JFriend.friend_id
WHERE JCategory.id IN (4,14) AND JFriend.user_id = 1 AND Rating.user_id <> 1 GROUP BY Rating.id
I got one result from all the data that I made for the testing.
if it does not work also,try make some correct data,maybe the data is not right...
the testing data below:
categories: id | name (14| 141414)
categories_friends: category_id| friend_id (14| 2)
friends: id | user_id | friend_id (4| 1| 2)
ratings: id | user_id | category_id | title (2| 2| 14 | 'haha')
So I wondered if the INNER JOINs were being a little too limiting and specific in their ON clauses. So I thought that maybe a LEFT JOIN would work better...
SELECT *
FROM `favred`.`ratings` AS `Rating`
INNER JOIN `favred`.`friends` AS `JFriend`
ON (`JFriend`.`friend_id` = `Rating`.`user_id`)
LEFT JOIN `favred`.`categories_friends` AS `JCategoriesFriend`
ON (`JCategoriesFriend`.`friend_id` = `JFriend`.`id`
AND `JCategoriesFriend`.`category_id` = `Rating`.`category_id`)
WHERE `JFriend`.`user_id` = 1
AND `JRatingsUser`.`id` IS NULL
AND `Rating`.`user_id` <> 1
GROUP BY `Rating`.`id`
That query worked for me.
I did away with linking to the categories table directly, and linked indirectly through the categories_friends table which sped up the query a little bit, and everything is working great.

Sorting result of mysql join by avg of third table?

I have three tables.
One table contains submissions which has about 75,000 rows
One table contains submission ratings and only has < 10 rows
One table contains submission => competition mappings and for my test data also has about 75,000 rows.
What I want to do is
Get the top 50 submissions in a round of a competition.
Top is classified as highest average rating, followed by highest amount of votes
Here is the query I am using which works, but the problem is that it takes over 45 seconds to complete! I profiled the query (results at bottom) and the bottlenecks are copying the data to a tmp table and then sorting it so how can I speed this up?
SELECT `submission_submissions`.*
FROM `submission_submissions`
JOIN `competition_submissions`
ON `competition_submissions`.`submission_id` = `submission_submissions`.`id`
LEFT JOIN `submission_ratings`
ON `submission_submissions`.`id` = `submission_ratings`.`submission_id`
WHERE `top_round` = 1
AND `competition_id` = '2'
AND `submission_submissions`.`date_deleted` IS NULL
GROUP BY submission_submissions.id
ORDER BY AVG(submission_ratings.`stars`) DESC,
COUNT(submission_ratings.`id`) DESC
LIMIT 50
submission_submissions
CREATE TABLE `submission_submissions` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`account_id` int(11) NOT NULL,
`title` varchar(255) NOT NULL,
`description` varchar(255) DEFAULT NULL,
`genre` int(11) NOT NULL,
`goals` text,
`submission` text NOT NULL,
`date_created` datetime DEFAULT NULL,
`date_modified` datetime DEFAULT NULL,
`date_deleted` datetime DEFAULT NULL,
`cover_image` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `genre` (`genre`),
KEY `account_id` (`account_id`),
KEY `date_created` (`date_created`)
) ENGINE=InnoDB AUTO_INCREMENT=115037 DEFAULT CHARSET=latin1;
submission_ratings
CREATE TABLE `submission_ratings` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`account_id` int(11) NOT NULL,
`submission_id` int(11) NOT NULL,
`stars` tinyint(1) NOT NULL,
`date_created` datetime DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `submission_id` (`submission_id`),
KEY `account_id` (`account_id`),
KEY `stars` (`stars`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1;
competition_submissions
CREATE TABLE `competition_submissions` (
`competition_id` int(11) NOT NULL,
`submission_id` int(11) NOT NULL,
`top_round` int(11) DEFAULT '1',
PRIMARY KEY (`submission_id`),
KEY `competition_id` (`competition_id`),
KEY `top_round` (`top_round`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
SHOW PROFILE Result (ordered by duration)
state duration (summed) in sec percentage
Copying to tmp table 33.15621 68.46924
Sorting result 11.83148 24.43260
removing tmp table 3.06054 6.32017
Sending data 0.37560 0.77563
... insignificant amounts removed ...
Total 48.42497 100.00000
EXPLAIN
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE competition_submissions index_merge PRIMARY,competition_id,top_round competition_id,top_round 4,5 18596 Using intersect(competition_id,top_round); Using where; Using index; Using temporary; Using filesort
1 SIMPLE submission_submissions eq_ref PRIMARY PRIMARY 4 inkstakes.competition_submissions.submission_id 1 Using where
1 SIMPLE submission_ratings ALL submission_id 5 Using where; Using join buffer (flat, BNL join)
Assuming that in reality you won't be interested in unrated submissions, and that a given submission only has a single competition_submissions entry for a given match and top_round, I suggest:
SELECT s.*
FROM (SELECT `submission_id`,
AVG(`stars`) AvgStars,
COUNT(`id`) CountId
FROM `submission_ratings`
GROUP BY `submission_id`
ORDER BY AVG(`stars`) DESC, COUNT(`id`) DESC
LIMIT 50) r
JOIN `submission_submissions` s
ON r.`submission_id` = s.`id` AND
s.`date_deleted` IS NULL
JOIN `competition_submissions` c
ON c.`submission_id` = s.`id` AND
c.`top_round` = 1 AND
c.`competition_id` = '2'
ORDER BY r.AvgStars DESC,
r.CountId DESC
(If there is more than one competition_submissions entry per submission for a given match and top_round, then you can add the GROUP BY clause back in to the main query.)
If you do want to see unrated submissions, you can union the results of this query to a LEFT JOIN ... WHERE NULL query.
There is a simple trick that works on MySql and helps to avoid copying/sorting huge temp tables in queries like this (with LIMIT X).
Just avoid SELECT *, this copies all columns to the temporary table, then this huge table is sorted, and in the end, the query takes only 50 records from this huge table ( 50 / 70000 = 0,07 % ).
Select only columns that are really necessary to perform sort and limit, and then join missing columns only for selected 50 records by id.
select ss.*
from submission_submissions ss
join (
SELECT `submission_submissions`.id,
AVG(submission_ratings.`stars`) stars,
COUNT(submission_ratings.`id`) cnt
FROM `submission_submissions`
JOIN `competition_submissions`
ON `competition_submissions`.`submission_id` = `submission_submissions`.`id`
LEFT JOIN `submission_ratings`
ON `submission_submissions`.`id` = `submission_ratings`.`submission_id`
WHERE `top_round` = 1
AND `competition_id` = '2'
AND `submission_submissions`.`date_deleted` IS NULL
GROUP BY submission_submissions.id
ORDER BY AVG(submission_ratings.`stars`) DESC,
COUNT(submission_ratings.`id`) DESC
LIMIT 50
) xx
ON ss.id = xx.id
ORDER BY xx.stars DESC,
xx.cnt DESC;

MySQL: UPDATE with SUM and JOIN

I'm trying to do a SUM and store it in another table. The SUM is simple :
SELECT award.alias_id,
SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
I now want to store that. I figured out how to do it on a row-by-row basis :
UPDATE aliaspoint
SET points = (SELECT SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
WHERE achiever.alias_id = 2000)
WHERE alias_id = 2000;
I thought something like this might work but I get:
ERROR 1111 (HY000): Invalid use of group function
UPDATE aliaspoint
INNER JOIN achiever ON aliaspoint.alias_id = achiever.alias_id
INNER JOIN award ON achiever.award_id = award.id
SET aliaspoint.points = SUM(award.points)
And some table definitions to help :
mysql> show create table aliaspoint;
| metaward_aliaspoint | CREATE TABLE `aliaspoint` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`alias_id` int(11) NOT NULL,
`points` double DEFAULT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `alias_id` (`alias_id`),
KEY `aliaspoint_points` (`points`)
) ENGINE=MyISAM AUTO_INCREMENT=932081 DEFAULT CHARSET=latin1 |
mysql> show create table achiever;
| metaward_achiever | CREATE TABLE `achiever` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`modified` datetime NOT NULL,
`created` datetime NOT NULL,
`award_id` int(11) NOT NULL,
`alias_id` int(11) NOT NULL,
`count` int(11) NOT NULL,
PRIMARY KEY (`id`),
KEY `achiever_award_id` (`award_id`),
KEY `achiever_alias_id` (`alias_id`)
) ENGINE=MyISAM AUTO_INCREMENT=87784996 DEFAULT CHARSET=utf8 |
mysql> show create table award;
| metaward_award | CREATE TABLE `award` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`points` double DEFAULT NULL,
PRIMARY KEY (`id`),
) ENGINE=MyISAM AUTO_INCREMENT=131398 DEFAULT CHARSET=utf8 |
You're missing the GROUP BY clause in:
SET points = (SELECT SUM(award.points) AS points
FROM award
INNER JOIN achiever ON award.id = achiever.award_id
WHERE achiever.alias_id = 2000)
There isn't enough information on the AWARD and ACHIEVER tables, so I recommend testing this before updating the UPDATE statement:
SELECT t.id, -- omit once confirmed data is correct
a.alias_id, -- omit once confirmed data is correct
SUM(t.points) AS points
FROM AWARD t
JOIN ACHIEVER a ON a.award_id = t.id
GROUP BY t.id, a.alias_id
Once you know the summing is correct, update the INSERT statement:
SET points = (SELECT SUM(t.points)
FROM AWARD t
JOIN ACHIEVER a ON a.award_id = t.id
WHERE a.alias_id = 2000 --don't include if you don't need it
GROUP BY t.id, a.alias_id)