Slow SQL query with GROUP BY optimization - mysql

I have a MySQL DB. Acquired data are stored in raw_data_headers, raw_data_rows and raw_data_row_details table.
raw_data_row_details has a foreign key that references raw_data_rows.ID, and in the same way raw_data_rows has a foreign key that references raw_data_headers.ID.
In raw_data_headers are stored data headers, in raw_data_rows are stored every stage of acquisition program and in raw_data_row_details are stored details for each stage of acquisition program.
This is the query:
SELECT
q1.ProcessTypeID,
q1.TestTypeID,
q1.ComponentID,
q1.TestResultID,
COUNT(*) AS Counter
FROM (
SELECT
raw_data_headers.batch_id AS BatchID,
raw_data_test_outputs.test_output_type_id AS TestOutputTypeID,
raw_data_test_types.process_type_id AS ProcessTypeID,
raw_data_test_types.ID AS TestTypeID,
raw_data_row_details.component_id AS ComponentID,
raw_data_test_results.ID AS TestResultID
FROM raw_data_row_details
INNER JOIN raw_data_rows ON raw_data_rows.ID = raw_data_row_details.row_id
INNER JOIN raw_data_headers ON raw_data_headers.ID = raw_data_rows.header_id
INNER JOIN raw_data_test_results ON raw_data_test_results.ID = raw_data_row_details.Value
INNER JOIN raw_data_test_outputs ON raw_data_test_outputs.ID = raw_data_row_details.test_output_id
INNER JOIN raw_data_test_types ON raw_data_test_types.ID = raw_data_test_outputs.test_type_id
HAVING TestOutputTypeID = 2 AND BatchID = 1
) AS q1
GROUP BY q1.ProcessTypeID, q1.TestTypeID, q1.ComponentID, q1.TestResultID
raw_data_headers has 989'180 entries, raw_data_rows has 2'967'540 entries and raw_data_row_details has 13'848'520 entries.
The subquery q1 takes about 3 minutes, but the final query takes about 25 minutes. I think that the problem is in the GROUP BY.
How can I improve performance?
EDIT 1:
SELECT
gnuhmi.raw_data_test_types.process_type_id AS ProcessTypeID,
gnuhmi.raw_data_test_types.ID AS TestTypeID,
gnuhmi.raw_data_row_details.component_id AS ComponentID,
gnuhmi.raw_data_test_results.ID AS TestResultID,
COUNT(*) AS Counter
FROM gnuhmi.raw_data_row_details
INNER JOIN gnuhmi.raw_data_rows ON gnuhmi.raw_data_rows.ID = gnuhmi.raw_data_row_details.row_id
INNER JOIN gnuhmi.raw_data_headers ON gnuhmi.raw_data_headers.ID = gnuhmi.raw_data_rows.header_id
INNER JOIN gnuhmi.raw_data_test_results ON gnuhmi.raw_data_test_results.ID = gnuhmi.raw_data_row_details.Value
INNER JOIN gnuhmi.raw_data_test_outputs ON gnuhmi.raw_data_test_outputs.ID = gnuhmi.raw_data_row_details.test_output_id
INNER JOIN gnuhmi.raw_data_test_types ON gnuhmi.raw_data_test_types.ID = gnuhmi.raw_data_test_outputs.test_type_id
WHERE gnuhmi.raw_data_test_outputs.test_output_type_id = 2 AND gnuhmi.raw_data_headers.batch_id = 1
GROUP BY
gnuhmi.raw_data_test_results.ID,
gnuhmi.raw_data_row_details.component_id,
gnuhmi.raw_data_test_types.ID,
gnuhmi.raw_data_test_types.process_type_id
This is the new query, without the subquery and with WHERE instead of HAVING. This improved performance (thanks @Yogesh Sharma).
this is raw_data_headers structure:
CREATE TABLE `raw_data_headers` (
`ID` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Univocal record key',
`ProductID` int(11) NOT NULL COMMENT 'Product numeric ID',
`Datetime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Univocal record creation date',
`batch_id` int(11) DEFAULT NULL COMMENT 'Univocal batch key',
`RecipeName` varchar(80) DEFAULT NULL COMMENT 'Used recipe name',
`RecipeVersion` smallint(6) DEFAULT NULL COMMENT 'Used recipe version',
`process_result_id` smallint(6) DEFAULT NULL COMMENT 'Process result key',
`invalidated` tinyint(1) NOT NULL DEFAULT '0' COMMENT 'invalidation after counters reset',
PRIMARY KEY (`ID`),
KEY `FK_raw_data_headers_batches_ID` (`batch_id`),
KEY `FK_raw_data_headers_process_re` (`process_result_id`),
CONSTRAINT `FK_raw_data_headers_batches_ID` FOREIGN KEY (`batch_id`) REFERENCES `batches` (`ID`) ON UPDATE CASCADE,
CONSTRAINT `FK_raw_data_headers_process_re` FOREIGN KEY (`process_result_id`) REFERENCES `process_result` (`ID`) ON DELETE NO ACTION ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='Stores raw data headers'
This is the raw_data_rows structure:
CREATE TABLE `raw_data_rows` (
`ID` int(11) unsigned NOT NULL AUTO_INCREMENT COMMENT 'Univocal record key',
`Datetime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT 'Univocal record creation date',
`header_id` int(11) unsigned NOT NULL COMMENT 'Univocal raw data header key',
`process_type_id` smallint(6) NOT NULL COMMENT 'Univocal process type key',
`process_result_id` smallint(6) NOT NULL COMMENT 'Univocal process result key',
PRIMARY KEY (`ID`),
KEY `FK_raw_data_rows_header_id` (`header_id`),
KEY `FK_raw_data_rows_process_resu2` (`process_result_id`),
KEY `FK_raw_data_rows_process_resul` (`process_type_id`),
CONSTRAINT `FK_raw_data_rows_header_id` FOREIGN KEY (`header_id`) REFERENCES `raw_data_headers` (`ID`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `FK_raw_data_rows_process_resu2` FOREIGN KEY (`process_result_id`) REFERENCES `process_result` (`ID`) ON DELETE NO ACTION ON UPDATE CASCADE,
CONSTRAINT `FK_raw_data_rows_process_resul` FOREIGN KEY (`process_type_id`) REFERENCES `process_types` (`ID`) ON DELETE NO ACTION ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=2967541 DEFAULT CHARSET=utf8 COMMENT='Stores row data rows'
and finally this is the raw_data_row_details one:
CREATE TABLE `raw_data_row_details` (
`ID` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'Univocal row detail key',
`row_id` int(11) unsigned NOT NULL COMMENT 'Univocal row key',
`test_output_id` int(11) NOT NULL COMMENT 'Univocal test output key',
`component_id` int(11) NOT NULL COMMENT 'The component that take the measurement',
`Value` double NOT NULL COMMENT 'Output value',
PRIMARY KEY (`ID`),
KEY `FK_raw_data_row_details_row_id` (`row_id`),
KEY `FK_raw_data_rows_raw_data_test_outputs_ID` (`test_output_id`),
KEY `raw_data_row_details_components_FK` (`component_id`),
CONSTRAINT `FK_raw_data_row_details_row_id` FOREIGN KEY (`row_id`) REFERENCES `raw_data_rows` (`ID`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `FK_raw_data_rows_raw_data_test_outputs_ID` FOREIGN KEY (`test_output_id`) REFERENCES `raw_data_test_outputs` (`ID`) ON UPDATE CASCADE,
CONSTRAINT `raw_data_row_details_components_FK` FOREIGN KEY (`component_id`) REFERENCES `components` (`ID`) ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=13848521 DEFAULT CHARSET=utf8 COMMENT='Stores raw data rows details'

You don't need to use subquery, just use where clause with group by :
-- Count detail rows per (process type, test type, component, test result)
-- for a single batch and a single test output type.
-- The row-level filters are in WHERE, so they are applied before grouping
-- and no derived table or HAVING clause is needed.
SELECT raw_data_test_types.process_type_id AS ProcessTypeID,
       raw_data_test_types.ID AS TestTypeID,
       raw_data_row_details.component_id AS ComponentID,
       raw_data_test_results.ID AS TestResultID,
       COUNT(*) AS Counter
FROM raw_data_row_details
INNER JOIN raw_data_rows
    ON raw_data_rows.ID = raw_data_row_details.row_id
INNER JOIN raw_data_headers
    ON raw_data_headers.ID = raw_data_rows.header_id
INNER JOIN raw_data_test_results
    ON raw_data_test_results.ID = raw_data_row_details.Value
INNER JOIN raw_data_test_outputs
    ON raw_data_test_outputs.ID = raw_data_row_details.test_output_id
INNER JOIN raw_data_test_types
    ON raw_data_test_types.ID = raw_data_test_outputs.test_type_id
-- The filter column is test_output_type_id (as in the original query),
-- not test_output_type.
WHERE raw_data_headers.batch_id = 1
  AND raw_data_test_outputs.test_output_type_id = 2
GROUP BY raw_data_test_types.process_type_id,
         raw_data_test_types.ID,
         raw_data_row_details.component_id,
         raw_data_test_results.ID;

Add indexes. TestOutputTypeID and BatchID need to be covered and probably are not.
To see what's currently going on, use EXPLAIN in the MySQL console. You will probably see an indication that a full table scan is happening i.e. the join type is marked as ALL.
It's often the case that the query optimiser will use the same execution plan for different queries e.g. by expanding the subquery as if you hadn't used it. Only EXPLAIN will show you what's what.
Here's the docs on how to interpret the EXPLAIN output: https://dev.mysql.com/doc/refman/8.0/en/explain-output.html

HAVING TestOutputTypeID = 2 AND BatchID = 1
Change that from HAVING to WHERE, and have indexes in each of those columns.
Also have these indexes:
raw_data_row_details: (row_id)
raw_data_rows: (header_id)
raw_data_row_details: (test_output_id)
raw_data_test_outputs: (test_type_id)
Get rid of raw_data_ from the table names; it just clutters the queries.
If those do not help enough, please provide EXPLAIN SELECT ... and SHOW CREATE TABLE.

Related

Select data using if statement to show Not Available for missing values

I have a database table in mysql that store types like "temperature" , "oxygen_saturation" , "heart_rate" , "systolic_pressure" , "diastolic_pressure".
However, for a given checkup_id, some types may not be stored. I want to show those as N/A.
My table is the following:
CREATE TABLE `exam` (
`id` int NOT NULL AUTO_INCREMENT,
`checkup_id` int NOT NULL,
`type` varchar(100) NOT NULL,
`result` longtext NOT NULL,
`combined_exam_id` int NOT NULL DEFAULT '-1',
`status` tinyint NOT NULL DEFAULT '1',
`version_id` int NOT NULL,
PRIMARY KEY (`id`,`version_id`),
UNIQUE KEY `exam_id` (`id`,`version_id`) /*!80000 INVISIBLE */,
KEY `checkup_idx` (`checkup_id`) /*!80000 INVISIBLE */,
KEY `version_id_fk_exam_idx` (`version_id`),
CONSTRAINT `exam_ibfk_1` FOREIGN KEY (`checkup_id`) REFERENCES `endorse_checkup` (`id`),
CONSTRAINT `version_id_fk_exam` FOREIGN KEY (`version_id`) REFERENCES `ps_versioning` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=65 DEFAULT CHARSET=utf8;
I have the following select statement:
-- Exam type/result rows for one patient's checkup, limited to versions up to
-- a given version id (164 is a hand-written stand-in for the API-supplied value).
SELECT end_up.id as checkup_id,
end_e.type,
end_e.result -- no trailing comma before FROM (it was a syntax error)
FROM endorse_exam as end_e
left JOIN endorse_checkup as end_up on end_up.id = end_e.checkup_id
left JOIN ps_versioning as v ON v.id = end_e.version_id
left JOIN u_users as u ON u.user_id = v.user_id
WHERE end_up.patient_id = 50 AND v.id <= (SELECT IF(164 > -1, 164, max(v.id))) and checkup_id = 25;
Note: I put 164 value by hand, but actually it comes through an API.
Using the SQL code above, I get a result like
However, I want to create a query to get info like:

Query with three join incredibly slow

I'm trying to return all the countries that have football matches played on a specific date. The data is defined in the following tables:
competition
id | country_id | name
50 1 Premier League
competition_seasons
id | competition_id | name
70 50 2019
competition_rounds
id | season_id | name
58 70 Regular Season
match
id | round_id | home | away | result | datetime
44 58 22 87 1 - 0 2019-03-16:00:00
There are different competitions stored in the competition table, and then each competition can have multiple season which are stored in the competition_seasons. A season can also have different competition rounds, these are stored in competition_rounds.
All the matches are stored in the match table and are grouped for the round_id.
I wrote this method for the API:
// GET /country/get_countries/{date}
// Returns, as JSON, the countries that have at least one match on the given date.
$app->get('/country/get_countries/{date}', function (Request $request, Response $response, array $args)
{
    // Bounds of the requested day, compared against match.datetime.
    $start_date = $args["date"] . " 00:00";
    $end_date = $args["date"] . " 23:59";

    // {date} comes straight from the URL, so it is bound as a parameter instead
    // of being concatenated into the SQL text (prevents SQL injection).
    // NOTE(review): assumes $this->db is a PDO connection, as the existing
    // execute()/fetchAll() calls on the statement suggest - confirm.
    $sql = $this->db->prepare("SELECT n.* FROM country n
        LEFT JOIN competition c ON c.country_id = n.id
        LEFT JOIN competition_seasons s ON s.competition_id = c.id
        LEFT JOIN competition_rounds r ON r.season_id = s.id
        LEFT JOIN `match` m ON m.round_id = r.id
        WHERE m.datetime BETWEEN :start_date AND :end_date
        GROUP BY n.id");

    // prepare() + execute() runs the statement exactly once
    // (query() followed by execute() ran it twice).
    $sql->execute([
        ':start_date' => $start_date,
        ':end_date' => $end_date,
    ]);
    $countries = $sql->fetchAll();
    return $response->withJson($countries);
});
there are thousands of records organized by id, but the query took about 6, 7 seconds to return all the countries that play in the specified date.
How can I optimize this process?
Performance
UPDATE
I noticed an interesting thing, if I do:
SELECT round_id, DATE("2019-03-18") FROM `match`
the query is really fast, so I guess the datetime field is slowing down the join. Any idea about that?
Table Structure
CREATE TABLE IF NOT EXISTS `swp`.`competition` (
`id` INT NOT NULL,
`country_id` INT NULL,
`name` VARCHAR(255) NULL,
`category` INT NULL,
PRIMARY KEY (`id`),
INDEX `id_idx` (`country_id` ASC),
INDEX `FK_competition_types_competition_type_id_idx` (`category` ASC),
CONSTRAINT `FK_country_competition_country_id`
FOREIGN KEY (`country_id`)
REFERENCES `swp`.`country` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_competition_categories_competition_category_id`
FOREIGN KEY (`category`)
REFERENCES `swp`.`competition_categories` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
CREATE TABLE IF NOT EXISTS `swp`.`competition_seasons` (
`id` INT NOT NULL AUTO_INCREMENT,
`competition_id` INT NOT NULL,
`season_id` INT NULL,
`name` VARCHAR(45) NOT NULL,
`update_at` DATETIME NULL,
PRIMARY KEY (`id`),
INDEX `FK_competition_competition_seasons_competition_id_idx` (`competition_id` ASC),
CONSTRAINT `FK_competition_competition_seasons_competition_id`
FOREIGN KEY (`competition_id`)
REFERENCES `swp`.`competition` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
CREATE TABLE IF NOT EXISTS `swp`.`competition_rounds` (
`id` INT NOT NULL AUTO_INCREMENT,
`round_id` INT NULL,
`season_id` INT NOT NULL,
`name` VARCHAR(255) NULL,
PRIMARY KEY (`id`),
INDEX `FK_competition_seasons_competition_rounds_season_id_idx` (`season_id` ASC),
CONSTRAINT `FK_competition_seasons_competition_rounds_season_id`
FOREIGN KEY (`season_id`)
REFERENCES `swp`.`competition_seasons` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
-- -----------------------------------------------------
-- Table `swp`.`match`
-- -----------------------------------------------------
CREATE TABLE IF NOT EXISTS `swp`.`match` (
`id` INT NOT NULL,
`round_id` INT NOT NULL,
`group_id` INT NULL,
`datetime` DATETIME NULL,
`status` INT NULL,
`gameweek` INT NULL,
`home_team_id` INT NULL,
`home_team_half_time_score` INT NULL,
`home_team_score` INT NULL,
`home_extra_time` INT NULL,
`home_penalties` INT NULL,
`away_team_id` INT NULL,
`away_team_half_time_score` INT NULL,
`away_team_score` INT NULL,
`away_extra_time` INT NULL,
`away_penalties` INT NULL,
`venue_id` INT NULL,
`venue_attendance` INT NULL,
`aggregate_match_id` INT NULL,
PRIMARY KEY (`id`),
INDEX `home_team_id_idx` (`home_team_id` ASC),
INDEX `away_team_id_idx` (`away_team_id` ASC),
INDEX `venue_id_idx` (`venue_id` ASC),
INDEX `match_status_id_idx` (`status` ASC),
INDEX `FK_competition_rounds_match_round_id_idx` (`round_id` ASC),
INDEX `FK_match_match_aggregate_match_id_idx` (`aggregate_match_id` ASC),
INDEX `FK_competition_groups_match_group_id_idx` (`group_id` ASC),
CONSTRAINT `FK_team_match_home_team_id`
FOREIGN KEY (`home_team_id`)
REFERENCES `swp`.`team` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_team_match_away_team_id`
FOREIGN KEY (`away_team_id`)
REFERENCES `swp`.`team` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_venue_match_venue_id`
FOREIGN KEY (`venue_id`)
REFERENCES `swp`.`venue` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_match_status_match_status_id`
FOREIGN KEY (`status`)
REFERENCES `swp`.`match_status` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_competition_rounds_match_round_id`
FOREIGN KEY (`round_id`)
REFERENCES `swp`.`competition_rounds` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_match_match_aggregate_match_id`
FOREIGN KEY (`aggregate_match_id`)
REFERENCES `swp`.`match` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION,
CONSTRAINT `FK_competition_groups_match_group_id`
FOREIGN KEY (`group_id`)
REFERENCES `swp`.`competition_groups` (`id`)
ON DELETE NO ACTION
ON UPDATE NO ACTION)
ENGINE = InnoDB;
First, write the query as:
SELECT n.*
FROM country n JOIN
competition c
ON c.country_id = n.id JOIN
competition_seasons s
ON s.competition_id = c.id JOIN
competition_rounds r
ON r.season_id = s.id JOIN
`match` m
ON m.round_id = r.id
WHERE m.datetime >= ? AND
m.datetime < ?
GROUP BY n.id;
The changes here are relatively minor and will not affect performance. But they are important:
JOIN instead of LEFT JOIN, because you require that the conditions match.
Parameters for the date rather than munging the query string, because this is a good idea.
>= and < for the comparison, because this works with both dates and date times. You will need to add 1 day to the end date -- but leave off the time component.
Then, for performance, you want indexes:
match(datetime, round_id)
competition_rounds(id, season_id)
competition_seasons(id, competition_id)
competition(id, country_id)
country(id)
Actually, the first is the most important. The last four are not needed if the respective id columns are declared as primary keys.
With LEFT JOIN, the query can only be executed top to bottom, meaning the last table is scanned for every combination of rows from the preceding tables. Also, using LEFT JOIN and GROUP BY without any aggregate makes no sense, because it will always return all country ids. That said, I would rewrite it like this:
-- Country ids that have at least one competition with a match in the
-- requested date range. EXISTS lets the engine stop at the first matching
-- match row per competition.
SELECT DISTINCT
c.country_id
FROM
competition c -- no trailing comma before WHERE (it was a syntax error)
WHERE
EXISTS (
SELECT
*
FROM
competition_seasons s,
competition_rounds r,
`match` m
WHERE
s.competition_id = c.id
AND r.season_id = s.id
AND m.round_id = r.id
AND m.datetime BETWEEN ...
)
This will be correctly optimized by all RDB's I know of.
Note that a 2-column index on (match.datetime, match.round_id) - in this order - will have a huge performance impact. Or, if write speed is a concern, at least a single-column index on (match.datetime) would be recommended.
Important note about indexes on strings: String comparison is always quirky in RDBs. Make sure you use a binary collation for the datetime column or use native DATETIME format. Various RDBs may fail to use indexes on case-insensitive columns.
Note I removed the join on n - that just adds another PK lookup to check that the country still exists in the country table. You can add it back in if you don't have any ON DELETE CASCADE or other kind of constraint that ensures data consistency, like this:
SELECT DISTINCT
n.id
FROM
country n
WHERE
EXISTS (
SELECT
*
FROM
competition c,
competition_seasons s,
competition_rounds r,
`match` m
WHERE
c.country_id=n.id
AND s.competition_id = c.id
AND r.season_id = s.id
AND m.round_id = r.id
AND m.datetime BETWEEN ...
)

Collect data from 2 different tables, following conditions

I have 2 MySQL tables. One is pastsergicalhistory_type and the other one is pastsurgicalhistory
Below is pastsergicalhistory_type
CREATE TABLE `pastsergicalhistory_type` (
`idPastSergicalHistory_Type` int(11) NOT NULL AUTO_INCREMENT,
`idUser` int(11) DEFAULT NULL,
`Name` varchar(45) NOT NULL,
PRIMARY KEY (`idPastSergicalHistory_Type`),
KEY `fk_PastSergicalHistory_Type_User1_idx` (`idUser`),
CONSTRAINT `fk_PastSergicalHistory_Type_User1` FOREIGN KEY (`idUser`) REFERENCES `user` (`idUser`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=InnoDB AUTO_INCREMENT=13 DEFAULT CHARSET=utf8
Below is pastsurgicalhistory
CREATE TABLE `pastsurgicalhistory` (
`idPastSurgicalHistory` int(11) NOT NULL AUTO_INCREMENT,
`idPatient` int(11) NOT NULL,
`idPastSergicalHistory_Type` int(11) NOT NULL,
`Comment` varchar(45) DEFAULT NULL,
`ActiveStatus` tinyint(1) NOT NULL,
`LastUpdated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`idPastSurgicalHistory`),
KEY `fk_PastSurgicalHistory_Patient1_idx` (`idPatient`),
KEY `fk_PastSurgicalHistory_PastSergicalHistory_Type1_idx` (`idPastSergicalHistory_Type`),
CONSTRAINT `fk_PastSurgicalHistory_PastSergicalHistory_Type1` FOREIGN KEY (`idPastSergicalHistory_Type`) REFERENCES `pastsergicalhistory_type` (`idPastSergicalHistory_Type`) ON DELETE NO ACTION ON UPDATE NO ACTION,
CONSTRAINT `fk_PastSurgicalHistory_Patient1` FOREIGN KEY (`idPatient`) REFERENCES `patient` (`idPatient`) ON DELETE NO ACTION ON UPDATE NO ACTION
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=utf8
Now my requirement is as this, I will explain it in point form.
Get all the data from pastsergicalhistory_type where idUser is NULL or idUser is 1.
Get all the data from pastsurgicalhistory where idPatient is 2.
as you can see, the foreign key of pastsurgicalhistory is the primary key of pastsergicalhistory_type.
I tried the below query, but it gave me the wrong results. It only displayed what is available in pastsurgicalhistory. The data in pastsergicalhistory_type (which follows the condition in point 1) which is not in pastsurgicalhistory is not displayed.
SELECT pastsergicalhistory_type.*,
pastsurgicalhistory.*
FROM pastsergicalhistory_type
LEFT JOIN pastsurgicalhistory ON pastsurgicalhistory.`idPastSergicalHistory_Type` = pastsergicalhistory_type.`idPastSergicalHistory_Type`
WHERE pastsergicalhistory_type.idUser = NULL OR pastsergicalhistory_type.idUser=1 AND pastsurgicalhistory.idPatient=2
So, how can I solve this problem?
EDIT
If I use the AND pastsurgicalhistory.idPatient=2 in my where clause, it actually filters the "entire" result set. This will give me results where idPatient is related to 2. But as I mentioned, I need data which is not available in pastsurgicalhistory table as well.
Try
-- All history types that are global (idUser IS NULL) or owned by user 1,
-- each with patient 2's history row when one exists.
-- The patient filter is in the ON clause so types without a matching
-- history row are still returned (with NULL history columns).
SELECT pastsergicalhistory_type.*,
pastsurgicalhistory.*
FROM pastsergicalhistory_type
LEFT JOIN pastsurgicalhistory ON
(pastsurgicalhistory.`idPastSergicalHistory_Type` =
pastsergicalhistory_type.`idPastSergicalHistory_Type` and
pastsurgicalhistory.idPatient=2)
-- NULL must be tested with IS NULL; "= NULL" is never true.
WHERE (pastsergicalhistory_type.idUser IS NULL OR
pastsergicalhistory_type.idUser=1) ;
Move pastsurgicalhistory.idPatient=2 to join condition
SELECT pastsergicalhistory_type.*,
pastsurgicalhistory.*
FROM pastsergicalhistory_type
LEFT JOIN pastsurgicalhistory ON pastsurgicalhistory.`idPastSergicalHistory_Type` = pastsergicalhistory_type.`idPastSergicalHistory_Type`
AND pastsurgicalhistory.idPatient=2
WHERE pastsergicalhistory_type.idUser IS NULL OR pastsergicalhistory_type.idUser=1
Use parentheses?
WHERE pastsergicalhistory_type.idUser = NULL OR pastsergicalhistory_type.idUser=1 AND pastsurgicalhistory.idPatient=2
I believe this would return results where idUser is 1 and idPatient is 2, or where idUser is NULL.
Try this:
-- Parentheses make the OR bind before the AND; NULL is tested with IS NULL ("= NULL" is never true).
WHERE (pastsergicalhistory_type.idUser IS NULL OR pastsergicalhistory_type.idUser=1) AND pastsurgicalhistory.idPatient=2
If I understand you correctly?
-- History rows of patient 2 whose type is global (idUser IS NULL) or owned by user 1.
SELECT pastsergicalhistory_type.*,
pastsurgicalhistory.*
FROM pastsergicalhistory_type
RIGHT JOIN pastsurgicalhistory ON pastsurgicalhistory.`idPastSergicalHistory_Type` = pastsergicalhistory_type.`idPastSergicalHistory_Type`
-- NULL must be tested with IS NULL; "= NULL" is never true.
WHERE (pastsergicalhistory_type.idUser IS NULL OR pastsergicalhistory_type.idUser=1) AND pastsurgicalhistory.idPatient=2
Even if it works without parentheses for you, I would say it's better to use them to make it more readable.

Optimizing query to use the primary key instead of another index

I am trying to optimize the execution of a MYSQL query that joins two tables as follows:
CREATE TABLE `CPP` (
`RecordEntryType` varchar(7) NOT NULL default '',
`PositionNumber` mediumint(9) NOT NULL default '0',
`FundId` smallint(6) default NULL,
`QuantityHeld` decimal(14,2) default NULL,
`MarketValue` decimal(14,2) default NULL,
`PeriodBeginDate` date default NULL,
`PeriodEndDate` date NOT NULL default '0000-00-00',
PRIMARY KEY (`PositionNumber`,`PeriodEndDate`,`RecordEntryType`),
KEY `Index1` (`FundId`,`PeriodBeginDate`,`PeriodEndDate`),
KEY `FundId_idx` (`FundId`),
KEY `PeriodBeginDate_idx` (`PeriodBeginDate`),
KEY `PeriodEndDate_idx` (`PeriodEndDate`),
KEY `PositionNumber_id` (`PositionNumber`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
CREATE TABLE `classification_entity_map` (
`entity_id` varchar(32) NOT NULL,
`entity_type` varchar(32) NOT NULL,
`scheme_id` int(11) NOT NULL,
`class_id` varchar(24) NOT NULL,
PRIMARY KEY (`entity_id`,`entity_type`,`scheme_id`),
KEY `fk_classification_entity_map_1` (`scheme_id`),
KEY `fk_class` (`class_id`),
CONSTRAINT `fk_class` FOREIGN KEY (`class_id`) REFERENCES `classification_hierarchy` (`external_id`),
CONSTRAINT `fk_scheme` FOREIGN KEY (`scheme_id`) REFERENCES `classification_schemes` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
select cpp.*, cem.class_id from cpp
LEFT JOIN classification_entity_map cem
ON cem.entity_id = cpp.PositionNumber and cem.entity_type = 'Security'
AND cem.scheme_id = 9901
WHERE cpp.RecordEntryType = 'CURRENT'
AND ( cpp.MarketValue != 0 OR cpp.QuantityHeld != 0 )
AND FundId = 28
AND cpp.PeriodEndDate = '2013-09-30';
The issue is that the query takes longer than expected in mysql workbench (9.4 secs) as it is using the fk_classification_entity_map_1 index rather than the primary index on classification_entity_map table. cpp has 626,648 rows and cem has 63,487 rows.
I suspect that the issue has to do with the datatypes of cem.entity_id & cpp.PositionNumber but I am not sure as they cannot be changed. Please help. I can upload the explain output if that is helpful.
More Info: Changing the join to convert(cpp.PositionNumber, char(32)) as below does not help as the time goes up to 10 secs:
ON cem.entity_id = convert(cpp.PositionNumber, char(32)) and cem.entity_type = 'Security'
AND cem.scheme_id = 9901
The explain output for the query without convert is below and sees the PRIMARY as possible (but not in the query with convert):
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE cpp index_merge Index1,FundId_idx,PeriodEndDate_idx FundId_idx,PeriodEndDate_idx 3,3 402 Using intersect(FundId_idx,PeriodEndDate_idx); Using where
1 SIMPLE cem ref PRIMARY,fk_classification_entity_map_1 fk_classification_entity_map_1 4 const 24100

Querying 'photos' table, but also 'bookmarks' table at same time so bookmarked photos are known

How can I query for all records in a table called photos, and know which of the resulting photos have been bookmarked by the current user using a single query?
Here are my tables:
--
-- Table structure for table `photos`
--
CREATE TABLE IF NOT EXISTS `photos` (
`id` int(11) NOT NULL auto_increment,
`author` bigint(20) NOT NULL COMMENT 'The user''s Facebook ID.',
`filename` varchar(255) NOT NULL,
`thumbnail` varchar(255) NOT NULL,
`post_date` timestamp NOT NULL default CURRENT_TIMESTAMP,
`description` varchar(140) NOT NULL,
`finalist` tinyint(4) NOT NULL DEFAULT '0',
CONSTRAINT user_must_exist FOREIGN KEY (author)
REFERENCES users(facebook_id)
ON DELETE CASCADE,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 AUTO_INCREMENT=1 ;
-- --------------------------------------------------------
--
-- Table structure for table `bookmarks`
--
CREATE TABLE IF NOT EXISTS `bookmarks` (
`facebook_id` bigint(20) NOT NULL COMMENT 'The author''s Facebook ID.',
`photo_id` int(11) NOT NULL,
CONSTRAINT photo_should_exist FOREIGN KEY (photo_id)
REFERENCES photos(id)
ON DELETE CASCADE,
CONSTRAINT user_should_exist FOREIGN KEY (facebook_id)
REFERENCES users(facebook_id)
ON DELETE CASCADE,
UNIQUE KEY `no_duplicates` (`facebook_id`,`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COMMENT='The user''s favourite photos.';
I would imagine this query would look something like the following:
SELECT
photos.*,
bookmarks.photo_id AS bookmark
FROM photos
LEFT JOIN bookmarks
ON bookmarks.photo_id = photos.id
AND photos.author = 123456789
However this doesn't work and I receive the following MySQL error:
Unknown column 'photos.id' in 'on clause'
keyur's code worked for me after a minor typo fix which Barmar pointed out.
SELECT photos . * , bookmarks.photo_id AS bookmark
FROM photos
LEFT JOIN bookmarks ON photos.id = bookmarks.photo_id
AND bookmarks.facebook_id = 123456789
Thank you.
Edited for typo.
Edited for second typo.
Here is a working sample on SQL Fiddle. As pointed out in a comment, you do not have a bookmarks.id column in your bookmarks table, and photos.id does exist in the photos table. Check the error you are receiving again, as something seems to be incorrect.
Your query should look like this:
-- Every photo, plus a "bookmark" column that is non-NULL only when the
-- given user has bookmarked that photo.
SELECT
photos.*,
-- bookmarks has no id column; photo_id is NULL when there is no matching bookmark row
bookmarks.photo_id AS bookmark
FROM photos
LEFT JOIN bookmarks
ON photos.id = bookmarks.photo_id
AND bookmarks.facebook_id = 123456789
According to your table structure, photos.facebook_id does not exist; facebook_id is in the bookmarks table.