In one result, get a number of dependencies - MySQL

I have this MySQL DB :
MySQL in order to create DB + set of test value :
-- Rebuild the `test` schema from scratch and load a small sample data set.
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
-- Make the new schema the default one. Without this the CREATE TABLE
-- statements below fail with "No database selected" or are created in
-- whichever schema happened to be active.
USE test;

-- Lookup table of the possible states of a group or task.
CREATE TABLE table_status (
    Id     TINYINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    Status CHAR(27) NOT NULL
) ENGINE=INNODB;

INSERT INTO table_status (Id, Status) VALUES
    (1, 'CREATED'),
    (2, 'UNSURE'),
    (3, 'RUNNING'),
    (4, 'BEGIN_ACTION_TRUE'),
    (5, 'STOPPED_ACTION_TRUE_OK'),
    (6, 'STOPPED_ACTION_TRUE_NOT_OK'),
    (7, 'BEGIN_ACTION_FALSE'),
    (8, 'STOPPED_ACTION_FALSE_OK'),
    (9, 'STOPPED_ACTION_FALSE_NOT_OK'),
    (10, 'STOPPED_NOT_OK'),
    (11, 'STOPPED_OK'),
    (12, 'CANCEL_REQUESTED'),
    (13, 'CANCELLED');

-- A group of tasks; a new group starts in status 1 (CREATED).
CREATE TABLE table_group (
    Id     BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    Status TINYINT UNSIGNED NOT NULL DEFAULT 1,
    CONSTRAINT fk_group_Status FOREIGN KEY (Status) REFERENCES table_status(Id)
) ENGINE=INNODB;

-- A task belongs to exactly one group and is deleted together with it;
-- a new task starts in status 1 (CREATED).
CREATE TABLE table_task (
    Id      BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
    IdGroup BIGINT UNSIGNED NOT NULL,
    Status  TINYINT UNSIGNED NOT NULL DEFAULT 1,
    Command TEXT NOT NULL,
    CONSTRAINT fk_task_IdGroup FOREIGN KEY (IdGroup) REFERENCES table_group(Id) ON DELETE CASCADE,
    CONSTRAINT fk_task_Status FOREIGN KEY (Status) REFERENCES table_status(Id)
) ENGINE=INNODB;

-- Dependency edge between two tasks: IdAfterTask depends on IdBeforeTask.
-- Edges disappear automatically when either task is deleted.
CREATE TABLE table_dependency (
    IdBeforeTask BIGINT UNSIGNED NOT NULL,
    IdAfterTask  BIGINT UNSIGNED NOT NULL,
    PRIMARY KEY (IdBeforeTask, IdAfterTask),
    CONSTRAINT fk_dependency_IdBeforeTask FOREIGN KEY (IdBeforeTask) REFERENCES table_task(Id) ON DELETE CASCADE,
    CONSTRAINT fk_dependency_IdAfterTask FOREIGN KEY (IdAfterTask) REFERENCES table_task(Id) ON DELETE CASCADE
) ENGINE=INNODB;

-- Four groups, all in the default status.
INSERT INTO table_group (Id) VALUES (1), (2), (3), (4);

-- Three tasks per group. Ids are auto-generated as 1..12 in insertion order,
-- which the dependency rows below rely on. Single-quoted literals are used so
-- the script also runs when sql_mode contains ANSI_QUOTES.
INSERT INTO table_task (IdGroup, Command) VALUES
    (1, 'command_group_1_task_1'),
    (1, 'command_group_1_task_2'),
    (1, 'command_group_1_task_3'),
    (2, 'command_group_2_task_1'),
    (2, 'command_group_2_task_2'),
    (2, 'command_group_2_task_3'),
    (3, 'command_group_3_task_1'),
    (3, 'command_group_3_task_2'),
    (3, 'command_group_3_task_3'),
    (4, 'command_group_4_task_1'),
    (4, 'command_group_4_task_2'),
    (4, 'command_group_4_task_3');

-- Inside every group: task 1 -> task 2 -> task 3.
INSERT INTO table_dependency (IdBeforeTask, IdAfterTask) VALUES
    (1, 2), (2, 3),
    (4, 5), (5, 6),
    (7, 8), (8, 9),
    (10, 11), (11, 12);
The data in table_status is fixed to this :
-- Fixed content of table_status: one row per possible state.
-- Rows of a multi-row INSERT must be separated by commas and the statement
-- terminated, otherwise MySQL rejects it with a syntax error.
INSERT INTO table_status (Id, Status) VALUES
(1, 'CREATED'),
(2, 'UNSURE'),
(3, 'RUNNING'),
(4, 'BEGIN_ACTION_TRUE'),
(5, 'STOPPED_ACTION_TRUE_OK'),
(6, 'STOPPED_ACTION_TRUE_NOT_OK'),
(7, 'BEGIN_ACTION_FALSE'),
(8, 'STOPPED_ACTION_FALSE_OK'),
(9, 'STOPPED_ACTION_FALSE_NOT_OK'),
(10, 'STOPPED_NOT_OK'),
(11, 'STOPPED_OK'),
(12, 'CANCEL_REQUESTED'),
(13, 'CANCELLED');
The purpose is to have a group containing task, each task can be dependant on another one.
What I want to obtain :
Group.Id, NB_TASK_TOTAL, NB_TASK_CREATED, NB_TASK_UNSURE
But there is a trap: NB_TASK_CREATED is defined as the number of tasks with status CREATED and with no unresolved dependency.
An unresolved dependency is defined as a task depending on another task which has a Status != 11 (STOPPED_OK).
So if I have this :
table_dependency :
IdBeforeTask IdAfterTask
1 2
2 3
table_task :
Id IdGroup Status Command
1 1 1 command_1
2 1 1 command_2
3 1 1 command_3
I want to have
Group.Id, NB_TASK_TOTAL, NB_TASK_CREATED, NB_TASK_UNSURE
1 3 1 0
Here is my unsuccessful try:
-- Asker's attempt at the per-group counters (kept as posted; it returns wrong numbers).
SELECT
G.Id,
-- NB_TASK_TOTAL: every task of the group.
(SELECT COUNT(T.Id) FROM table_task as T WHERE T.IdGroup = G.Id),
-- NB_TASK_CREATED attempt: the NOT IN removes tasks that come after a task whose status is not 11.
-- COUNT(expr) counts the rows where expr is not NULL; `T.Status = 1` is 0 or 1 (Status is NOT NULL),
-- so it does not restrict the count to tasks in status 1.
(SELECT COUNT(T.Status = 1) FROM table_task as T WHERE T.IdGroup = G.Id AND T.Id NOT IN (SELECT IdAfterTask from table_dependency as D1 join table_task as T1 on T1.Id=D1.IdBeforeTask WHERE T1.Status!=11)),
-- NB_TASK_UNSURE attempt: same COUNT(expr) pitfall, so every task of the group is counted.
(SELECT COUNT(T.Status = 2) FROM table_task as T WHERE T.IdGroup = G.Id)
FROM
table_group as G
The problem is I obtain this :
Group.Id, NB_TASK_TOTAL, NB_TASK_CREATED, NB_TASK_UNSURE
1 3 1 3
2 3 0 3
I have 4 groups, each group containing 3 tasks, all the tasks with Status = 1 (CREATED), and the dependencies set so that just one task per group has "resolved" dependencies, so I should have
Group.Id, NB_TASK_TOTAL, NB_TASK_CREATED, NB_TASK_UNSURE
1 3 1 0
2 3 1 0
instead.
I'm a beginner in MySQL, and i'm lost on this one request.

This should do it for you:
-- Per-group task counters.
--   NB_TASK_TOTAL   : every task of the group.
--   NB_TASK_CREATED : tasks in status 1 (CREATED) with no unresolved
--                     dependency, i.e. no predecessor task whose status
--                     differs from 11 (STOPPED_OK).
--   NB_TASK_UNSURE  : tasks in status 2 (UNSURE).
-- Groups that own no task at all are not listed, because the rows are
-- driven by table_task.
SELECT T.IdGroup AS Group_Id,
       COUNT(T.IdGroup) AS NB_TASK_TOTAL,
       (SELECT COUNT(Tt.Id)
          FROM table_task AS Tt
         WHERE Tt.IdGroup = T.IdGroup
           AND Tt.Status = 1
           -- The dependency filter belongs to the CREATED counter, not to
           -- the UNSURE one: skip a task as soon as one of the tasks it
           -- waits for has not reached STOPPED_OK.
           AND NOT EXISTS (SELECT 1
                             FROM table_dependency AS D1
                            INNER JOIN table_task AS T1
                               ON T1.Id = D1.IdBeforeTask
                            WHERE D1.IdAfterTask = Tt.Id
                              AND T1.Status <> 11)) AS NB_TASK_CREATED,
       (SELECT COUNT(Tt.Id)
          FROM table_task AS Tt
         WHERE Tt.IdGroup = T.IdGroup
           AND Tt.Status = 2) AS NB_TASK_UNSURE
  FROM table_task AS T
 GROUP BY T.IdGroup;

Related

Mapping AppUsers to a Customer by AppUser information

I have database with ERD:
And sample data:
-- Application-side users (presumably the "internal" users of the description; confirm).
-- UserInfo and UserMapping reference this table through AppUser_ID.
CREATE TABLE `AppUser` (
`AppUser_ID` bigint(20) NOT NULL,
`SomeFields` varchar(30) COLLATE utf8mb4_bin DEFAULT NULL,
PRIMARY KEY (`AppUser_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
-- Five sample users, IDs 1 to 5.
INSERT INTO `AppUser` (`AppUser_ID`, `SomeFields`) VALUES
(1, 'values'),
(2, 'values'),
(3, 'values'),
(4, 'values'),
(5, 'values');
-- Identity-provider side users (presumably the "external" users of the
-- description; confirm). UserClaim and UserMapping reference this table
-- through IdpUser_ID.
CREATE TABLE `IdpUser` (
`IdpUser_ID` bigint(20) NOT NULL,
`SomeFields` varchar(30) COLLATE utf8mb4_bin DEFAULT NULL,
-- Inside CREATE TABLE the key is declared with PRIMARY KEY; the ADD keyword
-- is only valid in ALTER TABLE and is a syntax error here.
PRIMARY KEY (`IdpUser_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
-- Five sample users, IDs 1 to 5.
INSERT INTO `IdpUser` (`IdpUser_ID`, `SomeFields`) VALUES
(1, 'values'),
(2, 'values'),
(3, 'values'),
(4, 'values'),
(5, 'values');
-- Key/value attributes of an IdpUser (the description calls this the external user info).
-- One row per (user, AttributeKey, AttributeValue).
CREATE TABLE `UserClaim` (
`Attribute_ID` bigint(20) NOT NULL,
`IdpUser_ID` bigint(20) DEFAULT NULL,
`AttributeKey` varchar(30) COLLATE utf8mb4_bin DEFAULT NULL,
`AttributeValue` bigint(20) DEFAULT NULL,
PRIMARY KEY (`Attribute_ID`),
FOREIGN KEY (`IdpUser_ID`) REFERENCES `IdpUser` (`IdpUser_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
-- Sample claims; the keys used are 'Email_ID', 'Phone_ID' and 'PublicKey_ID'.
INSERT INTO `UserClaim` (`Attribute_ID`, `IdpUser_ID`, `AttributeKey`, `AttributeValue`) VALUES
(1, 1, 'Email_ID', 1),
(2, 2, 'Email_ID', 2),
(3, 3, 'Email_ID', 3),
(4, 4, 'Email_ID', 4),
(5, 5, 'Email_ID', 5),
(6, 2, 'Phone_ID', 4),
(7, 3, 'Phone_ID', 2),
(8, 4, 'Phone_ID', 3),
(9, 5, 'Phone_ID', 5),
(10, 2, 'PublicKey_ID', 1),
(11, 3, 'PublicKey_ID', 2),
(12, 1, 'PublicKey_ID', 1);
-- Key/value attributes of an AppUser (the description calls this the internal user info).
-- Same layout as UserClaim, keyed by AppUser_ID instead of IdpUser_ID.
CREATE TABLE `UserInfo` (
`Attribute_ID` bigint(20) NOT NULL,
`AppUser_ID` bigint(20) DEFAULT NULL,
`AttributeKey` varchar(30) COLLATE utf8mb4_bin DEFAULT NULL,
`AttributeValue` bigint(20) DEFAULT NULL,
PRIMARY KEY (`Attribute_ID`),
FOREIGN KEY (`AppUser_ID`) REFERENCES `AppUser` (`AppUser_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
-- Sample attributes; only 'Email_ID' and 'Phone_ID' keys appear here.
INSERT INTO `UserInfo` (`Attribute_ID`, `AppUser_ID`, `AttributeKey`, `AttributeValue`) VALUES
(1, 1, 'Email_ID', 1),
(2, 2, 'Email_ID', 2),
(3, 3, 'Email_ID', 3),
(4, 4, 'Email_ID', 4),
(5, 5, 'Email_ID', 2),
(6, 2, 'Phone_ID', 1),
(7, 3, 'Phone_ID', 2),
(8, 4, 'Phone_ID', 3),
(9, 5, 'Phone_ID', 4);
-- Links one AppUser to one IdpUser.
CREATE TABLE `UserMapping` (
`Mapping_ID` int(11) NOT NULL,
`AppUser_ID` bigint(20) NOT NULL,
`IdpUser_ID` bigint(20) NOT NULL,
PRIMARY KEY (`Mapping_ID`),
FOREIGN KEY (`AppUser_ID`) REFERENCES `AppUser` (`AppUser_ID`),
FOREIGN KEY (`IdpUser_ID`) REFERENCES `IdpUser` (`IdpUser_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin;
-- Column order is (Mapping_ID, AppUser_ID, IdpUser_ID); mappings 4 and 5 pair users with different ids.
INSERT INTO `UserMapping` (`Mapping_ID`, `AppUser_ID`, `IdpUser_ID`) VALUES
(1, 2, 2),
(2, 3, 3),
(3, 1, 1),
(4, 4, 5),
(5, 5, 4);
UserInfo holds internal User info, UserClaim holds external User info, my application has multiple login type. Internal User and External User is mapped.
I use both internal and external User attribute for mapping Users to a Customer. If Users have the same AttributeKey and AttributeValue will be grouped in a Customer and the following attributes of those Users must be added to the Customer
All User with the same AttributeKey and AttributeValue is
grouped into a group.
The Customer will have all attribute of the Users belong to that Customer.
For example - with above data we have Mapping_ID = 2 belongs to Customer1 with list of attributes :
Email_ID = [2]
Phone_ID = [1,4]
PublicKey_ID = [1]
And we also have Mapping_ID = 5 with list of attributes:
Email_ID = [2,5]
Phone_ID = [4,5]
PublicKey_ID = []
And we also have Mapping_ID = 1 with list of attributes:
Email_ID = [1]
Phone_ID = []
PublicKey_ID = [1]
Because Email_ID = 2, Phone_ID = 4 belong to Mapping_ID = [2,5] so Mapping_ID = [2,5] is mapped to Customer1.
Because PublicKey_ID = 1 belong to Mapping_ID = [2,1] so Mapping_ID = [2,1] is mapped to Customer1.
=> Mapping_ID = [1,2,5] are mapped to Customer1 and Customer1 attributes :
Email_ID = [1,2,5]
Phone_ID = [1,4,5]
PublicKey_ID = [1]
My approach is to group by AttributeKey and AttributeValue in each UserInfo and UserClaim to get temporary Customers and their attributes, then join their attributes together and group to a Customer with the same AttributeKey and AttributeValue. I saw my approach made thing difficult to intersect list of attributes to group the Customer and how to store Customer attributes for effectively add or remove attributes.
I am looking for an idea for this problem or any better approach to solve this puzzle.
UserInfo has approximately 100m rows.
UserClaim has approximately 300m rows.
I can use MySql, PDI (Pentaho Data Integration) tools to design and maintain ETL to get my result.

SQL Query to find number of users in a Job Area

I have three tables:
jobAreas (id, title)
jobSkills (id,title, jobAreaID)
userSkills (id, userID, jobSkillID)
Each jobSkills entry belongs to a jobArea (linked by the foreign key jobAreaID), and each userSkills entry references a jobSkill (linked by jobSkillID).
I am trying to create a SQL select query that will list the number of users that belong to each Job Area.
-- Asker's attempt (kept as posted). The only join condition is ja.id = st.parent_id; nothing relates user_skill_types to the other two tables, so its rows are combined with every area/skill pair. The table names also differ from the schema listed above.
SELECT ja.id, ja.title, COUNT(*) as numUsers FROM user_skill_types uskills INNER JOIN job_areas ja INNER JOIN skill_types st ON ja.id = st.parent_id GROUP BY ja.id
But the numbers I am getting are not correct.
Given the following example (based on the table structure provided in the question).
-- Sample schema and data for the job-area question.
-- Keys and foreign keys are added by the ALTER TABLE statements at the end, after the data is loaded.

-- Job areas.
CREATE TABLE `jobareas` (
`id` int(11) NOT NULL,
`title` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
INSERT INTO `jobareas` (`id`, `title`) VALUES
(1, 'area1'),
(2, 'area2'),
(3, 'area3'),
(4, 'area4'),
(5, 'area5'),
(6, 'area6'),
(7, 'area7'),
(8, 'area8');
-- --------------------------------------------------------
-- Job skills; each skill belongs to one area through jobAreaID.
CREATE TABLE `jobskills` (
`id` int(11) NOT NULL,
`title` varchar(255) COLLATE utf8mb4_unicode_ci NOT NULL,
`jobAreaID` int(11) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Area 8 has no skill in this sample.
INSERT INTO `jobskills` (`id`, `title`, `jobAreaID`) VALUES
(1, 'skill1', 1),
(2, 'skill2', 3),
(3, 'skill3', 3),
(4, 'skill4', 7),
(5, 'skill5', 4),
(6, 'skill6', 5),
(7, 'skill7', 1),
(8, 'skill8', 7),
(9, 'skill9', 6),
(10, 'skill10', 3),
(11, 'skill11', 4),
(12, 'skill12', 2),
(13, 'skill13', 6),
(14, 'skill14', 7),
(15, 'skill15', 2);
-- --------------------------------------------------------
-- Skills held by users; one row per (user, skill) assignment.
CREATE TABLE `userskills` (
`id` int(11) NOT NULL,
`userID` int(11) NOT NULL,
`jobSkillID` int(11) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Rows 1 and 10 assign the same skill (10) to the same user (5), so counting rows over-counts users.
INSERT INTO `userskills` (`id`, `userID`, `jobSkillID`) VALUES
(1, 5, 10),
(2, 2, 11),
(3, 4, 14),
(4, 4, 6),
(5, 2, 8),
(6, 6, 9),
(7, 3, 9),
(8, 1, 12),
(9, 1, 3),
(10, 5, 10);
-- jobareas.id gets a UNIQUE key (not a PRIMARY KEY); the foreign key below references it.
ALTER TABLE `jobareas`
ADD UNIQUE KEY `id` (`id`);
ALTER TABLE `jobskills`
ADD PRIMARY KEY (`id`),
ADD KEY `jobAreaID` (`jobAreaID`);
ALTER TABLE `userskills`
ADD PRIMARY KEY (`id`),
ADD KEY `userID` (`userID`),
ADD KEY `jobSkillID` (`jobSkillID`);
-- Referential integrity: skill -> area and user skill -> skill.
ALTER TABLE `jobskills`
ADD CONSTRAINT `jobskills_ibfk_1` FOREIGN KEY (`jobAreaID`) REFERENCES `jobareas` (`id`);
ALTER TABLE `userskills`
ADD CONSTRAINT `userskills_ibfk_1` FOREIGN KEY (`jobSkillID`) REFERENCES `jobskills` (`id`);
Your query should use DISTINCT.
-- Number of distinct users per job area. A user holding several skills of
-- the same area is counted once. Areas without any user skill are not listed.
SELECT
    COUNT(DISTINCT us.userID) AS num,
    ja.title
FROM jobareas AS ja
INNER JOIN jobskills AS js
    ON js.jobAreaID = ja.id
INNER JOIN userskills AS us
    ON us.jobSkillID = js.id
GROUP BY ja.id;
The results can be checked in this SQLFiddle
Your SQL Query shared does not seem to match the schema shared. Also you have not specified how to join the job_areas table
Use
-- Number of users per job area.
-- COUNT(DISTINCT us.userID) is required: counting us.id counts skill
-- assignments, so a user with several skills in one area (or the same
-- skill recorded twice) would be counted more than once.
SELECT
    ja.id,
    ja.title,
    COUNT(DISTINCT us.userID) AS numUsers
FROM jobAreas ja
INNER JOIN jobSkills js ON ja.id = js.jobAreaID
INNER JOIN userSkills us ON js.id = us.jobSkillID
GROUP BY ja.id, ja.title
You are probably getting duplicates in your result because of users having multiple skills or jobs having multiple areas, or both. Rather than COUNT(*), use COUNT(DISTINCT userID) to work around that:
-- Number of distinct users per job area, written against the schema given
-- in the question: jobAreas(id, title), jobSkills(id, title, jobAreaID),
-- userSkills(id, userID, jobSkillID).
SELECT ja.id, ja.title, COUNT(DISTINCT us.userID) AS numUsers
FROM jobAreas ja
INNER JOIN jobSkills js ON js.jobAreaID = ja.id
-- The column is jobSkillID (singular "Skill"); "jobSkillsID" does not exist.
INNER JOIN userSkills us ON us.jobSkillID = js.id
GROUP BY ja.id, ja.title
Note I've written the query based on the schema in your question. Based on the query you have written, it should probably look something like (it's not clear what the user_skill_types userID column is called, or how to JOIN user_skill_types to job_skills):
-- Same count using the table names from the asker's own query. The userID
-- and jobSkillID column names of user_skill_types are guesses.
SELECT
    ja.id,
    ja.title,
    COUNT(DISTINCT uskills.userID) AS numUsers
FROM job_areas AS ja
INNER JOIN skill_types AS st
    ON st.parent_id = ja.id
INNER JOIN user_skill_types AS uskills
    ON st.id = uskills.jobSkillID
GROUP BY
    ja.id,
    ja.title

Condition based selecting from group_concat in Mysql query

Here is my table and sample data.
-- Sample schema and data for the group_concat question.

-- Articles.
CREATE TABLE `articles`
(
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`title` varchar(100) NOT NULL,
PRIMARY KEY (`id`)
);
-- Tags that can be attached to articles.
CREATE TABLE `tags`
(
`id` int(11) NOT NULL,
`name` varchar(100) NOT NULL,
PRIMARY KEY (`id`)
);
-- Junction table article <-> tag. No key or foreign key is declared on it.
CREATE TABLE `article_tags`
(
`article_id` int(11) NOT NULL,
`tag_id` int(11) NOT NULL
);
INSERT INTO `tags` (`id`, `name`) VALUES
(1, 'Wap Stories'),
(2, 'App Stories');
INSERT INTO `articles` (`id`, `title`) VALUES
(1, 'USA'),
(2, 'England'),
(3, 'Germany'),
(4, 'India'),
(5, 'France'),
(6, 'Dubai'),
(7, 'Poland'),
(8, 'Japan'),
(9, 'China'),
(10, 'Australia');
-- Articles 1, 2 and 9 carry both tags; 4, 5 and 8 only tag 1; 3, 6, 7 and 10 only tag 2.
INSERT INTO `article_tags` (`article_id`, `tag_id`) VALUES
(1, 1),
(1, 2),
(4, 1),
(5, 1),
(2, 2),
(2, 1),
(6, 2),
(7, 2),
(8, 1),
(9, 1),
(3, 2),
(9, 2),
(10, 2);
How can I get the output below? I have tried using the group_concat function, but it gives all the results. My requirement is that the group_concat values follow one of these rules:
a. Combination of 1,2 can be there, only 1 can be there but 2 alone cannot be there.
b. Combination of 2,1 can be there, only 2 can be there but 1 alone cannot be there
Below is the output I need
id, title, groupconcat
--------------------------
1, USA, 1,2
2, England, 1,2
4, India, 1
5, France, 1
8, Japan, 1
9, China, 1,2
SqlFiddle Link
The query which I am using is
-- Asker's query (kept as posted): every article having tag 1 or 2, with its tag ids concatenated in ascending order.
-- The WHERE filter on att.tag_id rejects the NULL rows a LEFT JOIN would add, so untagged articles never appear.
select id, title, group_concat(tag_id order by tag_id) as 'groupconcat' from articles a
left join article_tags att on a.id = att.article_id
where att.tag_id in (1,2)
group by article_id order by id
You can try like this
-- Articles that carry tag 1, alone or together with tag 2; articles tagged
-- only with 2 are excluded. groupconcat lists the tag ids in ascending order.
SELECT a.id,
       a.title,
       GROUP_CONCAT(att.tag_id ORDER BY att.tag_id) AS groupconcat
FROM articles AS a
-- The tag filter below rejects NULL-extended rows anyway, so an inner join
-- states the real behaviour.
INNER JOIN article_tags AS att
    ON att.article_id = a.id
WHERE att.tag_id IN (1, 2)
-- Group by the selected columns so the query is valid under ONLY_FULL_GROUP_BY.
GROUP BY a.id, a.title
-- Keep a group only when tag 1 is present, tested on the data itself
-- instead of parsing the concatenated string.
HAVING SUM(CASE WHEN att.tag_id = 1 THEN 1 ELSE 0 END) > 0
ORDER BY a.id

How to refactor, (shorten) this query

I have a database with tables: applicant (or candidate for a job), application (candidate applied for a certain job), test, selected_test(any application has a defined set of tests) and test_result.
When I need to show which applicant scored what result for any application and test I would use this query:
-- One row per application and selected test for which the applicant has a
-- result: applicant name, job applied for, test name and score.
SELECT
    a.first_name,
    a.last_name,
    app.job,
    t.name,
    tr.score
FROM applicant AS a
INNER JOIN application AS app
    ON app.applicant_id = a.id
INNER JOIN selected_test AS st
    ON st.application_id = app.id
INNER JOIN test AS t
    ON t.id = st.test_id
INNER JOIN test_result AS tr
    ON tr.test_id = st.test_id
   AND tr.applicant_id = a.id
What I need to accomplish is sorting by a certain test type (test.name) along with its score (test_result.score).
This is what I mean:
-- Asker's working query: one row per application with the scores of the four tests side by side.
-- Each LEFT JOIN below attaches the score of one named test; the column is NULL when the
-- application has no selected test of that name or no matching result.
SELECT a.first_name, a.last_name, app.job, iq.score AS iqScore, math.score AS mathScore, personality.score AS personalityScore, logic.score AS logicScore
FROM applicant a
INNER JOIN application app ON a.id=app.applicant_id
-- Score of the 'iq' test per application.
LEFT JOIN
(SELECT app.id AS appId, tr.score
FROM applicant a
INNER JOIN application app ON app.applicant_id=a.id
INNER JOIN selected_test st ON app.id=st.application_id
INNER JOIN test t ON st.test_id=t.id AND t.name='iq'
INNER JOIN test_result tr ON st.test_id=tr.test_id AND a.id=tr.applicant_id) AS iq ON app.id=iq.appId
-- Score of the 'math' test per application (same sub-query, different test name).
LEFT JOIN
(SELECT app.id AS appId, tr.score
FROM applicant a
INNER JOIN application app ON app.applicant_id=a.id
INNER JOIN selected_test st ON app.id=st.application_id
INNER JOIN test t ON st.test_id=t.id AND t.name='math'
INNER JOIN test_result tr ON st.test_id=tr.test_id AND a.id=tr.applicant_id) AS math ON app.id=math.appId
-- Score of the 'personality' test per application.
LEFT JOIN
(SELECT app.id AS appId, tr.score
FROM applicant a
INNER JOIN application app ON app.applicant_id=a.id
INNER JOIN selected_test st ON app.id=st.application_id
INNER JOIN test t ON st.test_id=t.id AND t.name='personality'
INNER JOIN test_result tr ON st.test_id=tr.test_id AND a.id=tr.applicant_id) AS personality ON app.id=personality.appId
-- Score of the 'logic' test per application.
LEFT JOIN
(SELECT app.id AS appId, tr.score
FROM applicant a
INNER JOIN application app ON app.applicant_id=a.id
INNER JOIN selected_test st ON app.id=st.application_id
INNER JOIN test t ON st.test_id=t.id AND t.name='logic'
INNER JOIN test_result tr ON st.test_id=tr.test_id AND a.id=tr.applicant_id) AS logic ON app.id=logic.appId
-- Best math score first, ties broken by iq score, then by logic score.
ORDER BY mathScore DESC, iqScore DESC, logicScore DESC
The query returns a set of applications, showing applicant data, job, test names and scores.
For instance, if I want candidate applications with higher "math" score, followed by highest scores in "IQ" and then in "logic" to be on top, 'ORDER BY' clause looks like the above.
The query works correctly, but the problem is that in a real situation it deals with large data sets and I need a way to shorten/refactor this query.
Example database it works on is here:
-- Example database for the applicant/test question.
-- Only primary keys are declared; there are no secondary indexes or foreign keys.

-- Applicants (candidates for a job).
CREATE TABLE IF NOT EXISTS `applicant` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`first_name` varchar(255) NOT NULL,
`last_name` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=8 ;
--
-- Dumping data for table `applicant`
--
INSERT INTO `applicant` (`id`, `first_name`, `last_name`) VALUES
(2, 'Jack', 'Redburn'),
(4, 'Barry', 'Leon'),
(6, 'Elisabeth', 'Logan'),
(7, 'Jane', 'Doe');
-- --------------------------------------------------------
--
-- Table structure for table `application`
--
-- An application: one applicant applying for one job.
CREATE TABLE IF NOT EXISTS `application` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`applicant_id` int(11) NOT NULL,
`job` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=10 ;
--
-- Dumping data for table `application`
--
-- Applicant 6 has two applications (6 and 8).
INSERT INTO `application` (`id`, `applicant_id`, `job`) VALUES
(2, 2, 'Salesman'),
(4, 4, 'Policeman'),
(6, 6, 'Journalist'),
(8, 6, 'Hostess'),
(9, 7, 'Journalist');
-- --------------------------------------------------------
--
-- Table structure for table `selected_test`
--
-- The set of tests defined for an application.
CREATE TABLE IF NOT EXISTS `selected_test` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`application_id` int(11) NOT NULL,
`test_id` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=24 ;
--
-- Dumping data for table `selected_test`
--
-- NOTE: application_id values 1, 3, 5 and 7 have no matching row in `application` above,
-- and application 9 has no selected test.
INSERT INTO `selected_test` (`id`, `application_id`, `test_id`) VALUES
(1, 1, 1),
(2, 1, 2),
(3, 1, 3),
(5, 2, 1),
(6, 2, 2),
(7, 2, 3),
(8, 2, 4),
(9, 3, 4),
(10, 3, 2),
(11, 4, 1),
(12, 4, 2),
(13, 4, 3),
(14, 4, 4),
(15, 5, 2),
(16, 5, 3),
(17, 6, 1),
(18, 6, 4),
(19, 7, 3),
(20, 7, 2),
(21, 7, 1),
(22, 8, 2),
(23, 8, 3);
-- --------------------------------------------------------
--
-- Table structure for table `test`
--
-- Catalogue of tests.
CREATE TABLE IF NOT EXISTS `test` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=5 ;
--
-- Dumping data for table `test`
--
INSERT INTO `test` (`id`, `name`) VALUES
(1, 'math'),
(2, 'logic'),
(3, 'iq'),
(4, 'personality');
-- --------------------------------------------------------
--
-- Table structure for table `test_result`
--
-- Score obtained by an applicant on a test (keyed by applicant, not by application).
CREATE TABLE IF NOT EXISTS `test_result` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`applicant_id` int(11) NOT NULL,
`test_id` int(11) NOT NULL,
`score` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=24 ;
--
-- Dumping data for table `test_result`
--
-- Only applicant 4 has a result for test 4 (personality).
INSERT INTO `test_result` (`id`, `applicant_id`, `test_id`, `score`) VALUES
(2, 2, 1, 6),
(3, 4, 1, 7),
(6, 6, 1, 3),
(7, 7, 1, 8),
(9, 2, 2, 15),
(11, 4, 2, 12),
(13, 6, 2, 11),
(14, 7, 2, 9),
(15, 7, 3, 105),
(16, 6, 3, 112),
(18, 4, 3, 108),
(20, 2, 3, 117),
(22, 4, 4, 70);
And here is what results look like:
First query is just to show you how data is related:
The large query, shows score data horizontally so it is possible to sort by test name and score:
caveat I don't know mysql
Googling mysql pivot gives this result http://en.wikibooks.org/wiki/MySQL/Pivot_table
So if we apply the same logic using the test.id as the seed number (which is exam in the example from the google search) we get this:
SQLFIDDLE
-- Pivot: one row per application with the score of each test in its own
-- column, keyed on test.id (1 = math, 2 = logic, 3 = iq, 4 = personality).
--
-- * Rows are grouped by application id, so two applications that share
--   the same applicant name and job are not merged into one row.
-- * SUM(CASE ...) yields NULL, not 0, when the application has no score
--   for a test, which keeps "not taken" distinguishable from a real 0.
-- * LEFT JOINs keep applications that have no selected test or no result
--   yet, as the long query in the question does.
SELECT a.first_name,
       a.last_name,
       app.job,
       SUM(CASE WHEN t.id = 1 THEN tr.score END) AS math,
       SUM(CASE WHEN t.id = 2 THEN tr.score END) AS logic,
       SUM(CASE WHEN t.id = 3 THEN tr.score END) AS iq,
       SUM(CASE WHEN t.id = 4 THEN tr.score END) AS personality
FROM applicant AS a
INNER JOIN application AS app
    ON app.applicant_id = a.id
LEFT JOIN selected_test AS st
    ON st.application_id = app.id
LEFT JOIN test AS t
    ON t.id = st.test_id
-- A score counts only for a test selected for this application and taken
-- by this applicant.
LEFT JOIN test_result AS tr
    ON tr.test_id = st.test_id
   AND tr.applicant_id = a.id
GROUP BY app.id, a.first_name, a.last_name, app.job
Now that you've got your short query, you can apply sorting as required - you can use a CASE expression in your ORDER BY to dynamically change the order as required...
I noticed that you have only defined primary keys. You should see a noticeable performance improvement when you index other fields. Index at least the following: application.applicant_id, selected_test.application_id, selected_test.test_id, test_result.applicant_id, test_result.test_id, test_result.score.
You might be surprised how much this speeds things up for you. In fact, mysql tells us this is the best way to improve performance: https://dev.mysql.com/doc/refman/5.5/en/optimization-indexes.html.

search a set of values in other set of values for a row

Hello I am having issues with execution time on a query that searches for users ( from users table ) that are having at least one interest from one specified interests set and a location from a specified locations set. So I have this test DB:
-- Test database for the targeting question: users, the interests and
-- locations they can have, and the two junction tables linking them.

-- Catalogue of interests.
CREATE TABLE IF NOT EXISTS `interests` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=10 ;
--
-- Dumping data for table `interests`
--
INSERT INTO `interests` (`id`, `name`) VALUES
(1, 'auto'),
(2, 'moto'),
(3, 'health'),
(4, 'garden'),
(5, 'house'),
(6, 'music'),
(7, 'video'),
(8, 'games'),
(9, 'it');
-- --------------------------------------------------------
--
-- Table structure for table `locations`
--
-- Catalogue of locations.
CREATE TABLE IF NOT EXISTS `locations` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(50) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=11 ;
--
-- Dumping data for table `locations`
--
INSERT INTO `locations` (`id`, `name`) VALUES
(1, 'engalnd'),
(2, 'austia'),
(3, 'germany'),
(4, 'france'),
(5, 'belgium'),
(6, 'italy'),
(7, 'russia'),
(8, 'poland'),
(9, 'norway'),
(10, 'romania');
-- --------------------------------------------------------
--
-- Table structure for table `users`
--
-- Users that can be targeted.
CREATE TABLE IF NOT EXISTS `users` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`email` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 AUTO_INCREMENT=11 ;
--
-- Dumping data for table `users`
--
INSERT INTO `users` (`id`, `email`) VALUES
(1, 'email1#test.com'),
(2, 'email2#test.com'),
(3, 'email3#test.com'),
(4, 'email4#test.com'),
(5, 'email5#test.com'),
(6, 'email6#test.com'),
(7, 'email7#test.com'),
(8, 'email8#test.com'),
(9, 'email9#test.com'),
(10, 'email10#test.com');
-- --------------------------------------------------------
--
-- Table structure for table `users_interests`
--
-- Junction table user <-> interest; the composite primary key starts with user_id.
CREATE TABLE IF NOT EXISTS `users_interests` (
`user_id` int(11) NOT NULL,
`interest_id` int(11) NOT NULL,
PRIMARY KEY (`user_id`,`interest_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
--
-- Dumping data for table `users_interests`
--
-- Users 9 and 10 have no interest rows.
INSERT INTO `users_interests` (`user_id`, `interest_id`) VALUES
(1, 1),
(1, 2),
(2, 5),
(2, 7),
(2, 8),
(3, 1),
(4, 1),
(4, 5),
(4, 6),
(4, 7),
(4, 8),
(5, 1),
(5, 2),
(5, 8),
(6, 3),
(6, 7),
(6, 8),
(7, 7),
(7, 9),
(8, 5);
-- --------------------------------------------------------
--
-- Table structure for table `users_locations`
--
-- Junction table user <-> location; the composite primary key starts with user_id.
CREATE TABLE IF NOT EXISTS `users_locations` (
`user_id` int(11) NOT NULL,
`location_id` int(11) NOT NULL,
PRIMARY KEY (`user_id`,`location_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
--
-- Dumping data for table `users_locations`
--
-- Users 1, 9 and 10 have no location rows.
INSERT INTO `users_locations` (`user_id`, `location_id`) VALUES
(2, 5),
(2, 7),
(2, 8),
(3, 1),
(4, 1),
(4, 5),
(4, 6),
(4, 7),
(4, 8),
(5, 1),
(5, 2),
(5, 8),
(6, 3),
(6, 7),
(6, 8),
(7, 7),
(7, 9),
(8, 5);
Is there a better way to query it than this:
-- Asker's working (but slow) targeting query: one row per user with the
-- comma-separated lists of interest ids and location ids.
SELECT email,
GROUP_CONCAT( DISTINCT ui.interest_id ) AS interests,
GROUP_CONCAT( DISTINCT ul.location_id ) AS locations
FROM `users` u
LEFT JOIN users_interests ui ON u.id = ui.user_id
LEFT JOIN users_locations ul ON u.id = ul.user_id
GROUP BY u.id
-- HAVING filters after the aggregation, so every user is joined and grouped first.
-- A user is kept when the interests list is NULL (no interest rows) or contains 2 or 3 ...
HAVING IF( interests IS NOT NULL , FIND_IN_SET( 2, interests )
OR FIND_IN_SET( 3, interests ) , 1 )
-- ... and likewise when the locations list is NULL or contains 2 or 3.
AND IF( locations IS NOT NULL , FIND_IN_SET( 2, locations )
OR FIND_IN_SET( 3, locations ) , 1 )
This is the best solution I found but it still slow on a 500k and 1mil rows in the relational tables ( locations and interests ). Especially when you are matching against a large set of values ( let's say above 50 locations and interests ).
So I am trying to achieve the result this query produces, but a bit faster:
email interests locations
email1#test.com 1,2 [BLOB - 0B]
email5#test.com 1,2,8 1,2,8
email6#test.com 3,7,8 3,7,8
email9#test.com [BLOB - 0B] [BLOB - 0B]
email10#test.com [BLOB - 0B] [BLOB - 0B]
I also tried to join against an SELECT UNION table - for the matching set - but it was even slower. Like this:
-- Asker's second attempt (kept as posted): the wanted interest / location ids are built as
-- small derived tables and LEFT JOINed to the user's rows.
SELECT *
FROM `users` u
LEFT JOIN users_interests ui ON u.id = ui.user_id
LEFT JOIN users_locations ul ON u.id = ul.user_id
-- NOTE: the alias is back-quoted in AS `is`, but `is.interest` in the ON clause is not.
LEFT JOIN (SELECT 2 as interest UNION SELECT 3 as interest) as `is` ON ui.interest_id = is.interest
LEFT JOIN (SELECT 2 as location UNION SELECT 3 as location ) as `ls` ON ul.location_id = ls.location
-- A row passes when the user has no interest row, or when the interest row matched a wanted id;
-- same rule for locations.
WHERE IF(ui.user_id IS NOT NULL, `is`.interest IS NOT NULL,1) AND
IF(ul.user_id IS NOT NULL, ls.location IS NOT NULL,1)
GROUP BY u.id
I am using this for a basic targeting system.
I would appreciate very much, any suggestion! Thank you!
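`IS` is a reserved word in MySQL, so the alias `is` has to be quoted with backticks every time it is used; in your query `is.interest` in the ON clause is not quoted.
Your GROUP BY can also slow the query down, and I don't see the point of using GROUP BY u.id here, since u.id is already a unique id. (Note that without the GROUP BY the joins return one row per user/interest/location combination rather than one row per user.)
look demo
Try using backticks around it: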
-- Same query with the reserved-word alias `is` back-quoted at every use
-- and without the GROUP BY. The wanted interest and location ids (2 and 3)
-- are built as two small derived tables.
SELECT *
FROM `users` AS u
LEFT JOIN users_interests AS ui
    ON ui.user_id = u.id
LEFT JOIN users_locations AS ul
    ON ul.user_id = u.id
LEFT JOIN (
    SELECT 2 AS interest
    UNION
    SELECT 3 AS interest
) AS `is`
    ON `is`.interest = ui.interest_id
LEFT JOIN (
    SELECT 2 AS location
    UNION
    SELECT 3 AS location
) AS `ls`
    ON `ls`.location = ul.location_id
-- A row passes when the user has no interest row at all, or when its
-- interest is one of the wanted ones; the same rule applies to locations.
WHERE (ui.user_id IS NULL OR `is`.interest IS NOT NULL)
  AND (ul.user_id IS NULL OR `ls`.location IS NOT NULL)