Stored Procedure timing out - mysql

Trying to collect some heirarchical data to send out to a third party, and was directed to this post.
After attempting to tweak it to my use case on SQL Fiddle, the stored procedure keeps timing out.
So I tried it on locally twice (via PhpMyAdmin).
When I try to reload PMA in the browser after calling the stored procedure, I just get an eternal "waiting for response" spinner (more than 10 or 20 minutes).
I'm presuming there's something wrong with my SP code ???
CREATE TABLE foo
(`id` int, `name` varchar(100), `parentId` int, `path` varchar(100))
//
INSERT INTO foo (`id`, `name`, `parentId`, `path`)
VALUES (1, 'discrete', 0, NULL),
(2, 'res', 1, NULL),
(3, 'smt', 2, NULL),
(4, 'cap', 1, NULL),
(5, 'ind', 1, NULL),
(6, 'smt', 4, NULL),
(7, 'tant', 6, NULL),
(8, 'cer', 6, NULL)
//
CREATE PROCEDURE updatePath()
BEGIN
DECLARE cnt, n int;
SELECT COUNT(*) INTO n FROM foo WHERE parentId = 0;
UPDATE foo a, foo b SET a.path = b.name WHERE b.parentId IS NULL AND a.parentId = b.id;
SELECT count(*) INTO cnt FROM foo WHERE path IS NULL;
while cnt > n do
UPDATE foo a, foo b SET a.path = concat(b.path, '|', b.id) WHERE b.path IS NOT NULL AND a.parentId = b.id;
SELECT count(*) INTO cnt FROM foo WHERE path IS NULL;
end while;
END//
EDIT
Expected results:
VALUES (1, 'discrete', 0, '1'),
(2, 'res', 1, '1|2'),
(3, 'smt', 2, '1|2|3'),
(4, 'cap', 1, '1|4'),
(5, 'ind', 1, '1|5'),
(6, 'smt', 4, '1|4|6'),
(7, 'tant', 6, '1|4|6|7'),
(8, 'cer', 6, '1|4|6|8');

After a good nights sleep, I took #Drew's lead and I went through it one pc at a time.
Got it working. Here's where I'm leaving it:
CREATE TABLE foo
(`id` int, `name` varchar(100), `parentId` int, `path` varchar(100))
//
INSERT INTO foo
(`id`, `name`, `parentId`, `path`)
VALUES
(1, 'dscr', 0, NULL),
(2, 'res', 1, NULL),
(3, 'smt', 2, NULL),
(4, 'cap', 1, NULL),
(5, 'ind', 1, NULL),
(6, 'chp', 4, NULL),
(7, 'tant', 6, NULL),
(8, 'cer', 6, NULL)
//
CREATE PROCEDURE updatePath()
BEGIN
DECLARE cnt, n int;
SELECT COUNT(*) INTO n FROM foo WHERE parentId = 0; -- n is now 1
SELECT COUNT(*) INTO cnt FROM foo WHERE path IS NULL; -- cnt is now 8
UPDATE foo child, foo parent -- each child now has its parent and own ID's in the path
SET child.path = CONCAT(parent.id, '|', child.id)
WHERE parent.parentId = 0
AND child.parentId = parent.id;
WHILE cnt > n DO
UPDATE foo child, foo parent -- concat parent's path and own ID into each empty child's path
SET child.path = concat( parent.path,'|',child.id )
WHERE parent.path IS NOT NULL
AND child.parentId = parent.id;
SELECT COUNT(*) INTO cnt -- decrement cnt
FROM foo
WHERE path IS NULL;
END WHILE;
UPDATE foo -- set path for any top-level categories
SET path = id
WHERE path IS NULL;
END//
call updatePath()//
Feel free to critique.
Hope this helps someone else some time.

Are you trying to do a self-referencing join to create the hierarchy?
select
a.name,
parentName = b.name
from
foo a ,
outer join foo b on
(
a.id = b.parentId
)

Related

SQL including count and multiple where clauses

I have 2 tables.
Table text_to_annotate:
CREATE TABLE text_to_annotate (
ID varchar(3),
text varchar(100));
INSERT INTO text_to_annotate (ID, text)
VALUES
(1, test1),
(2, test2),
(3, test3);
Table annotation_data:
CREATE TABLE annotation_data (
text_ID varchar(3),
annotation_ID varchar(3)
IP varchar(15));
INSERT INTO annotation_data (text_ID, annotation_ID, IP)
VALUES
(1, 0, IP_1),
(2, 1, IP_1),
(3, 2, IP_1),
(1, 1, IP_2),
(2, 2, IP_2),
(3, 3, IP_2),
(3, 0, IP_3),
(3, 0, IP_4),
(3, 2, IP_5);
I want to display an unseen text to an annotator which hasn't been annotated more than 5 times. For example, a new annotator with IP = IP_6 cannot annotate text_ID = 3, only text_ID = 1 and text_ID = 2. An annotator can only annotate unique text_IDs once.
Here's my code, but something isn't quite correct:
SELECT text_to_annotate.ID, text_to_annotate.text
FROM text_to_annotate
WHERE text_to_annotate.ID NOT IN (
SELECT text_ID, count(*)
FROM annotation_data
WHERE IP = '{$ip}'
AND GROUP BY text_ID
HAVING count(*) > 1;
)
ORDER BY RAND()
Here's my answer:
SELECT text_to_annotate.ID, text_to_annotate.text
FROM text_to_annotate
WHERE text_to_annotate.ID NOT IN (
SELECT text_ID, count(*)
FROM annotation_data
WHERE IP = '{$ip}'
)
AND text_to_annotate.ID IN (
SELECT text_ID FROM impact_annotation
GROUP BY text_ID
HAVING COUNT(*) < 5
)
ORDER BY RAND()

Select rows grouped by a column having max aggregate

Given the following data set, how would I find the email addresses that were references for the most ApplicationIDs that have an "Accepted" decision?
CREATE TABLE IF NOT EXISTS `EmailReferences` (
`ApplicationID` INT NOT NULL,
`Email` VARCHAR(45) NOT NULL,
PRIMARY KEY (`ApplicationID`, `Email`)
);
INSERT INTO EmailReferences (ApplicationID, Email)
VALUES
(1, 'ref10#test.org'), (1, 'ref11#test.org'), (1, 'ref12#test.org'),
(2, 'ref20#test.org'), (2, 'ref21#test.org'), (2, 'ref22#test.org'),
(3, 'ref11#test.org'), (3, 'ref31#test.org'), (3, 'ref32#test.org'),
(4, 'ref40#test.org'), (4, 'ref41#test.org'), (4, 'ref42#test.org'),
(5, 'ref50#test.org'), (5, 'ref51#test.org'), (5, 'ref52#test.org'),
(6, 'ref60#test.org'), (6, 'ref11#test.org'), (6, 'ref62#test.org'),
(7, 'ref70#test.org'), (7, 'ref71#test.org'), (7, 'ref72#test.org'),
(8, 'ref10#test.org'), (8, 'ref81#test.org'), (8, 'ref82#test.org')
;
CREATE TABLE IF NOT EXISTS `FinalDecision` (
`ApplicationID` INT NOT NULL,
`Decision` ENUM('Accepted', 'Denied') NOT NULL,
PRIMARY KEY (`ApplicationID`)
);
INSERT INTO FinalDecision (ApplicationID, Decision)
VALUES
(1, 'Accepted'), (2, 'Denied'),
(3, 'Accepted'), (4, 'Denied'),
(5, 'Denied'), (6, 'Denied'),
(7, 'Denied'), (8, 'Accepted')
;
Fiddle of same:http://sqlfiddle.com/#!9/03bcf2/1
Initially, I was using LIMIT 1 and ORDER BY CountDecision DESC, like so:
SELECT er.email, COUNT(fd.Decision) AS CountDecision
FROM EmailReferences AS er
JOIN FinalDecision AS fd ON er.ApplicationID = fd.ApplicationID
WHERE fd.Decision = 'Accepted'
GROUP BY er.email
ORDER BY CountDecision DESC
LIMIT 1
;
However, it occurred to me that I could have multiple email addresses that referred different "most accepted" decisions (i.e., a tie, so to speak), and those would be filtered out (is that the right phrasing?) with the LIMIT keyword.
I then tried a variation on the above query, replacing the ORDER BY and LIMIT lines with:
HAVING MAX(CountDecision)
But I realized that that's only half a statement: MAX(CountDecision) needs to be compared to something. I just don't know what.
Any pointers would be much appreciated. Thanks!
Note: this is for a homework assignment.
Update: To be clear, I'm trying to find value and count of Emails from EmailReferences. However, I only want rows that have FinalDecision.Decision = 'Accepted' (on matching ApplicantIDs). Based on my data, the result should be:
Email | CountDecision
---------------+--------------
ref10#test.org | 2
ref11#test.org | 2
For example...
SELECT a.*
FROM
( SELECT x.email
, COUNT(*) total
FROM emailreferences x
JOIN finaldecision y
ON y.applicationid = x.applicationid
WHERE y.decision = 'accepted'
GROUP
BY x.email
) a
JOIN
( SELECT COUNT(*) total
FROM emailreferences x
JOIN finaldecision y
ON y.applicationid = x.applicationid
WHERE y.decision = 'accepted'
GROUP
BY x.email
ORDER
BY total DESC
LIMIT 1
) b
ON b.total = a.total;
MySQL still lack window functions, but when version 8 is production ready, this becomes easier. So for fuure reference, or for those databases like Mariadb that already have window functions:
CREATE TABLE IF NOT EXISTS `EmailReferences` (
`ApplicationID` INT NOT NULL,
`Email` VARCHAR(45) NOT NULL,
PRIMARY KEY (`ApplicationID`, `Email`)
);
INSERT INTO EmailReferences (ApplicationID, Email)
VALUES
(1, 'ref10#test.org'), (1, 'ref11#test.org'), (1, 'ref12#test.org'),
(2, 'ref20#test.org'), (2, 'ref21#test.org'), (2, 'ref22#test.org'),
(3, 'ref30#test.org'), (3, 'ref31#test.org'), (3, 'ref32#test.org'),
(4, 'ref40#test.org'), (4, 'ref41#test.org'), (4, 'ref42#test.org'),
(5, 'ref50#test.org'), (5, 'ref51#test.org'), (5, 'ref52#test.org'),
(6, 'ref60#test.org'), (6, 'ref11#test.org'), (6, 'ref62#test.org'),
(7, 'ref70#test.org'), (7, 'ref71#test.org'), (7, 'ref72#test.org'),
(8, 'ref10#test.org'), (8, 'ref81#test.org'), (8, 'ref82#test.org')
;
CREATE TABLE IF NOT EXISTS `FinalDecision` (
`ApplicationID` INT NOT NULL,
`Decision` ENUM('Accepted', 'Denied') NOT NULL,
PRIMARY KEY (`ApplicationID`)
);
INSERT INTO FinalDecision (ApplicationID, Decision)
VALUES
(1, 'Accepted'), (2, 'Denied'),
(3, 'Accepted'), (4, 'Denied'),
(5, 'Denied'), (6, 'Denied'),
(7, 'Denied'), (8, 'Accepted')
;
select email, CountDecision
from (
SELECT er.email, COUNT(fd.Decision) AS CountDecision
, max(COUNT(fd.Decision)) over() maxCountDecision
FROM EmailReferences AS er
JOIN FinalDecision AS fd ON er.ApplicationID = fd.ApplicationID
WHERE fd.Decision = 'Accepted'
GROUP BY er.email
) d
where CountDecision = maxCountDecision
email | CountDecision
:------------- | ------------:
ref10#test.org | 2
dbfiddle here

SELECT data based on result of previous row in table

I have a database of students.
CREATE TABLE classlist
(`id` int, `studentid` int, `subjectid` int, `presentid` int)
;
CREATE TABLE student
(`id` int, `name` varchar(4))
;
CREATE TABLE subject
(`id` int, `name` varchar(4))
;
CREATE TABLE classStatus
(`id` int, `name` varchar(8))
;
INSERT INTO classlist
(`id`, `studentid`, `subjectid`, `presentid`)
VALUES
(1, 111, 1, 1),
(2, 222, 3, 0),
(3, 333, 2, 1),
(4, 111, 4, 1),
(5, 111, 1, 0),
(6, 222, 3, 0),
(7, 333, 2, 1),
(8, 111, 4, 1),
(9, 111, 2, 0),
(10, 111, 4, 1),
(11, 111, 1, 1),
(12, 333, 3, 1),
(13, 333, 2, 1),
(14, 333, 3, 1)
;
INSERT INTO student
(`id`, `name`)
VALUES
(111, 'John'),
(222, 'Kate'),
(333, 'Matt')
;
INSERT INTO subject
(`id`, `name`)
VALUES
(1, 'MATH'),
(2, 'ENG'),
(3, 'SCI'),
(4, 'GEO')
;
INSERT INTO classStatus
(`id`, `name`)
VALUES
(0, 'Absent'),
(1, 'Present')
;
And I have a query which shows how many times they have been present or absent.
SELECT
studentid,
students.name AS NAME,
SUM(presentid = 1) AS present,
SUM(presentid = 0) AS absent
FROM classlist
INNER JOIN student as students ON classlist.studentid=students.id
GROUP BY studentid, NAME
See this fiddle below.
http://sqlfiddle.com/#!2/fe0b0/1
There seems to be a trend from looking at this sample data that after someone attends subjectid 4 they are often not coming to the next class. How can I capture this in a query. I want to ONLY show data WHERE last subjectid =4. So in my sample data rows matching my criteria would be.
(5, 111, 1, 0),
(9, 111, 2, 0),
(11, 111, 1, 1),
as these rows are all the next row of a studentid who had a subjectid=4.
My output would be
| STUDENTID | NAME | PRESENT | ABSENT|
| 111 | John | 1 | 2 |
To get the next class for a student, use a correlated subquery:
select cl.*,
(select min(cl2.id) from classlist cl2 where cl2.studentid = cl.studentid and cl2.id > cl.id) as nextcl
from classlist cl
Plugging this into your query example tell you you who is present and absent for the next class:
SELECT students.id, students.name AS NAME,
SUM(cl.presentid = 1) AS present, SUM(cl.presentid = 0) AS absent,
sum(clnext.presentid = 1) as presentnext
FROM (select cl.*,
(select min(cl2.id) from classlist cl2 where cl2.studentid = cl.studentid and cl2.id > cl.id) as nextcl
from classlist cl
) cl INNER JOIN
student as students
ON cl.studentid = students.id left outer join
classlist clnext
on cl.nextcl = clnext.id
GROUP BY students.id, students.NAME
Add a where cl.subjectid = 4 to get the answer for subject 4.
I fixed the query. The SQLFiddle is k.
A quick and dirty solution could be to get the Classlist.Id for all lines where subjectid=4 (let's call them n) then select all the lines where Id = n+1

coalesce the other way around

Hi i have this scenario
My Table
create table foo(
id int,
num int1,
stage enum('a','b','c'),
unique(id,stage)
);
here are a snippets for data
INSERT INTO `foo` (`id`, `num`, `stage`) VALUES
(1, 1, 'a'),
(1, 2, 'b'),
(1, 3, 'c'),
(2, 1, 'a'),
(2, 2, 'b'),
(2, 3, 'c'),
(3, 1, 'a'),
(3, 2, 'b'),
(4, 1, 'a');
Notes on table
an id that has a state of c must have a prior state of a,b triggers no problem
query this as
I did this useing distinct with scalar correlated subquery
SELECT DISTINCT
id, IFNULL((
SELECT num
FROM foo f
WHERE f.id = foo.id AND f.stage = 'a'),'') `a`, IFNULL((
SELECT num
FROM foo f
WHERE f.id = foo.id AND f.stage = 'b'),'') `b`, IFNULL((
SELECT num
FROM foo f
WHERE f.id = foo.id AND f.stage = 'c'),'') `c`
FROM foo
[Bottom line]
I want a better fasten to do this
thanks
SELECT id,
MAX(IF(stage = 'a', num, NULL)) AS `a`
MAX(IF(stage = 'b', num, NULL)) AS `b`
MAX(IF(stage = 'c', num, NULL)) AS `c`
FROM foo
GROUP BY id
Try this:
SELECT id, IF(a=0, '', a) a, IF(b=0, '', b) b, IF(c=0, '', c) c
FROM (SELECT id, SUM(IF(stage = 'a', num, '')) a,
SUM(IF(stage = 'b', num, '')) b, SUM(IF(stage = 'c', num, '')) c
FROM foo GROUP BY id) AS A

Topological sorting in sql

I am resolving dependency between some objects in a table.
I have to do something with objects in order their dependency.
For example, the first object doesn't depend on any object. The second and third ones depends on first one and so on. I have to use topological sorting.
Could someone show the sample of implementation so sorting in t-sql.
I have a table:
create table dependency
(
DependencyId PK
,ObjectId
,ObjectName
,DependsOnObjectId
)
I want to get
ObjectId
ObjectName
SortOrder
Thank you.
It seams, it works:
declare #step_no int
declare #dependency table
(
DependencyId int
,ObjectId int
,ObjectName varchar(100)
,DependsOnObjectId int
,[rank] int NULL
,degree int NULL
);
insert into #dependency values (5, 5, 'Obj 5', 2, NULL, NULL)
insert into #dependency values (6, 6, 'Obj 6', 7, NULL, NULL)
insert into #dependency values (2, 2, 'Obj 2', 1, NULL, NULL)
insert into #dependency values (3, 3, 'Obj 3', 1, NULL, NULL)
insert into #dependency values (1, 1, 'Obj 1', 1, NULL, NULL)
insert into #dependency values (4, 4, 'Obj 4', 2, NULL, NULL)
insert into #dependency values (7, 7, 'Obj 7', 2, NULL, NULL)
update #dependency set rank = 0
-- computing the degree of the nodes
update d set d.degree =
(
select count(*) from #dependency t
where t.DependsOnObjectId = d.ObjectId
and t.ObjectId <> t.DependsOnObjectId
)
from #dependency d
set #step_no = 1
while 1 = 1
begin
update #dependency set rank = #step_no where degree = 0
if (##rowcount = 0) break
update #dependency set degree = NULL where rank = #step_no
update d set degree = (
select count(*) from #dependency t
where t.DependsOnObjectId = d.ObjectId and t.ObjectId != t.DependsOnObjectId
and t.ObjectId in (select tt.ObjectId from #dependency tt where tt.rank = 0))
from #dependency d
where d.degree is not null
set #step_no = #step_no + 1
end
select * from #dependency order by rank
You have a simple tree structure with only one path to each ObjectId so labeling based off number of DependsOnObjectId links traversed gives only one answer and a good enough answer to process the right stuff first. This is easy to do with a common table expression and has the benefit of easy portability:
with dependency_levels as
(
select ObjectId, ObjectName, 0 as links_traversed
from dependency where DependsOnObjectId is null
union all
select ObjectId, ObjectName, links_traversed+1
from dependecy
join dependency_levels on dependency.DependsOnObjectId = dependency_levels.ObjectId
)
select ObjectId, ObjectName, links_traversed
from dependency_levels
order by links_traversed