MySql Recursive - get all children and parents from a given id - mysql

MySQL Version 8.0
Schema SQL
-- Self-referencing department hierarchy: `father` holds the id of the parent
-- department, or NULL for a top-level department.
CREATE TABLE IF NOT EXISTS `department` (
    `id`     INT         NOT NULL,
    `name`   VARCHAR(45) NOT NULL,
    `father` INT         NULL,
    PRIMARY KEY (`id`),
    INDEX `fk_department_department_idx` (`father` ASC) VISIBLE,
    CONSTRAINT `fk_department_department`
        FOREIGN KEY (`father`)
        REFERENCES `department` (`id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION
) ENGINE = InnoDB;
-- Sample hierarchy: dp1 and dp2 are roots; dp6 hangs under dp4, which hangs under dp1.
INSERT INTO department (id, name, father)
VALUES
    (1, 'dp1', NULL),
    (2, 'dp2', NULL),
    (3, 'dp3', 1),
    (4, 'dp4', 1),
    (5, 'dp5', 2),
    (6, 'dp6', 4),
    (7, 'dp7', 6),
    (8, 'dp8', 6),
    (9, 'dp9', 6);
-- Remove ONLY_FULL_GROUP_BY from the global SQL mode, then clear every SQL
-- mode for the current session. `@@sql_mode` reads the system variable
-- (a leading `#` would start a comment in MySQL).
SET GLOBAL sql_mode = (SELECT REPLACE(@@sql_mode, 'ONLY_FULL_GROUP_BY', ''));
SET SESSION sql_mode = '';
My query:
-- Walk DOWN the hierarchy: start at department 6, then repeatedly pull in
-- every department whose `father` is already part of the result.
WITH RECURSIVE cte_department AS (
    -- anchor: the starting department
    SELECT d1.id, d1.name, d1.father
    FROM department AS d1
    WHERE d1.id = 6

    UNION ALL

    -- recursive step: direct children of the rows found so far
    SELECT d2.id, d2.name, d2.father
    FROM department AS d2
    INNER JOIN cte_department AS cte
        ON d2.father = cte.id
)
SELECT * FROM cte_department;
Result:
id name father
6 dp6 4
7 dp7 6
8 dp8 6
9 dp9 6
What I need:
id name father
1 dp1 null
4 dp4 1
6 dp6 4
7 dp7 6
8 dp8 6
9 dp9 6
The problem is:
I can get all the children, but I need to add to this query all the parents of the given ID, in this case ID 6.
I'm stuck with that. If someone can help me, follow the fiddle.
https://www.db-fiddle.com/f/g8YkE3hqsvaw8G9vdHPyyF/0

The recursive part can have multiple query blocks.
-- Department 6 together with all of its ancestors and descendants.
-- `state` records how a row was reached, so a row found while climbing only
-- keeps climbing and a row found while descending only keeps descending.
WITH RECURSIVE cte_department AS (
    -- anchor: the starting department
    SELECT d1.id, d1.name, d1.father, 'Begin' state
    FROM department AS d1
    WHERE d1.id = 6

    UNION ALL

    -- climb: the parent of the start row or of an ancestor already found
    SELECT d2.id, d2.name, d2.father, 'Up'
    FROM department AS d2
    INNER JOIN cte_department AS cte
        ON d2.id = cte.father
    WHERE cte.state IN ('Begin', 'Up')

    UNION ALL

    -- descend: the children of the start row or of a descendant already found
    SELECT d2.id, d2.name, d2.father, 'Down'
    FROM department AS d2
    INNER JOIN cte_department AS cte
        ON d2.father = cte.id
    WHERE cte.state IN ('Begin', 'Down')
)
SELECT id, name, father
FROM cte_department
ORDER BY father, id, name;
Try it on db<>fiddle.

I would use two separate recursive queries: one to bring the children, the other for the parents, and then union the results. You can keep track of the level of each node to properly order the records in the result set:
-- Ancestors and descendants of department 6, ordered from the root downwards.
-- `lvl` is the depth relative to the starting node: 1 for department 6 itself,
-- 0, -1, ... for each step up and 2, 3, ... for each step down.
with recursive
children as (
    select 1 as lvl, d.* from department d where id = 6
    union all
    -- one level deeper per step, so descendants sort below their own parents
    select c.lvl + 1, d.* from department d inner join children c on c.id = d.father
),
parents as (
    select 1 as lvl, d.* from department d where id = 6
    union all
    -- one level higher per step
    select p.lvl - 1, d.* from department d inner join parents p on d.id = p.father
)
select * from parents
union -- on purpose, to remove the duplicate on id 6
select * from children
order by lvl, id; -- id breaks ties between siblings on the same level
This is safer than having multiple union all members in the same query. MySQL does not guarantee the order of evaluation of the members in the recursion, so using this technique could lead to unexpected behavior.
Demo on DB Fiddle
Unrelated to your question, but: the following can be seen in your code:
SET GLOBAL sql_mode=(SELECT REPLACE(@@sql_mode,'ONLY_FULL_GROUP_BY',''));
SET SESSION sql_mode = '';
Just don't. ONLY_FULL_GROUP_BY is there for a good reason: it makes MySQL behave consistently with the SQL standard with regard to aggregation queries. Disabling this SQL mode is never a good idea.

Related

Recursive search in Mysql 5.7.30

I need to find the list of Parent id's in which particular text exists whether it may be in parent name or in its children's name.
Consider the following table
pid | parent | name
--------------------------------
1   | null   | Parent1dynamic
2   | null   | Parent2
3   | 1      | child1-P1
4   | 2      | Child1-P2
5   | 4      | Child-c1p2-dynamic
6   | null   | Parent3
7   | null   | Parent4
8   | 7      | Child-p4-dynamic
i have used the following Mysql query
-- Top-level rows (parent IS NULL) whose own name, or the name of a DIRECT
-- child, contains 'dyna'. Only one level is inspected; deeper descendants
-- are not followed.
SELECT p.*
FROM db.tbl AS p
INNER JOIN (
    -- a matching root contributes its own pid, a matching child its parent's pid
    -- (no ORDER BY here: it cannot affect the outer result, and ordering a
    -- DISTINCT query by a column outside its select list is rejected when
    -- ONLY_FULL_GROUP_BY is enabled)
    SELECT DISTINCT IFNULL(m.parent, m.pid) AS id
    FROM db.tbl AS m
    WHERE m.name LIKE '%dyna%'
) AS hit
    ON hit.id = p.pid
WHERE p.parent IS NULL
ORDER BY p.pid
LIMIT 0, 15
Using this query I'm searching for the text 'dyna' and getting a result with ids [1 & 7]; it only searches the first level,
but I need the result to be [1, 2 & 7], i.e. a recursive search.
In MySQL 8+ it may be
-- Top-down search: starting from every root row, follow the children and
-- accumulate the names along each path in `rname` (separated by CHAR(0)).
-- `rparent` carries the root's pid down the path, so the final SELECT reports
-- each root that has a path whose accumulated names match @pattern.
-- NOTE(review): a recursive CTE takes its column widths from the anchor
-- SELECT; for long paths `rname` may need CAST(name AS CHAR(n)) in the anchor.
WITH RECURSIVE
cte AS ( SELECT pid, parent, name, pid rpid, pid rparent, name rname
         FROM test
         WHERE parent IS NULL
         UNION ALL
         SELECT test.pid, test.parent, test.name, cte.pid, cte.rparent, CONCAT(cte.rname, CHAR(0), test.name)
         FROM cte
         JOIN test ON cte.pid = test.parent )
SELECT DISTINCT rparent pid
FROM cte
WHERE rname LIKE @pattern;
or
-- Bottom-up search: start from every row whose name matches @pattern, climb
-- to its parent on each step, and keep the rows that turn out to be roots.
WITH RECURSIVE
cte AS ( SELECT pid, parent
         FROM test
         WHERE name LIKE @pattern
         UNION ALL
         SELECT test.pid, test.parent
         FROM cte
         JOIN test ON cte.parent = test.pid )
SELECT DISTINCT pid
FROM cte
WHERE parent IS NULL;
In MySQL 5+ use stored procedure:
-- Returns the pids of the root rows (parent IS NULL) of `test` that match
-- `pattern` themselves or are reached by climbing from a matching row.
-- Emulates a recursive CTE with a work table named `cte`.
-- NOTE(review): `cte` is a regular (non-temporary) table, so concurrent calls
-- would presumably collide on it - confirm before using under concurrency.
CREATE PROCEDURE get_rows_like_pattern (IN pattern VARCHAR(255))
BEGIN
-- seed the work table with every row whose name matches the pattern
CREATE TABLE cte (pid INT PRIMARY KEY, parent INT)
SELECT pid, parent
FROM test
WHERE name LIKE pattern;
-- keep climbing while the previous statement added rows
-- (relies on ROW_COUNT() reporting the rows inserted by the statement just before it)
WHILE ROW_COUNT() DO
-- add the parents of the rows collected so far; IGNORE skips pids already
-- present (primary key on pid), so the loop ends once nothing new is found
INSERT IGNORE INTO cte
SELECT test.pid, test.parent
FROM cte
JOIN test ON cte.parent = test.pid;
END WHILE;
-- of everything collected, report only the roots
SELECT DISTINCT pid
FROM cte
WHERE parent IS NULL;
DROP TABLE cte;
END
fiddle

Fast group rank() function

There are various ways people try to emulate MSSQL RANK() or ROW_NUMBER() functions in MySQL, but all of them I've tried so far are slow.
I have a table that looks like this:
-- Sample data: one rating per (id, category); '*' is the overall category.
CREATE TABLE ratings (
    `id`       INT,
    `category` VARCHAR(1),
    `rating`   INT
);

INSERT INTO ratings (`id`, `category`, `rating`)
VALUES
    (3, '*', 54),
    (4, '*', 45),
    (1, '*', 43),
    (2, '*', 24),
    (2, 'A', 68),
    (3, 'A', 43),
    (1, 'A', 12),
    (3, 'B', 22),
    (4, 'B', 22),
    (4, 'C', 44);
Except it has 220,000 records. There are about 90,000 unique id's.
I wanted to rank the id's first by looking at the categories which were not * where a higher rating is a lower rank.
-- Rank of every row inside its category, '*' excluded: 1 = highest rating,
-- ties broken by the higher id. The rank is the number of rows in the same
-- category whose (rating, id) pair is >= the row's own pair.
-- `rank` is backtick-quoted because RANK is a reserved word from MySQL 8.0.2.
SELECT g1.id,
       g1.category,
       g1.rating,
       Count(*) AS `rank`
FROM ratings AS g1
JOIN ratings AS g2 ON (g2.rating, g2.id) >= (g1.rating, g1.id)
                  AND g1.category = g2.category
WHERE g1.category != '*'
GROUP BY g1.id,
         g1.category,
         g1.rating
ORDER BY g1.category,
         `rank`
Output:
id category rating rank
2 A 68 1
3 A 43 2
1 A 12 3
4 B 22 1
3 B 22 2
4 C 44 1
Then I wanted to take the smallest rank an id had, and average that with the rank they have within the * category. Giving a total query of:
-- Overall rank per id: the average of
--   X1: the id's rank inside the '*' category, and
--   X2: the id's best (smallest) rank across all other categories.
-- Ranks are computed by self-join: the number of rows in the same category
-- whose (rating, id) pair is >= the row's own pair.
-- `rank` is backtick-quoted because RANK is a reserved word from MySQL 8.0.2.
-- The derived tables carry no ORDER BY: it cannot affect the outer result.
SELECT X1.id,
       (X1.`rank` + X2.minrank) / 2 AS OverallRank
FROM
    (SELECT g1.id,
            g1.category,
            g1.rating,
            Count(*) AS `rank`
     FROM ratings AS g1
     JOIN ratings AS g2 ON (g2.rating, g2.id) >= (g1.rating, g1.id)
                       AND g1.category = g2.category
     WHERE g1.category = '*'
     GROUP BY g1.id,
              g1.category,
              g1.rating) X1
JOIN
    (SELECT id,
            Min(`rank`) AS MinRank
     FROM
         (SELECT g1.id,
                 g1.category,
                 g1.rating,
                 Count(*) AS `rank`
          FROM ratings AS g1
          JOIN ratings AS g2 ON (g2.rating, g2.id) >= (g1.rating, g1.id)
                            AND g1.category = g2.category
          WHERE g1.category != '*'
          GROUP BY g1.id,
                   g1.category,
                   g1.rating) X
     GROUP BY id) X2 ON X1.id = X2.id
ORDER BY overallrank
Giving me
id OverallRank
3 1.5000
4 1.5000
2 2.5000
1 3.0000
This query is correct and the output I want, but it just hangs on my real table of 220,000 records. How can I optimize it? My real table has an index on id,rating and category and id,category
Edit:
Result of SHOW CREATE TABLE ratings:
CREATE TABLE `rating` (
`id` int(11) NOT NULL,
`category` varchar(255) NOT NULL,
`rating` int(11) NOT NULL DEFAULT '1500',
`rd` int(11) NOT NULL DEFAULT '350',
`vol` float NOT NULL DEFAULT '0.06',
`wins` int(11) NOT NULL,
`losses` int(11) NOT NULL,
`streak` int(11) NOT NULL DEFAULT '0',
PRIMARY KEY (`streak`,`rd`,`id`,`category`),
UNIQUE KEY `id_category` (`id`,`category`),
KEY `rating` (`rating`,`rd`),
KEY `streak_idx` (`streak`),
KEY `category_idx` (`category`),
KEY `id_rating_idx` (`id`,`rating`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
The PRIMARY KEY is the most common use case of queries to this table, that is why it's the clustered key. It's worth noting that the server is a raid 10 of SSDs with a 9GB/s FIO random read. So I don't suspect the indices not being clustered will affect much.
Output of (select count(distinct category) from ratings) is 50
In case this is caused by the shape of the data or by an oversight on my part, I have included an export of the entire table. It is only 200KB zipped: https://www.dropbox.com/s/p3iv23zi0uzbekv/ratings.zip?dl=0
The first query takes 27 seconds to run
You can use temporary tables with an AUTO_INCREMENT column to generate ranks (row number).
For example - to generate ranks for the '*' category:
-- Rank the '*' category by inserting its rows in rank order and letting the
-- AUTO_INCREMENT column number them 1, 2, 3, ...
-- `rank` is backtick-quoted because RANK is a reserved word from MySQL 8.0.2.
drop temporary table if exists tmp_main_cat_rank;
create temporary table tmp_main_cat_rank (
    `rank` int unsigned auto_increment primary key,
    id int NOT NULL
) engine=memory
    select null as `rank`, id
    from ratings r
    where r.category = '*'
    order by r.category, r.rating desc, r.id desc;
This runs in something like 30 msec. While your approach with the selfjoin takes 45 seconds on my machine. Even with a new index on (category, rating, id) it still takes 14 seconds to run.
Generating ranks per group (per category) is a bit more complicated. We can still use an AUTO_INCREMENT column, but will need to calculate and subtract an offset per category:
-- Step 1: number every non-'*' row consecutively, category by category,
-- highest rating (then highest id) first.
DROP TEMPORARY TABLE IF EXISTS tmp_pos;
CREATE TEMPORARY TABLE tmp_pos (
    pos      INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    category VARCHAR(50) NOT NULL,
    id       INT NOT NULL
) ENGINE = MEMORY
    SELECT NULL AS pos, category, id
    FROM ratings r
    WHERE r.category <> '*'
    ORDER BY r.category, r.rating DESC, r.id DESC;

-- Step 2: per category, the number of positions that precede its first row.
DROP TEMPORARY TABLE IF EXISTS tmp_cat_offset;
CREATE TEMPORARY TABLE tmp_cat_offset ENGINE = MEMORY
    SELECT category, MIN(pos) - 1 AS `offset`
    FROM tmp_pos
    GROUP BY category;

-- Step 3: position minus offset is the rank inside the category;
-- keep the best (smallest) one per id.
SELECT t.id, MIN(t.pos - o.offset) AS min_rank
FROM tmp_pos t
JOIN tmp_cat_offset o USING (category)
GROUP BY t.id
This runs in about 220 msec. The selfjoin solution takes 42 sec or 13 sec with the new index.
Now you just need to combine the last query with the first temp table, to get your final result:
-- Average each id's best per-category rank (t1) with its rank in '*' (t2).
SELECT
    t1.id,
    (t1.min_rank + t2.rank) / 2 AS OverallRank
FROM (
    SELECT t.id, MIN(t.pos - o.offset) AS min_rank
    FROM tmp_pos t
    JOIN tmp_cat_offset o USING (category)
    GROUP BY t.id
) t1
JOIN tmp_main_cat_rank t2 USING (id);
Overall runtime is ~280 msec without an additional index and ~240 msec with an index on (category, rating, id).
A note to the selfjoin approach: It's an elegant solution and performs fine with a small group size. It's fast with an average group size <= 2. It can be acceptable for a group size of 10. But you have an average group size 447 (count(*) / count(distinct category)). That means every row is joined with 447 other rows (on average). You can see the impact by removing the group by clause:
-- Number of row pairs the self-join produces before grouping.
SELECT COUNT(*)
FROM ratings AS g1
JOIN ratings AS g2
    ON g1.category = g2.category
    AND (g2.rating, g2.id) >= (g1.rating, g1.id)
WHERE g1.category != '*'
The result is more than 10M rows.
However - with an index on (category, rating, id) your query runs in 33 seconds on my machine.

update and 2 select statements in same query causing an error

I have a table with a field c_id which has the entries with some values of CustomerIds.
I need to set those to null if those customer Ids are not valid as per the table.
I am using the following query. But it seems to throw an error:
update Customers set c_id=NULL where customer_id in (select customer_id from Customers where c_id not in (select customer_id from Customers);
Could someone help me identify the problem here
I avoided IN and used JOINS to fine tune your code:
-- Clear C_Id on every row whose C_Id matches no Customer_Id in the table.
UPDATE CUST
SET C_Id = NULL
FROM Customers AS CUST
LEFT JOIN Customers AS LJC
    ON CUST.C_Id = LJC.Customer_Id
WHERE LJC.Customer_Id IS NULL
SQL Fiddle: http://sqlfiddle.com/#!3/59fb1/5
Let me explain what I have done:
Created dummy table and data for Customers table:
-- Dummy Customers table; C_Id is expected to reference another row's Customer_Id.
CREATE TABLE Customers (
    Customer_Id INT,
    C_Id INT
);

-- Explicit column list so the insert does not depend on column order.
-- UNION ALL: the rows are already distinct, so no de-duplication is needed.
INSERT INTO Customers (Customer_Id, C_Id)
SELECT 1, 11 UNION ALL
SELECT 2, 22 UNION ALL
SELECT 22, 3 UNION ALL
SELECT 11, 4 UNION ALL
SELECT 5, 6 UNION ALL
SELECT 7, 8 UNION ALL
SELECT 3, 9;
Here the C_Id values 11, 22 and 3 also exist as Customer_Id values. So, as per your request, the other C_Id values (4, 6, 8 and 9), which do not exist as a Customer_Id, will be updated to NULL.
The below block will return the C_Id those are not exists in the Customer_Id
-- C_Id values that have no matching Customer_Id (anti-join).
SELECT C1.C_Id
FROM Customers AS C1
LEFT JOIN Customers AS C2
    ON C1.C_Id = C2.Customer_Id
WHERE C2.Customer_Id IS NULL
The below block will update C_Id as NULL from the above SELECT block
-- Set C_Id to NULL wherever it is one of the C_Id values without a matching Customer_Id.
UPDATE Customers
SET C_Id = NULL
WHERE C_Id IN (
    SELECT C1.C_Id
    FROM Customers AS C1
    LEFT JOIN Customers AS C2
        ON C1.C_Id = C2.Customer_Id
    WHERE C2.Customer_Id IS NULL
)
If I remove the IN from the block above and rewrite it using a JOIN, the result is the query I gave at the top.
You have a syntax error.
Add ) at the end of query for closing first select query
-- Clear c_id on every row whose c_id does not match any existing customer_id.
-- NOT EXISTS needs a correlated subquery; it cannot be written as
-- `c_id not exists (...)`.
-- NOTE(review): MySQL rejects an UPDATE whose subquery reads the table being
-- updated (error 1093); there, use a multi-table UPDATE with a LEFT JOIN.
update Customers
set c_id = NULL
where c_id is not null
  and not exists
      (select 1
       from Customers valid
       where valid.customer_id = Customers.c_id
      );

Select Common Record with different column from 3 tables

I have following 3 tables with duplicate id.I want to retrieve record with same id but having different name and date from all the 3 tables.I need query to get Expected result output.
-- Three identically shaped student tables; the same id can appear in several of them.
CREATE TABLE Student1 (
    `id`     INT,
    `status` INT,
    `amount` INT,
    `Name`   VARCHAR(10),
    `date`   VARCHAR(55)
);
INSERT INTO Student1 (`id`, `status`, `amount`, `Name`, `date`)
VALUES
    (1, 0, 4500, 'ram', '04/02/2012'),
    (2, 0, 2000, 'shyam', '05/09/2013'),
    (4, 0, 1500, 'ghanshyam', '08/11/2014');

CREATE TABLE Student2 (
    `id`     INT,
    `status` INT,
    `amount` INT,
    `Name`   VARCHAR(10),
    `date`   VARCHAR(55)
);
INSERT INTO Student2 (`id`, `status`, `amount`, `Name`, `date`)
VALUES
    (3, 0, 4500, 'gopal', '04/02/2012'),
    (2, 0, 8000, 'radheshyam', '15/11/2013'),
    (4, 1, 1500, 'ghanshyam', '18/10/2015');

CREATE TABLE Student3 (
    `id`     INT,
    `status` INT,
    `amount` INT,
    `Name`   VARCHAR(10),
    `date`   VARCHAR(55)
);
INSERT INTO Student3 (`id`, `status`, `amount`, `Name`, `date`)
VALUES
    (1, 1, 4500, 'ram', '14/02/2012'),
    (2, 0, 6500, 'radhe', '11/11/2014'),
    (3, 1, 4500, 'gopal', '14/02/2015');
Expected Result:
id status amount Name date
2 0 2000 shyam 05/09/2013
2 0 6500 radhe 11/11/2014
2 0 8000 radheshyam 15/11/2013
You just use union all to bring the tables together. One way is:
-- All rows with id = 2 from the three student tables stacked together.
-- Table names are spelled exactly as created (Student1..3): MySQL table names
-- are case-sensitive on case-sensitive file systems.
select s.*
from (select s.* from Student1 s union all
      select s.* from Student2 s union all
      select s.* from Student3 s
     ) s
where id = 2;
As I say in the comment, though, normally you would have one table rather than three.
I realize that I might have misunderstood the question. If you want to find records that have the same id but different names, then use:
-- Ids that appear with three different names across the student tables,
-- with those names concatenated.
-- Table names are spelled exactly as created (Student1..3): MySQL table names
-- are case-sensitive on case-sensitive file systems.
select s.id, group_concat(s.name) as names
from (select s.* from Student1 s union all
      select s.* from Student2 s union all
      select s.* from Student3 s
     ) s
group by s.id
having count(distinct name) = 3 -- or perhaps >= 2, depending on what you mean
If you want the full records, you can join this back to the original tables.
EDIT:
If you want all the original rows:
-- Full original rows for every id that appears with three different names:
-- `ss` finds the qualifying ids, `s` is the stacked data they are joined back to.
-- Table names are spelled exactly as created (Student1..3): MySQL table names
-- are case-sensitive on case-sensitive file systems.
select s.*
from (select s.id, group_concat(s.name) as names
      from (select s.* from Student1 s union all
            select s.* from Student2 s union all
            select s.* from Student3 s
           ) s
      group by s.id
      having count(distinct name) = 3
     ) ss join
     (select s.* from Student1 s union all
      select s.* from Student2 s union all
      select s.* from Student3 s
     ) s
     on ss.id = s.id;

Looking for missed IDs in SQL Server 2008

I have a table that contains two columns
ID | Name
----------------
1 | John
2 | Sam
3 | Peter
6 | Mike
It has missed IDs. In this case these are 4 and 5.
How do I find and insert them together with random names into this table?
Update: cursors and temp tables are not allowed. The random name should be 'Name_'+ some random number. Maybe it would be the specified value like 'Abby'. So it doesn't matter.
Using a recursive CTE you can determine the missing IDs as follows
-- List the IDs missing between the smallest and the largest ID in @Table.
-- T-SQL variables (including table variables) are prefixed with @.
DECLARE @Table TABLE(
    ID INT,
    Name VARCHAR(10)
)
INSERT INTO @Table VALUES (1, 'John'),(2, 'Sam'),(3,'Peter'),(6, 'Mike')

DECLARE @StartID INT,
        @EndID INT
SELECT @StartID = MIN(ID),
       @EndID = MAX(ID)
FROM @Table

-- IDS enumerates every integer from @StartID to @EndID, one per recursion step
;WITH IDS AS (
    SELECT @StartID IDEntry
    UNION ALL
    SELECT IDEntry + 1
    FROM IDS
    WHERE IDEntry + 1 <= @EndID
)
-- keep the generated numbers that have no row in @Table
SELECT IDS.IDEntry [ID]
FROM IDS LEFT JOIN
     @Table t ON IDS.IDEntry = t.ID
WHERE t.ID IS NULL
OPTION (MAXRECURSION 0)
The option MAXRECURSION 0 will allow the code to avoid the recursion limit of SQL SERVER
From Query Hints and WITH common_table_expression (Transact-SQL)
MAXRECURSION number Specifies the maximum number of recursions
allowed for this query. number is a nonnegative integer between 0 and
32767. When 0 is specified, no limit is applied. If this option is not specified, the default limit for the server is 100.
When the specified or default number for MAXRECURSION limit is reached
during query execution, the query is ended and an error is returned.
Because of this error, all effects of the statement are rolled back.
If the statement is a SELECT statement, partial results or no results
may be returned. Any partial results returned may not include all rows
on recursion levels beyond the specified maximum recursion level.
Generating the random names will largely depend on the requirements for such a name and on the column type. What exactly does this random name entail?
You can do this using a recursive Common Table Expression CTE. Here's an example how:
-- List every number from 1 to MAX(ID) that has no row in MyTable, paired
-- with a placeholder name.
DECLARE @MaxId INT
SELECT @MaxId = MAX(ID) from MyTable

-- Numbers enumerates 1..@MaxId, one value per recursion step
;WITH Numbers(Number) AS
(
    SELECT 1
    UNION ALL
    SELECT Number + 1 FROM Numbers WHERE Number < @MaxId
)
SELECT n.Number, 'Random Name'
FROM Numbers n
LEFT OUTER JOIN MyTable t ON n.Number=t.ID
WHERE t.ID IS NULL
-- without this hint the default limit of 100 recursion levels makes the
-- query fail as soon as @MaxId exceeds about 100
OPTION (MAXRECURSION 0)
Here are a couple of articles about CTEs that will be helpful: "Using Common Table Expressions" and "Recursive Queries Using Common Table Expressions".
Start by selecting the highest number in the table (select top 1 id desc), or select max(id), then run a while loop to iterate from 1...max.
See this article about looping.
For each iteration, see if the row exists, and if not, insert into table, with that ID.
I think recursive CTE is a better solution, because it's going to be faster, but here is what worked for me:
-- Recreate the demo table from scratch.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[TestTable]') AND type in (N'U'))
DROP TABLE [dbo].[TestTable]
GO
CREATE TABLE [dbo].[TestTable](
[Id] [int] NOT NULL,
[Name] [varchar](50) NOT NULL,
CONSTRAINT [PK_TestTable] PRIMARY KEY CLUSTERED
(
[Id] ASC
))
GO
INSERT INTO [dbo].[TestTable]([Id],[Name]) VALUES (1, 'John')
INSERT INTO [dbo].[TestTable]([Id],[Name]) VALUES (2, 'Sam')
INSERT INTO [dbo].[TestTable]([Id],[Name]) VALUES (3, 'Peter')
INSERT INTO [dbo].[TestTable]([Id],[Name]) VALUES (6, 'Mike')
GO
-- @mod = number of 'P' rows' top value + 1; it is the base used below to
-- combine two spt_values numbers into one larger number.
declare @mod int
select @mod = MAX(number)+1 from master..spt_values where [type] = 'P'
-- Insert a generated name for every Id from 1 to MAX(Id) that is not in the table yet.
INSERT INTO [dbo].[TestTable]([Id],[Name])
SELECT y.Id,'Name_' + cast(newid() as varchar(45)) Name from
(
-- y: the first MAX(Id) positive numbers
SELECT TOP (select MAX(Id) from [dbo].[TestTable]) x.Id from
(
-- x: every combination of two 'P' numbers, read as a two-digit number in base @mod
SELECT
t1.number*@mod + t2.number Id
FROM master..spt_values t1
CROSS JOIN master..spt_values t2
WHERE t1.[type] = 'P' and t2.[type] = 'P'
) x
WHERE x.Id > 0
ORDER BY x.Id
) y
LEFT JOIN [dbo].[TestTable] on [TestTable].Id = y.Id
where [TestTable].Id IS NULL
GO
select * from [dbo].[TestTable]
order by Id
GO
http://www.sqlfiddle.com/#!3/46c7b/18
It's actually very simple :
Create a table called #All_numbers which should contain all the natural number in the range that you are looking for.
#list is a table containing your data
-- Numbers present in #All_numbers but absent from #list, each paired with a generated name.
SELECT
    a.num AS missing_number,
    'Random_Name' + CONVERT(varchar, a.num)
FROM #All_numbers a
LEFT OUTER JOIN #list l
    ON l.Id = a.num
WHERE l.id IS NULL