Mysql Row to Column with ';' separator - mysql

i have a table (Catalogs) on my Mariadb 10.1
id value
1 one ; two ; one
2 two ; three ; one
3 four ; five
4 one
5 four ; one
how do i count and group the value on Catalogs table like on the table below.
result count
one 5
two 2
three 1
four 2
five 1
or this table
id value
1 one
1 two
1 one
2 two
2 three
2 one
3 four
3 five
4 one
5 four
5 one

Taking reference from link http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
Assuming you have table named as table1 which contains two columns id and value and value column contains comma separated values.
Modified procedure:
CREATE PROCEDURE `explode_table`(bound VARCHAR(255))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value TEXT;
DECLARE occurance INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value varchar(25);
DECLARE done INT DEFAULT 0;
DECLARE cur1 CURSOR FOR SELECT table1.id, table1.value
FROM table1
WHERE table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
DROP TEMPORARY TABLE IF EXISTS table2;
CREATE TEMPORARY TABLE table2(
`id` INT NOT NULL,
`value` VARCHAR(56) NOT NULL
) engine=memory;
OPEN cur1;
read_loop: LOOP
FETCH cur1 INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurance = (SELECT LENGTH(value)
- LENGTH(REPLACE(value, bound, ''))
+1);
SET i=1;
WHILE i <= occurance DO
SET splitted_value =
trim((SELECT REPLACE(SUBSTRING(SUBSTRING_INDEX(value, bound, i),
LENGTH(SUBSTRING_INDEX(value, bound, i - 1)) + 1), ';', '')));
INSERT INTO table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
CLOSE cur1;
SELECT * FROM table2;
END

A plain SQL way of doing it, which will cope with up to 100 split delimited values (easily expanded to cope with more if necessary):-
SELECT result, COUNT(id)
FROM
(
SELECT id, SUBSTRING_INDEX(SUBSTRING_INDEX(value, ' ; ', tens.anum * 10 + units.anum + 1), ' ; ', -1) AS result
FROM Catalogs
CROSS JOIN
(SELECT 1 AS anum UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9 UNION SELECT 0) units
CROSS JOIN
(SELECT 1 AS anum UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9 UNION SELECT 0) tens
WHERE LENGTH(value) - LENGTH(REPLACE(value, ';', '')) >= ( tens.anum * 10 + units.anum)
) sub0
GROUP BY result

Related

MySQL - Replicate "n" rows based of multiple records in a string [duplicate]

I have table :
id | name
1 | a,b,c
2 | b
i want output like this :
id | name
1 | a
1 | b
1 | c
2 | b
If you can create a numbers table, that contains numbers from 1 to the maximum fields to split, you could use a solution like this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
numbers inner join tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
Please see fiddle here.
If you cannot create a table, then a solution can be this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
an example fiddle is here.
If the name column were a JSON array (like '["a","b","c"]'), then you could extract/unpack it with JSON_TABLE() (available since MySQL 8.0.4):
select t.id, j.name
from mytable t
join json_table(
t.name,
'$[*]' columns (name varchar(50) path '$')
) j;
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
If you store the values in a simple CSV format, then you would first need to convert it to JSON:
select t.id, j.name
from mytable t
join json_table(
replace(json_array(t.name), ',', '","'),
'$[*]' columns (name varchar(50) path '$')
) j
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
I have take the reference from here with changed column name.
DELIMITER $$
CREATE FUNCTION strSplit(x VARCHAR(65000), delim VARCHAR(12), pos INTEGER)
RETURNS VARCHAR(65000)
BEGIN
DECLARE output VARCHAR(65000);
SET output = REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos)
, LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1)
, delim
, '');
IF output = '' THEN SET output = null; END IF;
RETURN output;
END $$
CREATE PROCEDURE BadTableToGoodTable()
BEGIN
DECLARE i INTEGER;
SET i = 1;
REPEAT
INSERT INTO GoodTable (id, name)
SELECT id, strSplit(name, ',', i) FROM BadTable
WHERE strSplit(name, ',', i) IS NOT NULL;
SET i = i + 1;
UNTIL ROW_COUNT() = 0
END REPEAT;
END $$
DELIMITER ;
Here is my attempt:
The first select presents the csv field to the split.
Using recursive CTE, we can create a list of numbers that are limited to the number of terms in the csv field.
The number of terms is just the difference in the length of the csv field and itself with all the delimiters removed.
Then joining with this numbers, substring_index extracts that term.
with recursive
T as ( select 'a,b,c,d,e,f' as items),
N as ( select 1 as n union select n + 1 from N, T
where n <= length(items) - length(replace(items, ',', '')))
select distinct substring_index(substring_index(items, ',', n), ',', -1)
group_name from N, T
My variant: stored procedure that takes table name, field names and delimiter as arguments. Inspired by post http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
delimiter $$
DROP PROCEDURE IF EXISTS split_value_into_multiple_rows $$
CREATE PROCEDURE split_value_into_multiple_rows(tablename VARCHAR(20),
id_column VARCHAR(20), value_column VARCHAR(20), delim CHAR(1))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value VARCHAR(255);
DECLARE occurrences INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value VARCHAR(255);
DECLARE done INT DEFAULT 0;
DECLARE cur CURSOR FOR SELECT tmp_table1.id, tmp_table1.value FROM
tmp_table1 WHERE tmp_table1.value IS NOT NULL AND tmp_table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
SET #expr = CONCAT('CREATE TEMPORARY TABLE tmp_table1 (id INT NOT NULL, value VARCHAR(255)) ENGINE=Memory SELECT ',
id_column,' id, ', value_column,' value FROM ',tablename);
PREPARE stmt FROM #expr;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TEMPORARY TABLE IF EXISTS tmp_table2;
CREATE TEMPORARY TABLE tmp_table2 (id INT NOT NULL, value VARCHAR(255) NOT NULL) ENGINE=Memory;
OPEN cur;
read_loop: LOOP
FETCH cur INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurrences = (SELECT CHAR_LENGTH(value) -
CHAR_LENGTH(REPLACE(value, delim, '')) + 1);
SET i=1;
WHILE i <= occurrences DO
SET splitted_value = (SELECT TRIM(SUBSTRING_INDEX(
SUBSTRING_INDEX(value, delim, i), delim, -1)));
INSERT INTO tmp_table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
SELECT * FROM tmp_table2;
CLOSE cur;
DROP TEMPORARY TABLE tmp_table1;
END; $$
delimiter ;
Usage example (normalization):
CALL split_value_into_multiple_rows('my_contacts', 'contact_id', 'interests', ',');
CREATE TABLE interests (
interest_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
interest VARCHAR(30) NOT NULL
) SELECT DISTINCT value interest FROM tmp_table2;
CREATE TABLE contact_interest (
contact_id INT NOT NULL,
interest_id INT NOT NULL,
CONSTRAINT fk_contact_interest_my_contacts_contact_id FOREIGN KEY (contact_id) REFERENCES my_contacts (contact_id),
CONSTRAINT fk_contact_interest_interests_interest_id FOREIGN KEY (interest_id) REFERENCES interests (interest_id)
) SELECT my_contacts.contact_id, interests.interest_id
FROM my_contacts, tmp_table2, interests
WHERE my_contacts.contact_id = tmp_table2.id AND interests.interest = tmp_table2.value;
Because you have to keep adding "select number union all" in the example above which can be a issue if you need a large number of splits.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
I decided a better way was this which only adds a number row for each digit. Example below is good for 1-1000 add another row makes it good for 1-10000 and so on.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from(SELECT #row := #row + 1 AS n FROM
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t2,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t3,
(SELECT #row:=0) as numbers)as numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
CREATE PROCEDURE `getVal`()
BEGIN
declare r_len integer;
declare r_id integer;
declare r_val varchar(20);
declare i integer;
DECLARE found_row int(10);
DECLARE row CURSOR FOR select length(replace(val,"|","")),id,val from split;
create table x(id int,name varchar(20));
open row;
select FOUND_ROWS() into found_row ;
read_loop: LOOP
IF found_row = 0 THEN
LEAVE read_loop;
END IF;
set i = 1;
FETCH row INTO r_len,r_id,r_val;
label1: LOOP
IF i <= r_len THEN
insert into x values( r_id,SUBSTRING(replace(r_val,"|",""),i,1));
SET i = i + 1;
ITERATE label1;
END IF;
LEAVE label1;
END LOOP label1;
set found_row = found_row - 1;
END LOOP;
close row;
select * from x;
drop table x;
END
The original question was for MySQL and SQL in general. The example below is for the new versions of MySQL. Unfortunately, a generic query that would work on any SQL server is not possible. Some servers do no support CTE, others do not have substring_index, yet others have built-in functions for splitting a string into multiple rows.
--- the answer follows ---
Recursive queries are convenient when the server does not provide built-in functionality. They can also be the bottleneck.
The following query was written and tested on MySQL version 8.0.16. It will not work on version 5.7-. The old versions do not support Common Table Expression (CTE) and thus recursive queries.
with recursive
input as (
select 1 as id, 'a,b,c' as names
union
select 2, 'b'
),
recurs as (
select id, 1 as pos, names as remain, substring_index( names, ',', 1 ) as name
from input
union all
select id, pos + 1, substring( remain, char_length( name ) + 2 ),
substring_index( substring( remain, char_length( name ) + 2 ), ',', 1 )
from recurs
where char_length( remain ) > char_length( name )
)
select id, name
from recurs
order by id, pos;
Here is another trick. The number 20 is the maximum number of values in comma separated list.
We use single query, no procedures.
If tbl has more rows than the maximum number of values in single comma separated list you may remove 'inner join tbl a inner join tbl c' part from query. I added this because there are only 2 rows.
CREATE TABLE tbl(id int NOT NULL,name varchar(50),PRIMARY KEY (`id`));
insert into tbl values(1, 'a,b,c'), (2, 'd');
select id ,SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1) as name
from tbl
INNER JOIN (
SELECT *
FROM (
SELECT #n:=#n+1 AS n
FROM tbl inner join tbl a inner join tbl c
INNER JOIN (SELECT #n:=0) AS _a
) AS _a WHERE _a.n <= 20
)AS k ON k.n <= LENGTH(name) - LENGTH(replace(name, ',','')) + 1
order by id
This is a trick to extract nth value in comma separated list:
SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1)
Here is my solution
-- Create the maximum number of words we want to pick (indexes in n)
with recursive n(i) as (
select
1 i
union all
select i+1 from n where i < 1000
)
select distinct
s.id,
s.oaddress,
-- n.i,
-- use the index to pick the nth word, the last words will always repeat. Remove the duplicates with distinct
if(instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' ') > 0,
reverse(substr(reverse(trim(substring_index(s.oaddress,' ',n.i))),1,
instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' '))),
trim(substring_index(s.oaddress,' ',n.i))) oth
from
app_schools s,
n
Best Practice.
Result:
SELECT
SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
(
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0
) a
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
First, create a numbers table:
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0;
| help_id |
| --- |
| 0 |
| 1 |
| 2 |
| 3 |
| ... |
| 24 |
Second, just split the str:
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
numbers_table
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
| oid |
| --- |
| ab |
| bc |
| cd |
SELECT id, unnest(string_to_array(name, ',')) AS names
FROM datatable
Hope this helps :D

How Can I get individual counts of Group_Concat data in sql? [duplicate]

I have table :
id | name
1 | a,b,c
2 | b
i want output like this :
id | name
1 | a
1 | b
1 | c
2 | b
If you can create a numbers table, that contains numbers from 1 to the maximum fields to split, you could use a solution like this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
numbers inner join tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
Please see fiddle here.
If you cannot create a table, then a solution can be this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
an example fiddle is here.
If the name column were a JSON array (like '["a","b","c"]'), then you could extract/unpack it with JSON_TABLE() (available since MySQL 8.0.4):
select t.id, j.name
from mytable t
join json_table(
t.name,
'$[*]' columns (name varchar(50) path '$')
) j;
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
If you store the values in a simple CSV format, then you would first need to convert it to JSON:
select t.id, j.name
from mytable t
join json_table(
replace(json_array(t.name), ',', '","'),
'$[*]' columns (name varchar(50) path '$')
) j
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
I have take the reference from here with changed column name.
DELIMITER $$
CREATE FUNCTION strSplit(x VARCHAR(65000), delim VARCHAR(12), pos INTEGER)
RETURNS VARCHAR(65000)
BEGIN
DECLARE output VARCHAR(65000);
SET output = REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos)
, LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1)
, delim
, '');
IF output = '' THEN SET output = null; END IF;
RETURN output;
END $$
CREATE PROCEDURE BadTableToGoodTable()
BEGIN
DECLARE i INTEGER;
SET i = 1;
REPEAT
INSERT INTO GoodTable (id, name)
SELECT id, strSplit(name, ',', i) FROM BadTable
WHERE strSplit(name, ',', i) IS NOT NULL;
SET i = i + 1;
UNTIL ROW_COUNT() = 0
END REPEAT;
END $$
DELIMITER ;
Here is my attempt:
The first select presents the csv field to the split.
Using recursive CTE, we can create a list of numbers that are limited to the number of terms in the csv field.
The number of terms is just the difference in the length of the csv field and itself with all the delimiters removed.
Then joining with this numbers, substring_index extracts that term.
with recursive
T as ( select 'a,b,c,d,e,f' as items),
N as ( select 1 as n union select n + 1 from N, T
where n <= length(items) - length(replace(items, ',', '')))
select distinct substring_index(substring_index(items, ',', n), ',', -1)
group_name from N, T
My variant: stored procedure that takes table name, field names and delimiter as arguments. Inspired by post http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
delimiter $$
DROP PROCEDURE IF EXISTS split_value_into_multiple_rows $$
CREATE PROCEDURE split_value_into_multiple_rows(tablename VARCHAR(20),
id_column VARCHAR(20), value_column VARCHAR(20), delim CHAR(1))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value VARCHAR(255);
DECLARE occurrences INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value VARCHAR(255);
DECLARE done INT DEFAULT 0;
DECLARE cur CURSOR FOR SELECT tmp_table1.id, tmp_table1.value FROM
tmp_table1 WHERE tmp_table1.value IS NOT NULL AND tmp_table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
SET #expr = CONCAT('CREATE TEMPORARY TABLE tmp_table1 (id INT NOT NULL, value VARCHAR(255)) ENGINE=Memory SELECT ',
id_column,' id, ', value_column,' value FROM ',tablename);
PREPARE stmt FROM #expr;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TEMPORARY TABLE IF EXISTS tmp_table2;
CREATE TEMPORARY TABLE tmp_table2 (id INT NOT NULL, value VARCHAR(255) NOT NULL) ENGINE=Memory;
OPEN cur;
read_loop: LOOP
FETCH cur INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurrences = (SELECT CHAR_LENGTH(value) -
CHAR_LENGTH(REPLACE(value, delim, '')) + 1);
SET i=1;
WHILE i <= occurrences DO
SET splitted_value = (SELECT TRIM(SUBSTRING_INDEX(
SUBSTRING_INDEX(value, delim, i), delim, -1)));
INSERT INTO tmp_table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
SELECT * FROM tmp_table2;
CLOSE cur;
DROP TEMPORARY TABLE tmp_table1;
END; $$
delimiter ;
Usage example (normalization):
CALL split_value_into_multiple_rows('my_contacts', 'contact_id', 'interests', ',');
CREATE TABLE interests (
interest_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
interest VARCHAR(30) NOT NULL
) SELECT DISTINCT value interest FROM tmp_table2;
CREATE TABLE contact_interest (
contact_id INT NOT NULL,
interest_id INT NOT NULL,
CONSTRAINT fk_contact_interest_my_contacts_contact_id FOREIGN KEY (contact_id) REFERENCES my_contacts (contact_id),
CONSTRAINT fk_contact_interest_interests_interest_id FOREIGN KEY (interest_id) REFERENCES interests (interest_id)
) SELECT my_contacts.contact_id, interests.interest_id
FROM my_contacts, tmp_table2, interests
WHERE my_contacts.contact_id = tmp_table2.id AND interests.interest = tmp_table2.value;
Because you have to keep adding "select number union all" in the example above which can be a issue if you need a large number of splits.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
I decided a better way was this which only adds a number row for each digit. Example below is good for 1-1000 add another row makes it good for 1-10000 and so on.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from(SELECT #row := #row + 1 AS n FROM
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t2,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t3,
(SELECT #row:=0) as numbers)as numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
CREATE PROCEDURE `getVal`()
BEGIN
declare r_len integer;
declare r_id integer;
declare r_val varchar(20);
declare i integer;
DECLARE found_row int(10);
DECLARE row CURSOR FOR select length(replace(val,"|","")),id,val from split;
create table x(id int,name varchar(20));
open row;
select FOUND_ROWS() into found_row ;
read_loop: LOOP
IF found_row = 0 THEN
LEAVE read_loop;
END IF;
set i = 1;
FETCH row INTO r_len,r_id,r_val;
label1: LOOP
IF i <= r_len THEN
insert into x values( r_id,SUBSTRING(replace(r_val,"|",""),i,1));
SET i = i + 1;
ITERATE label1;
END IF;
LEAVE label1;
END LOOP label1;
set found_row = found_row - 1;
END LOOP;
close row;
select * from x;
drop table x;
END
The original question was for MySQL and SQL in general. The example below is for the new versions of MySQL. Unfortunately, a generic query that would work on any SQL server is not possible. Some servers do no support CTE, others do not have substring_index, yet others have built-in functions for splitting a string into multiple rows.
--- the answer follows ---
Recursive queries are convenient when the server does not provide built-in functionality. They can also be the bottleneck.
The following query was written and tested on MySQL version 8.0.16. It will not work on version 5.7-. The old versions do not support Common Table Expression (CTE) and thus recursive queries.
with recursive
input as (
select 1 as id, 'a,b,c' as names
union
select 2, 'b'
),
recurs as (
select id, 1 as pos, names as remain, substring_index( names, ',', 1 ) as name
from input
union all
select id, pos + 1, substring( remain, char_length( name ) + 2 ),
substring_index( substring( remain, char_length( name ) + 2 ), ',', 1 )
from recurs
where char_length( remain ) > char_length( name )
)
select id, name
from recurs
order by id, pos;
Here is another trick. The number 20 is the maximum number of values in comma separated list.
We use single query, no procedures.
If tbl has more rows than the maximum number of values in single comma separated list you may remove 'inner join tbl a inner join tbl c' part from query. I added this because there are only 2 rows.
CREATE TABLE tbl(id int NOT NULL,name varchar(50),PRIMARY KEY (`id`));
insert into tbl values(1, 'a,b,c'), (2, 'd');
select id ,SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1) as name
from tbl
INNER JOIN (
SELECT *
FROM (
SELECT #n:=#n+1 AS n
FROM tbl inner join tbl a inner join tbl c
INNER JOIN (SELECT #n:=0) AS _a
) AS _a WHERE _a.n <= 20
)AS k ON k.n <= LENGTH(name) - LENGTH(replace(name, ',','')) + 1
order by id
This is a trick to extract nth value in comma separated list:
SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1)
Here is my solution
-- Create the maximum number of words we want to pick (indexes in n)
with recursive n(i) as (
select
1 i
union all
select i+1 from n where i < 1000
)
select distinct
s.id,
s.oaddress,
-- n.i,
-- use the index to pick the nth word, the last words will always repeat. Remove the duplicates with distinct
if(instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' ') > 0,
reverse(substr(reverse(trim(substring_index(s.oaddress,' ',n.i))),1,
instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' '))),
trim(substring_index(s.oaddress,' ',n.i))) oth
from
app_schools s,
n
Best Practice.
Result:
SELECT
SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
(
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0
) a
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
First, create a numbers table:
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0;
| help_id |
| --- |
| 0 |
| 1 |
| 2 |
| 3 |
| ... |
| 24 |
Second, just split the str:
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
numbers_table
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
| oid |
| --- |
| ab |
| bc |
| cd |
SELECT id, unnest(string_to_array(name, ',')) AS names
FROM datatable
Hope this helps :D

get all comma seperated values of a string using mysql [duplicate]

I have table :
id | name
1 | a,b,c
2 | b
i want output like this :
id | name
1 | a
1 | b
1 | c
2 | b
If you can create a numbers table, that contains numbers from 1 to the maximum fields to split, you could use a solution like this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
numbers inner join tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
Please see fiddle here.
If you cannot create a table, then a solution can be this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
an example fiddle is here.
If the name column were a JSON array (like '["a","b","c"]'), then you could extract/unpack it with JSON_TABLE() (available since MySQL 8.0.4):
select t.id, j.name
from mytable t
join json_table(
t.name,
'$[*]' columns (name varchar(50) path '$')
) j;
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
If you store the values in a simple CSV format, then you would first need to convert it to JSON:
select t.id, j.name
from mytable t
join json_table(
replace(json_array(t.name), ',', '","'),
'$[*]' columns (name varchar(50) path '$')
) j
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
I have take the reference from here with changed column name.
DELIMITER $$
CREATE FUNCTION strSplit(x VARCHAR(65000), delim VARCHAR(12), pos INTEGER)
RETURNS VARCHAR(65000)
BEGIN
DECLARE output VARCHAR(65000);
SET output = REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos)
, LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1)
, delim
, '');
IF output = '' THEN SET output = null; END IF;
RETURN output;
END $$
CREATE PROCEDURE BadTableToGoodTable()
BEGIN
DECLARE i INTEGER;
SET i = 1;
REPEAT
INSERT INTO GoodTable (id, name)
SELECT id, strSplit(name, ',', i) FROM BadTable
WHERE strSplit(name, ',', i) IS NOT NULL;
SET i = i + 1;
UNTIL ROW_COUNT() = 0
END REPEAT;
END $$
DELIMITER ;
Here is my attempt:
The first select presents the csv field to the split.
Using recursive CTE, we can create a list of numbers that are limited to the number of terms in the csv field.
The number of terms is just the difference in the length of the csv field and itself with all the delimiters removed.
Then joining with this numbers, substring_index extracts that term.
with recursive
T as ( select 'a,b,c,d,e,f' as items),
N as ( select 1 as n union select n + 1 from N, T
where n <= length(items) - length(replace(items, ',', '')))
select distinct substring_index(substring_index(items, ',', n), ',', -1)
group_name from N, T
My variant: stored procedure that takes table name, field names and delimiter as arguments. Inspired by post http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
delimiter $$
DROP PROCEDURE IF EXISTS split_value_into_multiple_rows $$
CREATE PROCEDURE split_value_into_multiple_rows(tablename VARCHAR(20),
id_column VARCHAR(20), value_column VARCHAR(20), delim CHAR(1))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value VARCHAR(255);
DECLARE occurrences INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value VARCHAR(255);
DECLARE done INT DEFAULT 0;
DECLARE cur CURSOR FOR SELECT tmp_table1.id, tmp_table1.value FROM
tmp_table1 WHERE tmp_table1.value IS NOT NULL AND tmp_table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
SET #expr = CONCAT('CREATE TEMPORARY TABLE tmp_table1 (id INT NOT NULL, value VARCHAR(255)) ENGINE=Memory SELECT ',
id_column,' id, ', value_column,' value FROM ',tablename);
PREPARE stmt FROM #expr;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TEMPORARY TABLE IF EXISTS tmp_table2;
CREATE TEMPORARY TABLE tmp_table2 (id INT NOT NULL, value VARCHAR(255) NOT NULL) ENGINE=Memory;
OPEN cur;
read_loop: LOOP
FETCH cur INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurrences = (SELECT CHAR_LENGTH(value) -
CHAR_LENGTH(REPLACE(value, delim, '')) + 1);
SET i=1;
WHILE i <= occurrences DO
SET splitted_value = (SELECT TRIM(SUBSTRING_INDEX(
SUBSTRING_INDEX(value, delim, i), delim, -1)));
INSERT INTO tmp_table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
SELECT * FROM tmp_table2;
CLOSE cur;
DROP TEMPORARY TABLE tmp_table1;
END; $$
delimiter ;
Usage example (normalization):
CALL split_value_into_multiple_rows('my_contacts', 'contact_id', 'interests', ',');
CREATE TABLE interests (
interest_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
interest VARCHAR(30) NOT NULL
) SELECT DISTINCT value interest FROM tmp_table2;
CREATE TABLE contact_interest (
contact_id INT NOT NULL,
interest_id INT NOT NULL,
CONSTRAINT fk_contact_interest_my_contacts_contact_id FOREIGN KEY (contact_id) REFERENCES my_contacts (contact_id),
CONSTRAINT fk_contact_interest_interests_interest_id FOREIGN KEY (interest_id) REFERENCES interests (interest_id)
) SELECT my_contacts.contact_id, interests.interest_id
FROM my_contacts, tmp_table2, interests
WHERE my_contacts.contact_id = tmp_table2.id AND interests.interest = tmp_table2.value;
Because you have to keep adding "select number union all" in the example above which can be a issue if you need a large number of splits.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
I decided a better way was this which only adds a number row for each digit. Example below is good for 1-1000 add another row makes it good for 1-10000 and so on.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from(SELECT #row := #row + 1 AS n FROM
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t2,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t3,
(SELECT #row:=0) as numbers)as numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
CREATE PROCEDURE `getVal`()
BEGIN
declare r_len integer;
declare r_id integer;
declare r_val varchar(20);
declare i integer;
DECLARE found_row int(10);
DECLARE row CURSOR FOR select length(replace(val,"|","")),id,val from split;
create table x(id int,name varchar(20));
open row;
select FOUND_ROWS() into found_row ;
read_loop: LOOP
IF found_row = 0 THEN
LEAVE read_loop;
END IF;
set i = 1;
FETCH row INTO r_len,r_id,r_val;
label1: LOOP
IF i <= r_len THEN
insert into x values( r_id,SUBSTRING(replace(r_val,"|",""),i,1));
SET i = i + 1;
ITERATE label1;
END IF;
LEAVE label1;
END LOOP label1;
set found_row = found_row - 1;
END LOOP;
close row;
select * from x;
drop table x;
END
The original question was for MySQL and SQL in general. The example below is for the new versions of MySQL. Unfortunately, a generic query that would work on any SQL server is not possible. Some servers do no support CTE, others do not have substring_index, yet others have built-in functions for splitting a string into multiple rows.
--- the answer follows ---
Recursive queries are convenient when the server does not provide built-in functionality. They can also be the bottleneck.
The following query was written and tested on MySQL version 8.0.16. It will not work on version 5.7-. The old versions do not support Common Table Expression (CTE) and thus recursive queries.
with recursive
input as (
select 1 as id, 'a,b,c' as names
union
select 2, 'b'
),
recurs as (
select id, 1 as pos, names as remain, substring_index( names, ',', 1 ) as name
from input
union all
select id, pos + 1, substring( remain, char_length( name ) + 2 ),
substring_index( substring( remain, char_length( name ) + 2 ), ',', 1 )
from recurs
where char_length( remain ) > char_length( name )
)
select id, name
from recurs
order by id, pos;
Here is another trick. The number 20 is the maximum number of values in comma separated list.
We use single query, no procedures.
If tbl has more rows than the maximum number of values in single comma separated list you may remove 'inner join tbl a inner join tbl c' part from query. I added this because there are only 2 rows.
CREATE TABLE tbl(id int NOT NULL,name varchar(50),PRIMARY KEY (`id`));
insert into tbl values(1, 'a,b,c'), (2, 'd');
select id ,SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1) as name
from tbl
INNER JOIN (
SELECT *
FROM (
SELECT #n:=#n+1 AS n
FROM tbl inner join tbl a inner join tbl c
INNER JOIN (SELECT #n:=0) AS _a
) AS _a WHERE _a.n <= 20
)AS k ON k.n <= LENGTH(name) - LENGTH(replace(name, ',','')) + 1
order by id
This is a trick to extract nth value in comma separated list:
SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1)
Here is my solution
-- Create the maximum number of words we want to pick (indexes in n)
with recursive n(i) as (
select
1 i
union all
select i+1 from n where i < 1000
)
select distinct
s.id,
s.oaddress,
-- n.i,
-- use the index to pick the nth word, the last words will always repeat. Remove the duplicates with distinct
if(instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' ') > 0,
reverse(substr(reverse(trim(substring_index(s.oaddress,' ',n.i))),1,
instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' '))),
trim(substring_index(s.oaddress,' ',n.i))) oth
from
app_schools s,
n
Best Practice.
Result:
SELECT
SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
(
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0
) a
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
First, create a numbers table:
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0;
| help_id |
| --- |
| 0 |
| 1 |
| 2 |
| 3 |
| ... |
| 24 |
Second, just split the str:
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
numbers_table
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
| oid |
| --- |
| ab |
| bc |
| cd |
SELECT id, unnest(string_to_array(name, ',')) AS names
FROM datatable
Hope this helps :D

How to split and count item in MySQL [duplicate]

I have table :
id | name
1 | a,b,c
2 | b
i want output like this :
id | name
1 | a
1 | b
1 | c
2 | b
If you can create a numbers table, that contains numbers from 1 to the maximum fields to split, you could use a solution like this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
numbers inner join tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
Please see fiddle here.
If you cannot create a table, then a solution can be this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
an example fiddle is here.
If the name column were a JSON array (like '["a","b","c"]'), then you could extract/unpack it with JSON_TABLE() (available since MySQL 8.0.4):
select t.id, j.name
from mytable t
join json_table(
t.name,
'$[*]' columns (name varchar(50) path '$')
) j;
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
If you store the values in a simple CSV format, then you would first need to convert it to JSON:
select t.id, j.name
from mytable t
join json_table(
replace(json_array(t.name), ',', '","'),
'$[*]' columns (name varchar(50) path '$')
) j
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
I have take the reference from here with changed column name.
DELIMITER $$
CREATE FUNCTION strSplit(x VARCHAR(65000), delim VARCHAR(12), pos INTEGER)
RETURNS VARCHAR(65000)
BEGIN
DECLARE output VARCHAR(65000);
SET output = REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos)
, LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1)
, delim
, '');
IF output = '' THEN SET output = null; END IF;
RETURN output;
END $$
CREATE PROCEDURE BadTableToGoodTable()
BEGIN
DECLARE i INTEGER;
SET i = 1;
REPEAT
INSERT INTO GoodTable (id, name)
SELECT id, strSplit(name, ',', i) FROM BadTable
WHERE strSplit(name, ',', i) IS NOT NULL;
SET i = i + 1;
UNTIL ROW_COUNT() = 0
END REPEAT;
END $$
DELIMITER ;
Here is my attempt:
The first select presents the csv field to the split.
Using recursive CTE, we can create a list of numbers that are limited to the number of terms in the csv field.
The number of terms is just the difference in the length of the csv field and itself with all the delimiters removed.
Then joining with this numbers, substring_index extracts that term.
with recursive
T as ( select 'a,b,c,d,e,f' as items),
N as ( select 1 as n union select n + 1 from N, T
where n <= length(items) - length(replace(items, ',', '')))
select distinct substring_index(substring_index(items, ',', n), ',', -1)
group_name from N, T
My variant: stored procedure that takes table name, field names and delimiter as arguments. Inspired by post http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
delimiter $$
DROP PROCEDURE IF EXISTS split_value_into_multiple_rows $$
CREATE PROCEDURE split_value_into_multiple_rows(tablename VARCHAR(20),
id_column VARCHAR(20), value_column VARCHAR(20), delim CHAR(1))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value VARCHAR(255);
DECLARE occurrences INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value VARCHAR(255);
DECLARE done INT DEFAULT 0;
DECLARE cur CURSOR FOR SELECT tmp_table1.id, tmp_table1.value FROM
tmp_table1 WHERE tmp_table1.value IS NOT NULL AND tmp_table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
SET #expr = CONCAT('CREATE TEMPORARY TABLE tmp_table1 (id INT NOT NULL, value VARCHAR(255)) ENGINE=Memory SELECT ',
id_column,' id, ', value_column,' value FROM ',tablename);
PREPARE stmt FROM #expr;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TEMPORARY TABLE IF EXISTS tmp_table2;
CREATE TEMPORARY TABLE tmp_table2 (id INT NOT NULL, value VARCHAR(255) NOT NULL) ENGINE=Memory;
OPEN cur;
read_loop: LOOP
FETCH cur INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurrences = (SELECT CHAR_LENGTH(value) -
CHAR_LENGTH(REPLACE(value, delim, '')) + 1);
SET i=1;
WHILE i <= occurrences DO
SET splitted_value = (SELECT TRIM(SUBSTRING_INDEX(
SUBSTRING_INDEX(value, delim, i), delim, -1)));
INSERT INTO tmp_table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
SELECT * FROM tmp_table2;
CLOSE cur;
DROP TEMPORARY TABLE tmp_table1;
END; $$
delimiter ;
Usage example (normalization):
CALL split_value_into_multiple_rows('my_contacts', 'contact_id', 'interests', ',');
CREATE TABLE interests (
interest_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
interest VARCHAR(30) NOT NULL
) SELECT DISTINCT value interest FROM tmp_table2;
CREATE TABLE contact_interest (
contact_id INT NOT NULL,
interest_id INT NOT NULL,
CONSTRAINT fk_contact_interest_my_contacts_contact_id FOREIGN KEY (contact_id) REFERENCES my_contacts (contact_id),
CONSTRAINT fk_contact_interest_interests_interest_id FOREIGN KEY (interest_id) REFERENCES interests (interest_id)
) SELECT my_contacts.contact_id, interests.interest_id
FROM my_contacts, tmp_table2, interests
WHERE my_contacts.contact_id = tmp_table2.id AND interests.interest = tmp_table2.value;
Because you have to keep adding "select number union all" in the example above which can be a issue if you need a large number of splits.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
I decided a better way was this which only adds a number row for each digit. Example below is good for 1-1000 add another row makes it good for 1-10000 and so on.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from(SELECT #row := #row + 1 AS n FROM
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t2,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t3,
(SELECT #row:=0) as numbers)as numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
CREATE PROCEDURE `getVal`()
BEGIN
declare r_len integer;
declare r_id integer;
declare r_val varchar(20);
declare i integer;
DECLARE found_row int(10);
DECLARE row CURSOR FOR select length(replace(val,"|","")),id,val from split;
create table x(id int,name varchar(20));
open row;
select FOUND_ROWS() into found_row ;
read_loop: LOOP
IF found_row = 0 THEN
LEAVE read_loop;
END IF;
set i = 1;
FETCH row INTO r_len,r_id,r_val;
label1: LOOP
IF i <= r_len THEN
insert into x values( r_id,SUBSTRING(replace(r_val,"|",""),i,1));
SET i = i + 1;
ITERATE label1;
END IF;
LEAVE label1;
END LOOP label1;
set found_row = found_row - 1;
END LOOP;
close row;
select * from x;
drop table x;
END
The original question was for MySQL and SQL in general. The example below is for the new versions of MySQL. Unfortunately, a generic query that would work on any SQL server is not possible. Some servers do no support CTE, others do not have substring_index, yet others have built-in functions for splitting a string into multiple rows.
--- the answer follows ---
Recursive queries are convenient when the server does not provide built-in functionality. They can also be the bottleneck.
The following query was written and tested on MySQL version 8.0.16. It will not work on version 5.7-. The old versions do not support Common Table Expression (CTE) and thus recursive queries.
with recursive
input as (
select 1 as id, 'a,b,c' as names
union
select 2, 'b'
),
recurs as (
select id, 1 as pos, names as remain, substring_index( names, ',', 1 ) as name
from input
union all
select id, pos + 1, substring( remain, char_length( name ) + 2 ),
substring_index( substring( remain, char_length( name ) + 2 ), ',', 1 )
from recurs
where char_length( remain ) > char_length( name )
)
select id, name
from recurs
order by id, pos;
Here is another trick. The number 20 is the maximum number of values in comma separated list.
We use single query, no procedures.
If tbl has more rows than the maximum number of values in single comma separated list you may remove 'inner join tbl a inner join tbl c' part from query. I added this because there are only 2 rows.
CREATE TABLE tbl(id int NOT NULL,name varchar(50),PRIMARY KEY (`id`));
insert into tbl values(1, 'a,b,c'), (2, 'd');
select id ,SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1) as name
from tbl
INNER JOIN (
SELECT *
FROM (
SELECT #n:=#n+1 AS n
FROM tbl inner join tbl a inner join tbl c
INNER JOIN (SELECT #n:=0) AS _a
) AS _a WHERE _a.n <= 20
)AS k ON k.n <= LENGTH(name) - LENGTH(replace(name, ',','')) + 1
order by id
This is a trick to extract nth value in comma separated list:
SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1)
Here is my solution
-- Create the maximum number of words we want to pick (indexes in n)
with recursive n(i) as (
select
1 i
union all
select i+1 from n where i < 1000
)
select distinct
s.id,
s.oaddress,
-- n.i,
-- use the index to pick the nth word, the last words will always repeat. Remove the duplicates with distinct
if(instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' ') > 0,
reverse(substr(reverse(trim(substring_index(s.oaddress,' ',n.i))),1,
instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' '))),
trim(substring_index(s.oaddress,' ',n.i))) oth
from
app_schools s,
n
Best Practice.
Result:
SELECT
SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
(
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0
) a
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
First, create a numbers table:
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0;
| help_id |
| --- |
| 0 |
| 1 |
| 2 |
| 3 |
| ... |
| 24 |
Second, just split the str:
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
numbers_table
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
| oid |
| --- |
| ab |
| bc |
| cd |
SELECT id, unnest(string_to_array(name, ',')) AS names
FROM datatable
Hope this helps :D

SQL split values to multiple rows

I have table :
id | name
1 | a,b,c
2 | b
i want output like this :
id | name
1 | a
1 | b
1 | c
2 | b
If you can create a numbers table, that contains numbers from 1 to the maximum fields to split, you could use a solution like this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
numbers inner join tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
Please see fiddle here.
If you cannot create a table, then a solution can be this:
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
an example fiddle is here.
If the name column were a JSON array (like '["a","b","c"]'), then you could extract/unpack it with JSON_TABLE() (available since MySQL 8.0.4):
select t.id, j.name
from mytable t
join json_table(
t.name,
'$[*]' columns (name varchar(50) path '$')
) j;
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
If you store the values in a simple CSV format, then you would first need to convert it to JSON:
select t.id, j.name
from mytable t
join json_table(
replace(json_array(t.name), ',', '","'),
'$[*]' columns (name varchar(50) path '$')
) j
Result:
| id | name |
| --- | ---- |
| 1 | a |
| 1 | b |
| 1 | c |
| 2 | b |
View on DB Fiddle
I have take the reference from here with changed column name.
DELIMITER $$
CREATE FUNCTION strSplit(x VARCHAR(65000), delim VARCHAR(12), pos INTEGER)
RETURNS VARCHAR(65000)
BEGIN
DECLARE output VARCHAR(65000);
SET output = REPLACE(SUBSTRING(SUBSTRING_INDEX(x, delim, pos)
, LENGTH(SUBSTRING_INDEX(x, delim, pos - 1)) + 1)
, delim
, '');
IF output = '' THEN SET output = null; END IF;
RETURN output;
END $$
CREATE PROCEDURE BadTableToGoodTable()
BEGIN
DECLARE i INTEGER;
SET i = 1;
REPEAT
INSERT INTO GoodTable (id, name)
SELECT id, strSplit(name, ',', i) FROM BadTable
WHERE strSplit(name, ',', i) IS NOT NULL;
SET i = i + 1;
UNTIL ROW_COUNT() = 0
END REPEAT;
END $$
DELIMITER ;
Here is my attempt:
The first select presents the csv field to the split.
Using recursive CTE, we can create a list of numbers that are limited to the number of terms in the csv field.
The number of terms is just the difference in the length of the csv field and itself with all the delimiters removed.
Then joining with this numbers, substring_index extracts that term.
with recursive
T as ( select 'a,b,c,d,e,f' as items),
N as ( select 1 as n union select n + 1 from N, T
where n <= length(items) - length(replace(items, ',', '')))
select distinct substring_index(substring_index(items, ',', n), ',', -1)
group_name from N, T
My variant: stored procedure that takes table name, field names and delimiter as arguments. Inspired by post http://www.marcogoncalves.com/2011/03/mysql-split-column-string-into-rows/
delimiter $$
DROP PROCEDURE IF EXISTS split_value_into_multiple_rows $$
CREATE PROCEDURE split_value_into_multiple_rows(tablename VARCHAR(20),
id_column VARCHAR(20), value_column VARCHAR(20), delim CHAR(1))
BEGIN
DECLARE id INT DEFAULT 0;
DECLARE value VARCHAR(255);
DECLARE occurrences INT DEFAULT 0;
DECLARE i INT DEFAULT 0;
DECLARE splitted_value VARCHAR(255);
DECLARE done INT DEFAULT 0;
DECLARE cur CURSOR FOR SELECT tmp_table1.id, tmp_table1.value FROM
tmp_table1 WHERE tmp_table1.value IS NOT NULL AND tmp_table1.value != '';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
SET #expr = CONCAT('CREATE TEMPORARY TABLE tmp_table1 (id INT NOT NULL, value VARCHAR(255)) ENGINE=Memory SELECT ',
id_column,' id, ', value_column,' value FROM ',tablename);
PREPARE stmt FROM #expr;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
DROP TEMPORARY TABLE IF EXISTS tmp_table2;
CREATE TEMPORARY TABLE tmp_table2 (id INT NOT NULL, value VARCHAR(255) NOT NULL) ENGINE=Memory;
OPEN cur;
read_loop: LOOP
FETCH cur INTO id, value;
IF done THEN
LEAVE read_loop;
END IF;
SET occurrences = (SELECT CHAR_LENGTH(value) -
CHAR_LENGTH(REPLACE(value, delim, '')) + 1);
SET i=1;
WHILE i <= occurrences DO
SET splitted_value = (SELECT TRIM(SUBSTRING_INDEX(
SUBSTRING_INDEX(value, delim, i), delim, -1)));
INSERT INTO tmp_table2 VALUES (id, splitted_value);
SET i = i + 1;
END WHILE;
END LOOP;
SELECT * FROM tmp_table2;
CLOSE cur;
DROP TEMPORARY TABLE tmp_table1;
END; $$
delimiter ;
Usage example (normalization):
CALL split_value_into_multiple_rows('my_contacts', 'contact_id', 'interests', ',');
CREATE TABLE interests (
interest_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
interest VARCHAR(30) NOT NULL
) SELECT DISTINCT value interest FROM tmp_table2;
CREATE TABLE contact_interest (
contact_id INT NOT NULL,
interest_id INT NOT NULL,
CONSTRAINT fk_contact_interest_my_contacts_contact_id FOREIGN KEY (contact_id) REFERENCES my_contacts (contact_id),
CONSTRAINT fk_contact_interest_interests_interest_id FOREIGN KEY (interest_id) REFERENCES interests (interest_id)
) SELECT my_contacts.contact_id, interests.interest_id
FROM my_contacts, tmp_table2, interests
WHERE my_contacts.contact_id = tmp_table2.id AND interests.interest = tmp_table2.value;
Because you have to keep adding "select number union all" in the example above which can be a issue if you need a large number of splits.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from
(select 1 n union all
select 2 union all select 3 union all
select 4 union all select 5) numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
I decided a better way was this which only adds a number row for each digit. Example below is good for 1-1000 add another row makes it good for 1-10000 and so on.
select
tablename.id,
SUBSTRING_INDEX(SUBSTRING_INDEX(tablename.name, ',', numbers.n), ',', -1) name
from(SELECT #row := #row + 1 AS n FROM
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t2,
(select 0 union all select 1 union all select 2 union all select 3 union all select 4 union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) as t3,
(SELECT #row:=0) as numbers)as numbers INNER JOIN tablename
on CHAR_LENGTH(tablename.name)
-CHAR_LENGTH(REPLACE(tablename.name, ',', ''))>=numbers.n-1
order by
id, n
CREATE PROCEDURE `getVal`()
BEGIN
declare r_len integer;
declare r_id integer;
declare r_val varchar(20);
declare i integer;
DECLARE found_row int(10);
DECLARE row CURSOR FOR select length(replace(val,"|","")),id,val from split;
create table x(id int,name varchar(20));
open row;
select FOUND_ROWS() into found_row ;
read_loop: LOOP
IF found_row = 0 THEN
LEAVE read_loop;
END IF;
set i = 1;
FETCH row INTO r_len,r_id,r_val;
label1: LOOP
IF i <= r_len THEN
insert into x values( r_id,SUBSTRING(replace(r_val,"|",""),i,1));
SET i = i + 1;
ITERATE label1;
END IF;
LEAVE label1;
END LOOP label1;
set found_row = found_row - 1;
END LOOP;
close row;
select * from x;
drop table x;
END
The original question was for MySQL and SQL in general. The example below is for the new versions of MySQL. Unfortunately, a generic query that would work on any SQL server is not possible. Some servers do no support CTE, others do not have substring_index, yet others have built-in functions for splitting a string into multiple rows.
--- the answer follows ---
Recursive queries are convenient when the server does not provide built-in functionality. They can also be the bottleneck.
The following query was written and tested on MySQL version 8.0.16. It will not work on version 5.7-. The old versions do not support Common Table Expression (CTE) and thus recursive queries.
with recursive
input as (
select 1 as id, 'a,b,c' as names
union
select 2, 'b'
),
recurs as (
select id, 1 as pos, names as remain, substring_index( names, ',', 1 ) as name
from input
union all
select id, pos + 1, substring( remain, char_length( name ) + 2 ),
substring_index( substring( remain, char_length( name ) + 2 ), ',', 1 )
from recurs
where char_length( remain ) > char_length( name )
)
select id, name
from recurs
order by id, pos;
Here is another trick. The number 20 is the maximum number of values in comma separated list.
We use single query, no procedures.
If tbl has more rows than the maximum number of values in single comma separated list you may remove 'inner join tbl a inner join tbl c' part from query. I added this because there are only 2 rows.
CREATE TABLE tbl(id int NOT NULL,name varchar(50),PRIMARY KEY (`id`));
insert into tbl values(1, 'a,b,c'), (2, 'd');
select id ,SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1) as name
from tbl
INNER JOIN (
SELECT *
FROM (
SELECT #n:=#n+1 AS n
FROM tbl inner join tbl a inner join tbl c
INNER JOIN (SELECT #n:=0) AS _a
) AS _a WHERE _a.n <= 20
)AS k ON k.n <= LENGTH(name) - LENGTH(replace(name, ',','')) + 1
order by id
This is a trick to extract nth value in comma separated list:
SUBSTRING_INDEX(SUBSTRING_INDEX(name, ',', k.n), ',', -1)
Here is my solution
-- Create the maximum number of words we want to pick (indexes in n)
with recursive n(i) as (
select
1 i
union all
select i+1 from n where i < 1000
)
select distinct
s.id,
s.oaddress,
-- n.i,
-- use the index to pick the nth word, the last words will always repeat. Remove the duplicates with distinct
if(instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' ') > 0,
reverse(substr(reverse(trim(substring_index(s.oaddress,' ',n.i))),1,
instr(reverse(trim(substring_index(s.oaddress,' ',n.i))),' '))),
trim(substring_index(s.oaddress,' ',n.i))) oth
from
app_schools s,
n
Best Practice.
Result:
SELECT
SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
(
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0
) a
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
First, create a numbers table:
SELECT #xi:=#xi+1 as help_id from
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc1,
(SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) xc2,
(SELECT #xi:=-1) xc0;
| help_id |
| --- |
| 0 |
| 1 |
| 2 |
| 3 |
| ... |
| 24 |
Second, just split the str:
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('ab,bc,cd',',',help_id+1),',',-1) AS oid
FROM
numbers_table
WHERE
help_id < LENGTH('ab,bc,cd')-LENGTH(REPLACE('ab,bc,cd',',',''))+1
| oid |
| --- |
| ab |
| bc |
| cd |
SELECT id, unnest(string_to_array(name, ',')) AS names
FROM datatable
Hope this helps :D