How to split multiple subvalues with multiple sub-indexes into columns in MySQL? - mysql

-- Sample data: C1 packs one or more "[AU ...] org; org" groups into a single text value.
CREATE TABLE tablename (
    id INT,
    C1 TEXT
);

-- Column lists are spelled out so the inserts do not depend on column order.
INSERT INTO tablename (id, C1) VALUES
    (1, '[AU 1] string 1; [AU 2] string 2; [AU 3] string 3.1; string 3.2; [AU 4] string 4.1; string 4.2; [AU 5] string 5'),
    (2, '[AU 1; AU 2] string 1'),
    (3, '[AU 1] string 1; [AU 2] string 2');

-- Helper table holding the integers 1..6.
CREATE TABLE numbers (
    n INT PRIMARY KEY
);

INSERT INTO numbers (n) VALUES (1), (2), (3), (4), (5), (6);
This is as close as I got by following the examples of '@fthiella' and '@RGarcia'.
Please see fiddle here.
The result I get is different than expected in "I want output like this:"
I want output like this
| ID | AU | ORG |
| 1 |[AU 1]|string_1|
| 1 |[AU 2]|string_2|
| 1 |[AU 3]|string_3.1|
| 1 |[AU 3]|string_3.2|
| 1 |[AU 4]|string_4.1|
| 1 |[AU 4]|string_4.2|
| 1 |[AU 5]|string_5|
| 2 |[AU 1; AU 2]|string_1|
| 3 |[AU 1]|string_1|
| 3 |[AU 2]|string_2|

-- Splits tablename.C1 into one output row per (AU, ORG) pair.
--   cte1: peels one "[AU ...] org; org" group off the front of the text per iteration.
--   cte2: separates each group into its bracketed AU part and the remaining ORG list.
--   cte3: peels one ';'-separated ORG value off the list per iteration.
-- The ordinality_* counters record the position of each piece so the final
-- ORDER BY can restore the original left-to-right order.
WITH RECURSIVE
cte1 AS (
    -- Anchor: everything before the 2nd '[' is the first group; slack is the rest.
    -- slack starts at the last character of the consumed prefix, so it always
    -- keeps at least one character; recursion stops once it contains no '['.
    SELECT id,
           TRIM(TRAILING ';' FROM TRIM(SUBSTRING_INDEX(C1, '[', 2))) AS one_group,
           SUBSTRING(C1 FROM LENGTH(SUBSTRING_INDEX(C1, '[', 2))) AS slack,
           1 AS ordinality_au
    FROM tablename
    UNION ALL
    SELECT id,
           TRIM(TRAILING ';' FROM TRIM(SUBSTRING_INDEX(slack, '[', 2))),
           SUBSTRING(slack FROM LENGTH(SUBSTRING_INDEX(slack, '[', 2))),
           ordinality_au + 1
    FROM cte1
    WHERE LOCATE('[', slack) > 0
),
cte2 AS (
    -- AU: text up to the first ']' with the bracket re-appended.
    -- ORG: text after the last ']'.
    SELECT id,
           CONCAT(SUBSTRING_INDEX(one_group, ']', 1), ']') AS AU,
           TRIM(SUBSTRING_INDEX(one_group, ']', -1)) AS ORG,
           ordinality_au
    FROM cte1
),
cte3 AS (
    -- Anchor: first ';'-separated value; slack is what remains after removing
    -- that value and the separator that follows it.
    SELECT id,
           AU,
           ordinality_au,
           SUBSTRING_INDEX(ORG, ';', 1) AS ORG,
           TRIM(TRIM(LEADING ';' FROM TRIM(LEADING SUBSTRING_INDEX(ORG, ';', 1) FROM ORG))) AS slack,
           1 AS ordinality_org
    FROM cte2
    UNION ALL
    SELECT id,
           AU,
           ordinality_au,
           SUBSTRING_INDEX(slack, ';', 1),
           TRIM(TRIM(LEADING ';' FROM TRIM(LEADING SUBSTRING_INDEX(slack, ';', 1) FROM slack))),
           ordinality_org + 1
    FROM cte3
    WHERE TRIM(slack) != ''
)
SELECT id,
       AU,
       ORG
FROM cte3
ORDER BY id, ordinality_au, ordinality_org;
https://dbfiddle.uk/?rdbms=mariadb_10.4&fiddle=a3258f8f1cd92eca0c480ea6673f13f1

Related

How can I get this pivot kind of output in sql

Consider my source table as given below i.e customer.
How can i get the required output as shown using sql (oracle or mysql)
customer :
customer id Purchase_id cashback
123 abc111 5
123 abc112 5
123 abc113 2
345 abc311 0
345 abc312 2
678 abc611 4
678 abc612 3
678 abc613 5
Output Needed:
ID purchare_id_1 purchare_id_2 purchare_id_3 cashback_1 cashback_2 cashback_3
123 abc111 abc112 abc113 5 5 2
345 abc311 abc312 0 2
678 abc611 abc612 abc613 4 3 5
DML and DDL:
-- Sample data: one row per purchase, up to three purchases per customer.
create table cust_table (
    customer_id int,
    Purchase_id varchar(100),
    cashback    int
);

-- Column list is spelled out so the insert does not depend on column order.
insert into cust_table (customer_id, Purchase_id, cashback) values
    (123, 'abc111', 5),
    (123, 'abc112', 5),
    (123, 'abc113', 2),
    (345, 'abc311', 0),
    (345, 'abc312', 2),
    (678, 'abc611', 4),
    (678, 'abc612', 3),
    (678, 'abc613', 5);
commit;
PS:
The data might not be static; it can change.
-- Number each customer's purchases by Purchase_id, then fold the first three
-- numbered rows into columns with conditional aggregation.
WITH numbered_purchases AS (
    SELECT
        customer_id,
        Purchase_id,
        cashback,
        ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY Purchase_id) AS rn
    FROM cust_table
)
SELECT
    customer_id,
    MAX(CASE rn WHEN 1 THEN Purchase_id END) AS purchase_1,
    MAX(CASE rn WHEN 2 THEN Purchase_id END) AS purchase_2,
    MAX(CASE rn WHEN 3 THEN Purchase_id END) AS purchase_3,
    MAX(CASE rn WHEN 1 THEN cashback END)    AS cashback_1,
    MAX(CASE rn WHEN 2 THEN cashback END)    AS cashback_2,
    MAX(CASE rn WHEN 3 THEN cashback END)    AS cashback_3
FROM numbered_purchases
GROUP BY customer_id
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=ec2de721d7089a82a5f7ae669ce2d19e
In MySQL, you can use GROUP_CONCAT along with GROUP BY to see a partial result, as shown below:
-- One row per customer with comma-separated lists of purchase ids and cashbacks.
-- Both lists are ordered by Purchase_id: without ORDER BY the order of values
-- inside GROUP_CONCAT is not defined, so the two lists could disagree on
-- which position belongs to which purchase.
select
    customer_id,
    group_concat(`Purchase_id` order by `Purchase_id` separator ',') as `Purchase_idX`,
    group_concat(`cashback` order by `Purchase_id` separator ',') as `cashbackX`
from cust_table
group by customer_id;
If you want the exact result run the below query:
-- Pivots up to three purchases per customer into columns by building ordered
-- comma-separated lists (AA = purchase ids, BB = cashbacks) and slicing them.
-- Position k is returned as NULL when the list has fewer than k items; the
-- LOCATE checks detect a missing 1st / 2nd comma.
-- NOTE(review): GROUP_CONCAT skips NULLs, so a NULL cashback would shift the
-- later cashback positions — confirm cashback is never NULL.
select
    customer_id,
    SUBSTRING_INDEX(AA, ',', 1) as purchare_id_1,
    CASE
        WHEN LOCATE(',', AA, 2) = 0 THEN NULL
        ELSE SUBSTRING_INDEX(SUBSTRING_INDEX(AA, ',', 2), ',', -1)
    END AS purchare_id_2,
    CASE
        WHEN LOCATE(',', AA, LOCATE(',', AA, 1) + 1) = 0 THEN NULL
        ELSE SUBSTRING_INDEX(SUBSTRING_INDEX(AA, ',', 3), ',', -1)
    END AS purchare_id_3,
    SUBSTRING_INDEX(BB, ',', 1) as cashback_1,
    CASE
        WHEN LOCATE(',', BB, 2) = 0 THEN NULL
        ELSE SUBSTRING_INDEX(SUBSTRING_INDEX(BB, ',', 2), ',', -1)
    END AS cashback_2,
    CASE
        WHEN LOCATE(',', BB, LOCATE(',', BB, 1) + 1) = 0 THEN NULL
        ELSE SUBSTRING_INDEX(SUBSTRING_INDEX(BB, ',', 3), ',', -1)
    END AS cashback_3
from
    ( select
          customer_id,
          -- Same ORDER BY in both lists keeps position k of AA and BB on the same purchase.
          group_concat(`Purchase_id` order by `Purchase_id` separator ',') as AA,
          group_concat(`cashback` order by `Purchase_id` separator ',') as BB
      from cust_table
      group by customer_id ) as TB;

How to switch rows to columns and vice versa in SQL Server 2008

I have a problem switching rows to columns and vice versa in SQL Server 2008. I have tried several queries to find a solution, but I did not get the proper results.
I've a table as following:
-- Sample data, created as a local temporary table. "declare #tmpTable table"
-- is not valid T-SQL (a table variable's name must start with @); a temp
-- table keeps the #tmpTable name that the queries in this thread read from.
create table #tmpTable
(name varchar(20), date_ date, sales_code char(1), sales smallint, earned int);

insert into #tmpTable (name, date_, sales_code, sales, earned)
values ('Robert', '2016/8/1', 'A', 2, 30),
       ('Robert', '2016/8/1', 'B', 3, 45),
       ('Robert', '2016/8/2', 'B', 1, 15),
       ('Robert', '2016/8/3', 'B', 2, 30),
       ('Jhon', '2016/8/1', 'A', 3, 45),
       ('Jhon', '2016/8/2', 'A', 3, 45),
       ('Jhon', '2016/8/3', 'B', 2, 30);

select * from #tmpTable;
Result:
Name date_ sales_code sales earned
------ ---------- ---------- ----- ------
Robert 2016-08-01 A 2 30
Robert 2016-08-01 B 3 45
Robert 2016-08-02 B 1 15
Robert 2016-08-03 B 2 30
Jhon 2016-08-01 A 3 45
Jhon 2016-08-02 A 3 45
Jhon 2016-08-03 B 2 30
Then, I have next query :
-- Builds three pivoted result sets (one per category: sales_code, sales,
-- earned) and stacks them with UNION ALL. Each branch repeats the same
-- innermost query, which produces one row per (name, date_) with three
-- comma-separated lists, then keeps a single list column and pivots it so
-- that each listed date becomes a column.
-- STUFF(..., 1, 2, ' ') replaces the leading ', ' of each list with one space.
-- Branch 1: category 'sales_code'.
select * from (
select name, 'sales_code' as category, date_, sales_code from (
select * from (
SELECT name, date_
,STUFF((SELECT ', ' + sales_code [text()]
FROM #tmpTable
-- The correlated subqueries filter on date_ only, not on name, so each list
-- contains the values of every name that has a row on that date.
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales_code
,STUFF((SELECT ', ' + convert(varchar(max), sales) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales
,STUFF((SELECT ', ' + convert(varchar(max), earned) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') earned
FROM #tmpTable t
GROUP BY name, date_
) as a
) as a
) as a
pivot (
max(sales_code) FOR date_ IN ([2016/8/1], [2016/8/2], [2016/8/3])
)as pv
union all
-- Branch 2: category 'sales' (same innermost query, pivots the sales list).
select * from (
select name, 'sales' as category, date_, sales from (
select * from (
SELECT name, date_
,STUFF((SELECT ', ' + sales_code [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales_code
,STUFF((SELECT ', ' + convert(varchar(max), sales) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales
,STUFF((SELECT ', ' + convert(varchar(max), earned) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') earned
FROM #tmpTable t
GROUP BY name, date_
) as a
) as a
) as a
pivot (
max(sales) FOR date_ IN ([2016/8/1], [2016/8/2], [2016/8/3])
)as pv
union all
-- Branch 3: category 'earned' (same innermost query, pivots the earned list).
select * from (
select name, 'earned' as category, date_, earned from (
select * from (
SELECT name, date_
,STUFF((SELECT ', ' + sales_code [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales_code
,STUFF((SELECT ', ' + convert(varchar(max), sales) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') sales
,STUFF((SELECT ', ' + convert(varchar(max), earned) [text()]
FROM #tmpTable
WHERE date_ = t.date_
FOR XML PATH(''), TYPE)
.value('.','NVARCHAR(MAX)'),1,2,' ') earned
FROM #tmpTable t
GROUP BY name, date_
) as a
) as a
) as a
pivot (
max(earned) FOR date_ IN ([2016/8/1], [2016/8/2], [2016/8/3])
)as pv
It will display the result:
name category 2016/8/1 2016/8/2 2016/8/3
------- -------- -------- ------- --------
Jhon sales_code A, B, A B, A B, B
Robert sales_code A, B, A B, A B, B
Jhon sales 2, 3, 3 1, 3 2, 2
Robert sales 2, 3, 3 1, 3 2, 2
Jhon earned 30, 45, 45 15, 45 30, 30
Robert earned 30, 45, 45 15, 45 30, 30
But, I would like to get the following result:
name category 2016/8/1 2016/8/2 2016/8/3
---- -------- -------- -------- --------
Robert sales_code A, B B B
Robert sales 2, 3 1 2
Robert earned 30, 45 15 30
Jhon sales_code A A B
Jhon sales 3 3 2
Jhon earned 45 45 30
Thanks a lot for any help.
First you need to unpivot your data. To do this, all data types must match, so you need to convert the two numeric columns to varchars.
Still use STUFF before you unpivot to get the combined values per name and date_, but use DISTINCT to get each (name, date_) pair only once.
After you unpivot, you just need to pivot again.
-- 1. "combined": one row per distinct (Name, date_) carrying three
--    comma-separated lists built from the rows with the same Name and date_.
-- 2. UNPIVOT turns the three list columns into (category, val) rows.
-- 3. PIVOT spreads val across one column per listed date.
SELECT *
FROM (
    SELECT DISTINCT
        src.Name,
        src.date_,
        -- STUFF(..., 1, 2, '') strips the leading ', ' from each list.
        STUFF((SELECT ', ' + same_day.sales_code
               FROM #tmpTable AS same_day
               WHERE same_day.Name = src.Name
                 AND same_day.date_ = src.date_
               FOR XML PATH('')), 1, 2, '') AS sales_code,
        STUFF((SELECT ', ' + CONVERT(VARCHAR, same_day.sales)
               FROM #tmpTable AS same_day
               WHERE same_day.Name = src.Name
                 AND same_day.date_ = src.date_
               FOR XML PATH('')), 1, 2, '') AS sales,
        STUFF((SELECT ', ' + CONVERT(VARCHAR, same_day.earned)
               FROM #tmpTable AS same_day
               WHERE same_day.Name = src.Name
                 AND same_day.date_ = src.date_
               FOR XML PATH('')), 1, 2, '') AS earned
    FROM #tmpTable AS src
) AS combined
UNPIVOT (
    val FOR category IN (sales_code, sales, earned)
) AS unpvt
PIVOT (
    MAX(val) FOR date_ IN ([2016-08-01], [2016-08-02], [2016-08-03])
) AS pvt
ORDER BY name DESC,
         category DESC

Counting distinct multi-column patterns

I'm using SQL Server 2014 and I need some help with a hard query.
I have the following table (MyTable). These columns names are just for the example. They are actually totally different from each other.
id int,
col1 int,
col2 int,
..
..
..
col70 int
For each pair of sequential columns {(col1, col2), (col2, col3), ..., (col69, col70)}, I need to calculate the following: the number of different pairs that each value has, where col_i is the static column and col_i+1 is the other one. Each value needs to be divided by the total number of records in the table. For example:
col1 | col2
45 | 789
56 | 345
99 | 234
45 | 789
45 | 222
89 | 678
89 | 345
45 | 789
90 | 234
12 | 567
Calculation:
((45, 789)+(45, 222))/10
(56, 345)/10
(99, 234)/10
(45, 789)+(45, 222)/10
(45, 789)+(45, 222)/10
(89, 678)+(89, 345)/10
(89, 678)+(89, 345)/10
((45, 789)+(45, 222))/10
(90, 234)/10
(12, 567)/10
Output:
col1_col2
0.2
0.1
0.1
0.2
0.2
0.2
0.2
0.2
0.1
0.1
Explanation for the first records:
45 is the value of the static column ,so now i'll check how many different combination we can find with col2:
45 | 789
45 | 789
45 | 222
45 | 789
Total distinct combinations divided by number of records in the table: 2/10 = 0.2
This calculation need for each pairs of sequential columns. Any recommendation? Is there's a smart way to calculate it automatically instead of writing a query with line for each pair?
An example assuming you have a primary key:
-- Sample data for the column-pair frequency example. column_id is declared
-- as the primary key because the queries that follow group by it as the row
-- identifier.
create table my_table (
    column_id int not null primary key,
    column1   int not null,
    column2   int not null
);

insert into my_table (column_id, column1, column2)
values
    (1, 45, 789),
    (2, 56, 345),
    (3, 99, 234),
    (4, 45, 789),
    (5, 45, 222),
    (6, 89, 678),
    (7, 89, 345),
    (8, 45, 789),
    (9, 90, 234),
    (10, 12, 567);
-- For every row of my_table, computes
--   (number of distinct @column_b values seen with that row's @column_a value)
--   / (number of rows in the result)
-- by generating the query text for the chosen pair of columns.
-- T-SQL variables must be prefixed with @ (a # prefix is not a variable).
declare @column_a as nvarchar(100) = N'column1';
declare @column_b as nvarchar(100) = N'column2';
declare @result_column as nvarchar(100) = N'column1_2';
declare @sql_string as nvarchar(4000);

-- QUOTENAME brackets each identifier so a name containing spaces or a stray
-- quote cannot change the shape of the generated statement. It returns NULL
-- for input longer than 128 characters, which makes the whole string NULL.
set @sql_string =
    N'select a.column_id,
    1.0 * count( distinct b.' + quotename(@column_b) + N') / (count(a.' + quotename(@column_a) + N') over ()) as ' + quotename(@result_column)
    + N' from my_table a
    inner join my_table b
    on a.' + quotename(@column_a) + N' = b.' + quotename(@column_a) +
    N' group by a.column_id, a.' + quotename(@column_a) +
    N' order by a.column_id';

-- print @sql_string;
execute(@sql_string);
If there's no primary key you could use the ROW_NUMBER() function to create an identifier, but the result order would change. The print command, commented out here, can be useful for checking the dynamic SQL string.
Putting the dynamic SQL into a stored procedure:
-- column_freq: for every row of my_table, returns
--   (number of distinct @column_b values seen with that row's @column_a value)
--   / (number of rows in the result)
-- Parameters:
--   @column_a       name of the "static" column of the pair
--   @column_b       name of the column whose distinct values are counted
--   @result_column  alias given to the computed ratio
-- Parameter names must start with @ in T-SQL. Each name is passed through
-- QUOTENAME before being spliced into the statement, so it is always treated
-- as a single identifier; QUOTENAME returns NULL for names over 128 chars.
create procedure column_freq
    @column_a nvarchar(100),
    @column_b nvarchar(100),
    @result_column nvarchar(100)
as
begin
    declare @sql_string as nvarchar(4000);

    set @sql_string =
        N'select a.column_id,
        1.0 * count( distinct b.' + quotename(@column_b) + N') / (count(a.' + quotename(@column_a) + N') over ()) as ' + quotename(@result_column)
        + N' from my_table a
        inner join my_table b
        on a.' + quotename(@column_a) + N' = b.' + quotename(@column_a) +
        N' group by a.column_id, a.' + quotename(@column_a) +
        N' order by a.column_id';

    execute(@sql_string);
end;
go
exec column_freq N'column1', N'column2', N'column1_2';
go

Count occurrences that differ within a column

I want to be able to select the number of times the data in columns Somedata_A and Somedata_B has changed from the previous row within its column. I've tried using DISTINCT and it works to some degree. {1,2,3,2,1,1} will show 3 when I want it to show 4, because there are 5 different values in sequence.
Example:
A,B,C,D,E,F
{1,2,3,2,1,1}
A compared to B gives a difference, B compared to C gives a difference . . . E compared to F gives no difference. All in all it gives 4 differences within a set of 6 values.
I have gotten DISTINCT to work but it does not really do the trick for me. And to add more to the question, I'm really not interested in the whole range, let's say just the 2 last days/entries per Title.
Second, I'm concerned about performance issues. I tried the query below on a real set of data and it got interrupted, probably due to a timeout.
SQL Fiddle
MySQL 5.5.32 Schema Setup:
-- Sample data: per Title, a sequence of readings ordered by Date.
CREATE TABLE testdata (
    Title      varchar(10),
    Date       varchar(10),
    Somedata_A int(5),
    Somedata_B int(5)
);

-- String literals use single quotes throughout: double-quoted text is read
-- as an identifier when the ANSI_QUOTES SQL mode is enabled.
INSERT INTO testdata (Title, Date, Somedata_A, Somedata_B) VALUES
    ('Alpha', '123', 1, 2),
    ('Alpha', '234', 2, 2),
    ('Alpha', '345', 1, 2),
    ('Alpha', '349', 1, 2),
    ('Alpha', '456', 1, 2),
    ('Omega', '123', 1, 1),
    ('Omega', '234', 2, 2),
    ('Omega', '345', 3, 3),
    ('Omega', '349', 4, 3),
    ('Omega', '456', 5, 4),
    ('Delta', '123', 1, 1),
    ('Delta', '234', 2, 2),
    ('Delta', '345', 1, 3),
    ('Delta', '349', 2, 3),
    ('Delta', '456', 1, 4);
Query 1:
-- Number of distinct Somedata_A / Somedata_B values per Title.
-- Computed with plain grouped aggregates instead of two correlated scalar
-- subqueries, each of which re-read testdata for every Title.
-- Note: rows whose Title is NULL now form one group with real counts; the
-- correlated form compared Title = Title and so reported 0 for that group.
SELECT
    Title,
    COUNT(DISTINCT Somedata_A) AS A,
    COUNT(DISTINCT Somedata_B) AS B
FROM testdata
GROUP BY Title
Results:
| TITLE | A | B |
|-------|---|---|
| Alpha | 2 | 1 |
| Delta | 2 | 4 |
| Omega | 5 | 4 |
Something like this may work: it uses a variable for row number, joins on an offset of 1 and then counts differences for A and B.
http://sqlfiddle.com/#!2/3bbc8/9/2
-- Counts, per Title, how often Somedata_A / Somedata_B differ between a row
-- and the row numbered one before it. Both derived tables number the rows of
-- testdata with a user variable; joining on A.RowID = B.RowID + 1 pairs each
-- row with its predecessor, and the Title match drops pairs that straddle
-- two titles.
-- User variables are written @name; "#" starts a comment in MySQL, so
-- "set #i = 0" never initialised anything.
-- NOTE(review): MySQL does not guarantee that the variable is incremented in
-- ORDER BY order — verify the numbering on the target server version.
set @i = 0;
set @j = 0;
Select
    A.Title aTitle,
    sum(Case when A.SomeData_A <> B.SomeData_A then 1 else 0 end) AVar,
    sum(Case when A.SomeData_B <> B.SomeData_B then 1 else 0 end) BVar
from
    (SELECT Title, @i:=@i+1 as ROWID, SomeData_A, SomeData_B
     FROM testdata
     ORDER BY Title, date desc) as A
INNER JOIN
    (SELECT Title, @j:=@j+1 as ROWID, SomeData_A, SomeData_B
     FROM testdata
     ORDER BY Title, date desc) as B
    ON A.RowID = B.RowID + 1
   AND A.Title = B.Title
Group by A.Title;
This works (see here) (FYI: Your results in the question do not match your data - for instance, for Alpha, ColumnA: it never changes from 1. The answer should be 0)
Hopefully you can adapt this Statement to your actual data model
-- Per title, counts the rows whose Somedata_A / Somedata_B differs from the
-- row dated exactly one day later (cur = a row, nxt = the row for cur.date + 1 day).
-- Each comparison evaluates to 1 or 0, so SUM counts the differences.
-- NOTE(review): DATE_ADD presumably expects date to hold real date values — confirm.
SELECT
    cur.title,
    SUM(cur.Somedata_A <> nxt.Somedata_a) AS SomeData_A,
    SUM(cur.Somedata_b <> nxt.Somedata_b) AS SomeData_B
FROM testdata AS cur
INNER JOIN testdata AS nxt
    ON cur.title = nxt.title
   AND nxt.date = DATE_ADD(cur.date, INTERVAL 1 DAY)
GROUP BY cur.title
ORDER BY cur.title;

SQL dynamically pivot and group results

I have a table set up like below:
CLIENTNAME MONTHANDYEAR RESOURCE COST
abc JAN2011 res1 1000
abc FEB2011 res1 2000
def JAN2011 res2 1500
def MAR2011 res1 2000
ghi MAR2011 res3 2500
I need an output like below. Months are to be generated dynamically in 3-month intervals. In this case, is there a way to pivot by MONTHANDYEAR as well as group by clientname?
RESOURCE CLIENTNAME JAN2011 FEB2011 MAR2011
res1 abc 1000 1000
res1 def 2000
res2 def 1500
res3 ghi 2500
This is what the PIVOT operator is for:
-- Sums COST per (Resource, ClientName) with one column per listed month.
-- The source query names its columns explicitly: PIVOT groups by every
-- source column that is not the aggregated or the pivoted one, so with
-- SELECT * any further column of the table would silently become a grouping
-- key and split the rows.
SELECT
    Resource,
    ClientName,
    [JAN2011],
    [FEB2011],
    [MAR2011]
FROM
(
    SELECT
        Resource,
        ClientName,
        MONTHANDYEAR,
        COST
    FROM tblname
) AS SourceTable
PIVOT
(
    SUM(COST)
    FOR MONTHANDYEAR IN ([JAN2011], [FEB2011], [MAR2011])
) AS PivotTable;
Since your months are selected dynamically using @startDate as a base month, you can use the following dynamic query:
-- Pivots COST over the three consecutive months starting at @startDate.
-- T-SQL variables must be prefixed with @ (a # prefix is not a variable).
DECLARE @startDate datetime;
SET @startDate = '2011-01-01';

-- Month labels in the form monYYYY (e.g. Jan2011): CONVERT style 13 renders
-- 'dd mon yyyy hh:mi:ss:mmm', characters 4-11 are 'mon yyyy', and REPLACE
-- removes the space.
-- NOTE(review): the month abbreviation presumably follows the session
-- language — confirm it matches the values stored in MONTHANDYEAR.
DECLARE @month1 varchar(30),
        @month2 varchar(30),
        @month3 varchar(30);
SET @month1 = REPLACE(SUBSTRING(CONVERT(varchar(30), @startDate, 13), 4, 8), ' ', '');
SET @month2 = REPLACE(SUBSTRING(CONVERT(varchar(30), DATEADD(MONTH, 1, @startDate), 13), 4, 8), ' ', '');
SET @month3 = REPLACE(SUBSTRING(CONVERT(varchar(30), DATEADD(MONTH, 2, @startDate), 13), 4, 8), ' ', '');

-- The same QUOTENAME'd list feeds both the select list and the PIVOT IN
-- list, so the two can never disagree.
DECLARE @cols varchar(MAX);
SET @cols = QUOTENAME(@month1) + ', ' + QUOTENAME(@month2) + ', ' + QUOTENAME(@month3);

-- The source query names its columns so that only Resource and ClientName
-- act as grouping keys for PIVOT.
DECLARE @sql varchar(MAX);
SET @sql = 'SELECT
    Resource, ClientName, ' + @cols + '
FROM
(
    SELECT
        Resource, ClientName, MONTHANDYEAR, COST
    FROM tblName
) AS SourceTable
PIVOT
(
    SUM(COST)
    FOR MONTHANDYEAR IN (' + @cols + ')
) AS PivotTable';

EXECUTE(@sql);
working sqlfiddle here
This data transformation can be done with the PIVOT function.
If you know the values, then you can hard-code the monthandyear dates:
-- Sums cost per (resource, clientname) with one column per hard-coded month;
-- ISNULL substitutes '' where a month has no rows for that combination.
SELECT
    resource,
    clientname,
    ISNULL(jan2011, '') AS Jan2011,
    ISNULL(feb2011, '') AS Feb2011,
    ISNULL(mar2011, '') AS Mar2011
FROM (
    SELECT clientname, monthandyear, resource, cost
    FROM yourtable
) AS monthly_cost
PIVOT (
    SUM(cost)
    FOR monthandyear IN (Jan2011, Feb2011, Mar2011)
) AS pvt;
See SQL Fiddle with Demo.
But if the dates are unknown, then you will need to use dynamic SQL:
-- Pivots cost over every distinct monthandyear value found in yourtable.
--   @cols      comma-separated, QUOTENAME'd month names for the PIVOT IN list
--   @colNames  matching select-list items, each wrapped as isnull([m], 0) as [m]
--   @query     the generated statement
-- T-SQL variables must be prefixed with @ (a # prefix is not a variable).
DECLARE @cols AS NVARCHAR(MAX),
        @colNames AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- STUFF(..., 1, 1, '') drops the leading comma of each concatenated list.
SELECT @cols = STUFF((SELECT DISTINCT ',' + QUOTENAME(monthandyear)
                      FROM yourtable
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                     , 1, 1, '');

SELECT @colNames = STUFF((SELECT DISTINCT ', isnull(' + QUOTENAME(monthandyear) + ', 0) as ' + QUOTENAME(monthandyear)
                          FROM yourtable
                          FOR XML PATH(''), TYPE
                         ).value('.', 'NVARCHAR(MAX)')
                         , 1, 1, '');

SET @query = 'SELECT resource, clientname,' + @colNames + ' from
(
    select clientname, monthandyear, resource, cost
    from yourtable
) x
pivot
(
    sum(cost)
    for monthandyear in (' + @cols + ')
) p ';

EXECUTE(@query);
See SQL Fiddle with Demo.
The result of both is:
| RESOURCE | CLIENTNAME | JAN2011 | FEB2011 | MAR2011 |
-------------------------------------------------------
| res1 | abc | 1000 | 2000 | 0 |
| res1 | def | 0 | 0 | 2000 |
| res2 | def | 1500 | 0 | 0 |
| res3 | ghi | 0 | 0 | 2500 |
-- Portable alternative to PIVOT: one conditional SUM per month column,
-- contributing 0 for rows that belong to a different month.
SELECT
    Resource,
    Clientname,
    SUM(CASE MonthAndYear WHEN 'JAN2011' THEN COST ELSE 0 END) AS JAN2011,
    SUM(CASE MonthAndYear WHEN 'FEB2011' THEN COST ELSE 0 END) AS FEB2011,
    SUM(CASE MonthAndYear WHEN 'MAR2011' THEN COST ELSE 0 END) AS MAR2011
FROM yourtable
GROUP BY
    Resource,
    Clientname
You can also remove the ELSE 0 to return a NULL value for resource/clientname combinations without data