I've a table with only one col, and contain free text, and each keyword separated by a space
TEXT
apple orange dog
apple banna cat
I want to have a result sth like
apple 2
orange 1
dog 1
banna 1
cat 1
Is it possible with just SQL?
Try this
create Function
Create FUNCTION [dbo].[fn_Split](#text varchar(8000), #delimiter varchar(20))
RETURNS #Strings TABLE
(
position int IDENTITY PRIMARY KEY,
value varchar(8000)
)
AS
BEGIN
DECLARE #index int
SET #index = -1
WHILE (LEN(#text) > 0)
BEGIN
SET #index = CHARINDEX(#delimiter , #text)
IF (#index = 0) AND (LEN(#text) > 0)
BEGIN
INSERT INTO #Strings VALUES (#text)
BREAK
END
IF (#index > 1)
BEGIN
INSERT INTO #Strings VALUES (LEFT(#text, #index - 1))
SET #text = RIGHT(#text, (LEN(#text) - #index))
END
ELSE
SET #text = RIGHT(#text, (LEN(#text) - #index))
END
RETURN
END
Query
declare #ColumnNameList VARCHAR(MAX)
select #ColumnNameList = COALESCE(#ColumnNameList +' ' ,'') +column from table
select #ColumnNameList
select value,count(value) as count from (select value from fn_split((select #ColumnNameList),' '))
Space_tbl group by value
I have a stored procedure where input is a comma separated string say '12341,34567,12446,12997' and it is not sure that the input string always carries numerical data. It may be '12341,34as67,12$46,1we97' so I need to validate them and use only the valid data in query.
Say my query is (Where the column OrderCode is int type)
select * from dbo.DataCollector where OrderCode in (12341,34567,12446,12997)
or only the valid data if other are invalid
select * from dbo.DataCollector where OrderCode in (12341)
For such situation what would be a good solution.
One way that works also in SQl-Server 2005 would be to create a split-function, then you can use ISNUMERIC to check if it's a number:
DECLARE #Input VARCHAR(MAX) = '12341,34as67,12$46,1we97'
SELECT i.Item FROM dbo.Split(#Input, ',')i
WHERE IsNumeric(i.Item) = 1
Demo
Your complete query:
select * from dbo.DataCollector
where OrderCode in ( SELECT i.Item FROM dbo.Split(#Input, ',')i
WHERE IsNumeric(i.Item) = 1 )
Here is the split-function which i use:
CREATE FUNCTION [dbo].[Split]
(
#ItemList NVARCHAR(MAX),
#delimiter CHAR(1)
)
RETURNS #ItemTable TABLE (Item VARCHAR(250))
AS
BEGIN
DECLARE #tempItemList NVARCHAR(MAX)
SET #tempItemList = #ItemList
DECLARE #i INT
DECLARE #Item NVARCHAR(4000)
SET #i = CHARINDEX(#delimiter, #tempItemList)
WHILE (LEN(#tempItemList) > 0)
BEGIN
IF #i = 0
SET #Item = #tempItemList
ELSE
SET #Item = LEFT(#tempItemList, #i - 1)
INSERT INTO #ItemTable(Item) VALUES(#Item)
IF #i = 0
SET #tempItemList = ''
ELSE
SET #tempItemList = RIGHT(#tempItemList, LEN(#tempItemList) - #i)
SET #i = CHARINDEX(#delimiter, #tempItemList)
END
RETURN
END
Edit according to the comment of Damien that ISNUMERIC has it's issues. You can use this function to check if it's a real integer:
CREATE FUNCTION dbo.IsInteger(#Value VarChar(18))
RETURNS Bit
AS
BEGIN
RETURN IsNull(
(Select Case When CharIndex('.', #Value) > 0
Then Case When Convert(int, ParseName(#Value, 1)) <> 0
Then 0
Else 1
End
Else 1
End
Where IsNumeric(#Value + 'e0') = 1), 0)
END
Here is another example with damien's "bad" input which contains £ and 0d0:
Demo
Is there a way to extract out list items within a column and put each item in its own row? I also want to do away with the list brackets [] too! Can anyone help?
Input:
name doc_id type
JD [409839589143224] 1843
JD [470573363064028,239564999464566] 778
BK [426580780775177,342730259144025] 202
Desired Output:
name doc_id type
JD 409839589143224 1843
JD 470573363064028 778
JD 239564999464566 778
BK 426580780775177 202
BK 342730259144025 202
this is solution using tsql, i`m sure you can do the same with MySql
CREATE FUNCTION ufn_split (#list nvarchar(MAX))
RETURNS #tbl TABLE (part nvarchar(max) NOT NULL) AS
BEGIN
DECLARE #pos int,
#nextpos int,
#valuelen int
SELECT #pos = 0, #nextpos = 1
WHILE #nextpos > 0
BEGIN
SELECT #nextpos = charindex(',', #list, #pos + 1)
SELECT #valuelen = CASE WHEN #nextpos > 0
THEN #nextpos
ELSE len(#list) + 1
END - #pos - 1
INSERT #tbl (part)
VALUES (substring(#list, #pos + 1, #valuelen))
SELECT #pos = #nextpos
END
RETURN
END;
GO
SELECT t.name, st.part, t.type
FROM #table AS t
CROSS APPLY ufn_split( LEFT(RIGHT(t.doc_id, LEN(t.doc_id) - 1),LEN(t.doc_id) - 2) ) AS st;
The first function is implimentation of Split function on TSQL from here Split function equivalent in T-SQL?
I'm having a problem with random values being generated for each row in a result set in SQL Server 2008. I found a similar question here, but upon implementing the proposed answer, I saw the same problem as before. When running the query I have provided below, it seems that the same values will sometimes show up in consecutive rows, even though I'm calling for a new NEWID() with each row.
DECLARE #Id int = 0
DECLARE #Counter int = 1
DECLARE #Value int
CREATE TABLE #Table1
(
id int identity(1,1)
,Value int
)
WHILE #Counter < 100000
BEGIN
INSERT INTO #Table1 (Value)
SELECT CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT)
SET #Counter += 1
END
SET #Counter = 0
WHILE #Counter < 5
BEGIN
SELECT
#Value = T.Value
,#Id = T.id
FROM #Table1 T
WHERE T.id = CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT) + 1 + #Counter
IF #Id <> 0
SELECT #Value AS Value ,#Id as ID
SET #Counter += 1
END
DROP TABLE #Table1
If I change the INT to a BIGINT, as suggested in the link I provided, nothing is solved, so I don't believe that it's an "overflow" issue.
If I take the calculation out of the select, I don't get the doubled rows:
DECLARE #Id int = 0
DECLARE #Counter int = 1
DECLARE #Value int
-- new variable
DECLARE #DERIVED INT
CREATE TABLE #Table1
(
id int identity(1,1)
,Value int
)
WHILE #Counter < 100000
BEGIN
INSERT INTO #Table1 (Value)
SELECT CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT)
SET #Counter += 1
END
SET #Counter = 0
WHILE #Counter < 5
BEGIN
--set here to remove calculation from the select
SET #DERIVED = CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT) + 1 + #Counter;
SELECT
#Value = T.Value
,#Id = T.id
FROM #Table1 T
WHERE T.id = #DERIVED
IF #Id <> 0
SELECT #Value AS Value ,#Id as ID;
SET #Counter += 1
END
DROP TABLE #Table1
I'm seeing the duplicates every time with the pseudorandom generator inside the select. Oddly enough, I get about the same frequency of duplicates on the insert loop whether or not the calculation is inside the insert... select. It could be coincidence, since we are dealing with a randomly selected number. Note also, that since you're adding to the pseudorandom result, the results aren't technically duplicates. They're descending sequences:
11111 + 1 + 1 = 11113
11110 + 1 + 2 = 11113
Same overall result, different pseudorandom result. However, if I change
CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT) + 1 + #Counter
to
CAST(RAND(CHECKSUM(NEWID())) * 100000 as INT) + #Counter + #Counter
I still consistently get duplicates. That implies that the optimizer may be caching/re-using values, at least on the select. I'd call that improper for a non-deterministic function call. I get similar results on 10.0.1600 and 10.50.1600 (2008 RTM and 2008R2 RTM).
I have the following table with each row having comma-separated values:
ID
-----------------------------------------------------------------------------
10031,10042
10064,10023,10060,10065,10003,10011,10009,10012,10027,10004,10037,10039
10009
20011,10027,10032,10063,10023,10033,20060,10012,10020,10031,10011,20036,10041
I need to get a count for each ID (a groupby).
I am just trying to avoid cursor implementation and stumped on how to do this without cursors.
Any Help would be appreciated !
You will want to use a split function:
create FUNCTION [dbo].[Split](#String varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (items varchar(MAX))
as
begin
declare #idx int
declare #slice varchar(8000)
select #idx = 1
if len(#String)<1 or #String is null return
while #idx!= 0
begin
set #idx = charindex(#Delimiter,#String)
if #idx!=0
set #slice = left(#String,#idx - 1)
else
set #slice = #String
if(len(#slice)>0)
insert into #temptable(Items) values(#slice)
set #String = right(#String,len(#String) - #idx)
if len(#String) = 0 break
end
return
end;
And then you can query the data in the following manner:
select items, count(items)
from table1 t1
cross apply dbo.split(t1.id, ',')
group by items
See SQL Fiddle With Demo
Well, the solution i always use, and probably there might be a better way, is to use a function that will split everything. No use for cursors, just a while loop.
if OBJECT_ID('splitValueByDelimiter') is not null
begin
drop function splitValueByDelimiter
end
go
create function splitValueByDelimiter (
#inputValue varchar(max)
, #delimiter varchar(1)
)
returns #results table (value varchar(max))
as
begin
declare #delimeterIndex int
, #tempValue varchar(max)
set #delimeterIndex = 1
while #delimeterIndex > 0 and len(isnull(#inputValue, '')) > 0
begin
set #delimeterIndex = charindex(#delimiter, #inputValue)
if #delimeterIndex > 0
set #tempValue = left(#inputValue, #delimeterIndex - 1)
else
set #tempValue = #inputValue
if(len(#tempValue)>0)
begin
insert
into #results
select #tempValue
end
set #inputValue = right(#inputValue, len(#inputValue) - #delimeterIndex)
end
return
end
After that you can call the output like this :
if object_id('test') is not null
begin
drop table test
end
go
create table test (
Id varchar(max)
)
insert
into test
select '10031,10042'
union all select '10064,10023,10060,10065,10003,10011,10009,10012,10027,10004,10037,10039'
union all select '10009'
union all select '20011,10027,10032,10063,10023,10033,20060,10012,10020,10031,10011,20036,10041'
select value
from test
cross apply splitValueByDelimiter(Id, ',')
Hope it helps, although i am still looping through everything
After reiterating the comment above about NOT putting multiple values into a single column (Use a separate child table with one value per row!),
Nevertheless, one possible approach: use a UDF to convert delimited string to a table. Once all the values have been converted to tables, combine all the tables into one table and do a group By on that table.
Create Function dbo.ParseTextString (#S Text, #delim VarChar(5))
Returns #tOut Table
(ValNum Integer Identity Primary Key,
sVal VarChar(8000))
As
Begin
Declare #dlLen TinyInt -- Length of delimiter
Declare #wind VarChar(8000) -- Will Contain Window into text string
Declare #winLen Integer -- Length of Window
Declare #isLastWin TinyInt -- Boolean to indicate processing Last Window
Declare #wPos Integer -- Start Position of Window within Text String
Declare #roVal VarChar(8000)-- String Data to insert into output Table
Declare #BtchSiz Integer -- Maximum Size of Window
Set #BtchSiz = 7900 -- (Reset to smaller values to test routine)
Declare #dlPos Integer -- Position within Window of next Delimiter
Declare #Strt Integer -- Start Position of each data value within Window
-- -------------------------------------------------------------------------
-- ---------------------------
If #delim is Null Set #delim = '|'
If DataLength(#S) = 0 Or
Substring(#S, 1, #BtchSiz) = #delim Return
-- --------------------------------------------
Select #dlLen = DataLength(#delim),
#Strt = 1, #wPos = 1,
#wind = Substring(#S, 1, #BtchSiz)
Select #winLen = DataLength(#wind),
#isLastWin = Case When DataLength(#wind) = #BtchSiz
Then 0 Else 1 End,
#dlPos = CharIndex(#delim, #wind, #Strt)
-- --------------------------------------------
While #Strt <= #winLen
Begin
If #dlPos = 0 Begin -- No More delimiters in window
If #isLastWin = 1 Set #dlPos = #winLen + 1
Else Begin
Set #wPos = #wPos + #Strt - 1
Set #wind = Substring(#S, #wPos, #BtchSiz)
-- ----------------------------------------
Select #winLen = DataLength(#wind), #Strt = 1,
#isLastWin = Case When DataLength(#wind) = #BtchSiz
Then 0 Else 1 End,
#dlPos = CharIndex(#delim, #wind, 1)
If #dlPos = 0 Set #dlPos = #winLen + 1
End
End
-- -------------------------------
Insert #tOut (sVal)
Select LTrim(Substring(#wind,
#Strt, #dlPos - #Strt))
-- -------------------------------
-- Move #Strt to char after last delimiter
Set #Strt = #dlPos + #dlLen
Set #dlPos = CharIndex(#delim, #wind, #Strt)
End
Return
End
Then write, (using your table schema),
Declare #AllVals VarChar(8000)
Select #AllVals = Coalesce(#allVals + ',', '') + ID
From Table Where ID Is Not null
-- -----------------------------------------
Select sVal, Count(*)
From dbo.ParseTextString(#AllVals, ',')
Group By sval