Optimizing SQL Server stored procedures that use functions? - sql-server-2008

I'd like some help in optimizing the following query:
SELECT DISTINCT TOP (@NumberOfResultsRequested) dbo.FilterRecentSearchesTitles(OriginalSearchTerm) AS SearchTerms
FROM UserSearches
WHERE WebsiteID = @WebsiteID
AND LEN(OriginalSearchTerm) > 20
--AND dbo.FilterRecentSearchesTitles(OriginalSearchTerm) NOT IN (SELECT KeywordUrl FROM PopularSearchesBaseline WHERE WebsiteID = @WebsiteID)
GROUP BY OriginalSearchTerm, GeoID
It runs fine without the line that is commented out. I have an index set on UserSearches.OriginalSearchTerm, WebsiteID, and PopularSearchesBaseline.KeywordUrl, but the query still runs slow with this line in there.
-- UPDATE --
The function used is as follows:
ALTER FUNCTION [dbo].[FilterRecentSearchesTitles]
(
    @SearchTerm VARCHAR(512)
)
RETURNS VARCHAR(512)
AS
BEGIN
    DECLARE @Ret VARCHAR(512)
    SET @Ret = dbo.RegexReplace('[0-9]', '', REPLACE(@SearchTerm, '__s', ''), 1, 1)
    SET @Ret = dbo.RegexReplace('\.', '', @Ret, 1, 1)
    SET @Ret = dbo.RegexReplace('\s{2,}', ' ', @Ret, 1, 1)
    SET @Ret = dbo.RegexReplace('\sv\s', ' ', @Ret, 1, 1)
    RETURN(@Ret)
END
It uses the Regular Expression Workbench code.
However, as I mentioned - without the line that is currently commented out it runs fine.
Any other suggestions?

I am going to guess that dbo.FilterRecentSearchesTitles(OriginalSearchTerm) is a scalar function. My suggestion would be to see about rewriting it as a table-valued function so that it returns a table you can join on.
Otherwise you are calling that function once for every row you return, which is what is causing your problem.
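As a rough sketch of that shape (the wrapper name and the CROSS APPLY usage are my own assumptions, not from the original post); note the real gain only comes once the cleanup logic itself is written inline instead of calling back into the scalar UDF:
CREATE FUNCTION dbo.FilterRecentSearchesTitlesTvf (@SearchTerm VARCHAR(512))
RETURNS TABLE
AS
RETURN
(
    -- Replace this with the full cleanup expression from FilterRecentSearchesTitles
    SELECT REPLACE(@SearchTerm, '__s', '') AS SearchTerms
);
GO
SELECT DISTINCT TOP (@NumberOfResultsRequested) f.SearchTerms
FROM UserSearches u
CROSS APPLY dbo.FilterRecentSearchesTitlesTvf(u.OriginalSearchTerm) AS f
WHERE u.WebsiteID = @WebsiteID;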
If you cannot rewrite the function, then why not create a stored proc that will only execute it once, similar to this:
SELECT DISTINCT TOP (@NumberOfResultsRequested) dbo.FilterRecentSearchesTitles(OriginalSearchTerm) AS SearchTerms
INTO #temp
FROM UserSearches
WHERE WebsiteID = @WebsiteID

SELECT *
FROM #temp
WHERE SearchTerms NOT IN (SELECT KeywordUrl
                          FROM PopularSearchesBaseline
                          WHERE WebsiteID = @WebsiteID)
That way the function is applied in a single pass while loading the temp table, and the NOT IN filter then runs against the temp table without invoking the function again.

I might try to use a persisted computed column in this case:
ALTER TABLE UserSearches ADD FilteredOriginalSearchTerm AS dbo.FilterRecentSearchesTitles(OriginalSearchTerm) PERSISTED
You will probably have to add WITH SCHEMABINDING to your function (and the RegexReplace function) like so:
ALTER FUNCTION [dbo].[FilterRecentSearchesTitles]
(
    @SearchTerm VARCHAR(512)
)
RETURNS VARCHAR(512)
WITH SCHEMABINDING -- You will need this so the function is considered deterministic
AS
BEGIN
    DECLARE @Ret VARCHAR(512)
    SET @Ret = dbo.RegexReplace('[0-9]', '', REPLACE(@SearchTerm, '__s', ''), 1, 1)
    SET @Ret = dbo.RegexReplace('\.', '', @Ret, 1, 1)
    SET @Ret = dbo.RegexReplace('\s{2,}', ' ', @Ret, 1, 1)
    SET @Ret = dbo.RegexReplace('\sv\s', ' ', @Ret, 1, 1)
    RETURN(@Ret)
END
This makes your query look like this:
SELECT DISTINCT TOP (@NumberOfResultsRequested) FilteredOriginalSearchTerm AS SearchTerms
FROM UserSearches
WHERE WebsiteID = @WebsiteID
AND LEN(OriginalSearchTerm) > 20
AND FilteredOriginalSearchTerm NOT IN (SELECT KeywordUrl FROM PopularSearchesBaseline WHERE WebsiteID = @WebsiteID)
GROUP BY OriginalSearchTerm, GeoID
Which could potentially be optimized for speed (if necessary) with a join instead of not in, or maybe different indexing (perhaps on the computed column, or some covering indexes). Also, DISTINCT with a GROUP BY is somewhat of a code smell to me, but it could be legit.
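For example, something along these lines (untested; the index definition and the NOT EXISTS rewrite are only suggestions, assuming the persisted computed column is indexable):
CREATE INDEX IX_UserSearches_Website_Filtered
    ON UserSearches (WebsiteID, FilteredOriginalSearchTerm);

SELECT DISTINCT TOP (@NumberOfResultsRequested) u.FilteredOriginalSearchTerm AS SearchTerms
FROM UserSearches u
WHERE u.WebsiteID = @WebsiteID
  AND LEN(u.OriginalSearchTerm) > 20
  AND NOT EXISTS (SELECT 1
                  FROM PopularSearchesBaseline p
                  WHERE p.WebsiteID = @WebsiteID
                    AND p.KeywordUrl = u.FilteredOriginalSearchTerm);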

Instead of using the function in the SELECT, I modified the INSERT query to apply it. That way, I avoid calling the function for every row when I later retrieve the data.
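Roughly like this (the FilteredSearchTerm column and the parameter names are illustrative, not the actual schema):
INSERT INTO UserSearches (WebsiteID, OriginalSearchTerm, FilteredSearchTerm, GeoID)
VALUES (@WebsiteID,
        @OriginalSearchTerm,
        dbo.FilterRecentSearchesTitles(@OriginalSearchTerm),
        @GeoID);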


MySQL Variable Assignment via Procedure Not Working Correctly

In the code below, I'm trying to go through the results of endDateTable row by row, comparing the current row's endDate to the previous row's endDate. If there has been any change since the previous row, we increment @revisionNum. However, upon populating the new table, all of the @revisionNum entries are 0. What am I doing wrong?
NOTE: I'm using prepared statements in this manner because doing a straightforward SELECT into a variable gives a syntax error, since the LIMIT clause does not allow a variable in our version of MySQL.
BEGIN
    DECLARE _currentEndDate DATETIME DEFAULT now();
    DECLARE _priorEndDate DATETIME DEFAULT now();

    SET @ResultsCount = (SELECT COUNT(*) FROM mainTable);
    SET @j = 0;

    WHILE @j < @ResultsCount DO
        SET @revisionNum = 0;

        /* CURRENT END DATE */
        SET @appResultQueryCurrent = CONCAT('
            SELECT end_date
            INTO _currentEndDate
            FROM endDateTable
            LIMIT ', @j, ', 1'
        );
        PREPARE currentQueryStmt FROM @appResultQueryCurrent;
        EXECUTE currentQueryStmt;

        /* PREVIOUS END DATE */
        SET @appResultQueryPrior = CONCAT('
            SELECT end_date
            INTO _priorAppEndDate
            FROM endDateTable
            LIMIT ', IF(@j = 0, 0, @j - 1), ', 1'
        );
        PREPARE priorQueryStmt FROM @appResultQueryPrior;
        EXECUTE priorQueryStmt;

        SET @revisionNum = IF(
            @j = 0 OR (_currentEndDate = _priorEndDate),
            @revisionNum,
            IF(
                _currentEndDate != _priorEndDate,
                @revisionNum + 1,
                @revisionNum
            )
        );

        INSERT INTO finalTable (RevisionNum)
        SELECT @revisionNum AS RevisionNum
        FROM endDateTable;

        SET @j = @j + 1;
    END WHILE;
END $$
You don't need a loop; you can use INSERT INTO ... SELECT ..., incrementing the variable in the SELECT query.
You also need an ORDER BY clause to specify how to order the rows when comparing one row to the previous row.
INSERT INTO finalTable (RevisionNum, otherColumn)
SELECT revision, otherColumn
FROM (
    SELECT IF(end_date = @prev_end_date, @revision, @revision := @revision + 1) AS revision,
           @prev_end_date := end_date,
           otherColumn
    FROM endDateTable
    CROSS JOIN (SELECT @prev_end_date := NULL, @revision := -1) AS vars
    ORDER BY id) AS x
The offset value in the LIMIT clause is tenuous without an ORDER BY.
Without an ORDER BY clause, MySQL is free to return results in any sequence.
There is no guarantee that LIMIT 41,1 will return the row before LIMIT 42,1, or that it won't return the exact same row as LIMIT 13,1 did.
(A table in a relational database represents an unordered set of tuples; there is no guaranteed "order" of rows in a table.)
But just adding ORDER BY to the queries isn't enough to fix the Rube-Goldberg-esque rigmarole.
In the code shown, it looks like each time through the loop, we're inserting a copy of endDateTable into finalTable. If that's 1,000 rows in endDateTable, we're going to get 1,000,000 rows (1,000 x 1,000) inserted into finalTable. Not at all clear why we need so many copies.
Given the code shown, it's not clear what the objective is. Looks like we are conditionally incrementing revisionNum, the end result of which is the highest revision num. Just guessing here.
If there is some kind of requirement to do this in a LOOP construct, within a procedure, I'd think we'd do a cursor loop. And we can use procedure variables vs user-defined variables.
Something along these lines:
BEGIN
    DECLARE ld_current_end_date DATETIME;
    DECLARE ld_prior_end_date DATETIME;
    DECLARE li_done INT;
    DECLARE li_revision_num INT;
    DECLARE lcsr_end_date CURSOR FOR SELECT t.end_date FROM `endDateTable` t ORDER BY NULL;
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET li_done = TRUE;

    SET li_done = FALSE;
    SET li_revision_num = 0;

    OPEN lcsr_end_date;
    FETCH lcsr_end_date INTO ld_current_end_date;
    SET ld_prior_end_date = ld_current_end_date;

    WHILE NOT li_done DO
        SET li_revision_num = li_revision_num + IF(ld_current_end_date <=> ld_prior_end_date, 0, 1);
        SET ld_prior_end_date := ld_current_end_date;
        FETCH lcsr_end_date INTO ld_current_end_date;
    END WHILE;
    CLOSE lcsr_end_date;

    INSERT INTO `finalTable` (revisionnum) VALUES (li_revision_num);
END $$
Note the "order by" clause on the SELECT, its not clear what the rows should be ordered on, so we're using a literal as a placeholder.
As the end result, we insert a single row into finalTable.
Again, it's not clear what the code in the question is supposed to achieve, but doing a cursor loop across ordered rows would be much more efficient than a bazillion dynamic SQL executions fetching individual rows.

Creating a subquery within the FROM clause in order to refer to an archived table

I'm trying to run a simple COUNT(*) query to check the total number of entries in a table that gets archived every day. However, I need this query to work every day within my reporting functions, so it has to pick up each day's table automatically.
Unfortunately, when I run the query it simply treats it either as an error or as a subquery with no data inside it. Here's my troubleshooting query:
SELECT * FROM
SELECT CONCAT('archivedtable__', REPLACE(SUBDATE(CURRENT_DATE(), 1), '-', ''));

SELECT * FROM (
SELECT CONCAT('archivedtable__', REPLACE(SUBDATE(CURRENT_DATE(), 1), '-', ''))) subquery;
The first gives an error; the second just returns archivedtable__20190813, which is the correct name, but it doesn't actually make the query refer to that table.
You may want to use prepared statements. Here is an example to point you in the right direction:
Prerequisite: create two tables named for the dates of the last two days, i.e. yesterday and the day before yesterday (sketched below).
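For example, if run on 2019-08-14 the prerequisite tables might look like this (the column layout is just a placeholder; only the naming pattern matters):
CREATE TABLE archivedtable__20190813 (id INT, payload VARCHAR(100));
CREATE TABLE archivedtable__20190812 (id INT, payload VARCHAR(100));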
SET @prefix := 'archivedtable__';
SET @date1 := REPLACE(SUBDATE(CURRENT_DATE(), 1), '-', '');
SET @table1 := concat(@prefix, @date1);
SET @date2 := REPLACE(SUBDATE(CURRENT_DATE(), 2), '-', '');
SET @table2 := concat(@prefix, @date2);
set @qry := concat('SELECT COUNT(*) as t1, (SELECT COUNT(*) FROM ', @table1, ') as t2 FROM ', @table2);
prepare stmt from @qry;
execute stmt;
You can build the SQL as a VARCHAR (or NVARCHAR) and then use sp_executesql.
Like this:
DECLARE @tblCount INT
DECLARE @tblName AS NVARCHAR(100) = CONCAT(.....)
DECLARE @countSql AS NVARCHAR(200) = N'SELECT @cnt = COUNT(*) FROM ' + @tblName
DECLARE @ParmDefinition NVARCHAR(500) = N'@cnt INT OUTPUT'
EXECUTE sp_executesql @countSql, @ParmDefinition, @cnt = @tblCount OUTPUT;
SELECT @tblCount;

mysql where columnname in (function(a value)) not working

I have a strange situation here with the mysql query:
When WHERE unterkategorie IN (children_csv(1)) is used, there is no result.
The second version, WHERE unterkategorie IN (11,12,13,28,29,32,14,15,16,30,31,33,34,35), fetches records; it is what I get when I substitute the function call with the results of running the function separately.
The full query is:
SELECT k.name category_name,
p.unterkategorie,
p.artikelnummer,
p.hauptkategorie,
p.id,
p.name product_name,
p.preis,
p.sortierung,
p.verpackungseinheit
FROM produkte p, kategorie k
WHERE unterkategorie IN (children_csv(1))
-- WHERE unterkategorie IN (11,12,13,28,29,32,14,15,16,30,31,33,34,35) -- works when substituted by hand
AND p.unterkategorie = k.id
ORDER BY unterkategorie, p.sortierung
Following is the function definition
delimiter //
CREATE DEFINER=`root`@`localhost` FUNCTION `children_csv`(child int) RETURNS varchar(1000) CHARSET utf8
BEGIN
    declare return_value varchar(1000);
    SELECT GROUP_CONCAT(Level SEPARATOR ',') childrens INTO return_value FROM (
        SELECT @Ids := (
            SELECT GROUP_CONCAT(`id` SEPARATOR ',')
            FROM `kategorie`
            WHERE FIND_IN_SET(`parent`, @Ids)
            ORDER BY parent, sortierung
        ) Level
        FROM `kategorie`
        JOIN (SELECT @Ids := child) temp1
        WHERE FIND_IN_SET(`parent`, @Ids)
    ) temp2;
    RETURN return_value;
END;
//
delimiter ;
Your function returns a single value, a string. It does not return a list of values (because MySQL functions cannot do that). If you want to use the function directly, you can use find_in_set():
WHERE find_in_set(unterkategorie, children_csv(1))
I will caution you that with find_in_set() MySQL cannot use an index on unterkategorie, so this might be slower.
If you want a faster query, then you can construct a query as a string (called dynamic SQL) and use prepare and exec to run it.
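For example, something along these lines (untested; the column list is trimmed to keep the sketch short):
SET @id_list := children_csv(1);
SET @sql := CONCAT('SELECT k.name AS category_name, p.* ',
                   'FROM produkte p JOIN kategorie k ON p.unterkategorie = k.id ',
                   'WHERE p.unterkategorie IN (', @id_list, ') ',
                   'ORDER BY p.unterkategorie, p.sortierung');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;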
If you are coming from another programming language, you need to learn that functions are not the route to better performance in SQL. Moving the logic into a function generally does not help performance.

Loop through a split string variable to insert rows in a stored procedure in SQL Server 2008

I am working on SQL Server 2008 to create a stored procedure that:
takes a string variable like this: '1,2,3'
splits the string using a table-valued function to get each value separately
and then inserts each value into a new row in a table
What I am trying to do is something like this:
WHILE (select value FROM dbo.SplitString('1,2,3',',')) has rows
insert into TableName (col1,col2) values (col1Data, value)
I am having a hard time trying to find the right syntax for this.
I use this Table-valued function:
CREATE FUNCTION [dbo].[Split] (@sep char(1), @s varchar(512))
RETURNS table
AS
RETURN (
    WITH Pieces(pn, start, stop) AS (
        SELECT 1, 1, CHARINDEX(@sep, @s)
        UNION ALL
        SELECT pn + 1, stop + 1, CHARINDEX(@sep, @s, stop + 1)
        FROM Pieces
        WHERE stop > 0
    )
    SELECT pn,
           SUBSTRING(@s, start, CASE WHEN stop > 0 THEN stop - start ELSE 512 END) AS s
    FROM Pieces
)
GO
It takes a separator and a string and returns a table with two columns: the first is the 1-based position of each element, and the second is the element at that position in the string.
Usage:
SELECT * FROM dbo.Split(',', '1,2,3')
Returns:
pn s
1 1
2 2
3 3
To Insert results into a table:
INSERT INTO TableName (Col1)
SELECT s FROM dbo.Split(',', '1,2,3')
For your specific example change your syntax to be:
insert into TableName (col1,col2)
select col1Data, value FROM dbo.SplitString('1,2,3',',')
The typical INSERT INTO ... SELECT ... should do:
INSERT INTO TableName (col1,col2)
SELECT @col1Data, value FROM dbo.SplitString('1,2,3', ',')
If someone else is looking for this, I was about to make a split function as several answers mentioned but noticed there's a built-in function that does this already.
STRING_SPLIT was added in SQL Server 2016.
INSERT INTO Project.FormDropdownAnswers (FkTableId, CreatedBy, CreatedDate)
SELECT TRY_CAST(value AS INT), @username, getdate()
FROM string_split('44,45,46,47,55', ',')
https://learn.microsoft.com/en-us/sql/t-sql/functions/string-split-transact-sql
CREATE TABLE tablename
(
id SMALLINT ,
value INT
)
INSERT INTO tablename ( id, value )
SELECT * FROM dbo.Split(',', '1,2,3')
try this....
If you need to use the values as variables, there are two nice options:
Procedure MF_SPLIT
CREATE PROC [MF_SPLIT] (@ELS NVARCHAR(MAX)=NULL OUTPUT, @RET NVARCHAR(MAX)=NULL OUTPUT, @PROC NVARCHAR(MAX)=NULL) AS BEGIN
IF @ELS IS NULL BEGIN
PRINT ' @ELS
List of elements in string (OUTPUT)
@RET
Next return (OUTPUT)
@PROC
NULL = '','', content to do split
Example:
DECLARE @NAMES VARCHAR(100) = ''ERICK,DE,VATHAIRE''
DECLARE @N VARCHAR(100)
WHILE @NAMES IS NOT NULL BEGIN
EXEC MF_SPLIT @NAMES OUTPUT, @N OUTPUT
SELECT List = @NAMES, ActiveWord = @N
END'
RETURN
END
SET @PROC = ISNULL(@PROC, ',')
IF CHARINDEX(@PROC, @ELS) = 0 BEGIN
SELECT @RET = @ELS, @ELS = NULL
RETURN
END
SELECT
@RET = LEFT(@ELS, CHARINDEX(@PROC, @ELS) - 1)
, @ELS = STUFF(@ELS, 1, LEN(@RET) + 1, '')
END
Usage:
DECLARE @NAMES VARCHAR(100) = '1,2,3'
DECLARE @N VARCHAR(100)
WHILE @NAMES IS NOT NULL BEGIN
    EXEC MF_SPLIT @NAMES OUTPUT, @N OUTPUT
    SELECT List = @NAMES, ActiveWord = @N
END
Procedure MF_SPLIT_DO (depends on MF_SPLIT): less syntax to use, but the code has to be passed as a string and it uses the default variable "@X".
CREATE PROC MF_SPLIT_DO (@ARR NVARCHAR(MAX), @DO NVARCHAR(MAX)) AS BEGIN
    -- Less syntax
    DECLARE @X NVARCHAR(MAX)
    WHILE @ARR IS NOT NULL BEGIN
        EXEC MF_SPLIT @ARR OUT, @X OUT
        EXEC SP_EXECUTESQL @DO, N'@X NVARCHAR(MAX)', @X
    END
END
Usage:
EXEC MF_SPLIT_DO '1,2,3', 'SELECT @X'

T-SQL function with dynamic SELECT (not possible) - solved with procedure instead

I've managed to use EXEC sp_executesql in a one-off statement to do a dynamic lookup, but I am unable to adjust the code to create a function, since EXEC is not allowed in functions. It works in procedures, and I've managed to get output via PRINT for a single lookup by using a temporary table, but really that was just me struggling to find a workaround. Ideally I'd like to be able to create a scalar-valued function.
The reason that I need a dynamic lookup is because the column name is stored in another table.
Here's a quick breakdown of the tables:
Questions:
Columns: Q_Group, Q_Nbr, Question_Desc, Data_Field
Sample data: 'R3', 5, 'Do you have any allergies?', 'TXT_04'
Responses:
Columns: Order_Nbr, Q_Group, TXT_01, TXT_02, TXT_03, TXT_04, etc.
Data: 999, 'R3', 'blah', 'blah', 'blah', 'NO'
Orders will be assigned a particular set of questions 'Q_Group' and often a particular question will be the same across various different sets of questions. The problem is that when the set/groups of questions were set up, the questions may not have been added in the same order, and thus the responses go into different columns.
So here's where I'm at...
I can get 'TXT_04' from the Data_Field column in Questions and use EXEC sp_executesql to do a lookup for a single order, but am struggling to find a way to accomplish this as a function of some sort.
DECLARE @col_name VARCHAR(6)
DECLARE @sql NVARCHAR(100)
SET @col_name = (SELECT Data_Field FROM QUESTIONS WHERE Q_Group = 'R3'
                 AND Question_Desc = 'Do you have any allergies?')
SET @sql = N'SELECT ' + @col_name + N' FROM RESPONSES WHERE Order_Nbr = 999'
EXEC sp_executesql @sql
I'm just at a loss as to how this could be incorporated into a function so that I could get responses for several orders in a result set. Any workarounds possible? Maybe I'm totally off base using EXEC sp_executesql?
Thanks.
Edit...
Okay, I've changed the title to reflect that I'm going to consider this solved with a procedure instead of a function, as it ended up producing the output that I wanted: a table with all of the corresponding responses.
Here's the code that I settled on. I decided to use LIKE to match the Question_Desc instead of equals, and then included the Question_Desc in the results, so that it could be used a bit more broadly. Thankfully it's pretty quick to run currently. Although that could always change as the database grows!
CREATE PROCEDURE get_all_responses (@question_txt VARCHAR(255))
AS
DECLARE @response_col VARCHAR(35)
DECLARE @t TABLE (order_nbr int, question_txt VARCHAR(255), response_col VARCHAR(35), response VARCHAR(255))
DECLARE @i TABLE (id INT PRIMARY KEY IDENTITY(1,1), response_col VARCHAR(35))
DECLARE @u TABLE (order_nbr int, response VARCHAR(255))
DECLARE @sql VARCHAR(200)

INSERT @t
SELECT Order_Nbr, Question_Desc, Data_Field, NULL
FROM Responses
JOIN (
    SELECT Q_Group, Question_Desc, Data_Field
    FROM Questions
    WHERE Question_Desc LIKE @question_txt
) Q ON Responses.Q_Group = Q.Q_Group
WHERE Responses.Q_Group <> '0'
ORDER BY Data_Field, Order_Nbr

-- Stop if no results found and return empty result set
IF (SELECT COUNT(*) FROM @t) = 0
BEGIN
    SELECT order_nbr, question_txt, response FROM @t
    RETURN
END

INSERT @i SELECT response_col FROM @t GROUP BY response_col

DECLARE @row_nbr int
DECLARE @last_row int
SET @row_nbr = 1
SET @last_row = (SELECT COUNT(*) FROM @i)

-- Iterate through each Data_Field found
WHILE @row_nbr <= @last_row
BEGIN
    SET @response_col = (SELECT response_col FROM @i WHERE id = @row_nbr)
    SET @sql = 'SELECT Order_Nbr, ' + @response_col + ' FROM Responses WHERE NullIf(' + @response_col + ','''') IS NOT NULL'
    INSERT INTO @u
    EXEC (@sql)
    UPDATE x
    SET response = y.response
    FROM @t AS x
    INNER JOIN @u AS y ON x.order_nbr = y.order_nbr
    SET @row_nbr = @row_nbr + 1
END

-- Remove results with no responses
DELETE FROM @t WHERE response IS NULL

SELECT order_nbr, question_txt, response FROM @t
RETURN
You will not be able to execute dynamic SQL from within a function but you could do this with a stored procedure and capture the output.
DECLARE @col_name VARCHAR(6), @param NVARCHAR(50), @myReturnValue VARCHAR(50)
SET @param = N'@result VARCHAR(50) OUTPUT'
DECLARE @sql NVARCHAR(100)
SET @col_name = (SELECT Data_Field FROM QUESTIONS WHERE Q_Group = 'R3'
                 AND Question_Desc = 'Do you have any allergies?')
SET @sql = N'SELECT @result = ' + @col_name + N' FROM RESPONSES WHERE Order_Nbr = 999'
EXEC sp_executesql @sql, @param, @result = @myReturnValue OUTPUT
-- manipulate value here
PRINT @myReturnValue
You could also create a temp table and do an insert into from exec sp_executesql.
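Something like this (untested; the #resp temp table name is made up, and the other names are reused from the snippet above):
CREATE TABLE #resp (response VARCHAR(255));

DECLARE @col_name VARCHAR(6)
DECLARE @sql NVARCHAR(200)
SET @col_name = (SELECT Data_Field FROM QUESTIONS
                 WHERE Q_Group = 'R3'
                   AND Question_Desc = 'Do you have any allergies?')
SET @sql = N'SELECT ' + @col_name + N' FROM RESPONSES WHERE Order_Nbr = 999'

INSERT INTO #resp (response)
EXEC sp_executesql @sql;

SELECT response FROM #resp;
DROP TABLE #resp;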