I need to implement this without using a cursor. The script below uses a cursor and takes 5 hours for 140k records.
How can I improve the performance in SQL Server?
The original table has over 100k records.
-- Geocodes every row of #temp by calling opsstream.sputilGeocode once per row
-- and writing the returned latitude, longitude and map URL back to that row.
-- Rows whose geocode call raises an error are reported with PRINT and left
-- with NULL coordinates; processing continues with the next row.
SET NOCOUNT ON

CREATE TABLE #temp (
    RecordID int identity,
    Address  varchar(50),
    City     varchar(30),
    State    varchar(5),
    GPSLat   numeric(9,6),
    GPSLong  numeric(9,6),
    MapURL   varchar(255))

-- Sample data.
INSERT INTO #temp (Address, City, State)
VALUES ('1033 Southwest 152nd Street', 'Burien', 'WA')
INSERT INTO #temp (Address, City, State)
VALUES ('11910 Northeast 154th Street', 'Brush Prairie', 'WA')
INSERT INTO #temp (Address, City, State)
VALUES ('500 SeaWorld Drive', 'San Diego', 'CA')
INSERT INTO #temp (Address, City, State)
VALUES ('1 Legoland Drive', 'Carlsbad', 'CA')

-- STATIC: the cursor iterates over a snapshot, so the UPDATEs issued inside
-- the loop cannot affect what is fetched next.
DECLARE curGeo CURSOR LOCAL STATIC FOR
    SELECT RecordID, Address, City, State
    FROM #temp

DECLARE @RecordID     int
DECLARE @Address      varchar(50)
DECLARE @City         varchar(30)
DECLARE @State        varchar(5)
DECLARE @GPSLatitude  numeric(9, 6)
DECLARE @GPSLongitude numeric(9, 6)
DECLARE @MapURL       varchar(255)

OPEN curGeo

FETCH NEXT FROM curGeo INTO @RecordID, @Address, @City, @State

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Clear the OUTPUT variables so every call starts from the same state as
    -- the first one and a previous row's result can never be written to this row.
    SELECT @GPSLatitude = NULL, @GPSLongitude = NULL, @MapURL = NULL

    BEGIN TRY
        EXEC opsstream.sputilGeocode
            @Address      = @Address      OUTPUT,
            @City         = @City         OUTPUT,
            @State        = @State        OUTPUT,
            @GPSLatitude  = @GPSLatitude  OUTPUT,
            @GPSLongitude = @GPSLongitude OUTPUT,
            @MapURL       = @MapURL       OUTPUT

        UPDATE #temp
        SET GPSLat  = @GPSLatitude,
            GPSLong = @GPSLongitude,
            MapURL  = @MapURL
        WHERE RecordID = @RecordID
    END TRY
    BEGIN CATCH
        PRINT 'Warning: RecordID ' + CAST(@RecordID AS varchar(100)) + ' could not be geocoded.'
    END CATCH

    FETCH NEXT FROM curGeo INTO @RecordID, @Address, @City, @State
END

-- Release the cursor explicitly instead of relying on end-of-batch cleanup.
CLOSE curGeo
DEALLOCATE curGeo

SELECT RecordID, Address, City, State, GPSLat, GPSLong, MapURL
FROM #temp
You have a procedure call in your code, so I'm quite sure the problem is not the cursor, but the row-by-row logic done with the procedure. You might improve the performance of the cursor by defining it as fast_forward, but that might not be noticeable.
You have a procedure that you call, so either you need to change the procedure to accept a table valued parameter, and then of course change your procedure so that it's not a row-by-row operation or change it to a table valued function, but if you use the multi statement one, it's probably not going to improve your performance.
Related
I have a query that works outside of a stored procedure (populate dejan_output table), but inside it doesn't. Why?
-- Rebuilds dejan_output with one row per Zipcode / year / month holding the
-- month's revenue in thousands of USD, restricted to Zipcode-years that have
-- data for all 12 calendar months.
CREATE DEFINER=`admin`@`%` PROCEDURE `test_sp`()
BEGIN
    -- Locals carry a v_ prefix on purpose. Inside a stored program MySQL
    -- resolves an unqualified name that matches both a local variable and a
    -- column to the VARIABLE. The earlier locals named Zipcode and Year made
    -- the unqualified "Zipcode" in the subquery evaluate to the NULL variable,
    -- so the join matched nothing and the table stayed empty.
    DECLARE v_avg_all DOUBLE;
    DECLARE v_zipcode TEXT(10);
    DECLARE v_from1, v_to1, v_from2, v_to2, v_from3, v_to3, v_from4, v_to4 INT;
    DECLARE v_year SMALLINT;

    -- ------------------------------------------------------------------
    -- create output table here
    DROP TABLE IF EXISTS dejan_output;
    CREATE TABLE dejan_output (
        id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
        Zipcode TEXT(10),
        year SMALLINT,
        month TINYINT,
        RevenueIn1000USD DECIMAL(15,4),
        calendarData TINYINT,
        processed BIT
    );

    -- ------------------------------------------------------------------
    -- insert into output table here
    -- Every column reference is table-qualified so it can never be captured
    -- by a local variable again.
    INSERT INTO dejan_output (Zipcode, year, month, RevenueIn1000USD, processed)
    SELECT
        f.Zipcode,
        YEAR(f.ReportingMonth),
        MONTH(f.ReportingMonth),
        ROUND(SUM(f.RevenueUSD) / 1000, 4),
        0
    FROM monthly_performance_data AS f
    INNER JOIN (
        -- Zipcode-years that cover all 12 distinct months.
        SELECT
            m.Zipcode,
            YEAR(m.ReportingMonth) AS report_year
        FROM monthly_performance_data AS m
        GROUP BY m.Zipcode, YEAR(m.ReportingMonth)
        HAVING COUNT(DISTINCT MONTH(m.ReportingMonth)) = 12
    ) AS d
        ON f.Zipcode = d.Zipcode
       AND YEAR(f.ReportingMonth) = d.report_year
    GROUP BY f.Zipcode, YEAR(f.ReportingMonth), MONTH(f.ReportingMonth)
    ORDER BY f.Zipcode, YEAR(f.ReportingMonth), MONTH(f.ReportingMonth);
END
The table is created successfully but without any data. I am running it from MySQL Workbench 8.0.
I found what was causing this, although I still don't know why it happens. I am publishing my solution so that others can follow it: just remove the variable declarations:
declare avg_all double;
declare Zipcode text(10);
declare from1, to1, from2, to2, from3, to3, from4, to4 int;
declare Year smallint;
Because you can use
set @Zipcode = 'some value'
to set that variable.
I am moving data from Spreadsheets to MySQL.
So we know that in Spreadsheets usually there is no ID, instead, just text.
City;Country;...
New York;USA;...
Berlim;Germany;...
Munich,Germany,...
With that in mind, let's consider two tables:
Country : [ID, name]
City : [ID , country (FK) , name]
I dont want to create several countries with the same name -- but I want to use the existing one. Perfect, so, let's add a FUNCTION in the INSERT state that searches, insert (if needed) and return the Country ID.
So I created a Function to FIRST assess whether the Country exists if not then create a country
-- getCountry: returns the ID of the `country` row whose name equals strCountry,
-- inserting that row first when it does not exist yet. Returns NULL when the
-- insert affected no rows.
-- NOTE(review): the header line below is the shorthand used in the post; a real
-- definition needs CREATE FUNCTION ... RETURNS <id type> around it — confirm.
getCountry (parameter IN strCountry varchar(100))
BEGIN
-- A local variable (re-created as NULL on every call) replaces the session
-- variable @id. SELECT ... INTO leaves its target untouched when no row
-- matches, so a session variable kept the ID from the PREVIOUS call and the
-- "not found" branch was never taken for new countries.
DECLARE v_country_id INT DEFAULT NULL;
-- LIMIT 1 keeps the lookup from failing if the name is stored more than once.
SELECT c.ID INTO v_country_id FROM `country` AS c WHERE c.country = strCountry LIMIT 1;
IF (v_country_id IS NULL OR v_country_id = 0) THEN
    INSERT INTO `country` (country) VALUES (strCountry);
    IF (ROW_COUNT() > 0) THEN
        SET v_country_id = LAST_INSERT_ID();
    ELSE
        SET v_country_id = NULL;
    END IF;
END IF;
RETURN v_country_id;
END
And then I have DOZENS OF THOUSANDS of INSERTS such as
INSERT INTO city (name, country) VALUES ('name of the city', getCountry('new or existing one'));
The Function works well when executed alone, such as
SELECT getCountry('Aruba');
However, when I execute that in that VERY LONG SQL (22K+ rows) then it does not work.... it uses basically the latest ID that was created BEFORE starting the execution. Maybe I should "wait" the function execute and return a proper result? But How?
What am I doing wrong?
Instead of function why not use a Stored Procedure, then the procedure will process the checking and insertion.
https://www.mysqltutorial.org/getting-started-with-mysql-stored-procedures.aspx
DELIMITER $$
-- sp_city_add: inserts a city and links it to its country.
--   p_city    name stored in city.name
--   p_country country name; looked up in country.country and inserted there
--             first when no matching row exists
CREATE PROCEDURE `sp_city_add`(IN p_city VARCHAR(100), IN p_country VARCHAR(100))
BEGIN
    DECLARE v_country_id INT DEFAULT NULL;

    -- Single lookup: v_country_id stays NULL when the country is not stored yet.
    -- LIMIT 1 keeps the lookup from failing if the name exists more than once.
    SELECT c.ID INTO v_country_id
    FROM country AS c
    WHERE c.country = p_country
    LIMIT 1;

    IF v_country_id IS NULL THEN
        INSERT INTO country (country) VALUES (p_country);
        SET v_country_id = LAST_INSERT_ID();
    END IF;

    INSERT INTO city (name, country) VALUES (p_city, v_country_id);
END$$
DELIMITER ;
And if you want to execute a procedure
-- Example invocations: each call passes a city name and a country name to sp_city_add.
-- NOTE(review): 'Bogota' and 'Bogotá' are both submitted for 'Colombia'; whether they
-- end up as one city or two depends on checks not shown here — confirm.
CALL sp_city_add('Bogota', 'Colombia');
CALL sp_city_add('Phnom Penh', 'Cambodia');
CALL sp_city_add('Yaounde', 'Cameroon');
CALL sp_city_add('Ottawa', 'Canada');
CALL sp_city_add('Santiago', 'Chile');
CALL sp_city_add('Beijing', 'China');
CALL sp_city_add('Bogotá', 'Colombia');
CALL sp_city_add('Moroni', 'Comoros');
You can also add a condition to check if the city and country exists to prevent duplicate entry.
I can't find any documentation of it, but maybe there's a conflict when you do an INSERT in a function that's called during another INSERT. So try splitting them up using a variable:
-- Resolve the country ID in its own statement, then insert the city with it.
-- SET is used instead of SELECT @v := ... (deprecated in MySQL 8.0, and it
-- would also send an extra result set to the client).
SET @country = getCountry('new or existing one');
INSERT INTO city (name, country) VALUES ('name of the city', @country);
Using @Barman's idea, plus adding a COMMIT to each row, I was able to solve it:
-- One group per city: resolve the country ID, insert the city, commit.
-- SET replaces SELECT @id := ... (deprecated in MySQL 8.0, and it returned one
-- result set per row).
SET @id = getCountry('Colombia'); INSERT INTO city (city, country) VALUES ('Bogota', @id); COMMIT;
SET @id = getCountry('Colombia'); INSERT INTO city (city, country) VALUES ('Medelin', @id); COMMIT;
SET @id = getCountry('Brazil'); INSERT INTO city (city, country) VALUES ('Medelin', @id); COMMIT;
SET @id = getCountry('Brazil'); INSERT INTO city (city, country) VALUES ('Sao Paulo', @id); COMMIT;
SET @id = getCountry('Brazil'); INSERT INTO city (city, country) VALUES ('Curitiba', @id); COMMIT;
SET @id = getCountry('USA'); INSERT INTO city (city, country) VALUES ('Boston', @id); COMMIT;
SET @id = getCountry('USA'); INSERT INTO city (city, country) VALUES ('DallaS', @id); COMMIT;
Without the COMMIT at the end of each row, MySQL was not calculating the variable anymore, instead, just throwing some last result it collected.
I have the function below, but using it in a SELECT and then inserting the result into a temp table is too slow, so I am trying to rewrite it for better performance.
I think it is slow because it selects and then inserts into a table variable. If I run the same query without the function it is fast, but I need the function so that the same result table can be used in other places.
-- FN_GET_CUSTOMER_INFO: returns (CUSTID, CUSTNAME) for the given customer ID.
--   * When the customer's STATUS is 'INCLUDE', every TBL_CUSTOMER row with that
--     CUSTID is returned.
--   * Otherwise only the rows with GENDER = 'M' are returned.
-- NOTE(review): assumes TBL_CUSTOMER has a CUSTNAME column (the previous body
-- used SELECT * into this two-column table) — confirm.
-- NOTE(review): if CUSTID is unique in TBL_CUSTOMER this could become an inline
-- table-valued function (RETURNS TABLE AS RETURN SELECT ...), which the
-- optimizer can expand into the calling query — confirm before converting.
CREATE FUNCTION [dbo].[FN_GET_CUSTOMER_INFO]
(@CUSTID VARCHAR(MAX))
RETURNS
@TBL TABLE (CUSTID VARCHAR(200), CUSTNAME NVARCHAR(200))
AS
BEGIN
    DECLARE @STATUS VARCHAR(200)

    SELECT @STATUS = [STATUS]
    FROM TBL_CUSTOMER
    WHERE CUSTID = @CUSTID

    -- One INSERT covers both former branches: the GENDER filter only applies
    -- when @STATUS is not 'INCLUDE' (including when it is NULL).
    INSERT INTO @TBL (CUSTID, CUSTNAME)
    SELECT CUSTID, CUSTNAME
    FROM TBL_CUSTOMER
    WHERE CUSTID = @CUSTID
      AND (@STATUS = 'INCLUDE' OR GENDER = 'M')

    -- A multi-statement table-valued function must end with RETURN.
    RETURN
END
I got a function that returns string between two strings:
-- Returns the part of #String located between #FirstSpecialChar and #LastSpecialChar.
-- NOTE(review): the leading '#' on parameter and variable names looks like a garbled
-- T-SQL '@' sigil — confirm against the original post.
-- Known problems in this version (kept as posted):
--   * All parameters and the return value are VARCHAR(50), so longer arguments are
--     cut to 50 characters before the search runs.
--   * CHARINDEX returns 0 when the search text is not found; if the last delimiter is
--     missing, the computed length is negative and SUBSTRING raises error 537.
--   * Extraction starts one character after the START of the first delimiter, so a
--     multi-character first delimiter is partly included in the result.
CREATE FUNCTION dbo.udf_GetStringBetween2Chars (#String VARCHAR(50), #FirstSpecialChar VARCHAR(50), #LastSpecialChar VARCHAR(50))
RETURNS VARCHAR(50)
AS
BEGIN
DECLARE #FirstIndexOfChar INT,
#LastIndexOfChar INT,
#LengthOfStringBetweenChars INT
-- Position of the first delimiter (0 when absent).
SET #FirstIndexOfChar = CHARINDEX(#FirstSpecialChar,#String,0)
-- Position of the last delimiter, searched from just after the first delimiter's start.
SET #LastIndexOfChar = CHARINDEX(#LastSpecialChar,#String,#FirstIndexOfChar+1)
SET #LengthOfStringBetweenChars = #LastIndexOfChar - #FirstIndexOfChar -1
SET #String = SUBSTRING(#String,#FirstIndexOfChar+1,#LengthOfStringBetweenChars)
RETURN #String
END
However, when I try to get string between POINTDESCRIPTION and DATAPOINT, I get error:
-- Test data for dbo.udf_GetStringBetween2Chars.
-- The column is varchar(500): with varchar(50) and ANSI_WARNINGS OFF each sample
-- string was silently cut to 50 characters, which removed both delimiters and
-- produced error 537 inside the function. ANSI_WARNINGS is therefore left at
-- its default so any future truncation is reported instead of hidden.
-- The function's own parameters must be wide enough as well.
DECLARE @table TABLE
    (string varchar(500))

INSERT INTO @table (string)
SELECT 'EVENT: ID Order Reassignment (Individual Job Specific Update)|||LOCATION: Reassign.reassign()|||DATEPOINTDESCRIPTION: Only specific ID orders were reassigned from user fatis to user blake.|||DATAPOINT: blake' UNION ALL
SELECT 'EVENT: ID Order Reassignment (Individual Job Specific Update)|||LOCATION: Reassign.reassign()|||DATAPOINTDESCRIPTION: Only specific ID orders were reassigned from user ilevic to user manic2.|||DATAPOINT: manic2' UNION ALL
SELECT 'EVENT: ID Order Reassignment (Individual Job Specific Update)|||LOCATION: Reassign.reassign()|||DATAPOINTDESCRIPTION: Only specific ID orders were reassigned from user links to user sepir.|||DATAPOINT: sepir'

SELECT dbo.udf_GetStringBetween2Chars(Tab.string, 'POINTDESCRIPTION: ', '|||DATAPOINT')
FROM @table AS Tab
Msg 537, Level 16, State 2, Line 10
Invalid length parameter passed to the LEFT or SUBSTRING function.
Does anyone see why this would happen?
If anyone finds it useful, here is final function to return string between 2 strings:
-- Returns the text of @String that lies between the first occurrence of
-- @FirstSpecialChar and the next occurrence of @LastSpecialChar after it.
-- Neither delimiter is included in the result.
-- Returns NULL when any argument is NULL or when either delimiter is not found.
CREATE FUNCTION dbo.udf_GetStringBetween2Chars (@String VARCHAR(500), @FirstSpecialChar VARCHAR(500), @LastSpecialChar VARCHAR(500))
RETURNS VARCHAR(500)
AS
BEGIN
    DECLARE @FirstPos INT,   -- start of the first delimiter
            @FirstLen INT,   -- length of the first delimiter
            @StartPos INT,   -- first character after the first delimiter
            @EndPos   INT    -- start of the last delimiter

    -- LEN ignores trailing spaces, so a delimiter such as 'KEY: ' would be
    -- measured one character short; appending a sentinel makes them count.
    SET @FirstLen = LEN(@FirstSpecialChar + 'x') - 1

    SET @FirstPos = CHARINDEX(@FirstSpecialChar, @String)
    IF COALESCE(@FirstPos, 0) = 0
        RETURN NULL

    -- Search for the closing delimiter only after the whole opening delimiter.
    SET @StartPos = @FirstPos + @FirstLen
    SET @EndPos = CHARINDEX(@LastSpecialChar, @String, @StartPos)
    IF COALESCE(@EndPos, 0) = 0
        RETURN NULL

    RETURN SUBSTRING(@String, @StartPos, @EndPos - @StartPos)
END
GO
And to call it:
-- Example: extract the description text from every row of yourTable.
SELECT
    dbo.udf_GetStringBetween2Chars(src.someString, 'POINTDESCRIPTION: ', '|||DATAPOINT')
FROM yourTable AS src
I have written the following stored procedure which in HeidiSQL is giving me an Error 1064 at the line starting with SET pay_ref = SELECT CONCAT('KOS' ...
Let me firstly explain what's going on with this procedure. I have a table gamers with a BIGINT primary key with auto_increment. This proc is supposed to:
Take in some params from the user
Check whether the user already exists in the db according to his/her email address, and return the word "DUPLICATE" if a record does exist
Else it does the insert as normal
Then it reads in the ID of the new record created and converts it to a varchar, pads it with leading zeros and then gets concatenated with some other strings
This new string (which should read for example KOS00001ABCDEF) then gets updated to the pay_refcode field >>> this is how we have settled on generating a unique payment reference for the user
If all works out well it updates retval with the newly generated reference code to be read by PHP script.
DELIMITER //
-- InsertGamer (version as posted in the question; fails with error 1064).
-- Inserts a gamer unless a row with the same email already exists, then builds
-- a payment reference from the new row's ID and stores it in pay_refcode.
--   p_fname, p_lname, p_email, p_favgame  values for the new gamers row
--   p_pay_suffix                          text appended to the reference
--   retval (OUT)                          'DUPLICATE' when the email exists,
--                                         otherwise the generated reference
-- NOTE(review): the delimiter is never switched back with "DELIMITER ;" after
-- the definition.
CREATE PROCEDURE `InsertGamer` (
IN p_fname VARCHAR(30),
IN p_lname VARCHAR(30),
IN p_email VARCHAR(255),
IN p_favgame VARCHAR(60),
IN p_pay_suffix VARCHAR(6),
OUT retval VARCHAR(14)
)
BEGIN
DECLARE last_id BIGINT;
DECLARE pay_ref VARCHAR(14);
IF (EXISTS(SELECT * FROM gamers WHERE (email = p_email))) THEN
SET retval = 'DUPLICATE';
ELSE
INSERT INTO gamers (fname, lname, email, favgame, pay_refcode)
VALUES (p_fname, p_lname, p_email, p_favgame, NULL);
SET last_id = LAST_INSERT_ID();
-- Source of error 1064: SET expects an expression, so a bare SELECT is not
-- allowed here, and VARCHAR is not a valid target type for CONVERT in MySQL
-- (CHAR is).
SET pay_ref = SELECT CONCAT('KOS', (SELECT LPAD(CONVERT(last_id, VARCHAR(5)),5,'0')), p_pay_suffix);
UPDATE gamers
SET pay_refcode = pay_ref
WHERE application_id = last_id;
SET retval = pay_ref;
END IF;
END //
I cannot for the life of me figure out what the problem is and would sincerely appreciate any help from you. Thank you very much in advance!
You just need to remove the SELECT keyword from line which you set the value for pay_ref.
SET pay_ref = CONCAT('KOS', LPAD(CONVERT(last_id, CHAR(5)),5,'0'), p_pay_suffix);
full code:
DELIMITER //
-- InsertGamer: registers a gamer and generates the payment reference.
--   p_fname, p_lname, p_email, p_favgame  values for the new gamers row
--   p_pay_suffix                          text appended to the reference
--   retval (OUT)                          'DUPLICATE' when a row with p_email
--                                         already exists; otherwise the new
--                                         reference 'KOS' + 5-digit ID + suffix
-- NOTE(review): the existence check followed by INSERT is not atomic; a UNIQUE
-- index on gamers.email is needed to rule out duplicates under concurrency —
-- confirm one exists.
CREATE PROCEDURE `InsertGamer` (
    IN p_fname VARCHAR(30),
    IN p_lname VARCHAR(30),
    IN p_email VARCHAR(255),
    IN p_favgame VARCHAR(60),
    IN p_pay_suffix VARCHAR(6),
    OUT retval VARCHAR(14)
)
BEGIN
    DECLARE last_id BIGINT;
    DECLARE pay_ref VARCHAR(14);

    -- EXISTS stops at the first match and needs no session variable.
    IF EXISTS (SELECT 1 FROM gamers WHERE email = p_email) THEN
        SET retval = 'DUPLICATE';
    ELSE
        INSERT INTO gamers (fname, lname, email, favgame, pay_refcode)
        VALUES (p_fname, p_lname, p_email, p_favgame, NULL);

        SET last_id = LAST_INSERT_ID();

        -- The ID is zero-padded to 5 characters. IDs above 99999 are cut to
        -- their first 5 digits by CHAR(5)/LPAD, so references stop being
        -- unique beyond that point.
        SET pay_ref = CONCAT('KOS', LPAD(CONVERT(last_id, CHAR(5)), 5, '0'), p_pay_suffix);

        UPDATE gamers
        SET pay_refcode = pay_ref
        WHERE application_id = last_id;

        SET retval = pay_ref;
    END IF;
END //
DELIMITER ;