Related
Data comes in via a zip file in a group of csv files which are used to populate holding tables. Some can include new data or updates of existing entries. The holding tables are used for intermediate processing then are used for import to the working tables.
In an effort to speed up the process I have been writing stored procedures to update the working tables. On my laptop one such file with only 730 records is taking about a minute to do its stuff.
I did consider making the 'insert ... on duplicate key update' into a prepared statement inside the SP but some of the other tables have many more fields and I could find no good guide on how to write such a complex one.
Here is my stored procedure:
DROP PROCEDURE IF EXISTS impLuRacodes;
DELIMITER $$
-- impLuRacodes: walks lu_racodes_temp with a cursor and upserts each row
-- into lu_racodes (INSERT ... ON DUPLICATE KEY UPDATE), one statement per row.
CREATE DEFINER=`root`@`localhost` PROCEDURE impLuRacodes()
BEGIN
    -- Set to 1 by the NOT FOUND handler once the cursor is exhausted.
    DECLARE done INT DEFAULT 0;
    -- One local per column fetched from lu_racodes_temp.
    DECLARE v_ATID varchar(10);
    DECLARE v_Code varchar(10);
    DECLARE v_IssType varchar(10);
    DECLARE v_Category varchar(10);
    DECLARE v_CNumber int(4);
    DECLARE v_CDesc varchar(255);
    DECLARE v_ColCode varchar(6);
    DECLARE v_LLike int(3);
    DECLARE v_LifeS int(3);
    DECLARE v_PropS int(3);
    DECLARE v_BusS int(3);
    DECLARE c_1 CURSOR FOR
        SELECT `ATID`, `Code`, `IssType`, `Category`, `CNumber`, `CDesc`,
               `ColCode`, `LLike`, `LifeS`, `PropS`, `BusS`
        FROM lu_racodes_temp;
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

    OPEN c_1;
    read_loop: LOOP
        FETCH c_1 INTO v_ATID, v_Code, v_IssType, v_Category, v_CNumber,
                       v_CDesc, v_ColCode, v_LLike, v_LifeS, v_PropS, v_BusS;
        -- The fetch past the last row only sets `done`; the variables still
        -- hold the previous row (or NULLs if the table is empty). Leave
        -- before the upsert so that stale data is never written.
        IF done THEN
            LEAVE read_loop;
        END IF;
        INSERT INTO lu_racodes (ATID, `Code`, IssType, Category, CNumber,
                                CDesc, ColCode, LLike, LifeS, PropS, BusS)
        VALUES (v_ATID, v_Code, v_IssType, v_Category, v_CNumber, v_CDesc,
                v_ColCode, v_LLike, v_LifeS, v_PropS, v_BusS)
        ON DUPLICATE KEY UPDATE
            ATID = v_ATID, `Code` = v_Code, IssType = v_IssType,
            Category = v_Category, CNumber = v_CNumber, CDesc = v_CDesc,
            ColCode = v_ColCode, LLike = v_LLike, LifeS = v_LifeS,
            PropS = v_PropS, BusS = v_BusS;
    END LOOP read_loop;
    CLOSE c_1;
END $$
-- Restore the default statement delimiter for whatever follows.
DELIMITER ;
Alternatively, is it possible to put the Select inside the 'Insert ... On Duplicate Key' instead of using a cursor - again something I have had no joy in finding a clear answer to.
Thanks to the guidance, I rewrote the code to put the SELECT inside the INSERT, and execution time dropped to 0.156 seconds for a first population. I was concerned about the syntax of the update section, which refers to VALUES() even though the data comes from a SELECT statement, but after I changed a couple of rows in the source table they were updated correctly, still in only 0.235 seconds. So here is the code:
DROP PROCEDURE IF EXISTS impLuRacodes;
DELIMITER $$
-- impLuRacodes: set-based upsert. Copies every row of lu_racodes_temp into
-- lu_racodes in a single statement; when a row collides with an existing
-- key, the listed columns are overwritten with the incoming values.
CREATE DEFINER=`root`@`localhost` PROCEDURE impLuRacodes()
BEGIN
    INSERT INTO lu_racodes
        (`ATID`, `Code`, `IssType`, `Category`, `CNumber`, `CDesc`,
         `ColCode`, `LLike`, `LifeS`, `PropS`, `BusS`)
    SELECT
        `ATID`, `Code`, `IssType`, `Category`, `CNumber`, `CDesc`,
        `ColCode`, `LLike`, `LifeS`, `PropS`, `BusS`
    FROM lu_racodes_temp
    -- VALUES(col) refers to the value the INSERT would have stored for col,
    -- which here is the value supplied by the SELECT.
    ON DUPLICATE KEY UPDATE
        `ATID`     = VALUES(`ATID`),
        `Code`     = VALUES(`Code`),
        `IssType`  = VALUES(`IssType`),
        `Category` = VALUES(`Category`),
        `CNumber`  = VALUES(`CNumber`),
        `CDesc`    = VALUES(`CDesc`),
        `ColCode`  = VALUES(`ColCode`),
        `LLike`    = VALUES(`LLike`),
        `LifeS`    = VALUES(`LifeS`),
        `PropS`    = VALUES(`PropS`),
        `BusS`     = VALUES(`BusS`);
END
$$
DELIMITER ;
update query does not update the values into temporary table in mysql.
-- Scratch variables; none of them is referenced in this excerpt.
DECLARE _defaultDateTime    DATETIME;
DECLARE _resourceTypeId     CHAR(36);
DECLARE _billedUsageHrs     DECIMAL(15,6);
DECLARE _unbilledusageHrs   DECIMAL(15,6);
DECLARE _billedCost         DECIMAL(15,6);
DECLARE _unBilledCost       DECIMAL(15,6);
DECLARE _resourceIdentifier CHAR(36);

-- Recreate the in-memory scratch table from nothing.
DROP TABLE IF EXISTS _usageTable;
CREATE TEMPORARY TABLE _usageTable (
    resourceinstanceId CHAR(36),
    billedusageinHrs   DECIMAL(15,6),
    billedusageCost    DECIMAL(15,6),
    unBilledusageinHrs DECIMAL(15,6),
    unbilledusageCost  DECIMAL(15,6)
) ENGINE=MEMORY;

-- Targets every row, but the table was created just above and has none yet.
UPDATE _usageTable
SET resourceinstanceId = 'a17b5e49-000c-11e3-8bfa-842b2bac06e5'
WHERE 1 = 1;

SELECT
    resourceinstanceId,
    billedusageinHrs,
    billedusageCost,
    unBilledusageinHrs,
    unbilledusageCost
FROM _usageTable;
The above stored procedure returns an empty result set.
-- The table is created here, so it starts out with zero rows ...
CREATE TEMPORARY TABLE _usageTable (
    resourceinstanceId CHAR(36),
    billedusageinHrs   DECIMAL(15,6),
    billedusageCost    DECIMAL(15,6),
    unBilledusageinHrs DECIMAL(15,6),
    unbilledusageCost  DECIMAL(15,6)
) ENGINE=MEMORY;
-- ... and this UPDATE therefore has nothing to modify.
UPDATE _usageTable
SET resourceinstanceId = 'a17b5e49-000c-11e3-8bfa-842b2bac06e5'
WHERE 1 = 1;
UPDATE will update existing rows.
As far as I can see, you just created the table. So it is empty. There is simply no row to update.
Perhaps you are looking for INSERT instead of UPDATE? Or maybe your example does not show the real problem?
DELIMITER $$
-- Load_Fact_List: iterates over Fact_List with a cursor and, for each row,
-- looks up the matching dimension keys and inserts one row into Test_Fact.
-- Returns a single-row result set: (num_rows, loop_cntr), i.e. the number of
-- source rows counted up front and the number of rows actually processed.
CREATE PROCEDURE Load_Fact_List()
BEGIN
    DECLARE Project_Number_Temp INT;
    DECLARE Panel_Id_Temp INT;
    DECLARE Employee_Id_Temp INT;
    DECLARE Zip_Temp VARCHAR(255);
    DECLARE Created_Date_Temp DATE;
    DECLARE Country_Temp VARCHAR(255);
    -- Explicit default so the loop guard never tests an uninitialised (NULL) flag.
    DECLARE no_more_rows BOOLEAN DEFAULT FALSE;
    DECLARE loop_cntr INT DEFAULT 0;
    DECLARE num_rows INT DEFAULT 0;
    DECLARE load_cur CURSOR FOR
        SELECT Project_Id, Panel_Id, Employee_Id, Zip, Created_Date
        FROM Fact_List;
    DECLARE CONTINUE HANDLER FOR NOT FOUND
        SET no_more_rows = TRUE;

    -- Count the source rows directly instead of calling FOUND_ROWS() after
    -- OPEN: FOUND_ROWS() is deprecated (MySQL 8.0.17) and is documented for
    -- the preceding SELECT statement, not for a freshly opened cursor.
    SELECT COUNT(*) INTO num_rows FROM Fact_List;

    OPEN load_cur;
    the_loop: LOOP
        FETCH load_cur
        INTO Project_Number_Temp, Panel_Id_Temp, Employee_Id_Temp, Zip_Temp, Created_Date_Temp;
        IF no_more_rows THEN
            CLOSE load_cur;
            LEAVE the_loop;
        END IF;

        -- Country for this row's zip; NULL when the zip is unknown.
        SET Country_Temp = (SELECT Country FROM Zip WHERE Zip = Zip_Temp);

        INSERT INTO Test_Fact
        (
            Project_Key,
            Campaign_Key,
            Respondents_Key,
            Event_Key,
            Employee_Key,
            Geography_Key,
            Date_Key
        )
        SELECT
            (SELECT Project_Key FROM Project_Dim
              WHERE Project_Id = Project_Number_Temp AND Quota_Country = Country_Temp),
            0,  -- Campaign_Key: constant in this load
            (SELECT MAX(Respondents_Key) FROM Respondents_Dim WHERE Panel_Id = Panel_Id_Temp),
            1,  -- Event_Key: constant in this load
            (SELECT MAX(Employee_Key) FROM Employee_Dim WHERE Employee_Id = Employee_Id_Temp),
            (SELECT Geography_Key FROM Geography_Dim WHERE Zip = Zip_Temp),
            (SELECT Date_Key FROM Date_Dim WHERE Full_Date = Created_Date_Temp);

        SET loop_cntr = loop_cntr + 1;
    END LOOP the_loop;

    SELECT num_rows, loop_cntr;
END $$
-- Restore the default statement delimiter for whatever follows.
DELIMITER ;
The above code works properly, but it is extremely slow: it loads only about 1,000 records per hour, and I have lakhs (hundreds of thousands) of records to load into the fact table. Can anyone suggest an optimization?
Requirement is to load fact table by looping through other table and gathering required key values from dimension tables.
The usual procedure is actually like this.
You have your dimensions built and you just gathered the data you want to insert into your fact table in a temporary table. Then you insert this data in another temporary table like this:
-- Template only: the `...` lines stand for further columns / joins and must
-- be filled in before this can run.
-- Resolves each staged row to its dimension keys and to the matching fact
-- row, if any, substituting 0 wherever a LEFT JOIN finds no match.
INSERT INTO tmp_fact_table
(
fact_key,
dim1_key,
dim2_key,
...
fact1,
fact2
...
)
SELECT
-- NOTE(review): two-argument ISNULL(expr, fallback) is SQL Server syntax;
-- on MySQL the equivalent is IFNULL(expr, fallback) or COALESCE(expr, fallback).
ISNULL (f.fact_key, 0),
ISNULL (d1.sid, 0) as whatever,
ISNULL (d2.sid, 0) as whatever2,
...
ISNULL (tt.fact1, 0),
ISNULL (tt.fact2, 0)
FROM
yourTempTable tt
LEFT JOIN Dim1 d1 ON tt.identifying_column = d1.identifying_column
...
-- Joined last so that an already-loaded fact row is found via its dimension keys.
LEFT JOIN fact_table f ON
f.dim1_key = d1.sid
AND f.dim2_key = d2.sid
where
fact_key is the identifying column in your fact table
dim1_key is the foreign key in your fact table to the dimensions
fact1 and so on are the facts you want in your fact table, clear
the ISNULL() function returns 0 when no entry is found (the two-argument ISNULL() is SQL Server syntax; in MySQL use IFNULL() or COALESCE() instead). 0 is the id of your dummy row in each dimension for unknown data
Then you will have a table where you have the IDs of your dimensions linked to the data you want to import into your fact table with 0 as fact key when the entry in the fact table does not already exist and the ID of the fact table entry otherwise.
Then you update the fact table where tmp_fact_table.fact_key != 0
Then you insert into the fact table where tmp_fact_table.fact_key = 0
That's it.
I'm doing this with millions of rows and it takes about half an hour. 300,000 rows is peanuts.
I am new to stored procedures. I wrote a stored procedure to copy a table from one database to another. Every time I execute it, the data is added to the destination table again, so the row count increases on every execution.
Please help me resolve the issue. I suspect the problem is in the loop.
My SP is:
--exec mall
-- mall: walks CMA_UDS.dbo.Dim_Asset with a cursor and, for each asset,
-- either inserts a new row into CRM.dbo.Mall (with an id obtained from
-- CRM.dbo.crm_next_id) or updates the existing row.
alter procedure mall
as
begin
    declare @mallid int
    declare @mallname nvarchar(40)
    declare @mallstatus nvarchar(40)
    declare @malludsuomid nchar(2)
    declare @malludsassetcode nvarchar(6)
    declare @malludsassettype nvarchar(15)
    declare @malludsremarks nvarchar(max)
    declare @malludsdwdb int
    -- Never assigned below (its source column is commented out of the cursor),
    -- so it is written as NULL.
    declare @mallsecterr int
    declare @Maxmallid int
    declare @mallentityid int

    -- Table id of the 'Mall' custom table; passed to crm_next_id below.
    Select @mallentityid = customtable.Bord_TableId
    From CRM.dbo.Custom_Tables as customtable With (NoLock)
    Where Upper(Bord_Caption) = Upper('Mall') And Bord_Deleted Is Null

    DECLARE cur_address CURSOR FOR
    SELECT
        udsasset.Asset_ID, udsasset.Asset_Name, udsasset.Asset_Status, udsasset.UOM_ID,
        udsasset.Asset_Code, udsasset.Asset_Type, udsasset.Remarks, udsasset.DW_Key_Source_DB --,crmterr.TPro_SecTerr
    from
        CMA_UDS.dbo.Dim_Asset as udsasset

    OPEN cur_address
    FETCH NEXT FROM cur_address INTO @mallid, @mallname, @mallstatus, @malludsuomid,
        @malludsassetcode, @malludsassettype, @malludsremarks, @malludsdwdb --,@mallsecterr
    WHILE @@FETCH_STATUS = 0
    BEGIN
        -- NOTE(review): existence is tested on mall_MallID = source Asset_ID,
        -- but the insert below stores the generated id in mall_MallID and the
        -- Asset_ID in mall_asset_id. Confirm which column is meant to match.
        if not exists (select crmmall.mall_MallID from CRM.dbo.Mall as crmmall where crmmall.mall_MallID = @mallid)
        begin
            exec @Maxmallid = CRM.dbo.crm_next_id @Table_Id=@mallentityid
            insert into
                CRM.dbo.Mall
                (mall_MallID, mall_Name, mall_Status, mall_uds_UOMID, mall_uds_asset_code, mall_uds_asset_type,
                 mall_uds_remarks, mall_uds_dw_db, mall_CreatedBy, mall_CreatedDate, mall_Secterr, mall_AMOSUploaded, mall_asset_id)
            values (@Maxmallid, @mallname, @mallstatus, @malludsuomid, @malludsassetcode, @malludsassettype,
                    @malludsremarks, @malludsdwdb, 1, GETDATE(),
                    @mallsecterr, GETDATE(), @mallid)
        end
        else
        begin
            update
                CRM.dbo.Mall
            set
                mall_asset_id = @mallid, mall_Name = @mallname, mall_Status = @mallstatus,
                mall_uds_UOMID = @malludsuomid, mall_uds_asset_code = @malludsassetcode,
                mall_uds_asset_type = @malludsassettype, mall_uds_remarks = @malludsremarks,
                mall_uds_dw_db = @malludsdwdb, mall_UpdatedBy = 1,
                mall_UpdatedDate = GETDATE(), mall_Secterr = @mallsecterr, mall_AMOSUploaded = GETDATE()
            where
                mall_MallID = @mallid
        end
        FETCH NEXT FROM cur_address INTO @mallid, @mallname, @mallstatus, @malludsuomid,
            @malludsassetcode, @malludsassettype, @malludsremarks, @malludsdwdb --,@mallsecterr
    end
    CLOSE cur_address
    DEALLOCATE cur_address
End
Why are you inserting crm_next_id as the value in mall_MallID, but using that same id to compare with @mallid to see if the record is already inserted? For example, if you have id 5, and you insert a new record with id 150, it's not going to see that the record is already inserted when you run the SP again. Next run, it will add record with id 151, then 152, and so forth forever. You shouldn't use the same field as both an auto-increment identity and a foreign key reference at the same time...
You either need to use the same @mallid when you insert the new records so they match, or after you generate a new id and insert into the table, update the original record CMA_UDS.dbo.Dim_Asset to have Asset_ID = @mallid so they are linked up properly. Which method you use depends on the meanings of those id's and what constraints you have in your particular application.
All, I want to start the numbering of an IDENTITY field based on the current maximum obtained from another table. So I have tried something like the following
-- Attempt to seed a new IDENTITY column from the maximum stored in another
-- table. SQL Server rejects the ALTER TABLE because the IDENTITY seed must
-- be a literal, not a variable.
DECLARE @CurrentES INT;
SET @CurrentES = (SELECT MaxES
                  FROM [NDB]..[TmpMaxES]) + 1;
ALTER TABLE BA
ADD ES INT IDENTITY(@CurrentES, 1);
But this will not accept a variable as the seed value in IDENTITY. How can what I require be achieved?
Thanks for your time.
To do this and other tasks that do not allow variables, you can build the statement as a string and run it with EXEC, as follows:
-- Build the ALTER TABLE as text so the computed seed can be spliced in as a
-- literal, then execute it dynamically.
DECLARE @CurrentES INT;
SET @CurrentES = (SELECT MaxES
                  FROM [NDB]..[TmpMaxES]) + 1;
DECLARE @Statement VARCHAR(200)
-- VARCHAR(11) holds any INT, including the sign; the value is an INT read
-- from a table, so nothing user-supplied is concatenated here.
SET @Statement = 'ALTER TABLE BA
ADD ES INT IDENTITY(' + CAST(@CurrentES AS VARCHAR(11)) + ', 1);'
EXEC (@Statement)
You could use the dbcc checkident feature of SQL Server...
-- Reseed NEWTABLE's identity so it continues from OLDTABLE's current maximum.
DECLARE @MAXID INT
SELECT @MAXID = MAX(ID_FIELD) FROM OLDTABLE
-- Table name passed as a quoted string, as in the documented syntax.
DBCC CHECKIDENT ('NEWTABLE', RESEED, @MAXID)
One thing to note with this is that the value in the 3rd parameter (in this case the @MAXID variable) denotes the current identity value - in other words the last identity value that was generated on the table.
So, for example, if you want the next value that is automatically created to be 100, then set the 3rd parameter to 99.
-- Inserts one row into dbo.nameAndroid with an explicitly supplied id,
-- unless a row with the same barcode already exists.
--first variable
declare @code varchar(50);
-- Quoted: the variable is a varchar, so assign a string, not a number.
set @code='1345688867567576';
--second variable
declare @namedb varchar(50);
set @namedb='test';
--lets you write explicit values into the identity (id) column
SET IDENTITY_INSERT dbo.nameAndroid ON
--declaring variable to hold the next id number
declare @id int;
-- @@IDENTITY is the last identity value generated in this session.
set @id=@@IDENTITY +1;
--check that the table has no row with a matching barcode
if not exists (select * from dbo.nameAndroid where barcode = @code)
INSERT INTO dbo.nameAndroid (id, name, barcode, [floor], Column1,Column2,Row1,Row2,Shelf,Stock,OnOrder)
VALUES ( @id,@namedb, @code, 'Value3', 'Value4','Value5','Value6','Value7','Value8',123,600);
SET IDENTITY_INSERT dbo.nameAndroid OFF;
OR (if the id column is of type int)
-- Inserts one row into dbo.nameAndroid, using max(id) + 1 as the explicit id,
-- unless a row with the same barcode already exists.
declare @code varchar(50);
set @code='123211';
declare @namedb varchar(50);
set @namedb='test';
declare @floordb varchar(50);
set @floordb='test';
declare @Column1db varchar(50);
set @Column1db='test';
declare @Column2db varchar(50);
set @Column2db='test';
declare @Row1db varchar(50);
set @Row1db='test';
declare @Row2db varchar(50);
set @Row2db='test';
declare @Shelfdb varchar(50);
set @Shelfdb='test';
declare @OnOrderdb decimal(18,2);
set @OnOrderdb=10010;
declare @Stockdb decimal(18,2);
set @Stockdb=1010101;
declare @id int;
-- ISNULL guards the empty-table case, where MAX(id) is NULL and NULL + 1
-- would make the insert fail on a NULL id.
set @id=(ISNULL((select max(id) from dbo.nameAndroid), 0) + 1);
if not exists (select * from dbo.nameAndroid where barcode = @code)
begin
SET IDENTITY_INSERT dbo.nameAndroid ON;
-- Values follow the column list order: ..., Shelf, Stock, OnOrder.
INSERT INTO dbo.nameAndroid (id, name, barcode, [floor], Column1,Column2,Row1,Row2,Shelf,Stock,OnOrder)
VALUES (@id, @namedb, @code, @floordb, @Column1db,@Column2db,@Row1db,@Row2db,@Shelfdb,@Stockdb,@OnOrderdb);
SET IDENTITY_INSERT dbo.nameAndroid OFF;
end
Try something like this..
SET IDENTITY_INSERT [MyTable] ON
INSERT INTO [MyTable] ... (MAX) Value from another table and other applicable record.
...
SET IDENTITY_INSERT [MyTable] OFF