I have two functions in SQL Server that I'm trying to recreate in Snowflake. I would like to turn them into a CTE instead, as I'm having many issues with them being functions (they are very picky about the date parameters passed through).
I'm not quite thinking about it in the right way. We pass two parameters through, a date and an int, and the function returns an INT value for us. I'm kind of "stuck".
--Function 1: Straight from SQL server
-- Returns the ID of the Shift row that covers @dateTime within a shift calendar.
--   @dateTime        : the moment to look up.
--   @shiftCalendarID : calendar to use. When NULL, the calendar named 'Factory' with the
--                      latest BeginDate on or before @dateTime is used instead.
-- Returns NULL when no calendar or no shift matches.
ALTER FUNCTION [dbo].[cfn_GetShiftIDFromDateTime] (
    @dateTime datetime,
    @shiftCalendarID int
)
RETURNS int
AS
BEGIN
    DECLARE
        @time time = CONVERT( time, @dateTime ),
        @periodInDays int,
        @curDay int,
        @prvDay int,
        @shiftID int;

    -- Resolve the calendar and the 1-based day number inside its repeating period.
    SELECT TOP 1
        @shiftCalendarID = ID,
        @periodInDays = PeriodInDays,
        @curDay = DATEDIFF( dd, BeginDate, @dateTime ) % PeriodInDays + 1
    FROM ShiftCalendar
    WHERE ID = @shiftCalendarID
       OR ( @shiftCalendarID IS NULL
            AND Name = 'Factory'
            AND BeginDate <= @dateTime )
    ORDER BY BeginDate DESC;

    -- Previous day of the period (wraps from day 1 back to the last day). Computed in its
    -- own statement so it never depends on the order in which a single SELECT assigns
    -- its variables.
    SET @prvDay = ( @curDay + @periodInDays - 2 ) % @periodInDays + 1;

    -- A shift matches when:
    --   1. it starts today and the time lies inside [From, Till), or
    --   2. it starts today, wraps past midnight (From >= Till) and has already started, or
    --   3. it started on the previous period day, wraps past midnight and has not ended yet.
    SELECT @shiftID = ID
    FROM Shift
    WHERE ShiftCalendarID = @shiftCalendarID
      AND ( ( FromDay = @curDay AND FromTimeOfDay <= @time AND TillTimeOfDay > @time )
         OR ( FromDay = @curDay AND FromTimeOfDay >= TillTimeOfDay AND FromTimeOfDay <= @time )
         OR ( FromDay = @prvDay AND FromTimeOfDay >= TillTimeOfDay AND TillTimeOfDay > @time )
          );

    RETURN @shiftID;
END
--GO
I had help from another user with this one and was able to get the function written in Snowflake; it seems to be working properly. Here it is:
--Function 1 -- Snowflake syntax, currently working
-- Tabular UDF: for a timestamp and an optional shift calendar ID, yields the ID of every
-- Shift row whose day/time window covers that timestamp.
CREATE OR REPLACE FUNCTION DB_BI_DEV.RAW_CPMS_AAR.cfn_GetShiftIDFromDateTime (dateTime TIMESTAMP_NTZ(9), shiftCalendarID int)
RETURNS TABLE (shiftID int)
AS
$$
    WITH T1 (TimeValue) AS (
        -- Time-of-day part of the input, built from its hour, minute and second.
        SELECT TIME_FROM_PARTS(
            EXTRACT(HOUR FROM dateTime),
            EXTRACT(MINUTE FROM dateTime),
            EXTRACT(SECOND FROM dateTime))
    ),
    T0 (ShiftCalendarID, CurDay, PrvDay) AS (
        -- Calendar to use, plus the 1-based current and previous day within its period.
        SELECT TOP 1
            ID AS ShiftCalendarID,
            DATEDIFF( day, BeginDate, dateTime ) % PeriodInDays + 1 AS CurDay,
            ( CurDay + PeriodInDays - 2 ) % PeriodInDays + 1 AS PrvDay
        FROM RAW_CPMS_AAR.ShiftCalendar
        WHERE ID = shiftCalendarID
           OR ( shiftCalendarID IS NULL AND Name = 'Factory' AND BeginDate <= dateTime )
        ORDER BY BeginDate DESC
    )
    SELECT ID AS shiftID
    FROM RAW_CPMS_AAR.Shift
    INNER JOIN T0
        ON Shift.ShiftCalendarID = T0.ShiftCalendarID
    CROSS JOIN T1
    WHERE
        -- Shift starting today: either the time is before its end, or it wraps past midnight.
        ( FromDay = T0.CurDay
          AND FromTimeOfDay <= T1.TimeValue
          AND ( TillTimeOfDay > T1.TimeValue OR FromTimeOfDay >= TillTimeOfDay ) )
        -- Wrapping shift that started on the previous period day and has not ended yet.
        OR ( FromDay = T0.PrvDay
             AND FromTimeOfDay >= TillTimeOfDay
             AND TillTimeOfDay > T1.TimeValue )
$$
;
here is function 2:
--Function 2: Straight from SQL server
-- Returns the shift calendar ID that applies to a piece of equipment at a given moment.
--   @equipmentID : equipment to resolve; its parents are searched while the current
--                  level has no ShiftCalendarEntityNumber.
--   @date        : moment of interest; defaults to GETDATE() when NULL.
-- Falls back to the most recently started calendar before @date when the equipment
-- hierarchy yields no calendar.
ALTER FUNCTION [dbo].[cfn_GetEquipmentShiftCalendarID] ( @equipmentID int, @date datetime )
RETURNS int
AS
BEGIN
    IF @date IS NULL SET @date = GETDATE();

    DECLARE
        @shiftCalendarID int,
        @endDate date;  -- only referenced by the commented-out alternative RETURN below

    -- Walk up the equipment tree until a level carries a ShiftCalendarEntityNumber.
    WITH cte ( ID, ParentEquipmentID, ShiftCalendarEntityNumber ) AS (
        SELECT ID, ParentEquipmentID, ShiftCalendarEntityNumber
        FROM Equipment
        WHERE ID = @equipmentID
        UNION ALL
        SELECT p.ID, p.ParentEquipmentID, p.ShiftCalendarEntityNumber
        FROM cte
        INNER JOIN Equipment p
            ON p.ID = cte.ParentEquipmentID
           AND cte.ShiftCalendarEntityNumber IS NULL
    )
    SELECT TOP 1
        @shiftCalendarID = sc.ID,
        @endDate = sc.EndDate
    FROM cte
    INNER JOIN ShiftCalendar sc
        ON sc.EntityNumber = cte.ShiftCalendarEntityNumber
    WHERE sc.BeginDate <= @date
    ORDER BY
        CASE WHEN sc.EndDate IS NULL OR sc.EndDate > @date THEN 1 ELSE 2 END, -- Prio on date range
        sc.BeginDate DESC;

    IF @shiftCalendarID IS NULL
    BEGIN
        -- Default to the last created calendar we find that started before the given time
        SELECT TOP 1 @shiftCalendarID = ID
        FROM ShiftCalendar
        WHERE BeginDate < @date
        ORDER BY BeginDate DESC;
    END;

    RETURN @shiftCalendarID; -- CASE WHEN @endDate IS NULL OR @endDate > @date THEN @shiftCalendarID END; -- Return NULL when no matching date range found?
END
GO
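This one I was able to rewrite in Snowflake, but the IF statement isn't working. I am not sure whether a Snowflake function can use an IF statement. (Note: the code pasted below is a repeat of Function 1, not the Snowflake rewrite of Function 2.)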
-- Returns the ID of the Shift row that covers @dateTime within a shift calendar.
--   @dateTime        : the moment to look up.
--   @shiftCalendarID : calendar to use. When NULL, the calendar named 'Factory' with the
--                      latest BeginDate on or before @dateTime is used instead.
-- Returns NULL when no calendar or no shift matches.
ALTER FUNCTION [dbo].[cfn_GetShiftIDFromDateTime] (
    @dateTime datetime,
    @shiftCalendarID int
)
RETURNS int
AS
BEGIN
    DECLARE
        @time time = CONVERT( time, @dateTime ),
        @periodInDays int,
        @curDay int,
        @prvDay int,
        @shiftID int;

    -- Resolve the calendar and the 1-based day number inside its repeating period.
    SELECT TOP 1
        @shiftCalendarID = ID,
        @periodInDays = PeriodInDays,
        @curDay = DATEDIFF( dd, BeginDate, @dateTime ) % PeriodInDays + 1
    FROM ShiftCalendar
    WHERE ID = @shiftCalendarID
       OR ( @shiftCalendarID IS NULL
            AND Name = 'Factory'
            AND BeginDate <= @dateTime )
    ORDER BY BeginDate DESC;

    -- Previous day of the period (wraps from day 1 back to the last day). Computed in its
    -- own statement so it never depends on the order in which a single SELECT assigns
    -- its variables.
    SET @prvDay = ( @curDay + @periodInDays - 2 ) % @periodInDays + 1;

    -- A shift matches when it starts today and contains the time, or when it wraps past
    -- midnight (From >= Till) and either started today or started on the previous day
    -- and has not ended yet.
    SELECT @shiftID = ID
    FROM Shift
    WHERE ShiftCalendarID = @shiftCalendarID
      AND ( ( FromDay = @curDay AND FromTimeOfDay <= @time AND TillTimeOfDay > @time )
         OR ( FromDay = @curDay AND FromTimeOfDay >= TillTimeOfDay AND FromTimeOfDay <= @time )
         OR ( FromDay = @prvDay AND FromTimeOfDay >= TillTimeOfDay AND TillTimeOfDay > @time )
          );

    RETURN @shiftID;
END
--GO
So how are these functions used? I have a view which I was able to recreate in Snowflake, except for the part that calls the functions.
-- Actual-unit counts per equipment and half-hour bucket, labelled with the shift that
-- was active at the start of the bucket.
ALTER VIEW [proj].[pvw_PowerBI_ActualUnits]
AS
SELECT
    e.Name AS ProductionUnit,
    temp.DateTime AS DateTime,
    s.Reference AS Shift,
    CONVERT(TIME, temp.DateTime) AS Time,
    -- Production date: the bucket time shifted back by the earliest shift start of the
    -- same FromDay and calendar (06:00 when none is found), then cut to a date.
    CONVERT(DATE, temp.DateTime - ISNULL(
        (SELECT CAST(MIN(s_first.FromTimeOfDay) AS DateTime)
         FROM [Shift] s_first
         WHERE s_first.FromDay = s.FromDay
           AND s_first.ShiftCalendarID = s.ShiftCalendarID),
        CAST('6:00' AS DateTime))) AS ProductionDate,
    'Actual Units' AS ScrapReason,
    temp.ScrapQuantity AS ScrapQuantity,
    'Auto Registered' AS RegistrationType,
    s.ID
FROM
    (SELECT
        vl.EquipmentID AS ProductionUnit,
        -- Truncate BeginTime to the hour, then add 0 or 30 minutes (integer division).
        DATEADD(MINUTE, 30 * (DATEPART(MINUTE, vl.BeginTime) / 30), DATEADD(HOUR, DATEDIFF(HOUR, 0, vl.BeginTime), 0)) AS DateTime,
        SUM(vl.Quantity) AS ScrapQuantity
     FROM oee.ValueLog vl WITH (NOLOCK)
     INNER JOIN KPIInstance ki
        ON ki.ID = vl.KPIInstanceID
       AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
     GROUP BY
        DATEADD(MINUTE, 30 * (DATEPART(MINUTE, vl.BeginTime) / 30), DATEADD(HOUR, DATEDIFF(HOUR, 0, vl.BeginTime), 0)),
        vl.EquipmentID) temp
INNER JOIN Equipment e
    ON e.ID = temp.ProductionUnit
INNER JOIN Shift s
    ON s.ID = dbo.cfn_GetShiftIDFromDateTime(temp.DateTime, dbo.cfn_GetEquipmentShiftCalendarID(temp.ProductionUnit, temp.DateTime)) -- here is where the functions are called
I was able to rewrite this in Snowflake for the most part, minus the function calls.
-- Work-in-progress Snowflake port of the view query.
-- NOTE(review): the lookup that ties each bucket to its shift (the two function calls in
-- the SQL Server view) is still missing, so Shift and shiftcalendar_cte are only
-- cross-joined here; shiftcalendar_cte is not defined in this snippet.
SELECT
    e.Name AS ProductionUnit,
    temp.DateTime AS DateTime,
    s.Reference AS Shift,
    temp.DateTime::TIME AS Time,
    --CONVERT(DATE, temp.DateTime - ISNULL((SELECT CAST(MIN(s_first.FromTimeOfDay) AS DateTime) FROM [Shift] s_first WHERE s_first.FromDay = s.FromDay AND s_first.ShiftCalendarID = s.ShiftCalendarID), CAST('6:00' AS DateTime))) AS ProductionDate,
    -- Shift the bucket back by the hour of the earliest shift start (6 hours when none
    -- is found) and keep only the date, as the T-SQL CONVERT(DATE, ...) does.
    -- NOTE(review): only whole hours are subtracted; minutes of the shift start are ignored.
    IFNULL(
        DATEADD(HOUR,
                -HOUR((SELECT MIN(s_first.FromTimeOfDay)
                       FROM RAW_CPMS_AAR.Shift s_first
                       WHERE s_first.FromDay = s.FromDay
                         AND s_first.ShiftCalendarID = s.ShiftCalendarID)),
                temp.DateTime),
        DATEADD(HOUR, -6, temp.DateTime)
    )::DATE AS ProductionDate,
    'Actual Units' AS ScrapReason,
    temp.ScrapQuantity AS ScrapQuantity,
    'Auto Registered' AS RegistrationType
FROM
    (SELECT
        vl.EquipmentID AS ProductionUnit,
        -- Half-hour bucket. Snowflake's "/" is not integer division, so the minute
        -- count is floored explicitly before scaling back up to 0 or 30.
        TIMEADD(MINUTE, FLOOR(MINUTE(vl.BeginTime) / 30) * 30, DATE_TRUNC('HOUR', vl.BeginTime)) AS DateTime,
        SUM(vl.Quantity) AS ScrapQuantity
     FROM RAW_CPMS_AAR.ValueLog vl
     INNER JOIN KPIInstance ki
        ON ki.ID = vl.KPIInstanceID
       AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
     GROUP BY
        TIMEADD(MINUTE, FLOOR(MINUTE(vl.BeginTime) / 30) * 30, DATE_TRUNC('HOUR', vl.BeginTime)),
        vl.EquipmentID) AS temp
CROSS JOIN shiftcalendar_cte
INNER JOIN RAW_CPMS_AAR.Equipment e
    ON e.ID = temp.ProductionUnit
CROSS JOIN RAW_CPMS_AAR.Shift s
Now I don't know if there is a better way to do this; maybe a CTE is better. I know functions are not resource friendly, and I'm simply trying to recreate this. I'm open to any ideas or help.
ok, some small translations:
DATEADD(HOUR, DATEDIFF(HOUR, 0, vl.BeginTime), 0)
is truncating to the hour, so is the same as
date_trunc('HOUR', vl.BeginTime)
that dateadd thus is:
-- Demonstrates the half-hour bucketing on two sample timestamps: the hour truncation,
-- the minute offset (0 or 30), and the two combined.
SELECT
    column1,
    date_trunc('hour', column1) AS t_hour,
    truncate(minute(column1)/30)*30,
    timeadd('minute', truncate(minute(column1)/30)*30, date_trunc('hour', column1)) AS datetime
FROM VALUES
    ('2022-11-14 13:26:01'::timestamp_ntz),
    ('2022-11-14 12:34:01'::timestamp_ntz);
COLUMN1
T_HOUR
TRUNCATE(MINUTE(COLUMN1)/30)*30
DATETIME
2022-11-14 13:26:01.000
2022-11-14 13:00:00.000
0
2022-11-14 13:00:00.000
2022-11-14 12:34:01.000
2022-11-14 12:00:00.000
30
2022-11-14 12:30:00.000
So moving that sub-select that you alias as temp into a CTE, and get that "working"
-- Stand-alone check of the "temp" sub-select: inline sample tables, then the half-hour
-- aggregation run as the final SELECT (the CTE wrapper is left commented out).
WITH table_oee_valuelog (equipmentid, begintime, quantity, kpiinstanceid) AS (
    SELECT * FROM VALUES
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 10, 100),
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 11, 100),
        (1, '2022-11-14 13:34:01'::timestamp_ntz, 12, 100)
), table_kpiinstance (id, kpidefinitionid) AS (
    SELECT * FROM VALUES
        (100, 'COUNT-OUT:extra stuff')
)--, temp_sub_select as (
SELECT
    vl.equipmentid AS productionunit,
    timeadd('minute', truncate(minute(vl.BeginTime)/30)*30, date_trunc('hour', vl.BeginTime)) AS datetime,
    SUM(vl.quantity) AS scrapquantity
FROM table_oee_valuelog AS vl
INNER JOIN table_kpiinstance AS ki
    ON ki.ID = vl.KPIInstanceID
   AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
GROUP BY 1, 2
;
PRODUCTIONUNIT
DATETIME
SCRAPQUANTITY
1
2022-11-14 13:00:00.000
21
1
2022-11-14 13:30:00.000
12
implementing cfn_GetEquipmentShiftCalendarID
so if we extend our table data a bit more we can take a first crack at cfn_GetEquipmentShiftCalendarID like:
-- First cut of cfn_GetEquipmentShiftCalendarID as a CTE: resolve every piece of
-- equipment to the entity number found on itself or an ancestor, then keep the
-- calendar with the latest begin date for that entity number.
WITH table_oee_valuelog (equipmentid, begintime, quantity, kpiinstanceid) AS (
    SELECT * FROM VALUES
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 10, 100),
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 11, 100),
        (1, '2022-11-14 13:34:01'::timestamp_ntz, 12, 100)
), table_kpiinstance (id, kpidefinitionid) AS (
    SELECT * FROM VALUES
        (100, 'COUNT-OUT:extra stuff')
), table_equipment (id, name, parentequipmentid, shiftcalendarentitynumber) AS (
    SELECT * FROM VALUES
        (1, 'equipment one', 2, null),
        (2, 'equipment two', null, 80),
        (3, 'equipment three', 4, 81),
        (4, 'equipment four', null, 82)
), table_shift (id, shiftcalendarid, reference) AS (
    SELECT * FROM VALUES
        (1001, 9001, 'a')
), table_shiftcalendar (id, entitynumber, begindate, enddate) AS (
    SELECT * FROM VALUES
        (699, 80, '2021-01-01'::date, '2021-12-31'::date),
        (700, 80, '2022-01-01'::date, '2022-12-31'::date),
        --(701, 81, '2022-02-01'::date, '2022-11-30'::date),
        (702, 82, '2022-10-01'::date, '2022-11-15'::date)
), cte_GetEquipmentShiftCalendarID/*(id, shiftCalendarID)*/ AS (
    WITH RECURSIVE rec_cte (id, parentequipmentid, shiftcalendarentitynumber) AS (
        -- Anchor: every piece of equipment starts at itself.
        SELECT
            ID,
            ParentEquipmentID,
            ShiftCalendarEntityNumber
        FROM table_equipment
        UNION ALL
        -- Step: keep the starting ID, move one level up while no entity number is set.
        SELECT
            walk.ID,
            parent.ParentEquipmentID,
            parent.ShiftCalendarEntityNumber
        FROM rec_cte AS walk
        INNER JOIN table_equipment parent
            ON parent.ID = walk.ParentEquipmentID
           AND walk.ShiftCalendarEntityNumber IS NULL
    )
    SELECT *
    FROM rec_cte AS eq
    LEFT JOIN table_shiftcalendar AS cal
        ON cal.entitynumber = eq.ShiftCalendarEntityNumber
    WHERE eq.shiftcalendarentitynumber IS NOT NULL
    QUALIFY ROW_NUMBER() OVER (PARTITION BY eq.id ORDER BY cal.begindate DESC) = 1
)
SELECT * FROM cte_GetEquipmentShiftCalendarID;
This is still missing the @date-based filters and the catch-all fallback; because it produces the "latest" calendar for every bit of equipment, those cannot be applied yet.
ID
PARENTEQUIPMENTID
SHIFTCALENDARENTITYNUMBER
ID_2
ENTITYNUMBER
BEGINDATE
ENDDATE
1
80
700
80
2022-01-01
2022-12-31
2
80
700
80
2022-01-01
2022-12-31
3
4
81
4
82
702
82
2022-10-01
2022-11-15
so we need to weave this current data, with the temp table, how convenient we made it a CTE already...
so the next partial step is:
-- Next partial step: join the half-hour buckets to every candidate calendar of the
-- equipment and rank the candidates the way the T-SQL ORDER BY does
-- (calendars still open at the bucket time first, then latest begin date).
WITH table_oee_valuelog (equipmentid, begintime, quantity, kpiinstanceid) AS (
    SELECT * FROM VALUES
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 10, 100),
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 11, 100),
        (1, '2022-11-14 13:34:01'::timestamp_ntz, 12, 100),
        (2, '2022-11-14 13:34:01'::timestamp_ntz, 20, 100),
        (3, '2022-11-14 13:34:01'::timestamp_ntz, 30, 100),
        (4, '2022-11-14 13:34:01'::timestamp_ntz, 44, 100)
), table_kpiinstance (id, kpidefinitionid) AS (
    SELECT * FROM VALUES
        (100, 'COUNT-OUT:extra stuff')
), table_equipment (id, name, parentequipmentid, shiftcalendarentitynumber) AS (
    SELECT * FROM VALUES
        (1, 'equipment one', 2, null),
        (2, 'equipment two', null, 80),
        (3, 'equipment three', 4, 81),
        (4, 'equipment four', null, 82)
), table_shift (id, shiftcalendarid, reference) AS (
    SELECT * FROM VALUES
        (1001, 9001, 'a')
), table_shiftcalendar (id, entitynumber, begindate, enddate) AS (
    SELECT * FROM VALUES
        (699, 80, '2021-01-01'::date, '2021-12-31'::date),
        (700, 80, '2022-01-01'::date, '2022-12-31'::date),
        (701, 80, '2023-01-01'::date, '2023-12-31'::date),
        --(701, 81, '2022-02-01'::date, '2022-11-30'::date),
        (702, 82, '2022-10-01'::date, '2022-11-15'::date)
), temp_sub_select AS (
    -- Quantity per equipment and half-hour bucket.
    SELECT
        vl.equipmentid AS productionunit,
        timeadd('minute', truncate(minute(vl.BeginTime)/30)*30, date_trunc('hour', vl.BeginTime)) AS datetime,
        SUM(vl.quantity) AS scrapquantity
    FROM table_oee_valuelog AS vl
    INNER JOIN table_kpiinstance AS ki
        ON ki.ID = vl.KPIInstanceID
       AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
    GROUP BY 1, 2
), cte_GetEquipmentShiftCalendarID_part_a/*(id, shiftCalendarID)*/ AS (
    -- Every calendar reachable from each piece of equipment via itself or its ancestors.
    WITH RECURSIVE rec_cte (id, parentequipmentid, shiftcalendarentitynumber) AS (
        SELECT
            ID,
            ParentEquipmentID,
            ShiftCalendarEntityNumber
        FROM table_equipment
        UNION ALL
        SELECT
            r.ID,
            p.ParentEquipmentID,
            p.ShiftCalendarEntityNumber
        FROM rec_cte AS r
        INNER JOIN table_equipment p
            ON p.ID = r.ParentEquipmentID
           AND r.ShiftCalendarEntityNumber IS NULL
    )
    SELECT
        c.id,
        sc.id AS shiftCalendarID,
        sc.begindate,
        sc.enddate
    FROM rec_cte AS c
    LEFT JOIN table_shiftcalendar AS sc
        ON sc.entitynumber = c.ShiftCalendarEntityNumber
    WHERE shiftcalendarentitynumber IS NOT NULL
)--, last_calendar_per_equipment as (
SELECT *
    ,iff(c.enddate IS NULL OR c.enddate > t.datetime, 1, 2) AS order_a
    ,row_number() OVER (PARTITION BY t.productionunit, t.datetime ORDER BY order_a, c.begindate DESC) AS rn
FROM temp_sub_select AS t
LEFT JOIN cte_GetEquipmentShiftCalendarID_part_a AS c
    ON t.productionunit = c.id
   AND c.begindate <= t.datetime
;
this gives:
PRODUCTIONUNIT
DATETIME
SCRAPQUANTITY
ID
SHIFTCALENDARID
BEGINDATE
ENDDATE
ORDER_A
RN
1
2022-11-14 13:00:00.000
21
1
700
2022-01-01
2022-12-31
1
1
1
2022-11-14 13:00:00.000
21
1
699
2021-01-01
2021-12-31
2
2
1
2022-11-14 13:30:00.000
12
1
700
2022-01-01
2022-12-31
1
1
1
2022-11-14 13:30:00.000
12
1
699
2021-01-01
2021-12-31
2
2
2
2022-11-14 13:30:00.000
20
2
700
2022-01-01
2022-12-31
1
1
2
2022-11-14 13:30:00.000
20
2
699
2021-01-01
2021-12-31
2
2
3
2022-11-14 13:30:00.000
30
1
1
4
2022-11-14 13:30:00.000
44
4
702
2022-10-01
2022-11-15
1
1
last step of this function can be handled with this CTE which we can join to the prior results, and take if the prior results are null.
-- Fallback calendar per bucket time: of all calendars that began on or before the
-- bucket, keep the one with the latest begin date.
SELECT
    slots.datetime,
    cal.id
FROM (
    SELECT DISTINCT datetime
    FROM temp_sub_select
) AS slots
INNER JOIN table_shiftcalendar AS cal
    ON cal.begindate <= slots.datetime
QUALIFY ROW_NUMBER() OVER (PARTITION BY slots.datetime ORDER BY cal.begindate DESC) = 1
weave those together and a little data change:
-- cfn_GetEquipmentShiftCalendarID unrolled into CTEs: the equipment-based pick (part_b)
-- next to the fallback calendar per bucket time, combined with nvl() in the final SELECT.
WITH table_oee_valuelog (equipmentid, begintime, quantity, kpiinstanceid) AS (
    SELECT * FROM VALUES
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 10, 100),
        (1, '2022-11-14 13:26:01'::timestamp_ntz, 11, 100),
        (1, '2022-11-14 13:34:01'::timestamp_ntz, 12, 100),
        (2, '2022-11-14 13:34:01'::timestamp_ntz, 20, 100),
        (3, '2022-11-14 13:34:01'::timestamp_ntz, 30, 100),
        (4, '2022-11-14 13:34:01'::timestamp_ntz, 44, 100)
), table_kpiinstance (id, kpidefinitionid) AS (
    SELECT * FROM VALUES
        (100, 'COUNT-OUT:extra stuff')
), table_equipment (id, name, parentequipmentid, shiftcalendarentitynumber) AS (
    SELECT * FROM VALUES
        (1, 'equipment one', 2, null),
        (2, 'equipment two', null, 80),
        (3, 'equipment three', 4, 81),
        (4, 'equipment four', null, 82)
), table_shift (id, shiftcalendarid, reference) AS (
    SELECT * FROM VALUES
        (1001, 9001, 'a')
), table_shiftcalendar (id, entitynumber, begindate, enddate) AS (
    SELECT * FROM VALUES
        (699, 80, '2021-01-01'::date, '2021-12-31'::date),
        (700, 80, '2022-01-01'::date, '2022-12-31'::date),
        (701, 80, '2023-01-01'::date, '2023-12-31'::date),
        --(701, 81, '2022-02-01'::date, '2022-11-30'::date),
        (702, 82, '2022-10-01'::date, '2022-11-15'::date),
        (703, 89, '2022-11-01'::date, '2022-11-15'::date)
), temp_sub_select AS (
    -- Quantity per equipment and half-hour bucket.
    SELECT
        vl.equipmentid AS productionunit,
        timeadd('minute', truncate(minute(vl.BeginTime)/30)*30, date_trunc('hour', vl.BeginTime)) AS datetime,
        SUM(vl.quantity) AS scrapquantity
    FROM table_oee_valuelog AS vl
    INNER JOIN table_kpiinstance AS ki
        ON ki.ID = vl.KPIInstanceID
       AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
    GROUP BY 1, 2
), cte_GetEquipmentShiftCalendarID_part_a/*(id, shiftCalendarID)*/ AS (
    -- Every calendar reachable from each piece of equipment via itself or its ancestors.
    WITH RECURSIVE rec_cte (id, parentequipmentid, shiftcalendarentitynumber) AS (
        SELECT
            ID,
            ParentEquipmentID,
            ShiftCalendarEntityNumber
        FROM table_equipment
        UNION ALL
        SELECT
            r.ID,
            p.ParentEquipmentID,
            p.ShiftCalendarEntityNumber
        FROM rec_cte AS r
        INNER JOIN table_equipment p
            ON p.ID = r.ParentEquipmentID
           AND r.ShiftCalendarEntityNumber IS NULL
    )
    SELECT
        c.id,
        sc.id AS shiftCalendarID,
        sc.begindate,
        sc.enddate
    FROM rec_cte AS c
    LEFT JOIN table_shiftcalendar AS sc
        ON sc.entitynumber = c.ShiftCalendarEntityNumber
    WHERE shiftcalendarentitynumber IS NOT NULL
), cte_GetEquipmentShiftCalendarID_part_b AS (
    -- Best equipment-based calendar per bucket: calendars still open at the bucket time
    -- rank first, then the latest begin date.
    SELECT
        t.productionunit,
        t.org_datetime,
        t.datetime,
        c.shiftCalendarID
    FROM (
        SELECT
            productionunit,
            datetime AS org_datetime,
            -- NOTE(review): the T-SQL default is GETDATE(), a timestamp; CURRENT_DATE
            -- is midnight of today — confirm this is acceptable.
            nvl(datetime, CURRENT_DATE) AS datetime /* handle the null case from the T-SQL */
        FROM temp_sub_select
    ) AS t
    LEFT JOIN cte_GetEquipmentShiftCalendarID_part_a AS c
        ON t.productionunit = c.id
       AND c.begindate <= t.datetime
    QUALIFY row_number() OVER (PARTITION BY t.productionunit, t.datetime
        ORDER BY iff(c.enddate IS NULL OR c.enddate > t.datetime, 1, 2), c.begindate DESC) = 1
), max_shiftCalendar_per_datetime AS (
    -- Fallback: latest calendar that began on or before each bucket time.
    SELECT
        t.datetime,
        sc.id
    FROM (
        SELECT DISTINCT nvl(datetime, CURRENT_DATE) AS datetime
        FROM temp_sub_select
    ) AS t
    JOIN table_shiftcalendar AS sc
        ON sc.begindate <= t.datetime
    QUALIFY row_number() OVER (PARTITION BY t.datetime ORDER BY sc.begindate DESC) = 1
)--, last_calendar_per_equipment as (
SELECT
    a.productionunit
    ,a.org_datetime
    ,a.shiftCalendarID, b.id
    ,nvl(a.shiftCalendarID, b.id) AS shiftCalendarID
FROM cte_GetEquipmentShiftCalendarID_part_b AS a
JOIN max_shiftCalendar_per_datetime AS b
    ON a.datetime = b.datetime
;
gives:
PRODUCTIONUNIT
ORG_DATETIME
SHIFTCALENDARID
ID
SHIFTCALENDARID_2
1
2022-11-14 13:00:00.000
700
703
700
1
2022-11-14 13:30:00.000
700
703
700
2
2022-11-14 13:30:00.000
700
703
700
3
2022-11-14 13:30:00.000
703
703
4
2022-11-14 13:30:00.000
702
703
702
Mostly Complete answer:
So I stripped ProductionDate and the two fixed strings from my answer, but:
--CREATE VIEW proj.pvw_PowerBI_ActualUnits
-- Full CTE rewrite of the view: both SQL Server functions are unrolled into set-based
-- steps that run over every (equipment, half-hour bucket) pair at once.
-- The table_* CTEs are inline sample data standing in for the real tables.
WITH table_oee_valuelog(equipmentid, begintime, quantity, kpiinstanceid) as (
select * from values
(1, '2022-11-14 13:26:01'::timestamp_ntz, 10, 100),
(1, '2022-11-14 13:26:01'::timestamp_ntz, 11, 100),
(1, '2022-11-14 13:34:01'::timestamp_ntz, 12, 100),
(2, '2022-11-14 13:34:01'::timestamp_ntz, 20, 100),
(3, '2022-11-14 13:34:01'::timestamp_ntz, 30, 100),
(4, '2022-11-14 13:34:01'::timestamp_ntz, 44, 100)
), table_kpiinstance(id, kpidefinitionid) as (
select * from values
(100, 'COUNT-OUT:extra stuff')
), table_equipment(id, name, parentequipmentid, shiftcalendarentitynumber) as (
select * from values
(1,'equipment one', 2, null),
(2,'equipment two', null, 80),
(3,'equipment three', 4, 81),
(4,'equipment four', null, 82)
), table_shift(id, shiftcalendarid, reference, FromDay, FromTimeOfDay, TillTimeOfDay) as (
select * from values
(1001, 700, 'a', 8, '06:00'::time,'18:00'::time),
(1001, 702, 'a', 5, '06:00'::time,'18:00'::time),
(1001, 703, 'a', 4, '06:00'::time,'18:00'::time)
), table_shiftcalendar(id, entitynumber, begindate, enddate, name, PeriodInDays) as (
select * from values
(699, 80, '2021-01-01'::date, '2021-12-31'::date, 'Factory', 10),
(700, 80, '2022-01-01'::date, '2022-12-31'::date, 'Factory', 10),
(701, 80, '2023-01-01'::date, '2023-12-31'::date, 'Factory', 10),
--(701, 81, '2022-02-01'::date, '2022-11-30'::date, 'Factory', 10),
(702, 82, '2022-10-01'::date, '2022-11-15'::date, 'Factory', 10),
(703, 89, '2022-11-01'::date, '2022-11-15'::date, 'Factory', 10)
-- temp_sub_select: quantity per equipment and half-hour bucket (the view's "temp" sub-select).
), temp_sub_select as (
SELECT
vl.equipmentid as productionunit,
timeadd('minute', truncate(minute(vl.BeginTime)/30)*30, date_trunc('hour', vl.BeginTime)) as datetime,
SUM(vl.quantity) AS scrapquantity
FROM table_oee_valuelog as vl
INNER JOIN table_kpiinstance as ki
ON ki.ID = vl.KPIInstanceID
AND ki.KPIDefinitionID LIKE 'COUNT-OUT:%'
GROUP BY 1,2
-- part_a: every calendar reachable from each piece of equipment, taken from the
-- equipment itself or from the first ancestor that carries an entity number.
), cte_GetEquipmentShiftCalendarID_part_a as (
with recursive rec_cte (id, parentequipmentid, shiftcalendarentitynumber) as (
-- anchor: every piece of equipment starts at itself
select
ID,
ParentEquipmentID,
ShiftCalendarEntityNumber
FROM table_equipment
UNION ALL
-- step: keep the starting id, climb one level while no entity number has been found
SELECT
r.ID,
p.ParentEquipmentID,
p.ShiftCalendarEntityNumber
FROM rec_cte as r
INNER JOIN table_equipment p
ON p.ID = r.ParentEquipmentID
AND r.ShiftCalendarEntityNumber IS NULL
)
select
c.id,
sc.id as shiftCalendarID,
sc.begindate, sc.enddate
from rec_cte as c
left join table_shiftcalendar as sc
on sc.entitynumber = c.ShiftCalendarEntityNumber
where shiftcalendarentitynumber is not null
-- part_b: best equipment-based calendar per bucket; calendars still open at the bucket
-- time rank first, then the latest begin date. shiftCalendarID is NULL when none qualifies.
), cte_GetEquipmentShiftCalendarID_part_b as (
select t.productionunit,
t.org_datetime,
t.datetime,
c.shiftCalendarID
from (
select
productionunit,
datetime as org_datetime,
-- NOTE(review): the T-SQL default is GETDATE(), a timestamp; CURRENT_DATE is midnight of today - confirm
nvl(datetime, CURRENT_DATE) as datetime /* handle the null case from the T-SQL */
from temp_sub_select
) as t
left join cte_GetEquipmentShiftCalendarID_part_a as c
on t.productionunit = c.id
and c.begindate <= t.datetime
qualify row_number() over (partition by t.productionunit, t.datetime
order by iff(c.enddate is null or c.enddate > t.datetime, 1, 2), c.begindate desc) = 1
-- fallback: latest calendar that began on or before each bucket time
), max_shiftCalendar_per_datetime as (
select
t.datetime,
sc.id
from (
select distinct nvl(datetime, CURRENT_DATE) as datetime
from temp_sub_select
) as t
join table_shiftcalendar as sc
on sc.begindate <= t.datetime
qualify row_number() over (partition by t.datetime order by sc.begindate desc) = 1
-- final calendar per bucket: the equipment-based pick, else the fallback.
-- The inner join drops buckets for which no fallback calendar exists.
), last_calendar_per_equipment as (
select
a.productionunit
,a.org_datetime
,nvl(a.shiftCalendarID, b.id) as shiftCalendarID
from cte_GetEquipmentShiftCalendarID_part_b as a
join max_shiftCalendar_per_datetime as b
on a.datetime = b.datetime
-- cfn_GetShiftIDFromDateTime unrolled: t0 resolves the calendar and the current/previous
-- day number within its period; the outer select matches shifts by day and time of day.
), cfn_GetShiftIDFromDateTime as (
with t0 as (
select
x.productionunit
,x.org_datetime
,x.org_datetime::time as org_time
,x.ShiftCalendarID
,DATEDIFF( day, BeginDate, x.org_datetime ) % PeriodInDays + 1 AS CurDay
-- PrvDay reuses the CurDay alias defined on the line above
,( CurDay + PeriodInDays - 2 ) % PeriodInDays + 1 AS PrvDay
from last_calendar_per_equipment as x
-- join without ON: the pairing condition lives in the WHERE clause below
join table_shiftcalendar as sc
where sc.id = x.shiftCalendarID
OR ( x.shiftCalendarID IS NULL
AND sc.Name = 'Factory'
AND sc.BeginDate <= x.org_datetime )
QUALIFY row_number() over (partition by x.productionunit, x.org_datetime order by sc.begindate desc) = 1
)
SELECT
s.id
,s.reference
,productionunit
,org_datetime
,s.ShiftCalendarID
,s.FromDay, s.FromTimeOfDay, s.TillTimeOfDay, T0.CurDay, t0.org_time
FROM table_shift as s, T0
WHERE s.ShiftCalendarID = T0.ShiftCalendarID
-- same three cases as the T-SQL function: shift within the day, wrapping shift that
-- started today, wrapping shift that started on the previous period day
AND ( ( s.FromDay = T0.CurDay AND s.FromTimeOfDay <= t0.org_time AND s.TillTimeOfDay > t0.org_time )
OR ( s.FromDay = T0.CurDay AND s.FromTimeOfDay >= s.TillTimeOfDay AND s.FromTimeOfDay <= t0.org_time )
OR ( s.FromDay = T0.PrvDay AND s.FromTimeOfDay >= s.TillTimeOfDay AND s.TillTimeOfDay > t0.org_time )
)
)
-- Final projection: one row per bucket that resolved to a shift
-- (ProductionDate and the two constant columns of the original view are left out).
SELECT
e.name AS productionunit,
temp.datetime AS datetime,
s.reference AS shift,
temp.DateTime::time AS Time,
temp.ScrapQuantity AS ScrapQuantity,
s.ID
FROM temp_sub_select as temp
INNER JOIN table_equipment e
ON e.ID = temp.ProductionUnit
INNER JOIN cfn_GetShiftIDFromDateTime s
ON s.productionunit = temp.ProductionUnit
and temp.datetime = s.org_datetime
As far as I can follow, this does what your functions do, and it unrolls the correlated query, as that would never work in Snowflake.
PRODUCTIONUNIT
DATETIME
SHIFT
TIME
SCRAPQUANTITY
ID
equipment one
2022-11-14 13:00:00.000
a
13:00:00
21
1001
equipment one
2022-11-14 13:30:00.000
a
13:30:00
12
1001
equipment two
2022-11-14 13:30:00.000
a
13:30:00
20
1001
equipment three
2022-11-14 13:30:00.000
a
13:30:00
30
1001
equipment four
2022-11-14 13:30:00.000
a
13:30:00
44
1001
Not so bad for five hours work...
I need to get a count of users, grouped by user type (A, B, C) and by every month (that exists in the DB) of the current year — but only users who have no paid orders (with total > 0) in that month (each row returned by the SQL) and who do have paid orders (with total > 0) in any previous month (in any year, not just the current one). In other words, these are inactive users: they placed some paid order before, but have not placed any new order in the month of the returned row.
What I expect to get in results (values are just examples):
label user_type data
Nov B 2
Nov A 1
Nov C 3
Dec C 1
.... other months
This means that in December there are 5 users with user type A, 3 users with user type B and 0 users with user type C who did NOT place orders in December 2021, but did place orders sometime before December, in any year.
Sample DB (two tables - users and orders) with SQL that show number users, by every user type, in every month, who placed orders in this month. Instead of just this simple results, I need to get users counts that DON'T placed orders in this month, but placed paid orders somewhere before.
https://dbfiddle.uk/?rdbms=mysql_5.6&fiddle=4c4fadf67bcdc7cc3443f46c387173df
I need SQL that will work with MySQL 5.7
Try this query to generate counts for all months x user type
-- Inactive users per month of the current year and per user type: users with at least
-- one paid order (total > 0) before the month and no paid order inside it.
SELECT
    DATE_FORMAT(months.month_start, '%b') AS label,
    users.user_type,
    SUM(
        EXISTS (
            -- a paid order before the month started
            SELECT 1
            FROM orders
            WHERE orders.user_id = users.userid
              AND orders.total > 0
              AND orders.`date` < months.month_start
        ) AND NOT EXISTS (
            -- no paid order inside the month; half-open range, so rows carrying a time
            -- part on the last day of the month are still covered
            SELECT 1
            FROM orders
            WHERE orders.user_id = users.userid
              AND orders.total > 0
              AND orders.`date` >= months.month_start
              AND orders.`date` < months.month_start + INTERVAL 1 MONTH
        )
    ) counts
FROM (
    -- first day of every month of the current year
    SELECT DATE(CONCAT_WS('-', YEAR(CURDATE()), m.mm, '01')) AS month_start
    FROM (
        SELECT '01' mm
        UNION ALL SELECT '02' UNION ALL SELECT '03' UNION ALL SELECT '04' UNION ALL SELECT '05'
        UNION ALL SELECT '06' UNION ALL SELECT '07' UNION ALL SELECT '08' UNION ALL SELECT '09'
        UNION ALL SELECT '10' UNION ALL SELECT '11' UNION ALL SELECT '12'
    ) m
) months
CROSS JOIN users
GROUP BY months.month_start, users.user_type
ORDER BY months.month_start, users.user_type
demo
Test this query if it fits your needs
-- Per month of 2021 and user type: distinct ordering users who have no order in the
-- current calendar month but do have an order before it.
-- NOTE(review): the activity checks look at the month of NOW(), not at the month of
-- each result row, so this does not yet answer the per-month question.
SELECT
    DATE_FORMAT(o.date, '%b') AS label,
    UPPER(u.user_type) AS user_type,
    COUNT(DISTINCT o.user_id) AS data
FROM orders o
INNER JOIN users u
    ON o.user_id = u.userid
WHERE o.date >= '2021-01-01'
  AND o.date < '2022-01-01'
  -- NOT EXISTS instead of NOT IN: a NULL user_id in the subquery would empty the result
  AND NOT EXISTS (
        SELECT 1
        FROM orders o1
        WHERE o1.user_id = o.user_id
          AND o1.date >= DATE_FORMAT(NOW(), '%Y-%m-01')
          AND o1.date < DATE_FORMAT(NOW(), '%Y-%m-01') + INTERVAL 1 MONTH
      )
  -- date range instead of comparing '%c' month numbers as strings ('10' < '9')
  AND EXISTS (
        SELECT 1
        FROM orders o1
        WHERE o1.user_id = o.user_id
          AND o1.date < DATE_FORMAT(NOW(), '%Y-%m-01')
      )
-- group on the selected label so the query is valid under ONLY_FULL_GROUP_BY
GROUP BY label, u.user_type
HAVING SUM(o.total) > 0
ORDER BY MIN(o.date) ASC
EDIT
The query below returns every month of the year
-- Variant that returns a row for every month number 1-12, including months without
-- any matching order (user_type '-' and a count of 0).
-- NOTE(review): the activity checks look at the month of NOW(), not at months.MONTH.
SELECT
    months.MONTH AS label,
    IFNULL(UPPER(u.user_type), '-') AS user_type,
    COUNT(DISTINCT o.user_id) AS data
FROM (
    SELECT 1 AS MONTH
    UNION ALL SELECT 2
    UNION ALL SELECT 3
    UNION ALL SELECT 4
    UNION ALL SELECT 5
    UNION ALL SELECT 6
    UNION ALL SELECT 7
    UNION ALL SELECT 8
    UNION ALL SELECT 9
    UNION ALL SELECT 10
    UNION ALL SELECT 11
    UNION ALL SELECT 12
) AS months
LEFT JOIN orders o
    ON MONTH(o.date) = months.MONTH
LEFT JOIN users u
    ON o.user_id = u.userid
WHERE (YEAR(o.date) = 2021 OR o.date IS NULL)
  AND (
        (
            -- no order in the current calendar month ...
            NOT EXISTS (
                SELECT 1
                FROM orders o1
                WHERE o1.user_id = o.user_id
                  AND o1.date >= DATE_FORMAT(NOW(), '%Y-%m-01')
                  AND o1.date < DATE_FORMAT(NOW(), '%Y-%m-01') + INTERVAL 1 MONTH
            )
            -- ... but at least one before it (date range instead of comparing '%c'
            -- month numbers as strings, where '10' < '9')
            AND EXISTS (
                SELECT 1
                FROM orders o1
                WHERE o1.user_id = o.user_id
                  AND o1.date < DATE_FORMAT(NOW(), '%Y-%m-01')
            )
        )
        -- keep months that matched no order / orders without a user row
        OR o.user_id IS NULL
        OR u.userid IS NULL
      )
GROUP BY months.MONTH, u.user_type
ORDER BY months.MONTH ASC
This uses a similar approach to VeteranSlayer but it starts with the cross join between months and users followed by the left join to orders. It also uses ranges for the date comparisons instead of the functions. It may perform really badly but it should give the correct result -
-- Inactive users per month of 2021 and user type: every (month, user) pair is
-- anti-joined to the paid orders of that month, and kept only when the user has an
-- earlier paid order.
SELECT
    months.month AS `label`,
    u.user_type,
    COUNT(u.userid) AS `data`
FROM (
    SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
    SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
    SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
    SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
    SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
    SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
    SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
    SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
    SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
    SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
    SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
    SELECT 'Dec', '2021-12-01', '2021-12-31'
) months
CROSS JOIN users u
LEFT JOIN orders o
    ON o.user_id = u.userid
   AND o.total > 0
   -- half-open range: also covers rows carrying a time part on the last day of the month
   AND o.date >= months.month_start
   AND o.date < months.month_start + INTERVAL 1 MONTH
WHERE o.user_id IS NULL  -- anti-join: no paid order in this month
  AND EXISTS (
        SELECT 1
        FROM orders o1
        WHERE o1.user_id = u.userid
          AND o1.total > 0
          AND o1.date < months.month_start
      )
-- month_start is grouped as well so the ORDER BY is valid under ONLY_FULL_GROUP_BY
GROUP BY months.month_start, months.month, u.user_type
ORDER BY months.month_start ASC, u.user_type ASC;
EDIT
The performance of these queries varies dramatically based on the scale of the dataset, the distribution of data and the indices. I have done some tests with many different index variations and the following test datasets. Note the random data created in the two tables can lead to wildly different performance. The dummy table referenced in the SELECTs of the INSERTs is just a random table with 1M rows.
-- Benchmark schema: 1,000 users with a random type A/B/C and 100,000 orders spread
-- over random users and random days following 2020-01-01.
-- `dummy` is only used as a row source.
CREATE TABLE `users` (
    `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `user_type` CHAR(1) NOT NULL,
    PRIMARY KEY (`id`),
    KEY `IDX_user_type` (`user_type`)
);

INSERT INTO users (user_type)
SELECT
    CASE (FLOOR(RAND() * 3) + 1) WHEN 1 THEN 'A' WHEN 2 THEN 'B' ELSE 'C' END AS `user_type`
FROM dummy
LIMIT 1000;

CREATE TABLE orders (
    `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
    `user_id` INT,
    `date` DATE,
    `total` DECIMAL(6,2),
    PRIMARY KEY (`id`),
    KEY `IDX_user_id_date` (`user_id`, `date`)
);

INSERT INTO orders (user_id, date, total)
SELECT
    (FLOOR(RAND() * 1000) + 1) AS `user_id`,
    DATE_ADD('2020-01-01', INTERVAL FLOOR(RAND() * 685) + 1 DAY) AS `date`,
    ((FLOOR(RAND() * 10) + 1) * 5) AS `total`
FROM dummy
LIMIT 100000;
The most significant performance difference across the queries came from adding -
KEY `IDX_user_id_date` (`user_id`,`date`)
and adding the user_type index gave a small but consistent improvement -
KEY `IDX_user_type` (`user_type`)
ProGu's query executed consistently with an average time of 1.466 sec. And my query was similarly consistent at 0.922 sec. Your mileage will vary!
I haven't included times for VeteranSlayer's query as it returned radically different results.
EDIT 2
Repopulated the two tables with 50k users and 1M orders
-- Rebuild the benchmark data: 50k users with a random type and 1M orders with random
-- users and timestamps between 2016-01-01 and 2021-12-13, inserted in date order.
TRUNCATE TABLE orders;
TRUNCATE TABLE users;

INSERT INTO users (user_type)
SELECT
    CASE (FLOOR(RAND() * 3) + 1) WHEN 1 THEN 'A' WHEN 2 THEN 'B' ELSE 'C' END AS `user_type`
FROM (SELECT 1 FROM dummy LIMIT 50000) t;

INSERT INTO orders (user_id, date, total)
SELECT
    (FLOOR(RAND() * 50000) + 1) AS `user_id`,
    TIMESTAMPADD(SECOND, FLOOR(RAND() * TIMESTAMPDIFF(SECOND, '2016-01-01', '2021-12-13')), '2016-01-01') AS `date`,
    ((FLOOR(RAND() * 50) + 1) * 5) AS `total`
FROM (SELECT 1 FROM dummy LIMIT 1000000) t
-- the expressions are aliased so that ORDER BY has a `date` column to resolve
ORDER BY `date`;
The resulting distribution of orders, by time and user_id, is quite even which is unlikely to be realistic so this test dataset grossly exacerbates any performance issues, I think.
I was surprised that by using my months table, ProGu's query was significantly faster, dropping from 21.062sec to 9.703sec, and using one less temporary table (two instead of three).
-- ProGu's approach driven by a literal months table: per month of 2021 and user type,
-- count users with a paid order (total > 0) before the month and none inside it.
SELECT
    months.month AS label,
    users.user_type,
    SUM(
        EXISTS (
            -- a paid order before the month started
            SELECT 1
            FROM orders
            WHERE orders.user_id = users.id
              AND orders.total > 0
              AND orders.`date` < months.month_start
        ) AND NOT EXISTS (
            -- no paid order inside the month (half-open range)
            SELECT 1
            FROM orders
            WHERE orders.user_id = users.id
              AND orders.total > 0
              AND orders.`date` >= months.month_start
              AND orders.`date` < months.month_start + INTERVAL 1 MONTH
        )
    ) counts
FROM (
    SELECT 'Jan' `month`, '2021-01-01' month_start, '2021-01-31' month_end UNION ALL
    SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
    SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
    SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
    SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
    SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
    SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
    SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
    SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
    SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
    SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
    SELECT 'Dec', '2021-12-01', '2021-12-31'
) months
CROSS JOIN users
-- month_start is grouped as well so the ORDER BY is valid under ONLY_FULL_GROUP_BY
GROUP BY months.month_start, months.month, users.user_type
ORDER BY months.month_start ASC, users.user_type ASC
My query above can be significantly improved by pre-grouping the orders data for the current year (your mileage will vary, but it is worth considering) -
-- For each month of 2021 and each user_type, count the users who placed at
-- least one order before the month started but none during the month.
-- Orders from 2021 onwards are pre-grouped to one row per (user, month) so the
-- "no order this month" test becomes an anti-join instead of a NOT EXISTS.
SELECT
    months.`month` AS `label`,
    u.user_type,
    COUNT(u.id) AS `data`
FROM (
    SELECT 'Jan' AS `month`, '2021-01-01' AS month_start, '2021-01-31' AS month_end UNION ALL
    SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
    SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
    SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
    SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
    SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
    SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
    SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
    SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
    SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
    SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
    SELECT 'Dec', '2021-12-01', '2021-12-31'
) AS months
-- Every (month, user) pair is a candidate; spelled CROSS JOIN because the
-- join has no condition.
CROSS JOIN users AS u
LEFT JOIN (
    -- One row per user per month in which that user ordered; `m` is the
    -- first day of the month formatted like months.month_start.
    SELECT `user_id`, DATE_FORMAT(`date`, '%Y-%m-01') AS `m`
    FROM `orders`
    WHERE `date` >= '2021-01-01'
    GROUP BY `user_id`, `m`
) AS o
    ON o.m = months.month_start
    AND o.user_id = u.id
-- Anti-join: keep only pairs with no order in that month ...
WHERE o.user_id IS NULL
    -- ... for users who had ordered at some point before the month began.
    AND EXISTS (
        SELECT 1
        FROM orders AS o1
        WHERE o1.`date` < months.month_start
          AND o1.user_id = u.id
    )
-- month_start is grouped as well (it maps 1:1 to month) so that the ORDER BY
-- below is valid under ONLY_FULL_GROUP_BY and the sort is deterministic.
GROUP BY months.month_start, months.`month`, u.user_type
ORDER BY months.month_start ASC, u.user_type ASC;
Execution time dropped from 12.422sec to 6.497sec
And the final test I tried was de-normalising by adding first_order_date to the users table -
-- Denormalisation: store each user's earliest order date on the users row.
ALTER TABLE `users`
    ADD COLUMN `first_order_date` DATE NULL AFTER `user_type`;

-- Backfill the new column from orders. Only first_order_date is written,
-- because it is the only column added above. Users with no orders are not
-- matched by the INNER JOIN and keep first_order_date = NULL.
UPDATE `users` AS `u`
INNER JOIN (
    SELECT `o`.`user_id`, MIN(`o`.`date`) AS `first_o`
    FROM `orders` AS `o`
    GROUP BY `o`.`user_id`
) AS `t`
    ON `u`.`id` = `t`.`user_id`
SET `u`.`first_order_date` = `t`.`first_o`;
I then modified my query to use this instead of the EXISTS sub-query -
-- For each month of 2021 and each user_type, count the users whose first
-- order predates the month and who placed no order during the month.
-- Uses the denormalised users.first_order_date instead of an EXISTS probe.
SELECT
    `months`.`month` AS `label`,
    `u`.`user_type`,
    COUNT(`u`.`id`) AS `data`
FROM (
    SELECT 'Jan' AS `month`, '2021-01-01' AS month_start, '2021-01-31' AS month_end UNION ALL
    SELECT 'Feb', '2021-02-01', '2021-02-28' UNION ALL
    SELECT 'Mar', '2021-03-01', '2021-03-31' UNION ALL
    SELECT 'Apr', '2021-04-01', '2021-04-30' UNION ALL
    SELECT 'May', '2021-05-01', '2021-05-31' UNION ALL
    SELECT 'Jun', '2021-06-01', '2021-06-30' UNION ALL
    SELECT 'Jul', '2021-07-01', '2021-07-31' UNION ALL
    SELECT 'Aug', '2021-08-01', '2021-08-31' UNION ALL
    SELECT 'Sep', '2021-09-01', '2021-09-30' UNION ALL
    SELECT 'Oct', '2021-10-01', '2021-10-31' UNION ALL
    SELECT 'Nov', '2021-11-01', '2021-11-30' UNION ALL
    SELECT 'Dec', '2021-12-01', '2021-12-31'
) AS `months`
-- Every (month, user) pair is a candidate; spelled CROSS JOIN because the
-- join has no condition.
CROSS JOIN `users` AS `u`
LEFT JOIN (
    -- One row per user per month in which that user ordered; `m` is the
    -- first day of the month formatted like months.month_start.
    SELECT `user_id`, DATE_FORMAT(`date`, '%Y-%m-01') AS `m`
    FROM `orders`
    WHERE `date` >= '2021-01-01'
    GROUP BY `user_id`, `m`
) AS `o`
    ON `o`.`m` = `months`.`month_start`
    AND `o`.`user_id` = `u`.`id`
-- Anti-join: keep only pairs with no order in that month ...
WHERE `o`.`user_id` IS NULL
    -- ... for users whose first order came before the month began. A NULL
    -- first_order_date does not satisfy this comparison, so such users are
    -- excluded.
    AND `u`.`first_order_date` < `months`.`month_start`
-- month_start is grouped as well (it maps 1:1 to month) so that the ORDER BY
-- below is valid under ONLY_FULL_GROUP_BY and the sort is deterministic.
GROUP BY `months`.`month_start`, `months`.`month`, `u`.`user_type`
ORDER BY `months`.`month_start` ASC, `u`.`user_type` ASC;
This returns the same result in 1.447sec. Obviously, de-normalising like this should be avoided but I included it here as it shows the performance benefit for this one scenario.