Related
I have a table with a few fields: id, country, ip, and created_at. I am trying to get the delta between the total number of entries on one day and the total number of entries on the following day.
-- Sample table: one row per recorded session.
CREATE TABLE session (
    id         INT          NOT NULL AUTO_INCREMENT PRIMARY KEY,
    country    VARCHAR(50)  NOT NULL,
    ip         VARCHAR(255),
    -- Filled with the insertion time when no explicit value is supplied.
    created_at TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP
);
-- Seed data: 13 sessions across several days in April 2021.
INSERT INTO session (id, country, ip, created_at)
VALUES
    (1,  'IN', '10.100.102.11',  '2021-04-05 20:26:02'),
    (2,  'IN', '10.100.102.11',  '2021-04-05 19:26:02'),
    (3,  'US', '10.120.102.11',  '2021-04-17 10:26:02'),
    (4,  'US', '10.100.112.11',  '2021-04-16 12:26:02'),
    (5,  'AU', '10.100.102.122', '2021-04-12 19:36:02'),
    (6,  'AU', '10.100.102.122', '2021-04-12 18:20:02'),
    (7,  'AU', '10.100.102.122', '2021-04-12 23:26:02'),
    (8,  'US', '10.100.102.2',   '2021-04-16 21:33:01'),
    (9,  'AU', '10.100.102.122', '2021-04-18 20:46:02'),
    (10, 'AU', '10.100.102.111', '2021-04-04 13:19:12'),
    (11, 'US', '10.100.112.11',  '2021-04-16 12:26:02'),
    (12, 'IN', '10.100.102.11',  '2021-04-05 15:26:02'),
    (13, 'IN', '10.100.102.11',  '2021-04-05 19:26:02');
Now I have written this query to get the delta
-- Day-over-day change in the number of sessions.
--
-- For every day that has sessions, and for the day right after each such day,
-- returns (count on that day) - (count on the previous day). When the previous
-- day has no sessions the delta is simply that day's count.
--
-- The "next day" is computed with INTERVAL arithmetic. Adding the integer 1 to
-- a DATE converts it to a number (e.g. 20210430 + 1 = 20210431), which is not a
-- valid date at the end of a month.
SELECT
    T1.date1 AS date,
    IFNULL(T1.cnt1 - T2.cnt2, T1.cnt1) AS delta
FROM (
    -- One row per relevant day: days with sessions carry their count, and the
    -- day after each of them is added with a count of 0. MAX() keeps the real
    -- count when a day appears in both branches.
    SELECT
        TA.dateA AS date1,
        MAX(TA.countA) AS cnt1
    FROM (
        SELECT
            DATE(created_at) AS dateA,
            COUNT(*) AS countA
        FROM session
        GROUP BY DATE(created_at)
        UNION ALL
        SELECT
            DATE(created_at) + INTERVAL 1 DAY AS dateA,
            0 AS countA
        FROM session
    ) AS TA
    GROUP BY TA.dateA
) AS T1
LEFT OUTER JOIN (
    -- Each day's count, keyed by the following day so that it lines up with
    -- the row it has to be subtracted from.
    SELECT
        DATE(created_at) + INTERVAL 1 DAY AS date2,
        COUNT(*) AS cnt2
    FROM session
    GROUP BY DATE(created_at)
) AS T2
    ON T1.date1 = T2.date2
ORDER BY date;
http://sqlfiddle.com/#!9/4f5fd26/60
Then I am getting the results as
date delta
2021-04-04 1
2021-04-05 3
2021-04-06 -4
2021-04-12 3
2021-04-13 -3
2021-04-16 3
2021-04-17 -2
2021-04-18 0
2021-04-19 -1
Is there any room for improvement or optimization here, with or without window functions? (I am new to SQL and still experimenting.)
Try a shorter version
-- Shorter variant of the day-over-day session delta.
--
-- grp: each session contributes 1 to its own day and 0 to the following day,
-- so the following day is present in the result even when it has no sessions.
-- The next day is derived with INTERVAL arithmetic; numeric "+ 1" on a DATE
-- produces an invalid value at the end of a month.
WITH grp AS (
    SELECT
        t.dateA,
        SUM(t.cnt) AS countA
    FROM session
    CROSS JOIN LATERAL (
        SELECT DATE(created_at) AS dateA, 1 AS cnt
        UNION ALL
        SELECT DATE(created_at) + INTERVAL 1 DAY, 0 AS cnt
    ) AS t
    GROUP BY t.dateA
)
SELECT
    t1.dateA AS date,
    -- No row for the previous day means the delta is the day's own count.
    IFNULL(t1.countA - t2.countA, t1.countA) AS delta
FROM grp AS t1
LEFT JOIN grp AS t2
    ON t2.dateA + INTERVAL 1 DAY = t1.dateA
ORDER BY t1.dateA;
db<>fiddle
SQL God...I need some help!
I have a data table that has a route_complete_percentage column and a created_at column.
I need two pieces of data:
the time stamp (within created_at column) when the route_complete_percentage is at its minimum but not zero
the time stamp (within created_at column) when the route_complete_percentage is at its maximum, it might be 100% or not, but when its at its highest.
Here is the kicker, there might be multiple time stamps for the highest route completion column. For example,
Example Table
I have multiple values when the route_completion_percentage is at its maximum, but I need the minimum time stamp value.
Here is the query so far...but the two time stamps are the same.
-- Attempts to report, per fc / plan_id / route_id, the time of the first and
-- last batch scan together with the minimum and maximum
-- route_complete_percentage, for scan rows created after midnight three days
-- before today.
-- NOTE(review): as posted, this query returns the same timestamp for both
-- First_Batch_Scan and Last_Batch_Scan; see the notes on the mintime and
-- maxtime subqueries.
SELECT
A.fc,
A.plan_id,
A.route_id,
mintime.first_scan AS First_Batch_Scan,
min(route_complete_percentage),
maxtime.last_scan AS Last_Batch_Scan,
max(route_complete_percentage)
FROM
-- A: the filtered scan rows, with a JOINKEY built from plan_id and route_id.
(SELECT
fc,
plan_id,
route_id,
route_complete_percentage,
CONCAT(plan_id, '-', route_id) AS JOINKEY
FROM
houdini_ops.BATCHINATOR_SCAN_LOGS_V2
WHERE
fc <> ''
AND order_id <> 'Can\'t find order'
AND source = 'scan'
AND created_at > DATE_ADD(CURDATE(), INTERVAL - 3 DAY)) A
LEFT JOIN
-- mintime: one row per fc / plan_id / route_id, with created_at converted
-- from UTC to the time zone of the matching service-area row.
(SELECT
l.fc,
l.route_id,
l.plan_id,
CONCAT(l.plan_id, '-', l.route_id) AS JOINKEY,
CASE
-- NOTE(review): MIN(...) is used here as a truth value, not compared with the
-- row's own percentage, and l.created_at is neither aggregated nor listed in
-- GROUP BY. Which row's created_at is returned is therefore not tied to the
-- minimum percentage.
WHEN MIN(route_complete_percentage) THEN CONVERT_TZ(l.created_at, 'UTC', s.time_zone)
END AS first_scan
FROM
houdini_ops.BATCHINATOR_SCAN_LOGS_V2 l
JOIN houdini_ops.O_SERVICE_AREA_ATTRIBUTES s ON l.fc = s.default_station_code
WHERE
l.fc <> ''
AND l.order_id <> 'Can\'t find order'
AND l.source = 'scan'
AND l.created_at > DATE_ADD(CURDATE(), INTERVAL - 3 DAY)
GROUP BY fc , plan_id , route_id) mintime ON A.JOINKEY = mintime.JOINKEY
LEFT JOIN
-- maxtime: same shape as mintime, with MAX in place of MIN.
(SELECT
l.fc,
l.route_id,
l.plan_id,
CONCAT(l.plan_id, '-', l.route_id) AS JOINKEY,
CASE
-- NOTE(review): same issue as in mintime. MAX(...) only acts as a truth value,
-- so this subquery presumably picks the same created_at as mintime does, which
-- would explain the identical timestamps.
WHEN MAX(route_complete_percentage) THEN CONVERT_TZ(l.created_at, 'UTC', s.time_zone)
END AS last_scan
FROM
houdini_ops.BATCHINATOR_SCAN_LOGS_V2 l
JOIN houdini_ops.O_SERVICE_AREA_ATTRIBUTES s ON l.fc = s.default_station_code
WHERE
l.fc <> ''
AND l.order_id <> 'Can\'t find order'
AND l.source = 'scan'
AND l.created_at > DATE_ADD(CURDATE(), INTERVAL - 3 DAY)
-- maxtime is joined to mintime (not to A) on the shared JOINKEY.
GROUP BY fc , plan_id , route_id) maxtime ON mintime.JOINKEY = maxtime.JOINKEY
GROUP BY fc , plan_id , route_id
I don't want to meddle with the rest of your query. Here is something that will do what it sounds like you need. There's sample data included. -- I interpreted your blank values as nulls from your sample data.
Basically, what you are looking for is the Minimum created_at value, inside each of the route_complete_percentage groups. So I treated route_complete_percentage as a group identifier. But you only care about two of the groups, so I identify those groups first in the cte, and use them to filter the aggregate query.
-- Recreate the #Data temp table and load the sample rows.
IF OBJECT_ID('tempdb.dbo.#Data') IS NOT NULL
    DROP TABLE #Data
GO

CREATE TABLE #Data (
    route_complete_percentage INT,
    created_at                DATETIME
)

-- Blank timestamps in the question's sample are loaded as NULL.
INSERT INTO #Data (route_complete_percentage, created_at)
VALUES
    (0,   '20170531 19:58'),
    (1,   NULL),
    (2,   NULL),
    (3,   NULL),
    (4,   NULL),
    (5,   NULL),
    (6,   NULL),
    (7,   NULL),
    (80,  NULL),
    (90,  NULL),
    (100, '20170531 20:10'),
    (100, '20170531 20:12'),
    (100, '20170531 20:15')
-- Earliest created_at for the lowest non-zero and for the highest
-- route_complete_percentage.
-- cteMinMax yields a single row holding those two percentages; the outer query
-- keeps only rows with one of them and takes the minimum timestamp per value.
;WITH cteMinMax (min_route_complete_percentage, max_route_complete_percentage) AS (
    SELECT
        MIN(src.route_complete_percentage),
        MAX(src.route_complete_percentage)
    FROM #Data AS src
    -- Excluding 0 guarantees the timestamp for 0 % is never reported.
    WHERE src.route_complete_percentage > 0
)
SELECT
    d.route_complete_percentage,
    MIN(d.created_at) AS min_created_at
FROM #Data AS d
INNER JOIN cteMinMax AS mm
    ON d.route_complete_percentage IN (mm.min_route_complete_percentage, mm.max_route_complete_percentage)
GROUP BY d.route_complete_percentage
I have a table that looks like this
userid | eventid | description | date | starttime | endtime
1 1 Event 1 2016-02-02 09:30:00 11:00:00
1 2 Event 2 2016-02-02 13:30:00 15:00:00
1 3 Event 3 2016-02-02 17:30:00 21:00:00
2 4 Event 4 2016-02-03 13:00:00 14:00:00
2 5 Event 5 2016-02-03 15:00:00 16:00:00
I need to find what is the sum of time between the events on the same day by the user.
Like this:
userid | timeBetween
1 05:00:00
2 01:00:00
I should also assume that there may be overlapping times for example event1 starts at 11:00 ends 13:00 and event2 starts 12:00 and ends 14:00 by the same user on the same day. These cases are rare and I believe returning 00:00 here is the appropriate answer.
I solved a similar problem, finding the sum of the length of all events per day.
-- Total duration of all events per user and day, longest total first.
-- NOTE(review): SELECT * next to GROUP BY only works when MySQL is allowed to
-- return non-aggregated columns that are not in the GROUP BY list.
SELECT
    *,
    SEC_TO_TIME(SUM(TIME_TO_SEC(TIMEDIFF(`endtime`, `starttime`)))) AS sumtime
FROM `events`
GROUP BY
    userid,
    date
ORDER BY sumtime DESC
Given this sample data:
-- Sample events table used by the answer, with the five rows from the question.
CREATE TABLE t (
    `userid`      INT,
    `eventid`     INT,
    `description` VARCHAR(7),
    `date`        DATE,
    `starttime`   TIME,
    `endtime`     TIME
);

INSERT INTO t (`userid`, `eventid`, `description`, `date`, `starttime`, `endtime`)
VALUES
    (1, 1, 'Event 1', '2016-02-02', '09:30:00', '11:00:00'),
    (1, 2, 'Event 2', '2016-02-02', '13:30:00', '15:00:00'),
    (1, 3, 'Event 3', '2016-02-02', '17:30:00', '21:00:00'),
    (2, 4, 'Event 4', '2016-02-03', '13:00:00', '14:00:00'),
    (2, 5, 'Event 5', '2016-02-03', '15:00:00', '16:00:00');
this query
-- Sum of the idle time between consecutive events, per user.
--
-- The inner query walks the rows in order and remembers the previous row's
-- endtime and userid in MySQL user variables (@name). '#' starts a comment in
-- MySQL, so the variables must be written with '@'.
SELECT userid, SEC_TO_TIME(SUM(TIME_TO_SEC(diff))) AS time_between
FROM (
    SELECT
        -- Gap = this row's start minus the previous row's end. On the very
        -- first row and whenever the user changes there is no previous end,
        -- so COALESCE() falls back to starttime and the gap becomes 0.
        -- This expression must come before the assignments below so that the
        -- variables still hold the previous row's values.
        TIMEDIFF(starttime, COALESCE(IF(userid != @prev_userid, NULL, @prev_endtime), starttime)) AS diff,
        @prev_endtime := endtime,
        @prev_userid := userid AS userid
    FROM
        t
        , (SELECT @prev_endtime := NULL, @prev_userid := NULL) var_init_subquery
    -- Rows must arrive in chronological order within each user; ordering by
    -- userid alone leaves that order undefined.
    ORDER BY userid, `date`, starttime
) sq
GROUP BY userid;
will return
+--------+--------------+
| userid | time_between |
+--------+--------------+
| 1 | 05:00:00 |
| 2 | 01:00:00 |
+--------+--------------+
Explanation:
In this part
, (SELECT @prev_endtime := NULL, @prev_userid := NULL) var_init_subquery
ORDER BY userid
we initialize our variables. The ORDER BY is very important, since there's no order in a relational database unless you specify it. It is so important, because the SELECT clause processes the rows in this order.
In the SELECT clause the order is also very important. Here
@prev_endtime := endtime,
@prev_userid := userid AS userid
we assign the values of the current row to the variables. Since this happens after this line
TIMEDIFF(starttime, COALESCE(IF(userid != @prev_userid, NULL, @prev_endtime), starttime)) AS diff,
the variables still hold the values of the previous row in the timediff() function. Therefore we also have to use COALESCE(), because in the very first row and when the userid changes, there is no value to calculate the diff from. To get a diff of 0 there, COALESCE() exchanges the NULL value with the starttime.
The last part is obviously to simply sum the seconds of the "between times".
Here's one way you can get the timeBetween value (computed in seconds, then converted back to a time):
-- Sum of the gaps between consecutive events of the same user on the same day.
--
-- Both derived tables number each user's events from latest to earliest using
-- MySQL user variables (@name; '#' would start a comment). Joining row rn of
-- firsttable to row rn - 1 of secondtable pairs every event with the one that
-- follows it, so the gap is the later start minus the earlier end.
-- The row-number column is called rn because RANK is a reserved word in
-- MySQL 8.0, and the alias secondtable is spelled exactly as it is referenced.
SELECT
    firsttable.userid,
    SEC_TO_TIME(SUM(TIME_TO_SEC(secondtable.starttime) - TIME_TO_SEC(firsttable.endtime))) timeBetween
FROM
(
    -- Events numbered per user by starttime, latest first.
    SELECT
        *,
        IF(@prev = userid, @rn1 := @rn1 + 1, @rn1 := 1) rn,
        @prev := userid
    FROM eventtable, (SELECT @prev := 0, @rn1 := 1) var
    ORDER BY userid, starttime DESC
) firsttable
INNER JOIN
(
    -- Events numbered per user by endtime, latest first.
    SELECT
        *,
        IF(@prev2 = userid, @rn2 := @rn2 + 1, @rn2 := 1) rn,
        @prev2 := userid
    FROM eventtable, (SELECT @prev2 := 0, @rn2 := 1) var
    ORDER BY userid, endtime DESC
) secondtable
    ON firsttable.userid = secondtable.userid
    AND firsttable.rn = secondtable.rn + 1
    AND firsttable.date = secondtable.date
GROUP BY firsttable.userid;
TEST:
Unable to add a fiddle.
So here's test data with schema:
-- Test schema and data for the events example.
DROP TABLE IF EXISTS `eventtable`;

CREATE TABLE `eventtable` (
    `id`          int(11)      NOT NULL AUTO_INCREMENT,
    `userid`      int(11)      NOT NULL,
    `eventid`     int(11)      NOT NULL,
    `description` varchar(100) CHARACTER SET utf8 NOT NULL,
    `date`        date         NOT NULL,
    `starttime`   time         NOT NULL,
    `endtime`     time         NOT NULL,
    PRIMARY KEY (`id`)
);

INSERT INTO `eventtable` (`id`, `userid`, `eventid`, `description`, `date`, `starttime`, `endtime`)
VALUES
    ('1', '1', '1', 'Event 1', '2016-02-02', '09:30:00', '11:00:00'),
    ('2', '1', '2', 'Event 2', '2016-02-02', '13:30:00', '15:00:00'),
    ('3', '1', '3', 'Event 3', '2016-02-02', '17:30:00', '21:00:00'),
    ('4', '2', '4', 'Event 4', '2016-02-03', '13:00:00', '14:00:00'),
    ('5', '2', '5', 'Event 5', '2016-02-03', '15:00:00', '16:00:00');
Result:
Executing the above query on the given test data you will get output like below:
userid timeBetween
1 05:00:00
2 01:00:00
Note:
For overlapping events the above query will give you negative timeBetween value.
You can replace the SEC_TO_TIME... line with the following:
-- Same sum of gaps, with a negative total (overlapping events) clamped to 0.
SEC_TO_TIME(GREATEST(SUM(TIME_TO_SEC(secondtable.starttime) - TIME_TO_SEC(firsttable.endtime)), 0)) timeBetween
If you take the TIMEDIFF of the MIN(starttime) and MAX(endtime) for each user/day and then subtract the sum of events as calculated earlier, this will give you the times in between.
Try this:
-- Duration of each event (end minus start).
-- The column names are written as bare identifiers: in single quotes they are
-- string literals, not column references. Replace your_table with the real
-- table name.
SELECT TIMEDIFF(endtime, starttime) AS duration
FROM your_table;
I hope this helps.
I've been writing SQL queries for years but I'm stuck on this one.
I've got 2 tables in MySQL:
LOANPAYMENTSDUE includes LoanPaymentsDueId, LoanId, AmtDue, DueDate
LOANPAYMENTS includes LoanPaymentsId, LoanId, AmtPaid, PaidDate
The relationship between the tables is the LoanId and not the specific payment that is due. In a perfect world the DueDate = PaidDate and the AmtDue = AmtPaid. However, what is making this complex for me is no relationship between the LoanPaymentsDueId and the LoanPaymentsId. The relationship only exists at the LoanId allowing for partial payments to be made on a single LOANPAYMENTSDUE payment.
I've researched the web trying to find the right query to create a report showing the date that each LOANPAYMENTSDUE was satisfied. This requires calculating the balance as of the LOANPAYMENTSDUE.DueDate because there can be payments missed and a new payment should satisfy the balance of the oldest LOANPAYMENTSDUE payment.
Here is the sample data and table scripts:
-- Scheduled payments: one row per amount due on a loan.
CREATE TABLE LOANPAYMENTSDUE (
    LoanPaymentsDueId BIGINT(20) NOT NULL AUTO_INCREMENT,
    LoanId            BIGINT(20),
    AmtDue            DOUBLE     NOT NULL,
    DueDate           DATE       NOT NULL,
    PRIMARY KEY (LoanPaymentsDueId)
);

-- Five monthly instalments of 100 for loan 1.
INSERT INTO LOANPAYMENTSDUE (LoanId, AmtDue, DueDate)
VALUES
    (1, 100, '2013-07-15'),
    (1, 100, '2013-08-15'),
    (1, 100, '2013-09-15'),
    (1, 100, '2013-10-15'),
    (1, 100, '2013-11-15');
-- Payments actually received: linked to a loan, not to a specific due row.
CREATE TABLE LOANPAYMENTS (
    LoanPaymentsId BIGINT(20) NOT NULL AUTO_INCREMENT,
    LoanId         BIGINT(20),
    AmtPaid        DOUBLE     NOT NULL,
    PaidDate       DATE       NOT NULL,
    PRIMARY KEY (LoanPaymentsId)
);

INSERT INTO LOANPAYMENTS (LoanId, AmtPaid, PaidDate)
VALUES
    (1, 100, '2013-07-15'), /* Full pmt on due date */
    (1, 100, '2013-08-10'), /* Full pmt a few days early */
    (1, 100, '2013-09-22'), /* Full pmt a week late */
    (1, 50,  '2013-10-18'), /* Partial pmt a few days late */
    (1, 50,  '2013-11-07'), /* Partial pmt 3 weeks late and satisfies the 10/15/2013 balance on this date */
    (1, 100, '2013-11-22'); /* Full pmt a week late and satisfies the 11/15/2013 pmt due */
The report query should simply provide the PAIDDATE when each LOANPAYMENTSDUE was satisfied. Using the table data above the report would be as follows:
LOANID LOANPAYMENTSDUEID AMTDUE DUEDATE PAIDDATE
1 1 100 2013-07-15 2013-07-15
1 2 100 2013-08-15 2013-08-10
1 3 100 2013-09-15 2013-09-22
1 4 100 2013-10-15 2013-11-07
1 5 100 2013-11-15 2013-11-22
You could start with these two queries, that return all of the rows with a running total column:
-- Running total of the amount due per loan.
-- @Due accumulates AmtDue while the loan stays the same and restarts when
-- LoanId changes. The variables are MySQL user variables, written with '@'
-- ('#' would start a comment).
SELECT
    LoanId, DueDate,
    CASE WHEN LoanId = @last_LoanId THEN @Due := @Due + AmtDue
         ELSE @Due := AmtDue END total_due,
    @last_LoanId := LoanId
FROM
    LOANPAYMENTSDUE, (SELECT @last_LoanId := NULL, @Due := NULL) t
-- A running total is only meaningful when rows are visited loan by loan in
-- due-date order.
ORDER BY LoanId, DueDate;
-- Running total of the amount paid per loan.
-- @Paid accumulates AmtPaid while the loan stays the same and restarts when
-- LoanId changes. The variables are MySQL user variables, written with '@'
-- ('#' would start a comment).
SELECT
    LoanId, PaidDate,
    CASE WHEN LoanId = @last_LoanId THEN @Paid := @Paid + AmtPaid
         ELSE @Paid := AmtPaid END total_paid,
    @last_LoanId := LoanId
FROM
    LOANPAYMENTS, (SELECT @last_LoanId := NULL, @Paid := NULL) t
-- A running total is only meaningful when rows are visited loan by loan in
-- payment-date order.
ORDER BY LoanId, PaidDate;
and then you could use a LEFT JOIN on ld.LoanId=lp.LoanId AND ld.total_due<=lp.total_paid, and a GROUP BY to get the minimum date where the join succeeded:
-- Date on which each scheduled payment was satisfied.
--
-- ld carries the running total due per loan, lp the running total paid. A due
-- row is satisfied by every payment whose running total has reached the due
-- row's running total; MIN(PaidDate) picks the first such payment. Due rows
-- that are not yet covered keep a NULL date because of the LEFT JOIN.
-- The running totals use MySQL user variables, written with '@' ('#' would
-- start a comment), and each derived table is ordered so that the totals
-- accumulate chronologically within a loan.
SELECT
    ld.LoanId, ld.DueDate, MIN(lp.PaidDate)
FROM
    (SELECT
        LoanId, DueDate,
        CASE WHEN LoanId = @last_LoanId1 THEN @Due := @Due + AmtDue ELSE @Due := AmtDue END total_due,
        @last_LoanId1 := LoanId
    FROM
        LOANPAYMENTSDUE, (SELECT @last_LoanId1 := NULL, @Due := NULL) t1
    ORDER BY LoanId, DueDate) ld
LEFT JOIN
    (SELECT
        LoanId, PaidDate,
        CASE WHEN LoanId = @last_LoanId2 THEN @Paid := @Paid + AmtPaid ELSE @Paid := AmtPaid END total_paid,
        @last_LoanId2 := LoanId
    FROM
        LOANPAYMENTS, (SELECT @last_LoanId2 := NULL, @Paid := NULL) t2
    ORDER BY LoanId, PaidDate) lp
ON
    ld.LoanId = lp.LoanId AND ld.total_due <= lp.total_paid
GROUP BY
    ld.LoanId, ld.DueDate
Please see fiddle here.
Assuming that when the amount is paid it's paid in portion or remaining amount in whole, you check based on Total Amount Due and Total Amount Paid by matching those up. Here's the sqlFiddle example of your data and query
-- Paid date for each scheduled payment of loan 1.
-- due_totals holds the cumulative amount due up to each due date, paid_totals
-- the cumulative amount paid up to each payment date. A due row is matched to
-- the payment at which both cumulative amounts are equal.
SELECT
    due_totals.LoanId,
    due_totals.LoanPaymentsDueId,
    due_totals.AmtDue,
    due_totals.DueDate,
    paid_totals.PaidDate
FROM (
    SELECT
        LD.LoanPaymentsDueId,
        LD.LoanId,
        LD.DueDate,
        LD.AmtDue,
        (
            SELECT SUM(LD1.AmtDue)
            FROM LOANPAYMENTSDUE AS LD1
            WHERE LD1.LoanId = LD.LoanId
              AND LD1.DueDate <= LD.DueDate
        ) AS AmtDueTotal
    FROM LOANPAYMENTSDUE AS LD
) AS due_totals
INNER JOIN (
    SELECT
        L.LoanPaymentsId,
        L.LoanId,
        L.PaidDate,
        (
            SELECT SUM(L1.AmtPaid)
            FROM LOANPAYMENTS AS L1
            WHERE L1.LoanId = L.LoanId
              AND L1.PaidDate <= L.PaidDate
        ) AS AmtPaidTotal
    FROM LOANPAYMENTS AS L
) AS paid_totals
    ON  due_totals.LoanId = paid_totals.LoanId
    AND due_totals.AmtDueTotal = paid_totals.AmtPaidTotal
WHERE due_totals.LoanId = 1;
I have a bunch of patient prescriptions, each having a certain start date and an end date. I would like to find the instances where a patient has been taking more than one drug in the same drug category for more than 2 days. duration should overlap.
Table structure looks like this:
PatientID StartDate EndDate Drug DrugCategory
1 1/1/2013 1/5/2013 A Cat1
1 1/1/2013 1/4/2013 B Cat1
1 1/10/2013 1/12/2013 C Cat1
2 ....... ........ ............. .........
As seen above, Patient-1 was prescribed 3 drugs in the same category and the first two drug overlapped in duration more than 2 days. So, for this example, I would like the query return the first two records for Patient-1 along with drug name, patientid.
Hope someone can help. This is using SQL Server 2008 R2 btw.
Do you want them as separate rows or as one row? If you want them as separate rows, this should work; otherwise you can pivot the result.
/* Builds table "want" from self-joined pairs (H, V) of rows in "have" that
   belong to the same patient and name different drugs.
   NOTE(review): the "ne" operator and "create table ... as" suggest SAS
   PROC SQL rather than SQL Server - confirm the target dialect. */
create table want as
/* First branch: the H row of each qualifying pair. */
select H.* from have H, have V
where H.drug ne V.drug
and H.PatientID=V.PatientID
/* H starts no later than V, and V starts at least 2 days before H ends.
   NOTE(review): neither V's end date nor DrugCategory is compared here. */
and H.startDate <= V.startDate
and V.startDate <= H.endDate-2
/* Second branch: the V row of the same pairs; UNION removes duplicate rows. */
union select V.* from have H, have V
where H.drug ne V.drug
and H.PatientID=V.PatientID
and H.startDate <= V.startDate
and V.startDate <= H.endDate-2
;
I union the H and V records, I'm sure there's a more efficient way to do that but couldn't easily come up with one. (Just H works for the example given, but for a more proper example where the start dates are not always equal you need the V row as well.)
Please test this thoroughly before using it. I have tested it and so far it looks good, but I have not done exhaustive testing. It would be great if you could test it further against your requirements and point out any anomalies in the result, or take it forward and modify it as required.
--Test data:
-- Creates the dbo.Prescriptions test table and loads nine prescriptions for
-- patient 1, all in drug category Cat1.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[Prescriptions](
[PatientID] [int] NULL,
[StartDate] [datetime] NULL,
[EndDate] [datetime] NULL,
[Drug] [varchar](50) NULL,
[DrugCategory] [varchar](50) NULL
) ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
-- Dates are given as binary datetime literals. The result listing that follows
-- the query shows, for example, drug D's StartDate 0x0000A13A00000000 rendered
-- as 2013-01-02 and its EndDate 0x0000A13E00000000 as 2013-01-06.
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13A00000000 AS DateTime), CAST(0x0000A13E00000000 AS DateTime), N'D', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13A00000000 AS DateTime), CAST(0x0000A13B00000000 AS DateTime), N'E', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13800000000 AS DateTime), CAST(0x0000A13B00000000 AS DateTime), N'F', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13800000000 AS DateTime), CAST(0x0000A13900000000 AS DateTime), N'G', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A12300000000 AS DateTime), CAST(0x0000A13900000000 AS DateTime), N'Z', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A12300000000 AS DateTime), CAST(0x0000A13A00000000 AS DateTime), N'Y', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13900000000 AS DateTime), CAST(0x0000A13D00000000 AS DateTime), N'A', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A13900000000 AS DateTime), CAST(0x0000A13C00000000 AS DateTime), N'B', N'Cat1')
INSERT [dbo].[Prescriptions] ([PatientID], [StartDate], [EndDate], [Drug], [DrugCategory]) VALUES (1, CAST(0x0000A14200000000 AS DateTime), CAST(0x0000A14400000000 AS DateTime), N'C', N'Cat1')
Query Used:
-- Prescriptions that overlap another drug of the same category for the same
-- patient.
-- Every ordered pair (a, b) of different drugs is compared through four day
-- differences between a's and b's start and end dates; the outer filter keeps
-- a's row when the product of the four is negative and the c3 / c4 thresholds
-- are met.
SELECT DISTINCT
    pairs.PatientID,
    pairs.StartDate,
    pairs.EndDate,
    pairs.Drug,
    pairs.DrugCategory
FROM (
    SELECT
        DATEDIFF(dd, a.startdate, b.startdate) AS c1,
        DATEDIFF(dd, a.enddate,   b.enddate)   AS c2,
        DATEDIFF(dd, a.startdate, b.enddate)   AS c3,
        DATEDIFF(dd, a.enddate,   b.startdate) AS c4,
        a.PatientID,
        a.StartDate,
        a.EndDate,
        a.Drug,
        a.DrugCategory
    FROM Prescriptions AS a
    INNER JOIN Prescriptions AS b
        ON  a.patientid = b.patientid
        AND a.DrugCategory = b.DrugCategory
        AND a.drug <> b.drug
) AS pairs
WHERE pairs.c1 * pairs.c2 * pairs.c3 * pairs.c4 < 0
  AND pairs.c3 > 2
  AND pairs.c4 <= -2
ORDER BY
    pairs.PatientID,
    pairs.StartDate,
    pairs.EndDate,
    pairs.Drug
Results:
PatientID StartDate EndDate Drug DrugCategory
----------- ----------------------- ----------------------- -------------------------------------------------- --------------------------------------------------
1 2012-12-10 00:00:00.000 2013-01-02 00:00:00.000 Y Cat1
1 2012-12-31 00:00:00.000 2013-01-03 00:00:00.000 F Cat1
1 2013-01-01 00:00:00.000 2013-01-04 00:00:00.000 B Cat1
1 2013-01-01 00:00:00.000 2013-01-05 00:00:00.000 A Cat1
1 2013-01-02 00:00:00.000 2013-01-06 00:00:00.000 D Cat1
(5 row(s) affected)
what you should be able to do is join the prescription table to itself on the patientID and drugCategory fields where the drugName differs and the startDate OR endDate of the 2nd instance spans the startDate/endDate of the first. Then, determine the overlap range by subtracting the number of days between the max(startDates) and min(endDates). If the overlap is greater than 2 days, return the row:
-- Pairs of prescriptions for the same patient and drug category, with
-- different drugs, whose date ranges overlap by more than 2 days.
--
-- For each pair (p, P1) the overlap runs from the later of the two start dates
-- (start_max) to the earlier of the two end dates (end_min).
-- The pair filter is the general interval-overlap test: each range starts no
-- later than the other one ends. Checking only whether P1's start or end falls
-- inside p misses the case where P1 completely contains p.
select *, datediff(d, start_max, end_min) as overlap
from (
    SELECT
        P.PatientID, P.StartDate, P.EndDate, P.Drug, P.DrugCategory,
        P1.StartDate AS p1_start, P1.EndDate AS p1_end, P1.Drug AS p1_drug,
        CASE WHEN p.startdate >= P1.startdate THEN p.startdate ELSE P1.startdate END AS start_max,
        CASE WHEN p.EndDate <= P1.EndDate THEN p.EndDate ELSE P1.EndDate END AS end_min
    FROM
        dbo.Prescriptions p INNER JOIN
        dbo.Prescriptions AS P1 ON
            P.PatientID = P1.PatientID AND
            P.DrugCategory = P1.DrugCategory AND
            P.Drug <> P1.Drug
    WHERE
        P1.StartDate <= P.EndDate AND
        P1.EndDate >= P.StartDate
) t
where
    datediff(d, start_max, end_min) > 2