find a duplicate series in SQL

find a duplicate series in SQL - sql-server-2008

I have a table with 3 columns containing a variable number of records based off of the first column which is a foreign key. I am trying to determine if I can detect when there is a duplicate across multiple rows for an entire series
-- Sample data for the duplicate-series question: every portid owns a variable
-- number of (asset_id, allocation) rows.
-- A local temporary table has to be created with CREATE TABLE; DECLARE ... TABLE
-- only accepts an @-prefixed table variable, so "declare #finddupseries table"
-- does not parse.
IF OBJECT_ID('tempdb..#finddupseries') IS NOT NULL
    DROP TABLE #finddupseries;   -- lets the script be re-run in the same session

CREATE TABLE #finddupseries
(
    portid     int,
    asset_id   int,
    allocation float
);

INSERT INTO #finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225;

-- Show the loaded rows.
SELECT portid, asset_id, allocation
FROM #finddupseries;
The records for portid 250 and 254 match.
Is there any way I can write a query to detect this?
edit: yes, the entire series must match. Also, if there was a way to determine which one it DID match would be helpful as the actual table has around 10k records.
thanks!

This query will give you all the values converted into a string grouped by port_id
-- One result row per portid. AllValuesFromAllRows is that portfolio's
-- asset_id/allocation pairs glued into a single comma-terminated string,
-- ordered by asset_id then allocation so equal series yield equal strings.
-- NOTE(review): asset_id and allocation are concatenated with nothing between
-- them, so e.g. (1, 10.5) and (11, 0.5) both render as '110.5'.
SELECT
    series.portid,
    (
        -- FOR XML PATH('') over an unnamed expression folds the rows of this
        -- correlated subquery into one string.
        SELECT CAST(detail.asset_id AS VARCHAR)
             + CAST(detail.allocation AS VARCHAR)
             + ','
        FROM #finddupseries AS detail
        WHERE detail.portid = series.portid
        ORDER BY detail.asset_id, detail.allocation
        FOR XML PATH('')
    ) AS AllValuesFromAllRows
FROM #finddupseries AS series
GROUP BY series.portid
the output should look like this
portid AllValuesFromAllRows
----------- ------------------------------------------------------
250 20.105,40.0225,50.0225,60.05,660.8,
251 20.3,50.1,130.6,
252 20.15,50.05,130.8,
253 20.45,50.15,130.4,
254 20.105,40.0225,50.0225,60.05,660.8,
Now, lets do a group by with a having!
-- DuplicateFinder: one row per portid with its series flattened to a string.
-- The outer query keeps only the strings shared by more than one portid.
;WITH DuplicateFinder AS
(
    SELECT
        series.portid,
        (
            -- Rows of the correlated subquery are concatenated by FOR XML PATH('').
            SELECT CAST(detail.asset_id AS VARCHAR)
                 + CAST(detail.allocation AS VARCHAR)
                 + ','
            FROM #finddupseries AS detail
            WHERE detail.portid = series.portid
            ORDER BY detail.asset_id, detail.allocation
            FOR XML PATH('')
        ) AS AllValuesFromAllRows
    FROM #finddupseries AS series
    GROUP BY series.portid
)
SELECT
    flattened.AllValuesFromAllRows,
    COUNT(*) AS NumDups          -- how many portids share this exact string
FROM DuplicateFinder AS flattened
GROUP BY flattened.AllValuesFromAllRows
HAVING COUNT(*) > 1
You should get
AllValuesFromAllRows NumDups
----------------------------------------------- -----------
20.105,40.0225,50.0225,60.05,660.8, 2
So here is everything put together
-- Complete script: load the sample data, flatten each portid's series into a
-- string, and list every portid whose string is shared with another portid.
SET NOCOUNT ON;

-- A local temporary table is created with CREATE TABLE; DECLARE ... TABLE only
-- accepts an @-prefixed table variable, so "declare #finddupseries table"
-- does not parse.
IF OBJECT_ID('tempdb..#finddupseries') IS NOT NULL
    DROP TABLE #finddupseries;   -- lets the script be re-run in the same session

CREATE TABLE #finddupseries
(
    portid     int,
    asset_id   int,
    allocation float
);

INSERT INTO #finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225;

WITH PivotAssetIdAndAllocation AS
(
    -- One row per portid; the series is rendered as
    -- '<asset_id>_<allocation>~~' fragments in asset_id/allocation order.
    SELECT
        fus1.portid,
        (
            SELECT CONVERT(VARCHAR(30), fus2.asset_id)
                 + '_'
                 + CONVERT(VARCHAR(30), fus2.allocation)
                 + '~~'
            FROM #finddupseries AS fus2
            WHERE fus1.portid = fus2.portid
            ORDER BY fus2.asset_id, fus2.allocation
            FOR XML PATH('')
        ) AS AllValuesFromAllRows
    FROM #finddupseries AS fus1
    GROUP BY fus1.portid
),
ListOfDuplicates AS
(
    -- Series strings that occur for more than one portid.
    SELECT
        AllValuesFromAllRows,
        COUNT(*) AS NumDups
    FROM PivotAssetIdAndAllocation
    GROUP BY AllValuesFromAllRows
    HAVING COUNT(*) > 1
)
SELECT
    portid,
    AllValuesFromAllRows
FROM PivotAssetIdAndAllocation
WHERE AllValuesFromAllRows IN (SELECT AllValuesFromAllRows FROM ListOfDuplicates)
-- Keep matching portids next to each other and make the row order deterministic.
ORDER BY AllValuesFromAllRows, portid;
and the output is
portid AllValuesFromAllRows
----------- ----------------------------------------------------------------------
250 2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~
254 2_0.105~~4_0.0225~~5_0.0225~~6_0.05~~66_0.8~~

this should do the trick:
-- Sample data. Portids 250/254 and 251/255 hold identical series.
-- The table is created with CREATE TABLE because DECLARE ... TABLE only accepts
-- an @-prefixed table variable; "declare #finddupseries table" does not parse.
IF OBJECT_ID('tempdb..#finddupseries') IS NOT NULL
    DROP TABLE #finddupseries;   -- lets the script be re-run in the same session

CREATE TABLE #finddupseries
(
    portid     int,
    asset_id   int,
    allocation float
);

INSERT INTO #finddupseries (portid, asset_id, allocation)
SELECT 250, 6, 0.05 UNION ALL
SELECT 250, 66, 0.8 UNION ALL
SELECT 250, 2, 0.105 UNION ALL
SELECT 250, 4, 0.0225 UNION ALL
SELECT 250, 5, 0.0225 UNION ALL
SELECT 251, 13, 0.6 UNION ALL
SELECT 251, 2, 0.3 UNION ALL
SELECT 251, 5, 0.1 UNION ALL
SELECT 252, 13, 0.8 UNION ALL
SELECT 252, 2, 0.15 UNION ALL
SELECT 252, 5, 0.05 UNION ALL
SELECT 253, 13, 0.4 UNION ALL
SELECT 253, 2, 0.45 UNION ALL
SELECT 253, 5, 0.15 UNION ALL
SELECT 254, 6, 0.05 UNION ALL
SELECT 254, 66, 0.8 UNION ALL
SELECT 254, 2, 0.105 UNION ALL
SELECT 254, 4, 0.0225 UNION ALL
SELECT 254, 5, 0.0225 UNION ALL
SELECT 255, 13, 0.6 UNION ALL
SELECT 255, 2, 0.3 UNION ALL
SELECT 255, 5, 0.1;

-- Returns every row of each portid whose ENTIRE series also exists under
-- another portid. Comparing ascending and descending row numbers per
-- (asset_id, allocation) only flags individual shared rows: it reports partial
-- overlaps and drops the middle row when a pair occurs an odd number of times.
-- Two series are equal when the number of rows they share equals the size of both.
-- Assumes an (asset_id, allocation) pair appears at most once per portid -- TODO confirm.
WITH series_size AS
(
    -- Number of rows in each series.
    SELECT portid, COUNT(*) AS row_count
    FROM #finddupseries
    GROUP BY portid
),
series_overlap AS
(
    -- For each ordered pair of different portids, how many rows they share.
    SELECT
        a.portid AS portid,
        b.portid AS other_portid,
        COUNT(*) AS matched_rows
    FROM #finddupseries AS a
    INNER JOIN #finddupseries AS b
        ON  b.asset_id   = a.asset_id
        AND b.allocation = a.allocation
        AND b.portid    <> a.portid
    GROUP BY a.portid, b.portid
),
duplicated_series AS
(
    -- Portids that fully match at least one other portid.
    SELECT DISTINCT ov.portid
    FROM series_overlap AS ov
    INNER JOIN series_size AS mine
        ON mine.portid = ov.portid
    INNER JOIN series_size AS theirs
        ON theirs.portid = ov.other_portid
    WHERE ov.matched_rows = mine.row_count
      AND ov.matched_rows = theirs.row_count
)
SELECT d.portid, d.asset_id, d.allocation
FROM #finddupseries AS d
WHERE EXISTS (SELECT 1 FROM duplicated_series AS ds WHERE ds.portid = d.portid)
ORDER BY d.portid, d.asset_id, d.allocation;

Related

Can I transpose (Pivot) a table using a 'where' clause?

I need to create a pivot table from an original table in mysql, but I need to be specific about which rows i want to take the data into the new table i'm making.
I would be guessing i could use the 'where' clause in a query to create the pivot table, but i dont know exactly how. I have a code that allows me to create a pivot table from its original. It selects two rows, one for each 'max' function and turns them into columns.
-- Builds transp_table: one row per month number (descrip) and one column per
-- product ID holding that ID's value for the month.
-- Changes from the posted version:
--   * the grouped column is descrip ("original_table" is a table, not a column);
--   * every branch reads original_table (the post mixed in a table named disp);
--   * ID is cast to DECIMAL before the equality test, because a FLOAT column
--     does not reliably compare equal to a decimal literal such as 1.01.
-- NOTE(review): month_7 and month_8 are not in the sample CREATE TABLE shown for
-- original_table -- confirm they exist in the real table.
CREATE TABLE `transp_table` AS
SELECT *
FROM (
    SELECT
        descrip,
        MAX(CASE WHEN CAST(ID AS DECIMAL(10, 2)) = 1.01 THEN value ELSE 0 END) AS `1.01`,
        MAX(CASE WHEN CAST(ID AS DECIMAL(10, 2)) = 1.02 THEN value ELSE 0 END) AS `1.02`
    FROM (
        -- Unpivot: one (ID, value, month number) row per month column.
        SELECT ID, `month_1` AS value, 1 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_2` AS value, 2 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_3` AS value, 3 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_4` AS value, 4 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_5` AS value, 5 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_6` AS value, 6 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_7` AS value, 7 AS descrip
        FROM original_table
        UNION ALL
        SELECT ID, `month_8` AS value, 8 AS descrip
        FROM original_table
    ) AS src
    GROUP BY descrip
) AS `transp_table`;
It works well to creating a pivot table, but for this model, i need to include a 'max' function for each specific ID. And from the original_table, there is a lot of rows. And there is, for an instance, a column in the original_table called 'type_of_product', and i need to select the rows that have a specific string in it. Is there a query were i could select the rows to make the pivot table without having to type for each one of them like in the example above? Here's the structure with a sample of the original_table:
-- Structure and sample rows for original_table: one row per product ID with one
-- integer column per month.
-- NOTE(review): ID is FLOAT, so equality tests such as ID = 1.01 are unreliable;
-- an exact type such as DECIMAL would be safer if the schema can change.
CREATE TABLE original_table (
    `ID`              float NOT NULL,
    `type_of_product` text,
    `month_1`         int,
    `month_2`         int,
    `month_3`         int,
    `month_4`         int,
    `month_5`         int,
    `month_6`         int
);

-- The column list must match the table definition: the posted version spelled
-- the columns `type_of_procduct` and `month-1`, neither of which exists.
INSERT INTO `original_table` (
    `ID`, `type_of_product`, `month_1`, `month_2`, `month_3`, `month_4`, `month_5`, `month_6`)
VALUES
    (1.01, 'TV', 50, 53, 20, 33, 134, 0),
    (1.02, 'DVD', 36, 12, 5, 0, 0, 26),
    (2.01, 'DVD', 11, 12, 30, 5, 22, 0),
    (3.01, 'CD', 0, 0, 3, 1, 0, 19),
    (3.02, 'TV', 3, 6, 0, 0, 10, 15),
    (3.03, 'TV', 500, 20, 0, 0, 0, 1);

ordering the results of a query as specified in the where clause

I have a table QuotesTable - primary key is quotesid.
I have this sql-statement:
-- Fetch the listed quotes; no ORDER BY, so the row order is up to the engine.
SELECT *
FROM QuotesTable
WHERE quotesid IN (103, 7, 16, 50, 41, 80, 67, 64)
This returns me the result in the following order:
7
16
41
.........
103 and so on.
but I need the results in the following order as specified in the query (103,7,16,50,41,80,67,64) as:
103,
7
16
.......
64 and so on.
Is there a way to achieve this one?

Try this:
-- Return the listed quotes in the same order as the IN list by mapping each
-- quotesid to its position.
SELECT *
FROM QuotesTable
WHERE quotesid IN (103, 7, 16, 50, 41, 80, 67, 64)
ORDER BY
    CASE
        WHEN quotesid = 103 THEN 1
        WHEN quotesid = 7   THEN 2
        WHEN quotesid = 16  THEN 3
        WHEN quotesid = 50  THEN 4
        WHEN quotesid = 41  THEN 5
        WHEN quotesid = 80  THEN 6
        WHEN quotesid = 67  THEN 7
        WHEN quotesid = 64  THEN 8
    END
If those values grow then you can create a table in database:
-- Lookup table pairing a quotesid with the position it should be sorted to.
CREATE TABLE QuotesOrderingTable
(
    quotesid int,
    orderid  int
)
GO
fill it with appropriate values:
-- Each row pairs a quotesid with the position (orderid) it should sort to.
insert into QuotesOrderingTable values
(103, 1),
(7, 2),
(16, 3),
(50, 4),
(41, 5),
(80, 6),
(67, 7),
(64, 8),
-- The "..." entries below are placeholders for further (quotesid, orderid)
-- pairs; they are illustrative and not runnable as written.
(..., 9),
(..., 10),
...
and then use it to order by:
-- Sort the selected quotes by the position stored in QuotesOrderingTable.
SELECT quotes.*
FROM QuotesTable AS quotes
INNER JOIN QuotesOrderingTable AS positions
    ON positions.quotesid = quotes.quotesid
WHERE quotes.quotesid IN (103, 7, 16, 50, 41, 80, 67, 64)
ORDER BY positions.orderid

Another option is doing it like this:
-- One SELECT per quote, each tagged with its desired position. The tag column
-- is named in the first branch so the final ORDER BY can refer to it by name
-- instead of by ordinal position.
SELECT 1 AS sort_position, * FROM QuotesTable WHERE quotesid = 103 UNION ALL
SELECT 2, * FROM QuotesTable WHERE quotesid = 7 UNION ALL
SELECT 3, * FROM QuotesTable WHERE quotesid = 16 UNION ALL
SELECT 4, * FROM QuotesTable WHERE quotesid = 50 UNION ALL
SELECT 5, * FROM QuotesTable WHERE quotesid = 41 UNION ALL
SELECT 6, * FROM QuotesTable WHERE quotesid = 80 UNION ALL
SELECT 7, * FROM QuotesTable WHERE quotesid = 67 UNION ALL
SELECT 8, * FROM QuotesTable WHERE quotesid = 64
ORDER BY sort_position
Not the prettiest, but atleast you aren't repeating the WHERE clause
Another variation, which looks a bit nicer:
-- Join the quotes to an inline list of (ordering, quotesid) pairs and sort by
-- the ordering value. Only quotes present in the list are returned.
SELECT *
FROM QuotesTable AS q
INNER JOIN (
    SELECT 1 AS ordering, 103 AS quotesid
    UNION ALL SELECT 2, 7
    UNION ALL SELECT 3, 16
    UNION ALL SELECT 4, 50
    UNION ALL SELECT 5, 41
    UNION ALL SELECT 6, 80
    UNION ALL SELECT 7, 67
    UNION ALL SELECT 8, 64
) AS o
    ON o.quotesid = q.quotesid
ORDER BY o.ordering

GROUP_CONCAT not working in Sub-Query

I have tried this query to get the following data
-- Totals over all supplier bills with p_id > 0, plus a tax subquery.
-- NOTE: as posted, tax_amount comes back NULL -- GROUP_CONCAT produces a single
-- string, not a list of values that IN can match against.
SELECT
    GROUP_CONCAT(sb.p_id),
    TRUNCATE(SUM(sb.total_amount), 2) grand_total,
    TRUNCATE(SUM(sb.amount_wot), 2) sale_amount,
    (
        SELECT TRUNCATE(SUM((item_qty * item_price) * tax_price / 100), 2)
        FROM ci_bill_items
        WHERE bill_id IN (GROUP_CONCAT(sb.p_id))
    ) tax_amount
FROM ci_suppliers_bills sb
WHERE sb.p_id > 0
I got the expected result but not the tax_amount its return null but if i run the seperate query like :
-- Tax total for an explicit list of bill ids.
SELECT
    TRUNCATE(SUM((item_qty * item_price) * tax_price / 100), 2)
FROM ci_bill_items
WHERE bill_id IN (
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, 62, 63, 64, 65, 66, 67, 68, 71
)
Then i get the correct result. But all i want in one query like the first one, is group_concat not working like i tried bill_id IN ( GROUP_CONCAT( sb.p_id ) )? Help is much appriciated.

You can use FIND_IN_SET like this:
-- Same totals as the question's query, but the tax subquery tests membership
-- in the comma-separated GROUP_CONCAT string with FIND_IN_SET instead of IN.
-- NOTE(review): GROUP_CONCAT output is limited by group_concat_max_len --
-- confirm the full id list fits, otherwise trailing ids are silently dropped.
SELECT
    GROUP_CONCAT(sb.p_id),
    TRUNCATE(SUM(sb.total_amount), 2) grand_total,
    TRUNCATE(SUM(sb.amount_wot), 2) sale_amount,
    (
        SELECT TRUNCATE(SUM((item_qty * item_price) * tax_price / 100), 2)
        FROM ci_bill_items
        WHERE FIND_IN_SET(bill_id, GROUP_CONCAT(sb.p_id)) > 0
    ) tax_amount
FROM ci_suppliers_bills sb
WHERE sb.p_id > 0

GROUP_CONCAT doesn't return a list, it returns a string. To get a list that you can use with IN, you need to run a subquery:
-- Fragment: replacement WHERE clause for the tax subquery. The id list comes
-- from a subquery on ci_suppliers_bills rather than from a GROUP_CONCAT string.
WHERE bill_id IN (SELECT p_id FROM ci_suppliers_bills
WHERE p_id > 0)

SQL Select Query Help. Maximum sum of consequtive four rows.

We have a traffic counter that counts cars in each lane (two inbound and two outbound) in 15 minute increments.
There is a peak period which is defined as 7:00am to 9:00am. Within this peak period we want to know the PeakHourIn and PeakHourOut and PeakHourSum.
The PeakHourIn is the highest consecutive 4x15 minute total (1 hour) for lne1in + lne4in
The PeakHourOut is the highest consecutive 4x15 minute total (1 hour) for lne2out + lne3out
The PeakHourSum is the highest consecutive 4x15 minute total (1 hour) for all lanes.
Date Time lne1in lne2out lne3out lne4in
09-18-2012 5:45 AM 2 0 0 0
09-18-2012 6:00 AM 1 0 0 1
09-18-2012 6:15 AM 2 1 0 0
09-18-2012 6:30 AM 2 1 0 0
09-18-2012 6:45 AM 6 1 2 1
09-18-2012 7:00 AM 9 1 0 3
09-18-2012 7:15 AM 81 12 22 15
09-18-2012 7:30 AM 144 31 63 56
09-18-2012 7:45 AM 84 30 62 42
09-18-2012 8:00 AM 7 1 0 3
09-18-2012 8:15 AM 11 2 3 3
09-18-2012 8:30 AM 12 3 7 1
09-18-2012 8:45 AM 16 4 8 0
09-18-2012 9:00 AM 5 2 5 0
09-18-2012 9:15 AM 10 1 4 0
Results should look like:
PeakHourIn 434
PeakHourOut 221
PeakHourSum 655
Any help would be greatly appreciated.

If you used a native temporal data type to store the date/time, you could group multiple self-joins:
-- Peak one-hour totals inside the 07:00-09:00 peak period.
-- The inner query builds one row per hour window: four consecutive 15-minute
-- rows chained by self-joins, each exactly 15 minutes after the previous one.
-- Window start times run from 07:00 to 08:00, so the last window covers the
-- 08:00 through 08:45 rows.
-- The inner query carries no GROUP BY: every select item is a plain per-window
-- expression, and grouping by t1.DateTime while selecting non-aggregated
-- columns is rejected when ONLY_FULL_GROUP_BY is enabled.
SELECT
    MAX(hourly.lne1in + hourly.lne4in)                                     AS PeakHourIn,
    MAX(hourly.lne2out + hourly.lne3out)                                   AS PeakHourOut,
    MAX(hourly.lne1in + hourly.lne2out + hourly.lne3out + hourly.lne4in)   AS PeakHourSum
FROM (
    SELECT
        q1.lne1in  + q2.lne1in  + q3.lne1in  + q4.lne1in  AS lne1in,
        q1.lne2out + q2.lne2out + q3.lne2out + q4.lne2out AS lne2out,
        q1.lne3out + q2.lne3out + q3.lne3out + q4.lne3out AS lne3out,
        q1.lne4in  + q2.lne4in  + q3.lne4in  + q4.lne4in  AS lne4in
    FROM my_table AS q1
    INNER JOIN my_table AS q2
        ON q2.DateTime = q1.DateTime + INTERVAL 15 MINUTE
    INNER JOIN my_table AS q3
        ON q3.DateTime = q2.DateTime + INTERVAL 15 MINUTE
    INNER JOIN my_table AS q4
        ON q4.DateTime = q3.DateTime + INTERVAL 15 MINUTE
    WHERE TIME(q1.DateTime) BETWEEN '07:00:00' AND '08:00:00'
) AS hourly

EDIT
Here's a solution in MySQL: http://sqlfiddle.com/#!2/ff0fb/9
-- One row per lane per 15-minute reading.
CREATE TABLE TrafficData
(
    StartTime timestamp,
    Lane      int,
    CarCount  int
);

-- Direction per lane: lanes 1 and 4 are loaded with 0, lanes 2 and 3 with 1.
CREATE TABLE LaneData
(
    Lane      int,
    Direction bit
);

INSERT INTO LaneData (Lane, Direction)
SELECT 1, 0
UNION ALL SELECT 2, 1
UNION ALL SELECT 3, 1
UNION ALL SELECT 4, 0;
-- Load the question's wide sample (one column per lane) as one row per lane:
-- the cross join repeats each reading for every lane in LaneData and the CASE
-- picks that lane's count.
INSERT INTO TrafficData (StartTime, Lane, CarCount)
SELECT
    originalTable.dt,
    LaneData.Lane,
    CASE LaneData.Lane
        WHEN 1 THEN originalTable.l1
        WHEN 2 THEN originalTable.l2
        WHEN 3 THEN originalTable.l3
        WHEN 4 THEN originalTable.l4
        ELSE NULL
    END
FROM
(
    SELECT '2012-09-18 05:45' dt, 2 l1, 0 l2, 0 l3, 0 l4
    UNION ALL SELECT '2012-09-18 06:00', 1, 0, 0, 1
    UNION ALL SELECT '2012-09-18 06:15', 2, 1, 0, 0
    UNION ALL SELECT '2012-09-18 06:30', 2, 1, 0, 0
    UNION ALL SELECT '2012-09-18 06:45', 6, 1, 2, 1
    UNION ALL SELECT '2012-09-18 07:00', 9, 1, 0, 3
    UNION ALL SELECT '2012-09-18 07:15', 81, 12, 22, 15
    UNION ALL SELECT '2012-09-18 07:30', 144, 31, 63, 56
    UNION ALL SELECT '2012-09-18 07:45', 84, 30, 62, 42
    UNION ALL SELECT '2012-09-18 08:00', 7, 1, 0, 3
    UNION ALL SELECT '2012-09-18 08:15', 11, 2, 3, 3
    UNION ALL SELECT '2012-09-18 08:30', 12, 3, 7, 1
    UNION ALL SELECT '2012-09-18 08:45', 16, 4, 8, 0
    UNION ALL SELECT '2012-09-18 09:00', 5, 2, 5, 0
    UNION ALL SELECT '2012-09-18 09:15', 10, 1, 4, 0
) AS originalTable
CROSS JOIN LaneData;
-- Peak one-hour car count for each lane. For every reading that starts
-- between 07:00 and 08:15, sum that lane's readings from its start time up to
-- one second short of an hour later, then keep the largest sum per lane.
-- NOTE(review): a window starting at 08:15 also takes in the 09:00 reading --
-- confirm that is intended for a 07:00-09:00 peak period.
SELECT
    windows.Lane,
    MAX(windows.SumCarCount) AS MaxSumCarCount
FROM
(
    SELECT
        anchor.Lane,
        SUM(reading.CarCount) AS SumCarCount
    FROM TrafficData AS anchor
    INNER JOIN TrafficData AS reading
        ON  reading.Lane = anchor.Lane
        AND reading.StartTime BETWEEN anchor.StartTime
                                  AND DATE_SUB(DATE_ADD(anchor.StartTime, INTERVAL 1 HOUR), INTERVAL 1 SECOND)
    WHERE TIME(anchor.StartTime) BETWEEN '07:00' AND '08:15'
    GROUP BY anchor.Lane, anchor.StartTime
) AS windows
GROUP BY windows.Lane
ORDER BY windows.Lane;
-- Peak one-hour car count for each direction.
-- The windows are driven from the DISTINCT start times. Driving them from
-- TrafficData itself yields one driving row per lane, so with two lanes per
-- direction every reading in the window was summed twice and the reported
-- peak was doubled.
-- NOTE(review): a window starting at 08:15 also takes in the 09:00 reading --
-- confirm that is intended for a 07:00-09:00 peak period.
SELECT
    windows.Direction,
    MAX(windows.SumCarCount) AS MaxSumCarCount
FROM
(
    SELECT
        lane_info.Direction,
        starts.StartTime,
        SUM(reading.CarCount) AS SumCarCount
    FROM (SELECT DISTINCT StartTime FROM TrafficData) AS starts
    INNER JOIN TrafficData AS reading
        ON reading.StartTime BETWEEN starts.StartTime
                                 AND DATE_ADD(DATE_ADD(starts.StartTime, INTERVAL 1 HOUR), INTERVAL -1 SECOND)
    INNER JOIN LaneData AS lane_info
        ON lane_info.Lane = reading.Lane
    WHERE TIME(starts.StartTime) BETWEEN '07:00' AND '08:15'
    GROUP BY lane_info.Direction, starts.StartTime
) AS windows
GROUP BY windows.Direction
ORDER BY windows.Direction;
ORIGINAL
Here's how I'd go about it in SQL Server:
--I'd change your table structure to be like this - that way you can easily add new lanes without rewriting the whole system
--The tables are created with CREATE TABLE: DECLARE ... TABLE only accepts an
--@-prefixed table variable, so "declare #trafficData table" does not parse.
IF OBJECT_ID('tempdb..#trafficData') IS NOT NULL
    DROP TABLE #trafficData;   --lets the script be re-run in the same session
IF OBJECT_ID('tempdb..#laneData') IS NOT NULL
    DROP TABLE #laneData;

CREATE TABLE #trafficData
(
    StartTime DateTime
    ,Lane int
    ,CarCount int
);

--here's where you store additional info about the lanes (e.g. what direction they go in)
CREATE TABLE #laneData
(
    Lane int
    , Direction bit --0 in, 1 out
);

--populate the tables with sample data
INSERT INTO #laneData (Lane, Direction)
SELECT 1, 0
UNION ALL SELECT 2, 1
UNION ALL SELECT 3, 1
UNION ALL SELECT 4, 0;

--the cross join repeats every reading once per lane; the CASE picks that lane's count
INSERT INTO #trafficData (StartTime, Lane, CarCount)
SELECT dt, lane
, CASE lane
    WHEN 1 THEN l1
    WHEN 2 THEN l2
    WHEN 3 THEN l3
    WHEN 4 THEN l4
    ELSE NULL --should never happen
END
FROM
(
    SELECT '2012-09-18 5:45 AM' dt, 2 l1, 0 l2, 0 l3, 0 l4
    UNION ALL SELECT '2012-09-18 6:00 AM', 1, 0, 0, 1
    UNION ALL SELECT '2012-09-18 6:15 AM', 2, 1, 0, 0
    UNION ALL SELECT '2012-09-18 6:30 AM', 2, 1, 0, 0
    UNION ALL SELECT '2012-09-18 6:45 AM', 6, 1, 2, 1
    UNION ALL SELECT '2012-09-18 7:00 AM', 9, 1, 0, 3
    UNION ALL SELECT '2012-09-18 7:15 AM', 81, 12, 22, 15
    UNION ALL SELECT '2012-09-18 7:30 AM', 144, 31, 63, 56
    UNION ALL SELECT '2012-09-18 7:45 AM', 84, 30, 62, 42
    UNION ALL SELECT '2012-09-18 8:00 AM', 7, 1, 0, 3
    UNION ALL SELECT '2012-09-18 8:15 AM', 11, 2, 3, 3
    UNION ALL SELECT '2012-09-18 8:30 AM', 12, 3, 7, 1
    UNION ALL SELECT '2012-09-18 8:45 AM', 16, 4, 8, 0
    UNION ALL SELECT '2012-09-18 9:00 AM', 5, 2, 5, 0
    UNION ALL SELECT '2012-09-18 9:15 AM', 10, 1, 4, 0
) originalTable
CROSS JOIN #laneData;
--peak one-hour window for each individual lane:
--for every reading, sum that lane's readings from its start time up to one
--second short of an hour later, rank the windows per lane by that sum, and
--keep the top-ranked one (r = 1)
SELECT ranked.*
FROM
(
    SELECT
        anchor.Lane,
        anchor.StartTime,
        SUM(reading.CarCount) AS SumCarCount,
        ROW_NUMBER() OVER (PARTITION BY anchor.Lane ORDER BY SUM(reading.CarCount) DESC) AS r
    FROM #trafficData AS anchor
    INNER JOIN #trafficData AS reading
        ON  reading.Lane = anchor.Lane
        AND reading.StartTime BETWEEN anchor.StartTime
                                  AND DATEADD(second, -1, DATEADD(hour, 1, anchor.StartTime))
    GROUP BY anchor.Lane, anchor.StartTime
) AS ranked
WHERE ranked.r = 1
ORDER BY ranked.Lane
--peak one-hour window for each lane direction
--The windows are driven from the DISTINCT start times. Driving them from
--#trafficData itself yields one driving row per lane, so with two lanes per
--direction every reading in the window was summed twice and SumCarCount was
--doubled.
SELECT ranked.Direction, ranked.StartTime, ranked.SumCarCount, ranked.r
FROM
(
    SELECT
        lane_info.Direction,
        starts.StartTime,
        SUM(reading.CarCount) AS SumCarCount,
        --rank the windows of each direction, busiest first
        ROW_NUMBER() OVER (PARTITION BY lane_info.Direction ORDER BY SUM(reading.CarCount) DESC) AS r
    FROM (SELECT DISTINCT StartTime FROM #trafficData) AS starts
    INNER JOIN #trafficData AS reading
        ON reading.StartTime BETWEEN starts.StartTime
                                 AND DATEADD(second, -1, DATEADD(hour, 1, starts.StartTime))
    INNER JOIN #laneData AS lane_info
        ON lane_info.Lane = reading.Lane
    GROUP BY lane_info.Direction, starts.StartTime
) AS ranked
WHERE ranked.r = 1
ORDER BY ranked.Direction

Group By is sorting the resultsets which I don't want...?

I am using following SQL Query:
-- Number of likes per selected comment. The LEFT JOIN keeps comments that have
-- no likes, and COUNT over the joined column reports 0 for them.
SELECT
    c.`id` AS `comment_id`,
    COUNT(cl.`comment_id`) AS `number_of_likes`
FROM `comment` AS c
LEFT JOIN `comment_likes` AS cl
    ON c.`id` = cl.`comment_id`
WHERE c.`id` IN (10, 5, 7, 8, 3, 2, 9)
GROUP BY c.`id`
Here the result of the query is coming as:
comment_id number_of_likes
2 0
3 1
5 0
7 0
8 0
9 0
10 0
Which I don't want...? I want the same ordering given where condition i.e. WHERE comment.id
IN ( 10, 5, 7, 8, 3, 2, 9 ).
So I want the result to be like:
comment_id number_of_likes
10 0
5 0
7 0
8 0
3 1
2 0
9 0
Can anyone help me...?
Thanks In Advance.....

Add explicit ORDER BY. A function that can be helpful is FIELD():
-- Fragment: append to the query so rows come back in the order the ids are listed.
ORDER BY FIELD(comment.id, 10, 5, 7, 8, 3, 2, 9)

You need to use the FIELD() of MySQL in order to achieve your desired results
-- Number of likes per selected comment, returned in the order the ids are
-- listed: the ORDER BY sorts on FIELD() of the comment id over that same list.
SELECT
    c.`id` AS `comment_id`,
    COUNT(cl.`comment_id`) AS `number_of_likes`
FROM `comment` AS c
LEFT JOIN `comment_likes` AS cl
    ON c.`id` = cl.`comment_id`
WHERE c.`id` IN (10, 5, 7, 8, 3, 2, 9)
GROUP BY c.`id`
ORDER BY FIELD(c.`id`, 10, 5, 7, 8, 3, 2, 9)

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

find a duplicate series in SQL - sql-server-2008

Related

Can I transpose (Pivot) a table using a 'where' clause?

ordering the results of a query as specified in the where clause

GROUP_CONCAT not working in Sub-Query

SQL Select Query Help. Maximum sum of consequtive four rows.

Group By is sorting the resultsets which I don't want...?

Categories

Resources