Optimize SQL Server Query - sql-server-2008

I have to do a query to get the total cost of previous month and compared to current month to calculate the percentage difference.
this is the script:
create table #calc
(
InvoiceDate Date,
TotalCost decimal (12,2)
)
insert into #calc values ('2013-07-01', 9470.36)
insert into #calc values ('2013-08-01', 11393.81)
and this is the query:
select InvoiceDate,
TotalCost,
PrevTotalCost,
(CASE WHEN (PrevTotalCost = 0)
THEN 0
ELSE (((TotalCost - PrevTotalCost) / PrevTotalCost) * 100.0)
END) AS PercentageDifference
from (
select a.InvoiceDate, a.TotalCost,
isnull((select b.TotalCost
from #calc b
where InvoiceDate = (select MAX(InvoiceDate)
from #calc c
where c.InvoiceDate < a.InvoiceDate)), 0) as PrevTotalCost
from #calc a) subq
Is there a more efficient way to do it for cgetting the previous month?

Using a ranking function to put more burden on sorts than table scans seems the fastest when using no indexes. The query below processed 6575 records in under a second:
SELECT
Main.InvoiceDate,
Main.TotalCost,
PreviousTotalCost=Previous.TotalCost,
PercentageDifference=
CASE WHEN COALESCE(Previous.TotalCost,0) = 0 THEN 0
ELSE (((Main.TotalCost - Previous.TotalCost) / Previous.TotalCost) * 100.00)
END
FROM
(
SELECT
InvoiceDate,
TotalCost,
OrderInGroup=ROW_NUMBER() OVER (ORDER BY InvoiceDate DESC)
FROM
Test
)AS Main
LEFT OUTER JOIN
(
SELECT
InvoiceDate,
TotalCost,
OrderInGroup=ROW_NUMBER() OVER (ORDER BY InvoiceDate DESC)
FROM
Test
)AS Previous ON Previous.OrderInGroup=Main.OrderInGroup+1
Using nested looping as the case when getting the previous invoice cost in a select subquery proves the slowest - 6575 rows in 30 seconds.
SELECT
X.InvoiceDate,
X.TotalCost,
X.PreviousTotalCost,
PercentageDifference=
CASE WHEN COALESCE(X.PreviousTotalCost,0) = 0 THEN 0
ELSE (((X.TotalCost - X.PreviousTotalCost) / X.PreviousTotalCost) * 100.00)
END
FROM
(
SELECT
InvoiceDate,
TotalCost,
PreviousTotalCost=(SELECT TotalCost FROM Test WHERE InvoiceDate=(SELECT MAX(InvoiceDate) FROM Test WHERE InvoiceDate<Main.InvoiceDate))
FROM
Test AS Main
)AS X
Your query processed 6575 records in 20 seconds with the biggest cost coming from the nested loops for inner join
select InvoiceDate,
TotalCost,
PrevTotalCost,
(CASE WHEN (PrevTotalCost = 0)
THEN 0
ELSE (((TotalCost - PrevTotalCost) / PrevTotalCost) * 100.0)
END) AS PercentageDifference
from (
select a.InvoiceDate, a.TotalCost,
isnull((select b.TotalCost
from Test b
where InvoiceDate = (select MAX(InvoiceDate)
from #calc c
where c.InvoiceDate < a.InvoiceDate)), 0) as PrevTotalCost
from Test a) subq
Using indexes would be a big plus unless you are required to use temp tables.
Hope this helps :)

SELECT
`current`.`InvoiceDate`,
`current`.`TotalCost`,
`prev`.`TotalCost` AS `PrevTotalCost`,
(`current`.`TotalCost` - `prev`.`TotalCost`) AS `CostDifference`
FROM dates `current`
LEFT JOIN
dates `prev`
ON `prev`.`InvoiceDate` <= DATE_FORMAT(`current`.`InvoiceDate` - INTERVAL 1 MONTH, '%Y-%m-01');
Screenshot of the results I got: http://cl.ly/image/0b3z2x1f2H1n
I think this might be what you're looking for.
Edit: I wrote this query in MySQL, so it's possible you may need to alter a couple minor syntax things for your server.

Related

Retrieve a data from a partition in SQL Servertable

I'm new to SQL Server partitioning. I have tried a some of the tutorials and finally I got this based on that created a partition but the problem is that I can't retrieve the data's based on partition like in MYSQL query
SELECT * FROM TABLE_NAME PARTITION(partitionId)
and also tried this query in SQL Server
SELECT $PARTITION.partition_function (partition_function_id)
EDIT:
The picture shows the data in table I have created partition based on Date, I like to retrieve data based on partition name, which is if partition name is 20170203 I am excepting results on that date.
select * from TABLE_NAME
where $partition.fpartfunction(date) = 1
This returns all the data in partition 1.
If you want to retrieve it from the date you can do as below,
CREATE PARTITION FUNCTION PF_Date(date) AS RANGE LEFT FOR VALUES('2017-01-01','2018-01-01');
--first partition
SELECT *
FROM schema.table_name
WHERE key_column <= '2017-01-01';
--second partition
SELECT *
FROM schema.table_name
WHERE key_column > '2017-01-01' AND key_column <= '2018-01-01';
--last partition
SELECT *
FROM schema.table_name
WHERE key_column > '2018-01-01';
I hope this may work for your need:
CREATE PROCEDURE USP_GetRowsWithinPartition (#InputDate DATE)
AS
BEGIN
DECLARE #TV TABLE
(StartDate DATE,
EndDate DATE
)
INSERT INTO #TV (StartDate, EndDate)
SELECT TOP 1
CAST((CASE
WHEN (DATEPART(MONTH, #InputDate)) = 7 AND (DATEPART(DAY, #InputDate)) > 2
THEN DATEADD(YEAR, -1, CONVERT(DATE, prv.Value, 116))
WHEN (DATEPART(MONTH, #InputDate)) > 7 AND (DATEPART(DAY, #InputDate)) >= 1
THEN DATEADD(YEAR, -1, CONVERT(DATE, prv.Value, 116))
ELSE prv.Value
END) AS DATE) AS StartDate,
CAST((CASE
WHEN (DATEPART(MONTH, #InputDate)) = 7 AND (DATEPART(DAY, #InputDate)) > 2
THEN prv.Value
WHEN (DATEPART(MONTH, #InputDate)) > 7 AND (DATEPART(DAY, #InputDate)) >= 1
THEN prv.Value
ELSE DATEADD(YEAR, 1, CONVERT(DATE, prv.Value, 116))
END ) AS DATE) AS EndDate
FROM sys.partition_functions pf
INNER JOIN sys.partition_schemes AS ps ON ps.function_id = pf.function_id
INNER JOIN sys.destination_data_spaces AS dds ON dds.partition_scheme_id = ps.data_space_id
INNER JOIN sys.dm_db_partition_stats pstats ON pstats.partition_number = dds.destination_id
INNER JOIN sys.partitions p ON p.partition_id = pstats.partition_id
INNER JOIN sys.data_spaces AS ds ON dds.data_space_id = ds.data_space_id
INNER JOIN sys.partition_range_values AS prv ON pf.function_id = prv.function_id
WHERE pstats.object_id = OBJECT_ID('YourTable')
GROUP BY prv.boundary_id, prv.value
ORDER BY ABS(DATEDIFF(DAY, #InputDate, Convert(DATE, value, 116)))
SELECT yt.* FROM YourTable yt
INNER JOIN #TV t ON yt.DateColumn>=t.StartDate AND yt.DateColumn < t.EndDate
END
EXEC USP_GetRowsWithinPartition '2015-08-01'
(MySQL)
There is virtually no use for accessing a specific PARTITION. You should let the WHERE clause pick which partition(s) to use.
Furthermore, there are very few uses for partitioning. I recommend that you ignore that feature until you have a non-trivial amount of experience under your belt.
Any, to answer your question... Yes, this should work:
SELECT * FROM TABLE_NAME PARTITION(partitionId);
But that was not available until 5.6.
The partitionid may need to be quoted, especially if it is a number. For further research, please provide SHOW CREATE TABLE.

Optimizing MySQL query that includes a repeated subquery

I have the following query that is working perfectly right now, I have been trying to optimize it since I am using the same subquery 4 times. It will be great to come up with a better/smarter solution. Thank you
Here is the query:
select
invoices.invoice_id
,invoices.invoice_amount
,(
select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id
) as payments
,round((invoices.invoice_amount-(
select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id
)),2) as balance
from invoices
where (
round((invoices.invoice_amount -
(select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id)
),2)
) > 0
or (
round((invoices.invoice_amount -
(select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id)
),2)
) IS NULL
order by balance
SQL Fiddle: http://sqlfiddle.com/#!9/aecea/1
Just use a subquery:
select i.invoice_id, i.invoice_amount, i.payments,
round((i.invoice_amount- i.payments), 2) as balance
from (select i.*,
(select sum(ip.invoice_payment_amount)
from invoice_payments ip
where ip.invoice_payment_invoice_id = i.invoice_id
) as payments
from invoices i
) i
where round((i.invoice_amount- i.payments), 2) > 0 or
round((i.invoice_amount- i.payments), 2) is null
order by balance;
For better performance, you want an index on invoice_payments(invoice_payment_invoice_id, invoice_payment_amount).

MySQL query to get count of current month and last month to get percentage of growth

What im trying to achieve is to get the current count and last month count so i can do a formula to get the growth percentage
(CountCurrent - CountLastMonth) / CountLastMonth
My table has the following fields
activity, upload_date
This is the query im trying now.
SELECT
Y.CurrentMonth, Y.CountCurrent,
Z.LastMonth, Z.CountLastMonth
FROM
(SELECT
upload_date,
activity,
DATE_FORMAT(upload_date,'%M %Y') AS CurrentMonth,
COUNT(activity) AS CountCurrent
FROM appmaster
WHERE activity = 'com.google.test'
GROUP BY DATE_FORMAT(upload_date,'%m%y'))
Y INNER JOIN
(SELECT
activity,
DATE_FORMAT(upload_date,'%M %Y') AS CurrentMonth2,
DATE_FORMAT(upload_date - INTERVAL 1 MONTH,'%M %Y') AS LastMonth,
COUNT(activity) AS CountLastMonth
FROM appmaster
WHERE activity = 'com.google.test'
GROUP BY DATE_FORMAT(upload_date - INTERVAL 1 MONTH,'%m%y'))
Z ON Z.CurrentMonth2 = Y.CurrentMonth
GROUP BY DATE_FORMAT(upload_date,'%Y%m')
ORDER BY DATE_FORMAT(upload_date,'%Y%m')
My CurrentMonth, CountCurrent and LastMonth work fine. But the CountLastMonth is coming out the same as CountCurrent.
I was trying this before and it would give me everything but the CountLastMonth
SELECT b.CurrentMonth, sum(b.CountCurrent), b.LastMonth
FROM (SELECT DATE(a.upload_date - INTERVAL 1 MONTH) AS LastMonth, DATE(a.upload_date) AS CurrentMonth, COUNT(a.activity) AS CountCurrent
FROM appmaster a WHERE a.activity = 'com.google.android.googlequicksearchbox'
group BY MONTH(a.upload_date)) AS b
group BY MONTH(b.CurrentMonth)
No temporary table required:
SELECT
a.ym,
CASE #totals
WHEN 0 THEN 0
ELSE (a.totals - #totals) / #totals
END increment,
#totals := a.totals totals
FROM
(
SELECT
EXTRACT(YEAR_MONTH FROM upload_date) ym,
COUNT(1) AS totals
FROM
appmaster
WHERE
activity = 'com.google.test'
GROUP BY ym -- no ORDER BY required
) a
CROSS JOIN (SELECT #totals := 0) x
Maybe there's a simpler way to do this, using a little trick with user variables.
First, you need to write a query that groups your data by month; I'll store it in a temp table to ease things a bit:
drop table if exists temp_count;
create temporary table temp_count
select last_day(upload_date) as month -- A little trick to get
-- the last day of the month
, count(activity) as count_current
-- Add any other fields or expressions you need
from app_master
-- Add the needed joins
-- Include any WHERE conditions here
group by last_day(upload_date);
-- Let's add an index to this temp table... add any indexes you may need
alter table temp_count
add index idx_month(month);
And now, let's use this temp table to get what you need:
select a.month
, #count_last as count_last -- This is the value of the user variable
-- before reassigning it
, (a.count_current - #count_last) / #count_last as increment
, #count_last := a.count_current -- Here the variable is updated with the
-- current value
from
( -- This subquery is used to initialize the user variable
select #count_last := 0
) as init
, temp_count as a
-- It's important to order the data, otherwise God knows what may happen ;)
order by a.month;
Hope this helps
#Rental number Growth per month -- This is the example for monthly growth where "Total count of activity per month" is concerned -- This answer was developed based on Barranka answer and credit goes to him.
SELECT
a.YM,
CASE #Num_of_Rentals
WHEN 0 THEN 0
ELSE (a.Num_of_Rentals - #Num_of_Rentals) / #Num_of_Rentals
END increment,
#Num_of_Rentals := a.Num_of_Rentals Num_of_Rentals
FROM
(
SELECT
LEFT(rental_date,7) YM,
COUNT(rental_id) AS Num_of_Rentals
FROM
rental
GROUP BY ym -- no ORDER BY required
) a
CROSS JOIN (SELECT #Num_of_Rentals := 0) x;
-- OR
SELECT
a.YM,
CASE #Num_of_Rentals
WHEN 0 THEN 0
ELSE (a.Num_of_Rentals - #Num_of_Rentals) / #Num_of_Rentals
END increment,
#Num_of_Rentals := a.Num_of_Rentals Num_of_Rentals
FROM
(
SELECT
LEFT(rental_date,7) YM,
COUNT(1) AS Num_of_Rentals
FROM
rental
GROUP BY ym -- no ORDER BY required
) a
CROSS JOIN (SELECT #Num_of_Rentals := 0) x;
Revenue growth over each month - This is based on the answer provided by Barranka but it is suitable for Monthly revenue growth:
SELECT
a.YM,
CASE #Revenue
WHEN 0 THEN 0
ELSE (a.Revenue - #Revenue) / #Revenue
END Increment,
#Revenue := a.Revenue Revenue
FROM
(
SELECT
LEFT(payment_date, 7) YM,
SUM(amount) AS Revenue -- Toatl is SUM of Amount
FROM
payment
GROUP BY YM -- no ORDER BY required
) a
CROSS JOIN (SELECT #Revenue := 0) x
;
SELECT
a.YM,
CASE #Revenue
WHEN 0 THEN 0
ELSE (a.Revenue - #Revenue) / #Revenue
END Increment,
#Revenue := a.Revenue Revenue
FROM
(
SELECT
EXTRACT(YEAR_MONTH FROM payment_date) YM,
SUM(amount) AS Revenue
FROM
payment
GROUP BY YM -- no ORDER BY required
) a
CROSS JOIN (SELECT #Revenue := 0) x
;

MySQL SELECT Query - Subtract a SUM() value with the combined total of two other SUM() values

I have two SELECT statements that give the values "TotalSales" and "VendorPay_Com". I want to be able to subtract VendorPay_Com from TotalSales within the one MySQL statement to get the value "Outstanding_Funds" but I haven't found a reliable way to do so.
These are my two statements:
Query 1:
SELECT SUM(Price) AS TotalSales
FROM PROPERTY
WHERE Status = 'Sold';
Query 2:
SELECT SUM(AC.Amount) AS VendorPay_Comm
FROM (
SELECT Amount FROM lawyer_pays_vendor
UNION ALL
SELECT CommissionEarned AS `Amount` FROM COMMISSION AS C WHERE C.`status` = 'Paid') AS AC
Any help on this matter would be greatly appreciated :)
You can do it as follows :
select (select ...) - (select ...)
In your example, simply :
select
(
SELECT SUM(Price) AS TotalSales
FROM PROPERTY
WHERE Status = 'Sold'
)
-
(
SELECT SUM(AC.Amount) AS VendorPay_Comm
FROM (
SELECT Amount FROM lawyer_pays_vendor
UNION ALL
SELECT CommissionEarned AS `Amount` FROM COMMISSION AS C WHERE C.`status` = 'Paid') AS AC
) AS Outstanding_Funds
Try
SELECT TotalSales-VendorPay_Comm AS Outstanding_Funds
FROM
(SELECT SUM(Price) AS TotalSales
FROM PROPERTY
WHERE Status = 'Sold') t1,
(SELECT SUM(Amount) AS VendorPay_Comm
FROM (SELECT Amount FROM lawyer_pays_vendor
UNION ALL
SELECT CommissionEarned AS Amount
FROM COMMISSION
WHERE Status = 'Paid') t0) t2
Here is sqlfiddle

Checking for maximum length of consecutive days which satisfy specific condition

I have a MySQL table with the structure:
beverages_log(id, users_id, beverages_id, timestamp)
I'm trying to compute the maximum streak of consecutive days during which a user (with id 1) logs a beverage (with id 1) at least 5 times each day. I'm pretty sure that this can be done using views as follows:
CREATE or REPLACE VIEW daycounts AS
SELECT count(*) AS n, DATE(timestamp) AS d FROM beverages_log
WHERE users_id = '1' AND beverages_id = 1 GROUP BY d;
CREATE or REPLACE VIEW t AS SELECT * FROM daycounts WHERE n >= 5;
SELECT MAX(streak) AS current FROM ( SELECT DATEDIFF(MIN(c.d), a.d)+1 AS streak
FROM t AS a LEFT JOIN t AS b ON a.d = ADDDATE(b.d,1)
LEFT JOIN t AS c ON a.d <= c.d
LEFT JOIN t AS d ON c.d = ADDDATE(d.d,-1)
WHERE b.d IS NULL AND c.d IS NOT NULL AND d.d IS NULL GROUP BY a.d) allstreaks;
However, repeatedly creating views for different users every time I run this check seems pretty inefficient. Is there a way in MySQL to perform this computation in a single query, without creating views or repeatedly calling the same subqueries a bunch of times?
This solution seems to perform quite well as long as there is a composite index on users_id and beverages_id -
SELECT *
FROM (
SELECT t.*, IF(#prev + INTERVAL 1 DAY = t.d, #c := #c + 1, #c := 1) AS streak, #prev := t.d
FROM (
SELECT DATE(timestamp) AS d, COUNT(*) AS n
FROM beverages_log
WHERE users_id = 1
AND beverages_id = 1
GROUP BY DATE(timestamp)
HAVING COUNT(*) >= 5
) AS t
INNER JOIN (SELECT #prev := NULL, #c := 1) AS vars
) AS t
ORDER BY streak DESC LIMIT 1;
Why not include user_id in they daycounts view and group by user_id and date.
Also include user_id in view t.
Then when you are queering against t add the user_id to the where clause.
Then you don't have to recreate your views for every single user you just need to remember to include in your where clause.
That's a little tricky. I'd start with a view to summarize events by day:
CREATE VIEW BView AS
SELECT UserID, BevID, CAST(EventDateTime AS DATE) AS EventDate, COUNT(*) AS NumEvents
FROM beverages_log
GROUP BY UserID, BevID, CAST(EventDateTime AS DATE)
I'd then use a Dates table (just a table with one row per day; very handy to have) to examine all possible date ranges and throw out any with a gap. This will probably be slow as hell, but it's a start:
SELECT
UserID, BevID, MAX(StreakLength) AS StreakLength
FROM
(
SELECT
B1.UserID, B1.BevID, B1.EventDate AS StreakStart, DATEDIFF(DD, StartDate.Date, EndDate.Date) AS StreakLength
FROM
BView AS B1
INNER JOIN Dates AS StartDate ON B1.EventDate = StartDate.Date
INNER JOIN Dates AS EndDate ON EndDate.Date > StartDate.Date
WHERE
B1.NumEvents >= 5
-- Exclude this potential streak if there's a day with no activity
AND NOT EXISTS (SELECT * FROM Dates AS MissedDay WHERE MissedDay.Date > StartDate.Date AND MissedDay.Date <= EndDate.Date AND NOT EXISTS (SELECT * FROM BView AS B2 WHERE B1.UserID = B2.UserID AND B1.BevID = B2.BevID AND MissedDay.Date = B2.EventDate))
-- Exclude this potential streak if there's a day with less than five events
AND NOT EXISTS (SELECT * FROM BView AS B2 WHERE B1.UserID = B2.UserID AND B1.BevID = B2.BevID AND B2.EventDate > StartDate.Date AND B2.EventDate <= EndDate.Date AND B2.NumEvents < 5)
) AS X
GROUP BY
UserID, BevID