SQL Queries to analyse Employee Database - data-analysis

I am looking for queries that I can use to analyze a general employee database. This is for data analysis.

Tried this for monthly employee trend
-- Month-end headcount: for every month-end date in DimDate, count the
-- employees whose employment period (StartDate..EndDate) covers that date.
SELECT
    dt.FullDateAlternateKey AS [Date],
    -- Count a column from the outer-joined side. COUNT(1) would report 1 for a
    -- month-end with no matching employee, because the LEFT JOIN still emits
    -- one all-NULL employee row for that date.
    COUNT(emp.EmployeeKey) AS ActiveCount
FROM DimDate AS dt
LEFT JOIN DimEmployee AS emp
    -- a NULL EndDate is treated as an open-ended (still active) employment
    ON dt.FullDateAlternateKey BETWEEN emp.StartDate AND ISNULL(emp.EndDate, '9999-12-31')
WHERE
    -- keep only the last calendar day of each month
    dt.FullDateAlternateKey = EOMONTH(dt.FullDateAlternateKey)
GROUP BY
    dt.FullDateAlternateKey
ORDER BY
    dt.FullDateAlternateKey;
also found CTE use for finding employee hierarchy
-- Walks the employee reporting tree top-down with a recursive CTE.
-- Note: the CTE column named DeptID is populated from DepartmentName.
WITH DirectReports (ManagerID, EmployeeID, Title, DeptID, Level) AS
(
    -- Anchor member: employees without a parent start the tree at level 0
    SELECT
        root.ParentEmployeeKey,
        root.EmployeeKey,
        root.Title,
        root.DepartmentName,
        0 AS Level
    FROM DimEmployee AS root
    WHERE root.ParentEmployeeKey IS NULL

    UNION ALL

    -- Recursive member: attach each employee to the already-visited row of
    -- their parent, one level deeper
    SELECT
        child.ParentEmployeeKey,
        child.EmployeeKey,
        child.Title,
        child.DepartmentName,
        mgr.Level + 1
    FROM DimEmployee AS child
    INNER JOIN DirectReports AS mgr
        ON child.ParentEmployeeKey = mgr.EmployeeID
)
-- Final projection: the level-0 rows plus everyone in 'Information Services'
SELECT
    dr.ManagerID,
    dr.EmployeeID,
    dr.Title,
    dr.DeptID,
    dr.Level
FROM DirectReports AS dr
WHERE dr.Level = 0
   OR dr.DeptID = 'Information Services'
also, some good queries to analyze the sales data
-- Average 2013 internet sale amount at three territory levels (group, country,
-- region) in one result set. Each window average is computed over its own
-- partition; DISTINCT collapses the per-sale rows to one row per territory.
SELECT DISTINCT
    t.SalesTerritoryGroup,
    t.SalesTerritoryCountry,
    t.SalesTerritoryRegion,
    AVG(s.SalesAmount) OVER (PARTITION BY t.SalesTerritoryGroup)   AS GroupAvgSales,
    AVG(s.SalesAmount) OVER (PARTITION BY t.SalesTerritoryCountry) AS CountryAvgSales,
    AVG(s.SalesAmount) OVER (PARTITION BY t.SalesTerritoryRegion)  AS RegionAvgSales
FROM FactInternetSales AS s
INNER JOIN DimSalesTerritory AS t
    ON s.SalesTerritoryKey = t.SalesTerritoryKey
WHERE
    YEAR(s.OrderDate) = 2013
ORDER BY
    t.SalesTerritoryGroup,
    t.SalesTerritoryCountry,
    t.SalesTerritoryRegion
Use additional aggregations to understand more about product sales such as the distribution of sales etc..
-- Sales distribution per product category and sub-category:
-- row count, total, average, minimum and maximum sale amount.
SELECT
    cat.EnglishProductCategoryName    AS Category,
    sub.EnglishProductSubcategoryName AS SubCategory,
    COUNT(*)           AS [Count],          -- how many sales rows were there?
    SUM(s.SalesAmount) AS Sales,            -- total sales amount
    AVG(s.SalesAmount) AS Avg_SalesAmount,  -- average sale amount
    MIN(s.SalesAmount) AS Min_SaleAmount,   -- smallest sale amount
    MAX(s.SalesAmount) AS Max_SaleAmount    -- largest sale amount
FROM FactInternetSales AS s
-- LEFT JOINs keep every sales row even when a lookup finds no match
LEFT JOIN DimProduct AS p
    ON s.ProductKey = p.ProductKey
LEFT JOIN DimProductSubcategory AS sub
    ON p.ProductSubcategoryKey = sub.ProductSubcategoryKey
LEFT JOIN DimProductCategory AS cat
    ON sub.ProductCategoryKey = cat.ProductCategoryKey
-- every non-aggregated output column has to be listed here; the column
-- aliases cannot be used in GROUP BY
GROUP BY
    cat.EnglishProductCategoryName,
    sub.EnglishProductSubcategoryName
ORDER BY
    cat.EnglishProductCategoryName,
    sub.EnglishProductSubcategoryName
-- Customer tenure list: one row per customer with the date of their first
-- purchase and the number of days elapsed since then, newest customers first.
SELECT
    c.FirstName,
    c.LastName,
    c.DateFirstPurchase,
    DATEDIFF(d, c.DateFirstPurchase, getdate()) AS DaysSinceFirstPurchase  -- how long have they been a customer?
FROM DimCustomer AS c
ORDER BY
    c.DateFirstPurchase DESC
-- Customers bucketed by the month of their first purchase. Each bucket shows
-- its month-end date, the days elapsed since that month-end, and the number
-- of customers acquired in that month.
SELECT
    EOMONTH(c.DateFirstPurchase)                        AS MonthOfFirstPurchase,    -- month they became a customer
    DATEDIFF(d, EOMONTH(c.DateFirstPurchase), getdate()) AS DaysSinceFirstPurchase, -- age of the bucket in days
    COUNT(*)                                            AS CustomerCount            -- customers in this month
FROM DimCustomer AS c
GROUP BY
    EOMONTH(c.DateFirstPurchase)
ORDER BY
    DaysSinceFirstPurchase DESC
-- Ranks product sub-categories by total sales within each order year.
SELECT
    COUNT(DISTINCT s.SalesOrderNumber) AS OrderCount,  -- distinct orders, not fact rows
    RANK() OVER (
        PARTITION BY YEAR(s.OrderDate)
        ORDER BY SUM(s.SalesAmount) DESC
    )                                  AS SalesRank,   -- 1 = best-selling sub-category of the year
    SUM(s.SalesAmount)                 AS TotalSales,
    cat.EnglishProductCategoryName     AS Category,
    sub.EnglishProductSubcategoryName  AS SubCategory,
    YEAR(s.OrderDate)                  AS [Year]
FROM FactInternetSales AS s
INNER JOIN DimProduct AS p
    ON s.ProductKey = p.ProductKey
INNER JOIN DimProductSubcategory AS sub
    ON p.ProductSubcategoryKey = sub.ProductSubcategoryKey
INNER JOIN DimProductCategory AS cat
    ON sub.ProductCategoryKey = cat.ProductCategoryKey
-- the grouping columns must be spelled out; column aliases are not allowed here
GROUP BY
    cat.EnglishProductCategoryName,
    sub.EnglishProductSubcategoryName,
    YEAR(s.OrderDate)
ORDER BY
    YEAR(s.OrderDate),
    SUM(s.SalesAmount) DESC;
-- Step 1: total sales per week number for orders placed in 2013
SELECT
    SUM(s.SalesAmount)        AS WeeklySales,
    DATEPART(ww, s.OrderDate) AS WeekNum
FROM FactInternetSales AS s
WHERE
    YEAR(s.OrderDate) = 2013
GROUP BY
    DATEPART(ww, s.OrderDate)
ORDER BY
    DATEPART(ww, s.OrderDate) ASC
-- Step 2: use the weekly totals as the source and compute a moving average
-- over the current week and the six preceding weeks (7-row window).
-- The derived table already returns exactly one row per WeekNum, so no outer
-- GROUP BY is needed; it gets its own alias (w) instead of reusing s.
SELECT
    AVG(w.WeeklySales) OVER (
        ORDER BY w.WeekNum
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    )             AS AvgSales,
    w.WeeklySales AS TotalSales,
    w.WeekNum
FROM (
    -- weekly sales totals for 2013
    SELECT
        SUM(s.SalesAmount)        AS WeeklySales,
        DATEPART(ww, s.OrderDate) AS WeekNum
    FROM FactInternetSales AS s
    WHERE YEAR(s.OrderDate) = 2013
    GROUP BY DATEPART(ww, s.OrderDate)
) AS w
ORDER BY
    w.WeekNum ASC
-- Running total: year-to-date sales, restarted for every sales year.
-- ROWS UNBOUNDED PRECEDING sums from the first month of the year up to the
-- current month. The derived table already yields one row per
-- (SalesYear, SalesMonth), so the outer query needs no GROUP BY; it gets its
-- own alias (m) instead of reusing s.
SELECT
    SUM(m.MonthlySales) OVER (
        PARTITION BY m.SalesYear
        ORDER BY m.SalesMonth
        ROWS UNBOUNDED PRECEDING
    )              AS YTDSales,
    m.MonthlySales AS MonthlySales,
    m.SalesYear,
    m.SalesMonth
FROM (
    -- monthly sales totals
    SELECT
        SUM(s.SalesAmount) AS MonthlySales,
        MONTH(s.OrderDate) AS SalesMonth,
        YEAR(s.OrderDate)  AS SalesYear
    FROM FactInternetSales AS s
    GROUP BY
        MONTH(s.OrderDate),
        YEAR(s.OrderDate)
) AS m
ORDER BY
    m.SalesYear,
    m.SalesMonth ASC
-- Previous-year comparison: for each calendar month, show that month's sales
-- next to the sales of the same month one year earlier.
WITH MonthlySales (YearNum, MonthNum, Sales) AS
(
    -- sales total per calendar year and month
    SELECT
        cal.CalendarYear,
        cal.MonthNumberOfYear,
        SUM(f.SalesAmount)
    FROM DimDate AS cal
    INNER JOIN FactInternetSales AS f
        ON cal.DateKey = f.OrderDateKey
    GROUP BY
        cal.CalendarYear,
        cal.MonthNumberOfYear
)
-- Aggregate the current period again and join the CTE shifted back one year;
-- months without a prior-year row are dropped by the inner join.
SELECT
    d.CalendarYear,
    d.MonthNumberOfYear,
    ms.Sales           AS PrevSales,
    SUM(s.SalesAmount) AS CurrentSales
FROM DimDate AS d
INNER JOIN FactInternetSales AS s
    ON d.DateKey = s.OrderDateKey
INNER JOIN MonthlySales AS ms
    ON  ms.YearNum  = d.CalendarYear - 1
    AND ms.MonthNum = d.MonthNumberOfYear
GROUP BY
    d.CalendarYear,
    d.MonthNumberOfYear,
    ms.Sales
ORDER BY
    d.CalendarYear DESC,
    d.MonthNumberOfYear DESC
-- Year-over-year % change per calendar month.
-- Growth is measured against the PREVIOUS year's sales:
--     PctGrowth = (CurrentSales - PrevSales) / PrevSales
-- (dividing by the current year's sales understates growth and overstates
-- decline).
WITH MonthlySales (YearNum, MonthNum, Sales) AS
(
    -- sales total per calendar year and month
    SELECT
        d.CalendarYear,
        d.MonthNumberOfYear,
        SUM(s.SalesAmount)
    FROM DimDate AS d
    INNER JOIN FactInternetSales AS s
        ON d.DateKey = s.OrderDateKey
    GROUP BY
        d.CalendarYear,
        d.MonthNumberOfYear
)
-- Join the monthly totals to themselves, shifted back one year; months
-- without a prior-year row are dropped by the inner join.
SELECT
    cur.YearNum  AS CalendarYear,
    cur.MonthNum AS MonthNumberOfYear,
    prev.Sales   AS PrevSales,
    cur.Sales    AS CurrentSales,
    -- NULLIF turns a zero prior-year total into NULL instead of raising a
    -- divide-by-zero error
    (cur.Sales - prev.Sales) / NULLIF(prev.Sales, 0) AS PctGrowth
FROM MonthlySales AS cur
INNER JOIN MonthlySales AS prev
    ON  prev.YearNum  = cur.YearNum - 1
    AND prev.MonthNum = cur.MonthNum
ORDER BY
    cur.YearNum DESC,
    cur.MonthNum DESC

Related

my query doesn't return the max from value from the sum of working hours of the month

I want to find the name of the person who worked the most in a month, but the query does not return the maximum of the summed values.
I'm new to MySQL.
-- Asker's original attempt (kept as posted).
-- The inner query sums the worked seconds (ending_time - starting_time) per
-- employees_id for rows with a project_id; the outer query then groups by name.
-- Because the outer query groups by x.name, MAX(x.sum_time) is evaluated per
-- name, so every name in x produces its own output row.
SELECT
x.name,
sec_to_time(MAX(x.sum_time)) maximum
FROM (
SELECT
name,
SUM(TIME_TO_SEC(ending_time) - TIME_TO_SEC(starting_time)) sum_time
FROM working_hours wh, employees
WHERE wh.employees_id = employees.id
AND project_id IS NOT NULL
GROUP BY employees_id
) x
GROUP BY x.name;
This is my query. I want to show only the name of the person who worked the most in a month, but it returns every person who worked in that month.
Try making these changes to your query:
change name to MAX(name)
qualify employees_id with wh.employees_id
-- Returns the single employee with the largest total worked time on rows
-- that have a project_id.
-- The inner query totals the worked seconds per employee (MAX(name) keeps the
-- select list valid while grouping by the employee id). The outer query sorts
-- those totals and keeps only the top row; grouping the outer query by name
-- would return every employee again.
-- Note: with LIMIT 1 only one row is returned even if several employees tie.
SELECT
    x.name,
    SEC_TO_TIME(x.sum_time) AS maximum
FROM (
    SELECT
        MAX(name) AS name,
        SUM(TIME_TO_SEC(ending_time) - TIME_TO_SEC(starting_time)) AS sum_time
    FROM working_hours AS wh
    INNER JOIN employees
        ON wh.employees_id = employees.id
    WHERE project_id IS NOT NULL
    GROUP BY wh.employees_id
) AS x
ORDER BY x.sum_time DESC
LIMIT 1;
Simply use Order by LIMIT -
-- Returns every employee whose total worked time (in seconds) equals the
-- highest total, so ties are all listed.
-- X1 and X2 must aggregate exactly the same rows: both apply the project_id
-- filter and both group per employee id. Otherwise X1's totals could never
-- equal X2's maximum, and employees sharing a name would be merged.
SELECT X1.name, X1.maximum
FROM (
    -- total worked seconds per employee
    SELECT
        name,
        SUM(TIME_TO_SEC(ending_time) - TIME_TO_SEC(starting_time)) AS maximum
    FROM working_hours AS wh
    INNER JOIN employees
        ON wh.employees_id = employees.id
    WHERE project_id IS NOT NULL
    GROUP BY wh.employees_id, name
) AS X1
INNER JOIN (
    -- the single highest per-employee total
    SELECT
        SUM(TIME_TO_SEC(ending_time) - TIME_TO_SEC(starting_time)) AS sum_time
    FROM working_hours AS wh
    INNER JOIN employees
        ON wh.employees_id = employees.id
    WHERE project_id IS NOT NULL
    GROUP BY wh.employees_id
    ORDER BY sum_time DESC
    LIMIT 1
) AS X2
    ON X2.sum_time = X1.maximum;

MySql Start and End price (Min,Max) with Inner Joins

I have a table of prices, 2 types. metal 1 and metal 2.
I have succeeded in getting the max, min price for each metal groups by day.
How can i also select the start (first) and end (last) of every day too?
I am nearly there, but struggling on getting these two final prices...
My SQL fiddle with example data:
http://sqlfiddle.com/#!9/ca4867/1
My query so far:
-- Daily lowest and highest price per metal for February 2018.
-- Both derived tables number the rows of each day with MySQL user variables
-- (@name): lowp sorts prices ascending, highp descending, so rn = 1 is the
-- lowest / highest price of the day respectively.
-- User variables must be written with the @ sigil; a # starts a comment in
-- MySQL and would cut every such line short.
-- NOTE(review): the row counter restarts only when report_term changes, and
-- MySQL does not guarantee the evaluation order of user-variable assignments
-- relative to ORDER BY - verify the numbering on the target server version.
select
    highp.metal_price_datetime_IST AS high_price_metal_price_datetime_IST
  , highp.metal_price as highest_price
  , lowp.report_term
  , lowp.metal_id
  , lowp.metal_price as lowest_price
  , lowp.metal_price_datetime_IST AS low_price_metal_price_datetime_IST
from (select @report_term:=concat(day(metal_price_datetime_IST), ' ', monthname(metal_price_datetime_IST), ' ', year(metal_price_datetime_IST)) as report_term
           , metal_price_datetime_IST
           , metal_price
           , metal_id
           , case when @report_term=@old_report_term then @rn1:=@rn1+1 else @rn1:=1 end as rn
           , @old_report_term:=@report_term
      from metal_prices
      cross join (select @rn1:=0, @old_report_term:='') inituservar1
      -- half-open range: BETWEEN ... AND LAST_DAY(...) stops at midnight of the
      -- last day and would drop every later price of that day
      where metal_price_datetime_IST >= '2018-02-01'
        and metal_price_datetime_IST < date_add(last_day('2018-02-01'), interval 1 day)
      order by metal_id, report_term, metal_price asc) lowp
inner join (select @report_term2:=concat(day(metal_price_datetime_IST), ' ', monthname(metal_price_datetime_IST), ' ', year(metal_price_datetime_IST)) as report_term
                 , metal_price_datetime_IST
                 , metal_price
                 , metal_id
                 , case when @report_term2=@old_report_term2 then @rn2:=@rn2+1 else @rn2:=1 end as rn
                 , @old_report_term2:=@report_term2
            from metal_prices
            cross join (select @rn2:=0, @old_report_term2:='') inituservar1
            where metal_price_datetime_IST >= '2018-02-01'
              and metal_price_datetime_IST < date_add(last_day('2018-02-01'), interval 1 day)
            order by metal_id, report_term, metal_price desc) highp
   on lowp.rn=highp.rn
  and lowp.metal_id = highp.metal_id
  and lowp.report_term = highp.report_term
  and lowp.rn = 1
  and (lowp.metal_id = 1 or lowp.metal_id = 2)
order by lowp.metal_price_datetime_IST DESC
The query you have in your fiddle seems too complex for what needs to be done. I have refactored and rewritten the query. Basically, the query is split in two parts. First one maxminprice determines the max and min price for each day for each metal. Fairly straight forward. The second part firstlastprice is a bit more complex. It finds out the max and min time stamps for each metal for each day. Then joins back to the main table to get the values for those time stamps. The case statement there is to merge the results for max and min (first and last) time so we don't have to do the query twice.
-- Per metal and calendar day: highest, lowest, first and last price.
--   maxminprice    - MAX/MIN price per metal per day
--   firstlastprice - price at the earliest and at the latest time stamp of
--                    the day, pivoted into two columns
SELECT
    maxminprice.metal_id,
    maxminprice.metal_price_datetime,
    maxminprice.max_price,
    maxminprice.min_price,
    firstlastprice.first_price,
    firstlastprice.last_price
FROM (
    SELECT
        metal_id,
        DATE(metal_price_datetime) AS metal_price_datetime,
        MAX(metal_price)           AS max_price,
        MIN(metal_price)           AS min_price
    FROM metal_prices
    GROUP BY
        metal_id,
        DATE(metal_price_datetime)
    ORDER BY
        metal_id,
        DATE(metal_price_datetime)
) AS maxminprice
INNER JOIN (
    SELECT
        p.metal_id,
        bounds.metal_price_datetimefl,
        -- only rows at the day's first/last time survive the join below; the
        -- CASE routes each surviving price into the matching column
        SUM(CASE WHEN TIME(p.metal_price_datetime) = first_time THEN p.metal_price END) AS first_price,
        SUM(CASE WHEN TIME(p.metal_price_datetime) = last_time  THEN p.metal_price END) AS last_price
    FROM metal_prices AS p
    INNER JOIN (
        -- earliest and latest time of day per metal per day
        SELECT
            metal_id,
            DATE(metal_price_datetime)      AS metal_price_datetimefl,
            MAX(TIME(metal_price_datetime)) AS last_time,
            MIN(TIME(metal_price_datetime)) AS first_time
        FROM metal_prices
        GROUP BY
            metal_id,
            DATE(metal_price_datetime)
    ) AS bounds
        ON  p.metal_id = bounds.metal_id
        AND DATE(p.metal_price_datetime) = bounds.metal_price_datetimefl
        AND TIME(p.metal_price_datetime) IN (last_time, first_time)
    GROUP BY
        p.metal_id,
        bounds.metal_price_datetimefl
) AS firstlastprice
    ON  maxminprice.metal_id = firstlastprice.metal_id
    AND maxminprice.metal_price_datetime = firstlastprice.metal_price_datetimefl

15 days of Sql From hackerrank

I am unable to understand the purpose of one line in this code. Can someone please explain it, or suggest a different way to approach this question?
Link to the question:https://www.hackerrank.com/challenges/15-days-of-learning-sql
Code:
-- For every distinct submission_date (derived table s1) this returns:
--   1. the number of hackers who submitted on that date AND whose count of
--      distinct earlier submission dates equals the number of days between
--      2016-03-01 and that date,
--   2. the hacker_id with the most submissions on that date (ties go to the
--      lowest hacker_id),
--   3. the name of that hacker.
select
submission_date ,
-- (1) count the hackers active on this date ...
( SELECT COUNT(distinct hacker_id)
FROM Submissions s2
WHERE s2.submission_date = s1.submission_date
-- ... keeping only those whose number of distinct submission dates strictly
-- before this date matches the DATEDIFF to 2016-03-01
AND ( SELECT COUNT(distinct s3.submission_date)
FROM Submissions s3
WHERE
s3.hacker_id = s2.hacker_id
AND s3.submission_date < s1.submission_date
) = dateDIFF(s1.submission_date , '2016-03-01'))
-- (2) hacker with the highest submission count on this date
, ( select hacker_id
from submissions s2
where s2.submission_date = s1.submission_date
group by hacker_id
order by count(submission_id) desc , hacker_id limit 1
) as shit
-- (3) look up the name for the hacker_id selected in (2), referenced here
-- through its column alias
, ( select name
from hackers where hacker_id = shit
)
FROM
( select distinct submission_date
from submissions) s1
group by submission_date
Unable to understand why they have used this line from this part of the code:
(s3.submission_date < s1.submission_date) = dateDIFF(s1.submission_date , '2016-03-01'))
-- Temp table holding the submission count of each hacker on each date, with
-- the rows of a date numbered by count (highest first, then lowest hacker_id).
CREATE TABLE #max_submissions (
    submission_date  date,
    hacker_id        integer,
    submission_count integer,
    ordering_row     integer
)
INSERT INTO #max_submissions
SELECT
    daily.submission_date,
    daily.hacker_id,
    daily.submission_count,
    -- ordering_row = 1 marks the top submitter of the day
    ROW_NUMBER() OVER (
        PARTITION BY daily.submission_date
        ORDER BY daily.submission_count DESC, daily.hacker_id
    ) AS ordering_row
FROM (
    -- submissions per hacker per date
    SELECT
        submission_date,
        hacker_id,
        COUNT(hacker_id) AS submission_count
    FROM submissions
    GROUP BY
        submission_date,
        hacker_id
) AS daily
-- Temp table holding, per submission date, the number of distinct hackers
-- flagged by the "logic" column computed below.
CREATE TABLE #hacker_counts (
submission_date date,
hacker_count integer
)
insert into #hacker_counts
select tbl.submission_date,
COUNT(distinct tbl.hacker_id) as cc
from (
select *,
-- logic = 1 when this hacker's number of distinct (hacker_id, submission_date)
-- pairs from 2016-03-01 through the row's date is at least the number of
-- days in that span (DATEDIFF + 1); otherwise 0
(case when (
(select count(*)
from (select distinct *
from (select s1.hacker_id,
s1.submission_date
from Submissions s1
where s1.hacker_id = s.hacker_id and
(s1.submission_date >= '2016-03-01' and
s1.submission_date <= s.submission_date)) t1
) t2
) >= (DATEDIFF(day, '2016-03-01', s.submission_date) + 1) )
then 1
else 0
end) as logic
from Submissions s
) tbl
-- keep only the flagged rows, then count the hackers per date
where tbl.logic = 1
group by tbl.submission_date
-- Final report: one row per date with the flagged-hacker count and the top
-- submitter (ordering_row = 1) including their name; then drop the temp tables.
SELECT
    ms.submission_date,
    hc.hacker_count,
    ms.hacker_id,
    h.name
FROM #max_submissions AS ms
INNER JOIN hackers AS h
    ON ms.hacker_id = h.hacker_id
-- LEFT JOIN keeps dates that have no row in #hacker_counts
LEFT JOIN #hacker_counts AS hc
    ON ms.submission_date = hc.submission_date
WHERE ms.ordering_row = 1
ORDER BY ms.submission_date
DROP TABLE #max_submissions
DROP TABLE #hacker_counts
To understand this line
( SELECT COUNT(distinct s3.submission_date)
FROM Submissions s3
WHERE
s3.hacker_id = s2.hacker_id
AND s3.submission_date < s1.submission_date)
= dateDIFF(s1.submission_date , '2016-03-01')
First understand left hand side:
(SELECT COUNT(distinct s3.submission_date) FROM Submissions s3 WHERE s3.hacker_id = s2.hacker_id AND s3.submission_date < s1.submission_date)
This subquery counts the distinct submission dates of a given hacker_id that fall strictly before the current date (the comparison is <, so the current date itself is not counted).
So if the date for one row is 2016-03-05, it counts the distinct dates before 2016-03-05 on which that hacker_id made a submission (several submissions by the same hacker on one day count only once).
In other words, it takes a hacker_id and checks on how many of the days from the first day up to the day before the current one that hacker made a submission; it does this for each submission date.
Then Understand Right Hand Side:
dateDIFF(s1.submission_date , '2016-03-01')
This takes the difference in days between the current date, e.g. 2016-03-05, and the first day, 2016-03-01.
Understanding the whole statement now:
So if a hacker made at least one submission on every day from 2016-03-01 up to the day before 2016-03-05, then both sides of the above comparison are equal:
the date difference from the 5th to the 1st is 4 (right-hand side), and the number of distinct earlier submission dates for a hacker who submitted every day from the 1st to the 4th is also 4 (left-hand side). The submission on the 5th itself is already required by the condition s2.submission_date = s1.submission_date in the enclosing query.

Better optimized SELECT SQL query for 50,000+ records

I have a query which works great for 1000 records or less but now I need to optimize it for 50,000+ records and when I run it on that it just stalls...
Here is my code:
-- Latest two bills per account via a self-join: b1 is pinned to the account's
-- newest effective_date, b2 to the newest date before that one.
-- Every MAX() below is a correlated subquery evaluated against b1's account.
SELECT
    b1.account_num,
    b1.effective_date AS ed1,
    b1.amount         AS am1,
    b2.effective_date AS ed2,
    b2.amount         AS am2
FROM bill AS b1
LEFT JOIN bill AS b2
    ON b1.account_num = b2.account_num
WHERE
    -- b1 = most recent bill of the account
    b1.effective_date = (
        SELECT MAX(effective_date)
        FROM bill
        WHERE account_num = b1.account_num
    )
    AND (
        -- b2 = most recent bill dated before the account's latest one
        b2.effective_date = (
            SELECT MAX(effective_date)
            FROM bill
            WHERE account_num = b1.account_num
              AND effective_date < (
                    SELECT MAX(effective_date)
                    FROM bill
                    WHERE account_num = b1.account_num
                  )
        )
        OR b2.effective_date IS NULL
    )
ORDER BY
    b1.effective_date DESC
My objective is to get the latest two effective dates and amounts from one table with many records.
Here is a working answer from your SQL-Fiddle baseline
First, the inner preQuery gets the max date per account. That is then joined to the bill table per account AND the effective date is less than the max already detected.
That is then joined to each respective bill for their amounts.
-- Latest two bills per account.
--   pq1   - newest effective_date per account
--   Final - adds the newest effective_date that is older than pq1.latestBill
--           (NULL when the account has only one bill date)
--   FB1 / FB2 - the bill rows for those two dates, joined back for amounts
select
    FB1.account_num,
    FB1.effective_date as ed1,
    FB1.amount         as am1,
    FB2.effective_date as ed2,
    FB2.amount         as am2
from
    ( select
          pq1.account_num,
          pq1.latestBill,
          max( b2.effective_date ) as secondLastBill
      from
          ( select
                b1.account_num,
                max( b1.effective_date ) as latestBill
            from
                bill b1
            group by
                b1.account_num ) pq1
          left join bill b2
              on pq1.account_num = b2.account_num
             and b2.effective_date < pq1.latestBill
      -- every non-aggregated select column is listed, so the query does not
      -- rely on the server accepting an incomplete GROUP BY
      group by
          pq1.account_num,
          pq1.latestBill ) Final
    join Bill FB1
        on Final.Account_Num = FB1.Account_Num
       and Final.LatestBill = FB1.Effective_Date
    -- LEFT JOIN: accounts with a single bill still appear, with ed2/am2 NULL
    left join Bill FB2
        on Final.Account_Num = FB2.Account_Num
       and Final.secondLastBill = FB2.Effective_Date
order by
    Final.latestBill desc
In MySQL versions before 8.0, window (analytic) functions such as ROW_NUMBER are not available, so we can simulate them using user variables.
The good thing is that the table is scanned only once with this approach.
A row number is assigned within each partition, where the rows are partitioned by account number and ordered by effective date descending, and only the first 2 rows of each partition are selected.
-- Latest two bills per account using MySQL user variables as a row counter.
-- The inner query numbers each account's bills from newest to oldest; the
-- outer query pivots rows 1 and 2 into the ed1/am1 and ed2/am2 columns.
-- User variables need the @ sigil (a # would start a comment in MySQL). The
-- counter column is named rn because ROW_NUMBER is a reserved word in 8.0.
-- NOTE(review): MySQL does not guarantee the evaluation order of user-variable
-- assignments relative to ORDER BY; on 8.0+ prefer ROW_NUMBER() OVER (...).
select account_num,
       max(case when rn = 1 then effective_date end) as ed1,
       max(case when rn = 1 then amount end)         as am1,
       max(case when rn = 2 then effective_date end) as ed2,
       max(case when rn = 2 then amount end)         as am2
from (
    select account_num, effective_date, amount,
           -- restart at 1 whenever the account changes, otherwise increment
           @num := if(@prevacct = account_num, @num + 1, 1) as rn,
           @prevacct := account_num as dummy
    from bill
    cross join (select @num := 0, @prevacct := '') as var
    order by account_num, effective_date desc
) T
where rn <= 2
group by account_num

SQL Sum cumulative and non-cumulative in same query

Hi I was wondering if there is a way to get a cumulative and non-cumulative total in the same query. I have a table with following fields:
Department, SalesPerson, fin_month, activity, cost
What I would like is have two sums, one that would give a monthly total for salesperson, and another giving a year to date total. I am having a problem setting two different where criteria to get it to work.
Many Thanks
Would something like this help?
-- Puts two totals of the cost column side by side:
--   a - total cost per Department and SalesPerson
--   b - total cost per Department
-- The amounts are summed from cost; fin_month is the period, not a value to
-- add up. [TABLE_NAME] is a placeholder for the real table.
-- Note: SELECT * returns Department twice (once from a, once from b).
SELECT
    *
FROM
    (
        SELECT
            Department,
            SalesPerson,
            SUM(cost) AS SalesPerson_Sum
        FROM
            [TABLE_NAME]
        GROUP BY
            Department,
            SalesPerson
    ) a
INNER JOIN
    (
        SELECT
            Department,
            SUM(cost) AS Department_Sum
        FROM
            [TABLE_NAME]
        GROUP BY
            Department
    ) b
    ON a.Department = b.Department
This solution uses CTEs, recursion, and ranking to obtain cumulative totals for every fin_month per SalesPerson in every Department based on the corresponding monthly totals.
-- Monthly and cumulative cost totals per Department / SalesPerson.
--   monthlytotals - cost summed per fin_month, with rn numbering the months of
--                   each (Department, SalesPerson) in fin_month order
--   alltotals     - recursive CTE: starts at each salesperson's first month
--                   (rn = 1) and adds one month per step to the running total
;WITH
monthlytotals AS (
    SELECT
        Department,
        SalesPerson,
        fin_month,
        MonthlyTotal = SUM(cost),
        rn = ROW_NUMBER() OVER (PARTITION BY Department, SalesPerson
                                ORDER BY fin_month)
    FROM atable
    GROUP BY Department, SalesPerson, fin_month
),
alltotals AS (
    -- anchor: the first month's cumulative total is its own monthly total
    SELECT
        Department,
        SalesPerson,
        fin_month,
        MonthlyTotal,
        CumulativeTotal = MonthlyTotal,
        rn
    FROM monthlytotals
    WHERE rn = 1
    UNION ALL
    -- recursive step: previous cumulative total plus the next month's total
    -- (the column is CumulativeTotal, singular, as declared in the anchor)
    SELECT
        m.Department,
        m.SalesPerson,
        m.fin_month,
        m.MonthlyTotal,
        CumulativeTotal = a.CumulativeTotal + m.MonthlyTotal,
        m.rn
    FROM monthlytotals m
    INNER JOIN alltotals a
        ON m.Department = a.Department
       AND m.SalesPerson = a.SalesPerson
       AND m.rn = a.rn + 1
)
SELECT
    Department,
    SalesPerson,
    fin_month,
    MonthlyTotal,
    CumulativeTotal
FROM alltotals