Group by a summed variable - mysql

SELECT deposit.numberSuccessfulDeposits, count(distinct userid)
FROM deposit WHERE deposit.asOfDate between '2016-04-01 00:00:00' and '2016-04-03 23:59:59'
AND deposit.licenseeId = 1306
GROUP BY deposit.numberSuccessfulDeposits
Sample output
numberSuccessfulDeposits count(distinct userid)
0 228
1 878
2 90
3 37
4 17
However, if Bob made 1 deposit on Monday and 3 deposits on Tuesday, then it will count towards both "1" and "3" for number of successful deposits.
numberSuccessfulDeposits count(distinct userid)
0 ##
1 1
2 ##
3 1
4 ##
Ideally, it should only count towards "4"
numberSuccessfulDeposits count(distinct userid)
0 ##
1 ##
2 ##
3 ##
4 1
Thoughts?

Change the grouping to be per user and sum all occurrences of deposits. Then count the users for each of those per-user sums:
-- Step 1 (inner query): collapse the date window to one row per user that
--                       holds the user's total number of deposits.
-- Step 2 (outer query): count how many users ended up on each total.
SELECT
    per_user.deposit_total AS numberSuccessfulDeposits,
    COUNT(per_user.userid) AS users_count
FROM (
    SELECT
        d.userid,
        SUM(d.numberSuccessfulDeposits) AS deposit_total
    FROM deposit AS d
    WHERE d.licenseeId = 1306
      AND d.asOfDate >= '2016-04-01 00:00:00'
      AND d.asOfDate <= '2016-04-03 23:59:59'
    GROUP BY d.userid
) AS per_user
GROUP BY per_user.deposit_total
Edit: Grouping deposits into the categories 0, 1, 2 and 3+ would look like this:
-- Bucket users by their total deposits in the window: 0, 1, 2 or '3+'.
SELECT
    numberSuccessfulDeposits,
    COUNT(userid) AS user_count
FROM (
    SELECT
        -- MySQL has no `::` cast operator (that is PostgreSQL syntax);
        -- CAST(... AS CHAR) makes both CASE branches strings.
        CASE
            WHEN numberSuccessfulDeposits >= 3 THEN '3+'
            ELSE CAST(numberSuccessfulDeposits AS CHAR)
        END AS numberSuccessfulDeposits,
        userid
    FROM (
        -- One row per user: total deposits inside the date window.
        SELECT
            SUM(numberSuccessfulDeposits) AS numberSuccessfulDeposits,
            userid
        FROM deposit
        WHERE asOfDate BETWEEN '2016-04-01 00:00:00' AND '2016-04-03 23:59:59'
          AND licenseeId = 1306
        GROUP BY userid
    ) t
) f
GROUP BY numberSuccessfulDeposits

Calculate the per-user sum in a subquery, then the per-total count in the main query.
-- Inner query: total deposits per user inside the date window.
-- Outer query: number of users for each distinct total.
SELECT totalDeposits, COUNT(*)
FROM (SELECT userid,
             -- the column is numberSuccessfulDeposits (as in the question's query)
             SUM(numberSuccessfulDeposits) AS totalDeposits
      FROM deposit
      WHERE deposit.asOfDate BETWEEN '2016-04-01 00:00:00' AND '2016-04-03 23:59:59'
        AND deposit.licenseeId = 1306
      GROUP BY userid) AS subquery
GROUP BY totalDeposits

Related

Need to Calculate few metrics from dataset using SQL - separate queries

Dataset looks like this : This is a sample dataset for number of employee login activity named - activity
I need to calculate few metrics, was able to do in python data frames, but new in mySQL.
what is the average number of employee active per day for month of jan 2018 by dept ( was able to do somewhat half of it, but results coming are not correct.
number of unique active employee (login >0) per month for jan 2018 for each dept_id (was able to do it)
month over month growth for all dept_id from dec-2017 to jan 2018 where at least one employee was active (login >0) - no idea how to do this in sql
fraction of users who were active in each dept_id for dec 2017 and were also active in the same dept_id for jan 2018
how many employee login in on 3 or more consecutive days in jan 2018
Any help would be appreciated.
Query written for case 1:
select dept_id,
DAU
from
(
select dept_id
, month(date)
, year(date)
, avg(logins) as DAU
from
(select * from activity where login >0)
where year(date) =2018
and month(date) =1
group by dept_id, month(date), year(date)
)
Textual Format Dataset
date dept_id emp_id logins
29-11-2017 ABC001 A1 1
30-11-2017 ABC002 A2 2
01-12-2017 XYZ001 A3 0
01-12-2017 XYZ002 A4 1
03-12-2017 ABC001 D2 4
04-12-2017 ABC002 D1 1
05-12-2017 XYZ001 A6 2
05-12-2017 XYZ002 A7 3
30-12-2017 ABC001 A8 0
01-01-2018 ABC002 A2 6
02-01-2018 XYZ001 A10 4
03-01-2018 XYZ002 A11 2
04-01-2018 ABC001 A1 2
04-01-2018 ABC002 A2 0
05-01-2018 XYZ001 A13 4
05-01-2018 XYZ001 A6 2
05-01-2018 XYZ002 A7 1
06-01-2018 XYZ001 A6 2
06-01-2018 XYZ002 A7 3
07-01-2018 XYZ001 A6 3
07-01-2018 XYZ002 A7 4
06-01-2018 XYZ002 A14 3
30-01-2018 ABC001 A15 2
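Let me know if this works; otherwise I will update the answer. I don't have MySQL installed, so I wasn't able to check. The queries below are written in Oracle syntax (to_char, nvl, rownum) and will need translating before they run on MySQL.
DATE is a keyword in Oracle. In MySQL it is a non-reserved keyword, but if you want to quote a column named date there, use backticks (`date`), because double quotes denote a string literal by default.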
Case 1:
-- Case 1 (Oracle syntax): average number of active employees per day, per department,
-- for rows whose month/year is 01/2018.
--   innermost query: sums logins per (dept_id, emp_id, day-of-month)
--   middle query:    keeps sums > 0 and counts employees per (dept_id, day)
--   outer query:     averages those daily counts per department
-- NOTE(review): a day with no active employee in a department produces no row in the
-- middle query, so the average is taken only over days that had activity.
SELECT dept_id,
AVG(cnt) average_emp
FROM (SELECT dept_id,
days,
COUNT(emp_id) cnt
FROM (SELECT dept_id,
emp_id,
SUM(logins) logins,
to_char(DATES, 'dd') days
FROM mytable
WHERE to_char(DATES,'mmyyyy') = '012018'
GROUP BY dept_id,
emp_id,
to_char(DATES, 'dd') )
WHERE logins > 0
GROUP BY dept_id,
days )
GROUP BY dept_id;
Case 2:
-- Case 2 (Oracle syntax): number of active employees per department for 01/2018.
-- The inner query yields one row per (dept_id, emp_id) with that employee's summed
-- logins, so COUNT(emp_id) in the outer query counts each employee once per department.
SELECT dept_id,
COUNT(emp_id)
FROM (SELECT dept_id,
emp_id,
SUM(logins) logins
FROM mytable
WHERE to_char(DATES,'mmyyyy') = '012018'
GROUP BY dept_id,
emp_id )
-- only employees whose summed logins for the month are positive are counted
WHERE logins > 0
GROUP BY dept_id;
Case 3:
-- Case 3 (Oracle syntax): month-over-month growth rate.
-- Inner query `oq`: one row per 'MON-YYYY' label with a row count (`users`).
-- Outer query: growth_rate = (users - previous users) / previous users, rounded to 2
-- places; on the first row LAG() is NULL, so nvl() turns the result into 0.
-- NOTE(review): count(mt.EMP_ID) counts rows, not distinct employees, and is not grouped
-- by dept_id although the question asks for growth per department.
-- NOTE(review): to_date('JAN-2018', 'MON-YYYY') presumably resolves to 1 Jan 2018, which
-- would make the upper bound exclude the rest of January; confirm.
-- NOTE(review): in the EXISTS filter, the innermost WHERE tests iq.LOGINS (the outer
-- alias), not a column of the MYTABLE instance being selected from; confirm the intent.
SELECT months,
users,
ROUND( (users - nvl(LAG(users) OVER (ORDER BY rownum),users) ) / nvl(LAG(users) OVER (ORDER BY ROWNUM), 1)
, 2) growth_rate
FROM (SELECT to_char(mt.DATES, 'MON-YYYY') months,
count(mt.EMP_ID) users
FROM (SELECT *
FROM MYTABLE
ORDER BY DATES) mt
WHERE mt.DATES >= to_date('DEC-2017', 'MON-YYYY')
AND mt.DATES <= to_date('JAN-2018', 'MON-YYYY')
GROUP BY to_char(mt.DATES, 'MON-YYYY')
ORDER BY to_date(months, 'MON-YYYY') ) oq
WHERE exists(SELECT 1
FROM MYTABLE iq
WHERE to_char(iq.DATES, 'MON-YYYY') = oq.months
AND iq.EMP_ID IN (SELECT EMP_ID
FROM MYTABLE
WHERE iq.LOGINS > 0) );
Case 4:
-- Case 4 (Oracle syntax): (dept_id, emp_id) pairs that have a row with logins > 0 in
-- both 12/2017 and 01/2018, obtained by intersecting the two monthly sets.
-- NOTE(review): this returns the matching pairs themselves; the fraction asked for in
-- the question still has to be computed from them (e.g. divided by the December count).
SELECT dept_id,
emp_id
FROM (SELECT dept_id,
emp_id
FROM mytable
WHERE to_char(DATES,'mmyyyy') = '122017'
AND logins > 0
GROUP BY dept_id,
emp_id )
INTERSECT
SELECT dept_id,
emp_id
FROM (SELECT dept_id,
emp_id
FROM mytable
WHERE to_char(DATES,'mmyyyy') = '012018'
AND logins > 0
GROUP BY dept_id,
emp_id )
Case 5:
-- Case 5 (Oracle syntax), not foolproof: attempt at counting employees who logged in
-- on 3 or more consecutive days in 01/2018.
--   rn  = DENSE_RANK of the date across all filtered rows (not partitioned by employee)
--   cnt = running count of rows per employee, ordered by date
-- The outer query counts rows where rn = cnt and rn >= 3.
-- NOTE(review): nothing here compares adjacent dates, so gaps between an employee's
-- login days are not detected; treat the result as an approximation.
SELECT COUNT(*) emp_cnt
FROM (SELECT emp_id,
DENSE_RANK() OVER(ORDER BY DATES) rn,
COUNT(*) OVER(PARTITION BY emp_id ORDER BY DATES) cnt
FROM mytable
WHERE to_char(DATES,'mmyyyy') = '012018'
AND logins > 0
ORDER BY rn,
cnt )
WHERE rn = cnt
AND rn >= 3;

MySQL Select duplicities according date interval

I have table orders:
id
login_name
success
order_date
1
login1
0
2021-01-05
2
login2
0
2021-01-06
3
login3
0
2021-01-08
4
login1
1
2021-01-04
5
login2
0
2021-01-01
I need to select the id and login_name of orders with success=0 for which another order by the same login_name exists with an order_date within 60 days before or after it.
The result should be:
1 - login1, 2 - login2, 5 - login2
I have this, but I think that is not a right way:
SELECT id, login_name, COUNT(*)
FROM orders
WHERE success=0
GROUP BY login_name
HAVING COUNT(*) > 1
You can use the EXISTS as follows:
-- Failed orders (success = 0) whose login has at least one OTHER order placed
-- within 60 days before or after. The other order may have any success value.
-- No COUNT(*) here: an aggregate without GROUP BY would collapse the result to
-- a single row instead of listing each matching order.
SELECT r.id, r.login_name
FROM orders r
WHERE r.success = 0
  AND EXISTS (
      SELECT 1
      FROM orders rr
      WHERE rr.login_name = r.login_name   -- the column is login_name, not login
        AND rr.id <> r.id                  -- do not match the order with itself
        AND ABS(DATEDIFF(rr.order_date, r.order_date)) <= 60
  )
If you want orders that appear within 60 days of each other, you can use lag() and lead():
-- Orders that have a neighbouring order (same login_name) less than 60 days
-- before or after. Requires window functions (MySQL 8.0+).
select o.*
from (select o.*,
             lag(order_date) over (partition by login_name order by order_date) as prev_order_date,
             lead(order_date) over (partition by login_name order by order_date) as lead_order_date
      from orders o
     ) o
-- MySQL date arithmetic uses INTERVAL; dateadd(day, n, d) is SQL Server syntax.
-- The filter must reference the alias actually defined above (lead_order_date).
where prev_order_date > order_date - interval 60 day or
      lead_order_date < order_date + interval 60 day;

How to group by two fields?

I have the following table:
listId | accountId | amount
1 1 20
1 1 20
2 2 30
2 2 30
I need to SUM(amount) and group by listId, accountId to get result:
listId | accountId | amount |
1 1 40
2 2 60
But it does not work for me: SUM(amount) ... GROUP BY listId, accountId
My full query is:
select `account_transactions`.*,
`enterprise_invoces`.*,
ABS(SUM(IF(AT_amount>0, AT_amount, 0))) AS debit,
ABS(SUM(IF(AT_amount<0, AT_amount, 0))) AS credit
from `account_transactions`
inner join `enterprise_invoces`
on `enterprise_invoces`.`AC_id` = `account_transactions`.`AT_code`
where `AT_createuser` = 15 and
date(`AT_transactiondatetime`) >= 2019-04-11 and
date(`AT_transactiondatetime`) <= 2019-07-29 and
`AC_code` >= 601 and
`AC_code` <= 761
group by `enterprise_invoces`.`AC_id`, `account_transactions.AT_transactionficheno`
order by `AT_transactiondatetime` desc
Your SELECT list should contain only the columns named in GROUP BY plus the aggregated expressions; any other column has no single well-defined value per group. So the query should look like this:
-- Debit and credit totals per (AC_id, AT_transactionficheno).
select enterprise_invoces.AC_id,
       account_transactions.AT_transactionficheno,
       ABS(SUM(IF(AT_amount > 0, AT_amount, 0))) AS debit,
       ABS(SUM(IF(AT_amount < 0, AT_amount, 0))) AS credit
from account_transactions
inner join enterprise_invoces
        on enterprise_invoces.AC_id = account_transactions.AT_code
where AT_createuser = 15 and
      -- date literals must be quoted: an unquoted 2019-04-11 is evaluated as
      -- the integer expression 2019 - 4 - 11 = 2004
      date(AT_transactiondatetime) >= '2019-04-11' and
      date(AT_transactiondatetime) <= '2019-07-29' and
      AC_code >= 601 and
      AC_code <= 761
group by enterprise_invoces.AC_id, account_transactions.AT_transactionficheno
-- AT_transactiondatetime is not a grouped column, so order by an aggregate of
-- it: groups are sorted by their most recent transaction
order by max(AT_transactiondatetime) desc

mysql subquery for charts

I have 2 tables with the following data
inviteTable
inviteDate | contractAmount | status
2014-01-01 1500 awarded
2015-01-01 2000 awarded
2015-01-02 4000 closed
2015-02-01 6000 awarded
2015-02-02 8000 awarded
2015-03-01 8500 awarded
quoteTable
quoteDate | quoteAmount | status
2014-03-01 1500 awarded
2015-01-02 2000 awarded
2015-01-03 4000 sent
2015-01-04 6000 awarded
2015-02-03 8000 awarded
2015-02-10 8500 sent
2015-02-11 9000 awarded
2015-03-01 9500 awarded
I want to make ONE query that gives me the following data structure
month | quotedTotal | quotedCount | awardedTotal | awardedCount
January 8000 2 2000 1
February 17000 2 14000 2
March 9500 1 8500 1
currently i have written this code...but it doesn't work
SELECT
MONTHNAME(t1.due_date) as month,
(select sum(`amount`) from quoteTable where awarded= 1 ) as estimatedAmount,
sum(t1.contactAmount) AS sumContactAmount,
COUNT(*) as `noOfAwarded`
FROM inviteTable t1
where
t1.status = "Awarded" and
t1.inviteDate between '2014-11-01' and '2015-10-31'
GROUP BY MONTH(due_date)
;
basically I want to have ONE query that gives me the following:
volume that got awarded - sum of contractAmount and status awarded
count of how many awarded - count of inviteTable.status=awarded
volume that got quoted - sum of quoteAmount and status awarded
count of how many quoted - count of quotedTable.status=awarded
all grouped by month and in a date range
my query doesn't work. Your help would be highly appreciated as I am stuck!
Your schema is unfortunate; it would be easier if you had just one table with an extra column recording what type of record each row is. However, if you first combine the two tables into one via union all, you can use a fairly simple query to produce the results you want:
-- Monthly quoted/awarded totals and counts from both tables in one pass.
-- The derived table tags each row with 0/1 flags (in MySQL a comparison yields
-- 1 or 0), so sum(flag) counts rows and sum(flag * amount) totals them.
select
    monthname(_date) month,
    sum(quoteAwarded * amount) quotedTotal,
    sum(quoteAwarded) quotedCount,
    sum(contractAwarded * amount) awardedTotal,
    sum(contractAwarded) awardedCount
from (select inviteDate _date, contractAmount amount, status = 'awarded' contractAwarded, 0 quoteAwarded from inviteTable
      union all
      select quoteDate, quoteAmount, 0, status = 'awarded' from quoteTable) x
where _date between '2014-11-01' and '2015-10-31'
-- group by explicit expressions rather than position, and include
-- month(_date) so the ORDER BY expression is a grouped expression
-- (required when ONLY_FULL_GROUP_BY is enabled)
group by month(_date), monthname(_date)
order by month(_date)
See the SQLFiddle, which uses your sample data and produces your expected output.
At the core of this query is the convenient fact that (in mysql) "true" is 1 and "false" is 0 - allowing the use of sum() instead of count(), and more importantly avoiding the use of case by using sum() over some simple math.
Here is a way you can do it. Note that you have dates in both tables, so you need a join, preferably a left join: that ensures every month present in the left table is always returned, even when the right table has no rows for it.
-- Monthly awarded totals (inviteTable) with the matching quoted totals
-- (quoteTable) joined on month name. Only months that occur in inviteTable
-- within the date range are returned; quote columns are NULL when a month has
-- no quotes in range.
select
    t1.month,
    t1.awardedTotal,
    t1.awardedCount,
    t2.quotedTotal,
    t2.quotedCount
from (
    select
        monthname(inv.inviteDate) as month,
        month(inv.inviteDate) as m,   -- month number, used only for ordering
        sum(case when inv.status = 'awarded' then inv.contractAmount else 0 end) as awardedTotal,
        sum(case when inv.status = 'awarded' then 1 else 0 end) as awardedCount
    from inviteTable inv
    where inv.inviteDate between '2014-11-01' and '2015-10-31'
    -- group on both expressions so the selected `m` is a grouped column
    -- (required when ONLY_FULL_GROUP_BY is enabled)
    group by month(inv.inviteDate), monthname(inv.inviteDate)
) t1
left join (
    select
        monthname(q.quoteDate) as month,
        sum(case when q.status = 'awarded' then q.quoteAmount else 0 end) as quotedTotal,
        sum(case when q.status = 'awarded' then 1 else 0 end) as quotedCount
    from quoteTable q
    where q.quoteDate between '2014-11-01' and '2015-10-31'
    group by monthname(q.quoteDate)
) t2
    on t1.month = t2.month
order by t1.m
http://sqlfiddle.com/#!9/a863a/10
UPDATE:
Yes. Since you want the months present in either table, the query above cannot do it: it always treats one table as the left table and returns only the months found there. To get data for the months across both tables, first collect the months from both tables (with a union) and then left join the aggregated figures onto that list, something like:
-- Monthly awarded and quoted totals for every month that appears in EITHER
-- table within the date range; missing sides are reported as 0.
select
    t1.month,
    coalesce(t2.awardedTotal, 0) as awardedTotal,
    coalesce(t2.awardedCount, 0) as awardedCount,
    coalesce(t3.quotedTotal, 0) as quotedTotal,
    coalesce(t3.quotedCount, 0) as quotedCount
from (
    -- distinct list of months (number + name) seen in either table
    select month(inviteDate) as m, monthname(inviteDate) as month
    from inviteTable
    where inviteDate between '2014-11-01' and '2015-10-31'
    union
    select month(quoteDate) as m, monthname(quoteDate) as month
    from quoteTable
    where quoteDate between '2014-11-01' and '2015-10-31'
) t1
left join (
    select
        monthname(inviteDate) as month,
        sum(case when status = 'awarded' then contractAmount else 0 end) as awardedTotal,
        sum(case when status = 'awarded' then 1 else 0 end) as awardedCount
    from inviteTable
    -- the aggregates need the same date filter as the month list; without it,
    -- rows outside the range (e.g. 2014-01-01) are added into the same month
    -- name and inflate the totals
    where inviteDate between '2014-11-01' and '2015-10-31'
    group by monthname(inviteDate)
) t2
    on t1.month = t2.month
left join (
    select
        monthname(quoteDate) as month,
        sum(case when status = 'awarded' then quoteAmount else 0 end) as quotedTotal,
        sum(case when status = 'awarded' then 1 else 0 end) as quotedCount
    from quoteTable
    -- same date filter as above (keeps e.g. 2014-03-01 out of March)
    where quoteDate between '2014-11-01' and '2015-10-31'
    group by monthname(quoteDate)
) t3
    on t1.month = t3.month
order by t1.m
http://sqlfiddle.com/#!9/ddaac/14

Calculate index position using sub queries on group by using date

select user_id as sponsor_id,sum(points),created_at
from points_history
where created_at between '2014/08/12' and '2015/08/12' and transaction_type="debit"
group by user_id,DATE_FORMAT(created_at,"%d %M %Y")
order by DATE_FORMAT(created_at,"%d %M %Y"),sum(points) desc
sponsor_id sum(points) created_at
1 30 2014-12-08 10:54:59
2 25 2014-12-09 05:43:11
3 20 2014-12-09 06:58:40
1 5 2014-12-09 05:56:12
1 34 2014-08-23 10:42:32
here I want to calculate rank of particular sponsor using sponsor_id on daily basis .. I want to build a query that can return me something like as displayed below:
sponsor_id rank created_at
1 1 2014-12-08 10:54:59
1 3 2014-12-09 05:56:12
1 1 2014-08-23 10:42:32
I think I can use sub query like
select *
from (select user_id as sponsor_id,sum(points),created_at
from points_history
where created_at between '2014/08/12' and '2015/08/12' and transaction_type="debit"
group by user_id,DATE_FORMAT(created_at,"%d %M %Y")
order by DATE_FORMAT(created_at,"%d %M %Y"),sum(points) desc
) as t
where t.sponsor_id = 1
but how do I calculate the rank here?
Try this:
-- Rank sponsors within each day by their summed points, using user variables:
--   @dte  remembers the date of the previous row
--   @rank restarts at 1 whenever the date changes, otherwise increments
-- User variables are written with '@'. A '#' starts a comment in MySQL and
-- would cut off the rest of the line.
-- `rank` is a reserved word from MySQL 8.0.2 on, hence the backticks.
-- NOTE(review): this relies on rows being evaluated in the derived table's
-- ORDER BY order, which MySQL does not guarantee; on MySQL 8.0+ a window
-- function (RANK() OVER (PARTITION BY DATE(created_at) ORDER BY points DESC))
-- is the more robust choice.
SELECT sponsor_id, points, created_at,
       IF(@dte = (@dte := DATE(created_at)), @rank := @rank + 1, @rank := 1) AS `rank`
FROM (SELECT user_id AS sponsor_id, SUM(points) points, created_at
      FROM points_history
      WHERE created_at BETWEEN '2014-08-12' AND '2015-08-12' AND
            transaction_type = "debit"
      GROUP BY user_id, DATE_FORMAT(created_at,"%d %M %Y")
      ORDER BY DATE(created_at), SUM(points) DESC
     ) AS A, (SELECT @rank := 0, @dte := '') AS B
ORDER BY DATE(created_at), points DESC;