Union with Count OR Join with Sum - MySQL - mysql

I want to combine three tables - date, lead and click - in a query.
The tables look like this:
date:
|date|
lead:
id|time|commission
click:
id|time|commission
The table date is just storing dates and is used when getting dates with no click or lead.
So if we have the following data in the tables:
date:
2009-06-01
2009-06-02
2009-06-03
lead:
1|2009-06-01|400
2|2009-06-01|300
3|2009-06-03|350
click:
1|2009-06-01|1
2|2009-06-03|2
3|2009-06-03|2
4|2009-06-03|0
I would like to get the date, number of clicks, commission generated by clicks (there are clicks that don't give commission), number of leads, commission generated by leads, and total commission. So with the tables above I would like to get:
2009-06-01|1|1|2|700|701|
2009-06-02|0|0|0|0|0
2009-06-03|3|4|1|350|354|
I have tried with the following union:
-- Asker's UNION ALL attempt: stack click rows, lead rows and bare date rows into
-- one derived table, then aggregate per commission_date.
SELECT
campaign_id,
commission_date,
SUM( click_commission ) AS click_commission,
-- NOTE(review): click and lead are selected without an aggregate, so each group
-- shows the value of an arbitrary row rather than a count.
click,
SUM( lead_commission ) AS lead_commission ,
lead,
SUM( total_commission ) as total_commission
FROM(
-- Branch 1: one row per click.
SELECT
click.campaign_id AS campaign_id,
DATE( click.time ) AS commission_date,
click.commission AS click_commission,
-- NOTE(review): this subquery is not correlated to the outer click row and is
-- grouped by date, so it produces one count per date, not a single value.
(SELECT count(click.id) from click GROUP BY date(click.time)) as click,
0 as lead_commission,
0 as lead,
click.commission AS total_commission
FROM click
UNION ALL
-- Branch 2: one row per lead.
SELECT
lead.campaign_id AS campaign_id,
DATE( lead.time ) AS commission_date,
0 as click_commission,
0 as click,
lead.commission AS lead_commission,
-- NOTE(review): this is the lead's id, not a number of leads.
lead.id as lead,
lead.commission AS total_commission
FROM lead
UNION ALL
-- Branch 3: an all-zero row per calendar date so dates without activity still appear.
SELECT
0 AS campaign_id,
date.date AS commission_date,
0 AS click_commission,
0 as click,
0 AS lead_commission,
0 as lead,
0 AS total_commission
FROM date
) AS foo
WHERE commission_date BETWEEN '2009-06-01' AND '2009-07-25'
GROUP BY commission_date
ORDER BY commission_date LIMIT 0, 10
But this does not count both the number of clicks and the number of leads: the code above gives the right number of clicks but 0 for all leads. If I move the code around and put the select from the lead table first, I get the leads right but 0 for all clicks. I have not been able to find a way to get both counts from the query.
So I tried a left-join instead:
-- Asker's LEFT JOIN attempt: join clicks and leads directly to each date and aggregate.
-- Because both tables are joined on the same date, every click of a day is paired
-- with every lead of that day, so the row count per date is clicks x leads.
SELECT
date.date as date,
-- DISTINCT hides the duplicated click rows for the count ...
count( DISTINCT click.id ) AS clicks,
-- ... but SUM adds each click commission once per matching lead row.
sum(click.commission) AS click_commission,
-- Not DISTINCT, so each lead is counted once per matching click row.
count( lead.id ) AS leads,
-- Each lead commission is added once per matching click row.
sum(lead.commission) AS lead_commission
FROM date
LEFT JOIN click ON ( date.date = date( click.time ) )
LEFT JOIN lead ON ( date.date = date( lead.time ) )
GROUP BY date.date
LIMIT 0 , 30
The problem with this query is that if there is more than one click or lead on a date, the sums are multiplied by the number of matching rows in the other table. So on 2009-06-01 it will return 1400 instead of the expected 700 for lead commission.
So in the UNION I have problems with the count and in the left join it is the SUM that is not working.
I would really like to stick to the UNION if possible, but I haven't found a way to get both counts from it.
(This is a follow up to this earlier question, but since I didn't ask for the count in that I posted a new question.)

-- One row per calendar date with lead totals, click totals and combined totals.
-- Leads and clicks are aggregated per day in separate derived tables BEFORE the
-- join, so each date matches at most one row from each side and nothing is
-- multiplied. Dates without activity yield NULLs, which COALESCE turns into 0.
-- Output columns: date, lead commission, lead count, click commission,
-- click count, total commission, total count.
SELECT d.date,
COALESCE(l.lcomm, 0), COALESCE(l.lcnt, 0),
COALESCE(c.ccomm, 0), COALESCE(c.ccnt, 0),
COALESCE(c.ccomm, 0) + COALESCE(l.lcomm, 0),
COALESCE(c.ccnt, 0) + COALESCE(l.lcnt, 0)
FROM date d
LEFT JOIN
(
-- The lead table stores a timestamp in `time`; reduce it to a calendar date.
-- `lead` is backtick-quoted because LEAD is a reserved word in MySQL 8.0.
SELECT DATE(time) AS date, SUM(commission) AS lcomm, COUNT(*) AS lcnt
FROM `lead`
GROUP BY
DATE(time)
) l
ON l.date = d.date
LEFT JOIN
(
SELECT DATE(time) AS date, SUM(commission) AS ccomm, COUNT(*) AS ccnt
FROM click
GROUP BY
DATE(time)
) c
ON c.date = d.date

The code that I used, built from the suggestion from Quassnoi:
-- Daily report: click and lead commission/count per calendar date plus the
-- combined commission. Each activity table is collapsed to one row per day
-- before joining, and missing days fall back to 0.
SELECT
    date,
    COALESCE(click_totals.ccomm, 0) AS click_commission,
    COALESCE(click_totals.ccnt, 0) AS click_count,
    COALESCE(lead_totals.lcomm, 0) AS lead_commision,
    COALESCE(lead_totals.lcnt, 0) AS lead_count,
    COALESCE(click_totals.ccomm, 0) + COALESCE(lead_totals.lcomm, 0) AS total_commission
FROM date d
LEFT JOIN (
    -- per-day lead totals
    SELECT
        DATE(time) AS lead_date,
        SUM(commission) AS lcomm,
        COUNT(*) AS lcnt
    FROM lead
    GROUP BY lead_date
) lead_totals
    ON lead_totals.lead_date = date
LEFT JOIN (
    -- per-day click totals
    SELECT
        DATE(time) AS click_date,
        SUM(commission) AS ccomm,
        COUNT(*) AS ccnt
    FROM click
    GROUP BY click_date
) click_totals
    ON click_totals.click_date = date

Related

15 days of Sql From hackerrank

I am unable to understand the purpose of one line in this code. Can someone please explain it to me, or suggest a different way to approach this question?
Link to the question:https://www.hackerrank.com/challenges/15-days-of-learning-sql
Code:
-- For every distinct submission date, report: the date, the number of hackers
-- who satisfy the "submitted on every earlier day" test below, the hacker with
-- the most submissions that day, and that hacker's name.
select
submission_date ,
-- Count distinct hackers who submitted on this date (s2) and whose number of
-- distinct submission dates strictly before this date (s3) equals the number
-- of days between this date and 2016-03-01.
( SELECT COUNT(distinct hacker_id)
FROM Submissions s2
WHERE s2.submission_date = s1.submission_date
AND ( SELECT COUNT(distinct s3.submission_date)
FROM Submissions s3
WHERE
s3.hacker_id = s2.hacker_id
AND s3.submission_date < s1.submission_date
) = dateDIFF(s1.submission_date , '2016-03-01'))
-- Hacker with the most submissions on this date; ties go to the lowest hacker_id.
, ( select hacker_id
from submissions s2
where s2.submission_date = s1.submission_date
group by hacker_id
order by count(submission_id) desc , hacker_id limit 1
) as shit
-- Name lookup keyed on the alias of the previous select-list column.
, ( select name
from hackers where hacker_id = shit
)
FROM
-- Driving set: one row per date that has at least one submission.
( select distinct submission_date
from submissions) s1
group by submission_date
Unable to understand why they have used this line from this part of the code:
(s3.submission_date < s1.submission_date) = dateDIFF(s1.submission_date , '2016-03-01'))
-- Alternative solution using #temp tables and the three-argument DATEDIFF(day, ...).
-- Step 1: per date, rank hackers by their number of submissions.
CREATE TABLE #max_submissions (
submission_date date,
hacker_id integer,
submission_count integer,
ordering_row integer
)
insert into #max_submissions
select
submission_date,
hacker_id,
submission_count,
-- ordering_row = 1 marks the hacker with the most submissions on that date;
-- ties are broken by the lowest hacker_id.
row_number() over(partition by submission_date order by submission_count desc, hacker_id) as ordering_row
from (
select submission_date,
hacker_id,
count(hacker_id) as submission_count
from submissions
group by submission_date, hacker_id
) tbl_submission_count
-- Step 2: per date, count hackers who have a submission on every day since 2016-03-01.
CREATE TABLE #hacker_counts (
submission_date date,
hacker_count integer
)
insert into #hacker_counts
select tbl.submission_date,
COUNT(distinct tbl.hacker_id) as cc
from (
select *,
-- logic = 1 when the hacker's number of distinct (hacker_id, submission_date)
-- pairs from 2016-03-01 through this row's date is at least the number of
-- days in that span (day difference + 1).
(case when (
(select count(*)
from (select distinct *
from (select s1.hacker_id,
s1.submission_date
from Submissions s1
where s1.hacker_id = s.hacker_id and
(s1.submission_date >= '2016-03-01' and
s1.submission_date <= s.submission_date)) t1
) t2
) >= (DATEDIFF(day, '2016-03-01', s.submission_date) + 1) )
then 1
else 0
end) as logic
from Submissions s
) tbl
where tbl.logic = 1
group by tbl.submission_date
-- Step 3: combine the top hacker per date with that date's hacker count and the hacker's name.
select max_submissions.submission_date,
hacker_counts.hacker_count,
max_submissions.hacker_id,
h.name
from #max_submissions max_submissions
inner join hackers h on max_submissions.hacker_id = h.hacker_id
left join #hacker_counts hacker_counts on max_submissions.submission_date = hacker_counts.submission_date
where max_submissions.ordering_row = 1
order by max_submissions.submission_date
-- Clean up the temp tables.
drop table #max_submissions
drop table #hacker_counts
To understand this line
( SELECT COUNT(distinct s3.submission_date)
FROM Submissions s3
WHERE
s3.hacker_id = s2.hacker_id
AND s3.submission_date < s1.submission_date)
= dateDIFF(s1.submission_date , '2016-03-01')
First understand left hand side:
(SELECT COUNT(distinct s3.submission_date) FROM Submissions s3 WHERE s3.hacker_id = s2.hacker_id AND s3.submission_date < s1.submission_date)
This subquery counts the distinct submission dates for a given hacker_id that fall strictly before the current date.
So if the date for one row is 2016-03-05, it counts the distinct dates before 2016-03-05 on which that hacker_id submitted (note that multiple submissions by one hacker on the same day count only once).
In other words, it takes a hacker_id and counts on how many of the days from the first day up to the day before the current one that hacker made a submission; it does this for each submission date.
Then Understand Right Hand Side:
dateDIFF(s1.submission_date , '2016-03-01')
this will take the difference of this current date 2016-03-05 to first day 2016-03-01,
Understanding the whole statement now:
So if a hacker made at least one submission on every day from 2016-03-01 up to (but not including) 2016-03-05, both sides of the comparison are equal:
the date difference from the 5th to the 1st is 4 (right-hand side), and the number of distinct earlier submission dates for a hacker who submitted every day from the 1st to the 4th is also 4 (left-hand side). The submission on the 5th itself is already guaranteed by the s2.submission_date = s1.submission_date condition.

Optimizing MySQL query that includes a repeated subquery

I have the following query that is working perfectly right now, I have been trying to optimize it since I am using the same subquery 4 times. It will be great to come up with a better/smarter solution. Thank you
Here is the query:
-- Lists invoices whose balance (amount minus total payments) is positive or
-- unknown, ordered by balance. The same correlated SUM subquery over
-- invoice_payments is written out four times: once for the payments column,
-- once for the balance column and twice in the WHERE clause.
select
invoices.invoice_id
,invoices.invoice_amount
-- Total paid against this invoice; NULL when it has no payment rows.
,(
select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id
) as payments
-- Outstanding balance; NULL when payments is NULL.
,round((invoices.invoice_amount-(
select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id
)),2) as balance
from invoices
-- Keep invoices with a positive balance ...
where (
round((invoices.invoice_amount -
(select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id)
),2)
) > 0
-- ... or with a NULL balance (for example, no payments recorded).
or (
round((invoices.invoice_amount -
(select SUM(invoice_payment_amount) as total
FROM invoice_payments
where invoice_payment_invoice_id = invoices.invoice_id)
),2)
) IS NULL
order by balance
SQL Fiddle: http://sqlfiddle.com/#!9/aecea/1
Just use a subquery:
-- Evaluate the per-invoice payment total once inside a derived table, then
-- reuse it for the balance column and for the filter. Invoices are kept when
-- the rounded balance is positive or NULL.
select
    totals.invoice_id,
    totals.invoice_amount,
    totals.payments,
    round((totals.invoice_amount - totals.payments), 2) as balance
from (
    select
        inv.*,
        (
            -- total paid against this invoice; NULL when there are no payments
            select sum(pay.invoice_payment_amount)
            from invoice_payments pay
            where pay.invoice_payment_invoice_id = inv.invoice_id
        ) as payments
    from invoices inv
) totals
where round((totals.invoice_amount - totals.payments), 2) > 0
   or round((totals.invoice_amount - totals.payments), 2) is null
order by balance;
For better performance, you want an index on invoice_payments(invoice_payment_invoice_id, invoice_payment_amount).

MySQL Use of Having and Where Clause

Having difficulty returning the desired results.
Here is my query:
-- Summary over invoice lines whose own accounting_total is 0 (excluding lines
-- whose status contains 'new' or 'voided'). Because the filter is per line, an
-- invoice is included as soon as any one of its lines is 0.
SELECT
DATABASE()AS INVOICES_Range_$0,
-- Number of distinct invoice headers among the matching lines.
count(
DISTINCT invoice_lines.invoice_header_id
)AS 'Invoice Header Count',
sum(invoice_lines.accounting_total)AS 'Dollar_Value',
-- Share of matching invoice headers relative to all non-new, non-voided headers.
100 * COUNT(
DISTINCT invoice_lines.invoice_header_id
)/(
SELECT
COUNT(
DISTINCT invoice_lines.invoice_header_id
)
FROM
invoice_lines
WHERE
(
invoice_lines. STATUS NOT LIKE '%new%'
)
AND(
invoice_lines. STATUS NOT LIKE '%voided%'
)
)AS 'Percent of All Invoices',
-- Matched approval rows per distinct invoice header.
COUNT(approvals.approvable_id)/ count(
DISTINCT invoice_lines.invoice_header_id
)AS 'AVG_APPROVALS'
FROM
invoice_lines
LEFT JOIN approvals ON invoice_lines.invoice_header_id = approvals.approvable_id
WHERE
-- Row-level filter: applies to each line, not to the invoice total.
(
invoice_lines.accounting_total = 0
)
AND(
invoice_lines. STATUS NOT LIKE '%new%'
)
AND(
invoice_lines. STATUS NOT LIKE '%voided%'
);
This query returns results where any invoice line has a value of $0.
For reference, we may have an invoice where one line is $0 but the other lines total $600.
I am wanting to only include in the above query where the total of all the invoice lines equal $0.
I've tried:
-- Asker's second attempt: the per-line "= 0" filter is moved from WHERE into HAVING.
-- There is no GROUP BY, so all rows that pass the WHERE clause form one single
-- group and HAVING is evaluated once for that whole group, not per invoice.
SELECT
DATABASE()AS INVOICES_Range_$0,
count(
DISTINCT invoice_lines.invoice_header_id
)AS 'Invoice Header Count',
sum(invoice_lines.accounting_total)AS 'Dollar_Value',
100 * COUNT(
DISTINCT invoice_lines.invoice_header_id
)/(
SELECT
COUNT(
DISTINCT invoice_lines.invoice_header_id
)
FROM
invoice_lines
WHERE
(
invoice_lines. STATUS NOT LIKE '%new%'
)
AND(
invoice_lines. STATUS NOT LIKE '%voided%'
)
)AS 'Percent of All Invoices',
COUNT(approvals.approvable_id)/ count(
DISTINCT invoice_lines.invoice_header_id
)AS 'AVG_APPROVALS'
FROM
invoice_lines
LEFT JOIN approvals ON invoice_lines.invoice_header_id = approvals.approvable_id
WHERE
(
invoice_lines. STATUS NOT LIKE '%new%'
)
AND(
invoice_lines. STATUS NOT LIKE '%voided%'
)
-- The comparison sits inside SUM, so this sums the 0/1 result of
-- "accounting_total = 0" per row, i.e. it counts the zero-valued lines; the
-- HAVING clause is true whenever that count is non-zero.
HAVING
SUM(
invoice_lines.accounting_total = 0
);
However, that returns the same results. Also, when modified to
HAVING (SUM(invoice_lines.accounting_total) < 500 )
It returns all invoices and the total amount.
Any assistance would be greatly appreciated, as I cannot determine the proper method for limiting my results to those invoice_header_id to only count those invoices where the sum of all lines is equal to 0.
HAVING
SUM(
invoice_lines.accounting_total = 0
);
probably wants to be
HAVING
SUM(
invoice_lines.accounting_total
) = 0
The solution was to evaluate in the WHERE clause using the 'Dollar Value' identified earlier in the query.
I changed the SUM(invoice_lines.accounting_total) as TOTAL
and then in the WHERE clause I added AND (TOTAL = 0);
Worked like a champ.

count consecutive number of 10 days when number is = 0 or > 10

Here is sqlfiddle that i made with mysql query
http://sqlfiddle.com/#!2/f2794/4
It counts runs of 10 or more consecutive days where present = 0, but I need to add a second condition so that days where present is > 10 are counted as well.
For example
11
22
0
0
0
0
0
0
0
0
0
0
0
0
1
should count 14
here is that query
-- Sums the sizes of all groups of at least 10 rows that share the same present
-- value and the same run number. The run number (@rownum) is increased on every
-- row whose present is not 0, walking the rows in date order.
-- MySQL user variables are written with "@"; a "#" would start a comment.
select sum(count) total from (
SELECT COUNT(present) as count FROM (
SELECT
IF((q.present != 0), @rownum:=@rownum+1, @rownum:=@rownum) AS rownumber, @prevDate:=q.date, q.*
FROM (
SELECT
name
, date
, present
FROM
teacher, (SELECT @rownum:=0, @prevDate:='') vars
WHERE date BETWEEN '2013-07-01' AND '2013-07-31'
ORDER BY date, present
) q
) sq
GROUP BY present, rownumber
HAVING COUNT(*) >= 10
) d
So if U can help me, pls do it :)
best regards
m.
I don't fully understand your query, but I think simply changing (q.present != 0) to incorporate the additional test should solve your problem:
-- Sums the lengths of all runs of at least 10 consecutive days (in date order)
-- on which present is 0 or greater than 10. For the sequence
-- 11, 22, 0 x 12, 1 the single qualifying run has 14 days, so total = 14.
-- NOTE(review): this relies on MySQL assigning @rownum in the ORDER BY order of
-- the derived table, which the server does not guarantee; verify on the
-- version in use.
SELECT sum(count) total from (
SELECT COUNT(present) as count FROM (
SELECT
-- A day with present between 1 and 10 ends the current run: bump the run number.
IF((q.present != 0 AND q.present <= 10), @rownum:=@rownum+1, @rownum) AS rownumber, q.*
FROM (
SELECT
name
, date
, present
FROM
teacher, (SELECT @rownum:=0) vars
WHERE date BETWEEN '2013-07-01' AND '2013-07-31'
ORDER BY date, present
) q
) sq
-- Drop the run-breaking days themselves so they are not counted as part of a run.
WHERE present = 0 OR present > 10
-- Group by run number only: also grouping by present would split 11, 22 and 0
-- into separate groups and the mixed run could never be counted as one.
GROUP BY rownumber
HAVING COUNT(*) >= 10
) d

mysql statistics data for users - how do I get columns per date range?

I have 3 tables in a query:
apiusers, users
these contain a user id, and a name.
download_stats:
this contains usertype (1 = users, 2 = apiusers), date, file downloaded_id.
What I want to do is get a layout of:
| user id | name | download stats for january | download stats for february
etc.
I'm currently trying with this query, which takes forever:
-- Asker's attempt: one LEFT JOIN against stats_download per reporting period.
SELECT name,queryuserid,count(ROWA.id) ,count(ROWB.id)
-- apiusers and users are comma-joined without any join condition, so this
-- derived table is built from their full cross product before GROUP BY 2
-- collapses it by queryuserid.
FROM (SELECT IFNULL(u.name, au.companyname) AS name,IFNULL(u.id, au.id) AS queryuserid
FROM apiusers as au, users as u
GROUP BY 2 ) AS users
-- NOTE(review): ROWA and ROWB use the same date range as written, and because
-- both are joined to the same user each ROWA row is paired with each ROWB row,
-- which multiplies both counts.
LEFT JOIN stats_download as ROWA ON ROWA.userid=queryuserid AND ROWA.date > date('2011-01-01') AND ROWA.date < date('2011-02-01')
LEFT JOIN stats_download as ROWB ON ROWB.userid=queryuserid AND ROWB.date > date('2011-01-01') AND ROWB.date < date('2011-02-01')
GROUP BY 2;
Is there a better way of going about this? The client wants support for grouping the output statistics by year, month and day, so there could potentially be 30+ LEFT JOINs in there.
This should get you the basis, and runs an entire year sample. Look at the pattern. By doing a sum of a qualified IF() condition returning 1 or 0 you get the total count per each month. Now, if you are looking for SIZE (such as download size), instead of 1, 0, you can just substitute the fileSize instead of 1 and you'll have the total downloaded size... put them as different columns and you can have both Count of downloads and totalSize of downloads. Expand the date range over year, just keep going with the pattern...
-- Download count per user and month for 2011, using a single pass over
-- stats_download: each month column sums 1 for rows inside that month, else 0.
-- Month boundaries are half-open (>= first day, < first day of next month) so
-- rows with a time-of-day on the last day of a month are still counted.
SELECT
AllUsers.Name,
AllUsers.QueryUserID,
SUM( if( SD.Date >= '2011-01-01' AND SD.Date < '2011-02-01', 1, 0 )) as CountJan2011,
SUM( if( SD.Date >= '2011-02-01' AND SD.Date < '2011-03-01', 1, 0 )) as CountFeb2011,
SUM( if( SD.Date >= '2011-03-01' AND SD.Date < '2011-04-01', 1, 0 )) as CountMar2011,
SUM( if( SD.Date >= '2011-04-01' AND SD.Date < '2011-05-01', 1, 0 )) as CountApr2011,
SUM( if( SD.Date >= '2011-05-01' AND SD.Date < '2011-06-01', 1, 0 )) as CountMay2011,
SUM( if( SD.Date >= '2011-06-01' AND SD.Date < '2011-07-01', 1, 0 )) as CountJun2011,
SUM( if( SD.Date >= '2011-07-01' AND SD.Date < '2011-08-01', 1, 0 )) as CountJul2011,
SUM( if( SD.Date >= '2011-08-01' AND SD.Date < '2011-09-01', 1, 0 )) as CountAug2011,
SUM( if( SD.Date >= '2011-09-01' AND SD.Date < '2011-10-01', 1, 0 )) as CountSep2011,
SUM( if( SD.Date >= '2011-10-01' AND SD.Date < '2011-11-01', 1, 0 )) as CountOct2011,
SUM( if( SD.Date >= '2011-11-01' AND SD.Date < '2011-12-01', 1, 0 )) as CountNov2011,
SUM( if( SD.Date >= '2011-12-01' AND SD.Date < '2012-01-01', 1, 0 )) as CountDec2011
FROM
-- All users from both tables. UserType keeps an API user and a regular user
-- with the same id apart (1 = users, 2 = apiusers, per the question), which
-- also makes the two branches disjoint, so UNION ALL is sufficient.
( select
2 as UserType,
APIUsers.CompanyName as Name,
APIUsers.id as QueryUserID
from APIUsers
UNION ALL
select
1 as UserType,
Users.Name,
Users.id
from Users ) AllUsers
LEFT JOIN stats_download SD
ON AllUsers.QueryUserID = SD.UserID
-- NOTE(review): assumes the type column is named usertype as described in the
-- question; confirm against the actual table definition.
AND AllUsers.UserType = SD.usertype
AND SD.Date >= '2011-01-01' AND SD.Date < '2012-01-01'
GROUP BY
AllUsers.UserType,
AllUsers.QueryUserID,
AllUsers.Name;
There is a better way.
You need to do just one join with the stats_download and aggregate on months, I assume the method GETMONTH extracts the month from a date (every database has its own method, you didn't specify what you are using)
(
SELECT name,queryuserid,count(stats_download.id) as cnt,GETMONTH(stats_download.date) as month
FROM (SELECT IFNULL(u.name, au.companyname) AS name,IFNULL(u.id, au.id) AS queryuserid
FROM apiusers as au, users as u
GROUP BY 2 ) AS users
LEFT JOIN stats_download ON stats_download.userid=queryuserid
GROUP BY name, queryuserid, GETMONTH(stats_download.date)
) as TableA
So now we have a table with one row per user and month. To turn the months into columns, we left join that table with itself once per month (this should be much faster, since this table is smaller):
Select name, queryuserid, A.cnt as January, B.cnt as February from TableA left join TableA as A on (TableA.queryuserid = A.queryuserid and A.month=1) left join TableA as B on (TableA.queryuserid = B.queryuserid and B.month=2)
I haven't run it so I may have some typos but this is the direction.
Hope it helps...