i want help on a way to merge two complex MYSQL queries.
Query 1:
SELECT p.*
FROM posts p
INNER JOIN
(
SELECT a.ID
FROM posts a
INNER JOIN post_tags b
ON a.ID = b.post_ID
WHERE a.post LIKE '%mmmm%' AND
b.tagname IN ('#test','#iseeyou')
GROUP BY ID
HAVING COUNT(DISTINCT b.tagname) = 2
) sub ON p.ID = sub.ID
ORDER BY p.upvotes DESC, p.unix_timestamp DESC
Query 2:
SELECT p.*, ((upvotes + 1.9208) / (upvotes + downvotes) - 1.96 * SQRT((upvotes * downvotes)
/ (upvotes + downvotes) + 0.9604) / (upvotes + downvotes))
/ (1 + 3.8416 / (upvotes + downvotes)) AS ci_lower_bound
FROM posts p WHERE upvotes + downvotes > 0
AND p.unix_timestamp BETWEEN 1363023402 AND 1363109802 ORDER BY ci_lower_bound DESC
A small table definition is given at SQL Fiddle
Actually, the first one is a search query and the second one gives the most popular results based on votes in the last 24 hours, so i want use the search query based on the formula used in the second one and also the time range
With minimal changes something like this (if I have understood what you want correctly)
SELECT p.*, ((upvotes + 1.9208) / (upvotes + downvotes) - 1.96 * SQRT((upvotes * downvotes)
/ (upvotes + downvotes) + 0.9604) / (upvotes + downvotes))
/ (1 + 3.8416 / (upvotes + downvotes)) AS ci_lower_bound
FROM posts p
INNER JOIN
(
SELECT a.ID
FROM posts a INNER JOIN post_tags b ON a.ID = b.post_ID
WHERE a.post LIKE '%mmmm%' AND b.tagname IN ('#test','#iseeyou')
GROUP BY ID
HAVING COUNT(DISTINCT b.tagname) = 2
) sub ON p.ID = sub.ID
WHERE upvotes + downvotes > 0
AND p.unix_timestamp BETWEEN 1363023402 AND 1363109802
ORDER BY ci_lower_bound DESC
Possibly a touch more efficient to swap around the WHERE clause on the subselect (the leading % on the like will not use an index, hence probably more efficient to join that against the easily indexed check on post_tags))
SELECT p.*, ((upvotes + 1.9208) / (upvotes + downvotes) - 1.96 * SQRT((upvotes * downvotes)
/ (upvotes + downvotes) + 0.9604) / (upvotes + downvotes))
/ (1 + 3.8416 / (upvotes + downvotes)) AS ci_lower_bound
FROM posts p
INNER JOIN
(
SELECT a.ID
FROM post_tags b STRAIGHT_JOIN posts a ON a.ID = b.post_ID
WHERE a.post LIKE '%mmmm%' AND b.tagname IN ('#test','#iseeyou')
GROUP BY ID
HAVING COUNT(DISTINCT b.tagname) = 2
) sub ON p.ID = sub.ID
WHERE upvotes + downvotes > 0
AND p.unix_timestamp BETWEEN 1363023402 AND 1363109802
ORDER BY ci_lower_bound DESC
Related
I have quite the beast of a SQL statement and I was hoping for a little help. I have found that I need to be using an OUTER join for some of these tables but MySQL doesn't have that function.
SELECT validclick.CampaignName AS CampaignName,
validclick.Website AS Website,
validclick.RevClicks AS RevClicks,
validclick.Revenue AS Revenue,
validclick.TQ AS TQ,
validclick.Source AS Source,
validclick.ScoredClicks AS ScoredClicks,
validclick.Searches AS Searches,
vci.Impressions AS Impressions,
(validclick.RevClicks / validclick.Searches * 100) AS CTR,
(IFNULL(yaho.Spend, 0) + IFNULL(facebook.Spend, 0) + IFNULL(google.Spend, 0)) AS Spend,
(IFNULL(yaho.PaidClicks, 0) + IFNULL(facebook.PaidClicks, 0) + IFNULL(google.PaidClicks, 0)) AS PaidClicks,
(validclick.Revenue -
(IFNULL(yaho.Spend, 0) + IFNULL(facebook.Spend, 0) + IFNULL(google.Spend, 0))) AS Profit,
(validclick.TQ / validclick.ScoredClicks) AS ScoredTQ,
(validclick.TQ / validclick.RevClicks) AS UnscoredTQ,
(validclick.Revenue /
(IFNULL(yaho.PaidClicks, 0) + IFNULL(facebook.PaidClicks, 0) + IFNULL(google.PaidClicks, 0))) AS RPI,
(validclick.Revenue / validclick.RevClicks) AS RPC,
((IFNULL(yaho.Spend, 0) + IFNULL(facebook.Spend, 0) + IFNULL(google.Spend, 0)) /
(IFNULL(yaho.PaidClicks, 0) + IFNULL(facebook.PaidClicks, 0) + IFNULL(google.PaidClicks, 0))) AS CPC,
((validclick.Revenue -
(IFNULL(yaho.Spend, 0) + IFNULL(facebook.Spend, 0) + IFNULL(google.Spend, 0))) /
(IFNULL(yaho.Spend, 0) + IFNULL(facebook.Spend, 0) + IFNULL(google.Spend, 0)) * 100) AS ROI,
(validclick.RevClicks /
(IFNULL(yaho.PaidClicks, 0) + IFNULL(facebook.PaidClicks, 0) + IFNULL(google.PaidClicks, 0)) * 100) AS Conversion
FROM
(SELECT camp.CampaignName AS CampaignName,
MAX(camp.Website) AS Website,
MAX(camp.Source) AS Source,
SUM(vc.Clicks) AS RevClicks,
SUM(vc.Revenue) AS Revenue,
SUM(vc.TQ) AS TQ,
SUM(vc.ScoredClicks) AS ScoredClicks,
SUM(vc.BiddedSearches) AS Searches
FROM
(SELECT AffID,
MAX(CampaignName) AS CampaignName,
Website,
MAX(Source) AS Source
FROM campaigns
GROUP BY AffID) AS camp
JOIN
(SELECT AffID,
SUM(Clicks) AS Clicks,
SUM(AffiliateRevenue) AS Revenue,
SUM(BiddedSearches) AS BiddedSearches,
SUM(CASE WHEN TQ > 0 THEN (TQ * Clicks) ELSE NULL END) AS TQ,
SUM(CASE WHEN TQ > 0 THEN Clicks ELSE NULL END) AS ScoredClicks
FROM validclickvc
WHERE Date BETWEEN '2018-05-06' AND '2018-05-07'
GROUP BY AffID) AS vc
ON vc.AffID = camp.AffID
GROUP BY camp.CampaignName) AS validclick
LEFT JOIN
(SELECT CampaignName,
SUM(Spend) AS Spend,
SUM(OutboundClicks) AS PaidClicks
FROM facebookads
WHERE Date BETWEEN '2018-05-06' AND '2018-05-07'
GROUP BY CampaignName) AS facebook
ON validclick.CampaignName = facebook.CampaignName
LEFT JOIN
(SELECT CampaignName,
SUM(Spend) AS Spend,
SUM(Clicks) AS PaidClicks
FROM yahoo
WHERE Date BETWEEN '2018-05-06' AND '2018-05-07'
GROUP BY CampaignName) AS yaho
ON validclick.CampaignName = yaho.CampaignName
LEFT JOIN
(SELECT CampaignName,
SUM(Cost) AS Spend,
SUM(Clicks) AS PaidClicks
FROM adwords
WHERE Date BETWEEN '2018-05-06' AND '2018-05-07'
GROUP BY CampaignName) AS google
ON validclick.CampaignName = google.CampaignName
LEFT JOIN
(SELECT camp.CampaignName AS CampaignName,
SUM(vc.Impressions) AS Impressions
FROM
(SELECT AffID,
MAX(CampaignName) AS CampaignName
FROM campaigns
GROUP BY AffID) AS camp
JOIN
(SELECT ty AS AffID,
COUNT(DISTINCT(id)) AS Impressions
FROM validclickimpressions
WHERE ts BETWEEN '2018-05-06 00:00' AND '2018-05-07 23:59'
GROUP BY AffID) AS vc
ON vc.AffID = camp.AffID
GROUP BY camp.CampaignName) AS vci
ON validclick.CampaignName = vci.CampaignName
ORDER BY CampaignName;
I'm aware this may not be the most efficient MySQL statement, but we will soon be changing the structure of things so i's not a big deal. For now I need some way to use OUTER join, or do a LEFT JOIN UNTION RIGHT JOIN but with an alias. The tables (aliases) I need to full join are vci, facebook, google, and yaho. Any ideas on how to tackle this?
MySQL does not support FULL [OUTER] JOIN.
We can emulate a full outer join in MySQL using two separate select, combining the results. The pattern of an outer join and an anti-join.
-- outer join (all rows from a along with matching rows from b)
SELECT ... FROM a LEFT JOIN b ON ...
UNION ALL
-- anti-join (rows from b with no matching row in a)
SELECT ... FROM b LEFT JOIN a ON ... WHERE a.notnull_col IS NULL
As a simplistic demonstration, two tables a and b:
table a table b
an bn
------- -------
1 2
2 3
3 5
4 7
Example query
-- left outer join (rows in a with matching rows from b)
SELECT a.an, b.bn
FROM a
LEFT
JOIN b
ON b.bn = a.an
UNION ALL
-- anti-join (rows from b with no match in a)
SELECT a.an, b.bn
FROM b
LEFT
JOIN a
ON a.an = b.bn
WHERE a.an IS NULL
should return something like this (without ORDER BY clause, order of rows is indeterminate)
an bn
------- -------
1 (NULL)
2 2
3 3
4 (NULL)
(NULL) 5
(NULL) 7
I'm attempting to assign the closest location to a community based on the community postcode and using the Haversine formula with SQL described here. I need to return a single scalar value but I can't seem to avoid having the second calculated distance value which is needed to determine the closest location. Help.
UPDATE Community AS c
JOIN Postcode p on p.id = c.postcode_id
JOIN (
SELECT 100.0 AS radius, 111.045 AS distance_unit
) AS a
SET c.location_id = (
SELECT l.id,
a.distance_unit
* DEGREES(ACOS(COS(RADIANS(p.latitude))
* COS(RADIANS(l.latitude))
* COS(RADIANS(p.longitude - l.longitude))
+ SIN(RADIANS(p.latitude))
* SIN(RADIANS(l.latitude)))) AS distance
FROM Location AS l
WHERE l.latitude
BETWEEN p.latitude - (a.radius / a.distance_unit)
AND p.latitude + (a.radius / a.distance_unit)
AND l.longitude
BETWEEN p.longitude - (a.radius / (a.distance_unit * COS(RADIANS(p.latitude))))
AND p.longitude + (a.radius / (a.distance_unit * COS(RADIANS(p.latitude))))
HAVING distance <= a.radius
ORDER BY distance
LIMIT 1
)
Using the structure you have, you need to move the distance calculation into the WHERE and ORDER BY clauses:
SET c.location_id = (
SELECT l.id
FROM Location AS l
WHERE l.latitude
BETWEEN p.latitude - (a.radius / a.distance_unit)
AND p.latitude + (a.radius / a.distance_unit)
AND l.longitude
BETWEEN p.longitude - (a.radius / (a.distance_unit * COS(RADIANS(p.latitude))))
AND p.longitude + (a.radius / (a.distance_unit * COS(RADIANS(p.latitude))))
AND a.distance_unit
* DEGREES(ACOS(COS(RADIANS(p.latitude))
* COS(RADIANS(l.latitude))
* COS(RADIANS(p.longitude - l.longitude))
+ SIN(RADIANS(p.latitude))
* SIN(RADIANS(l.latitude)))) <= a.radius
ORDER BY a.distance_unit
* DEGREES(ACOS(COS(RADIANS(p.latitude))
* COS(RADIANS(l.latitude))
* COS(RADIANS(p.longitude - l.longitude))
+ SIN(RADIANS(p.latitude))
* SIN(RADIANS(l.latitude))))
LIMIT 1
)
I have table as linkage with below values
++++++++++++++++++++++++++
+ company_id + industry +
++++++++++++++++++++++++++
+ 1 + a +
+ 1 + b +
+ 2 + a +
+ 2 + c +
+ 3 + a +
+ 4 + c +
+ 5 + a +
++++++++++++++++++++++++++
Is there a way that i can group my industry to get the top count sort by desc order example.
a = count 4
c = count 2
b = count 1
then delete duplicated industry leaving only the industry that has the higher count for each company_id.
Edit 1
This edit is based on OP comment I wish to only have the industry with the highest count, and deleting the rest of the entry for the same company_id. say for company_id 1, we will delete the second row, for company_id 2 we will delete the forth row.
Below is what I have.
++++++++++++++++++++++++++
+ company_id + industry +
++++++++++++++++++++++++++
+ 1 + a +
+ 1 + b +
+ 1 + c +
+ 2 + a +
+ 2 + c +
+ 3 + a +
+ 4 + c +
+ 5 + a +
++++++++++++++++++++++++++
as we see in column industry, a has max count, I would like to keep this entry per duplicated company_id and remove rest all enteries.
Consider company_id=1. I would need to remove second and third row.
Consider company_id=2. I would need to remove fifth row.
For id=3,4,5 nothing will happen as those are not duplicated.
So final data that should be there in my table is
++++++++++++++++++++++++++
+ company_id + industry +
++++++++++++++++++++++++++
+ 1 + a +
+ 2 + a +
+ 3 + a +
+ 4 + c +
+ 5 + a +
++++++++++++++++++++++++++
select t6.company_id,t6.industry from
(select t5.company_id,t5.industry,
row_number() over (partition by t5.company_id order by t5.company_id) rn
from
(select t3.company_id,t4.industry from
(select t2.company_id,max(t2.count) count from(
select m.company_id,m.industry,t1.count from linkage m
join
(select n.industry,count(n.industry) count from linkage n
group by n.industry
order by count desc)t1
on m.industry = t1.industry
order by m.company_id)t2
group by t2.company_id
order by t2.company_id)t3
join
(
select m.company_id,m.industry,t1.count from linkage m
join
(select n.industry,count(n.industry) count from linkage n
group by n.industry
order by count desc)t1
on m.industry = t1.industry
order by m.company_id)t4
on t3.company_id = t4.company_id
and t3.count = t4.count)t5
)t6
where t6.rn = '1'
How about this?
SELECT industry, count(industry) as "total"
FROM linkage
GROUP BY industry
ORDER BY total DESC
Demo at sqlfiddle
Edit 1
Can you take at look at below question.
how can I delete duplicate records from my database
I think that is what you are looking for.
select n.industry,count(n.industry) count from linkage n
group by n.industry
order by count desc
select t3.company_id,t4.industry from
(select t2.company_id,max(t2.count) count from(
select m.company_id,m.industry,t1.count from linkage m
join
(select n.industry,count(n.industry) count from linkage n
group by n.industry
order by count desc)t1
on m.industry = t1.industry
order by m.company_id)t2
group by t2.company_id
order by t2.company_id)t3
join
(
select m.company_id,m.industry,t1.count from linkage m
join
(select n.industry,count(n.industry) count from linkage n
group by n.industry
order by count desc)t1
on m.industry = t1.industry
order by m.company_id)t4
on t3.company_id = t4.company_id
and t3.count = t4.count
Demo at sqlfiddle
I have one problem with this query; I can't seem to get ((total + rec_host) / 2) AS total2 to work. How would I go about this procedure without doing:
((((rank_ur + rank_scs + rank_tsk + rank_csb + rank_vfm + rank_orr) / 6) + rec_host ) / 2)
Here's my Query:
SELECT host_name,
SUM(rank_ur) AS cnt1,
SUM(rank_scs) AS cnt2,
SUM(rank_tsk) AS cnt3,
SUM(rank_csb) AS cnt4,
SUM(rank_vfm) AS cnt5,
SUM(rank_orr) AS cnt6,
SUM(IF(rec_host = 1,1,0)) AS rh1,
SUM(IF(rec_host = 0,1,0)) AS rh2,
((rank_ur + rank_scs + rank_tsk + rank_csb + rank_vfm + rank_orr) / 6) AS total,
((total + rec_host) / 2) AS total2
FROM lhr_reviews
GROUP BY host_name
ORDER BY total
DESC LIMIT 0,10
Use a subquery like so:
SELECT
host_name,
cnt1,
cnt2,
cnt3,
cnt4,
cnt5,
cnt6,
rh1,
rh2,
total,
((total + rec_host) / 2) AS total2
FROM
(
SELECT host_name,
rec_host,
SUM(rank_ur) AS cnt1,
SUM(rank_scs) AS cnt2,
SUM(rank_tsk) AS cnt3,
SUM(rank_csb) AS cnt4,
SUM(rank_vfm) AS cnt5,
SUM(rank_orr) AS cnt6,
SUM(IF(rec_host = 1,1,0)) AS rh1,
SUM(IF(rec_host = 0,1,0)) AS rh2,
((rank_ur + rank_scs + rank_tsk +
rank_csb + rank_vfm + rank_orr
) / 6) AS total
FROM lhr_reviews
GROUP BY host_name, rec_host
) t
ORDER BY total
DESC LIMIT 0,10;
What you could do is this:
select x.*, ((x.total + rec_host) / 2) AS total2
from (
SELECT host_name, rec_host,
SUM(rank_ur) AS cnt1,
SUM(rank_scs) AS cnt2,
SUM(rank_tsk) AS cnt3,
SUM(rank_csb) AS cnt4,
SUM(rank_vfm) AS cnt5,
SUM(rank_orr) AS cnt6,
SUM(IF(rec_host = 1,1,0)) AS rh1,
SUM(IF(rec_host = 0,1,0)) AS rh2,
((rank_ur + rank_scs + rank_tsk + rank_csb + rank_vfm + rank_orr) / 6) AS total
FROM lhr_reviews
GROUP BY host_name
ORDER BY total
DESC LIMIT 0,10
) as x
;
You cannot use the column as an alias when the alias and other column are in the same level of SELECT. So you can use a derived query which lets you basically rename your columns and/or name any computed columns.Check on Rubens Farias and Rob Van Dam answer here
PS: will search for a better article to update the answer :)
I need to SUM the contents of a column which is already worked out using GROUP BYs.. How exactly would you go about that?
The group should be based on the user name, not the entire contents of the result set. I believe this essentially a group by on that username field, but that i believe would break how the query currently works..
Example below:
SELECT A1.USERNAME, DATE_FORMAT(FROM_UNIXTIME(A1.TIME_STAMP),'%Y-%m-%d') AS DTTM, A1.ACCTSESSIONID,
MAX(IFNULL(A1.ACCTINPUTGW,0) * POW(2,32) + IFNULL(A1.ACCTINPUTOCT, 0)) - MAX(IFNULL(A2.ACCTINPUTGW,0) * POW(2,32) + IFNULL(A2.ACCTINPUTOCT, 0)) as TOTAL_UPLOAD,
MAX(IFNULL(A1.ACCTOUTPUTGW,0) * POW(2,32) + IFNULL(A1.ACCTOUTPUTOCT, 0)) - MAX(IFNULL(A2.ACCTOUTPUTGW,0) * POW(2,32) + IFNULL(A2.ACCTOUTPUTOCT, 0)) as TOTAL_DOWNLOAD
FROM ACCOUNTING A1
LEFT JOIN ACCOUNTING A2
ON A1.ACCTSESSIONID = A2.ACCTSESSIONID
AND DATE_FORMAT(FROM_UNIXTIME(A2.TIME_STAMP), '%Y-%m-%d') = '2011-07-04'
WHERE DATE_FORMAT(FROM_UNIXTIME(A1.TIME_STAMP), '%Y-%m-%d') = '2011-07-05'
GROUP BY A1.ACCTSESSIONID,A2.ACCTSESSIONID
ORDER BY A1.USERNAME
Edit:
The columns would be: TOTAL_DOWNLOAD and TOTAL_UPLOAD
Thanks # ypercube, worked a treat
SELECT A3.USERNAME
, SUM(A3.TOTAL_UPLOAD) AS FINAL_UPLOAD
, SUM(A3.TOTAL_DOWNLOAD) AS FINAL_DOWNLOAD
FROM
( SELECT
A1.USERNAME
, DATE_FORMAT(FROM_UNIXTIME(A1.TIME_STAMP),'%Y-%m-%d') AS DTTM
, A1.ACCTSESSIONID
, MAX(IFNULL(A1.ACCTINPUTGW,0) * POW(2,32) + IFNULL(A1.ACCTINPUTOCT, 0))
- MAX(IFNULL(A2.ACCTINPUTGW,0) * POW(2,32) + IFNULL(A2.ACCTINPUTOCT, 0))
AS TOTAL_UPLOAD
, MAX(IFNULL(A1.ACCTOUTPUTGW,0) * POW(2,32) + IFNULL(A1.ACCTOUTPUTOCT, 0))
- MAX(IFNULL(A2.ACCTOUTPUTGW,0) * POW(2,32) + IFNULL(A2.ACCTOUTPUTOCT, 0))
AS TOTAL_DOWNLOAD
FROM ACCOUNTING A1
LEFT JOIN ACCOUNTING A2
ON A1.ACCTSESSIONID = A2.ACCTSESSIONID
AND DATE_FORMAT(FROM_UNIXTIME(A2.TIME_STAMP), '%Y-%m-%d') = '2011-07-04'
WHERE DATE_FORMAT(FROM_UNIXTIME(A1.TIME_STAMP), '%Y-%m-%d') = '2011-07-05'
GROUP BY A1.ACCTSESSIONID,A2.ACCTSESSIONID
ORDER BY A1.USERNAME
) AS A3
GROUP BY A3.USERNAME