MySql: WHERE NOT EXISTS (skip duplicates) with group by

MySql: WHERE NOT EXISTS (skip duplicates) with group by - mysql

I am working on the challenge below. I have a partial solution, but I don't understand why it is wrong.
Write a query to print the hacker_id, name, and the total number of challenges created by each student. Sort your results by the total number of challenges in descending order. If more than one student created the same number of challenges, then sort the result by hacker_id. If more than one student created the same number of challenges and the count is less than the maximum number of challenges created, then exclude those students from the result.
Hackers: The hacker_id is the id of the hacker, and name is the name of the hacker.
Challenges: The challenge_id is the id of the challenge, and hacker_id is the id of the student who created the challenge.
My solution:
-- Asker's original attempt, kept exactly as posted; MySQL rejects it with the syntax error quoted below.
select h.hacker_id, h.name, count(c.challenge_id) as total from challenges c
join hackers h
on h.hacker_id= c.hacker_id
where not exists
-- NOTE(review): the alias is declared as C1 but referenced as c1 on the join line; table aliases are case-sensitive on some MySQL installations (confirm).
(select h1.hacker_id, h1.name, count(C1.challenge_id) as total1 from challenges C1
join hackers h1 on h1.hacker_id= c1.hacker_id
group by h1.hacker_id, h1.name
-- NOTE: "max(" on the next line is never closed, so the parser is still inside that expression when it reaches "order by", which is the spot the error message points at.
having total1 < max(count(c.challenge_id) and total1 = count(c.challenge_id)
order by total1 desc)
group by h.hacker_id, h.name
order by total desc
I get this error:
ERROR 1064 (42000) at line 1: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'order by total1 desc)
group by h.hacker_id, h.name
order by total desc' at line 16

-- Challenges created per hacker, excluding hackers whose count is below the
-- maximum and is shared with at least one other hacker.
-- Output columns: hacker_id, name, total; sorted by total (descending), then hacker_id.
-- Uses derived tables rather than WITH so it does not depend on CTE support.
-- NOTE: the maximum and the peer counts are taken from challenges alone; this
-- assumes every challenges.hacker_id also exists in hackers.
select p.hacker_id, p.name, p.total
from (
    -- one row per hacker with the number of challenges created
    select h.hacker_id, h.name, count(c.challenge_id) as total
    from challenges c
    join hackers h
    on h.hacker_id = c.hacker_id
    group by h.hacker_id, h.name
) p
where p.total = (
        -- the largest number of challenges created by any single hacker
        select max(m.cnt)
        from (select count(challenge_id) as cnt from challenges group by hacker_id) m
    )
   or not exists (
        -- another hacker with exactly the same count (a duplicate)
        select 1
        from (select hacker_id, count(challenge_id) as cnt from challenges group by hacker_id) d
        where d.cnt = p.total
          and d.hacker_id <> p.hacker_id
    )
order by p.total desc, p.hacker_id

Related

HackerRank SQL Challenges: Subqueries

The HackerRank Question is here.
My code for this challenge is in MySQL:
-- Asker's attempt, kept exactly as posted; MySQL rejects it with the syntax error quoted below.
SELECT c.hacker_id as id, h.name as name, COUNT(c.hacker_id) as cnt
FROM hackers h INNER JOIN challenges c
ON h.hacker_id = c.hacker_id
GROUP BY c.hacker_id
HAVING cnt in (
SELECT MAX(Counter1)
-- NOTE: the derived table opened on the next line is closed without an alias; MySQL requires every derived table in FROM to have one.
FROM (
SELECT COUNT(*) as Counter1
FROM challenges c1
GROUP BY c1.hacker_id
ORDER BY Counter1, c1.hacker_id
) LIMIT 1
-- NOTE: "OR IN (" on the next line has no left-hand operand for IN (it would need to be "OR cnt IN ("); this is where the quoted error points.
) OR IN (
SELECT Counter2
-- NOTE: this derived table is also closed without an alias.
FROM (
SELECT COUNT(*) as Counter2
FROM challenges c2
GROUP BY c2.hacker_id
HAVING Counter2=1
)
)
ORDER BY cnt DESC, id;
But the error says
ERROR 1064 (42000) at line 1: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'IN (
SELECT Counter2
FROM (
SELECT COUNT(*) as Counter2
' at line 13
How should I correct my answer, and how can I make it run faster? I am also confused about the rules: when must a subquery (derived table) be given an alias, and when must it not?
Thank you!

Use a with clause and define your counts there and pass it in the having clause.

Unfortunately, they run a fairly old version of MySQL; see the output of "select version()":
5.7.27-0ubuntu0.18.04.1
Anyway, if you want to stick with MySQL you will have to repeat your "core" query multiple times: this version does not support the WITH clause, which would have made the query more readable.
/*
Enter your query here.
*/
-- Lists every hacker with the number of challenges created (0 when none),
-- dropping hackers whose count is shared with another hacker unless that
-- count is the maximum. Sorted by count (descending), then hacker_id.
SELECT
    per_hacker.hacker_id,
    per_hacker.name,
    per_hacker.ch_cnt AS challenges_created
FROM (
    -- challenge count for every hacker
    SELECT
        h.hacker_id,
        h.name,
        COUNT(c.challenge_id) AS ch_cnt
    FROM hackers AS h
    LEFT JOIN challenges AS c
        ON c.hacker_id = h.hacker_id
    GROUP BY h.hacker_id, h.name
) AS per_hacker
LEFT JOIN (
    -- counts reached by more than one hacker
    SELECT counts.ch_cnt
    FROM (
        SELECT COUNT(c.challenge_id) AS ch_cnt
        FROM hackers AS h
        LEFT JOIN challenges AS c
            ON c.hacker_id = h.hacker_id
        GROUP BY h.hacker_id, h.name
    ) AS counts
    GROUP BY counts.ch_cnt
    HAVING COUNT(*) > 1
) AS shared
    ON shared.ch_cnt = per_hacker.ch_cnt
LEFT JOIN (
    -- the single largest count
    SELECT MAX(counts.ch_cnt) AS ch_cnt
    FROM (
        SELECT COUNT(c.challenge_id) AS ch_cnt
        FROM hackers AS h
        LEFT JOIN challenges AS c
            ON c.hacker_id = h.hacker_id
        GROUP BY h.hacker_id, h.name
    ) AS counts
) AS largest
    ON largest.ch_cnt = per_hacker.ch_cnt
WHERE shared.ch_cnt IS NULL          -- count is not duplicated
   OR largest.ch_cnt IS NOT NULL     -- duplicates are kept for the maximum count
ORDER BY challenges_created DESC, per_hacker.hacker_id ASC
winning query: Congratulations!

SQL Question Involving Number of Challenges Solved

The following question is based on a HackerRank question here. It is written as follows:
Julia asked her students to create some coding challenges. Write a
query to print the hacker_id, name, and the total number of challenges
created by each student. Sort your results by the total number of
challenges in descending order. If more than one student created the
same number of challenges, then sort the result by hacker_id. If more
than one student created the same number of challenges and the count
is less than the maximum number of challenges created, then exclude
those students from the result.
In the challenge, I submitted the following, but for some reason there is a syntax error. What is the problem?
-- Asker's attempt, kept exactly as posted; it does not parse (see the NOTE comments below).
select h.hacker_id
, h.name
, count(c.challenge_id) count1
from hackers h
join challenges c
on c.hacker_id = h.hacker_id
where h.hacker_id not in
(select hh.hacker_id
from hackers hh
join challenges cc
on cc.hacker_id = hh.hacker_id
join
( select hhh.hacker_id
, count(ccc.challenge_id) count2
from hackers hhh
join challenges ccc
on ccc.hacker_id = hhh.hacker_id
group
by hhh.hacker_id
having count(ccc.challenge_id) <
(select max(count2)
from
-- NOTE(review): this innermost query has no GROUP BY, so it yields one overall count rather than one count per hacker.
( select count(cc.challenge_id) count2
from hackers hh
join challenges cc
on hh.hacker_id = cc.hacker_id
) a
-- NOTE: the parenthesis closed on the next line ends the scalar subquery compared in HAVING; a subquery used as an expression cannot be followed by an alias ("b").
) b
) t
on t.hacker_id <> hh.hacker_id
-- NOTE: likewise, the parenthesis closed on the next line ends the NOT IN subquery, which cannot take an alias ("c").
) c
group
by h.hacker_id
, h.name
order
by count(c.challenge_id)
, h.hacker_id desc

I can't comment on the validity of what follows, but it is at least syntactically valid...
-- Hackers with the number of challenges each created, keeping a hacker only
-- when the count is the overall maximum or is reached by exactly one hacker.
SELECT
    c.hacker_id,
    h.name,
    COUNT(c.hacker_id) AS c_count
FROM hackers AS h
INNER JOIN challenges AS c
    ON c.hacker_id = h.hacker_id
GROUP BY c.hacker_id, h.name
HAVING c_count IN (
           -- counts that exactly one hacker reached
           SELECT per_hacker.cnt
           FROM (
               SELECT COUNT(*) AS cnt
               FROM challenges
               GROUP BY hacker_id
           ) AS per_hacker
           GROUP BY per_hacker.cnt
           HAVING COUNT(per_hacker.cnt) = 1
       )
    OR c_count = (
           -- the largest per-hacker count
           SELECT MAX(maxima.cnt)
           FROM (
               SELECT COUNT(hacker_id) AS cnt
               FROM challenges
               GROUP BY hacker_id
           ) AS maxima
       )
ORDER BY c_count DESC, c.hacker_id

I have tested the below two methods and they both worked fine. Just different approach to the question.
/*
Working platform:- MySQL
/
/
SELECT H.HACKER_ID,
H.NAME,
COUNT(C.CHALLENGE_ID) AS C_COUNT
FROM HACKERS H
JOIN CHALLENGES C ON C.HACKER_ID = H.HACKER_ID
GROUP BY H.HACKER_ID, H.NAME
HAVING C_COUNT =
(SELECT COUNT(C2.CHALLENGE_ID) AS C_MAX
FROM CHALLENGES AS C2
GROUP BY C2.HACKER_ID
ORDER BY C_MAX DESC LIMIT 1)
OR C_COUNT IN
(SELECT DISTINCT C_COMPARE AS C_UNIQUE
FROM (SELECT H2.HACKER_ID,
H2.NAME,
COUNT(CHALLENGE_ID) AS C_COMPARE
FROM HACKERS H2
JOIN CHALLENGES C ON C.HACKER_ID = H2.HACKER_ID
GROUP BY H2.HACKER_ID, H2.NAME) COUNTS
GROUP BY C_COMPARE
HAVING COUNT(C_COMPARE) = 1)
ORDER BY C_COUNT DESC, H.HACKER_ID;
*/
/*
The above is the original code
The following is the code I suggest
The idea is instead of looking for unique counts, you eliminate counts that are not unique
Credit: https://medium.com/jen-li-chen-in-data-science/hackerrank-sql-bd819dfcaee7
*/
-- Per-hacker challenge counts, keeping a hacker when no other hacker has the
-- same count or when the count is the top one.
SELECT
    c.hacker_id,
    h.name,
    COUNT(c.challenge_id) AS cnt
FROM Hackers AS h
INNER JOIN Challenges AS c
    ON h.hacker_id = c.hacker_id
GROUP BY
    c.hacker_id,
    h.name
HAVING
    cnt NOT IN (
        -- counts of all other hackers (correlated on the outer hacker_id)
        SELECT COUNT(others.challenge_id)
        FROM Challenges AS others
        GROUP BY others.hacker_id
        HAVING others.hacker_id <> c.hacker_id
    )
    OR cnt = (
        -- first row of the per-hacker counts sorted in descending order
        SELECT COUNT(ranked.challenge_id)
        FROM Challenges AS ranked
        GROUP BY ranked.hacker_id
        ORDER BY COUNT(*) DESC
        LIMIT 1
    )
ORDER BY
    cnt DESC,
    c.hacker_id;

MySQL interpreter calls query without an aggregate function an aggregate query

I am trying to solve this HackerRank SQL coding challenge. We have two tables, one called Hackers with columns hacker_id and name and another called Submissions with columns submission_date, submission_id, hacker_id, and score.
One query I submitted to solve the challenge was:
-- Asker's attempt, kept exactly as posted; MySQL rejects it with ERROR 1140 quoted below.
SELECT es.date, es.count, m.hacker_id, m.name, m.score
FROM
(
-- NOTE: COUNT(hacker_id) on the next line is an aggregate, but this subquery has no GROUP BY (only the HAVING further down), so submission_date is a non-aggregated column in an aggregated query.
SELECT submission_date as date, COUNT(hacker_id) as count
FROM (
SELECT submission_date, COUNT(submission_id) as count, hacker_id
FROM Submissions
GROUP BY submission_date, hacker_id
) f
HAVING count >=1
) es
JOIN (
-- Submissions whose score equals the highest score recorded on the same submission_date.
SELECT s.submission_date as date, s.hacker_id, h.name, s.score
FROM Submissions s
JOIN Hackers h ON h.hacker_id = s.hacker_id
JOIN (SELECT submission_date, MAX(score) as score FROM Submissions GROUP BY submission_date) foo ON foo.submission_date = s.submission_date
WHERE s.score = foo.score
) m
ON es.date = m.date
ORDER BY es.date
However, I got back the error:
ERROR 1140 (42000) at line 1: In aggregated query without GROUP BY, expression #1 of SELECT list contains nonaggregated column 'f.submission_date'; this is incompatible with sql_mode=only_full_group_by
This really confused me since I didn't use an aggregate function in my query. Why did the interpreter give this error, and what should I think about to resolve it?

There are two columns named count in the first subquery: one is the count of submission_id and the other is the count of hacker_id. As I understand it, the query needs to select the submission dates whose count is greater than or equal to 1. So, depending on which count is meant to be greater than or equal to 1, the query might be rewritten as follows:
1) If count(submission_id) must be greater than or equal to 1:
-- Variant 1: the ">= 1" filter is applied to the per-(date, hacker) submission
-- count before the hackers are counted per date.
SELECT
    daily.date,
    daily.count,
    top_scores.hacker_id,
    top_scores.name,
    top_scores.score
FROM (
    -- number of hackers per submission date
    SELECT
        per_hacker.submission_date AS date,
        COUNT(per_hacker.hacker_id) AS count
    FROM (
        SELECT
            submission_date,
            COUNT(submission_id) AS count,
            hacker_id
        FROM Submissions
        GROUP BY submission_date, hacker_id
    ) AS per_hacker
    WHERE per_hacker.count >= 1
    GROUP BY per_hacker.submission_date
) AS daily
INNER JOIN (
    -- submissions whose score equals the best score of their date
    SELECT
        s.submission_date AS date,
        s.hacker_id,
        h.name,
        s.score
    FROM Submissions AS s
    INNER JOIN Hackers AS h
        ON h.hacker_id = s.hacker_id
    INNER JOIN (
        SELECT submission_date, MAX(score) AS score
        FROM Submissions
        GROUP BY submission_date
    ) AS best
        ON best.submission_date = s.submission_date
       AND best.score = s.score
) AS top_scores
    ON daily.date = top_scores.date
ORDER BY daily.date
2) If count(hacker_id) must be greater than or equal to 1:
-- Variant 2: the ">= 1" filter is applied to the number of hackers counted
-- per submission date.
SELECT
    daily.date,
    daily.count,
    top_scores.hacker_id,
    top_scores.name,
    top_scores.score
FROM (
    -- number of hackers per submission date
    SELECT
        per_hacker.submission_date AS date,
        COUNT(per_hacker.hacker_id) AS count
    FROM (
        SELECT
            submission_date,
            COUNT(submission_id) AS count,
            hacker_id
        FROM Submissions
        GROUP BY submission_date, hacker_id
    ) AS per_hacker
    GROUP BY per_hacker.submission_date
    HAVING COUNT(per_hacker.hacker_id) >= 1
) AS daily
INNER JOIN (
    -- submissions whose score equals the best score of their date
    SELECT
        s.submission_date AS date,
        s.hacker_id,
        h.name,
        s.score
    FROM Submissions AS s
    INNER JOIN Hackers AS h
        ON h.hacker_id = s.hacker_id
    INNER JOIN (
        SELECT submission_date, MAX(score) AS score
        FROM Submissions
        GROUP BY submission_date
    ) AS best
        ON best.submission_date = s.submission_date
       AND best.score = s.score
) AS top_scores
    ON daily.date = top_scores.date
ORDER BY daily.date
The problem is that the subquery used the aggregate function count(hacker_id) without a GROUP BY clause while also selecting submission_date as a non-aggregated column; adding GROUP BY submission_date resolves the error.
Still, I'm not sure that the query is logically correct.

SQL: exclude code

I don't understand what's the meaning of this clause:
cn = (select count(c1.challenge_id) from challenges as c1
group by c1.hacker_id
order by count(c1.challenge_id) desc limit 1) "?
Whole SQL:
-- Lists each hacker with the number of challenges they created (cn), keeping only
-- hackers whose cn equals the largest count or is not matched by any other hacker's count.
select c.hacker_id, h.name, count(c.challenge_id) as cn
from challenges as c join hackers as h
on c.hacker_id = h.hacker_id
group by c.hacker_id having
cn =
-- Scalar subquery: per-hacker counts sorted in descending order, first row only, i.e. the largest count.
(select count(c1.challenge_id) from challenges as c1
group by c1.hacker_id
order by count(c1.challenge_id) desc limit 1)
or
cn not in
-- Correlated subquery: the counts of every hacker other than the current outer hacker.
(select count(c2.challenge_id) from challenges as c2
group by c2.hacker_id having
c.hacker_id != c2.hacker_id)
-- Largest count first; ties ordered by hacker_id.
order by cn desc, c.hacker_id

select count(c1.challenge_id) from challenge
It counts the number of challenges.
select count(c1.challenge_id) from challenges as c1
group by c1.hacker_id
It counts the challenges created by each hacker.
E.g. hacker A has created 56 challenges and
hacker B has created 36 challenges.
order by count(c1.challenge_id) desc limit 1
It orders the count of challenges by hackers in descending order. Limit 1 picks up the first element in this order.
Taken together, it is a query that finds the maximum number of challenges created by any single hacker.

Exclude results on basis of test cases

Write a query to print the hacker_id, name, and the total number of challenges created by each student. Sort your results by the total number of challenges in descending order. If more than one student created the same number of challenges, then sort the result by hacker_id. If more than one student created the same number of challenges and the count is less than the maximum number of challenges created, then exclude those students from the result.
I have also attached the images for the respective tables
Table Hackers:
hacker_id name
5077 Rose
21283 Angela
62743 Frank
88255 Patrick
96196 Lisa
Table Challenges:
challenge_id hacker_id
61654 5077
58302 21283
40587 88255
29477 5077
1220 21283
69514 21283
46561 62743
58077 62743
18483 88255
76766 21283
52382 5077
74467 21283
33625 96196
26053 88255
42665 62743
12859 62743
70094 21283
34599 88255
54680 88255
61881 5077
So, far I have done this
-- Asker's attempt, kept exactly as posted.
-- NOTE(review): "GROUP by 1" already refers to c.hacker_id (select-list position 1), so that column is listed twice.
-- NOTE(review): HAVING compares the per-group alias with MAX() of that same alias; finding the maximum across all hackers presumably needs a separate subquery over the per-hacker counts.
-- NOTE(review): ties are ordered by hacker_id DESC here, while the task presumably expects ascending hacker_id (confirm).
SELECT c.hacker_id, h.name, COUNT(c.challenge_id) AS challenge_count
FROM Challenges c LEFT JOIN Hackers h on c.hacker_id = h.hacker_id
GROUP by 1,c.hacker_id HAVING challenge_count >=
MAX(challenge_count) ORDER BY challenge_count DESC ,c.hacker_id DESC;
But not getting expected output. My output
I need to exclude duplicates from the output: hackers who have the same number of challenges as another hacker should be excluded.

There are several criteria here:
hacker_id, name, and the total number of challenges created by each student
sort your results by the total number of challenges in descending order.
If more than one student created the same number of challenges, then sort the result by hacker_id.
If more than one student created the same number of challenges, then exclude those students from the result,
except if the count equals the maximum number of challenges created.
The following deals with items 1, 2, and 3:
-- Every hacker that created at least one challenge, with the number of
-- challenges created; largest count first, ties ordered by hacker_id.
SELECT
    h.*,
    COUNT(c.challenge_id) AS challenge_count
FROM challenges AS c
INNER JOIN hackers AS h
    ON h.hacker_id = c.hacker_id
GROUP BY h.hacker_id
ORDER BY challenge_count DESC, hacker_id;
We can join this query to itself once to resolve criterion 4, and again to resolve criterion 5, as follows:
-- Joins the per-hacker counts to themselves twice: once to find another hacker
-- with the same count, once to find any hacker with a larger count. A hacker is
-- kept when either lookup finds nothing. DISTINCT collapses the rows the joins
-- multiply.
SELECT DISTINCT me.*
FROM (
    SELECT
        h.*,
        COUNT(c.challenge_id) AS challenge_count
    FROM hackers AS h
    INNER JOIN challenges AS c
        ON c.hacker_id = h.hacker_id
    GROUP BY h.hacker_id
) AS me
LEFT JOIN (
    SELECT
        h.*,
        COUNT(c.challenge_id) AS challenge_count
    FROM hackers AS h
    INNER JOIN challenges AS c
        ON c.hacker_id = h.hacker_id
    GROUP BY h.hacker_id
) AS same_count
    ON same_count.challenge_count = me.challenge_count
   AND same_count.hacker_id <> me.hacker_id
LEFT JOIN (
    SELECT
        h.*,
        COUNT(c.challenge_id) AS challenge_count
    FROM hackers AS h
    INNER JOIN challenges AS c
        ON c.hacker_id = h.hacker_id
    GROUP BY h.hacker_id
) AS higher
    ON higher.challenge_count > me.challenge_count
WHERE higher.hacker_id IS NULL        -- nobody created more: this is the maximum
   OR same_count.hacker_id IS NULL    -- nobody else has the same count
ORDER BY challenge_count DESC, hacker_id;

-- Every hacker with the number of challenges created (0 when none), kept when
-- that number equals the largest per-hacker count in Challenges or is held by
-- exactly one hacker. Sorted by count (descending), then hacker_id.
SELECT
    hk.name,
    hk.hacker_id,
    COALESCE(tally.challengeCount, 0) AS challengeCount
FROM Hackers AS hk
LEFT JOIN (
    SELECT hacker_id, COUNT(*) AS challengeCount
    FROM Challenges
    GROUP BY hacker_id
) AS tally
    ON hk.hacker_id = tally.hacker_id
WHERE COALESCE(tally.challengeCount, 0) = (
          -- the largest per-hacker count
          SELECT MAX(mx.challengeCount)
          FROM (
              SELECT COUNT(*) AS challengeCount
              FROM Challenges
              GROUP BY hacker_id
          ) AS mx
      )
   OR COALESCE(tally.challengeCount, 0) IN (
          -- counts held by exactly one hacker (hackers without challenges count as 0)
          SELECT all_counts.challengeCount
          FROM (
              SELECT
                  h2.hacker_id,
                  COALESCE(t2.challengeCount, 0) AS challengeCount
              FROM Hackers AS h2
              LEFT JOIN (
                  SELECT hacker_id, COUNT(*) AS challengeCount
                  FROM Challenges
                  GROUP BY hacker_id
              ) AS t2
                  ON h2.hacker_id = t2.hacker_id
          ) AS all_counts
          GROUP BY all_counts.challengeCount
          HAVING COUNT(*) = 1
      )
ORDER BY COALESCE(tally.challengeCount, 0) DESC,
         hk.hacker_id

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

MySql: WHERE NOT EXISTS (skip duplicates) with group by - mysql

Related

HackerRank SQL Challenges: Subqueries

SQL Question Involving Number of Challenges Solved

MySQL interpreter calls query without an aggregate function an aggregate query

SQL: exclude code

Exclude results on basis of test cases

Categories

Resources