Find employees who have worked on at least 2 projects - mysql

I need to make basically this without using the "Group by"
SELECT idEmploye , COUNT(*) AS nbrProjet
FROM ressourcesprojet_
GROUP BY idEmploye
HAVING COUNT(*) > 1;
My Database code for creating it : https://sourceb.in/JePvGUccpU
Things i tried :
SELECT e.idEtape, e.idProjet, e.dateDebut, e.dateFin
FROM (
SELECT idProjet, dateDebut, dateFin
FROM Projet) AS p
RIGHT JOIN EtapexProjet AS e
ON e.dateDebut > p.dateDebut AND e.dateFin > p.dateFin
ORDER BY p.idProjet;
SELECT E.idEmploye ,
(SELECT COUNT(idProjet)
FROM ressourcesprojet_
Where E.idEmploye = idEmploye) AS nbrProjet
From employe_ E;
SELECT idEmploye
FROM ressourcesprojet_ WHERE 1 < (
SELECT COUNT(idProjet)
FROM ressourcesprojet_
where idEmploye = idEmploye);
I just can't wrap my head around it

You can use this:
SELECT DISTINCT p1.idEmploye
FROM RessourcesProjet_ AS p1
JOIN RessourcesProjet_ AS p2 ON p1.idEmploye = p2.idEmploye AND p1.idProjet != p2.idProjet
This joins the RessourcesProjet_ with itself to find all pairs of rows with the same employee and different projects, so this will find anyone who has worked on more than one project.
Then it uses DISTINCT so we don't see all the duplications.
This doesn't generalize easily to "at least N" like the methods that use COUNT() do. It needs to self-join N copies of the table, and all the ON clauses will need to check that the project isn't in any of the previous copies of the table.

The reason why you want, would affect if this is usable. Here is an example, which technically doesn't use the grouping, but if you added any other columns would easily require it:
DECLARE #test2 TABLE (num INT)
INSERT INTO #test2 VALUES (1), (2), (3), (2), (4)
SELECT DISTINCT *
FROM #Test2 t
WHERE (SELECT COUNT(*) FROM #Test2 it WHERE it.num = t.num) > 1
P.S - I have to assume this is either a puzzle or test question
Alternative Edit:
DECLARE #test2 TABLE (id INT, num INT)
INSERT INTO #test2 VALUES (1, 1), (2, 1), (2, 3), (3, 2), (3, 4)
SELECT *
FROM #Test2 t
WHERE NOT EXISTS (SELECT * FROM #Test2 it WHERE it.id = t.id AND it.num <> t.num)
This results in the following being returned:
id, num
1, 1

Related

How to determine columns dynamically for the SELECT query in MySQL with CASE statement. OR: How to replace columns dynamically in the SELECT query

I have to make some SQL query.
I'll only put here tables and results I need - I am sure this is the best way for a clear explanation (at the bottom of the question I provided SQL queries for database filling).
short description:
TASK: After full join concatenation I receive a result where (for example) tableA.point column (that is used in the SELECT statement) in some cells returns NULL. In these cases, I need to change tableA.point column to the tableB.point (from the joined table).
So, tables:
(Columns point + date are composite key.)
outcome_o:
income_o:
The result I need an example (we can see - I need a concatenated table with both out and inc columns in rows)
My attempt:
SELECT outcome_o.point,
outcome_o.date,
inc,
out
FROM income_o
FULL JOIN outcome_o ON income_o.point = outcome_o.point AND income_o.date = outcome_o.date
The result is the same as I need, except NULL in different point and date columns:
I tried to avoid this with CASE statement:
SELECT
CASE outcome_o.point
WHEN NULL
THEN income_o.point
ELSE outcome_o.point
END as point,
....
But this not works as I imagined (all cells became NULL in point column).
Could anyone help me with this solution? I know there is I have to use JOIN, CASE (case-mandatory) and possibly UNION commands.
Thanks
Tables creation:
CREATE TABLE income(
point INT,
date VARCHAR(60),
inc FLOAT
)
CREATE TABLE outcome(
point INT,
date VARCHAR(60),
ou_t FLOAT
)
INSERT INTO income VALUES
(1, '2001-03-22', 15000.0000),
(1, '2001-03-23', 15000.0000),
(1, '2001-03-24', 3400.0000),
(1, '2001-04-13', 5000.0000),
(1, '2001-05-11', 4500.0000),
(2, '2001-03-22', 10000.0000),
(2, '2001-03-24', 1500.0000),
(3, '2001-09-13', 11500.0000),
(3, '2001-10-02', 18000.0000);
INSERT INTO outcome VALUES
(1, '2001-03-14 00:00:00.000', 15348.0000),
(1, '2001-03-24 00:00:00.000', 3663.0000),
(1, '2001-03-26 00:00:00.000', 1221.0000),
(1, '2001-03-28 00:00:00.000', 2075.0000),
(1, '2001-03-29 00:00:00.000', 2004.0000),
(1, '2001-04-11 00:00:00.000', 3195.0400),
(1, '2001-04-13 00:00:00.000', 4490.0000),
(1, '2001-04-27 00:00:00.000', 3110.0000),
(1, '2001-05-11 00:00:00.000', 2530.0000),
(2, '2001-03-22 00:00:00.000', 1440.0000),
(2, '2001-03-29 00:00:00.000', 7848.0000),
(2, '2001-04-02 00:00:00.000', 2040.0000),
(3, '2001-09-13 00:00:00.000', 1500.0000),
(3, '2001-09-14 00:00:00.000', 2300.0000),
(3, '2002-09-16 00:00:00.000', 2150.0000);
The first step is to create a date range reference table. To do that, we can use Common Table Expression (cte):
WITH RECURSIVE cte AS (
SELECT Min(mndate) mindt, MAX(mxdate) maxdt
FROM (SELECT MIN(date) AS mndate, MAX(date) AS mxdate
FROM outcome
UNION
SELECT MIN(date), MAX(date)
FROM income) v
UNION
SELECT mindt + INTERVAL 1 DAY, maxdt
FROM cte
WHERE mindt + INTERVAL 1 DAY <= maxdt)
SELECT mindt
FROM cte
Here I'm trying to generate the dynamic date range based on the minimum & maximum date value from both of your tables. This is particularly useful when you don't to keep on changing the date range but if you don't mind, you can just generate them simply like so:
WITH RECURSIVE cte AS (
SELECT '2001-03-14 00:00:00' dt
UNION
SELECT dt + INTERVAL 1 DAY
FROM cte
WHERE dt + INTERVAL 1 DAY <= '2002-09-16')
SELECT mindt
FROM cte
From here, I'll do a CROSS JOIN to get the distinct point value from both tables:
...
CROSS JOIN (SELECT DISTINCT point FROM outcome
UNION
SELECT DISTINCT point FROM income) p
Now we have a reference table with all the point and date range. Let's wrap those in another cte.
WITH RECURSIVE cte AS (
SELECT Min(mndate) mindt, MAX(mxdate) maxdt
FROM (SELECT MIN(date) AS mndate, MAX(date) AS mxdate
FROM outcome
UNION
SELECT MIN(date), MAX(date)
FROM income) v
UNION
SELECT mindt + INTERVAL 1 DAY, maxdt
FROM cte
WHERE mindt + INTERVAL 1 DAY <= maxdt),
cte2 AS (
SELECT point, mindt
FROM cte
CROSS JOIN (SELECT DISTINCT point FROM outcome
UNION
SELECT DISTINCT point FROM income) p)
SELECT *
FROM cte2;
Next step is taking your current query attempt and LEFT JOIN it to the reference table:
WITH RECURSIVE cte AS (
SELECT Min(mndate) mindt, MAX(mxdate) maxdt
FROM (SELECT MIN(date) AS mndate, MAX(date) AS mxdate
FROM outcome
UNION
SELECT MIN(date), MAX(date)
FROM income) v
UNION
SELECT mindt + INTERVAL 1 DAY, maxdt
FROM cte
WHERE mindt + INTERVAL 1 DAY <= maxdt),
cte2 AS (
SELECT point, CAST(mindt AS DATE) AS rdate
FROM cte
CROSS JOIN (SELECT DISTINCT point FROM outcome
UNION
SELECT DISTINCT point FROM income) p)
SELECT *
FROM cte2
LEFT JOIN outcome
ON cte2.point=outcome.point
AND cte2.rdate=outcome.date
LEFT JOIN income
ON cte2.point=income.point
AND cte2.rdate=income.date
/*added conditions*/
WHERE cte2.point=1
AND COALESCE(outcome.date, income.date) IS NOT NULL
/*****/
ORDER BY cte2.rdate;
I noticed that your date column is using VARCHAR() datatype instead of DATE or DATETIME. Which is why my initial test return only one result. However, I do notice that if I compare YYYY-MM-DD format against your table date value, it returns other results, which is why I did CAST(mindt AS DATE) AS rdate in cte2. I do recommend that you change the date column to MySQL standard date format though.
You probably find the query a bit too long but if you have a table where you store dates or as we call it calendar table, the query will be much shorter, perhaps like this:
SELECT *
FROM calendar
LEFT JOIN outcome
ON calendar.point=outcome.point
AND calendar.rdate=outcome.date
LEFT JOIN income
ON calendar.point=income.point
AND calendar.rdate=income.date
/*added conditions*/
WHERE calendar.point=1
AND COALESCE(outcome.date, income.date) IS NOT NULL
/*****/
ORDER BY calendar.rdate;
Demo fiddle
It seems I was using the wrong syntax for the solution. So, as I found out, dynamically column selection is accessible in the SELECT query:
correct CASE statement:
(
CASE
WHEN outcome_o.point IS NULL
THEN income_o.point
ELSE outcome_o.point
END
) as point,
In this case query selects joined table column in the case the main table column is NULL.
Full query (returns result exactly I need):
SELECT
(
CASE
WHEN outcome_o.point IS NULL
THEN income_o.point
ELSE outcome_o.point
END
) as point,
(
CASE
WHEN outcome_o.date IS NULL
THEN income_o.date
ELSE outcome_o.date
END
) as date,
inc,
out
FROM income_o
FULL JOIN outcome_o ON income_o.point = outcome_o.point AND income_o.date = outcome_o.date

How to replace multiple values in a single column with SQL?

I have a column with complex user id. I want to replace the text within my select query.
This creates a new column as updated_by for every single value. I want them to be replaced in a single column. How can I achieve this?
select replace(updated_by, '5eaf5d368141560012161636', 'A'),
replace(updated_by, '5e79d03e9abae00012ffdbb3', 'B'),
replace(updated_by, '5e7b501e9abae00012ffdbd6', 'C'),
replace(updated_by, '5e7b5b199abae00012ffdbde', 'D'),
replace(updated_by, '5e7c817c9ca5540012ea6cba', 'E'),
updated_by
from my_table
GROUP BY updated_by;
In Postgres I would use a VALUES expression to form a derived table:
To just select:
SELECT *
FROM my_table m
JOIN (
VALUES
('5eaf5d368141560012161636', 'A')
, ('5e79d03e9abae00012ffdbb3', 'B')
, ('5e7b501e9abae00012ffdbd6', 'C')
, ('5e7b5b199abae00012ffdbde', 'D')
, ('5e7c817c9ca5540012ea6cba', 'E')
) u(updated_by, new_value) USING (updated_by);
Or LEFT JOIN to include rows without replacement.
You may need explicit type casts with non-default data types. See:
Casting NULL type when updating multiple rows
For repeated use, create a persisted translation table.
CREATE TABLE updated_by_translation (updated_by text PRIMARY KEY, new_value text);
INSERT INTO my_table
VALUES
('5eaf5d368141560012161636', 'A')
, ('5e79d03e9abae00012ffdbb3', 'B')
, ('5e7b501e9abae00012ffdbd6', 'C')
, ('5e7b5b199abae00012ffdbde', 'D')
, ('5e7c817c9ca5540012ea6cba', 'E')
;
Data types and constraints according to your actual use case.
SELECT *
FROM my_table m
LEFT JOIN updated_by_translation u USING (updated_by);
MySQL recently added a VALUES statement, too. The manual:
VALUES is a DML statement introduced in MySQL 8.0.19
But it requires the keyword ROW for every row. So:
...
VALUES
ROW('5eaf5d368141560012161636', 'A')
, ROW('5e79d03e9abae00012ffdbb3', 'B')
, ROW('5e7b501e9abae00012ffdbd6', 'C')
, ROW('5e7b5b199abae00012ffdbde', 'D')
, ROW('5e7c817c9ca5540012ea6cba', 'E')
...
Use case:
select case updated_by
when '5eaf5d368141560012161636' then 'A'
when '5e79d03e9abae00012ffdbb3' then 'B'
when '5e7b501e9abae00012ffdbd6' then 'C'
when '5e7b5b199abae00012ffdbde' then 'D'
when '5e7c817c9ca5540012ea6cba' then 'E'
end as updated_by
from my_table
This has to be nested liek this
SELECT
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(updated_by,
'5e7c817c9ca5540012ea6cba',
'E'),
'5e7b5b199abae00012ffdbde',
'D'),
'5e7b501e9abae00012ffdbd6',
'C'),
'5e79d03e9abae00012ffdbb3',
'B'),
'5eaf5d368141560012161636',
'A'),
updated_by
FROM
my_table
GROUP BY updated_by
This will replace all occurring, patterns, if they are not foung nothing happens
You can use a recursive CTE if you need to handle multiple values within a single row:
with replacements as (
select '5eaf5d368141560012161636' as oldval, 'A' as newval union all
select '5e79d03e9abae00012ffdbb3' as oldval, 'B' union all
select '5e7b501e9abae00012ffdbd6' as oldval, 'C' union all
select '5e7b5b199abae00012ffdbde' as oldval, 'D' union all
select '5e7c817c9ca5540012ea6cba' as oldval, 'E'
),
r as (
select r.*, row_number() over (order by oldval) as seqnum
from replacements r
),
recursive cte (
select r.seqnum, replace(t.updated_by, r.oldval, r.newval) as updated_by
from my_table t join
r
on r.seqnum = 1
union all
select r.seqnum, replace(cte.updated_by, r.oldval, r.newval) as updated_by
from cte t join
r
on r.seqnum = cte.seqnum + 1
)
select cte.*
from cte
where seqnum = (select count(*) from replacements);

MySQL: find IDs with constatnly increasing values

I have the following table:
create table my_table
(
SubjectID int,
Date Date,
Test_Value int
);
insert into my_table(SubjectID, Date, Test_Value)
values
(1, '2014-01-01', 55),
(1, '2014-01-05', 170),
(1, '2014-01-30', 160),
(2, '2014-01-02', 175),
(2, '2014-01-20', 166),
(2, '2014-01-21', 160),
(3, '2014-01-05', 70),
(3, '2014-01-07', 75),
(3, '2014-01-11', 180)
I want to find IDs with constantly increasing Test_Value over time. In this example, only SubjectID 3 satisfies that condition. Could you write the code to find this out? Thanks for your help as always.
SELECT *
FROM my_table o
WHERE NOT EXISTS (
SELECT null
FROM my_table t1
INNER JOIN my_table t2 ON t2.Date > t1.Date AND t2.Test_Value < t1.Test_Value AND t1.SubjectID = t2.SubjectID
WHERE t1.SubjectID = o.SubjectID
)
The inner query would select all the entities that DO VIOLATE the requirements: they have later dates with least values. Then the outer select entities that do not match ones from the inner query.
SQLFiddle: http://www.sqlfiddle.com/#!2/1a7ba/12
PS: presumably if you only need an id - use SELECT DISTINCT SubjectID
If the values are not monotonically increasing, then there is at least one case where adjacent values decrease. Hence, you can reduce this problem to just looking at the previous value:
select t.SubjectId
from (select t.*,
(select TestValue
from table t2
where t2.SubjectId = t.SubjectId and
t2.Date < t.Date
order by t2.Date desc
limit 1
) as prev_Test_value
from table t
) t
group by t.SubjectId
having coalesce(sum(Test_Value < prev_Test_value), 0) = 0;

How many different ways are there to get the second row in a SQL search?

Let's say I was looking for the second most highest record.
Sample Table:
CREATE TABLE `my_table` (
`id` int(2) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL,
`value` int(10),
PRIMARY KEY (`id`)
);
INSERT INTO `my_table` (`id`, `name`, `value`) VALUES (NULL, 'foo', '200'), (NULL, 'bar', '100'), (NULL, 'baz', '0'), (NULL, 'quux', '300');
The second highest value is foo. How many ways can you get this result?
The obvious example is:
SELECT name FROM my_table ORDER BY value DESC LIMIT 1 OFFSET 1;
Can you think of other examples?
I was trying this one, but LIMIT & IN/ALL/ANY/SOME subquery is not supported.
SELECT name FROM my_table WHERE value IN (
SELECT MIN(value) FROM my_table ORDER BY value DESC LIMIT 1
) LIMIT 1;
Eduardo's solution in standard SQL
select *
from (
select id,
name,
value,
row_number() over (order by value) as rn
from my_table t
) t
where rn = 1 -- can pick any row using this
This works on any modern DBMS except MySQL. This solution is usually faster than solutions using sub-selects. It also can easily return the 2nd, 3rd, ... row (again this is achievable with Eduardo's solution as well).
It can also be adjusted to count by groups (adding a partition by) so the "greatest-n-per-group" problem can be solved with the same pattern.
Here is a SQLFiddle to play around with: http://sqlfiddle.com/#!12/286d0/1
This only works for exactly the second highest:
SELECT * FROM my_table two
WHERE EXISTS (
SELECT * FROM my_table one
WHERE one.value > two.value
AND NOT EXISTS (
SELECT * FROM my_table zero
WHERE zero.value > one.value
)
)
LIMIT 1
;
This one emulates a window function rank() for platforms that don't have them. It can also be adapted for ranks <> 2 by altering one constant:
SELECT one.*
-- , 1+COALESCE(agg.rnk,0) AS rnk
FROM my_table one
LEFT JOIN (
SELECT one.id , COUNT(*) AS rnk
FROM my_table one
JOIN my_table cnt ON cnt.value > one.value
GROUP BY one.id
) agg ON agg.id = one.id
WHERE agg.rnk=1 -- the aggregate starts counting at zero
;
Both solutions need functional self-joins (I don't know if mysql allows them, IIRC it only disallows them if the table is the target for updates or deletes)
The below one does not need window functions, but uses a recursive query to enumerate the rankings:
WITH RECURSIVE agg AS (
SELECT one.id
, one.value
, 1 AS rnk
FROM my_table one
WHERE NOT EXISTS (
SELECT * FROM my_table zero
WHERE zero.value > one.value
)
UNION ALL
SELECT two.id
, two.value
, agg.rnk+1 AS rnk
FROM my_table two
JOIN agg ON two.value < agg.value
WHERE NOT EXISTS (
SELECT * FROM my_table nx
WHERE nx.value > two.value
AND nx.value < agg.value
)
)
SELECT * FROM agg
WHERE rnk = 2
;
(the recursive query will not work in mysql, obviously)
You can use inline initialization like this:
select * from (
select id,
name,
value,
#curRank := #curRank + 1 AS rank
from my_table t, (SELECT #curRank := 0) r
order by value desc
) tb
where tb.rank = 2
SELECT name
FROM my_table
WHERE value < (SELECT max(value) FROM my_table)
ORDER BY value DESC
LIMIT 1
SELECT name
FROM my_table
WHERE value = (
SELECT min(r.value)
FROM (
SELECT name, value
FROM my_table
ORDER BY value DESC
LIMIT 2
) r
)
LIMIT 1

Sql server not in clause not working

Why this does not give any records in sql server 2008?
;with pricedCategories as
(
select * from Product.Category where CategoryID not in
(select Parent_CategoryID from Product.Category)
)
select * from pricedCategories
It seems that your query doesn't return values when there are NULL values in subquery inside CTE (if I replace NULL in insert (1, NULL) with let's say (1, 0) your query will work). If you want to get the categories that are not any other category's parents even with NULL values, you can do it like this:
DECLARE #Category TABLE (CategoryID INT, Parent_CategoryID INT)
INSERT #Category VALUES
(1, NULL),
(2, 1),
(3, 1),
(4, 2)
;WITH pricedCategories AS
(
SELECT * FROM #Category y WHERE NOT EXISTS
(SELECT Parent_CategoryID FROM #Category x
WHERE x.Parent_CategoryID = y.CategoryID)
)
SELECT * FROM pricedCategories
It is interesting to see that the following approach works the same as the approach described in your question:
;WITH pricedCategories AS
(
SELECT * FROM #Category y
WHERE y.CategoryID <> ALL(SELECT DISTINCT Parent_CategoryID FROM #Category)
)
SELECT * FROM pricedCategories
You could change your query to use the ISNULL function to replace NULL with some numeric value that is never used as CategoryID, like this:
;WITH pricedCategories AS
(
SELECT * FROM #Category WHERE CategoryID NOT IN
(SELECT ISNULL(Parent_CategoryID, -1) FROM #Category)
)
SELECT * FROM pricedCategories
But then the NULL value which means "nothing" would be changed to actual value of -1 which is not true and you shouldn't use it.