MySQL: extract unique dates - mysql

I have created a table with
-- Sample data: one row per visit. A user can have several visits on the
-- same calendar day (e.g. user 1 on 2021-12-24, user 3 on 2021-12-31).
CREATE TABLE visits (
    user_id    INT,
    event_date TIMESTAMP
);

INSERT INTO visits
    (user_id, event_date)
VALUES
    (1, '2021-12-22 12:12:00'),
    (1, '2021-12-23 12:12:05'),
    (1, '2021-12-24 12:13:00'),
    (1, '2021-12-24 12:14:00'),
    (1, '2022-03-10 12:14:00'),
    (1, '2022-03-11 12:14:00'),
    (2, '2021-12-23 12:12:00'),
    (1, '2022-03-12 12:14:00'),
    (2, '2021-12-23 13:12:00'),
    (1, '2022-03-13 12:14:00'),
    (1, '2022-03-14 12:14:00'),
    (3, '2021-12-25 12:12:00'),
    (1, '2022-03-15 12:14:00'),
    (1, '2022-03-20 12:14:00'),
    (1, '2022-03-21 12:14:00'),
    (1, '2022-03-23 12:14:00'),
    (1, '2022-03-24 12:14:00'),
    (1, '2022-03-25 12:14:00'),
    (3, '2021-12-30 12:12:00'),
    (3, '2021-12-31 12:12:00'),
    (3, '2021-12-31 12:12:00'),
    (4, '2022-03-21 12:12:00'),
    (4, '2022-03-22 12:12:00'),
    (4, '2022-03-23 12:12:00'),
    (4, '2022-03-24 12:12:00');
And then I try to extract unique dates with
select
user_id,
distinct cast(event_date as date) as event_date -- invalid: DISTINCT is a modifier of SELECT and must directly follow it, not precede a later column
from visits;
And I get
ERROR 1064 (42000) at line 111: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'distinct cast(event_date as date) as event_date
from visits' at line 3
What did I do wrong?

You should put the keyword distinct at the first position:
-- DISTINCT comes right after SELECT and de-duplicates whole
-- (event_date, user_id) rows.
SELECT DISTINCT
    CAST(event_date AS DATE) AS event_date,
    user_id
FROM visits;
OR
-- Same de-duplication with the columns in the other order: DISTINCT still
-- follows SELECT and covers the whole (user_id, event_date) row.
SELECT DISTINCT
    user_id,
    CAST(event_date AS DATE) AS event_date
FROM visits;
Here is a demo

You can use GROUP BY.
(The column COUNT is optional)
-- One row per (user, calendar day); "number" is how many visits fell in
-- that group. Sorted by user, then day.
SELECT
    COUNT(*)         AS "number",
    user_id,
    DATE(event_date) AS "event date"
FROM visits
GROUP BY user_id, DATE(event_date)
ORDER BY user_id, DATE(event_date);

Related

Lead window function in mysql to find sales

Given this table, I would like to know, for each day t, how many different customers made a sale on both date t and date t+1.
-- create a table: one row per sale, keyed by a unique id
CREATE TABLE sales_t (
    id         INTEGER PRIMARY KEY,
    d_date     DATE NOT NULL,
    sale       INT  NOT NULL,
    customer_n INT  NOT NULL
);

-- insert some values
-- Customer 3 has two sales on 2021-07-01. Each sale needs its own id,
-- because id is the PRIMARY KEY and a repeated value is rejected.
INSERT INTO sales_t
    (id, d_date, sale, customer_n)
VALUES
    (1, '2021-06-30', 12, 1),
    (2, '2021-06-30', 22, 5),
    (3, '2021-06-30', 111, 3),
    (4, '2021-07-01', 27, 1),
    (5, '2021-07-01', 90, 4),
    (6, '2021-07-01', 33, 3),
    (7, '2021-07-01', 332, 3);
The result for date 2021-06-30 is 2 because customer 1 and 3 made a sale in t and t+1.
Date sale_t_and_t+1
.....................................
2021-06-30 2
2021-07-01 0
Use LEAD() window function for each distinct combination of date and customer to create a flag which will be 1 if the customer is present in both days or 0 if not and aggregate:
-- Inner query: for each row, compare the customer's next sale date (LEAD
-- over the customer's rows ordered by date) with the day after d_date.
-- flag is 1 on a match, 0 on a mismatch, NULL when there is no next row;
-- DISTINCT then collapses identical (d_date, customer_n, flag) rows.
-- Outer query: sum the flags per day; COALESCE turns an all-NULL sum into 0.
SELECT
    d_date,
    COALESCE(SUM(flag), 0) AS `sale_t_and_t+1`
FROM (
    SELECT DISTINCT
        d_date,
        customer_n,
        LEAD(d_date) OVER (PARTITION BY customer_n ORDER BY d_date) = d_date + INTERVAL 1 DAY AS flag
    FROM sales_t
) AS t
GROUP BY d_date;
See the demo.

Select rows grouped by a column having max aggregate

Given the following data set, how would I find the email addresses that were references for the most ApplicationIDs that have an "Accepted" decision?
-- Reference e-mails listed on each application (three per application).
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10#test.org'), (1, 'ref11#test.org'), (1, 'ref12#test.org'),
    (2, 'ref20#test.org'), (2, 'ref21#test.org'), (2, 'ref22#test.org'),
    (3, 'ref11#test.org'), (3, 'ref31#test.org'), (3, 'ref32#test.org'),
    (4, 'ref40#test.org'), (4, 'ref41#test.org'), (4, 'ref42#test.org'),
    (5, 'ref50#test.org'), (5, 'ref51#test.org'), (5, 'ref52#test.org'),
    (6, 'ref60#test.org'), (6, 'ref11#test.org'), (6, 'ref62#test.org'),
    (7, 'ref70#test.org'), (7, 'ref71#test.org'), (7, 'ref72#test.org'),
    (8, 'ref10#test.org'), (8, 'ref81#test.org'), (8, 'ref82#test.org');

-- Final decision for each application (exactly one row per application).
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
Fiddle of same:http://sqlfiddle.com/#!9/03bcf2/1
Initially, I was using LIMIT 1 and ORDER BY CountDecision DESC, like so:
-- Count accepted applications per reference e-mail and keep only the first
-- row of the descending sort. With LIMIT 1, e-mails tied for the highest
-- count are not all returned.
SELECT
    er.email,
    COUNT(fd.Decision) AS CountDecision
FROM EmailReferences AS er
INNER JOIN FinalDecision AS fd
    ON er.ApplicationID = fd.ApplicationID
WHERE fd.Decision = 'Accepted'
GROUP BY er.email
ORDER BY CountDecision DESC
LIMIT 1;
However, it occurred to me that I could have multiple email addresses that referred different "most accepted" decisions (i.e., a tie, so to speak), and those would be filtered out (is that the right phrasing?) with the LIMIT keyword.
I then tried a variation on the above query, replacing the ORDER BY and LIMIT lines with:
HAVING MAX(CountDecision)
But I realized that that's only half a statement: MAX(CountDecision) needs to be compared to something. I just don't know what.
Any pointers would be much appreciated. Thanks!
Note: this is for a homework assignment.
Update: To be clear, I'm trying to find the value and count of Emails from EmailReferences. However, I only want rows that have FinalDecision.Decision = 'Accepted' (on matching ApplicationIDs). Based on my data, the result should be:
Email | CountDecision
---------------+--------------
ref10#test.org | 2
ref11#test.org | 2
For example...
-- Return every e-mail tied for the highest number of accepted applications.
--   per_email: accepted-application count for each reference e-mail
--   top_count: the single largest of those counts (ORDER BY ... LIMIT 1)
-- Joining the two on the count keeps all e-mails that share the maximum.
-- Table names are spelled exactly as in their CREATE TABLE statements,
-- because MySQL table names are case-sensitive on case-sensitive file systems.
SELECT per_email.*
FROM (
    SELECT
        er.email,
        COUNT(*) AS total
    FROM EmailReferences AS er
    INNER JOIN FinalDecision AS fd
        ON fd.ApplicationID = er.ApplicationID
    WHERE fd.Decision = 'Accepted'
    GROUP BY er.email
) AS per_email
INNER JOIN (
    SELECT COUNT(*) AS total
    FROM EmailReferences AS er
    INNER JOIN FinalDecision AS fd
        ON fd.ApplicationID = er.ApplicationID
    WHERE fd.Decision = 'Accepted'
    GROUP BY er.email
    ORDER BY total DESC
    LIMIT 1
) AS top_count
    ON top_count.total = per_email.total;
MySQL still lacks window functions, but when version 8 is production ready, this becomes easier. So for future reference, or for those databases like MariaDB that already have window functions:
-- Reference e-mails listed on each application (three per application).
-- NOTE: in this copy of the data application 3 lists ref30#test.org, so
-- ref11#test.org appears only on applications 1 and 6 and ref10#test.org is
-- the only e-mail attached to two accepted applications (1 and 8).
CREATE TABLE IF NOT EXISTS `EmailReferences` (
    `ApplicationID` INT         NOT NULL,
    `Email`         VARCHAR(45) NOT NULL,
    PRIMARY KEY (`ApplicationID`, `Email`)
);

INSERT INTO EmailReferences
    (ApplicationID, Email)
VALUES
    (1, 'ref10#test.org'), (1, 'ref11#test.org'), (1, 'ref12#test.org'),
    (2, 'ref20#test.org'), (2, 'ref21#test.org'), (2, 'ref22#test.org'),
    (3, 'ref30#test.org'), (3, 'ref31#test.org'), (3, 'ref32#test.org'),
    (4, 'ref40#test.org'), (4, 'ref41#test.org'), (4, 'ref42#test.org'),
    (5, 'ref50#test.org'), (5, 'ref51#test.org'), (5, 'ref52#test.org'),
    (6, 'ref60#test.org'), (6, 'ref11#test.org'), (6, 'ref62#test.org'),
    (7, 'ref70#test.org'), (7, 'ref71#test.org'), (7, 'ref72#test.org'),
    (8, 'ref10#test.org'), (8, 'ref81#test.org'), (8, 'ref82#test.org');

-- Final decision for each application (exactly one row per application).
CREATE TABLE IF NOT EXISTS `FinalDecision` (
    `ApplicationID` INT                        NOT NULL,
    `Decision`      ENUM('Accepted', 'Denied') NOT NULL,
    PRIMARY KEY (`ApplicationID`)
);

INSERT INTO FinalDecision
    (ApplicationID, Decision)
VALUES
    (1, 'Accepted'), (2, 'Denied'),
    (3, 'Accepted'), (4, 'Denied'),
    (5, 'Denied'),   (6, 'Denied'),
    (7, 'Denied'),   (8, 'Accepted');
-- Count accepted applications per reference e-mail, attach the largest of
-- those counts to every grouped row with a window MAX over the whole result,
-- then keep the rows whose own count equals it (all ties are kept).
SELECT
    email,
    CountDecision
FROM (
    SELECT
        er.email,
        COUNT(fd.Decision) AS CountDecision,
        MAX(COUNT(fd.Decision)) OVER () AS maxCountDecision
    FROM EmailReferences AS er
    INNER JOIN FinalDecision AS fd
        ON er.ApplicationID = fd.ApplicationID
    WHERE fd.Decision = 'Accepted'
    GROUP BY er.email
) AS d
WHERE CountDecision = maxCountDecision;
email | CountDecision
:------------- | ------------:
ref10#test.org | 2
dbfiddle here

Count number of rows in each day grouped by another field

For the purpose of drawing an activity chart, how can we count number of rows for each type (distinct field value) in each day?
Consider a table with a date field and a field for each type:
-- Activity log: each row has a one-letter type and the moment it occurred.
CREATE TABLE TableName (
    `PK`        INT,
    `type`      VARCHAR(1),
    `timestamp` DATETIME
);

INSERT INTO TableName
    (`PK`, `type`, `timestamp`)
VALUES
    (11, 'Q', '2013-01-04 22:23:56'),
    (7,  'A', '2013-01-03 22:23:41'),
    (8,  'C', '2013-01-04 22:23:42'),
    (10, 'Q', '2013-01-05 22:23:56'),
    (5,  'C', '2013-01-03 22:23:25'),
    (12, 'Q', '2013-01-05 22:23:57'),
    (6,  'Q', '2013-01-07 22:23:40'),
    (4,  'Q', '2013-01-02 22:23:23'),
    (9,  'A', '2013-01-05 22:23:55'),
    (1,  'A', '2013-01-08 21:29:38'),
    (2,  'Q', '2013-01-02 21:31:59'),
    (3,  'C', '2013-01-04 21:32:22');
For example output can be (last field is the count of rows with that type and in that day):
'Q', 2013-01-04, 1
'C', 2013-01-04, 2
'A', 2013-01-03, 1
'C', 2013-01-03, 2
and so on...
You just need a group by.
-- One row per (type, calendar day) with the number of rows in that group.
-- The table is written as TableName, matching its CREATE TABLE spelling
-- (MySQL table names can be case-sensitive), and the statement is terminated
-- so it does not run into whatever follows it.
SELECT `type`, date(`timestamp`), count(*)
FROM TableName
GROUP BY `type`, date(`timestamp`);
-- Number of rows per (type, calendar day), with named output columns.
-- Reads from TableName, the table created for this example.
SELECT
    `type`,
    DATE(`timestamp`) AS the_date,
    COUNT(*)          AS counter
FROM TableName
GROUP BY `type`, DATE(`timestamp`);

MySQL: Select first row with value in interval

With the following table:
-- Sample data: several num values per id.
CREATE TABLE table1 (
    `id`  INT,
    `num` INT
);

INSERT INTO table1
    (`id`, `num`)
VALUES
    (1, 1),
    (1, 5),
    (1, 7),
    (1, 12),
    (1, 22),
    (1, 23),
    (1, 24),
    (2, 1),
    (2, 6);
How do I select a row for each num interval of 5 (ie. select the first row for [0,5), the first for [5,10), the first for [10,15), etc.), with a given id? Is this possible with a MySQL query, or must I process it in a programming language later?
For reference, the output I'd want for id=1:
(1, 1), (1,5), (1,12), (1,22)
Here is a short query:
-- ceiling((num + 1)/5) gives the same bucket number to every integer num in
-- one width-5 interval (0-4 -> 1, 5-9 -> 2, ...); min(num) then picks the
-- smallest num of each bucket for the chosen id.
SELECT
    min(num),
    ceiling((num + 1)/5)
FROM table1
WHERE id = 1
GROUP BY
    ceiling((num + 1)/5);

SQL getting shifts outside of availability

I'm trying to put together an sql query to get employee shifts that are outside of their availability for a scheduling app. Availability entries will be contiguous and will never have availability entries that are back-to-back for the same employee, nor will there be availability entries that overlap for the same employee.
Basically, I need to get the shift rows where (availabilities.start <= shifts.start AND availabilities.end >= shifts.end) does NOT hold true. Phrased another way, I need to get the rows from the shifts table that are not fully contained by an availability entry.
It needs to account for these possibilities:
Shifts that start before availability
Shifts that end after availability
Shifts that do not have any availability during the shift
I'm ok with using a stored procedure instead of a query if this would be more efficient.
Here's what the tables look like:
-- Time windows in which an employee is available to work.
-- id is AUTO_INCREMENT so that rows can be inserted without supplying it.
CREATE TABLE availabilities (
    `id`          INT PRIMARY KEY AUTO_INCREMENT,
    `employee_id` INT,
    `start`       DATETIME,
    `end`         DATETIME
);

-- Shifts assigned to employees; same shape as availabilities.
CREATE TABLE shifts (
    `id`          INT PRIMARY KEY AUTO_INCREMENT,
    `employee_id` INT,
    `start`       DATETIME,
    `end`         DATETIME
);
Here is some sample data:
-- Availability windows: two per employee; id is not supplied.
INSERT INTO availabilities (`employee_id`, `start`, `end`)
VALUES
    (1, '2015-01-01 08:00:00', '2015-01-01 09:00:00'),
    (1, '2015-01-02 08:00:00', '2015-01-02 10:00:00'),
    (2, '2015-01-03 08:00:00', '2015-01-03 14:00:00'),
    (2, '2015-01-04 08:00:00', '2015-01-04 18:00:00');

-- Shifts to check against the availability windows; id is not supplied.
INSERT INTO shifts (`employee_id`, `start`, `end`)
VALUES
    (1, '2015-01-01 08:00:00', '2015-01-01 09:00:00'),
    (1, '2015-01-02 08:30:00', '2015-01-02 10:00:00'),
    (1, '2015-01-02 10:30:00', '2015-01-02 12:00:00'),
    (2, '2015-01-03 08:00:00', '2015-01-03 09:00:00'),
    (2, '2015-01-03 09:00:00', '2015-01-03 14:30:00'),
    (2, '2015-01-04 09:30:00', '2015-01-04 17:30:00'),
    (2, '2015-01-05 08:00:00', '2015-01-05 10:00:00');
I would expect the 3rd, 5th and 7th shifts to be output as they are outside of availability.
I've tried something like the following (as well as many others) however all of them either give false positives or leave out shifts.
-- Asker's attempt at listing shifts that fall outside availability.
-- NOTE(review): the LEFT JOIN pairs each shift with every availability row of
-- the same employee, and the WHERE keeps any pair in which that one
-- availability does not contain the shift. A shift that is fully covered by
-- one availability is therefore still returned through the employee's other
-- availability rows (false positives).
SELECT s.* FROM `shifts` AS `s`
LEFT JOIN `availabilities` AS `a` ON `s`.`employee_id` = `a`.`employee_id`
WHERE (NOT(a.start <= s.start AND a.end >= s.end) OR a.id IS NULL);
Does this help?
-- Shifts for which no single availability of the same employee starts at or
-- before the shift start and ends at or after the shift end.
SELECT *
FROM shifts AS s
WHERE NOT EXISTS (
    SELECT 1
    FROM availabilities AS a
    WHERE a.employee_id = s.employee_id
      AND a.start <= s.start
      AND a.end >= s.end
);