How to sort by timestamp streaks SQL - mysql

Say I have the following tables (simplified version of what I'm working with):
-- Posts written by authors; created_at holds the time the post was made.
CREATE TABLE posts (
    id         INTEGER,
    title      VARCHAR(255),
    text       TEXT,
    author_id  INTEGER,
    created_at TIMESTAMP
);

-- Authors that posts.author_id points to.
CREATE TABLE authors (
    id    INTEGER,
    name  VARCHAR(255),
    email VARCHAR(255)
);
What I want to do is retrieve only the authors, ordered by their number of one-week streaks, that is, the number of consecutive weeks in which an author published a post. The time a post was made is stored in the created_at column of posts.
What I'm having the most difficulty with is understanding how to calculate the difference in time between posts across rows. I'm using MySQL.

Without window functions this is a bit hard to do in MySQL 5.7.
But here's an experimental test snippet that uses variables:
Sample data:
-- Rebuild the two demo tables from scratch; posts goes first because it
-- holds the foreign key to authors.
DROP TABLE IF EXISTS `posts`;
DROP TABLE IF EXISTS `authors`;

CREATE TABLE `authors` (
    id    INTEGER PRIMARY KEY AUTO_INCREMENT,
    name  VARCHAR(255),
    email VARCHAR(255)
);

CREATE TABLE `posts` (
    id         INTEGER PRIMARY KEY AUTO_INCREMENT,
    title      VARCHAR(255),
    `text`     TEXT,
    author_id  INTEGER,
    created_at TIMESTAMP,
    CONSTRAINT fk_posts_author_id FOREIGN KEY (author_id) REFERENCES `authors`(id)
);

-- Two authors; AUTO_INCREMENT gives them ids 1 and 2 on the fresh table.
INSERT INTO `authors` (name, email) VALUES
    ('john doe', 'john.doe#home.net'),
    ('jane sheppard', 'jane.sheppard#home.net');

-- Five posts for author 1 and four for author 2.
INSERT INTO `posts` (author_id, created_at, title, `text`) VALUES
    (1, '2019-02-07', 'When', 'bla'),
    (1, '2019-02-09', 'I', 'bla2'),
    (1, '2019-02-14', 'Start', 'bla3'),
    (1, '2019-02-19', 'looking', 'bla4'),
    (1, '2019-03-10', '...', 'bla5'),
    (2, '2019-02-01', 'I', 'blah1'),
    (2, '2019-02-05', 'frighten', 'blah2'),
    (2, '2019-02-19', 'even', 'blah3'),
    (2, '2019-03-20', 'myself', 'blah4');
Query:
-- Authors ranked by their streaks of consecutive posting weeks
-- (MySQL 5.7 style: user variables instead of window functions).
--   q1: one row per author and calendar week that has at least one post.
--   q2: reads q1 in (author, year, week) order and keeps @rnk unchanged while
--       the row continues the previous row's streak (same author, same year,
--       week number one higher); otherwise @rnk is incremented.
--   q3: length and first week of every streak that spans more than one week.
-- User variables must be written with a leading @; a leading # starts a
-- comment in MySQL and would cut the statement short.
-- NOTE(review): the result relies on the SELECT list being evaluated left to
-- right over rows in q1's ORDER BY order, which MySQL does not guarantee.
-- NOTE(review): because of the @yr = yr test, a streak that crosses New Year
-- is counted as two separate streaks.
SELECT q3.ConcurrentWeeks, q3.StartWeekDate, a.*
FROM
(
    SELECT COUNT(*) AS ConcurrentWeeks, MIN(WkDt) AS StartWeekDate, author_id
    FROM
    (
        SELECT q1.WkDt, q1.Total
        , CASE
            WHEN @author = author_id AND @yr = yr AND @wk = wk - 1 THEN @rnk
            ELSE @rnk := @rnk + 1
          END AS rnk
        -- remember this row's key for the comparison made on the next row
        , @author := author_id AS author_id
        , @yr := yr AS yr
        , @wk := wk AS wk
        FROM
        (
            SELECT
                author_id, YEAR(created_at) AS yr, WEEK(created_at) AS wk
                , COUNT(*) AS Total
                -- Monday of the posting week; falls back to the earliest post
                -- date of the week when STR_TO_DATE cannot build a date
                , COALESCE(MIN(STR_TO_DATE(concat(YEAR(created_at),' monday ',WEEK(created_at)),'%X %W %V')), MIN(CAST(created_at AS DATE))) AS WkDt
            FROM `posts` p
            GROUP BY author_id, YEAR(created_at), WEEK(created_at)
            ORDER BY author_id, yr, wk
        ) q1
        -- initialise the variables once, before the first row is read
        CROSS JOIN (SELECT @author := null, @yr := null, @wk := null, @rnk := 0) init
    ) q2
    GROUP BY author_id, rnk
    HAVING ConcurrentWeeks > 1
) q3
LEFT JOIN `authors` a ON a.id = q3.author_id
ORDER BY ConcurrentWeeks DESC, StartWeekDate ASC
Result:
ConcurrentWeeks StartWeekDate id name email
--------------- ------------- -- ------------- ----------------------
3 2019-02-04 1 john doe john.doe#home.net
2 2019-01-28 2 jane sheppard jane.sheppard#home.net

Related

How to get percentage of result set for each day?

I am trying to retrieve the percentage of available products at specific merchants over the last 30 days.
Desired result example:
20210504 merchant1 20%
20210504 merchant2 30%
20210505 merchant1 25%
20210505 merchant2 35%
There are 3 tables:
availability (containing availability info for each product and merchant and day)
products (where the manufacturer_id is, that we want to filter for)
merchants (merchant info)
Minimal example: https://www.db-fiddle.com/f/wtnK5R4DWi7Dy6LwLaP4mX/0
This returns the percentage for only one merchant and one day:
-- get percentage of available products per merchant over time
SELECT
m.name AS metric,
t.s AS AMOUNT_AVAILABLE,
count(*) AS AMOUNT_TOTAL,
t.s / count(*) AS percentage
FROM availability p
CROSS JOIN (
SELECT count(*) AS s FROM availability p2
INNER JOIN products mp on p2.SKU = mp.SKU
WHERE
availability = 'sofort lieferbar'
AND date = curdate() - interval 1 day -- testing for one day, but we want a time series
AND mp.MANUFACTURER_ID = 1
-- AND p2.merchant_id = p.merchant_id -- does not work
-- AND merchant_id = 2
-- GROUP BY merchant_id
) t
INNER JOIN products mp on p.SKU = mp.SKU
INNER JOIN merchants m ON m.id = p.MERCHANT_ID
WHERE
p.date = curdate() - interval 1 day
and mp.MANUFACTURER_ID = 1
-- and merchant_id = 2
GROUP BY
merchant_id
Now I am trying to somehow merge the cross join with the from table so I get the info for each merchant and day. How can a cross join be joined with the from table?
Data & Schema:
-- Merchants that offer the products.
create table merchants
(
    id   tinyint unsigned not null
        primary key,
    name varchar(255)     null
);

INSERT INTO merchants (id, name) VALUES
    (1, 'Amazon'),
    (2, 'eBay');

-- One availability status per day, product (SKU) and merchant.
-- NOTE(review): SKU is char(10) here but char(8) in products — confirm which
-- width is intended.
create table availability
(
    DATE         date             not null,
    SKU          char(10)         not null,
    merchant_id  tinyint unsigned not null,
    availability enum ('sofort lieferbar', 'verzögert lieferbar', 'nicht lieferbar', 'außer Handel') null,
    constraint DATE
        unique (DATE, SKU, merchant_id)
);

-- The inserts are left unqualified (no "test." prefix) so that they target the
-- tables created above, whatever the current database is called.
INSERT INTO availability (DATE, SKU, merchant_id, availability) VALUES
    ('2021-05-11', '1', 1, 'sofort lieferbar'),
    ('2021-05-11', '1', 2, 'nicht lieferbar'),
    ('2021-05-12', '1', 1, 'sofort lieferbar'),
    ('2021-05-12', '1', 2, 'nicht lieferbar'),
    ('2021-05-13', '1', 1, 'nicht lieferbar'),
    ('2021-05-13', '1', 2, 'sofort lieferbar');

-- Product master data; MANUFACTURER_ID is what the reports filter on.
create table products
(
    SKU             char(8)            not null
        primary key,
    NAME            varchar(255)       null,
    MANUFACTURER_ID mediumint unsigned null,
    updated         datetime default CURRENT_TIMESTAMP not null on update CURRENT_TIMESTAMP
);

INSERT INTO products (SKU, NAME, MANUFACTURER_ID, updated) VALUES
    ('1', 'Sneaker', 1, '2021-05-12 02:27:46'),
    ('2', 'Ball', 1, '2021-05-12 02:27:46'),
    ('3', 'Pen', 2, '2021-05-12 02:27:46'),
    ('4', 'Paper', 2, '2021-05-12 02:27:46');
I have written a query which seems to work for the data you have provided. Let me know if there's any issue and I'll see what I can do.
-- Share of products that are 'sofort lieferbar' per merchant and day over the
-- last 10 full days (widen the interval for a longer series, e.g. 30 days).
SELECT
    CONCAT('merchant', t.ID) AS merchant,
    t.Date,
    -- no matching row in g means nothing was available that day: report 0, not NULL
    COALESCE(g.prod_available, 0) / t.all_prod_from_merch AS percentage_available
FROM (
    -- t: total number of availability rows per merchant and day in the range
    SELECT
        m.ID,
        a.Date,
        COUNT(a.merchant_ID) AS all_prod_from_merch
    FROM merchants m
    JOIN availability a
        ON m.ID = a.merchant_ID
    WHERE a.Date < CURDATE()
      AND a.Date >= CURDATE() - INTERVAL 10 DAY
    GROUP BY
        m.ID,
        a.Date
) t
LEFT JOIN (
    -- g: number of immediately available rows per merchant and day, same range
    SELECT
        merchant_ID,
        Date,
        COUNT(merchant_ID) AS prod_available
    FROM availability
    WHERE availability = 'sofort lieferbar'
      AND Date < CURDATE()
      AND Date >= CURDATE() - INTERVAL 10 DAY
    GROUP BY
        merchant_ID,
        Date
) g
    ON g.merchant_ID = t.ID
   AND g.Date = t.Date
ORDER BY
    t.Date,
    t.ID;
The first select in the join gets the total number of products in the time range for each merchant. The second one gets those available from each merchant. So the select at the beginning just does the fraction.

Syntax error near (

I am trying to query something and I am not so good with Mysql, so I was wondering if someone can tell me what is the issue here. Here is my table:
-- Money transfers: who sent how much to whom, and on which date.
-- NOTE(review): in MySQL a leading # starts a comment and VARCHAR requires a
-- length, so this script looks written for a different engine — confirm the
-- target database before running it.
create table #transfers (
sender varchar not null,
recipient varchar not null,
date date not null,
amount integer not null
);
-- Sample rows; a sender/recipient pair can occur several times, even on the same date.
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Smith','Williams','2000-01-01',200);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Smith','Taylor','2002-09-27',1024);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Smith','Johnson','2005-06-26',512);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Williams','Johnson','2010-12-17',100);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Williams','Johnson','2004-03-22',10);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Brown','Johnson','2013-03-20',500);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Johnson','Williams','2007-06-02',400);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Johnson','Williams','2005-06-26',400);
INSERT INTO #transfers(sender,recipient,date,amount) VALUES ('Johnson','Williams','2005-06-26',200);
This is the query:
-- Recipients whose three largest incoming transfers add up to at least 1024.
WITH cte AS
(
-- number each recipient's transfers from the largest amount downwards
SELECT *, rn = ROW_NUMBER() OVER (PARTITION BY recipient ORDER BY amount DESC)
FROM #transfers
)
SELECT recipient
FROM cte
-- keep only the three largest transfers of every recipient
WHERE rn <= 3
GROUP BY recipient
HAVING SUM(amount) >= 1024
ORDER BY recipient
However I get this error:
near "(": syntax error
CTEs and ROW_NUMBER are not available in MySQL before version 8.0.
Try with Subquery:
-- Recipients whose three largest incoming transfers add up to at least 1024.
-- Emulates ROW_NUMBER() OVER (PARTITION BY recipient ORDER BY amount DESC)
-- with user variables for MySQL versions that lack window functions
-- (from MySQL 8.0 on, use ROW_NUMBER() directly).
-- Reads from the plain table `transfers`; amount must be a numeric column.
-- NOTE(review): relies on rows being numbered in ORDER BY order and on the
-- SELECT list being evaluated left to right, which MySQL does not guarantee.
SELECT recipient
FROM
(
    SELECT
        t.recipient,
        t.amount,
        -- restart the numbering at 1 whenever the recipient changes
        @rownum := IF(@prev_recipient = t.recipient, @rownum + 1, 1) AS rn,
        @prev_recipient := t.recipient AS prev_recipient
    FROM transfers t
    CROSS JOIN (SELECT @rownum := 0, @prev_recipient := NULL) r
    ORDER BY t.recipient, t.amount DESC
) X
WHERE rn <= 3
GROUP BY recipient
HAVING SUM(amount) >= 1024
ORDER BY recipient
Fiddle Demo
use this create table query
-- Money transfers; id is a surrogate key so identical transfers stay distinct.
create table transfers (
    id        INT NOT NULL AUTO_INCREMENT,
    sender    VARCHAR(100) NOT NULL,
    recipient VARCHAR(100) NOT NULL,
    date      DATE NOT NULL,
    -- numeric, so that SUM(amount) and ORDER BY amount work on numbers rather than strings
    amount    INTEGER NOT NULL,
    PRIMARY KEY ( id )
);

How can I do this Query on SQL Server 2008 without TSQL

I'm trying to make a query but I don't know how to do it. I'm a newbie in SQL querying, so please be patient.
Here is what I have:
-- Number and total time of the actuations that were carried out by someone
-- other than 'peter' for a team listing 'peter' in one of its operator slots.
select
    count(*),
    sum(a.time),
    'peter'
from ACTUATION as a
where a.OPERATOR != 'peter'
  and a.team in (
        select eq1.name
        from TEAMWORKS eq1
        where 'peter' in (eq1.operator1, eq1.operator2, eq1.operator3, eq1.operator4,
                          eq1.operator5, eq1.operator6, eq1.operator7)
      )
This works OK, but I need to do it for every operator in the table operators and don't know how to do it.
I've tried several subqueries and groups by, but I can not make it work properly.
Kind regards
EDIT:
Let's say I have this DataBase:
-- A team has a name and up to seven operator slots.
create table TEAMWORKS
(
    operator1 varchar(50),
    operator2 varchar(50),
    operator3 varchar(50),
    operator4 varchar(50),
    operator5 varchar(50),
    operator6 varchar(50),
    operator7 varchar(50),
    name      varchar(50)
);

-- Each team fills a different pair of slots, so the inserts stay separate.
insert into TEAMWORKS (operator1, operator2, name) values ('Peter', 'Paul', 'Pe-Pa');
insert into TEAMWORKS (operator2, operator3, name) values ('Peter', 'John', 'Pe-Jo');
insert into TEAMWORKS (operator1, operator4, name) values ('John', 'Paul', 'Jo-Pa');
insert into TEAMWORKS (operator5, operator6, name) values ('John', 'Peter', 'Jo-Pe');

-- All known operators.
create table OPERATORS
(
    name    varchar(50),
    surname varchar(50)
);

insert into OPERATORS (name, surname) values
    ('Peter', 'Font'),
    ('Paul', 'Bridges'),
    ('John', 'Oldfield');

-- Work items: the main operator, the team involved ('' when none) and the time used.
create table ACTUATION
(
    ID          int,
    time        int,
    operator    varchar(50),
    team        varchar(50),
    description varchar(999)
);

insert into ACTUATION (ID, time, operator, team, description) values
    (1, 30, 'Peter', '', 'Pick flowers'),
    (2, 15, 'Paul', '', 'Throw flowers'),
    (3, 30, 'Peter', 'Jo-Pe', 'Pick stones'),
    (4, 5, 'John', 'Jo-Pe', 'Throw stones'),
    (5, 15, 'Paul', 'Jo-Pa', 'Throw tables'),
    (6, 30, 'Peter', 'Pe-Pa', 'Pick tables');
And I need to get the time used by Every operator in table OPERATORS where he is not the main operator in Table ACTUATIONS, but part of a TEAM in the ACTUATIONS.
In the given example, I would like to get as result:
Operator #Actuations (count) Time(sum) 'Origin (only for clarifying, list not needed)
Peter 1 5 'from actuation #4
Paul 1 30 'from actuation #6
John 2 45 'from actuations #3 and #5
Hope now it's more clear
EDIT 2:
You have a working example here: http://sqlfiddle.com/#!3/91373/6/0
That's the desired result, but obviously I would not be able to use unions as I would not know the Operators.
Edit:
After OP having altered the question, here is a working query for your example:
-- For every operator: number and total time of the actuations performed by
-- somebody else for a team the operator belongs to.
select
    o.name
  , count(a.team)             as actuations   -- counts matched actuations only
  , coalesce(sum(a.time), 0)  as total_time
from OPERATORS o
left join (
    -- one row per (team name, operator) instead of seven operator columns
    select
        unpvt.Operator
      , unpvt.name
    from
        (select name, operator1, operator2, operator3, operator4, operator5, operator6, operator7
         from TEAMWORKS) ot
    UNPIVOT (
        Operator FOR ColName IN (operator1, operator2, operator3, operator4, operator5, operator6, operator7)
    ) as unpvt
) tw on o.name = tw.Operator
-- the "not the main operator" test belongs in the ON clause: in a WHERE clause
-- it would discard operators without any matching actuation
left join ACTUATION a
    on a.team = tw.name
   and a.operator != o.name
group by o.name
This returns your desired result :)
Old Post:
I don't know the structure of the database, but I've had a go at guessing.
Firstly, I create two temporary tables in T-SQL - this is done for testing.
I've previously been down-voted for using temporary tables without descriptions in answers on SO for being too complicated, so please ask if you don't understand.
-- Table variable standing in for the ACTUATION table (T-SQL table variables
-- are named with a leading @ and live only for the current batch).
declare @ACTUATION table(
    time datetime default getdate()   -- filled in automatically on insert
  , Operator varchar(8)
  , Workteam varchar(8)
)

-- two sample rows; time takes its default
insert into @ACTUATION (Operator, Workteam)
select 'Gunnar', 'Peter'
union all select 'Peter', 'Gunnar'
We also need the table Teamworks:
-- Table variable standing in for the TEAMWORKS table: one row per team with
-- seven operator columns (T-SQL table variables are named with a leading @).
declare @TEAMWORKS table(
    time datetime default getdate()   -- filled in automatically on insert
  , sometext varchar(32)
  , operator1 varchar(8)
  , operator2 varchar(8)
  , operator3 varchar(8)
  , operator4 varchar(8)
  , operator5 varchar(8)
  , operator6 varchar(8)
  , operator7 varchar(8)
)

-- three sample teams; time takes its default
insert into @TEAMWORKS(sometext, operator1, operator2, operator3, operator4, operator5, operator6, operator7)
select 'Blah Blah', 'Gunnar', 'Jack', 'Sam', 'Joe', 'Lee', 'Jane', 'Jim'
union all select 'More Blah', 'Bob', 'Sal', 'Phil', 'Clark', 'Jones', 'Sue', 'Peter'
union all select 'Even more Blaah', 'Im', 'Running', 'out of', 'dummy', 'names', 'Peter', 'Gunnar'
What I've done here is to create two "virtual" tables, which exist only as variables, i.e. they are recreated each time the query is run. However, I think that they are a great tool when trying to visualize.
Next step is to UNPIVOT the operators to a more normalized structure.
This is done using the following query:
-- Turn the seven operator columns of the @TEAMWORKS table variable into rows:
-- one output row per team and non-NULL operator slot. ColName holds the name
-- of the source column, Operator its value.
select
    unpvt.time
  , unpvt.sometext
  , unpvt.Operator
  , unpvt.ColName
from
    (select time, sometext, operator1, operator2, operator3, operator4, operator5, operator6, operator7
     from @TEAMWORKS) ot
UNPIVOT (
    Operator FOR ColName IN (operator1, operator2, operator3, operator4, operator5, operator6, operator7)
) as unpvt;
This query should make a great starting point for further joins.
For instance, I assume that this is (partly) the result you want:
-- Per operator in @ACTUATION: number of matching unpivoted team rows and the
-- latest team time, matching the actuation's Workteam against team operators.
-- Both table variables must be declared in the same batch as this query.
select
    a.Operator
  , count(1)
  , max(t.time)
from @ACTUATION a
left join (
    -- one row per team and operator slot
    select
        unpvt.Operator
      , unpvt.time
    from (
        select time, sometext, operator1, operator2, operator3, operator4, operator5, operator6, operator7
        from @TEAMWORKS
    ) ot
    UNPIVOT (
        Operator FOR ColName IN (operator1, operator2, operator3, operator4, operator5, operator6, operator7)
    ) as unpvt
) t on a.Workteam = t.Operator
-- NOTE(review): filtering on t.Operator here drops unmatched rows, so the
-- left join behaves like an inner join
where a.Operator != t.Operator
group by a.Operator

What does a double not exists clause mean?

-- Relational division: names of customers for whom no worker exists that
-- does NOT have a look_after row linking that worker to the customer.
-- NOTE(review): "Woker" looks like a typo for "Worker" — confirm the table name.
SELECT c.name
FROM Customer c
-- outer NOT EXISTS: there must be no "missing" worker for this customer
WHERE NOT EXISTS(SELECT w.WID
FROM Woker w
-- inner NOT EXISTS: worker w has no look_after row for customer c
WHERE NOT EXISTS(SELECT la
FROM look_after la
WHERE la.CID = c.CID
AND la.WID = w.WID));
I don't know what the code means... Could anyone tell me broadly what the code does? c is a Customer, who is looked after by a Worker.
The query selects customers that are looked after by all workers.
The double not exists is a way to implement relational division.
As an illustration to Andomar's excellent answer an example:
-- Some test data
-- Start from an empty scratch schema; IF EXISTS keeps the very first run
-- (when tmp does not exist yet) from failing.
DROP SCHEMA IF EXISTS tmp CASCADE;
CREATE SCHEMA tmp ;
-- create and look up all following objects in tmp
SET search_path=tmp;
-- People who may have seen movies.
CREATE TABLE persons (
    person_id INTEGER NOT NULL PRIMARY KEY,
    pname     varchar
);

INSERT INTO persons (person_id, pname) VALUES
    (1, 'Bob'),
    (2, 'Alice'),
    (3, 'Carol');

-- The complete list of movies.
CREATE TABLE movies (
    movie_id INTEGER NOT NULL PRIMARY KEY,
    mname    varchar
);

INSERT INTO movies (movie_id, mname) VALUES
    (1, 'The Blues brothers'),
    (2, 'Modern Times'),
    (3, 'The Sound of Music'),
    (4, 'Amadeus'),
    (5, 'Never say Never');

-- Junction table: which person has seen which movie.
CREATE TABLE person_movie (
    person_id INTEGER NOT NULL,
    movie_id  INTEGER NOT NULL,
    PRIMARY KEY (person_id, movie_id)
);

INSERT INTO person_movie (person_id, movie_id) VALUES
    (1, 5), (1, 1),
    (2, 5), (2, 4), (2, 1), (2, 3), (2, 2),
    (3, 1), (3, 3);

-- Persons who have seen ALL the movies, phrased as:
-- persons for whom there is NO movie that they have NOT seen.
SELECT *
FROM persons p
WHERE NOT EXISTS (
    SELECT 1
    FROM movies m
    WHERE NOT EXISTS (
        SELECT 1
        FROM person_movie pm
        WHERE pm.person_id = p.person_id
          AND pm.movie_id = m.movie_id
    )
);

-- The mirror image: movies that have been seen by ALL persons.
SELECT *
FROM movies m
WHERE NOT EXISTS (
    SELECT 1
    FROM persons p
    WHERE NOT EXISTS (
        SELECT 1
        FROM person_movie pm
        WHERE pm.person_id = p.person_id
          AND pm.movie_id = m.movie_id
    )
);
Results:
person_id | pname
-----------+-------
2 | Alice
(1 row)
movie_id | mname
----------+--------------------
1 | The Blues brothers
(1 row)

Select data based on date of id in table

I am currently showing the last 5 events in my database where eventdate < CURDATE()
eg
-- Lookup table: where an event takes place.
CREATE TABLE venues (
    id    INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    venue VARCHAR(255)
) DEFAULT CHARACTER SET utf8 ENGINE=InnoDB;

-- Lookup table: kind of event.
CREATE TABLE categories (
    id       INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    category VARCHAR(255)
) DEFAULT CHARACTER SET utf8 ENGINE=InnoDB;

-- Events; venueid and categoryid hold ids of the two lookup tables.
CREATE TABLE events (
    id         INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    eventdate  DATE NOT NULL,
    title      VARCHAR(255),
    venueid    INT,
    categoryid INT
) DEFAULT CHARACTER SET utf8 ENGINE=InnoDB;

INSERT INTO venues (id, venue) VALUES
    (1, 'USA'),
    (2, 'UK'),
    (3, 'Japan');

INSERT INTO categories (id, category) VALUES
    (1, 'Jazz'),
    (2, 'Rock'),
    (3, 'Pop');

-- Event dates are given as YYYYMMDD numbers and stored in the DATE column.
INSERT INTO events (id, eventdate, title, venueid, categoryid) VALUES
    (1, 20121003, 'Title number 1', 1, 3),
    (2, 20121010, 'Title number 2', 2, 1),
    (3, 20121015, 'Title number 3', 3, 2),
    (4, 20121020, 'Title number 4', 1, 3),
    (5, 20121022, 'Title number 5', 2, 1),
    (6, 20121025, 'Title number 6', 3, 2),
    (7, 20121030, 'Title number 7', 1, 3),
    (8, 20121130, 'Title number 8', 1, 1),
    (9, 20121230, 'Title number 9', 1, 2),
    (10, 20130130, 'Title number 10', 1, 3);
-- The five most recent events dated before today, with their category and
-- venue names.
SELECT
    DATE_FORMAT(events.eventdate, '%M %d %Y') AS DATE,
    title,
    cats.category AS CATEGORY,
    loc.venue AS LOCATION
FROM events
INNER JOIN venues AS loc
    ON loc.id = events.venueid
INNER JOIN categories AS cats
    ON cats.id = events.categoryid
WHERE eventdate < CURDATE()
ORDER BY eventdate DESC
LIMIT 5
See fiddle below.
http://sqlfiddle.com/#!2/21ad85/14
I want to show the last 5 events in my database where the eventdate < (events.eventdate WHERE events.id =10)
so where it = 10 you should be able to see event id 9,8,7,6,5 where it = 9 you should be able to see 8,7,6,5,4 etc.
But I am not quite sure how to write it in sql. I think it should be along the lines of
WHERE eventdate < (events.eventdate WHERE events.id =10)
but this doesn't work
Maybe you need this?
WHERE eventdate < (SELECT eventdate FROM events WHERE events.id =10)
Can you try this?
wHERE eventdate < curdate() and events.id < 10
Updated to fix a typo: `events.eventdate` changed to `curdate()`.