MySQL: find IDs with constatnly increasing values - mysql

I have the following table:
create table my_table
(
SubjectID int,
Date Date,
Test_Value int
);
insert into my_table(SubjectID, Date, Test_Value)
values
(1, '2014-01-01', 55),
(1, '2014-01-05', 170),
(1, '2014-01-30', 160),
(2, '2014-01-02', 175),
(2, '2014-01-20', 166),
(2, '2014-01-21', 160),
(3, '2014-01-05', 70),
(3, '2014-01-07', 75),
(3, '2014-01-11', 180)
I want to find IDs with constantly increasing Test_Value over time. In this example, only SubjectID 3 satisfies that condition. Could you write the code to find this out? Thanks for your help as always.

SELECT *
FROM my_table o
WHERE NOT EXISTS (
SELECT null
FROM my_table t1
INNER JOIN my_table t2 ON t2.Date > t1.Date AND t2.Test_Value < t1.Test_Value AND t1.SubjectID = t2.SubjectID
WHERE t1.SubjectID = o.SubjectID
)
The inner query would select all the entities that DO VIOLATE the requirements: they have later dates with least values. Then the outer select entities that do not match ones from the inner query.
SQLFiddle: http://www.sqlfiddle.com/#!2/1a7ba/12
PS: presumably if you only need an id - use SELECT DISTINCT SubjectID

If the values are not monotonically increasing, then there is at least one case where adjacent values decrease. Hence, you can reduce this problem to just looking at the previous value:
select t.SubjectId
from (select t.*,
(select TestValue
from table t2
where t2.SubjectId = t.SubjectId and
t2.Date < t.Date
order by t2.Date desc
limit 1
) as prev_Test_value
from table t
) t
group by t.SubjectId
having coalesce(sum(Test_Value < prev_Test_value), 0) = 0;

Related

Write a query to identify frequent posters

I'm trying to write a query that will find the user_id's of all users
that have created a minimum of two posts in a maximum of 1 hour.
Here's a light example of the data:
CREATE TABLE tbl_posts
(`id` int, `user_id` int, `created_date` datetime);
INSERT INTO tbl_posts
(`id`, `user_id`, `created_date`)
VALUES
(1, 1, '2021-07-01 09:00'),
(2, 2, '2021-07-01 10:15'), -- *
(3, 2, '2021-07-01 11:00'), -- * user posted twice within an hour.
(4, 3, '2021-07-01 13:00'),
(5, 3, '2021-07-01 15:00'),
(6, 3, '2021-07-01 18:00'),
(7, 4, '2021-07-01 11:00'),
(8, 4, '2021-07-02 11:30'),
(9, 4, '2021-07-03 12:30'), -- *
(10, 4, '2021-07-03 12:45'); -- * user posted twice within an hour.
http://sqlfiddle.com/#!9/0e7cba
The expected output of the query is
2, 4
This output is expected because users 2 and 4 have each posted at least twice in under an hour.
I don't know where to begin with this in MySQL. I can export the data and get a result procedurally in something like C or Python, but I'm sure this is accomplishable in MySQL and am curious to know how. Maybe I need a Window function?
Use EXISTS:
SELECT DISTINCT t1.user_id
FROM tbl_posts t1
WHERE EXISTS (
SELECT 1
FROM tbl_posts t2
WHERE t2.user_id = t1.user_id
AND t1.created_date < t2.created_date
AND TIMESTAMPDIFF(SECOND, t1.created_date, t2.created_date) <= 60 * 60
)
Or, if your version of MySql is 8.0+ use LEAD() window function:
SELECT user_id
FROM (
SELECT *, TIMESTAMPDIFF(
SECOND,
created_date,
LEAD(created_date) OVER (PARTITION BY user_id ORDER BY created_date)
) diff
FROM tbl_posts
) t
GROUP BY user_id
HAVING MIN(diff) <= 60 * 60
See the demo.
select distinct p.user_id from tbl_posts p
inner join tbl_posts p2 on p.user_id = p2.user_id
and p.created_date < p2.created_date
and DATE_ADD(p.created_date,interval 1 hour) >= p2.created_date

MySQL 5.5 - Select rows where record changes

I have got this table and I need to produce the line with the last A and first B and then last B and first A. Expected results are here:
ID | Date | OPS
6 | 2018-12-20 | A
7 | 2018-12-20 | B
11 | 2018-12-24 | B
12 | 2018-12-25 | A
I have tried to do with the below code with nil luck. I cannot use analytic functions like ROW_NUMBER(), LEAD () or LAG ().
SELECT *
FROM T1
JOIN (
SELECT COUNT(*) cnt, t1.OPS
FROM (
SELECT DISTINCT ts1.OPS
FROM T1 ts1
JOIN T1 tx1 ON ts1.OPS = tx1.OPS
) t1
JOIN (
SELECT DISTINCT ts2.OPS
FROM T1 ts2
JOIN T1 tx2 ON ts2.OPS = tx2.OPS
) t2 on t1.OPS <= t2.OPS
GROUP BY t1.OPS
) tt ON T1.OPS = tt.OPS
My table is:
CREATE TABLE t1(
ID int,
Date1 date,
OPS CHAR
);
INSERT INTO T1 VALUES
( 1, '2018-12-17', 'A'),
( 2, '2018-12-18', 'A'),
( 3, '2018-12-19', 'A'),
( 4, '2018-12-19', 'A'),
( 5, '2018-12-19', 'A'),
( 6, '2018-12-20', 'A'),
( 7, '2018-12-20', 'B'),
( 8, '2018-12-21', 'B'),
( 9, '2018-12-22', 'B'),
(10, '2018-12-23', 'B'),
(11, '2018-12-24', 'B'),
(12, '2018-12-25', 'A'),
(13, '2018-12-26', 'A'),
(14, '2018-12-27', 'A'),
(15, '2018-12-28', 'A');
Without LEAD and LAG and considering that you have multiple records per-date you could use a horrendous simplified solution which assumes that the records are ordered by id instead of date. It does not require ids to be contiguous and works for more than two ops values:
SELECT *
FROM t AS ref
WHERE EXISTS (
SELECT 1
FROM t AS lag
WHERE ops <> ref.ops
AND id < ref.id
AND NOT EXISTS (
SELECT 1
FROM t AS tmp
WHERE id > lag.ID
AND id < ref.id
)
) OR EXISTS (
SELECT 1
FROM t AS led
WHERE ops <> ref.ops
AND id > ref.id
AND NOT EXISTS (
SELECT 1
FROM t AS tmp
WHERE id < led.ID
AND id > ref.id
)
)
Basically for each row, the query searches all previous rows where ids do not match then determines if there is a row in between. It searches forward likewise.
Try the following logic, which phrases your question using a series of unions:
(SELECT ID, Date1, OPS FROM T1 WHERE OPS = 'B' ORDER BY Date1 LIMIT 1)
UNION ALL
(SELECT ID, Date1, OPS FROM T1
WHERE OPS = 'A' AND Date1 <= (SELECT MIN(Date1) FROM T1 WHERE OPS = 'B')
ORDER BY Date1 DESC LIMIT 1)
UNION ALL
(SELECT ID, Date1, OPS FROM T1 WHERE OPS = 'B' ORDER BY Date1 DESC LIMIT 1)
UNION ALL
(SELECT ID, Date1, OPS FROM T1
WHERE OPS = 'A' AND Date1 >= (SELECT MAX(Date1) FROM T1 WHERE OPS = 'B')
ORDER BY Date1 LIMIT 1)
ORDER BY Date1, OPS;
Demo
Note generally that this is a gaps and islands problem. These are very difficult to handle without using analytic functions, which you appear to not have available. If you were using MySQL 8+, then we could phrase a much cleaner looking query.
if we are allowed max and min function, then it is pretty easy to calculate these 4 individually and union the individual results.
-- To select last A
select ID, Date1, OPS
from test_t1
where test_t1.OPS = 'A'
and test_t1.Date1 in
(
select max(t1_a.Date1) as Date1
from
(select ID, Date1, OPS
from test_t1
where OPS = 'A') t1_a inner join
(select min(Date1) as Date1
from test_t1
where OPS = 'B') t1_min_b
on t1_a.Date1 <= t1_min_b.Date1
)
union
-- To select first B
select ID, Date1, OPS
from test_t1
where test_t1.OPS = 'B'
and test_t1.Date1 in
(
select min(Date1) as Date1
from test_t1
where OPS = 'B'
)
union
/*
To select last B
/
select ID, Date1, OPS
from test_t1
where test_t1.OPS = 'B'
and test_t1.Date1 in
(
select max(Date1) as Date1
from test_t1
where OPS = 'B'
)
union
/
To select first A
*/
select ID, Date1, OPS
from test_t1
where test_t1.OPS = 'A'
and test_t1.Date1 in
(
select min(t1_a.Date1) as Date1
from
(select ID, Date1, OPS
from test_t1
where OPS = 'A') t1_a inner join
(select max(Date1) as Date1
from test_t1
where OPS = 'B') t1_max_b
on t1_a.Date1 >= t1_max_b.Date1
)
;
You could use a correlated subquery that computed the min and max ids for each ops, like
SELECT t.*
FROM T1 t
WHERE
t.id IN (
SELECT MIN(id) FROM T1 WHERE ops = t.ops
UNION ALL SELECT MAX(id) FROM T1 WHERE ops = t.ops
)
ORDER BY t.ops, t.id DESC

Get time difference between all consecutive rows (latest one not printing)

I'm trying to retrieve all columns data along with the time difference between all consecutive rows from the following table, where (sender_id = 1 OR = 2) and (recipient_id = 2 OR = 1).
CREATE TABLE records (
id INT(11) AUTO_INCREMENT,
send_date DATETIME NOT NULL,
content TEXT NOT NULL,
sender_id INT(11) NOT NULL,
recipient_id INT(11) NOT NULL,
PRIMARY KEY (id)
);
INSERT INTO records (send_date, content, sender_id, recipient_id) VALUES
('2013-08-23 14:50:00', 'record 1/5', 1, 2),
('2013-08-23 14:51:00', 'record 2/5', 2, 1),
('2013-08-23 15:50:00', 'record 3/5', 2, 1),
('2013-08-23 15:50:13', 'record 4/5', 1, 2),
('2013-08-23 16:50:00', 'record 5/5', 1, 2);
Problem is my select query won't output the latest record because of the WHERE clause :
SELECT t1.content, DATE_FORMAT(t1.send_date, '%b, %D, %H:%i') AS 'pprint_date',
TIMESTAMPDIFF(MINUTE, t1.send_date, t2.send_date) AS 'duration'
FROM records t1, records t2
WHERE (t1.id = t2.id - 1) /*<= this subtraction excludes latest record*/
AND ((t1.sender_id = 1 AND t1.recipient_id = 2)
OR (t1.sender_id = 2 AND t1.recipient_id = 1))
ORDER BY t1.id ASC
How can I properly get the time difference between all consecutive records while still printing all of them ?
I would use a correlated subquery:
select r.*,
(select r2.send_date
from records r2
where (r2.sender_id in (1, 2) or r2.recipient_id in (1, 2)) and
r2.send_date > r.send_date
order by r2.send_date asc
limit 1
) as next_send_date
from records r
where r.sender_id in (1, 2) or r.recipient_id in (1, 2);
You can get the duration (instead of the next time) by using TIMESTAMPDIFF(MINUTE, r.send_date, r2.send_date) in the subquery. I think the first version is easier for you to test with to see what is happening.

get rows from a table where value of field x is maximum

I have two tables myTable and myTable2 in a mysql database:
CREATE TABLE myTable (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
number INT,
version INT,
date DATE
) ENGINE MyISAM;
INSERT INTO myTable
(`id`, `number`, `version`, `date`)
VALUES
(1, '123', '1', '2016-01-12'),
(2, '123', '2', '2016-01-13'),
(3, '124', '1', '2016-01-14'),
(4, '124', '2', '2016-01-15'),
(5, '124', '3', '2016-01-16'),
(6, '125', '1', '2016-01-17')
;
CREATE TABLE myTable2 (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
myTable_id INT
) ENGINE MyISAM;
INSERT INTO myTable2
(`id`, `myTable_id`)
VALUES
(1, 1),
(2, 1),
(3, 2),
(4, 2),
(5, 3),
(6, 3),
(7, 4),
(8, 4),
(9, 4),
(10, 5),
(11, 6)
;
The field myTable2.myTable_id is a foreign key of myTable.Id.
I would like to get all the rows from myTable where myTable2.myTable_id = myTable.Id and the value of the field version in myTable is the maximum for every corresponding value for the field number in myTable.
I tried something like this:
SELECT
*
FROM
myTable,
myTable2
WHERE
myTable.version = (SELECT MAX(myTable.version) FROM myTable)
But the above query does not return the correct data. The correct query should output this:
Id number version date
2 123 2 2016-01-13
5 124 3 2016-01-16
6 125 1 2016-01-17
Please help!
One way to do this is to get the max version for each number in myTable in a derived table and join with that:
SELECT DISTINCT
m.*
FROM
myTable m
JOIN
myTable2 m2 ON m.id = m2.myTable_id
JOIN
(
SELECT number, MAX(version) AS max_version
FROM myTable
GROUP BY number
) AS derived_table
ON m.number = derived_table.number
AND m.version = derived_table.max_version
With your sample data this produces a result like this:
id number version date
6 125 1 2016-01-17
5 124 3 2016-01-16
2 123 2 2016-01-13
your Query is logically wrong. Here is the correct one
SELECT
*
FROM
myTable,
myTable2
WHERE
(myTable.version,myTable.number) in
(SELECT MAX(myTable.version),number FROM myTable group by number)
and myTable.id=myTable2.id
Here is the sqlfiddle http://sqlfiddle.com/#!9/74a67/4/0
This is the query posted for the previous edited question
SELECT * FROM myTable
inner join myTable2 on myTable.id = myTable2.mytable_id
WHERE (version, number) in
(SELECT MAX(version), number FROM myTable group by number)
Try this solution with using subquery simply as:
# Selecting desired result..
SELECT t1.id, t1.number, t1.version, t1.date
FROM myTable As t1 JOIN
# subquery to select max version and its corresponding
# number form myTable
(SELECT number, max(version) As max_ver FROM myTable
GROUP BY number
) As t2 ON t1.number = t2.number and t1.version = t2.max_ver
# Now checking for foreign key..
WHERE t1.id IN (SELECT mytable_id FROM myTable2);
Was it helpful..

Updating values of SQL column with values from the same column

If you will look at the image above. I need to update this table for the null values of the TID which is column third in the table, with the values in between two rows that actually has value.
So in the above example, I need to have rows 44-57 as 040, row 60-87 as 077 etc. One pattern that could be used is that column 2 has INS in the string, which denotes that the value in column 3 is to be changed. So I was thinking about using DATA LIKE 'INS%' in some way.
Please let me know what you think of the problem and any possible solutions.
thanks!
DECLARE #x TABLE
(Column1 INT, Column2 VARCHAR(64), TID VARCHAR(10));
INSERT #x VALUES
(42, 'INS{whatever}', '040'),
(43, 'somethingelse', '040'),
(44, 'somethingelse', NULL),
(45, 'somethingelse', NULL),
(46, 'somethingelse', NULL),
(47, 'somethingelse', NULL),
(48, 'somethingelse', NULL),
(49, 'INS{whatever}', '077'),
(50, 'somethingelse', '077'),
(51, 'somethingelse', NULL),
(52, 'somethingelse', NULL);
;WITH x AS (SELECT i = Column1, TID, rn = ROW_NUMBER() OVER (ORDER BY Column1)
FROM #x WHERE Column2 LIKE 'INS%'
),
y AS (SELECT x.TID, s = x.i, e = COALESCE(x2.i, 2000000000)
FROM x LEFT OUTER JOIN x AS x2 ON x.rn = x2.rn -1
)
UPDATE src SET TID = y.TID
FROM #x AS src
INNER JOIN y ON src.Column1 > y.s AND src.Column1 < y.e;
SELECT * FROM #x;
This assumes that:
The first two columns in your sample were duplicated (I ignore the first listed)
Col1 is a primary key
Values are to be assigned as you described based on ascending values in Col1
Performance might be bad to very bad on large tables
Performance would improve with suitable indexing (on Col1 and Col3)
Substitute in your table and column names, and check for minor typos.
UPDATE MyTable
set Col3 = mt2.Col3
from MyTable mt
inner join (-- Get the "earlier" Col3 value for each row that has no value
select t1.Col1, max(t2.Col1) EarlierValueHere
from MyTable t1
inner join MyTable t2
on t2.Col1 < t1.Col1
and t2.Col3 is not null
group by t1.Col1
where t1.Col3 is null) earlier
on earlier.Col1 = mt.Col1
inner join MyTable mt2
on mt2.Col1 = earlier.EarlierValueHere
Another query you might use:
update t set TID = X.NonNullTID
from [YourTable] t
join
(select
t1.Column1, t1.Column2, t1.TID,
(select top 1 tid from [YourTable]
where TID is not null and Column1 <= t1.Column1
order by Column1 desc) as NonNullTID
from [YourTable] t1) X
on X.Column1 = t.Column1