alternative for outer apply - mysql

These are my tables:
-- SQL Server sample schema; the '#' prefix creates temporary tables.
create table #vehicles (vehicle_id int, sVehicleName varchar(50))
create table #location_history ( vehicle_id int, location varchar(50), date datetime)
-- Four vehicles, identified by vehicle_id.
insert into #vehicles values
(1, 'MH 14 aa 1111'),
(2,'MH 12 bb 2222'),
(3,'MH 13 cc 3333'),
(4,'MH 42 dd 4444')
-- Two location rows per vehicle; each row's date is getdate() at insert time.
insert into #location_history values
( 1, 'aaa', getdate()),
( 1, 'bbb' , getdate()),
( 2, 'ccc', getdate()),
( 2, 'ddd', getdate()),
(3, 'eee', getdate()),
( 3, 'fff', getdate()),
( 4, 'ggg', getdate()),
( 4 ,'hhh', getdate())
This is query which I execute in SQL server.
-- For every vehicle, fetch one matching #location_history row via OUTER APPLY.
-- NOTE(review): top 1 has no order by, so which location is returned is not defined.
select v.sVehicleName as VehicleNo, ll.Location
from #vehicles v outer APPLY
(select top 1 Location from #location_history where vehicle_id = v.vehicle_id
) ll
This is output in SQL server.
VehicleNO|Location
MH14aa1111 | aaa
MH12bb2222 | ccc
MH13cc3333 | eee
MH42dd4444 |ggg
I want to execute this in MySQL and get the same output as shown above.

First, the SQL Server query doesn't actually make sense, because you are using top without an order by.
Presumably, you intend something like this:
-- Sketch only: the "??" below is a placeholder and must be replaced with a real ordering column.
select v.sVehicleName as VehicleNo, ll.Location
from #vehicles v outer APPLY
(select top 1 Location
from #location_history
where vehicle_id = v.vehicle_id
order by ?? -- something to indicate ordering
) ll;
You need a method to get the latest record for each vehicle. Under normal circumstances, I think date would contain this information -- however, this is not true in your sample data.
Assuming that date really does contain unique values, then you can do:
-- Latest location per vehicle, assuming location_history.date is unique per vehicle.
-- LEFT JOIN with the "latest row" test in the ON clause keeps vehicles that have
-- no history (location is NULL for them), which is what OUTER APPLY does.
-- The location column is read through the lh alias declared in the join.
select v.sVehicleName as VehicleNo, lh.location as Location
from vehicles v left join
     location_history lh
     on lh.vehicle_id = v.vehicle_id and
        lh.date = (select max(lh2.date)
                   from location_history lh2
                   where lh2.vehicle_id = v.vehicle_id
                  );
Otherwise, you can do what you want using a correlated subquery. However, this will return an arbitrary matching value on the most recent date:
-- One row per vehicle; the scalar subquery returns the location of the most
-- recent history row, or NULL when the vehicle has no history.
-- If several rows share the latest date, which one is returned is arbitrary.
select v.sVehicleName as VehicleNo,
       (select lh.location
        from location_history lh
        where lh.vehicle_id = v.vehicle_id   -- correlate on the outer vehicle
        order by lh.date desc
        limit 1
       ) as location
from vehicles v;

Related

How do I select average with multiple join clauses?

I want to get the average number (Attendee NPS) from a SQL table I've already put together.
I've encased the initial table in a new select statement so I can take the average of distinct values. Is there something in my Join clause that is preventing this from working?
Im getting the following error:
ERROR: missing FROM-clause entry for table "gizmo" Position: 12
-- Goal: average the per-event "Attendee NPS" values produced by the inner query.
SELECT
-- NOTE(review): the outer query only sees the derived table bigtable and its output
-- column "Attendee NPS"; the inner alias gizmo is not visible at this level.
avg(bigtable.gizmo.attendee_nps)
FROM
(
SELECT DISTINCT
attendee_survey_results.swoop_event_id AS "Swoop ID",
attendee_survey_results.startup_weekend_city AS "SW City",
swooptable.start_date AS "Date",
gizmo.attendee_nps AS "Attendee NPS"
FROM attendee_survey_results
JOIN
(
-- Per-event score: percentage of answers >= 9 minus percentage of answers <= 6.
SELECT
swoop_event_id,
(
100 * count(CASE WHEN attendee_nps >= 9 THEN 1 END)
/ count(attendee_nps)
- 100 * count(CASE WHEN attendee_nps <= 6 THEN 1 END)
/ count(attendee_nps)
) AS "attendee_nps"
FROM attendee_survey_results
GROUP BY swoop_event_id
) AS "gizmo"
ON gizmo.swoop_event_id = attendee_survey_results.swoop_event_id
JOIN
(
-- Event metadata used for the start date.
SELECT eid,start_date,manager_email
FROM events
) AS "swooptable"
ON gizmo.swoop_event_id = swooptable.eid
) AS bigtable
[edit, ok you don't have a single problem, but the request at the bottom should work]
3 part notation bigtable.gizmo.attendee_nps
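You can't write bigtable.gizmo.attendee_nps: a three-part name is read as db_name.tbl_name.col_name, so bigtable would be taken as a database name.
Use the two-part table_or_alias.col_name_or_alias notation instead.
In a subquery, each level can only see the column names exposed by the level directly beneath it; the table names and aliases used at deeper levels are lost: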
-- with the deep explicite
SELECT `d0`.`new_field`
FROM (
-- implicite `d1` table
SELECT `new_field`
FROM (
-- with the deep `d2` explicite and alias of field
SELECT `d2`.`field` AS `new_field`
FROM (
-- without the explicite `d3` table and `field` field
SELECT *
FROM (
-- output a `field` => 12
SELECT 12 as `field`
) AS `d3`
) AS `d2`
) AS `d1`
) AS `d0`
-- print `new_field` => 12
Access deep-1 aliased field
-- Deliberately failing example: the derived table exposes only `new_alias_field`,
-- so the outer reference to `attendee_nps` cannot be resolved.
SELECT `attendee_nps`
FROM
(
SELECT `attendee_nps` AS `new_alias_field`
FROM attendee_survey_results
) AS bigtable
Unknown column 'attendee_nps' in 'field list'
When you alias a field in the inner (deep-1) query, the outer (deep-0) query can only access the alias new_alias_field; the original field name no longer exists at that level.
Double quote " table alias
FROM (
-- ...
) AS "bigtable"
You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '"bigtable"' at line N
MySQL doesn't allow " for quoting a table alias (it is technically accepted for a field alias).
Use the MySQL backtick to quote a table alias name, like AS `My Table Alias`.
Correct query :
SQL Fiddle
MySQL 5.6 Schema Setup:
-- Event metadata. start_date is a real DATE so it sorts and compares as a date.
CREATE TABLE events
(`eid` int, `start_date` date, `manager_email` varchar(15))
;
-- Sample events 1..7; the e-mail addresses use '@' (the '#' looked like a transcription artefact).
INSERT INTO events
(`eid`, `start_date`, `manager_email`)
VALUES
(1, '2016-11-11', 'mail_1@mail.com'),
(2, '2016-11-12', 'mail_2@mail.com'),
(3, '2016-11-13', 'mail_3@mail.com'),
(4, '2016-11-14', 'mail_4@mail.com'),
(5, '2016-11-15', 'mail_5@mail.com'),
(6, '2016-11-16', 'mail_6@mail.com'),
(7, '2016-11-17', 'mail_7@mail.com')
;
-- One row per survey answer; swoop_event_id refers to events.eid.
CREATE TABLE attendee_survey_results
(`id` int, `swoop_event_id` int, `startup_weekend_city` varchar(6), `attendee_nps` int)
;
-- Sample answers: events 1, 2 and 3 have two answers each, event 7 has one.
INSERT INTO attendee_survey_results
(`id`, `swoop_event_id`, `startup_weekend_city`, `attendee_nps`)
VALUES
(1, 1, 'city_1', 1),
(2, 2, 'city_2', 22),
(3, 3, 'city_3', 3),
(4, 1, 'city_4', 4),
(5, 2, 'city_5', 5),
(6, 3, 'city_6', 9),
(7, 7, 'city_7', 17)
;
Query 1:
-- Average of the attendee_nps column exposed by the derived table `bigtable`.
SELECT
AVG(`bigtable`.`attendee_nps`)
FROM
(
-- One row per distinct (event, city, date, score) combination.
SELECT DISTINCT
`asr`.`swoop_event_id` AS `Swoop ID`,
`asr`.`startup_weekend_city` AS `SW City`,
`swooptable`.`start_date` AS `date`,
`gizmo`.`attendee_nps` AS `attendee_nps`
FROM `attendee_survey_results` AS `asr`
JOIN
(
-- Per-event score: percentage of answers >= 9 minus percentage of answers <= 6.
SELECT
`swoop_event_id`,
(
100 * count(CASE WHEN `attendee_nps` >= 9 THEN 1 END)
/ count(`attendee_nps`)
- 100 * count(CASE WHEN `attendee_nps` <= 6 THEN 1 END)
/ count(`attendee_nps`)
) AS `attendee_nps`
FROM `attendee_survey_results`
GROUP BY `swoop_event_id`
) AS `gizmo`
ON `gizmo`.`swoop_event_id` = `asr`.`swoop_event_id`
JOIN
(
-- Event metadata used for the start date.
SELECT `eid`, `start_date`, `manager_email`
FROM `events`
) AS `swooptable`
ON `gizmo`.`swoop_event_id` = `swooptable`.`eid`
) AS `bigtable`
Results:
| AVG(`bigtable`.`attendee_nps`) |
|--------------------------------|
| -14.28571429 |

Alternate way for outer apply in SQL Server

I have a query in SQL Server in which I have used OUTER APPLY. Now I want to convert it so that the same query can run on both SQL Server and MySQL.
-- SQL Server form: at most five vehicles, each with one matching location_history row.
-- NOTE(review): neither top has an order by, so the chosen rows are not defined.
select top 5 v.sVehicleName as VehicleNo, ll.Location
from vehicles v
outer APPLY
(select top 1 Location
from location_history
where vehicle_id = v.vehicle_id) ll
I have to convert this query so that I can run it on both databases.
This is my tables
-- SQL Server sample schema; the '#' prefix creates temporary tables.
create table #vehicles (vehicle_id int, sVehicleName varchar(50))
create table #location_history ( vehicle_id int, location varchar(50), date datetime)
-- Four vehicles, identified by vehicle_id.
insert into #vehicles
values
(1, 'MH 14 aa 1111'),
(2,'MH 12 bb 2222'),
(3,'MH 13 cc 3333'),
(4,'MH 42 dd 4444')
-- Two location rows per vehicle; each row's date is getdate() at insert time.
insert into #location_history
values
( 1, 'aaa', getdate()),
( 1, 'bbb', getdate()),
( 2, 'ccc', getdate()),
( 2, 'ddd', getdate()),
( 3, 'eee', getdate()),
( 3, 'fff', getdate()),
( 4, 'ggg', getdate()),
( 4 ,'hhh', getdate())
This is query which I execute in SQL server.
-- For every vehicle, fetch one matching #location_history row via OUTER APPLY.
-- NOTE(review): top 1 has no order by, so which location is returned is not defined.
select v.sVehicleName as VehicleNo, ll.Location
from #vehicles v
outer APPLY
(select top 1 Location
from #location_history
where vehicle_id = v.vehicle_id) ll
This is output in SQL server.
VehicleNo
Location
MH14 aa 1111
aaa
MH12 bb 2222
ccc
MH13 cc 3333
eee
MH42 dd 4444
ggg
I want to execute this in MySQL. and I want same output mentioned above.
In this case you can use a LEFT JOIN instead of OUTER APPLY, like this:
-- Replacement for OUTER APPLY that runs on both SQL Server and MySQL.
-- The derived table keeps one row per vehicle (its alphabetically smallest
-- location); the LEFT JOIN keeps vehicles that have no history at all.
-- Row limiting is deliberately left out because it is not portable:
-- add "TOP 5" after SELECT on SQL Server, or "LIMIT 5" at the end on MySQL.
select v.sVehicleName as VehicleNo, ll.Location
from vehicles v
left join
(
    select vehicle_id, min(Location) as Location
    from location_history
    group by vehicle_id
) ll
    on ll.vehicle_id = v.vehicle_id
If you want the first record from the location history table for each vehicle that is present in the vehicles table,
then you can use a cross join.
See the example below:
-- SQL Server demo tables (temporary, '#' prefix).
create table #location (vehicle_id int, vehicle_name varchar(50))
create table #lochistory ( vehicle_id int, location varchar(50), date datetime)
insert into #location
values
(1, 'car'),
(2,'bus'),
(3,'auto'),
(4,'jeep')
insert into #lochistory
values
( 1, 'india', getdate()),
( 1, 'usa' , getdate()),
( 2, 'india', getdate())
-- NOTE(review): the derived table b is not correlated with l, so the same single
-- #lochistory row is paired with every vehicle; this is not a per-vehicle lookup.
select *from #location l
cross join
(
select top 1 * from #lochistory
)b
the output will be as below.
vehicle_id vehicle_name vehicle_id location date
1 car 1 india 2016-04-13 05:21:57.650
2 bus 1 india 2016-04-13 05:21:57.650
3 auto 1 india 2016-04-13 05:21:57.650
4 jeep 1 india 2016-04-13 05:21:57.650

MySQL Query for finding a "LAST" row, based on two fields

I have the following MySQL table to log the registration status changes of pupils:
-- Log of registration status changes: one row per change, keyed by status_id,
-- with a secondary index on the pupil id.
CREATE TABLE `pupil_registration_statuses` (
`status_id` INT(11) NOT NULL AUTO_INCREMENT,
`status_pupil_id` INT(10) UNSIGNED NOT NULL,
`status_status_id` INT(10) UNSIGNED NOT NULL,
`status_effectivedate` DATE NOT NULL,
PRIMARY KEY (`status_id`),
INDEX `status_pupil_id` (`status_pupil_id`)
)
COLLATE='utf8_general_ci'
ENGINE=MyISAM;
Example data:
-- Sample rows. status_id order does not follow date order (id 3 is dated before id 1),
-- and pupil 123 has two rows on 2013-05-06 (ids 1 and 4).
INSERT INTO `pupil_registration_statuses` (`status_id`, `status_pupil_id`, `status_status_id`, `status_effectivedate`) VALUES
(1, 123, 1, '2013-05-06'),
(2, 123, 2, '2014-03-15'),
(3, 123, 5, '2013-03-15'),
(4, 123, 6, '2013-05-06'),
(5, 234, 2, '2013-02-02'),
(6, 234, 4, '2013-04-17'),
(7, 345, 2, '2014-02-01'),
(8, 345, 3, '2013-06-01');
It is possible that statuses can be inserted, thus the sequence of dates does not necessarily follow the same sequence of IDs.
For example: status_id 1 might have a date of 2013-05-06, but status_id 3 might have a date of 2013-03-15.
status_id values are, however, sequential within any particular date. Thus, if a pupil's registration status changes multiple times on one day, the last row will reflect their status for that date.
It is necessary to find out a particular student's registration status on a particular date. The following query works for an individual pupil:
-- Status of pupil 123 as of 2013-05-06: newest effective date first,
-- with same-day ties broken by the highest status_id.
SELECT
    prs.status_id,
    prs.status_pupil_id,
    prs.status_status_id,
    prs.status_effectivedate
FROM pupil_registration_statuses AS prs
WHERE prs.status_effectivedate <= '2013-05-06'
  AND prs.status_pupil_id = 123
ORDER BY
    prs.status_effectivedate DESC,
    prs.status_id DESC
LIMIT 1;
This returns the expected row of status_id = 4
However, I now need to issue a (single) query to return the status for all pupils on a particular date.
The following query is proposed, but doesn't obey the "last status_id in a day" requirement:
-- Latest effective date per pupil on or before 2013-05-06, joined back to fetch the rows.
-- NOTE(review): the join matches on pupil and date only, so every row sharing
-- that date is returned (no tie-break on status_id).
SELECT *
FROM pupil_registration_statuses prs
INNER JOIN (SELECT status_pupil_id, MAX(status_effectivedate) last_date
FROM pupil_registration_statuses
WHERE status_effectivedate <= '2013-05-06'
GROUP BY status_pupil_id) qprs ON prs.status_pupil_id = qprs.status_pupil_id AND prs.status_effectivedate = qprs.last_date;
This query, however, returns 2 rows for pupil 123.
EDIT
To clarify, if the input is the date '2013-05-06', I expect to get the rows 4 and 6 from the query.
http://sqlfiddle.com/#!2/68ee6/2
Is this what you're after?
-- Status row in force for every pupil on 2013-05-06.
-- Step 1 (qprs): latest effective date per pupil on or before the target date.
-- Step 2 (b):    highest status_id on that date, because ids are sequential
--                within a day and the last one is the status that applies.
-- Step 3 (a):    fetch the full row for that id.
SELECT a.*
  FROM pupil_registration_statuses a
  JOIN
     ( SELECT prs.status_pupil_id
            , MAX(prs.status_id) max_status_id
         FROM pupil_registration_statuses prs
         JOIN
            ( SELECT status_pupil_id
                   , MAX(status_effectivedate) last_date
                FROM pupil_registration_statuses
               WHERE status_effectivedate <= '2013-05-06'
               GROUP
                  BY status_pupil_id
            ) qprs
           ON prs.status_pupil_id = qprs.status_pupil_id
          AND prs.status_effectivedate = qprs.last_date
        GROUP
           BY prs.status_pupil_id
     ) b
    ON b.max_status_id = a.status_id;
http://sqlfiddle.com/#!2/68ee6/7
(Incidentally, there's an ugly and undocumented hack for this kind of problem which goes something like this:
-- WARNING: depends on undocumented MySQL behaviour (GROUP BY returning the first
-- row of an ordered derived table). It is rejected under ONLY_FULL_GROUP_BY and
-- the inner ORDER BY is not guaranteed to be honoured; do not use in production.
-- status_id is sorted DESC so that the last status of the day comes first.
SELECT x.*
  FROM ( SELECT *
           FROM pupil_registration_statuses
          WHERE status_effectivedate <= '2013-05-06'
          ORDER BY status_pupil_id, status_effectivedate DESC, status_id DESC
       ) x
 GROUP BY status_pupil_id;
...but I didn't tell you that! ;) )
If I understood right, you want to...
1) Get 1 row per person.
2) Get the status changes from the specific day you manually input.
3) Get the last status changes from within the specific day.
If that's right, you need the query you already have ordering by date and then by id, just with a distinct.
-- PostgreSQL only (DISTINCT ON is not available in MySQL).
-- Keeps the first row per pupil in the given order: newest effective date,
-- then highest status_id. The ORDER BY must start with the DISTINCT ON key.
SELECT DISTINCT ON (status_pupil_id) *
FROM pupil_registration_statuses
WHERE status_effectivedate <= '2013-05-06'
ORDER BY status_pupil_id, status_effectivedate DESC, status_id DESC
I have changed where clause, please try it.
-- Same shape as the proposed query, with the date filter written via Datediff.
-- NOTE(review): the join still matches on pupil and date only, so all rows that
-- share a pupil's latest date are returned.
SELECT *
FROM pupil_registration_statuses prs
INNER JOIN (SELECT status_pupil_id, MAX(status_effectivedate) last_date
FROM pupil_registration_statuses
WHERE Datediff(status_effectivedate, '2013-05-06') <= 0
GROUP BY status_pupil_id) qprs ON prs.status_pupil_id = qprs.status_pupil_id AND prs.status_effectivedate = qprs.last_date;
EDIT
Try this
-- Status row in force for every pupil on 2013-05-06 (MySQL).
-- For each candidate row, the correlated subquery finds that pupil's winning
-- status_id: newest effective date on or before the target date, then highest
-- status_id within that date. The date filter is applied BEFORE choosing the
-- winner, because status_id order does not follow date order.
-- A plain date comparison is used instead of DATEDIFF so the query is valid
-- MySQL and the predicate can use an index.
SELECT prs.*
FROM pupil_registration_statuses prs
WHERE prs.status_id = (
    SELECT innr.status_id
    FROM pupil_registration_statuses innr
    WHERE innr.status_pupil_id = prs.status_pupil_id
      AND innr.status_effectivedate <= '2013-05-06'
    ORDER BY innr.status_effectivedate DESC, innr.status_id DESC
    LIMIT 1
)

How to get the smallest Integers not yet in a database column

I have a table in a MySQL DB with an UNIQUE INT(10) column. The table is pretty populated and the row contains non-consecutive entries of Integer numbers in that column. I would like to do a query, which gets me the smallest number (or the n smallest numbers) that is not in any row.
Example: The table contains rows with the values (1, 2, 3, 5, 7, 8, 10, 12, 15) in that column. The SQL statement should return, e.g., the five lowest values not contained in it, which are 4, 6, 9, 11, 13 in this case.
Is this possible with MySQL?
You can use a "numbers" table (it's handy for various operations):
-- Auxiliary "numbers" table holding consecutive positive integers.
-- In MySQL the UNSIGNED attribute follows the type name.
CREATE TABLE num
( i INT UNSIGNED NOT NULL
, PRIMARY KEY (i)
) ;
-- Populate with 1 .. 1000000; the "..." stands for the elided values and is
-- not literal SQL.
INSERT INTO num (i)
VALUES
(1), (2), ..., (1000000) ;
Then:
-- Five smallest values of num.i with no matching tableX.columnX (anti-join form).
SELECT n.i
FROM num AS n
LEFT JOIN tableX AS x
    ON x.columnX = n.i
WHERE x.columnX IS NULL
ORDER BY n.i
LIMIT 5
or:
-- Five smallest values of num.i for which no tableX row has the same columnX.
SELECT n.i
FROM num AS n
WHERE NOT EXISTS (
    SELECT *
    FROM tableX AS x
    WHERE x.columnX = n.i
)
ORDER BY n.i
LIMIT 5
Another approach, without using an auxiliary table, would be to use MySQL variables. You can test it in SQL-Fiddle, test-2. The output is not the same as the previous one (just to show that it can be done):
-- Reports the gaps in tableX.columnX as (start_id, end_id) ranges using MySQL
-- user variables, which are written with '@' ('#' would start a comment).
--   @id   : next value expected if there were no gap (previous columnX + 1)
--   @rows : running count of missing values found so far
-- Scanning stops contributing rows once at least 5 missing values were seen.
-- NOTE(review): this relies on the rows being processed in columnX order and on
-- the select-list expressions being evaluated left to right — confirm on your
-- MySQL version, since assignment order inside a statement is not guaranteed.
SELECT start_id, end_id
FROM
  ( SELECT
        IF( t.columnX <> @id, @id, NULL) AS start_id            -- first missing value of the gap
      , IF( t.columnX <> @id, t.columnX-1, NULL) AS end_id      -- last missing value of the gap
      , @rows := @rows + (t.columnX - @id) AS r                 -- add the size of this gap
      , @id := t.columnX + 1 AS running_id                      -- expect the successor next
    FROM
        tableX AS t
      CROSS JOIN
        ( SELECT @rows := 0
               , @id := 1
        ) AS dummy
    WHERE
        @rows < 5
    ORDER BY
        t.columnX
  ) AS tmp
WHERE
    start_id IS NOT NULL
This will work, but I think it is pretty inefficient. You won't need an extra table though (a table that would be (2^31-1)*4/1024^3 = 8GB for all positive numbers in INT). Also, I advise you to look at why you need this, because it might not be necessary.
Also it will return the start and end of a range, but not all numbers in that range. (e.g. if you have numbers 1 and 5 it will return {0,2,4,6})
-- Lower gap boundaries: values num-1 that are not themselves present in t.
SELECT (t.num-1) AS bound FROM t
WHERE t.num-1 NOT IN (SELECT t.num FROM t)
-- UNION removes a value that is produced by both halves.
UNION
-- Upper gap boundaries: values num+1 that are not present in t.
SELECT (t.num+1) AS bound FROM t
WHERE t.num+1 NOT IN (SELECT t.num FROM t)
-- NOTE(review): NOT IN matches nothing if t.num can contain NULL — confirm the column is NOT NULL.
As I said, this will be pretty inefficient. JOINs might be faster, but you would need to benchmark it.
-- Gap boundaries via anti-joins: num-1 values with no predecessor row,
-- united with num+1 values with no successor row.
SELECT (cur.num-1) AS bound
FROM t AS cur
LEFT JOIN t AS prev
    ON prev.num = cur.num-1
WHERE prev.num IS NULL
UNION
SELECT (cur.num+1) AS bound
FROM t AS cur
LEFT JOIN t AS nxt
    ON nxt.num = cur.num+1
WHERE nxt.num IS NULL

MySQL query, MAX() + GROUP BY

Daft SQL question. I have a table like so ('pid' is auto-increment primary col)
-- One row per price record; pid is the auto-increment primary key and rid
-- groups the rows. (No comma after the last column definition.)
CREATE TABLE theTable (
    `pid` INT UNSIGNED PRIMARY KEY AUTO_INCREMENT,
    `timestamp` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    `cost` INT UNSIGNED NOT NULL,
    `rid` INT NOT NULL
) Engine=InnoDB;
Actual table data:
-- Sample rows; within a rid the newest timestamp is not always on the highest pid
-- (e.g. rid 2: pid 3 is newer than pid 4 and pid 6).
INSERT INTO theTable (`pid`, `timestamp`, `cost`, `rid`)
VALUES
(1, '2011-04-14 01:05:07', 1122, 1),
(2, '2011-04-14 00:05:07', 2233, 1),
(3, '2011-04-14 01:05:41', 4455, 2),
(4, '2011-04-14 01:01:11', 5566, 2),
(5, '2011-04-14 01:06:06', 345, 1),
(6, '2011-04-13 22:06:06', 543, 2),
(7, '2011-04-14 01:14:14', 5435, 3),
(8, '2011-04-14 01:10:13', 6767, 3)
;
I want to get the PID of the latest row for each rid (1 result per unique RID). For the sample data, I'd like:
pid | MAX(timestamp) | rid
-----------------------------------
5 | 2011-04-14 01:06:06 | 1
3 | 2011-04-14 01:05:41 | 2
7 | 2011-04-14 01:14:14 | 3
I've tried running the following query:
-- NOTE(review): pid is neither aggregated nor listed in GROUP BY, so the pid
-- returned is not tied to the row that holds MAX(timestamp).
SELECT MAX(timestamp),rid,pid FROM theTable GROUP BY rid
and I get:
max(timestamp) ; rid; pid
----------------------------
2011-04-14 01:06:06; 1 ; 1
2011-04-14 01:05:41; 2 ; 3
2011-04-14 01:14:14; 3 ; 7
The PID returned is always the first occurrence of PID for an RID (row / pid 1 is the first time rid 1 is used, row / pid 3 the first time RID 2 is used, row / pid 7 is the first time rid 3 is used). Though the query returns the max timestamp for each rid, the pids are not the pids belonging to those timestamps in the original table. What query would give me the results I'm looking for?
(Tested in PostgreSQL 9.something)
Identify the rid and timestamp.
-- Newest timestamp for each rid.
select src.rid,
       max(src.timestamp) as ts
from test src
group by src.rid;
1 2011-04-14 18:46:00
2 2011-04-14 14:59:00
Join to it.
-- Full rows whose (rid, timestamp) equals the newest timestamp of that rid.
select cur.pid, cur.cost, cur.timestamp, cur.rid
from (select rid, max(timestamp) as ts
      from test
      group by rid) maxt
inner join test cur
    on (cur.rid = maxt.rid and cur.timestamp = maxt.ts)
-- Latest row per rid, ordered by timestamp.
-- The newest timestamp per rid is computed explicitly and joined back, rather
-- than relying on GROUP BY to keep the first row of an ordered subquery, which
-- MySQL does not guarantee and ONLY_FULL_GROUP_BY rejects.
select t.`pid`, t.`timestamp`, t.`cost`, t.`rid`
from theTable as t
inner join (
    select `rid`, max(`timestamp`) as latest_ts
    from theTable
    group by `rid`
) as latest
    on latest.`rid` = t.`rid`
   and latest.latest_ts = t.`timestamp`
order by t.`timestamp`
Hope I helped !
-- Full row for the newest timestamp of each rid.
-- tmax exposes only rid and maxtimestamp, so the join is on rid (not pid).
SELECT t.pid, t.cost, t.timestamp, t.rid
FROM test AS t
JOIN (
    SELECT rid, MAX(timestamp) AS maxtimestamp
    FROM test
    GROUP BY rid
) AS tmax
    ON t.rid = tmax.rid
   AND t.timestamp = tmax.maxtimestamp
I created an index on rid and timestamp.
-- Keeps the rows of theTable (alias test) for which no row with the same rid
-- and a later timestamp exists (alias maxt finds no match, so maxt.rid is NULL).
SELECT test.pid, test.cost, test.timestamp, test.rid
FROM theTable AS test
LEFT JOIN theTable maxt
ON maxt.rid = test.rid
AND maxt.timestamp > test.timestamp
WHERE maxt.rid IS NULL
Showing rows 0 - 2 (3 total, Query took 0.0104 sec)
This method will select all the desired values from theTable (test), left joining itself (maxt) on all timestamps higher than the one on test with the same rid. When the timestamp is already the highest one on test there are no matches on maxt - which is what we are looking for - values on maxt become NULL. Now we use the WHERE clause maxt.rid IS NULL or any other column on maxt.
You could also have subqueries like that:
-- One row per rid: the newest timestamp, plus the smallest pid that carries it.
-- NOTE(review): the subquery refers to the select-list alias maxtimestamp;
-- confirm that the target engine resolves an alias from inside a subquery.
SELECT ( SELECT MIN(t2.pid)
FROM test t2
WHERE t2.rid = t.rid
AND t2.timestamp = maxtimestamp
) AS pid
, MAX(t.timestamp) AS maxtimestamp
, t.rid
FROM test t
GROUP BY t.rid
But this way, you'll need one more subquery if you want cost included in the shown columns, etc.
So, the GROUP BY with a JOIN is the better solution.
If you want to avoid a JOIN, you can use:
-- Join-free form: keep the rows whose timestamp is the newest for their rid.
-- The comparison is on timestamp, not pid, because the highest pid of a rid is
-- not necessarily its latest row (in the sample data rid 2's latest row is pid 3).
SELECT t1.pid, t1.rid
FROM theTable t1
WHERE t1.timestamp = ( SELECT MAX(t2.timestamp)
                       FROM theTable t2
                       WHERE t2.rid = t1.rid );
Try:
-- One row per rid: the row for which no newer row with the same rid exists.
-- (Sorting the whole table and applying LIMIT would return the newest rows
-- overall, which can all belong to the same rid.)
select t.pid, t.cost, t.timestamp, t.rid
from theTable t
where not exists (select 1
                  from theTable newer
                  where newer.rid = t.rid
                    and newer.timestamp > t.timestamp)
order by t.timestamp DESC;