SQL - Column in field list is ambiguous - mysql

I have two tables, BOOKING and WORKER. Basically there is a table for workers and a table that keeps track of what each worker has to do in a time frame, i.e. a booking. I'm trying to check whether there is an available worker for a job, so I query the bookings to check whether any workers are free between the requested start and end dates. However, I get stuck on the next part, which is returning the list of workers that do have that time available. I read that I could join the tables based on a shared column, so I tried doing an inner join on the WORKER_NAME column, but when I try this I get an "ambiguous" error. This leads me to believe I have misunderstood the concept. Does anyone understand what I'm trying to do and know how to do it, or know why I get the error below? Thanks guys !!!!
-- One row per worker.
CREATE TABLE WORKER (
    ID          INT         NOT NULL AUTO_INCREMENT,
    WORKER_NAME VARCHAR(80) NOT NULL,
    WORKER_CODE INT,
    WORKER_WAGE INT,
    PRIMARY KEY (ID)
);

-- One row per booked period; the worker is identified by WORKER_NAME.
CREATE TABLE BOOKING (
    ID          INT         NOT NULL AUTO_INCREMENT,
    WORKER_NAME VARCHAR(80) NOT NULL,
    START       DATE        NOT NULL,
    END         DATE        NOT NULL,
    PRIMARY KEY (ID)
);
query
-- Fails with error 1052: WORKER_NAME exists in both joined tables, so the
-- unqualified references in the ON clause are ambiguous.
-- NOTE(review): the DDL shown for this question names the table WORKER, not
-- WORKERS; confirm which name is real.
SELECT *
FROM WORKERS
INNER JOIN BOOKING
ON WORKER_NAME = WORKER_NAME
WHERE (START NOT BETWEEN '2010-10-01' AND '2010-10-10')
ORDER BY ID
#1052 - Column 'WORKER_NAME' in on clause is ambiguous

In your query, the column "worker_name" exists in two tables; in this case, you must reference the table name as part of the column identifier.
-- Qualify every column that exists in both tables. That is WORKER_NAME in the
-- ON clause and also ID in ORDER BY; an unqualified ID raises error 1052 too.
-- Table names are spelled exactly as in the DDL (WORKER, BOOKING) because
-- MySQL table names can be case-sensitive depending on the platform.
SELECT *
FROM WORKER
INNER JOIN BOOKING
    ON WORKER.WORKER_NAME = BOOKING.WORKER_NAME
WHERE BOOKING.START NOT BETWEEN '2010-10-01' AND '2010-10-10'
ORDER BY WORKER.ID;

In your query, the WORKER_NAME and ID columns exist in both tables, where WORKER_NAME retains the same meaning and ID is re-purposed; in this case, you must either specify that you are using WORKER_NAME as the join search condition or 'project away' (rename or omit) the duplicate ID column.
Because the ID columns are AUTO_INCREMENT, I assume (hope!) they have no business meaning. Therefore, they could both be omitted, allowing a natural join that will cause duplicate columns to be 'projected away'. This is one of those situations where one wishes SQL had a WORKER ( ALL BUT ( ID ) ) type syntax; instead, one is required to do it longhand. It might be easier in the long run to to opt for a consistent naming convention and rename the columns to WORKER_ID and BOOKING_ID respectively.
You would also need to identify a business key to order on e.g. ( START, WORKER_NAME ):
-- Both ID columns are projected away first, leaving WORKER_NAME as the only
-- column the two derived tables share and hence the only NATURAL JOIN key.
SELECT *
FROM (
    SELECT WORKER_NAME, WORKER_CODE, WORKER_WAGE
    FROM WORKER
) AS wrk
NATURAL JOIN (
    SELECT WORKER_NAME, START, END
    FROM BOOKING
) AS bkg
WHERE START NOT BETWEEN '2010-10-01' AND '2010-10-10'
ORDER BY START, WORKER_NAME;
This is good, but it's returning the start and end times as well. I just want the WORKER rows. I can't take the start and end out, because then SQL doesn't recognize the where clause.
Two approaches spring to mind: push the where clause to the subquery:
-- Same join as before, with the date filter applied inside the BOOKING
-- derived table instead of on the joined result.
SELECT *
FROM (
    SELECT WORKER_NAME, WORKER_CODE, WORKER_WAGE
    FROM WORKER
) AS wrk
NATURAL JOIN (
    SELECT WORKER_NAME, START, END
    FROM BOOKING
    WHERE START NOT BETWEEN '2010-10-01' AND '2010-10-10'
) AS bkg
ORDER BY START, WORKER_NAME;
Alternatively, replace SELECT * with a list of columns you want to SELECT:
-- Only the worker columns are returned; START stays available to the WHERE
-- and ORDER BY clauses because it is still part of the joined row.
SELECT
    WORKER_NAME,
    WORKER_CODE,
    WORKER_WAGE
FROM (
    SELECT WORKER_NAME, WORKER_CODE, WORKER_WAGE
    FROM WORKER
) AS wrk
NATURAL JOIN (
    SELECT WORKER_NAME, START, END
    FROM BOOKING
) AS bkg
WHERE START NOT BETWEEN '2010-10-01' AND '2010-10-10'
ORDER BY START, WORKER_NAME;

This error occurs when you reference a field that exists in more than one of the joined tables, so you must qualify it with the table name or alias. For instance, in the example below I write cod.coordinator so that the DBMS knows which coordinator I want:
-- coordinator is referenced on both co and cod, so the selected column is
-- qualified (cod.coordinator) to tell the DBMS which one is meant.
SELECT
    project__number,
    surname,
    firstname,
    cod.coordinator
FROM coordinators AS co
INNER JOIN hub_applicants AS ap
    ON co.project__number = ap.project_id
INNER JOIN coordinator_duties AS cod
    ON co.coordinator = cod.email;

Related

MYSQL ERROR CODE: 1288 - can't update with join statement

Thanks for past help.
While doing an update using a join, I am getting 'Error Code: 1288. The target table _____ of the UPDATE is not updatable' and can't figure out why. I can update the table with a simple update statement (UPDATE sales.customerABC Set contractID = 'x';) but can't when using a join like this:
-- Fails with error 1288: the UPDATE target "a" is a derived table (a
-- sub-select), not a base table, so MySQL reports it as not updatable.
UPDATE (
SELECT * #where '*' contains columns a.uniqueID and a.contractID
FROM sales.customerABC
WHERE contractID IS NULL
) as a
LEFT JOIN (
SELECT uniqueID, contractID
FROM sales.tblCustomers
WHERE contractID IS NOT NULL
) as b
ON a.uniqueID = b.uniqueID
SET a.contractID = b.contractID;
If I change that update statement into a SELECT such as:
SELECT * FROM (
SELECT *
FROM opwSales.dealerFilesCTS
WHERE pcrsContractID IS NULL
) as a
LEFT JOIN (
SELECT uniqueID, pcrsContractID
FROM opwSales.dealerFileLoad
WHERE pcrsContractID IS NOT NULL
) as b
ON a."Unique ID" = b.uniqueID;
the result table would contain these columns:
a.uniqueID, a.contractID, b.uniqueID, b.contractID
59682204, NULL, NULL, NULL
a3e8e81d, NULL, NULL, NULL
cfd1dbf9, NULL, NULL, NULL
5ece009c, , 5ece009c, B123
5ece0d04, , 5ece0d04, B456
5ece7ab0, , 5ece7ab0, B789
cfd21d2a, NULL, NULL, NULL
cfd22701, NULL, NULL, NULL
cfd23032, NULL, NULL, NULL
I pretty much have all database privileges and can't find restrictions with the table reference data. Can't find much information online concerning the error code, either.
Thanks in advance guys.
You cannot update a sub-select because it's not a "real" table - MySQL cannot easily determine how the sub-select assignment maps back to the originating table.
Try:
-- Update the base table directly; the joined table only supplies the value.
UPDATE customerABC AS tgt
INNER JOIN tblCustomers AS src USING (uniqueID)
SET tgt.contractID = src.contractID
WHERE src.contractID IS NOT NULL
  AND tgt.contractID IS NULL;
Notes:
you can use a plain (inner) JOIN instead of a LEFT JOIN, since you want uniqueID to exist and not be null in both tables. A LEFT JOIN would generate extra NULL rows from tblCustomers, only to have them shot down by the clause requirement that tblCustomers.contractID be not NULL. Since they allow more stringent restrictions on indexes, inner JOINs tend to be more efficient than LEFT JOINs.
since the field has the same name in both tables you can replace ON (a.field1 = b.field1) with the USING (field1) shortcut.
you obviously strongly want a covering index on (uniqueID, contractID) on both tables to maximize efficiency
this is so not going to work unless you have "real" tables for the update. The "tblCustomers" may be a view or a subselect, but customerABC may not. You might need a more complicated JOIN to pull out a complex WHERE which might be otherwise hidden inside a subselect, if the original 'SELECT * FROM customerABC' was indeed a more complex query than a straight SELECT. What this boils down to is, MySQL needs a strong unique key to know what it needs to update, and it must be in a single table. To reliably update more than one table I think you need two UPDATEs inside a properly write-locked transaction.

Mysql deduplicate records in single query

I have the following table:
-- Contact records; duplicates share the same relationcode and email_address.
CREATE TABLE `relations` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`relationcode` varchar(25) DEFAULT NULL,
`email_address` varchar(100) DEFAULT NULL,
`firstname` varchar(100) DEFAULT NULL,
`latname` varchar(100) DEFAULT NULL,
-- held as text, not as a DATE type
`last_contact_date` varchar(25) DEFAULT NULL,
PRIMARY KEY (`id`)
)
In this table there are duplicates, these are relation with exact the same relationcode and email_address. They can be in there twice or even 10 times.
I need a query that selects the ids of all records, but excludes the ones that are in there more than once. Of those duplicated records, I would like to select only the record with the most recent last_contact_date.
I'm more into Oracle than Mysql, In Oracle I would be able to do it this way:
select * from (
select row_number () over (partition by relationcode order by to_date(last_contact_date,'dd-mm-yyyy')) rank,
id,
relationcode,
email_address ,
last_contact_date
from RELATIONS)
where rank = 1
But I can't figure out how to modify this query to work in MySQL. I'm not even sure it's possible to do the same thing in a single query in MySQL.
Any ideas?
Normal way to do this is a sub query to get the latest record and then join that against the table:-
-- Keep, for each (relationcode, email_address) pair, only the row(s) carrying
-- the most recent last_contact_date.
--
-- * Output columns are qualified with r.: relationcode and email_address also
--   exist in the derived table, so unqualified they are ambiguous (error 1052).
-- * last_contact_date is a VARCHAR; it is parsed with STR_TO_DATE so that MAX
--   compares dates rather than strings. The '%d-%m-%Y' format mirrors the
--   'dd-mm-yyyy' mask used in the question's Oracle query - confirm it matches
--   the stored data.
-- * <=> (NULL-safe equality) keeps groups whose key columns are NULL; both key
--   columns are declared DEFAULT NULL.
SELECT
    r.id,
    r.relationcode,
    r.email_address,
    r.firstname,
    r.latname,
    r.last_contact_date
FROM relations AS r
INNER JOIN (
    SELECT relationcode,
           email_address,
           MAX(STR_TO_DATE(last_contact_date, '%d-%m-%Y')) AS latest_contact_date
    FROM relations
    GROUP BY relationcode, email_address
) AS Sub1
    ON  r.relationcode <=> Sub1.relationcode
    AND r.email_address <=> Sub1.email_address
    AND STR_TO_DATE(r.last_contact_date, '%d-%m-%Y') = Sub1.latest_contact_date;
It is possible to manually generate the kind of rank that your Oracle query uses using variables. Bit messy though!
-- Number the rows inside each (relationcode, email_address) group, newest
-- first, with MySQL user variables, then keep only the first row per group.
-- User variables are written @name; a leading # would start a comment.
SELECT id, relationcode, email_address, firstname, latname, last_contact_date
FROM
(
    -- seq restarts at 1 whenever the group key differs from the previous row;
    -- the two trailing assignments remember the current key for the next row.
    SELECT id, relationcode, email_address, firstname, latname, last_contact_date,
           @seq := IF(@relationcode = relationcode AND @email_address = email_address, @seq + 1, 1) AS seq,
           @relationcode := relationcode,
           @email_address := email_address
    FROM
    (
        -- Sub1 initialises the variables. The date is stored as text, so it is
        -- parsed before sorting; the format mirrors the 'dd-mm-yyyy' mask in
        -- the question - confirm it matches the stored data.
        SELECT id, relationcode, email_address, firstname, latname, last_contact_date
        FROM relations
        CROSS JOIN (SELECT @seq := 0, @relationcode := '', @email_address := '') Sub1
        ORDER BY relationcode, email_address, STR_TO_DATE(last_contact_date, '%d-%m-%Y') DESC
    ) Sub2
) Sub3
WHERE seq = 1;
This uses a sub query to initialise the variables. The sequence number is incremented if the relation code and email address are the same as on the previous row; if not, it is reset to 1. Either way it is stored in a field. The outer select then checks the sequence number (as a field, not as the variable name), and a record is only returned if it is 1.
Note that I have done this as multiple sub queries, partly to make it clearer to you, but also to try to force the order in which MySQL executes it. There are a couple of possible issues with how MySQL says it may order the execution of things that could cause a problem. They never have done for me, but with sub queries I would hope to force the order.
Here is a method that will work in both MySQL and Oracle. It rephrases the question as: Get me all rows from relations where the relationcode has no larger last_contact_date.
It works something like this:
-- A row survives when no other row with the same relationcode has a later
-- last_contact_date.
SELECT cur.*
FROM relations cur
WHERE NOT EXISTS (
    SELECT 1
    FROM relations newer
    WHERE newer.last_contact_date > cur.last_contact_date
      AND newer.relationcode = cur.relationcode
);
With the appropriate indexes, this should be pretty efficient in both databases.
Note: This assumes that last_contact_date is stored as a date, not as a string (as in your table example). Storing dates as strings is just a really bad idea, and you should fix your data structure.

SQL query to select from two nested sub queries

This is a pretty hard sql query on my HW assignment and i am kind of stuck. any hints would be appreciated.
my query:
-- nest1: member count per (carrier name, plan description).
-- nest2: member count per carrier id.
-- NOTE(review): nest1 (carname, plndescription, plancount) and nest2
-- (carrierid, totalmems) expose no column of the same name, so the NATURAL
-- JOIN has no join key and pairs every nest1 row with every nest2 row.
SELECT nest1.carname,
nest1.plndescription,
nest1.plancount,
nest2.totalmems,
Round(( nest1.plancount / nest2.totalmems ), 2) AS pct
FROM (SELECT carriers.carname,
p.plndescription,
Count(members.planid)AS plancount
FROM carriers,
plans p,
members
WHERE carriers.carrierid = p.carrierid
AND p.planid = members.planid
GROUP BY carriers.carname,
p.plndescription)nest1
NATURAL JOIN (SELECT carriers.carrierid,
Count(members.planid)AS totalmems
FROM carriers,
plans p,
members
WHERE carriers.carrierid = p.carrierid
AND p.planid = members.planid
GROUP BY carriers.carrierid)nest2
ORDER BY nest1.carname
my tables and relationships;
` CREATE TABLE Carriers
( CarrierID varchar2(4) not null,
carName varchar2(35),
carAddress varchar2(50),
carCity varchar2(30),
carStCode varchar2(2),
carZip varchar2(10),
carPhone varchar2(10),
carWebSite varchar2(255),
carContactFirstname varchar2(35),
carContactLastName varchar2(35),
carContactEmail varchar2(255),
CONSTRAINT pk_CarrierID PRIMARY KEY (CarrierID)
);`
CREATE TABLE Plans
( PlanID integer not null,
plnDescription varchar2(35),
plnCost decimal (8,2),
CarrierID varchar2(4),
CONSTRAINT pk_PlanID PRIMARY KEY (PlanID),
CONSTRAINT fk_CarrierID FOREIGN KEY (CarrierId) REFERENCES Carriers
);
CREATE TABLE Members
( MemberNo integer not null,
mbrFirstname varchar2(35),
mbrLastName varchar2(35),
mbrStreet varchar2(50),
mbrCity varchar2(30),
mbrState varchar2(2),
mbrZip varchar2(10),
mbrPhoneNo varchar2(10),
PlanID integer,
mbrEmail varchar2(255),
mbrDateEffective date,
employerID integer,
CONSTRAINT pk_MemberNo PRIMARY KEY (MemberNo),
CONSTRAINT fk_PlanID FOREIGN KEY (PlanId) REFERENCES Plans,
CONSTRAINT fk_employerID FOREIGN KEY (employerID) REFERENCES employers
);
the problem :
Create a query that will list all Carriers and their Plans along with a column that displays the number of members in that Plan, the total number of members serviced by the Carrier and the percent of the Carrier’s Members that are in that Plan. For Example – Blue Cross Blue Shield – would display as follows:
correct output:
Carrier Plan PlanCount TotalMems Pct
Blue Cross Blue Shield 2-Party Basic Medical 10 22 45.45
Being lazy, I wouldn't want to type that much. I didn't check to see if these parse correctly, but since it is homework it should be too easy.
if you were using mysql I would do it like this and use session variables with subqueries:
-- MySQL sketch using a session variable (@plnCount) shared by two scalar
-- subqueries. Variables are written @name; a leading # would start a comment.
-- NOTE(review): the author states this was not checked. There is no GROUP BY,
-- so count(1) aggregates over the whole join - verify before use.
select c.carName As Carrier
, p.plnDescription As Plan
, count(1) As PlanCount
, (select @plnCount:=count(1) from members m where m.PlanId=p.PlanId) TotalMems
, (select @plnCount/count(1) from members) Pct
from carriers c
left join plans p on p.CarrierId=c.CarrierId
but your ddl indicates you are using oracle (b/c varchar2), so something more like:
select c.carName As Carrier
, p.plnDescription As Plan
, PlanCount
, PlanCount/tot Pct
from carriers c
,plans p on p.CarrierId=c.CarrierId
,(select sum(decode(m.planId,p.planId,1,0)) PlanCount, count(1) tot from members)
I would do with the following. The lowest query is just a query of ALL MEMBERS regardless of which plan a person is in. This will need to be run on its own in the query execution plan as it will result in a single record and applied to every record for the rest of the query. This results with no actual JOIN or WHERE clause.
Next, the first part of the from does a pre-aggregation on just the plan ID and count of members per specific plan grouped by plan.
Now, get into proper JOIN clause to pull in the details of the plan for it's description, then the carrier for the carrier name, then order it by whatever you want...
-- PerPlan:  member count per plan.
-- AllPlans: a single row holding the count of all members. It is cross-joined
--           so that the total is available on every output row.
-- The percentage expression references AllMembersCount, the alias actually
-- defined in AllPlans.
SELECT
    C.CarName,
    P.PlnDescription,
    PerPlan.PerPlanCount,
    ( PerPlan.PerPlanCount / AllPlans.AllMembersCount ) AS PcntOfAllMembers
FROM
    ( SELECT M.PlanID, COUNT(*) AS PerPlanCount
      FROM Members M
      GROUP BY M.PlanID ) PerPlan
    JOIN Plans P
        ON PerPlan.PlanID = P.PlanID
    JOIN Carriers C
        ON P.CarrierID = C.CarrierID
    CROSS JOIN
    ( SELECT COUNT(*) AS AllMembersCount
      FROM Members ) AllPlans
ORDER BY
    C.CarName,
    P.PlnDescription
You had a count of 10 for a plan count on your sample, but nothing to indicate multiple sub-plans for '2-Party Basic Medical', so I don't think that is a real column per the data.
HOWEVER, if the 10 is a representation of how many distinct plans that 'Blue Cross' has, and that '2-Party...' is one of those plans, that would be a different query.

Need Help Speeding up an Aggregate SQLite Query

I have a table defined like the following...
-- Action log. SQLite spells the keyword AUTOINCREMENT (one word), and a
-- column list may not end with a trailing comma.
CREATE TABLE actions (
    id        INTEGER PRIMARY KEY AUTOINCREMENT,
    "end"     BOOLEAN,              -- quoted because END is an SQL keyword
    type      VARCHAR(15) NOT NULL,
    subtype_a VARCHAR(15),
    subtype_b VARCHAR(15)
);
I'm trying to query for the last end action of some type to happen on each unique (subtype_a, subtype_b) pair, similar to a group by (except SQLite doesn't say what row is guaranteed to be returned by a group by).
On an SQLite database of about 1MB, the query I have now can take upwards of two seconds, but I need to speed it up to take under a second (since this will be called frequently).
example query:
-- Keeps a row only when its id equals the highest id among rows that share
-- its (subtype_a, subtype_b) pair, have type "some_type" and a non-NULL status.
-- NOTE(review): the table definition shown for this question has no status
-- column; confirm which column is meant.
SELECT * FROM actions a_out
WHERE id =
(SELECT MAX(a_in.id) FROM actions a_in
WHERE a_out.subtype_a = a_in.subtype_a
AND a_out.subtype_b = a_in.subtype_b
AND a_in.status IS NOT NULL
AND a_in.type = "some_type");
If it helps, I know all the unique possibilities for a (subtype_a,subtype_b)
eg:
(a,1)
(a,2)
(b,3)
(b,4)
(b,5)
(b,6)
Beginning with version 3.7.11, SQLite guarantees which record is returned in a group:
Queries of the form: "SELECT max(x), y FROM table" returns the value of y on the same row that contains the maximum x value.
So greatest-n-per-group can be implemented in a much simpler way:
-- Relies on the SQLite (3.7.11+) rule quoted above: the bare columns come from
-- the row that holds max(id), giving one row per (subtype_a, subtype_b) group.
SELECT
    *,
    max(id)
FROM actions
WHERE type = 'some_type'
GROUP BY
    subtype_a,
    subtype_b;
Is this any faster?
-- Collect the highest id per (subtype_a, subtype_b) group for the wanted type,
-- then fetch those rows. The type is a single-quoted string literal; in
-- double quotes it would be parsed as an identifier first.
SELECT *
FROM actions
WHERE id IN (
    SELECT max(id)
    FROM actions
    WHERE type = 'some_type'
    GROUP BY subtype_a, subtype_b
);
This is the greatest-n-per-group problem that comes up frequently on StackOverflow.
Here's how I solve it:
-- Anti-join: a_out is the latest row of its group when no a_in row exists
-- with the same (subtype_a, subtype_b), the same type and a higher id.
-- The type match on a_in is required: without it, a later row of a different
-- type in the same group would hide the latest 'some_type' row.
SELECT a_out.*
FROM actions AS a_out
LEFT JOIN actions AS a_in
    ON  a_in.subtype_a = a_out.subtype_a
    AND a_in.subtype_b = a_out.subtype_b
    AND a_in.type = a_out.type
    AND a_in.id > a_out.id
WHERE a_out.type = 'some_type'
  AND a_in.id IS NULL;
If you have an index on (type, subtype_a, subtype_b, id) this should run very fast.
See also my answers to similar SQL questions:
Fetch the row which has the Max value for a column
Retrieving the last record in each group
SQL join: selecting the last records in a one-to-many relationship
Or this brilliant article by Jan Kneschke: Groupwise Max.

Optimizing tree branch data aggregation in SQL Server 2008 (recursion)

I have a table containing stages and sub-stages of certain projects, and a table with specific tasks and estimated costs.
I need some way to aggregate each level (stages/sub-stages), to see how much it costs, but to do it at a minimum performance cost.
To illustrate this, I will use the following data structure:
-- Tree of stages: fk_parent holds the id of the parent stage and is NULL for
-- a root stage.
CREATE TABLE stage (
    id        INT NOT NULL,
    fk_parent INT
);

-- Tasks attached to a stage, each with an estimated cost.
CREATE TABLE task (
    id       INT           NOT NULL,
    fk_stage INT           NOT NULL,
    cost     DECIMAL(18,2) NOT NULL DEFAULT 0
);
with the following data:
==stage==
id fk_parent
1 null
2 1
3 1
==task==
id fk_stage cost
1 2 100
1 2 200
1 3 600
I want to obtain a table containing the total costs on each branch. Something like this:
Stage ID Total Cost
1 900
2 300
3 600
But, I also want it to be productive. I don't want to end up with extremely bad solutions like The worst algorithm in the world. I mean this is the case. In case I'll request the data for all the items in the stage table, with the total costs, each total cost will be evaluated D times, where D is the depth in the tree (level) at which it is situated. I am afraid I'll hit extremely low performances at large amounts of data with a lot of levels.
SO,
I decided to do something which made me ask this question here.
I decided to add 2 more columns to the stage table, for caching.
...
calculated_cost decimal(18,2),
date_calculated_cost datetime
...
So what I wanted to do is pass another variable within the code, a datetime value which equals to the time when this process was started (pretty much unique). That way, if the stage row already has a date_calculated_cost which equals to the one I'm carrying, I don't bother calculating it again, and just return the calculated_cost value.
I couldn't do it with Functions (updates are needed to the stage table, once costs are calculated)
I couldn't do it with Procedures (recursion within running cursors is a no-go)
I am not sure temporary tables are suitable because it wouldn't allow concurrent requests to the same procedure (which are least likely, but anyway I want to do it the right way)
I couldn't figure out other ways.
I am not expecting a definite answer to my question, but I will reward any good idea, and the best will be chosen as the answer.
1. A way to query the tables to get the aggregated cost.
Calculate the cost for each stage.
Use a recursive CTE to get the level for each stage.
Store the result in a temp table.
Add a couple of indexes to the temp table.
Update the cost in the temp table in a loop for each level
The first three steps are combined into one statement. It might be good for performance to do the first calculation, cteCost, into a temp table of its own and use that temp table in the recursive cteLevel.
-- Steps 1-3 in one statement: direct cost per stage (cteCost), tree level of
-- every stage (cteLevel), result stored in the temp table #task.
;with cteCost as
(
  -- Own cost of each stage; stages without tasks get 0.
  select s.id,
         s.fk_parent,
         isnull(sum(t.cost), 0) as cost
  from stage as s
    left outer join task as t
      on s.id = t.fk_stage
  group by s.id, s.fk_parent
),
cteLevel as
(
  -- Anchor: root stages are level 1.
  select cc.id,
         cc.fk_parent,
         cc.cost,
         1 as lvl
  from cteCost as cc
  where cc.fk_parent is null
  union all
  -- Recursion: a child is one level below its parent.
  select cc.id,
         cc.fk_parent,
         cc.cost,
         lvl+1
  from cteCost as cc
    inner join cteLevel as cl
      on cc.fk_parent = cl.id
)
select *
into #task
from cteLevel;

create clustered index IX_id on #task (id);
create index IX_lvl on #task (lvl, fk_parent);

-- @lvl is a local variable (the # prefix is reserved for temp tables such as
-- #task). Start at the deepest level.
declare @lvl int;
select @lvl = max(lvl)
from #task;

-- Roll costs up one level per iteration, deepest level first, so that each
-- parent receives the already-accumulated totals of its children.
while @lvl > 0
begin
  update T1 set
    T1.cost = T1.cost + T2.cost
  from #task as T1
    inner join (select fk_parent, sum(cost) as cost
                from #task
                where lvl = @lvl
                group by fk_parent) as T2
      on T1.id = T2.fk_parent;
  set @lvl = @lvl - 1;
end

select id as [Stage ID],
       cost as [Total Cost]
from #task;

drop table #task;
2. A trigger on table task that maintains a calculated_cost field in stage.
-- Keeps stage.calculated_cost in step with task changes: the net cost change
-- per stage is computed from inserted/deleted and added to that stage and to
-- every ancestor of it.
create trigger tr_task
on task
after insert, update, delete
as
-- Table variable to hold the net cost change per stage
declare @T table
(
  id int not null,
  cost decimal(18,2) not null default 0
);
-- Get the updates from inserted and deleted tables
-- (deleted rows count negatively, so an UPDATE nets out to the difference)
insert into @T (id, cost)
select fk_stage, sum(cost)
from (
      select fk_stage, cost
      from inserted
      union all
      select fk_stage, -cost
      from deleted
     ) as T
group by fk_stage;

declare @id int;
select @id = min(id)
from @T;

-- For each affected stage
while @id is not null
begin
  -- Recursive update of the stage and all its ancestors
  ;with cte as
  (
    select s.id,
           s.fk_parent
    from stage as s
    where id = @id
    union all
    select s.id,
           s.fk_parent
    from stage as s
      inner join cte as c
        on s.id = c.fk_parent
  )
  update s set
    -- isnull: a calculated_cost that is still NULL would otherwise stay NULL
    calculated_cost = isnull(s.calculated_cost, 0) + t.cost
  from stage as s
    inner join cte as c
      on s.id = c.id
    cross apply (select cost
                 from @T
                 where id = @id) as t;
  -- Get the next id
  select @id = min(id)
  from @T
  where id > @id;
end