MySQL query using subselects needs to use joins or exists - mysql

I've got this query that has been added to over time and even merged with other queries etc, so it has become quite a mess.
It takes WAY too long to execute now. I tried using EXPLAIN EXTENDED and add any indexes / keys I could but nothing I did helped for some reason.
I'm pretty sure the reason is all the sub-selects as mysql has to create a temporary table in memory and perform a non-indexed lookup on that table for every single row (at least, that's what I've been reading).
I've been reading up on sub-queries, joins, and using exists to try to optimize this thing, but I am just not understanding the best way to go about this.
Is there any way to use joins or use exists to replace some of the sub-queries and get this query to run faster?
The query:
-- Lists service tickets flagged through their messages as "waiting" / "warning",
-- newest first, first page of 10.
-- Two parse errors in the query are corrected here: the unterminated back-tick on
-- `warning`, and the undefined aliases m2 / m22 in the correlated lookups
-- (the only aliases in scope there are m3 / m33).
-- NOTE(review): AND binds tighter than OR, so the bare OR branches below are not
-- restricted by is_delete or the date window. Precedence is left exactly as
-- written -- confirm the intended grouping before adding parentheses.
SELECT
    s.*,
    CASE
        WHEN m_id.make_name IS NOT NULL THEN m_id.make_name
        WHEN m.make_name IS NOT NULL THEN m.make_name
        ELSE s.brand
    END AS brandname
FROM
    services AS s
    -- s.brand is matched both as a numeric makelist id and as a literal make name
    LEFT JOIN makelist AS m_id
        ON CAST(s.brand AS unsigned) = m_id.id
    LEFT JOIN makelist AS m
        ON s.brand = m.make_name
WHERE
    s.is_delete = 'n'
    AND UNIX_TIMESTAMP(s.`date`) >= 1420070400
    AND UNIX_TIMESTAMP(s.`date`) <= 1451563199
    -- original messages (edit_id = 0) flagged waiting that have no edits
    AND s.service_id IN (
        SELECT ticket_id
        FROM messages
        WHERE edit_id = 0
          AND waiting = 1
          AND message_id NOT IN (
              SELECT edit_id
              FROM messages
              WHERE edit_id != 0 )
    )
    -- edits flagged waiting whose datetime equals a per-edit_id MAX(datetime);
    -- the ticket is looked up through the original message (m3)
    OR s.service_id IN (
        SELECT
            ( SELECT m3.ticket_id
              FROM messages m3
              WHERE m88.edit_id = m3.message_id ) AS ticket_id
        FROM messages m88
        WHERE m88.edit_id IN (
            SELECT t11.edit_id
            FROM
                ( SELECT MAX(`datetime`) AS newdate
                  FROM messages
                  WHERE edit_id != 0
                  GROUP BY edit_id ) AS t22
                INNER JOIN messages AS t11
                    ON t11.`datetime` = t22.newdate
            WHERE t11.`waiting` = 1 )
    )
    -- original messages (edit_id = 0) flagged warning that have no edits
    AND s.service_id IN (
        SELECT ticket_id
        FROM messages
        WHERE edit_id = 0
          AND warning = 1
          AND message_id NOT IN (
              SELECT edit_id
              FROM messages
              WHERE edit_id != 0 )
    )
    -- edits flagged warning whose datetime equals a per-edit_id MAX(datetime);
    -- the ticket is looked up through the original message (m33)
    OR s.service_id IN (
        SELECT
            ( SELECT m33.ticket_id
              FROM messages m33
              WHERE m888.edit_id = m33.message_id ) AS ticket_id
        FROM messages m888
        WHERE m888.edit_id IN (
            SELECT t111.edit_id
            FROM
                ( SELECT MAX(`datetime`) AS newdate
                  FROM messages
                  WHERE edit_id != 0
                  GROUP BY edit_id ) AS t222
                INNER JOIN messages AS t111
                    ON t111.`datetime` = t222.newdate
            WHERE t111.`warning` = 1 )
    )
ORDER BY
    s.`date` DESC
LIMIT
    0, 10
And ... the data sample...
table: messages
CREATE TABLE IF NOT EXISTS `messages` (
`message_id` int(10) NOT NULL AUTO_INCREMENT,
`employee_id` int(10) NOT NULL,
`admin_id` int(10) NOT NULL,
`ticket_id` int(10) NOT NULL,
`message` text NOT NULL,
`status` char(1) NOT NULL COMMENT 'r=read, u=unread',
`datetime` datetime NOT NULL,
`warning` tinyint(1) NOT NULL DEFAULT '0',
`waiting` tinyint(1) NOT NULL DEFAULT '0',
`edit_id` int(10) NOT NULL,
PRIMARY KEY (`message_id`),
KEY `message_id` (`message_id`),
KEY `edit_id` (`edit_id`),
KEY `ticket_id` (`ticket_id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=197 ;
INSERT INTO `messages` (`message_id`, `employee_id`, `admin_id`, `ticket_id`, `message`, `status`, `datetime`, `warning`, `waiting`, `edit_id`) VALUES
(189, 18, 0, 4049, 'Ordered battery ', 'u', '2015-06-02 13:14:38', 0, 1, 0),
(190, 18, 0, 4069, 'Ordered Ram', 'u', '2015-06-04 09:17:57', 0, 0, 0),
(191, 18, 0, 4069, 'Ordered Ram', 'u', '2015-06-04 09:18:43', 0, 1, 0),
(192, 18, 0, 4068, 'Ordered Hard Drive', 'u', '2015-06-04 13:40:13', 0, 1, 0),
(193, 1, 0, 3712, 'customer called just now and said data was missing from last time it was here, i informed her that we keep backups for a month (not 4) and that was definitely gone, and that her screen was still going blank, and i informed her she needed to drop it by for free test. she said her daughter has it in another county and it will be a while before she can bring it in. ', 'u', '2015-06-06 09:59:27', 1, 0, 0),
(194, 18, 0, 4089, 'Ordered Keyboard ', 'u', '2015-06-09 09:51:33', 0, 1, 0),
(195, 18, 0, 4103, 'Battery PA3817u-1BRS.... $39 or Jack $100.. customer said will bring it back next week. ', 'u', '2015-06-11 16:53:16', 0, 0, 0),
(196, 18, 0, 4105, 'Ordered Screen ', 'u', '2015-06-12 11:26:09', 0, 1, 0);
table: makelist
CREATE TABLE IF NOT EXISTS `makelist` (
`id` int(255) NOT NULL AUTO_INCREMENT,
`make_name` varchar(255) NOT NULL,
`make_desc` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=31 ;
INSERT INTO `makelist` (`id`, `make_name`, `make_desc`) VALUES
(1, 'Acer', ''),
(2, 'Apple', ''),
(3, 'ASUS', ''),
(4, 'Compaq', ''),
(5, 'Dell', ''),
(6, 'Gateway', ''),
(7, 'HP', ''),
(8, 'IBM', ''),
(9, 'Lenovo', ''),
(10, 'Sony', ''),
(11, 'Toshiba', ''),
(27, 'Microsoft', ''),
(26, 'Printer Only', ''),
(25, 'Custom', ''),
(23, 'eMachine', ''),
(24, 'MSI', ''),
(30, 'Panasonic', ''),
(28, 'Samsung', '');
table: services
CREATE TABLE IF NOT EXISTS `services` (
`service_id` int(10) NOT NULL AUTO_INCREMENT,
`employee_id` int(10) NOT NULL,
`customer_id` int(10) NOT NULL,
`name` varchar(255) NOT NULL,
`date` datetime NOT NULL,
`phone` text NOT NULL,
`alternate_phone` text NOT NULL,
`email` varchar(50) NOT NULL,
`brand` varchar(50) NOT NULL,
`model` varchar(50) NOT NULL,
`serial_tag` varchar(50) NOT NULL,
`password` varchar(25) NOT NULL,
`type` char(1) NOT NULL,
`emergency` char(1) NOT NULL,
`symptoms` varchar(100) NOT NULL,
`left_items` text NOT NULL,
`employee_note` text NOT NULL,
`is_delete` char(1) NOT NULL DEFAULT 'n' COMMENT 'y=yes, n=no',
`pickedup` tinyint(1) NOT NULL,
`pickup_time` datetime NOT NULL,
`how_paid` varchar(255) NOT NULL DEFAULT 'NA',
`on_call_list` tinyint(1) NOT NULL,
`call_list_note` mediumtext NOT NULL,
`exclude` tinyint(1) NOT NULL DEFAULT '0',
`paymentAmount` decimal(7,2) NOT NULL,
`typeother` varchar(255) NOT NULL,
`na_reason` varchar(255) NOT NULL,
PRIMARY KEY (`service_id`),
KEY `service_id` (`service_id`),
KEY `employee_id` (`employee_id`),
KEY `customer_id` (`customer_id`),
KEY `is_delete` (`is_delete`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 AUTO_INCREMENT=4121 ;
INSERT INTO `services` (`service_id`, `employee_id`, `customer_id`, `name`, `date`, `phone`, `alternate_phone`, `email`, `brand`, `model`, `serial_tag`, `password`, `type`, `emergency`, `symptoms`, `left_items`, `employee_note`, `is_delete`, `pickedup`, `pickup_time`, `how_paid`, `on_call_list`, `call_list_note`, `exclude`, `paymentAmount`, `typeother`, `na_reason`) VALUES
(4118, 18, 0, 'custnameone', '2015-06-12 13:36:00', '(111) 111-1442', '', '', 'Other:::Packard Bell', 'MS2290', '', 'pass', 'l', '', '::diagnostics::', 'power_cord::', 'Will not turn on.. ', 'n', 0, '0000-00-00 00:00:00', 'NA', 0, '', 0, '0.00', '', ''),
(4119, 18, 0, 'custnametwo', '2015-06-12 15:51:00', '(111) 111-9390', '(111) 111-8207 cell', 'email#yahoo.com', '11', 'Satellite L675', '', '', 'l', 'n', ':virus:::', '::', 'Clean up.. Virus\r\n', 'n', 0, '0000-00-00 00:00:00', 'NA', 0, '', 0, '0.00', '', ''),
(4120, 18, 0, 'custnamethree', '2015-06-12 17:57:00', '(111) 111-1455', '', 'email#yahoo.com', '10', 'Vaio E - Sve151D11L', '', '1234', 'l', 'n', ':virus:diagnostics::', 'power_cord::', 'Will not boot to windows ', 'n', 0, '0000-00-00 00:00:00', 'NA', 0, '', 0, '0.00', '', '');
Update, after request for more detail:
This query is listing all the records in the services table and is generated dynamically through PHP. Each record in services table can have 1 or more messages attached to it, linked through services.service_id = messages.ticket_id. When someone posts a message (or edits one) they can select to flag it as "warning" and/or "waiting". This query is pulling up the tickets who have messages with warning or waiting set to 1.
However, another layer of the onion peeled back and we come to the message edits. Message edits are stored in the same table; the difference is that a message edit has no ticket_id but instead has an edit_id which equals the message_id of the original message. So the query has to find the ticket, find the messages associated with the ticket, find out if those messages have edits, and determine which one is the most recent current version of the message and if that current version of the message is marked with a warning or waiting. Hence the messy cumbersome query.
The stuff dealing with the makelist table and brand is just there for completeness as it was tricky to get working and want to make sure whatever solution has that included. It's not really pertinent in this case, but the makelist/brand stuff is basically looking up the name of the brand based on the brand id that is stored in the services table in the brand column.

Whatever comments you supply to this answer, I will continue with helping try revision for you, but was too much to describe in a comment to your original post.
You are looking at services within a given UNIX time range, yet your qualifier on service_id in sub-selects is looking against ALL messages. So, are you only interested in tickets that first qualify for the time range in question?
your complex WHERE clause (abbreviated...)
WHERE
s.is_delete = 'n'
and UNIX_TIMESTAMP(s.`date`) >= 1420070400
and UNIX_TIMESTAMP(s.`date`) <= 1451563199
and s.service_id in ... (sub-qualify 1)
or service_id in ... (sub-qualify 2)
and s.service_id in ... (sub-qualify 3)
or service_id in ... (sub-qualify 4)
is actually running against ALL messages (per the sub-qualify instances 1-4).
It MIGHT help to do a prequery of original tickets (message_id's) ONLY within the date range first and their qualified child message edits to find the max date vs the entire.
Here is something I came up with and will try to describe and you digest.
-- Returns service rows whose messages (or edits of those messages) are flagged
-- waiting / warning, restricted up-front to non-deleted services in the date window.
SELECT
    s2.*,
    COALESCE( m_id.make_name, m.make_name, s2.brand ) AS brandname
FROM
    -- PreQual: one roll-up row per ticket_id over its messages and their edits
    ( SELECT
          m.ticket_id,
          -- The LEFT JOIN leaves edits.* NULL when a message has no edit, so test
          -- for NULL; comparing edits.edit_id to 0 never matches and would count
          -- every row as an edit.
          SUM( CASE WHEN edits.message_id IS NULL THEN 0 ELSE 1 END ) AS NumberOfEdits,
          SUM( m.waiting + COALESCE( edits.waiting, 0 ) ) AS WaitingMsgs,
          SUM( m.warning + COALESCE( edits.warning, 0 ) ) AS WarningMsgs,
          SUM( m.waiting
             + m.warning
             + COALESCE( edits.waiting, 0 )
             + COALESCE( edits.warning, 0 ) ) AS WaitOrWarnCount,
          MAX( CASE WHEN edits.waiting = 1 THEN edits.`datetime` ELSE NULL END ) AS LatestWaitingDate,
          MAX( CASE WHEN edits.warning = 1 THEN edits.`datetime` ELSE NULL END ) AS LatestWarningDate,
          MAX( CASE WHEN edits.waiting = 1 THEN edits.message_id ELSE NULL END ) AS LatestWaitingMsgID,
          MAX( CASE WHEN edits.warning = 1 THEN edits.message_id ELSE NULL END ) AS LatestWarningMsgID
      FROM
          services AS s
          LEFT JOIN messages m
              ON s.service_id = m.ticket_id
          LEFT JOIN messages edits
              ON m.message_id = edits.edit_id
      WHERE
          s.is_delete = 'n'
          AND UNIX_TIMESTAMP(s.`date`) >= 1420070400
          AND UNIX_TIMESTAMP(s.`date`) <= 1451563199
      GROUP BY
          m.ticket_id ) PreQual
    INNER JOIN services s2
        ON PreQual.ticket_id = s2.service_id
    LEFT JOIN makelist AS m_id
        ON CAST(s2.brand AS unsigned) = m_id.id
    LEFT JOIN makelist AS m
        ON s2.brand = m.make_name
    -- NOTE(review): origMsg is not referenced in the SELECT list or WHERE and
    -- yields one output row per message on the ticket -- confirm it is wanted.
    LEFT JOIN messages origMsg
        ON PreQual.ticket_id = origMsg.ticket_id
    LEFT JOIN messages waitMsg
        ON PreQual.LatestWaitingMsgID = waitMsg.message_id
    -- alias must match the PreQual column name LatestWarningMsgID
    LEFT JOIN messages warnMsg
        ON PreQual.LatestWarningMsgID = warnMsg.message_id
WHERE
    ( PreQual.NumberOfEdits = 0
      AND PreQual.WaitOrWarnCount > 0 )
    OR
    waitMsg.message_id > 0
    OR
    warnMsg.message_id > 0
The first FROM source is actually a sub-select of just those service tickets within the unix date range and status you are interested in. It is LEFT-JOINED to the messages table only for that service ticket ID. That primary message is then LEFT-JOINED to itself based on ANY edits to the original message for the given service ticket.
Now, if there can be multiple messages per ticket, I am doing a simple count via SUM(case/when) if ANY edits are associated to the message. Next, I am getting a sum() based on either the ORIGINAL TICKET message OR any EDIT messages are stamped as "Waiting", so this way, I know up-front if there are ANY Waiting messages. Similarly checking for any WARNING looking at both the original message or any edits. For grins, I am also summing an overall count of ANY WAITING or WARNING associated messages per service ticket.
Next, I am getting the maximum date/time stamp for any possible edit that is associated with waiting or warning for the ticket in question.
Finally, I am getting the latest MESSAGE ID VALUE for the corresponding waiting or warning per the specific ticket if/when applicable. I am using THIS value as IT is the direct message for the service edit ticket regardless of all other service tickets.
So all this is rolled-up to a single row per originally qualified service "Ticket_ID" within the date / status you start with.
Now, the additional joins. Here, I am joining back to the services table on the qualified ticket PreQual result, so I do not have to reapply the unix date/time for the outer query portion... I already have the ticket ID. I then do LEFT JOINs to the original message and to whatever the LATEST waiting message and warning message are, as applicable to EACH TICKET.
At last, now I can apply the overall WHERE clause, and you need to confirm or adjust as needed.
My first criteria of tickets you are interested in are those service tickets that have no pending edits that are waiting or warning, but the ORIGINAL message was at least a status of waiting or warning.
The second criteria (OR'd) is IF THERE WAS an entry with an edit status of WAITING (already qualified from the sum case/when of prequal, I already KNOW its status was waiting)
The third criteria (OR'd) is likewise for an edit status that has a WARNING (again, from sum case/when of the prequal query).
So, if your data is a year or more, and you are only looking for tickets within the current day / week (or whatever) range, you are only considering those messages and edits, not the entire history of everything.
Again, you may need to finalize what you want, but I think I am very close...
ONE FINAL ADDITION...
Not knowing the context of the messages if whatever the last message is, supersedes all prior, this could SIGNIFICANTLY reduce your issues too, so please clarify after.
If a given product is in for service, it gets a ticket ID and default message that a person is "WAITING" for the unit to be picked up. Something happens and an edit is made to the original message, thus an EDIT entry created and warning is indicated, so now, you have the original entry of waiting, and the follow-up as warning. After the customer is contacted and the warning resolved, the product finishes its service and another edit is made and the ticket is closed, so the FINAL edit has no value for either waiting or warning. In this case, the LAST EDIT, regardless of any prior message or edit to prior message "wins" the overall status... The ticket is complete.
Similarly, if a ticket starts as WAITING, and then an edit is made for WARNING, the WARNING (now the most recent) is the primary consideration of status.
If this latest scenario better describes the work flow of service ticket operations, please confirm and I will revise the query to even further simplify.

Related

Only show latest message from each conversation

I have a messaging system which has the tables "message" which just contains the "subject" then "message_user" which contains the message body, who sent it, who its for and whether its deleted / unread.
#Message Table
CREATE TABLE `message` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`subject` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1;
INSERT INTO `message` (`id`, `subject`)
VALUES
(1, 'Test'),
(2, 'Test Again');
#Message User Table
CREATE TABLE `message_user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`message_id` int(11) NOT NULL,
`user_id` int(11) NOT NULL,
`interlocutor` int(11) DEFAULT NULL,
`body` text,
`folder` enum('inbox','sent') NOT NULL,
`starmark` tinyint(1) NOT NULL DEFAULT '0',
`unread` tinyint(1) NOT NULL DEFAULT '1',
`deleted` enum('none','trash','deleted') NOT NULL DEFAULT 'none',
`date` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1;
INSERT INTO `message_user` (`id`, `message_id`, `user_id`, `interlocutor`, `body`, `folder`, `starmark`, `unread`, `deleted`, `date`)
VALUES
(1, 1, 1, 2, 'Hi, how are you?', 'sent', 0, 1, 'none', '2018-10-23 09:36:02'),
(2, 1, 2, 1, 'Hi, how are you?', 'inbox', 0, 1, 'none', '2018-10-23 09:36:02'),
(3, 1, 2, 1, 'I am good thanks, you?', 'sent', 0, 1, 'none', '2018-10-23 09:46:02'),
(4, 1, 1, 2, 'I am good thanks, you?', 'inbox', 0, 1, 'none', '2018-10-23 09:46:02'),
(5, 2, 1, 3, 'Hi!', 'sent', 0, 1, 'none', '2018-10-23 09:50:22'),
(6, 2, 3, 1, 'Hi!', 'inbox', 0, 1, 'none', '2018-10-23 09:50:22');
I wrote the following query:
SELECT
*
FROM message m
JOIN message_user mu ON m.id = mu.message_id
WHERE mu.deleted = 'none'
AND mu.user_id = 1 #user_id of person checking messages
ORDER BY mu.id DESC;
But this is currently returning 3 rows even though there is only two conversations. I tried to GROUP BY but it still showed 3 rows.
I would expect the first two rows in the above example not the last one.
I want the query to return a list of the conversations with the latest message which has been sent which I (user_id) am involved in.
Since your MySQL version is 8.0+, we can utilize Window functions, such as Row_number(); otherwise the solution would have been much more verbose, using Session variables.
For a partition (group) of m.id, we will determine the row number values. Row number values will be ordered in descending order of date.
Now, we simply need to use this result-set as a Derived Table, and just consider those rows where row number value is 1.
Date is a keyword in MySQL. You should avoid naming column/table using it. Still if you have to do so, you will need to use backticks around it.
Try the following (DB Fiddle DEMO):
SELECT dt.*
FROM (
SELECT m.id,
m.subject,
mu.id AS message_user_id,
mu.message_id,
mu.user_id,
mu.interlocutor,
mu.body,
mu.folder,
mu.starmark,
mu.unread,
mu.deleted,
mu.`date`,
Row_number()
OVER (PARTITION BY m.id
ORDER BY mu.`date` DESC) AS row_no
FROM message m
JOIN message_user mu
ON m.id = mu.message_id
WHERE mu.deleted = 'none'
AND mu.user_id = 1 ) AS dt
WHERE dt.row_no = 1
ORDER BY dt.id DESC
Try this :
-- Latest non-deleted message_user row per conversation for user 1.
-- The original GROUP BY id_message selected non-aggregated mu.* columns, which is
-- rejected under ONLY_FULL_GROUP_BY and otherwise returns an arbitrary row per
-- group; joining to the newest message_user.id per message makes it deterministic.
select
    m.id as id_message,
    m.subject as subject_message,
    mu.id as id_message_user,
    mu.interlocutor,
    mu.body,
    mu.folder,
    mu.starmark,
    mu.deleted,
    mu.`date`
from message as m
inner join message_user as mu
    on mu.message_id = m.id
    and mu.deleted = 'none'
    and mu.user_id = 1 -- user_id of person checking messages
inner join (
    -- newest visible row per conversation, under the same filters as above
    select message_id, max(id) as latest_id
    from message_user
    where deleted = 'none'
      and user_id = 1
    group by message_id
) as latest
    on latest.latest_id = mu.id
order by id_message_user desc
I removed
mu.user_id : it's in the inner join condition so always 1
mu.unread :same, always 1
mu.message_id : duplicate of id_message
http://sqlfiddle.com/#!9/91a5e4/15

Solution for subquery in MySQL WHERE IN

I am trying to solve WHERE subquery or to find different solution.
What I am trying to achieve is based on this query:
SELECT c.orig_point_id,
(SELECT attempts
FROM
(SELECT
orig_carrier_id,
orig_point_id,
term_point_id,
term_route,
currency_sell,
is_special,
COUNT(*) AS attempts
FROM cdr
WHERE 1=1
AND start_time >= '2016-10-01 0:00:00'
AND start_time <= '2016-10-31 23:59:59'
GROUP BY orig_carrier_id, currency_sell) AS c0
WHERE c0.orig_carrier_id=3
AND c0.currency_sell="USD"
LIMIT 1) AS attempts,
(SELECT SPLIT(clear_number) as array
FROM
(SELECT
COUNT(*) as total,
clear_number,
orig_carrier_id,
currency_sell
FROM `cdr`
WHERE `start_time`>='2016-10-01 00:00:00'
AND start_time <= '2016-10-31 23:59:59'
GROUP BY `clear_number`
ORDER BY total DESC) AS c0
WHERE c0.orig_carrier_id=3
AND c0.currency_sell="USD"
LIMIT 1) AS splitted_number
FROM cdr AS c
GROUP BY c.orig_carrier_id, c.currency_sell;
SPLIT is a function. Query in that section finds a number(most frequent) and function splits it in ex. 12345,1234,123,12,1. Problem comes when i try to use that as IN subquery. When used directly mysql says functionality not supported. Looks like query is too complex.
When i alias subquery as a workaround it returns NULL, so workaround doesn't work and i believe its returning NULL because of the same reason that its not feasible.
SELECT
CONCAT_WS(" - ",country,region) AS route_name
FROM numbering_plan_external
WHERE
prefix IN(
SELECT array
FROM
(SELECT SPLIT(clear_number) as array
FROM
(SELECT
COUNT(*) as total,
clear_number,
orig_carrier_id,
currency_sell
FROM `cdr`
WHERE `start_time`>='2016-10-01 00:00:00'
AND start_time <= '2016-10-31 23:59:59'
GROUP BY `clear_number`
ORDER BY total DESC) AS c0
WHERE c0.orig_carrier_id=3
AND c0.currency_sell="USD"
LIMIT 1) AS splitted_number)
ORDER BY prefix DESC LIMIT 1) AS top_route
Am I doing anything wrong here, or is there different approach to achieve this. I can leave just split number and later through PHP find the route. It will require lots of queries depending on the results and I am trying to avoid it if possible.
Thanks in advance guys.
Some sample data
CREATE TABLE IF NOT EXISTS `numbering_plan_external` (
`id` int(11) NOT NULL,
`country` varchar(255) NOT NULL,
`region` varchar(255) DEFAULT NULL,
`prefix` varchar(50) NOT NULL,
`is_mobile` tinyint(1) NOT NULL DEFAULT '0',
`last_updated` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`min_sale_price_currency` char(3) CHARACTER SET latin1 COLLATE latin1_general_ci DEFAULT NULL,
`min_sale_price_amount` decimal(10,4) DEFAULT NULL
) ENGINE=InnoDB AUTO_INCREMENT=14004 DEFAULT CHARSET=latin1;
INSERT INTO `numbering_plan_external`
(`id`, `country`, `region`,`prefix`, `is_mobile`, `last_updated`, `min_sale_price_currency`, `min_sale_price_amount`)
VALUES
(13047, 'Tunisia', '', '216', 0, '2016-02-17 12:30:44', NULL, NULL),
(13048, 'Tunisia', 'Mobile (ORANGE)', '2165', 1, '2016-02-17 12:30:44', NULL, NULL),
(13049, 'Tunisia', 'Mobile (ORASCOM)', '2162', 1, '2016-02-17 12:30:44', NULL, NULL),
(13050, 'Tunisia', 'Mobile (TUNTEL)', '21640', 1, '2016-02-17 12:30:44', NULL, NULL),
(13051, 'Tunisia', 'Mobile (TUNTEL)', '21641', 1, '2016-02-17 12:30:44', NULL, NULL),
(13052, 'Tunisia', 'Mobile (TUNTEL)', '2169', 1, '2016-02-17 12:30:44', NULL, NULL);
CREATE TABLE IF NOT EXISTS `cdr` (
`id` int(11) NOT NULL,
`orig_carrier_id` int(11) NOT NULL,
`orig_point_id` int(11) NOT NULL,
`term_carrier_id` int(11) NOT NULL,
`term_point_id` int(11) NOT NULL,
`clear_number` varchar(100) COLLATE latin1_general_ci NOT NULL,
`is_special` tinyint(1) NOT NULL DEFAULT '0',
`start_time` datetime NOT NULL,
`currency_sell` char(3) COLLATE latin1_general_ci NOT NULL
) ENGINE=InnoDB AUTO_INCREMENT=16385 DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
INSERT INTO `cdr`
(`id`, `orig_carrier_id`, `orig_point_id`, `term_carrier_id`, `term_point_id`, `clear_number`, `is_special`, `start_time`, `currency_sell`) VALUES
(1, 3, 5, 0, 0, '21658502507', 0, '2016-10-17 00:02:04', 'USD'),
(2, 3, 5, 0, 0, '21658502507', 0, '2016-10-17 00:02:04', 'USD'),
(3, 3, 5, 0, 0, '21658502507', 0, '2016-10-17 00:03:56', 'USD'),
(4, 3, 5, 0, 0, '21658502507', 0, '2016-10-17 00:09:28', 'USD'),
(5, 3, 5, 0, 0, '21658502507', 0, '2016-10-17 00:16:35', 'USD');
IN considers values as a whole. Whatever your SPLIT() is doing, even though it returns a "csv", that entire list is considered one SINGLE value, e.g. it'll be parsed/executed as the equivalent
WHERE foo IN ('12345,1234,...')
WHERE foo='12345,1234,...'
instead of these
WHERE foo IN ('12345', '1234', '123', ...)
WHERE foo='12345' OR foo='1234' OR ...
You could try using MySQL's find_in_set() instead, which basically does what you want.
Using your sample data with this query:
-- Attempts and matched regions per originating point for carrier 3 / USD in
-- October 2016. Rows are matched by prefix: clear_number must start with npe.prefix.
SELECT
    cdr.orig_point_id
  , count(*) attempts  -- count(cdr.*) is not valid MySQL syntax
  , group_concat(distinct npe.region) regions
FROM cdr
INNER JOIN numbering_plan_external npe
    -- explicit collation: the two tables are declared with different collations
    ON cdr.clear_number like concat(npe.prefix,'%') COLLATE latin1_general_ci
    AND npe.region <> ''
WHERE cdr.orig_carrier_id=3
  AND cdr.currency_sell='USD'
  -- half-open range covers the whole of October
  AND cdr.start_time >= '2016-10-01'
  AND cdr.start_time < '2016-11-01'
GROUP BY
    cdr.orig_point_id
;
Result:
| orig_point_id | attempts | regions |
|---------------|----------|----------------|
| 5 | 5 | Mobile (ORANGE)|
The join between those tables involves comparing the prefix to the starting characters of the clear_number. However you have a collation conflict so you need to specify the collation being used. Using LIKE is not the most efficient style of join condition and it could lead to performance issues as it doesn't make use of indexes. However it does demonstrate that a logical join does exist and that you do not need that split function (which is not good for a join either by the way).
I have left the remainder of my earlier answer available for reference:
Query:
SELECT
orig_point_id
, count(*) attempts
FROM cdr
WHERE orig_carrier_id=3
AND currency_sell='USD'
AND start_time >= '2016-10-01'
AND start_time < '2016-11-01'
GROUP BY
orig_point_id
Results:
| orig_point_id | attempts |
|---------------|----------|
| 5 | 5 |
extracted for Original Query:
SELECT c.orig_point_id,
(SELECT attempts
FROM
(SELECT
orig_carrier_id,
orig_point_id,
term_point_id,
/* term_route, */
currency_sell,
is_special,
COUNT(*) AS attempts
FROM cdr
WHERE 1=1
AND start_time >= '2016-10-01 0:00:00'
AND start_time <= '2016-10-31 23:59:59'
GROUP BY orig_carrier_id, currency_sell) AS c0
WHERE c0.orig_carrier_id=3
AND c0.currency_sell="USD"
LIMIT 1) AS attempts
FROM cdr AS c
GROUP BY c.orig_carrier_id, c.currency_sell
Results:
| orig_point_id | attempts |
|---------------|----------|
| 5 | 5 |
Hopefully you can see that you do not need as much complexity in your query as you do now. I suspect that if we know more about the "expected result" we might be able to do it without the split function.

specify conditions from outer query on a materialized subquery

i have got the below query which references couple of views 'goldedRunQueries' and 'currentGoldMarkings'. My issue seems to be from the view that is referred in the subquery - currentGoldMarkings. While execution, MySQL first materializes this subquery and then implements the where clauses of 'queryCode' and 'runId', which therefore results in execution time of more than hour as the view refers tables that has got millions of rows of data. My question is how do I enforce those two where conditions on the subquery before it materializes.
SELECT goldedRunQueries.queryCode, goldedRunQueries.runId
FROM goldedRunQueries
LEFT OUTER JOIN
( SELECT measuredRunId, queryCode, COUNT(resultId) as c
FROM currentGoldMarkings
GROUP BY measuredRunId, queryCode
) AS accuracy ON accuracy.measuredRunId = goldedRunQueries.runId
AND accuracy.queryCode = goldedRunQueries.queryCode
WHERE goldedRunQueries.queryCode IN ('CH001', 'CH002', 'CH003')
and goldedRunQueries.runid = 5000
ORDER BY goldedRunQueries.runId DESC, goldedRunQueries.queryCode;
Here are the two views. Both of these also get used in a standalone mode and so integrating any clauses into them is not possible.
CREATE VIEW currentGoldMarkings
AS
SELECT result.resultId, result.runId AS measuredRunId, result.documentId,
result.queryCode, result.queryValue AS measuredValue,
gold.queryValue AS goldValue,
CASE result.queryValue WHEN gold.queryValue THEN 1 ELSE 0 END AS correct
FROM results AS result
INNER JOIN gold ON gold.documentId = result.documentId
AND gold.queryCode = result.queryCode
WHERE gold.isCurrent = 1
CREATE VIEW goldedRunQueries
AS
SELECT runId, queryCode
FROM runQueries
WHERE EXISTS
( SELECT 1 AS Expr1
FROM runs
WHERE (runId = runQueries.runId)
AND (isManual = 0)
)
AND EXISTS
( SELECT 1 AS Expr1
FROM results
WHERE (runId = runQueries.runId)
AND (queryCode = runQueries.queryCode)
AND EXISTS
( SELECT 1 AS Expr1
FROM gold
WHERE (documentId = results.documentId)
AND (queryCode = results.queryCode)
)
)
Note: The above query reflects only a part of my actual query. There are 3 other left outer joins which are similar in nature to the above subquery, which makes the problem far worse.
EDIT: As suggested, here is the structure and some sample data for the tables
CREATE TABLE `results`(
`resultId` int auto_increment NOT NULL,
`runId` int NOT NULL,
`documentId` int NOT NULL,
`queryCode` char(5) NOT NULL,
`queryValue` char(1) NOT NULL,
`comment` varchar(255) NULL,
CONSTRAINT `PK_results` PRIMARY KEY
(
`resultId`
)
);
-- Sample rows for `results`. resultId is AUTO_INCREMENT, so it is omitted and
-- the remaining five columns are named explicitly (the table has six columns).
insert into results (`runId`, `documentId`, `queryCode`, `queryValue`, `comment`) values
    (100, 242300, 'AC001', 'I', NULL),
    (100, 242300, 'AC001', 'S', NULL),
    (150, 242301, 'AC005', 'I', 'abc'),
    (100, 242300, 'AC001', 'I', NULL),
    (109, 242301, 'PQ001', 'S', 'zzz'),
    (400, 242400, 'DD006', 'I', NULL);
CREATE TABLE `gold`(
`goldId` int auto_increment NOT NULL,
`runDate` datetime NOT NULL,
`documentId` int NOT NULL,
`queryCode` char(5) NOT NULL,
`queryValue` char(1) NOT NULL,
`comment` varchar(255) NULL,
`isCurrent` tinyint(1) NOT NULL DEFAULT 0,
CONSTRAINT `PK_gold` PRIMARY KEY
(
`goldId`
)
);
-- Sample rows for `gold`. goldId is AUTO_INCREMENT, so it is omitted and the
-- remaining six columns are named explicitly (the table has seven columns).
insert into gold (`runDate`, `documentId`, `queryCode`, `queryValue`, `comment`, `isCurrent`) values
    ('2015-02-20 00:00:00', 138904, 'CH001', 'N', NULL, 1),
    ('2015-05-20 00:00:00', 138904, 'CH001', 'N', 'aaa', 1),
    ('2016-02-20 00:00:00', 138905, 'CH002', 'N', NULL, 0),
    ('2015-12-12 00:00:00', 138804, 'CH001', 'N', 'zzzz', 1);
CREATE TABLE `runQueries`(
`runId` int NOT NULL,
`queryCode` char(5) NOT NULL,
CONSTRAINT `PK_runQueries` PRIMARY KEY
(
`runId`,
`queryCode`
)
);
-- Sample rows for `runQueries`; columns are named and the statement is terminated
-- so it can run as part of a script.
insert into runQueries (`runId`, `queryCode`) values
    (100, 'AC001'),
    (109, 'PQ001'),
    (400, 'DD006');
CREATE TABLE `runs`(
`runId` int auto_increment NOT NULL,
`runName` varchar(63) NOT NULL,
`isManual` tinyint(1) NOT NULL,
`runDate` datetime NOT NULL,
`comment` varchar(1023) NULL,
`folderName` varchar(63) NULL,
`documentSetId` int NOT NULL,
`pipelineVersion` varchar(50) NULL,
`isArchived` tinyint(1) NOT NULL DEFAULT 0,
`pipeline` varchar(50) NULL,
CONSTRAINT `PK_runs` PRIMARY KEY
(
`runId`
)
);
-- Sample rows for `runs`. runId is AUTO_INCREMENT, so it is omitted and the
-- remaining nine columns are named explicitly (the table has ten columns).
insert into runs (`runName`, `isManual`, `runDate`, `comment`, `folderName`, `documentSetId`, `pipelineVersion`, `isArchived`, `pipeline`) values
    ('test1', 0, '2015-08-04 06:30:46.000000', 'zzzz', '2015-08-04_103046', 2, '2015-08-03', 0, NULL),
    ('test2', 1, '2015-12-04 12:30:46.000000', 'zzzz', '2015-08-04_103046', 2, '2015-08-03', 0, NULL),
    ('test3', 1, '2015-06-24 10:56:46.000000', 'zzzz', '2015-08-04_103046', 2, '2015-08-03', 0, NULL),
    ('test4', 1, '2016-05-04 11:30:46.000000', 'zzzz', '2015-08-04_103046', 2, '2015-08-03', 0, NULL);
First, let's try to improve the performance via indexes:
results: INDEX(runId, queryCode) -- in either order
gold: INDEX(documentId, queryCode, isCurrent) -- in that order
After that, update the CREATE TABLEs in the question and add the output of:
EXPLAIN EXTENDED SELECT ...;
SHOW WARNINGS;
What version are you running? You effectively have FROM ( SELECT ... ) JOIN ( SELECT ... ). Before 5.6, neither subquery had an index; with 5.6, an index is generated on the fly.
It is a shame that the query is built that way, since you know which one to use: and goldedRunQueries.runid = 5000.
Bottom Line: add the indexes; upgrade to 5.6 or 5.7; if that is not enough, then rethink the use of VIEWs.

Trying to left join 3 tables where the id could be the same, I am defining it via the type

Okay, so I have 3 tables:
users
-- `users`: account rows (credentials, status flags, avatar/banner flags).
CREATE TABLE IF NOT EXISTS `users` (
    `user_id`                INT(11)      NOT NULL,
    `user_username`          VARCHAR(25)  NOT NULL,
    `user_email`             VARCHAR(100) NOT NULL,
    `user_password`          VARCHAR(255) NOT NULL,
    `user_enabled`           INT(1)       NOT NULL DEFAULT '1',
    `user_staff`             VARCHAR(15)  NOT NULL DEFAULT '',
    `user_account_type`      VARCHAR(20)  NOT NULL DEFAULT '0',
    `user_registerdate`      DATE         NOT NULL,
    `user_twofactor`         INT(11)      NOT NULL DEFAULT '0',
    `user_twofackey`         VARCHAR(255) NOT NULL,
    `user_forgot_email_code` VARCHAR(255) NOT NULL,
    `user_emailverified`     VARCHAR(25)  NOT NULL DEFAULT 'unverified',
    `user_banned`            VARCHAR(25)  NOT NULL DEFAULT 'unbanned',
    `user_has_avatar`        INT(11)      NOT NULL DEFAULT '0',
    `user_has_banner`        INT(11)      NOT NULL DEFAULT '0'
) ENGINE=InnoDB AUTO_INCREMENT=15 DEFAULT CHARSET=latin1;

-- Sample data for `users`.
INSERT INTO `users` (
    `user_id`, `user_username`, `user_email`, `user_password`, `user_enabled`,
    `user_staff`, `user_account_type`, `user_registerdate`, `user_twofactor`,
    `user_twofackey`, `user_forgot_email_code`, `user_emailverified`,
    `user_banned`, `user_has_avatar`, `user_has_banner`
)
VALUES
    (1, 'fhfhfhf', 'lol#gmail.com', 'removed', 1, 'admin', 'Business', '2015-07-21', 0, '0', '0', 'unverified', 'unbanned', 1, 0);
company
-- `company`: company profiles; `company_user` holds a user id.
CREATE TABLE IF NOT EXISTS `company` (
    `company_id`         INT(11)      NOT NULL,
    `company_name`       VARCHAR(100) NOT NULL,
    `company_user`       INT(11)      NOT NULL,
    `company_enabled`    VARCHAR(50)  NOT NULL DEFAULT 'enabled',
    `company_has_avatar` INT(5)       NOT NULL DEFAULT '0',
    `company_has_banner` INT(5)       NOT NULL DEFAULT '0'
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1;

-- Sample data for `company`.
INSERT INTO `company` (
    `company_id`, `company_name`, `company_user`,
    `company_enabled`, `company_has_avatar`, `company_has_banner`
)
VALUES
    (1, 'Rad', 3, 'enabled', 0, 0);
training_company
-- `training_company`: training-company profiles; same shape as `company`.
CREATE TABLE IF NOT EXISTS `training_company` (
    `training_company_id`         INT(11)      NOT NULL,
    `training_company_name`       VARCHAR(100) NOT NULL,
    `training_company_user`       INT(11)      NOT NULL,
    `training_company_enabled`    VARCHAR(50)  NOT NULL DEFAULT 'enabled',
    `training_company_has_avatar` INT(5)       NOT NULL DEFAULT '0',
    `training_company_has_banner` INT(5)       NOT NULL DEFAULT '0'
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1;

-- Sample data for `training_company`.
INSERT INTO `training_company` (
    `training_company_id`, `training_company_name`, `training_company_user`,
    `training_company_enabled`, `training_company_has_avatar`, `training_company_has_banner`
)
VALUES
    (1, '123', 3, 'enabled', 0, 0),
    (2, '123', 3, 'enabled', 0, 0),
    (3, '123', 3, 'enabled', 0, 0);
Each has a profile with an auto-incrementing id, so the same id can exist in all three tables. I am just distinguishing them via a type: a user is 'user', a training company is 'training' and a company is 'company'. I am allowing a user to follow any of them.
SQL
-- Timeline feed: enabled statuses of type 'user', posted either by the given
-- user or by someone matched through `friends`, restricted by privacy.
SELECT *
FROM timeline_status AS ts
LEFT JOIN users AS u
    ON u.user_id = ts.timeline_status_user
LEFT JOIN timeline_likes AS tl
    ON tl.timeline_likes_main_status = ts.timeline_status_id
LEFT JOIN friends AS fr
    ON fr.friends_friend = ts.timeline_status_user
LEFT JOIN user_personal_information AS upi
    ON upi.user_personal_information_user = ts.timeline_status_user
LEFT JOIN following AS fo
    ON fo.following = ts.timeline_status_user
WHERE ts.timeline_status_enabled = 'enabled'
    AND ts.timeline_status_type = 'user'
    AND (
        ts.timeline_status_user = :status_user
        OR fr.friends_user = :friend_user
    )
    -- AND binds tighter than OR, so 'onlyme' is the only privacy level that is
    -- additionally tied to the author; the grouping is spelled out here.
    AND (
        (ts.timeline_status_privacy = 'onlyme' AND ts.timeline_status_user = :status_user2)
        OR ts.timeline_status_privacy = 'public'
        OR ts.timeline_status_privacy = 'private'
    )
-- Collapse the row multiplication caused by the one-to-many joins above.
GROUP BY ts.timeline_status_id
ORDER BY ts.timeline_status_date DESC
LIMIT :start, :end
So I'd want to select from users if type = user, and row exists in followers and/or friends, select from companies or training from followers if type = company or training.
My status have the company/user/training id, and the type, so I know which table to select the 'user from'
my following table;
-- `following`: one row per follow; `following_type` says which table the id
-- stored in `following` refers to.
CREATE TABLE IF NOT EXISTS `following` (
    `following_id`   INT(11)     NOT NULL,
    `following_user` INT(11)     NOT NULL,
    `following_type` VARCHAR(50) NOT NULL,
    `following`      INT(11)     NOT NULL
) ENGINE=InnoDB AUTO_INCREMENT=9 DEFAULT CHARSET=latin1;

-- Sample data for `following`.
INSERT INTO `following` (
    `following_id`, `following_user`, `following_type`, `following`
)
VALUES
    (5, 3, 'company', 14),
    (8, 3, 'training', 1);
timeline status:
-- `timeline_status`: posted statuses; `timeline_status_type` says which kind of
-- entity the id in `timeline_status_user` belongs to.
CREATE TABLE IF NOT EXISTS `timeline_status` (
    `timeline_status_id`      INT(11)     NOT NULL,
    `timeline_status_user`    INT(11)     NOT NULL,
    `timeline_status_privacy` VARCHAR(25) NOT NULL DEFAULT 'public',
    `timeline_status_type`    VARCHAR(25) NOT NULL DEFAULT 'user',
    `timeline_status_post`    TEXT        NOT NULL,
    `timeline_status_date`    DATETIME    NOT NULL,
    `timeline_status_enabled` VARCHAR(25) NOT NULL DEFAULT 'enabled'
) ENGINE=InnoDB AUTO_INCREMENT=123 DEFAULT CHARSET=latin1;

-- Sample data for `timeline_status`.
INSERT INTO `timeline_status` (
    `timeline_status_id`, `timeline_status_user`, `timeline_status_privacy`,
    `timeline_status_type`, `timeline_status_post`, `timeline_status_date`,
    `timeline_status_enabled`
)
VALUES
    (98, 3, 'private', 'user', 'hello', '2015-10-02 16:29:48', 'enabled'),
    (99, 3, 'onlyme', 'user', 'yo', '2015-10-02 16:29:56', 'enabled'),
    (100, 3, 'public', 'user', 'fghyjt', '2015-10-02 17:51:28', 'enabled'),
    (101, 1, 'private', 'training', 'teest..', '2015-10-03 14:26:45', 'enabled'),
    (102, 15, 'public', 'company', 'hello', '2015-10-06 13:32:30', 'enabled');
So how can I make it select from company when the following type = company, select from training when the following type = training, and keep the SQL as it is at the moment when the following type = user? Right now I am following a company with the id of 1, but there is also a user with an id of 1, so I am getting that user's statuses.
Your best bet would be to use the UNION operator to mix them all in 1 table, and then query based on type. For instance, you could do something like this :
-- One row per follow relationship, decorated with display fields of the
-- FOLLOWED entity. Each branch handles exactly one `following_type`, so ids
-- that collide across users / company / training_company never cross-match.
-- The id of the followed entity lives in `following`.`following`
-- (`following_user` is the follower), hence the joins on `f.following`.
-- NOTE: `enabled` mixes representations ('enabled' strings for companies,
-- 0/1 integers for users); normalise downstream if it is compared.

-- Followed training companies.
SELECT
    f.*,
    t.training_company_name       AS name,
    NULL                          AS staff,
    t.training_company_enabled    AS enabled,
    t.training_company_has_banner AS banner,
    t.training_company_has_avatar AS avatar
FROM following AS f
INNER JOIN training_company AS t
    ON  t.training_company_id = f.following
    AND f.following_type = 'training'
UNION ALL
-- Followed companies.
SELECT
    f.*,
    c.company_name       AS name,
    NULL                 AS staff,
    c.company_enabled    AS enabled,
    c.company_has_banner AS banner,
    c.company_has_avatar AS avatar
FROM following AS f
INNER JOIN company AS c
    ON  c.company_id = f.following
    AND f.following_type = 'company'
UNION ALL
-- Followed users.
SELECT
    f.*,
    u.user_username   AS name,
    u.user_staff      AS staff,
    u.user_enabled    AS enabled,
    u.user_has_banner AS banner,
    u.user_has_avatar AS avatar
FROM following AS f
INNER JOIN users AS u
    ON  u.user_id = f.following
    AND f.following_type = 'user'
And from there you will have a derived table/view that will look like
V_followers(following_id, following_user, following_type, following, name, staff, enabled, banner, avatar).
I'm not 100% certain the syntax is MySql-correct though, but the idea remains the same.
I think you will need another variable to tell you the type in addition to the user id. Then you can wrap both up in a CASE statement, like so:
-- `type` and `id` stand for the extra variable and the id supplied by the caller.
-- Exactly one comparison is evaluated, chosen by `type`; with no ELSE branch an
-- unrecognised `type` yields NULL, which filters the row out.
WHERE
    CASE
        WHEN type = 'USER'      THEN timeline_status.timeline_status_user = id
        WHEN type = 'FRIENDS'   THEN friends.friends_user = id
        -- the follower column in `following` is `following_user`
        WHEN type = 'FOLLOWING' THEN following.following_user = id
    END
Your question actually contains a few questions.
1) Both company and training_company have a user column, which is why you can join these tables;
a left join takes only the needed rows from the appropriate tables,
and with CASE you can select the needed field:
-- Per user: the company name for 'Business' accounts, otherwise the name of
-- the training company tied to the same user id.
SELECT
    usr.user_id,
    usr.user_account_type,
    CASE usr.user_account_type
        WHEN 'Business' THEN comp.company_name
        ELSE train.training_company_name
    END AS name_of_company
FROM users AS usr
LEFT JOIN company AS comp
    ON comp.company_user = usr.user_id
LEFT JOIN training_company AS train
    ON train.training_company_user = usr.user_id
;
2) The following table contains both users and companies, which is why you can do the same:
-- Display name of every followed entity, one row per `following` row.
-- The followed id is `f.following` (`f.following_user` is the follower), and
-- each join is guarded by `following_type` so that
--   * a company and a user sharing the same id are never confused, and
--   * rows are not multiplied by unrelated matches in the other tables.
SELECT
    f.following_user,
    f.following_type,
    CASE f.following_type
        WHEN 'company'  THEN c.company_name
        WHEN 'training' THEN tc.training_company_name
        ELSE u.user_username
    END AS name
FROM following AS f
-- Every type that is not company/training is treated as a user, mirroring the
-- ELSE branch of the CASE above.
LEFT JOIN users AS u
    ON  u.user_id = f.following
    AND f.following_type NOT IN ('company', 'training')
-- here you can add inner joins to followers, friends etc
LEFT JOIN company AS c
    ON  c.company_id = f.following
    AND f.following_type = 'company'
LEFT JOIN training_company AS tc
    ON  tc.training_company_id = f.following
    AND f.following_type = 'training'
;
3) I think your schema is pretty simple,
and I'm sure that you can use joins to achieve what you want.
I would like to emphasize one point:
if there is a choice (user or company, etc.), use a left join;
if you are restricting to users with followers or friends, etc., use an inner join (it should run faster).
I hope I understood you correctly and that this info helps!
It seems like you have a situation whereby you have the concept of an entity that can post statuses, and has other information relevant to that.
These entities come in three forms; user, company, training_company. Each of these forms may have more specific sub details.
Have you considered placing all the entity info in one table:
entity - id, type, name, enabled, banner, avatar
Then implementing a has_one relationship to any information relevant to the sub_type:
user - entity_id, ... user specific fields
company - entity_id, .. company specific fields
training_company - entity_id .. training company specific fields
Usually you will be doing one of the following:
aggregate stats regarding entities/statuses - select from entity
lists of statuses - select from entity
aggregate stats regarding one specific type - select from type_table join entity
list inc. detail of one specific type - select from type_table join entity
details of one entity - select from type_table join entity
In the rare case of needing to display full details of a list of entities you can either use a UNION query or just run one query for the entities and then 3 queries on the sub_types WHERE entity_id IN (pulled entity ids).. this data is often paginated anyway, so the performance should be good enough.
We have used this structure in our projects to good effect, be aware it will require more complicated CRUD operations.

mysql select record containing highest value, joining on range of columns containing nulls

Here's what I'm working with:
-- `rate`: hourly charge-out rates. The client_* columns are nullable; the
-- sample rows below leave them NULL where a rate is not client-specific.
CREATE TABLE IF NOT EXISTS `rate` (
    `id`             INT(11)       NOT NULL AUTO_INCREMENT,
    `client_company` INT(11)       DEFAULT NULL,
    `client_group`   INT(11)       DEFAULT NULL,
    `client_contact` INT(11)       DEFAULT NULL,
    `role`           INT(11)       DEFAULT NULL,
    `date_from`      DATETIME      DEFAULT NULL,
    `hourly_rate`    DECIMAL(18,2) DEFAULT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Sample data for `rate`.
INSERT INTO `rate` (
    `id`, `client_company`, `client_group`, `client_contact`,
    `role`, `date_from`, `hourly_rate`
)
VALUES
    (4, NULL, NULL, NULL, 3, '2012-07-30 14:48:16', 115.00),
    (5, 3, NULL, NULL, 3, '2012-07-30 14:51:38', 110.00),
    (6, 3, NULL, NULL, 3, '2012-07-30 14:59:20', 112.00);
This table stores chargeout rates for clients; the idea being that, when looking for the correct rate for a job role, we'd first look for a rate matching the given role and client contact, then if no rate was found, would try to match the role and the client group (or 'department'), then the client company, and finally looking for a global rate for just the role itself. Fine.
Rates can change over time, so the table may contain multiple entries matching any given combination of role, company, group and client contact: I want a query that will only return me the latest one for each distinct combination.
Given that I asked a near-identical question only days ago, and that this topic seems fairly frequent in various guises, I can only apologise for my slow-wittedness and ask once again for someone to explain why the query below is returning all three of the records above and not, as I want it to, only the records with IDs 4 and 6.
Is it something to do with my trying to join based on columns containing NULL?
-- Anti-join attempt: keep a rate row only when no later-dated row matches it on
-- company, contact, group and role.
-- Note: each key comparison uses plain `=`, which evaluates to NULL (not TRUE)
-- whenever either side is NULL.
SELECT
    rate.*,
    newer_rate.id
FROM rate
LEFT JOIN rate AS newer_rate
    ON  newer_rate.client_company = rate.client_company
    AND newer_rate.client_contact = rate.client_contact
    AND newer_rate.client_group   = rate.client_group
    AND newer_rate.role           = rate.role
    AND newer_rate.date_from      > rate.date_from
WHERE newer_rate.id IS NULL
FWIW, the problem WAS joining NULL columns. The vital missing ingredient was COALESCE:
-- Latest rate per (client_company, client_contact, client_group, role):
-- a row survives only when no later-dated row shares the same key.
-- MySQL's NULL-safe equality `<=>` treats NULL as equal to NULL, which is what
-- wrapping both sides in COALESCE(col, 1) was meant to achieve, but without
-- also making NULL compare equal to a genuine id of 1.
SELECT
    rate.*,
    newest.id
FROM rate
LEFT JOIN rate AS newest
    ON  rate.client_company <=> newest.client_company
    AND rate.client_contact <=> newest.client_contact
    AND rate.client_group   <=> newest.client_group
    AND rate.role           <=> newest.role
    AND newest.date_from > rate.date_from
WHERE newest.id IS NULL