AVG limited to 3 last values for each group - mysql

Basically, I have two tables.
Here's the code to create both tables, in case it helps anyone willing to help me:
-- Log of coefficient scores: one row per logged score of a campaign.
CREATE TABLE IF NOT EXISTS `coefficients` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`datetime` datetime NOT NULL,
`campaign_id` int(11) NOT NULL,
`score` decimal(10,2) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=9 DEFAULT CHARSET=utf8;
-- Sample data: four scores for campaign 1 and four for campaign 2.
-- Row id 8 carries the earliest datetime of campaign 2, so ordering by id
-- and ordering by datetime do not give the same "latest" rows.
INSERT INTO `coefficients` (`id`, `datetime`, `campaign_id`, `score`) VALUES
(1, '2017-01-29 22:32:13', 1, 20.00),
(2, '2017-01-29 22:36:22', 1, 34.00),
(3, '2017-01-29 22:36:30', 1, 30.00),
(4, '2017-01-29 22:36:43', 1, 1000.00),
(5, '2017-01-29 22:37:13', 2, 10.00),
(6, '2017-01-29 22:37:26', 2, 15.00),
(7, '2017-01-29 22:37:43', 2, 20.00),
(8, '2017-01-29 22:30:51', 2, 1000.00);
-- Per-campaign statistics; campaign_id is the column shared with `coefficients`.
CREATE TABLE IF NOT EXISTS `statistics` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`campaign_id` int(11) NOT NULL,
`stats1` int(11) NOT NULL,
`stats2` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8;
-- Sample data: one statistics row for each of campaigns 1 and 2.
INSERT INTO `statistics` (`id`, `campaign_id`, `stats1`, `stats2`) VALUES
(1, 1, 34, 38),
(2, 2, 23, 45);
I would like to get average coefficient for each campaign_id calculated based on latest 3 logged coefficients for each campaign_id.
Here's screenshot of two tables and result that i need to get:
data + result (visual representation)
The main problem is that I have no idea how to join these two tables when I need only the average coefficient for each campaign_id, based on the 3 latest logged numbers for it :(
I will appreciate any help

Following query will give you the top 3 records per campaign_id from coefficients table:
-- Reset the per-campaign counter and the "previous campaign" marker.
SET @currcount = NULL, @currvalue = NULL;
-- Keep the 3 most recent coefficient rows of every campaign.
-- NOTE: on MySQL 8.0+ ROW_NUMBER() OVER (PARTITION BY ...) is the more robust
-- choice; the variable idiom relies on rows being processed in ORDER BY order.
SELECT id, campaign_id, score, c_index
FROM (
    SELECT
        id,
        campaign_id,
        score,
        -- Restart at 1 whenever campaign_id changes, otherwise count up.
        @currcount := IF(@currvalue = campaign_id, @currcount + 1, 1) AS c_index,
        -- Remember the campaign of the current row for the next comparison.
        @currvalue := campaign_id AS s
    FROM coefficients
    -- Rows of one campaign must be adjacent and newest first, so that
    -- c_index 1..3 marks the three latest scores of that campaign.
    ORDER BY campaign_id, `datetime` DESC
) AS a
WHERE c_index <= 3;
Now, all you have to do is, add a GROUP BY to this query, calculate average score and join it with statistics table, e.g.:
-- Reset the per-campaign counter and the "previous campaign" marker.
SET @currcount = NULL, @currvalue = NULL;
-- Average of the 3 most recent scores per campaign, with that campaign's statistics.
SELECT
    a.campaign_id,
    AVG(a.score) AS avg_score,
    s.stats1,
    s.stats2
FROM (
    SELECT
        id,
        campaign_id,
        score,
        -- Restart at 1 whenever campaign_id changes, otherwise count up.
        @currcount := IF(@currvalue = campaign_id, @currcount + 1, 1) AS c_index,
        -- Remember the campaign of the current row for the next comparison.
        @currvalue := campaign_id AS s
    FROM coefficients
    -- Newest rows first inside each campaign so c_index 1..3 are the latest three.
    ORDER BY campaign_id, `datetime` DESC
) AS a
INNER JOIN statistics AS s
    ON a.campaign_id = s.campaign_id
WHERE a.c_index <= 3
-- Every non-aggregated output column is grouped, so the statement is also
-- accepted when ONLY_FULL_GROUP_BY is enabled.
GROUP BY a.campaign_id, s.stats1, s.stats2;
Here's the SQL Fiddle.

In MySQL, the best way is usually to use variables. Getting the statistics is just a join, so that is not interesting. Let's get the average from the coefficients table:
-- Average of the 3 most recent scores per campaign.
select c.campaign_id, avg(c.score) as avg_score
from (select c.*,
             -- rn numbers the rows inside one campaign. The nested if() only
             -- exists to store the new campaign in @c; both branches yield 1.
             (@rn := if(@c = c.campaign_id, @rn + 1,
                        if(@c := c.campaign_id, 1, 1)
                       )
             ) as rn
      from coefficients c cross join
           -- Initialise both variables inside the statement itself.
           (select @rn := 0, @c := -1) params
      order by c.campaign_id, c.datetime desc
     ) c
where rn <= 3
group by c.campaign_id;

Related

select last inserted row based on date

Main Problem Is:- select last inserted row based on date
I want to be able to select, for each distinct ref, the row with the latest Created_At date.
this is my table and data
-- Log table: each row stores a state (etat_de_commande) recorded for a
-- reference (ref) at Created_At.
DROP TABLE IF EXISTS `transactions_logs`;
CREATE TABLE IF NOT EXISTS `transactions_logs` (
`trans_log_Id` bigint(20) UNSIGNED NOT NULL AUTO_INCREMENT,
`etat_de_commande` varchar(100) NOT NULL,
`ref` varchar(10) NOT NULL,
`commentaire` text NOT NULL,
`staffId` bigint(20) UNSIGNED NOT NULL,
`Created_At` datetime NOT NULL,
PRIMARY KEY (`trans_log_Id`)
) ENGINE=MyISAM AUTO_INCREMENT=6 DEFAULT CHARSET=latin1;
-- Sample data: ref 429735061 has four entries, ref 472143970 has one.
INSERT INTO `transactions_logs` (`trans_log_Id`, `etat_de_commande`, `ref`, `commentaire`, `staffId`, `Created_At`) VALUES
(1, 'waiting confirmation', '429735061', '', 1, '2020-11-09 12:11:43'),
(2, 'waiting confirmation', '472143970', '', 1, '2020-11-09 13:45:57'),
(3, 'confirmed', '429735061', '', 1, '2020-11-09 13:46:12'),
(4, 'ready', '429735061', '', 1, '2020-11-09 13:46:18'),
(5, 'picked', '429735061', '', 1, '2020-11-09 14:46:25');
COMMIT;
I want to be able to get this result
(2,'waiting confirmation','472143970',1,'2020-11-09 13:45:57'),
(5,'picked','429735061',1,'2020-11-09 14:46:25')
One option uses window functions, available in MySQL 8.0:
-- Latest log row per ref (MySQL 8.0+).
select *
from (
    select t.*,
           -- row_number() gives exactly one row per ref even when two entries
           -- share the same created_at; the highest trans_log_Id wins the tie.
           row_number() over(partition by ref
                             order by created_at desc, trans_log_Id desc) rn
    from transactions_logs t
) t
where rn = 1
You can also use a correlated subquery for filtering - this works in all MySQL versions:
-- Keep a log row only when no other row of the same ref is more recent,
-- i.e. its created_at is the maximum for that ref.
select cur.*
from transactions_logs cur
where not exists (
    select 1
    from transactions_logs newer
    where newer.ref = cur.ref
      and newer.created_at > cur.created_at
)
The latter would take advantage of an index on (ref, created_at).

Only show latest message from each conversation

I have a messaging system which has the tables "message" which just contains the "subject" then "message_user" which contains the message body, who sent it, who its for and whether its deleted / unread.
#Message Table: one row per conversation, holding only its subject
CREATE TABLE `message` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`subject` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=latin1;
#Sample data: two conversations
INSERT INTO `message` (`id`, `subject`)
VALUES
(1, 'Test'),
(2, 'Test Again');
#Message User Table: message body, sender/recipient and per-user flags (folder, unread, deleted)
CREATE TABLE `message_user` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`message_id` int(11) NOT NULL,
`user_id` int(11) NOT NULL,
`interlocutor` int(11) DEFAULT NULL,
`body` text,
`folder` enum('inbox','sent') NOT NULL,
`starmark` tinyint(1) NOT NULL DEFAULT '0',
`unread` tinyint(1) NOT NULL DEFAULT '1',
`deleted` enum('none','trash','deleted') NOT NULL DEFAULT 'none',
`date` datetime DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=latin1;
#Sample data: every message body appears twice, once as the 'sent' copy of one
#user and once as the 'inbox' copy of the other.
INSERT INTO `message_user` (`id`, `message_id`, `user_id`, `interlocutor`, `body`, `folder`, `starmark`, `unread`, `deleted`, `date`)
VALUES
(1, 1, 1, 2, 'Hi, how are you?', 'sent', 0, 1, 'none', '2018-10-23 09:36:02'),
(2, 1, 2, 1, 'Hi, how are you?', 'inbox', 0, 1, 'none', '2018-10-23 09:36:02'),
(3, 1, 2, 1, 'I am good thanks, you?', 'sent', 0, 1, 'none', '2018-10-23 09:46:02'),
(4, 1, 1, 2, 'I am good thanks, you?', 'inbox', 0, 1, 'none', '2018-10-23 09:46:02'),
(5, 2, 1, 3, 'Hi!', 'sent', 0, 1, 'none', '2018-10-23 09:50:22'),
(6, 2, 3, 1, 'Hi!', 'inbox', 0, 1, 'none', '2018-10-23 09:50:22');
I wrote the following query:
-- Every non-deleted message_user row of user 1 joined to its message,
-- highest message_user id first.
SELECT
    m.*,
    mu.*
FROM message AS m
INNER JOIN message_user AS mu
    ON mu.message_id = m.id
WHERE mu.user_id = 1 -- user_id of person checking messages
  AND mu.deleted = 'none'
ORDER BY mu.id DESC;
But this currently returns 3 rows even though there are only two conversations. I tried to GROUP BY but it still showed 3 rows.
I would expect the first two rows in the above example not the last one.
I want the query to return a list of the conversations with the latest message which has been sent which I (user_id) am involved in.
Since your MySQL version is 8.0+, we can utilize window functions, such as Row_number(); otherwise the solution would have been much more verbose, using session variables.
For a partition (group) of m.id, we will determine the row number values. Row number values will be ordered in descending order of date.
Now, we simply need to use this result-set as a Derived Table, and just consider those rows where row number value is 1.
Date is a keyword in MySQL. You should avoid naming column/table using it. Still if you have to do so, you will need to use backticks around it.
Try the following (DB Fiddle DEMO):
-- Latest non-deleted message per conversation for user 1,
-- highest message id first.
SELECT ranked.*
FROM (
    SELECT
        m.id,
        m.subject,
        mu.id AS message_user_id,
        mu.message_id,
        mu.user_id,
        mu.interlocutor,
        mu.body,
        mu.folder,
        mu.starmark,
        mu.unread,
        mu.deleted,
        mu.`date`,
        -- 1 marks the most recent message_user row of the conversation.
        ROW_NUMBER() OVER (
            PARTITION BY m.id
            ORDER BY mu.`date` DESC
        ) AS row_no
    FROM message AS m
    INNER JOIN message_user AS mu
        ON mu.message_id = m.id
    WHERE mu.user_id = 1
      AND mu.deleted = 'none'
) AS ranked
WHERE ranked.row_no = 1
ORDER BY ranked.id DESC
Try this :
-- Latest non-deleted message per conversation for user 1.
-- A plain GROUP BY on the message id would return an arbitrary message_user
-- row per group (and is rejected under ONLY_FULL_GROUP_BY), so the newest
-- row is selected explicitly through its id.
select
    m.id as id_message,
    m.subject as subject_message,
    mu.id as id_message_user,
    mu.interlocutor,
    mu.body,
    mu.folder,
    mu.starmark,
    mu.deleted,
    mu.`date`
from message as m
inner join message_user as mu
    on mu.message_id = m.id
   and mu.deleted = 'none'
   and mu.user_id = 1
inner join (
    -- Highest message_user id per conversation among the user's visible rows.
    select message_id, max(id) as last_id
    from message_user
    where deleted = 'none'
      and user_id = 1
    group by message_id
) as latest
    on latest.last_id = mu.id
order by id_message_user desc
I removed
mu.user_id : it's in the inner join condition, so it is always 1
mu.unread : always 1 in the sample data
mu.message_id : duplicate of id_message
http://sqlfiddle.com/#!9/91a5e4/15

Average values from different table on join

-- Review scores per house; `average` is the score averaged by the queries below.
CREATE TABLE `reviews` (
`id` int(11) NOT NULL,
`average` decimal(11,2) NOT NULL,
`house_id` int(11) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Sample data: two reviews of 10.00 for house 1.
INSERT INTO `reviews` (`id`, `average`, `house_id`) VALUES
(1, '10.00', 1),
(2, '10.00', 1);
-- The primary key and AUTO_INCREMENT are added after the data is loaded.
ALTER TABLE `reviews`
ADD PRIMARY KEY (`id`);
ALTER TABLE `reviews`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT, AUTO_INCREMENT=3;
-- Second review table with the same three columns as `reviews`.
CREATE TABLE `dummy_reviews` (
`id` int(11) NOT NULL,
`average` decimal(11,2) NOT NULL,
`house_id` int(11) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Sample data: a single review of 2.00 for house 1.
INSERT INTO `dummy_reviews` (`id`, `average`, `house_id`) VALUES
(0, '2.00', 1);
-- The primary key is added after the data is loaded.
ALTER TABLE `dummy_reviews`
ADD PRIMARY KEY (`id`);
AND the query
-- Averages each table's `average` column over the joined rows.
-- The join pairs every review with the dummy reviews of the same house,
-- so each average is computed over the joined row set, not the base tables.
SELECT
    AVG(rev.average) AS avg1,
    AVG(dummy.average) AS avg2
FROM reviews AS rev
LEFT JOIN dummy_reviews AS dummy
    ON dummy.house_id = rev.house_id
the result is
avg1 avg2
10.000000 2.000000
All good by now but (10 + 2) / 2 = 6 ... wrong result
I need (10+10+2) / 3 = 7,33 ... How can I get this result?
SQLFiddle
You have joined the values, and as such you won't have 3 rows, you will have 2. What you need is a union, so that you have all rows from both tables and can do the calculation over them. Like this:
-- Stack the scores of both tables (duplicates kept) and average them together.
select avg(average)
from (
    select average
    from reviews
    union all
    select average
    from dummy_reviews
) all_scores
See it here: http://sqlfiddle.com/#!9/e0b75f/3
Jorge's answer is the simplest approach (and I duly upvoted it). In response to your comment, you can do the following:
-- Overall average = (sum over both tables) / (row count over both tables).
-- Each table is aggregated to a single row first, then the two rows are combined.
select (coalesce(rv.suma, 0) + coalesce(dm.suma, 0))
       / (coalesce(rv.cnt, 0) + coalesce(dm.cnt, 0)) as overall_average
from (
    select sum(average) as suma, count(*) as cnt
    from reviews
) rv
cross join (
    select sum(average) as suma, count(*) as cnt
    from dummy_reviews
) dm;
Actually, I suggest this not only because of your comment. Under some circumstances, this could be the better performing code.

MySQL limit result based on value in joined table

I have two tables, the first one contains a limit column. The number in this column must be used to limit the number of records received from the second table.
Is it possible to do this in just one query?
Below my tables and DEMO:
# Create table a: holds the `limit` value that caps the rows read from table b
CREATE TABLE `a` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`limit` int(11) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=utf8;
# Create table b: records, each tagged with a group name in `master`
CREATE TABLE `b` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(50) DEFAULT NULL,
`master` varchar(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=utf8;
# Fill table a: a single row with limit 3
INSERT INTO `a` (`id`, `limit`)
VALUES
(1, 3);
# Fill table b: ten records in groupA (3), groupB (2), groupC (3) and groupD (2)
INSERT INTO `b` (`id`, `name`, `master`)
VALUES
(1, 'record 1', 'groupA'),
(2, 'record 2', 'groupB'),
(3, 'record 3', 'groupA'),
(4, 'record 4', 'groupB'),
(5, 'record 5', 'groupC'),
(6, 'record 6', 'groupC'),
(7, 'record 7', 'groupC'),
(8, 'record 8', 'groupA'),
(9, 'record 9', 'groupD'),
(10, 'record 10', 'groupD');
Query I tested:
-- One row of b per `master` group, capped at a hard-coded 3 rows.
SELECT rec.*
FROM b AS rec
INNER JOIN a AS cfg
    ON cfg.id = 1
GROUP BY rec.master
LIMIT 3
This selects only 3 records.
But now I want the limit to be read from table a. I tried to limit like this, but that fails:
-- Failing attempt: the row count is taken from a column (a.limit),
-- which the LIMIT clause does not accept.
SELECT b.*
FROM b
JOIN a ON a.id = 1
GROUP BY b.master
LIMIT a.limit
EDIT:
I've updated the question including the group by statement
You cannot use user-defined MySQL variables or table fields in the LIMIT clause. What you can do is use a variable to enumerate records of table b. Then use this variable to apply the limit:
-- Number the rows of b by id, then keep those whose number does not exceed a.`limit`.
SELECT t.id, t.name
FROM (
    SELECT id, name, @rn := @rn + 1 AS rn
    FROM b
    -- Initialise the counter inside the statement itself.
    CROSS JOIN (SELECT @rn := 0) AS v
    ORDER BY id) AS t
-- `limit` is a reserved word, hence the backticks.
INNER JOIN a ON a.id = 1 AND t.rn <= a.`limit`;
Demo here
Edit:
Here's a version that handles groups. It limits the records of b to those groups having the biggest population:
-- Return the records of b that belong to the a.`limit` most populated groups.
SELECT b.id, b.name, b.master
FROM b
INNER JOIN (
    -- Number the groups only after they have been aggregated and sorted:
    -- assigning @rn in the same SELECT as GROUP BY / ORDER BY COUNT(*) does not
    -- guarantee that the numbers follow the sort order.
    SELECT g.master, @rn := @rn + 1 AS rn
    FROM (
        SELECT master, COUNT(*) AS population
        FROM b
        GROUP BY master
        -- `master` breaks ties between equally large groups deterministically.
        ORDER BY population DESC, master
    ) AS g
    CROSS JOIN (SELECT @rn := 0) AS v
    -- NOTE(review): relies on the sorted derived table being read in order;
    -- on MySQL 8.0+ prefer ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC).
) AS t ON b.master = t.master
INNER JOIN a ON a.id = 1 AND t.rn <= a.`limit`;
Demo here

Include grouped column with median calculating theory

Thanks to this post, I was able to calculate the median for a corresponding vendor in the invoices table.
This was the query used:
-- Median invoice_total of vendor 97: average of the middle row(s) of the
-- vendor's invoices sorted by invoice_total.
SELECT AVG(middle_values) AS 'median'
FROM (
    SELECT t1.invoice_total AS 'middle_values'
    FROM
    (
        -- Number the vendor's invoices 1..n in ascending invoice_total order.
        SELECT @row:=@row+1 as `row`, iv.invoice_total
        FROM invoices AS iv, (SELECT @row:=0) AS r
        WHERE iv.vendor_id = 97
        ORDER BY iv.invoice_total
    ) AS t1,
    (
        -- n: how many invoices the vendor has.
        SELECT COUNT(*) as 'count'
        FROM invoices iv
        WHERE iv.vendor_id = 97
    ) AS t2
    -- the following condition will return 1 record for odd number sets, or 2 records for even number sets.
    WHERE t1.row >= t2.count/2 and t1.row <= ((t2.count/2) +1)) AS t3;
Instead of this just outputting one column in the resultbox, I'd like it to display two columns: vendor_id, median_invoice.
-- Invoices, each belonging to a vendor (vendor_id) and a terms row (terms_id).
-- The foreign keys reference `vendors` and `terms`, which are not defined in this snippet.
CREATE TABLE IF NOT EXISTS `invoices` (
`invoice_id` int(11) NOT NULL AUTO_INCREMENT,
`vendor_id` int(11) NOT NULL,
`invoice_number` varchar(50) NOT NULL,
`invoice_date` date NOT NULL,
`invoice_total` decimal(9,2) NOT NULL,
`payment_total` decimal(9,2) NOT NULL DEFAULT '0.00',
`credit_total` decimal(9,2) NOT NULL DEFAULT '0.00',
`terms_id` int(11) NOT NULL,
`invoice_due_date` date NOT NULL,
`payment_date` date DEFAULT NULL,
PRIMARY KEY (`invoice_id`),
KEY `invoices_fk_vendors` (`vendor_id`),
KEY `invoices_fk_terms` (`terms_id`),
KEY `invoices_invoice_date_ix` (`invoice_date`),
CONSTRAINT `invoices_fk_terms` FOREIGN KEY (`terms_id`) REFERENCES `terms` (`terms_id`),
CONSTRAINT `invoices_fk_vendors` FOREIGN KEY (`vendor_id`) REFERENCES `vendors` (`vendor_id`)
) ENGINE=InnoDB AUTO_INCREMENT=119 DEFAULT CHARSET=latin1;
Insert statements:
-- Sample data. No column list is given, so the values are positional in table order:
-- invoice_id, vendor_id, invoice_number, invoice_date, invoice_total,
-- payment_total, credit_total, terms_id, invoice_due_date, payment_date.
-- Vendor 97 has four invoices, vendor 123 has two, vendors 37 and 110 one each.
INSERT INTO `invoices` VALUES (118, 97, '456792', '2011-08-03', 565.60, 0.00, 0.00, 2, '2011-09-02', NULL);
INSERT INTO `invoices` VALUES (117, 97, '456791', '2011-08-03', 4390.00, 0.00, 0.00, 2, '2011-09-02', NULL);
INSERT INTO `invoices` VALUES (116, 97, '456701', '2011-08-02', 270.50, 0.00, 0.00, 2, '2011-09-01', NULL);
INSERT INTO `invoices` VALUES (115, 97, '456789', '2011-08-01', 8344.50, 0.00, 0.00, 2, '2011-08-31', NULL);
INSERT INTO `invoices` VALUES (114, 123, '963253249', '2011-08-02', 127.75, 127.75, 0.00, 3, '2011-09-01', '2011-09-04');
INSERT INTO `invoices` VALUES (113, 37, '547480102', '2011-08-01', 224.00, 0.00, 0.00, 3, '2011-08-31', NULL);
INSERT INTO `invoices` VALUES (112, 110, '0-2436', '2011-07-31', 10976.06, 0.00, 0.00, 3, '2011-08-30', NULL);
INSERT INTO `invoices` VALUES (111, 123, '263253257', '2011-07-30', 22.57, 22.57, 0.00, 3, '2011-08-29', '2011-09-03');
Doing the following was no good:
-- Asker's non-working attempt, kept as posted apart from the restored @ sigils.
-- It cannot work as intended: the outer SELECT refers to t1.vendor_id, but t1
-- exists only inside the derived table t3, and t1 never selects vendor_id;
-- the inner vendor_id comes from the extra, unfiltered `invoices` table.
SELECT t1.vendor_id, AVG(middle_values) AS 'median'
FROM (
    SELECT vendor_id, t1.invoice_total AS 'middle_values'
    FROM
    (
        SELECT @row:=@row+1 as `row`, iv.invoice_total
        FROM invoices AS iv, (SELECT @row:=0) AS r
        WHERE iv.vendor_id = 97
        ORDER BY iv.invoice_total
    ) AS t1,
    (
        SELECT COUNT(*) as 'count'
        FROM invoices iv
        WHERE iv.vendor_id = 97
    ) AS t2, invoices
    -- the following condition will return 1 record for odd number sets, or 2 records for even number sets.
    WHERE t1.row >= t2.count/2 and t1.row <= ((t2.count/2) +1)) AS t3;
In order to use vendor_id in the parent query you need to return it (select it) in each nested subquery:
-- Median invoice_total of vendor 97, returned together with the vendor_id.
SELECT t3.vendor_id, AVG(middle_values) AS 'median'
FROM (
    SELECT t1.invoice_total AS 'middle_values', t1.vendor_id
    FROM
    (
        -- Number the vendor's invoices 1..n in ascending invoice_total order;
        -- vendor_id is carried along so the outer levels can select it.
        SELECT @row:=@row+1 as `row`, iv.invoice_total, iv.vendor_id
        FROM invoices AS iv, (SELECT @row:=0) AS r
        WHERE iv.vendor_id = 97
        ORDER BY iv.invoice_total
    ) AS t1,
    (
        -- n: how many invoices the vendor has.
        SELECT COUNT(*) as 'count'
        FROM invoices iv
        WHERE iv.vendor_id = 97
    ) AS t2
    -- the following condition will return 1 record for odd number sets, or 2 records for even number sets.
    WHERE t1.row >= t2.count/2 and t1.row <= ((t2.count/2) +1)) AS t3
-- vendor_id is not aggregated, so it must be grouped for the statement to be
-- accepted when ONLY_FULL_GROUP_BY is enabled.
GROUP BY t3.vendor_id