mysql get distinct tokens from a string list - mysql

I have a query in mysql that group concats records and gives a value having redundant tokens. Below is the output of the query:
Problem Area->ACC-HO->ACC-HO->Credit Note (C/N)->Problem description ->Problem description
But I want the distinct tokens of this string as below
Problem Area->ACC-HO->Credit Note (C/N)->Problem description
Is there a way to do this in the sql SELECT query itself?
EDIT
Here is schema and query
Below is my query:
SELECT
t2.transaction_id AS transaction_id,
GROUP_CONCAT(
CONCAT(
t1.display_text,
'->',
(CASE (NOT EXISTS (SELECT 1 FROM mst_node a WHERE a.parent_node_id = t1.node_id))
WHEN 1 THEN t1.display_text ELSE
(SELECT b.display_text AS DISPLAY FROM mst_node b
WHERE parent_node_id = t2.node_id AND b.display_seq = t2.entered_value)
END)
)
ORDER BY t2.logtime_stamp SEPARATOR '->'
) AS display_text
FROM
mst_node t1
JOIN trn_user_log t2
ON t1.app_id = t2.app_id AND t1.node_id = t2.node_id
WHERE (t1.app_id = 105)
AND t1.parent_node_id IS NOT NULL
AND t1.save_as_default IS NULL
GROUP BY transaction_id,
mobile_no
ORDER BY t2.transaction_id DESC,
t2.logtime_stamp,
t2.mobile_no

In you GROUP CONCAT add DISTINCT so it will only concatenate unique values.
SELECT GROUP_CONCAT(DISTINCT colName),....
FROM ...
WHERE ...
GROUP BY ...
SQLFiddle Demo

in your grouping clause, you can add another group by to group by this string column as well.
SELECT GROUP_CONCAT(myStringColumn),....
FROM ...
WHERE ...
GROUP BY myOtherColumn,myStringColumn

I was finally able to resolve my issue by using a UNION of two select queries and then doing a GROUP_CONCAT(DISTINCT column ORDER BY another_column).
Below is the query I used:
SELECT
transaction_id,
GROUP_CONCAT(DISTINCT display_text ORDER BY logtime_stamp SEPARATOR '->') AS display_text
FROM
(SELECT
t2.transaction_id AS transaction_id,
t2.logtime_stamp,
t1.display_text AS display_text
FROM mst_node t1
JOIN trn_user_log t2 ON t1.app_id = t2.app_id
AND t1.node_id = t2.node_id
WHERE (t1.app_id = 105)
AND t1.parent_node_id IS NOT NULL
AND t1.save_as_default IS NULL
UNION
SELECT t2.transaction_id AS transaction_id,t2.logtime_stamp,
CASE(NOT EXISTS (SELECT 1 FROM mst_node a WHERE a.parent_node_id = t1.node_id))
WHEN 1 THEN NULL
ELSE (SELECT b.display_text AS display_text FROM mst_node b WHERE parent_node_id = t2.node_id AND b.display_seq = t2.entered_value)
END AS display_text
FROM mst_node t1 JOIN trn_user_log t2
ON t1.app_id = t2.app_id AND t1.node_id = t2.node_id
WHERE (t1.app_id = 105)
AND t1.parent_node_id IS NOT NULL
AND t1.save_as_default IS NULL
ORDER BY transaction_id DESC,logtime_stamp
) AS T
GROUP BY transaction_id
ORDER BY transaction_id DESC,logtime_stamp

Related

Query optimization needed because of too many sub queries and sub query dependency in where conditions

I'm writing code for the production report.
I had written this query
SELECT
P.*,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
create_date < '2019-11-01' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS before_prod,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
(
dispatched = '0' OR disp_bunch = '0'
) AND dispatch_date < '2019-11-01' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS before_dispatched,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
create_date BETWEEN '2019-11-01' AND '2019-11-06' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS production,
(
SELECT
COUNT(id) AS cnt
FROM
bales
WHERE
(
dispatched = '0' OR disp_bunch = '0'
) AND dispatch_date BETWEEN '2019-11-01' AND '2019-11-06' AND product_id = P.id AND(TYPE = 'bale' OR TYPE = 'bag')
) AS production_dispatched,
C.name AS category_name
FROM
products P
INNER JOIN category C ON
C.id = P.category
This query is working but as I have too many records in all tables it takes too much time.
also, I need only records where before_prod, before_dispatched, production, production_dispatched all these subquery results should be greater than 0.
I tried to use having clause but it also takes too much time.
I have also tried php for loop, * LOGIC: first all products than in for loop its production. but it was much slower.*
How can I optimize my query?
You can use join instead and select case to sum your data that matches your conditions.
select p.*, t.*
from products p
inner join (
select t2.id, sum(case when create_date < '2019-11-01' then 1 else 0 end) as before_prod
, sum(case when (dispatched = '0' or disp_bunch = '0') and create_date < '2019-11-01' then 1 else 0 end) as before_dispatched
, sum(case when create_date between '2019-11-01' and '2019-11-06' then 1 else 0 end) as production
, sum(case when (dispatched = '0' or disp_bunch = '0') and create_date between '2019-11-01' and '2019-11-06' then 1 else 0 end) as production_dispatched
from bales t1
inner join product t2 on t2.id= t1.product_id
inner join category t3 on t3.id = t2.category
where t1.TYPE in ('bale', 'bag')
group by t2.id) t
on t.id = p.id

MySQL with multiple SELECT and GROUP BY

I have a problem with my database in mysql. I would like to have a table from my database with date, pat, dureeP, dureeC but this function doesn't GROUP BY :
select *
from (SELECT date_format(p.date, "%Y-%m") AS date
,p.pat
,AVG(a) AS dureeP
FROM timing as t, patient as p
WHERE t.id_p = p.id_p
AND t.pos=6
AND t.heure_fin IS NOT NULL
GROUP BY p.pat, MONTH(p.date), YEAR(p.datede)
) as T1,
(SELECT AVG(b) AS dureeC
FROM timing as t, patient as p
WHERE t.id_p = p.id_p
AND t.pos=3
AND t.heure_fin IS NOT NULL
GROUP BY p.pathologie, MONTH(p.date), YEAR(p.date)
) as T2
With one SELECT I can have what I want but with multiple SELECT I can Group By.
Do you have an idea?
Thank you
To simplify your query you might be able to use "conditional aggregates" which basically means placing a case expression inside an aggregate function
SELECT
p.pathologie
, MONTH(p.date)
, YEAR(p.date)
, AVG(CASE WHEN t.pos = 3 THEN b END) AS dureeC
, AVG(CASE WHEN t.pos = 6 THEN a END) AS dureeP
FROM timing AS t
INNER JOIN patient AS p ON t.id_p = p.id_p
WHERE t.heure_fin IS NOT NULL
GROUP BY
p.pathologie
, MONTH(p.date)
, YEAR(p.date)

Multi dynamic rows into multi column MySQL

I'm using MySQL. I have to separate multi repeated rows into columns. The table has following structure.
But I need like this.
Note: There will be always 6 records of each DateTime. But Title and Feedback are dynamic. I tried to use
select DateTime,
But this is not giving my expected value. I wrote a code (not considered about title):
select
g.dateTime AS dateTime,
g.feedback as feedback ,
c.title AS title
from gauge g
inner join category c on g.category_id=c.category_id AND c.title ='title' AND g.feedback ='feedback'
group by g.dateTime
This doesn't work and I tried GROUP_CONCAT(.... SEPARATOR'.....' ) this gives me not expected output, just gives output in one column. My approach might be wrong.
Use a dynamic pivot since there are many different values
Fiddle to play with
drop table t;
create table t
(
dateTime datetime,
title varchar(50),
feedback int
);
insert into t values
( '2018-06-29 12:55:36', 'A', 1),
( '2018-06-29 12:55:36', 'B', 2),
( '2018-06-22 12:55:36', 'A', 1),
( '2018-06-22 12:55:36', 'B', 2),
( '2018-06-22 12:55:36', 'C', 3);
SET #sql = NULL;
SELECT
GROUP_CONCAT(DISTINCT
CONCAT(
'MAX(CASE WHEN title = ''',
title,
''' THEN ''', title ,''' END) AS ',
CONCAT(' Title',title,',')
),
CONCAT(
'MAX(CASE WHEN feedback = ''',
feedback,
''' THEN ''', feedback ,''' END) AS ',
CONCAT(' Feedback',feedback)
)
) INTO #sql
FROM T;
SET #sql = CONCAT('SELECT dateTime, ', #sql, '
FROM t
GROUP BY dateTime');
PREPARE stmt FROM #sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
I would simply use conditional aggregation. Enumerating the values is a bit tricky in MySQL (unless you are using MySQL 8+):
select datetime,
max(case when rn = 1 then title end) as title_1,
max(case when rn = 1 then feedback end) as feedback_1,
max(case when rn = 2 then title end) as title_2,
max(case when rn = 2 then feedback end) as feedback_2,
max(case when rn = 3 then title end) as title_3,
max(case when rn = 3 then feedback end) as feedback_3,
max(case when rn = 4 then title end) as title_4,
max(case when rn = 4 then feedback end) as feedback_4,
max(case when rn = 5 then title end) as title_5,
max(case when rn = 5 then feedback end) as feedback_5,
max(case when rn = 6 then title end) as title_6,
max(case when rn = 6 then feedback end) as feedback_6
from (select gc.*,
(#rn := if(#dt = g.dateTime, #rn + 1,
if(#dt := g.dateTime, 1, 1)
)
) as rn
from (select g.dateTime, g.feedback, c.title AS title
from gauge g inner join
category c
on g.category_id = c.category_id and
c.title = 'title' and
g.feedback = 'feedback'
group by g.dateTime
order by g.dateTime
) gc cross join
(select #dt := '', #rn := 0) params
) gc
group by datetime;
I don't think the filtering on c.title and g.feedback is correct.
Not an optimized approach but you may join them into table and filter according to row number modulo by 6. This way, we can separate the 6 rows into 6 different table
http://rextester.com/DLVV64095
SELECT MAIN.DateTime,
TBL1.TITLE AS Title1,
TBL1.FEEDBACK AS Feedback1,
TBL2.TITLE AS Title2,
TBL2.FEEDBACK AS Feedback2,
TBL3.TITLE AS Title3,
TBL3.FEEDBACK AS Feedback3,
TBL4.TITLE AS Title4,
TBL4.FEEDBACK AS Feedback4,
TBL5.TITLE AS Title5,
TBL5.FEEDBACK AS Feedback5,
TBL6.TITLE AS Title6,
TBL6.FEEDBACK AS Feedback6
FROM (
SELECT [DATETIME] AS [DateTime]
FROM GAUGE
GROUP BY [DATETIME]
) MAIN
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL1
ON TBL1.DATETIME = MAIN.[DateTime]
AND TBL1.ROWNO % 6 = 1
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL2
ON TBL2.DATETIME = MAIN.[DateTime]
AND TBL2.ROWNO % 6 = 2
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL3
ON TBL3.DATETIME = MAIN.[DateTime]
AND TBL3.ROWNO % 6 = 3
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL4
ON TBL4.DATETIME = MAIN.[DateTime]
AND TBL4.ROWNO % 6 = 4
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL5
ON TBL5.DATETIME = MAIN.[DateTime]
AND TBL5.ROWNO % 6 = 5
INNER JOIN (
SELECT G.DATETIME,
G.FEEDBACK,
C.TITLE,
ROW_NUMBER() OVER(PARTITION BY ORDER BY (SELECT 1) ASC) AS ROWNO
FROM GAUGE G
INNER JOIN CATEGORY C
ON G.CATEGORY_ID = C.CATEGORY_ID
) TBL6
ON TBL6.DATETIME = MAIN.[DateTime]
AND TBL6.ROWNO % 6 = 0

MySQL derived table with join

In the following query I'm having a problem when it comes to returning the right value for count2.
What I need is to get the number of rows from table2 which could easily be done by using a derived table t:
SELECT name,
(SELECT COUNT(*) FROM `table1`) AS count1,
(SELECT COUNT(*) FROM (
SELECT COUNT(*) FROM `table2` t2) WHERE t2.user = prf.user)
) t AS count2,
(SELECT SUM(a) FROM `table3`) AS count3
FROM `profiles` prf
WHERE 1=1
AND prf.user = 1
The problem is that the WHERE t2.user = prf.user statement fails as the prf table is outside the subquery's scope.
How can I achieve the above?
EDIT: I'm adding the actual query in case it's helpful for getting a better grasp:
SELECT PRF.BranchID, PRF.user_id, CONCAT_WS(" ",PRF.lastname,PRF.firstname) Synergatis,
( SELECT COUNT(*) FROM Actions A JOIN Requests R ON R.RequestID=A.RequestID WHERE A.ActionStatus = 302 AND A.UserOwner = PRF.user_id AND A.ActionDate BETWEEN '2015-06-01' AND '2015-06-10' ) AS energeies,
( SELECT COUNT(DISTINCT RPP.RequestID) FROM VW_Xartofylakio_Synergati VV JOIN Requests_Prop RPP ON RPP.PropertyID = VV.PropertyID JOIN Requests R ON R.RequestID = RPP.RequestID WHERE VV.CurrUsr = PRF.user_id AND R.ModifyTime BETWEEN '2015-06-01' AND '2015-06-10' ) AS zitiseis_eidikes,
( SELECT COUNT(DISTINCT(CustomerID)) FROM Demo_Orders_M WHERE DemoOrderStatus=253 AND USER=PRF.user_id AND DemoOrderDate BETWEEN '2015-06-01' AND '2015-06-10' ) AS endiaferomenoi,
( SELECT COUNT(*) AS cnt FROM Demo_Orders_M DOM JOIN Actions A ON DOM.DemoOrderID = A.DemoOrderID WHERE DOM.User = PRF.user_id AND DOM.DemoOrderStatus = 253 AND A.ActionDate BETWEEN '2015-06-01 14:56:19' AND '2015-06-30 14:56:19' GROUP BY DOM.CustomerID, DOM.User HAVING COUNT(*) > 1 ) AS anakykl_endiaf,
( SELECT COUNT(*) FROM Demo_Orders_M DOM WHERE DOM.`User`=PRF.user_id AND DemoOrderStatus = 253 AND DOM.DemoOrderDate BETWEEN '2015-06-01' AND '2015-06-10' ) AS epideixeis,
( SELECT COUNT(DISTINCT(DOD.PropertyID)) AS PropertyID FROM Demo_Orders_M DOM JOIN Demo_Orders_D DOD ON DOM.DemoOrderID = DOD.DemoOrderID JOIN Actions A ON DOD.DemoOrderID = A.DemoOrderID WHERE DOM.DemoOrderStatus = 253 AND DOM.User = PRF.user_id AND A.ActionDate BETWEEN '2015-06-01' AND '2015-06-10' ) AS monadika_akinita
FROM tbl_profiles PRF
WHERE 1=1
AND PRF.user_id IN (
SELECT a.user_id FROM tbl_profiles a WHERE a.user_id IN ('248','1159','486','183')
OR a.GroupID IN (SELECT b.GroupID FROM L_Groups b WHERE b.ManagerID IN ('248','1159','486','183'))
)
ORDER BY PRF.user_id
The subquery I'm referring to is the one that returns the result as anakykl_endiaf.
I suspect it is not because of prf table, it is because of t2 table... There are no restrictions to use outer alias in inner subqueries because there are such a thing like correlated subquery. Your problem is that you have the opposite case here: you are referring inner alias in outer query.
(SELECT COUNT(*)
FROM (SELECT COUNT(*) FROM `table2` t2) WHERE t2.user = prf.user)
Why are you selecting count twice here? You can change to this:
(SELECT COUNT(*)
FROM (SELECT COUNT(*) FROM `table2` t2 WHERE t2.user = prf.user))
or this:
(SELECT COUNT(*)
FROM `table2` t2 WHERE t2.user = prf.user)
A suggestion to try.
You have sub queries in the SELECT, and in this case they must each only return a single row. For some reason (which we can't really tell without test data) one of these is returning more than 1 row, hence failing.
As an interim step, change the query to join against the sub queries, which should make it more obvious when there are duplicates (and may also be quite a bit more efficient, depending on the data).
Something like this (not tested so probably a few typos):-
SELECT PRF.BranchID,
PRF.user_id,
CONCAT_WS(" ",PRF.lastname,PRF.firstname) Synergatis,
ar.energeies,
vrr.zitiseis_eidikes,
m.endiaferomenoi,
ae.anakykl_endiaf,
d.epideixeis,
ddd.monadika_akinita
FROM tbl_profiles PRF
LEFT OUTER JOIN
(
SELECT A.UserOwner AS DomUser, COUNT(*) AS energeies
FROM Actions A
JOIN Requests R ON R.RequestID=A.RequestID
WHERE A.ActionStatus = 302
AND A.ActionDate BETWEEN '2015-06-01' AND '2015-06-10'
GROUP BY A.UserOwner
) ar
ON ar.DomUser = PRF.user_id
LEFT OUTER JOIN
(
SELECT VV.CurrUsr AS DomUser, COUNT(DISTINCT RPP.RequestID) AS zitiseis_eidikes
FROM VW_Xartofylakio_Synergati VV
JOIN Requests_Prop RPP ON RPP.PropertyID = VV.PropertyID
JOIN Requests R ON R.RequestID = RPP.RequestID
WHERE R.ModifyTime BETWEEN '2015-06-01' AND '2015-06-10'
GROUP BY VV.DomUser
) vrr
ON vrr.DomUser = PRF.user_id
LEFT OUTER JOIN
(
SELECT `USER` AS DomUser, COUNT(DISTINCT(CustomerID)) AS endiaferomenoi
FROM Demo_Orders_M
WHERE DemoOrderStatus=253
AND DemoOrderDate BETWEEN '2015-06-01' AND '2015-06-10'
GROUP BY DomUser
) m
ON PRF.user_id = m.DomUser
LEFT OUTER JOIN
(
SELECT DOM.CustomerID, DOM.`User` AS DomUser, COUNT(*) AS anakykl_endiaf
FROM Demo_Orders_M DOM
JOIN Actions A ON DOM.DemoOrderID = A.DemoOrderID
WHERE DOM.DemoOrderStatus = 253
AND A.ActionDate BETWEEN '2015-06-01 14:56:19' AND '2015-06-30 14:56:19'
GROUP BY DOM.CustomerID, DOM.DomUser
HAVING COUNT(*) > 1
) ae
ON PRF.user_id = ae.DomUser
LEFT OUTER JOIN
(
SELECT DOM.`User` AS DomUser, COUNT(*) AS epideixeis
FROM Demo_Orders_M DOM
WHERE DemoOrderStatus = 253
AND DOM.DemoOrderDate BETWEEN '2015-06-01' AND '2015-06-10'
GROUP BY DOM.DomUser
) d
EDIT
If you just want a count of the number of customerID fields for a user in the anakykl_endiaf field then change it to doing a count of distinct customerIDs. Ie, for the above query I have done change it to:-
LEFT OUTER JOIN
(
SELECT DOM.`User` AS DomUser, COUNT(DISTINCT DOM.CustomerID) AS anakykl_endiaf
FROM Demo_Orders_M DOM
JOIN Actions A ON DOM.DemoOrderID = A.DemoOrderID
WHERE DOM.DemoOrderStatus = 253
AND A.ActionDate BETWEEN '2015-06-01 14:56:19' AND '2015-06-30 14:56:19'
GROUP BY DOM.DomUser
HAVING COUNT(*) > 1
) ae

Using inner joins and aliases in mysql

I'm trying to join several different values from the same table. I'm trying to return printing rates in the query from different time horizons (how much has been printed in the month/day/year/week etc.) and I am getting a 1064 error when I execute the query below. What could the explanation be?
SELECT t1.station_name, t1.yearpages, t2.monthpages
FROM (
SELECT station_name, SUM(print_pages) yearpages
FROM `file_prints`
WHERE year(print_date) = 2014;
) t1
INNER JOIN (
SELECT station_name, sum(print_pages) monthpages
FROM `file_prints`
WHERE month(print_date) = 2;
) ON t1.station_name = t2.station_name;
Then, if I were to add multiple joins, would the query look like this?
SELECT t1.station_name, t1.station_name as db_name, t1.yearpages, t2.monthpages, t3.daypages, t4.weekpages
FROM (
SELECT station_name, sum(print_pages) yearpages
FROM `file_prints`
WHERE year(print_date) = $year;
GROUP BY station_name
) t1
INNER JOIN (
SELECT station_name, sum(print_pages) monthpages
FROM `file_prints`
WHERE month(print_date) = $month;
GROUP BY station_name
) t2 ON t1.station_name = t2.station_name
INNER JOIN (
SELECT station_name, sum(print_pages) daypages
FROM `file_prints`
WHERE dayofmonth(print_date) = $day;
GROUP BY station_name
) t3 ON t2.station_name = t3.station_name
INNER JOIN (
SELECT station_name, sum(print_pages) weekpages
FROM `file_prints`
WHERE week(print_date) = $week;
GROUP BY station_name
) t4 ON t3.station_name = t4.station_name
How about just using conditional aggregation?
SELECT station_name,
sum(case when year(print_date) = $year then print_pages end) as yearpages,
sum(case when month(print_date) = $month then print_pages end) as monthpages,
sum(case when day(print_date) = $day then print_pages end) as daypages,
sum(case when week(print_date) = $week then print_pages end) as weekpages
FROM `file_prints`
GROUP BY station_name;