Group by column. If null, group by other column MySQL - mysql

I have this query:
-- Ledger rows with their batch reference. payroll_list is joined on the
-- batch id, but none of its columns are selected.
SELECT
    vcl.id,
    vcl.batch_id,
    vcl.type,
    vcl.amount,
    vcl.date
FROM vrcorporateledger AS vcl
LEFT JOIN payroll_list AS pl
    ON vcl.batch_id = pl.id
which gives the following output:
Whenever there is "CREDIT" in col type I want to increase the running balance by the value in col amount; whenever there is "DEBIT" in col type I want to decrease the accumulated balance by the value in col amount after grouping by batch_id col. So expected result is:
1000-2+5-4-49=950.
If possible I want to also create a column "balance" where at each point/step I see the resulting balance.
expected output like:

-- Collapses the DEBIT rows of one batch into a single output line and adds a
-- running balance. Uses window functions (the linked fiddle targets MySQL 8.0).
WITH cte AS (
SELECT type,
-- Partition key: RAND() for CREDIT rows (intended to put every credit in its
-- own partition so it keeps its own amount), batchID for DEBIT rows, 0 otherwise.
-- NOTE(review): RAND() is non-deterministic; a collision with another key is
-- unlikely but not excluded -- confirm this is acceptable.
SUM(amount) OVER (PARTITION BY CASE type WHEN 'CREDIT' THEN RAND()
WHEN 'DEBIT' THEN batchID
ELSE 0 END ) amount,
-- Earliest date within the same partitioning scheme.
MIN(`date`) OVER (PARTITION BY CASE type WHEN 'CREDIT' THEN RAND()
WHEN 'DEBIT' THEN batchID
ELSE 0 END ) `date`,
-- Running balance in date order: credits add, debits subtract.
SUM(CASE type WHEN 'CREDIT' THEN amount
WHEN 'DEBIT' THEN -amount
ELSE 0 END) OVER (ORDER BY `date`) balance,
batchID,
-- batchID of the following row in date order; used below to drop all but the
-- last row of each run of equal batchIDs.
LEAD(batchID) OVER (ORDER BY `date`) next_batchID
FROM source_data
)
SELECT type,
amount,
balance,
`date`
FROM cte
-- Keeps a row unless the next row has the same batchID; a NULL on either side
-- makes the comparison unknown, so the ELSE branch keeps the row.
WHERE CASE WHEN batchID = next_batchID THEN 0 ELSE 1 END
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=75255728f6d64a91a2ebf62edc2d0a0b

I think you're looking for SQL Window functions. They basically allow you to do an aggregate "over a partition".
On a side note: this is a really bad way of calculating a running balance.
I would strongly suggest storing balance in a separate column at runtime. This should allow you to:
have a strict check even when rows are changed or deleted
normal speed when you have millions of records

If your MySQL version is 8 or above then you can use common table expression with window function as below:
Schema (MySQL v8.0)
-- Sample ledger used by the query below. batch_id is NULL for entries that do
-- not belong to a batch; type is 'CREDIT' or 'DEBIT' in the sample rows.
-- NOTE(review): amount is FLOAT in the original example; an exact DECIMAL type
-- is usually preferable for monetary values.
CREATE TABLE vrcorporateledger (
    id INT,
    batch_id INT,
    type VARCHAR(10),
    amount FLOAT,
    Tdate TIMESTAMP
);

-- Explicit column list so the inserts keep working if the table gains or
-- reorders columns.
INSERT INTO vrcorporateledger (id, batch_id, type, amount, Tdate) VALUES
    (1, NULL, 'CREDIT', 1000, '2021/03/04 06:19:00'),
    (2, 1,    'DEBIT',  1,    '2021/03/04 07:00:19'),
    (3, 1,    'DEBIT',  1,    '2021/03/04 07:00:25'),
    (4, NULL, 'CREDIT', 5,    '2021/03/05 06:19:00'),
    (5, 2,    'DEBIT',  1,    '2021/03/04 08:58:10'),
    (6, 2,    'DEBIT',  3,    '2021/03/04 08:58:16'),
    (7, NULL, 'DEBIT',  49,   '2021/03/04 16:42:33');
Query #1
-- Running balance over the ledger, with every batch collapsed into one line.
--   * Rows without a batch (batch_id IS NULL) are reported individually.
--   * Rows sharing a batch_id are reported once, carrying the batch's net
--     signed amount and its earliest Tdate.
-- DEBIT amounts count as negative, everything else as positive.
WITH ledger AS (
    SELECT
        id,
        type,
        batch_id,
        CASE
            WHEN batch_id IS NULL
                THEN CASE WHEN type = 'DEBIT' THEN -amount ELSE amount END
            ELSE SUM(CASE WHEN type = 'DEBIT' THEN -amount ELSE amount END)
                     OVER (PARTITION BY batch_id)
        END AS amount,
        CASE
            WHEN batch_id IS NULL THEN Tdate
            ELSE MIN(Tdate) OVER (PARTITION BY batch_id)
        END AS Trandate,
        -- Position of the row inside its batch. Filtering on position 1 keeps
        -- exactly one row per batch regardless of batch size; comparing
        -- batch_id with LEAD(batch_id) kept N-1 rows of an N-row batch and
        -- dropped batches that consist of a single row.
        ROW_NUMBER() OVER (PARTITION BY batch_id ORDER BY id) AS batch_row
    FROM vrcorporateledger
)
SELECT
    type,
    amount,
    -- Evaluated after the WHERE filter, so each batch contributes once.
    SUM(amount) OVER (ORDER BY id) AS running_balance,
    Trandate AS `date`
FROM ledger
WHERE batch_id IS NULL
   OR batch_row = 1
ORDER BY id;
type
amount
date
running_balance
CREDIT
1000
2021-03-04 06:19:00
1000
DEBIT
-2
2021-03-04 07:00:19
998
CREDIT
5
2021-03-05 06:19:00
1003
DEBIT
-4
2021-03-04 08:58:10
999
DEBIT
-49
2021-03-04 16:42:33
950
View on DB Fiddle

Related

How to create a calculated row in sql or power bi?

I am trying to do a calculation on two rows returned by a SQL query I wrote, so that I get a third row, Profit, showing the amount. Is this possible?
This dummy data not true to any company!
see below :
-- Absolute net amount (debit - credit) per account group, limited to the
-- INCOME and EXPENSE groups of the 'PCG99-BASE' chart version.
SELECT
    a.pcg_type AS GL_Acoount_Group,
    ABS(SUM(b.debit - b.credit)) AS GL_Amount
FROM dolibarr.llx_accounting_account AS a
INNER JOIN dolibarr.llx_accounting_bookkeeping AS b
    ON a.account_number = b.numero_compte
WHERE a.pcg_type IN ('INCOME', 'EXPENSE')
  AND a.fk_pcg_version = 'PCG99-BASE'  -- was "ANDa.fk_pcg_version" (missing space, syntax error)
GROUP BY a.pcg_type
Results:
Group. Amt
INCOME 379200
EXPENSE 65700
Expected Results:
Group. Amt
INCOME 379200
EXPENSE 65700
PROFIT 313500
Use ROLLUP for adding an extra row and use CASE statement inside SUM() function for treating expense value as negative for calculation
--MySQL
-- One row per acc_type plus a ROLLUP super-aggregate row labelled 'Profit'.
-- EXPENSE amounts are negated inside the sum so the super-aggregate row is
-- income minus expense; ABS() shows every row as a positive figure.
SELECT
    COALESCE(acc_type, 'Profit') AS "Group",
    ABS(SUM(CASE acc_type WHEN 'EXPENSE' THEN -amount ELSE amount END)) AS amt
FROM test
GROUP BY acc_type WITH ROLLUP
Another way by using UNION ALL
-- Per-type totals, followed by a single 'Profit' row computed over the whole
-- table with EXPENSE amounts negated.
SELECT
    acc_type AS "Group",
    SUM(amount) AS Amt
FROM test
GROUP BY acc_type

UNION ALL

SELECT
    'Profit' AS "Group",
    SUM(CASE acc_type WHEN 'EXPENSE' THEN -amount ELSE amount END) AS Amt
FROM test
Please check this url https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=f859036ffcb3d808330bce5346daee1e

SQL - PIVOT for one column and add new column

I am fairly new to SQL. I have got this input table as
TypeId EventDescription FeedHeader FeedHeaderValue
---------------------------------------------------------
166 Financial AllocRule 130
166 Financial DealID 0
175 Partner Capital InvestorID OV_P1
175 Investment Querter Q1
175 Investment DealID offset
175 Investment InvestorID OV_P2
I need an output as follows
Financial value Partner Capital value Investment value
-------------------------------------------------------------------------------
AllocRule 130 InvestorID OV_P1 Querter Q1
DealID 0 DealID offset
InvestorID OV_P2
Not sure if that is even possible. I tried using pivot but its not giving desired output
-- First attempt (T-SQL): pivots FeedHeader into one column per EventDescription.
-- The derived table exposes only EventDescription and FeedHeader, so after the
-- PIVOT no grouping column is left and MAX() collapses everything to one row.
select
[Financial] as FinancialHeader
, [Partner Capital] as PartnerCapitalHeader
, [Investment] as Investmentheader
from
(
select EventDescription, FeedHeader
from [Feeder]
) x
pivot
(
MAX(FeedHeader)
for EventDescription in([Financial], [Partner Capital], [Investment])
)p
Another approach i tried
-- Second attempt: conditional aggregation, one header/value column pair per
-- EventDescription. Because the query groups by EventDescription, each output
-- row has non-NULL values only in the pair that matches its own group.
-- Note that three output columns share the alias "value".
Select
Min(Case [EventDescription] When 'Financial' Then [FeedHeader] End)
Financial,
Min(Case [EventDescription] When 'Financial' Then [FeedHeaderValue] End)
value,
Min(Case [EventDescription] When 'Partner Capital' Then [FeedHeader]
End) PartnerCapital,
Min(Case [EventDescription] When 'Partner Capital' Then
[FeedHeaderValue] End) value,
Min(Case [EventDescription] When 'Investment' Then [FeedHeader] End)
Investment,
Min(Case [EventDescription] When 'Investment' Then [FeedHeaderValue] End)
value
From [Feeder]
Group By EventDescription
Is there a another way to do it?
I was curious and did some research with PIVOT on SO and google and finally my luck clicked (at least what I think now)
The key point here is that you create new EventDescription values by appending 1 or 2 to the end depending on how many columns we want to PIVOT.
Without doing this, the pivot query won't work properly and would lead to error as per my experience with this task.
-- Pivots two columns (FeedHeader and FeedHeaderValue) side by side (T-SQL).
-- A second pivot key, EventDescription1 = EventDescription + '1', is derived so
-- that each PIVOT operator has its own spreading column. Rows are numbered per
-- EventDescription (rn), and the outer GROUP BY rn with MAX() merges the
-- partially filled rows that share the same rn into one output row.
select max([Financial]) as FinancialHeader
, max([Financial1]) as FinancialHeaderValue
, max([Partner Capital]) as PartnerCapitalHeader
, max([Partner Capital1]) as PartnerCapitalHeaderValue
, max([Investment]) as InvestmentHeader
, max([Investment1]) as InvestmentHeaderValue
from
(select EventDescription,
EventDescription+'1' as EventDescription1,
FeedHeader,
FeedHeaderValue,
-- NOTE(review): ordering by the partition column gives no tie-break, so the
-- numbering order inside each EventDescription is not defined by this query.
row_number() over (partition by EventDescription order by EventDescription) rn
from [testtable]
) x
-- First pivot: FeedHeader spread over the original EventDescription values.
pivot
(
MAX(FeedHeader)
for EventDescription in([Financial], [Partner Capital], [Investment])
) p
-- Second pivot: FeedHeaderValue spread over the derived '...1' keys.
pivot
(
MAX(FeedHeaderValue)
for EventDescription1 in([Financial1], [Partner Capital1] , [Investment1] )
) v
group by [RN]
DEMO: db<>fiddle

SQL multi query

I need some help to do it right in one query (if it possible).
(this is a theoretical example and I assume the presence of events in event_name(like registration/action etc)
I have 3 colums:
-user_id
-event_timestamp
-event_name
From this 3 columns we need to create new table with 4 new columns:
-user year and month registration time
-number of new user registration in this month
-number of users who returned to the second calendar month after registration
-return probability
Result must be looks like this:
2019-1 | 1 | 1 | 100%
2019-2 | 3 | 2 | 67%
2019-3 | 2 | 0 | 0%
What I've done now:
I'm use this toy example of my possible main table:
-- Toy event log: one row per user event, with the event time, the user id and
-- the event name ('registration' or 'action' in the sample rows).
CREATE TABLE `main` (
`event_timestamp` timestamp,
`user_id` int(10),
`event_name` char(12)
) DEFAULT CHARSET=utf8;
-- Sample rows covering January to March 2019. Some user ids only ever appear
-- with 'action' events (e.g. 2, 6, 9).
INSERT INTO `main` (`event_timestamp`, `user_id`, `event_name`) VALUES
('2019-01-23 20:02:21.550', '1', 'registration'),
('2019-01-24 20:03:21.550', '2', 'action'),
('2019-02-21 20:04:21.550', '3', 'registration'),
('2019-02-22 20:05:21.550', '4', 'registration'),
('2019-02-23 20:06:21.550', '5', 'registration'),
('2019-02-23 20:06:21.550', '1', 'action'),
('2019-02-24 20:07:21.550', '6', 'action'),
('2019-03-20 20:08:21.550', '3', 'action'),
('2019-03-21 20:09:21.550', '4', 'action'),
('2019-03-22 20:10:21.550', '9', 'action'),
('2019-03-23 20:11:21.550', '10', 'registration'),
('2019-03-22 20:10:21.550', '4', 'action'),
('2019-03-22 20:10:21.550', '5', 'action'),
('2019-03-24 20:11:21.550', '11', 'registration');
I'm trying to test some queries to create 4 new columns:
This is for column #1, we select month and year from timestamp where action is registration (as I guess), but I need to sum it for month (like 2019-11, 2019-12)
-- Year-month ('YYYY-MM') of every registration event, one output row per event.
SELECT
    DATE_FORMAT(event_timestamp, '%Y-%m') AS column_1
FROM main
WHERE event_name = 'registration';
For column #2 we need to count, for every month, the users whose event_name is registration in that month; alternatively, we could look for each user_id's first activity, but I don't know how to do this.
Here are some thoughts on it:
-- Distinct users per calendar month number, across all event types.
SELECT
    COUNT(DISTINCT user_id) AS user_count
FROM main
GROUP BY MONTH(event_timestamp);

-- Distinct users that have a registration event, over the whole table.
SELECT
    COUNT(DISTINCT user_id) AS user_count
FROM main
WHERE event_name = 'registration';
For column #3 we need to compare user_id with the event_name registration and last month event with any event of the second month so we get users who returned for the next month.
Any idea how to create this query?
This is how to calc column #4
-- Sketch of column #4: returned users as a rounded percentage of new users.
-- NOTE(review): column_2 and column_3 are not columns of `main` as created
-- above; this presumably stands for a query over the finished result set.
SELECT *,
ROUND ((column_3/column_2)*100) AS column_4
FROM main;
I hope you will find the following answer helpful.
The first column is the extraction of year and month. The new_users column is the COUNT of the unique user ids when the action is 'registration' since the user can be duplicated from the JOIN as a result of taking multiple actions the following month. The returned_users column is the number of users who have an action in the next month from the registration. The returned_users column needs a DISTINCT clause since a user can have multiple actions during one month. The final column is the probability that you asked from the two previous columns.
The JOIN clause is a self-join to bring the users that had at least one action the next month of their registration.
-- Per calendar month: users who registered in that month, how many of them
-- have an 'action' event in the following calendar month, and that share as
-- a percentage (MySQL).
-- The self-join attaches to each registration row the same user's actions from
-- the next month. A user with several such actions appears several times, which
-- is why both counts use DISTINCT.
SELECT
    CONCAT(YEAR(A.event_timestamp), '-', MONTH(A.event_timestamp)),
    COUNT(DISTINCT CASE WHEN A.event_name LIKE 'registration' THEN A.user_id END) AS new_users,
    COUNT(DISTINCT B.user_id) AS returned_users,
    CASE
        -- Months without registrations would otherwise divide by zero.
        WHEN COUNT(DISTINCT CASE WHEN A.event_name LIKE 'registration' THEN A.user_id END) = 0 THEN 0
        ELSE COUNT(DISTINCT B.user_id)
             / COUNT(DISTINCT CASE WHEN A.event_name LIKE 'registration' THEN A.user_id END) * 100
    END AS My_Ratio
FROM main AS A
LEFT JOIN main AS B
    ON A.user_id = B.user_id
   -- "Exactly one calendar month later", compared on YYYYMM periods.
   -- MONTH(A) + 1 = MONTH(B) ignored the year: it never matched December to
   -- January and matched the same month numbers across different years.
   AND PERIOD_DIFF(EXTRACT(YEAR_MONTH FROM B.event_timestamp),
                   EXTRACT(YEAR_MONTH FROM A.event_timestamp)) = 1
   AND A.event_name = 'registration'
   AND B.event_name = 'action'
GROUP BY CONCAT(YEAR(A.event_timestamp), '-', MONTH(A.event_timestamp))
What we will do is to use window functions and aggregation -- window functions to get the earliest registration date. Then some conditional aggregation.
One challenge is the handling of calendar months. To handle this, we will truncate the dates to the beginning of the month to facilitate the date arithmetic:
-- Per registration month: number of registered users, and how many / what
-- share of them have an event exactly two calendar months after registration.
-- NOTE(review): only month_2 is used by the outer query; month_1 and month_3
-- are computed but not selected.
select yyyymm_reg, count(*) as regs_in_month,
sum( month_2 > 0 ) as visits_2months,
avg( month_2 > 0 ) as return_rate_2months
-- Per user: month_N is 1 when at least one event falls exactly N months after
-- the registration month, otherwise 0.
from (select m.user_id, m.yyyymm_reg,
max( (timestampdiff(month, m.yyyymm_reg, m.yyyymm) = 1) ) as month_1,
max( (timestampdiff(month, m.yyyymm_reg, m.yyyymm) = 2) ) as month_2,
max( (timestampdiff(month, m.yyyymm_reg, m.yyyymm) = 3) ) as month_3
-- Per event: yyyymm is the first day of the event's month; yyyymm_reg is the
-- first day of the month of the user's earliest 'registration' event.
from (select m.*,
cast(concat(extract(year_month from event_timestamp), '01') as date) as yyyymm,
cast(concat(extract(year_month from min(case when event_name = 'registration' then event_timestamp end) over (partition by user_id)), '01') as date) as yyyymm_reg
from main m
) m
-- Users without any registration event have no registration month; skip them.
where m.yyyymm_reg is not null
group by m.user_id, m.yyyymm_reg
) u
group by u.yyyymm_reg;
Here is a db<>fiddle.
Here you go, done in T-SQL:
-- T-SQL variant. The CTE yields one row per (month 'yyyy-MM', user_id) with the
-- number of 'registration' events and the number of 'action' events.
-- NOTE(review): count_action counts action events, not distinct users -- confirm
-- that is the intended "returned users" figure.
;with cte as(
select a.* from (
select form,user_id,sum(count_regs) as count_regs,sum(count_action) as count_action from (
select FORMAT(event_timestamp,'yyyy-MM') as form,user_id,event_name,
CASE WHEN event_name = 'registration' THEN 1 ELSE 0 END as count_regs,
CASE WHEN event_name = 'action' THEN 1 ELSE 0 END as count_action from main) a
group by form,user_id) a)
-- probability = actions in the following month / registrations in this month * 100;
-- a zero registration count is replaced by 1 to avoid division by zero.
-- NOTE(review): the counts are compared with the string literals '0' / '1',
-- which relies on implicit conversion.
select final.form,final.count_regs,final.count_action,((CAST(final.count_action as float)/(CASE WHEN final.count_regs = '0' THEN '1' ELSE final.count_regs END))*100) as probability from (
select a.form,sum(a.count_regs) count_regs,CASE WHEN sum(b.count_action) is null then '0' else sum(b.count_action) end count_action from cte a
-- Self-join: same user, b's month is the month right after a's month.
left join
cte b
ON a.user_id = b.user_id and
DATEADD(month,1,CONVERT(date,a.form+'-01')) = CONVERT(date,b.form+'-01')
group by a.form ) final where final.count_regs != '0' or final.count_action != '0'

Count dates and compare them at the same time SQL Server 2008

I have several datetime columns. I need to calculate in SQL Server 2008 for each timestamp how many datetime stamps in the same column are smaller than each of datetime stamps.
For example: for 2016-05-01 14:24:000.00 in column DateTime1 I need to calculate how many datetime values in the DateTime1 column are smaller than it.
I also need to know how many datetimestamps are smaller than a datetime stamp for the same record (in the same row) in column DateTime2 and 3.
DateTime1 DateTime2 DateTime3
----------------------------------------------------------------------------
2016-05-01 13:24:000.00 2016-05-01 15:24:000.00 2016-05-01 16:20:000.00
2016-05-01 13:30:000.00 2016-05-01 14:21:000.00 2016-05-01 15:10:000.00
2016-05-01 14:24:000.00 2016-05-01 17:21:000.00 2016-05-01 18:10:000.00
If I understand correctly, you can use rank():
-- Ranks every row within each datetime column independently; the smallest
-- value of a column gets rank 1.
SELECT
    t.*,
    RANK() OVER (ORDER BY datetime1) AS dt1_rank,
    RANK() OVER (ORDER BY datetime2) AS dt2_rank,
    RANK() OVER (ORDER BY datetime3) AS dt3_rank
FROM t;
Depending on how you want to treat tied values, you might actually want dense_rank(). Also, you might want to subtract 1 from the ranking value.
Assume I have a Table name [TestTB] has 3 columns DateTime1,DateTime2,DateTime3 .
I said CountSmallerDateTime1 as "how many datetime values are smaller then it in DateTime1 column"
I said CountSmallerDateTime2 as "how many datetimestamps are smaller than a datetime stamp for the same record (in the same row) in column DateTime2" , Similarity , CountSmallerDateTime3 for DateTime3 .
Then I have a query for your request :
-- For every row of TestTB:
--   CountSmallerDateTime1 - how many rows have a smaller DateTime1 value
--                           (compared down the column);
--   CountSmallerDateTime2 - how many of the two other datetime values in the
--                           same row are smaller than DateTime2 (0, 1 or 2);
--   CountSmallerDateTime3 - the same, for DateTime3.
-- The in-row counts are the sum of two independent comparisons. For non-NULL
-- values this equals the previous three-way CASE; with a NULL in one of the
-- compared columns the other comparison is still counted, where the old form
-- fell through to 0.
SELECT
    TB.[DateTime1],
    TB.[DateTime2],
    TB.[DateTime3],
    (
        SELECT COUNT(1)
        FROM [TestTB] AS Sub
        WHERE TB.[DateTime1] > Sub.[DateTime1]
    ) AS CountSmallerDateTime1,
    CASE WHEN TB.[DateTime2] > TB.[DateTime1] THEN 1 ELSE 0 END
        + CASE WHEN TB.[DateTime2] > TB.[DateTime3] THEN 1 ELSE 0 END
        AS CountSmallerDateTime2,
    CASE WHEN TB.[DateTime3] > TB.[DateTime1] THEN 1 ELSE 0 END
        + CASE WHEN TB.[DateTime3] > TB.[DateTime2] THEN 1 ELSE 0 END
        AS CountSmallerDateTime3
FROM [TestTB] AS TB
-- Inline copy of the three sample rows (RN = row number), followed by:
--   SAME_COLUMN_DATEn - number of smaller values in the same column (RANK - 1);
--   SAME_ROW_n        - in-row comparison of DATEn against the other two
--                       columns, evaluated only for the row with RN = n
--                       (every other row falls through to ELSE 2).
;WITH CTE (DATE1, DATE2, DATE3, RN) AS (
    SELECT CONVERT(DATETIME , '2016-05-01 13:24:000.00'), CONVERT(DATETIME,'2016-05-01 15:24:000.00'), CONVERT(DATETIME,'2016-05-01 16:20:000.00'), 1
    UNION ALL
    SELECT CONVERT(DATETIME , '2016-05-01 13:30:000.00'), CONVERT(DATETIME,'2016-05-01 14:21:000.00'), CONVERT(DATETIME,'2016-05-01 15:10:000.00'), 2
    UNION ALL
    SELECT CONVERT(DATETIME , '2016-05-01 14:24:000.00'), CONVERT(DATETIME,'2016-05-01 17:21:000.00'), CONVERT(DATETIME,'2016-05-01 18:10:000.00'), 3
)
SELECT
    RANK() OVER (ORDER BY DATE1) - 1 AS SAME_COLUMN_DATE1,
    RANK() OVER (ORDER BY DATE2) - 1 AS SAME_COLUMN_DATE2,
    RANK() OVER (ORDER BY DATE3) - 1 AS SAME_COLUMN_DATE3,
    CASE
        WHEN RN = 1 AND DATE1 < DATE2 AND DATE1 < DATE3 THEN 0
        WHEN RN = 1 AND DATE1 < DATE2 AND DATE1 > DATE3 THEN 1
        WHEN RN = 1 AND DATE1 > DATE2 AND DATE1 < DATE3 THEN 1
        ELSE 2
    END AS SAME_ROW_1,  -- the END keyword was missing here, which is a syntax error
    CASE
        WHEN RN = 2 AND DATE2 < DATE1 AND DATE2 < DATE3 THEN 0
        WHEN RN = 2 AND DATE2 < DATE1 AND DATE2 > DATE3 THEN 1
        WHEN RN = 2 AND DATE2 > DATE1 AND DATE2 < DATE3 THEN 1
        ELSE 2
    END AS SAME_ROW_2,
    CASE
        WHEN RN = 3 AND DATE3 < DATE1 AND DATE3 < DATE2 THEN 0
        WHEN RN = 3 AND DATE3 < DATE1 AND DATE3 > DATE2 THEN 1
        WHEN RN = 3 AND DATE3 > DATE1 AND DATE3 < DATE2 THEN 1
        ELSE 2
    END AS SAME_ROW_3
FROM CTE
ORDER BY RN

How to do a SELECT for total from beginning until the specified date in MySQL?

I have entry table:
I need to do a SELECT to receive 'Date', 'Number of entries' (in that date), 'Total number of entries until that date'.
When I do the SELECT:
-- Per-day entry statistics for competition 166 plus a cumulative "Total".
-- The Total subquery counts rows of `entry` up to each date without any
-- competitionID filter, unlike the inner query, which is restricted to 166.
SELECT e1.*,
(select count(*) from entry where date(dateCreated) <= e1.date) as Total
from (
-- One row per creation date: entry count and the premium / free / affiliate
-- breakdown for competition 166.
SELECT
DATE(e.dateCreated) as "Date",
count(e.dateCreated) as "No of Entries",
sum( case when e.premium='Y' then 1 else 0 end ) as Premium,
sum( case when e.free='Y' then 1 else 0 end ) as Free,
sum( case when e.affiliateID IS NOT NULL then 1 else 0 end) as Affiliate
FROM entry e
WHERE e.competitionID=166
GROUP BY DATE(e.dateCreated)
) as e1
ORDER BY Date DESC
I've got a result table
but the column 'Total' has a wrong data.
How the correct select should be? Is this logic of select is the best and more efficient one?
Here is a demo
If it is just the 5 vs 7 that is off I think it is because that subquery in your select list, which accesses the inline view e1 (which is filtered to competitionID = 166), is not itself filtered when also utilizing the original entry table (unfiltered). You have to filter the original table to that competitionID as well.
Notice line 3 in sql below (only change)
SELECT e1.*, -- per-day statistics from e1 plus a cumulative Total
(select count(*) from entry where date(dateCreated) <= e1.date -- entries created up to this date
and competitionID=166) as Total -- restricted to the same competition as the inner query
from (
SELECT
DATE(e.dateCreated) as "Date",
count(e.dateCreated) as "No of Entries",
sum( case when e.premium='Y' then 1 else 0 end ) as Premium,
sum( case when e.free='Y' then 1 else 0 end ) as Free,
sum( case when e.affiliateID IS NOT NULL then 1 else 0 end) as Affiliate
FROM entry e
WHERE e.competitionID=166
GROUP BY DATE(e.dateCreated) -- one row per creation date
) as e1
ORDER BY Date DESC
Fiddle - http://sqlfiddle.com/#!9/e5e88/22/0