This keeps inserting rows that already exist, although it shouldn't.
BEGIN
    -- Copy punches from the last month out of ohrm_attendance_master into
    -- ohrm_attendance_raw_data, skipping any punch whose time already appears
    -- in ohrm_attendance_record as a punch-in or punch-out user time.
    INSERT INTO ohrm_attendance_raw_data (punch_time, device_id, card_number)
    SELECT
        punch_time,
        device_id,
        card_number
    FROM ohrm_attendance_master AS m
    WHERE m.punch_time >= DATE_SUB(NOW(), INTERVAL 1 MONTH)
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r_in
            WHERE r_in.punch_in_user_time = m.punch_time
          )
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r_out
            -- punch_time is deliberately left unqualified, as in the posted query.
            WHERE r_out.punch_out_user_time = punch_time
          );
END
It looks like your `punch_time` field is a DATETIME (or similar) type, so I think the problem is that you are comparing two datetime values. MySQL and other RDBMSs compare such values including the hours, minutes, seconds, and fractional seconds, so the comparison can be false even when both values fall on the same day. You can truncate the values to the date, or give both sides the same format:
With DATE function:
BEGIN
    -- Copy punches from the last month into ohrm_attendance_raw_data, skipping
    -- any punch that falls on a calendar day for which ohrm_attendance_record
    -- already holds a punch-in or punch-out (comparison is by date only).
    -- Every outer reference is qualified with the alias m so that it cannot be
    -- resolved against a column of the inner table by accident.
    INSERT INTO ohrm_attendance_raw_data (punch_time, device_id, card_number)
    SELECT
        m.punch_time,
        m.device_id,
        m.card_number
    FROM ohrm_attendance_master AS m
    WHERE m.punch_time >= DATE_SUB(NOW(), INTERVAL 1 MONTH)
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r
            WHERE DATE(r.punch_in_user_time) = DATE(m.punch_time)
          )
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r
            WHERE DATE(r.punch_out_user_time) = DATE(m.punch_time)
          );
END
With DATE_FORMAT function:
BEGIN
    -- Same day-level de-duplication as the DATE() variant, but both sides are
    -- rendered with DATE_FORMAT('%d-%b-%Y') before being compared.
    -- Every outer reference is qualified with the alias m so that it cannot be
    -- resolved against a column of the inner table by accident.
    INSERT INTO ohrm_attendance_raw_data (punch_time, device_id, card_number)
    SELECT
        m.punch_time,
        m.device_id,
        m.card_number
    FROM ohrm_attendance_master AS m
    WHERE m.punch_time >= DATE_SUB(NOW(), INTERVAL 1 MONTH)
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r
            WHERE DATE_FORMAT(r.punch_in_user_time, '%d-%b-%Y')
                = DATE_FORMAT(m.punch_time, '%d-%b-%Y')
          )
      AND NOT EXISTS (
            SELECT 1
            FROM ohrm_attendance_record AS r
            WHERE DATE_FORMAT(r.punch_out_user_time, '%d-%b-%Y')
                = DATE_FORMAT(m.punch_time, '%d-%b-%Y')
          );
END
Related
This is my clockin table:
This is my user table:
I want the output in this format:
Try this:
-- Inclusive date range to report on (user variables use the @ sigil;
-- in MySQL a leading # would start a comment).
SET @start_date = '2022-12-30';
SET @end_date = '2022-12-31';

-- Date_Ranges: one row per calendar day from @start_date through @end_date.
WITH RECURSIVE Date_Ranges AS (
    -- Cast the seed so the CTE column is a DATE rather than a string.
    SELECT CAST(@start_date AS DATE) AS `date`
    UNION ALL
    SELECT `date` + INTERVAL 1 DAY
    FROM Date_Ranges
    WHERE `date` < @end_date
)
SELECT
    new_table.*,
    -- All clock-in times of this user on this day, comma-separated
    -- (NULL when the user has no clock-in on that day).
    (
        SELECT GROUP_CONCAT(c.`time`)
        FROM `clockin` AS c
        WHERE c.`emid` = new_table.`id`
          AND c.`date` = new_table.`date`
    ) AS `time`
FROM (
    -- Every user paired with every day in the range. MySQL has no FULL JOIN;
    -- the pairing that is wanted here is a CROSS JOIN.
    SELECT *
    FROM `user`
    CROSS JOIN Date_Ranges
) AS new_table
ORDER BY new_table.`date` DESC;
I am trying to calculate the number of users who have an active subscription on a particular day. I also want information related to the subscription plan they are on. I have a subscriptions table which includes the start date and end date of subscription as well as the plan name. I am using a recursive cte to find out the number of subscribers of different plans on a date range but I am getting the error that the cte table doesn't exist. I am using the following code.
-- Query exactly as posted in the question; this is the version that raises the
-- reported "cte table doesn't exist" error.
-- NOTE(review): `#start` / `#end` look like garbled `@start` / `@end` user
-- variables — confirm against the original post.
SET #start = (SELECT MIN(start_date) FROM subscriptions);
SET #end = (SELECT MAX(end_date) FROM subscriptions);
-- The CTE below selects FROM cte inside its own body but is declared with a
-- plain WITH.
WITH cte AS (
SELECT #start dt
UNION ALL
SELECT date_add(dt, interval 1 day) FROM cte
WHERE dt < #end
)
-- NOTE(review): the joined table is aliased as t, yet the SUM refers to
-- subscriptions.plan_name — confirm whether t.plan_name was intended.
SELECT cte.dt, SUM(CASE WHEN subscriptions.plan_name IS NULL THEN 0 ELSE 1 END) FROM cte
LEFT JOIN subscriptions t
ON cte.dt BETWEEN t.start_date AND t.end_date
GROUP BY cte.dt;
the output should look like this
-- CTE definition quoted from the question: its second SELECT reads FROM cte,
-- i.e. from the CTE being defined, while the declaration is a plain WITH.
-- NOTE(review): `#start` / `#end` look like garbled `@start` / `@end` — confirm.
WITH cte AS (
SELECT #start dt
UNION ALL
SELECT date_add(dt, interval 1 day) FROM cte
WHERE dt < #end
)
You are referring to cte inside its own definition, which makes it recursive, so it needs to be declared as such (WITH RECURSIVE).
This is what you need:
-- Date series from @start through @end, one row per day. RECURSIVE is required
-- because the second SELECT reads from cte itself. @start and @end are user
-- variables (the @ sigil; a leading # would start a comment in MySQL).
-- This is only the CTE definition: the main SELECT that uses cte follows it.
WITH RECURSIVE cte AS (
    SELECT @start AS dt
    UNION ALL
    SELECT DATE_ADD(dt, INTERVAL 1 DAY)
    FROM cte
    WHERE dt < @end
)
I have a table in MySQL that contains min and max date values for each key:
key | minDate | maxDate
1 2011-01-01 2011-01-10
2 2011-02-13 2011-02-15
3 2011-10-19 2011-12-10
How can I create a new table that contains one row for each date between minDate and maxDate for each of the keys:
key | Date
1 2011-01-01
1 2011-01-02
... ...
1 2011-01-10
2 2011-02-13
2 2011-02-14
2 2011-02-15
3 2011-10-19
... ...
Using an integers table, you can do this:
-- One row per key per day: pair each range with every offset i from 0 up to
-- the length of the range in days, and add that offset to minDate.
-- `key` is quoted with backticks: in MySQL's default SQL mode "key" in double
-- quotes is a string literal, not the column.
SELECT
    mytable.`key`,
    mytable.minDate + INTERVAL integers.i DAY AS `Date`
FROM mytable
INNER JOIN integers
    -- The lower bound keeps negative offsets out if the integers table has any.
    ON integers.i BETWEEN 0 AND DATEDIFF(mytable.maxDate, mytable.minDate);
That assumes the "integers" table has its column named "i", of course.
From there you can populate your new table with INSERT INTO ... SELECT.
Using a recursive common table expression (requires mysql 8 or mariadb 10.2+):
-- Expand every (id, mindate, maxdate) row of ranges into one row per day.
WITH RECURSIVE expanded_ranges AS (
    -- Seed: the first day of each range.
    SELECT
        id,
        mindate AS dt
    FROM ranges
    UNION ALL
    -- Step: advance one day while the current day is still before maxdate.
    SELECT
        er.id,
        er.dt + INTERVAL 1 DAY
    FROM expanded_ranges AS er
    INNER JOIN ranges AS r
        ON er.id = r.id
       AND er.dt < r.maxdate
)
SELECT
    id,
    dt
FROM expanded_ranges;
fiddle
-- Inclusive bounds of the date series (user variables use the @ sigil;
-- in MySQL a leading # would start a comment).
SET @start_date = '2020-01-01';
SET @end_date = '2022-12-31';

-- The CTE needs one recursion step per day. MySQL aborts a recursive CTE after
-- cte_max_recursion_depth steps (1000 by default), which is fewer than this
-- three-year range requires, so raise the limit to fit the range.
-- NOTE(review): MySQL-specific variable; MariaDB uses a different setting.
SET SESSION cte_max_recursion_depth =
    GREATEST(1000, DATEDIFF(@end_date, @start_date) + 1);

-- ListDates: every date from @start_date through @end_date, one per row.
WITH RECURSIVE ListDates (AllDates) AS (
    -- Cast the seed so AllDates is a DATE column rather than a string.
    SELECT CAST(@start_date AS DATE)
    UNION ALL
    SELECT DATE_ADD(AllDates, INTERVAL 1 DAY)
    FROM ListDates
    WHERE AllDates < @end_date
)
SELECT AllDates
FROM ListDates;
From memory, it could be something like this:
-- Procedural alternative, written as valid MySQL: walk day by day from the
-- smallest minDate to the largest maxDate and, for each day, record every key
-- whose [minDate, maxDate] range contains it.
-- MySQL only allows WHILE inside a stored program, so the loop lives in a
-- procedure; `key` is a reserved word and therefore quoted with backticks.
DROP PROCEDURE IF EXISTS expand_date_ranges;
DELIMITER $$
CREATE PROCEDURE expand_date_ranges()
BEGIN
    DECLARE v_day DATETIME;       -- day currently being processed
    DECLARE v_last_day DATETIME;  -- last day to process (inclusive)

    DROP TEMPORARY TABLE IF EXISTS res;
    CREATE TEMPORARY TABLE res (
        `key` INT,
        `Date` DATETIME
    );

    SELECT MIN(minDate), MAX(maxDate)
    INTO v_day, v_last_day
    FROM tablename;

    -- With an empty tablename both bounds are NULL and the loop is skipped.
    WHILE v_day <= v_last_day DO
        INSERT INTO res (`key`, `Date`)
        SELECT `key`, v_day
        FROM tablename
        WHERE v_day >= minDate
          AND v_day <= maxDate;

        SET v_day = DATE_ADD(v_day, INTERVAL 1 DAY);
    END WHILE;

    SELECT `key`, `Date`
    FROM res;

    DROP TEMPORARY TABLE res;
END$$
DELIMITER ;
CALL expand_date_ranges();
I am writing queries for some KPIs (Key Performance Indicators) to track user engagement. One such KPI is "Churn Rate", which I am calculating for a given month by:
Churn rate = (Total users deleted in month)/(Total users on the 1st of month)
I am using a users table with the following columns:
created_at, deleted_at
My process is to get all relevant months of user activity (in this case, based on "created_at" column, since we are getting several new users per month. We also have an activity log table which might technically be more accurate to use but doesn't go back as far) and then loop over them in a stored procedure. For each month, I'm calculating who was deleted that month and who was active on the first of that month (created on or before the 1st of the month and either not deleted or deleted after the first of that month). Then I'm dividing them to find churn rate and inserting into a temporary table. Here is my stored procedure:
-- ChurnRate(): returns one row per calendar month, from the month of the
-- earliest users.created_at to the month of the latest one, with columns
--   user_month        'YYYY-MM'
--   deleted_count     users whose deleted_at falls inside the month
--   active_user_count users created before the end of the month that were not
--                     deleted before its first day
--   churn_rate        deleted_count / active_user_count (NULL when there are
--                     no active users)
-- IF EXISTS lets the script run on a server where the procedure does not
-- exist yet.
DROP PROCEDURE IF EXISTS ChurnRate;
DELIMITER $$
CREATE PROCEDURE ChurnRate()
BEGIN
    -- Locals carry a v_ prefix so they cannot be confused with the columns of
    -- churn_table that share their base names.
    DECLARE v_month_start DATE;       -- first day of the month being processed
    DECLARE v_next_month_start DATE;  -- first day of the following month
    DECLARE v_last_month_start DATE;  -- first day of the last month to report
    DECLARE v_deleted_count BIGINT;
    DECLARE v_active_user_count BIGINT;

    SELECT
        CAST(DATE_FORMAT(MIN(created_at), '%Y-%m-01') AS DATE),
        CAST(DATE_FORMAT(MAX(created_at), '%Y-%m-01') AS DATE)
    INTO v_month_start, v_last_month_start
    FROM users;

    DROP TEMPORARY TABLE IF EXISTS churn_table;
    CREATE TEMPORARY TABLE churn_table
    (
        user_month VARCHAR(255),
        deleted_count BIGINT,
        active_user_count BIGINT,
        churn_rate FLOAT
    );

    -- With an empty users table both bounds are NULL and the loop is skipped,
    -- so no all-NULL row is produced.
    WHILE v_month_start <= v_last_month_start DO
        SET v_next_month_start = DATE_ADD(v_month_start, INTERVAL 1 MONTH);

        -- Half-open range on the bare column instead of SUBSTR(deleted_at, 1, 7),
        -- so an index on deleted_at can be used.
        SELECT COUNT(u.id)
        INTO v_deleted_count
        FROM users AS u
        WHERE u.deleted_at >= v_month_start
          AND u.deleted_at < v_next_month_start;

        -- created_at < next month start selects the same rows as the original
        -- SUBSTR(created_at, 1, 7) <= current month.
        SELECT COUNT(u.id)
        INTO v_active_user_count
        FROM users AS u
        WHERE (u.deleted_at >= v_month_start OR u.deleted_at IS NULL)
          AND u.created_at < v_next_month_start;

        INSERT INTO churn_table (user_month, deleted_count, active_user_count, churn_rate)
        VALUES (
            DATE_FORMAT(v_month_start, '%Y-%m'),
            v_deleted_count,
            v_active_user_count,
            -- NULLIF avoids a division by zero, which is an error for INSERT
            -- under strict SQL mode with ERROR_FOR_DIVISION_BY_ZERO.
            v_deleted_count / NULLIF(v_active_user_count, 0)
        );

        SET v_month_start = v_next_month_start;
    END WHILE;

    SELECT
        user_month,
        deleted_count,
        active_user_count,
        churn_rate
    FROM churn_table;
END$$
DELIMITER ;
CALL ChurnRate();
Here is a sample of some data that was produced:
user_month
churn_rate_percentage
2019-12
0
2020-01
0.0396982
2020-02
0
2020-03
0
2020-04
0
2020-05
0.112116
2020-06
0.59691
2020-07
0.26689
2020-08
0.144374
2020-09
0.141767
2020-10
0.125
2020-11
0.272904
2020-12
0.14937
My problem is this: I am using an API that requires this to be a select query. I have previously tried writing select queries for this, but they have been flawed. Grouping by "deleted_at" will not work because we will not show months for which no users have been deleted. Grouping by "created_at" and using subqueries ends up being extremely slow, as we have about 50k users. Is there a clean, efficient way to write this as a select query without affecting performance?
If there is not, I will have to write a cron job to run this procedure and export the data.
Thank you
You shouldn't use loops in SQL; that is often an indication that you are doing something wrong.
Here is how to do this in a single query:
-- base_months: one row per calendar month, from the month of the earliest
-- users.created_at up to the current month.
--   d = first day of the month, y = its year, m = its month number
WITH RECURSIVE base_months (d, y, m) AS (
    SELECT
        CAST(DATE_FORMAT(MIN(created_at), '%Y-%m-01') AS DATE),
        YEAR(MIN(created_at)),
        MONTH(MIN(created_at))
    FROM users
    UNION ALL
    SELECT
        DATE_ADD(d, INTERVAL 1 MONTH),
        YEAR(DATE_ADD(d, INTERVAL 1 MONTH)),
        MONTH(DATE_ADD(d, INTERVAL 1 MONTH))
    FROM base_months
    -- Compare whole dates: testing year and month separately stops the series
    -- as soon as a month number exceeds the current month number.
    WHERE DATE_ADD(d, INTERVAL 1 MONTH) <= CURDATE()
)
SELECT
    b.y AS year,
    b.m AS month,
    -- Users that existed on the first day of the month.
    COUNT(u.created_at) AS total_user,
    -- Of those users, the ones deleted before the next month starts.
    SUM(
        CASE
            WHEN u.deleted_at < DATE_ADD(b.d, INTERVAL 1 MONTH) THEN 1
            ELSE 0
        END
    ) AS left_this_month
FROM base_months AS b
-- For each month, join the users that were created before its first day and
-- not deleted before it. LEFT JOIN keeps months in which no such user exists.
LEFT JOIN users AS u
    ON u.created_at < b.d
   AND (u.deleted_at >= b.d OR u.deleted_at IS NULL)
GROUP BY b.y, b.m
ORDER BY b.y, b.m;
If this isn't clear: first we use a recursive CTE to get all the months and years of interest. You could use a non-recursive query on the table with a GROUP BY if you only want to include the creation months that appear in the table, but I think that would give you misleading results, since months in which no users were created would not be included.
Then I join that back to the users table with filters on the join to only include the rows we want to count for the given year and month. We use group by and aggregation functions to find the results.
Looping is likely to be terribly slow.
Is this how you decide if a user exists on Nov 1, 2020?
-- Full date literals ('2020-11' is not a valid date value), and users that
-- were never deleted (NULL deleted_at) still count as existing.
WHERE created_at < '2020-11-01'
  AND (deleted_at >= '2020-11-01' OR deleted_at IS NULL)
Hence, a COUNT(*) with that test would give that count?
For deletions for that month:
-- Half-open range on the bare column, so an index on deleted_at can be used.
WHERE deleted_at >= '2020-11-01'
  AND deleted_at < '2020-12-01'
Putting those together into a single query for all months:
-- One row per month in which at least one user was created, with three
-- columns: yyyymm, new_users and deleted_users.
-- The months come from a derived table because a select-list alias such as
-- yyyymm cannot be referenced from inside the scalar subqueries.
SELECT
    m.yyyymm,
    -- Users that existed on the first day of the month.
    (
        SELECT COUNT(*)
        FROM users AS u
        WHERE u.created_at < m.month_start
          AND (u.deleted_at >= m.month_start OR u.deleted_at IS NULL)
    ) AS new_users,
    -- Users deleted during the month (half-open range).
    (
        SELECT COUNT(*)
        FROM users AS u
        WHERE u.deleted_at >= m.month_start
          AND u.deleted_at < m.month_start + INTERVAL 1 MONTH
    ) AS deleted_users
FROM (
    SELECT
        LEFT(created_at, 7) AS yyyymm,
        CAST(DATE_FORMAT(MIN(created_at), '%Y-%m-01') AS DATE) AS month_start
    FROM users
    GROUP BY LEFT(created_at, 7)
) AS m
ORDER BY m.yyyymm;
That gives you 3 columns; check it out. To get the churn:
-- Schematic only: each `( SELECT ... )` is a placeholder, not runnable SQL.
-- NOTE(review): presumably the placeholders stand for the two counting
-- subqueries of the preceding query (deleted users divided by existing
-- users) — confirm the order of numerator and denominator.
SELECT LEFT(created_at, 7) AS yyyymm,
( SELECT ... ) / ( SELECT ... ) AS churn
FROM users
GROUP BY yyyymm
ORDER BY yyyymm
I have a table in MySQL that contains min and max date values for each key:
key | minDate | maxDate
1 2011-01-01 2011-01-10
2 2011-02-13 2011-02-15
3 2011-10-19 2011-12-10
How can I create a new table that contains one row for each date between minDate and maxDate for each of the keys:
key | Date
1 2011-01-01
1 2011-01-02
... ...
1 2011-01-10
2 2011-02-13
2 2011-02-14
2 2011-02-15
3 2011-10-19
... ...
Using an integers table, you can do this:
-- One row per key per day: pair each range with every offset i from 0 up to
-- the length of the range in days, and add that offset to minDate.
-- `key` is quoted with backticks: in MySQL's default SQL mode "key" in double
-- quotes is a string literal, not the column.
SELECT
    mytable.`key`,
    mytable.minDate + INTERVAL integers.i DAY AS `Date`
FROM mytable
INNER JOIN integers
    -- The lower bound keeps negative offsets out if the integers table has any.
    ON integers.i BETWEEN 0 AND DATEDIFF(mytable.maxDate, mytable.minDate);
That assumes the "integers" table has its column named "i", of course.
From there you can populate your new table with INSERT INTO ... SELECT.
Using a recursive common table expression (requires mysql 8 or mariadb 10.2+):
-- Expand every (id, mindate, maxdate) row of ranges into one row per day.
WITH RECURSIVE expanded_ranges AS (
    -- Seed: the first day of each range.
    SELECT
        id,
        mindate AS dt
    FROM ranges
    UNION ALL
    -- Step: advance one day while the current day is still before maxdate.
    SELECT
        er.id,
        er.dt + INTERVAL 1 DAY
    FROM expanded_ranges AS er
    INNER JOIN ranges AS r
        ON er.id = r.id
       AND er.dt < r.maxdate
)
SELECT
    id,
    dt
FROM expanded_ranges;
fiddle
-- Inclusive bounds of the date series (user variables use the @ sigil;
-- in MySQL a leading # would start a comment).
SET @start_date = '2020-01-01';
SET @end_date = '2022-12-31';

-- The CTE needs one recursion step per day. MySQL aborts a recursive CTE after
-- cte_max_recursion_depth steps (1000 by default), which is fewer than this
-- three-year range requires, so raise the limit to fit the range.
-- NOTE(review): MySQL-specific variable; MariaDB uses a different setting.
SET SESSION cte_max_recursion_depth =
    GREATEST(1000, DATEDIFF(@end_date, @start_date) + 1);

-- ListDates: every date from @start_date through @end_date, one per row.
WITH RECURSIVE ListDates (AllDates) AS (
    -- Cast the seed so AllDates is a DATE column rather than a string.
    SELECT CAST(@start_date AS DATE)
    UNION ALL
    SELECT DATE_ADD(AllDates, INTERVAL 1 DAY)
    FROM ListDates
    WHERE AllDates < @end_date
)
SELECT AllDates
FROM ListDates;
From memory, it could be something like this:
-- Procedural alternative, written as valid MySQL: walk day by day from the
-- smallest minDate to the largest maxDate and, for each day, record every key
-- whose [minDate, maxDate] range contains it.
-- MySQL only allows WHILE inside a stored program, so the loop lives in a
-- procedure; `key` is a reserved word and therefore quoted with backticks.
DROP PROCEDURE IF EXISTS expand_date_ranges;
DELIMITER $$
CREATE PROCEDURE expand_date_ranges()
BEGIN
    DECLARE v_day DATETIME;       -- day currently being processed
    DECLARE v_last_day DATETIME;  -- last day to process (inclusive)

    DROP TEMPORARY TABLE IF EXISTS res;
    CREATE TEMPORARY TABLE res (
        `key` INT,
        `Date` DATETIME
    );

    SELECT MIN(minDate), MAX(maxDate)
    INTO v_day, v_last_day
    FROM tablename;

    -- With an empty tablename both bounds are NULL and the loop is skipped.
    WHILE v_day <= v_last_day DO
        INSERT INTO res (`key`, `Date`)
        SELECT `key`, v_day
        FROM tablename
        WHERE v_day >= minDate
          AND v_day <= maxDate;

        SET v_day = DATE_ADD(v_day, INTERVAL 1 DAY);
    END WHILE;

    SELECT `key`, `Date`
    FROM res;

    DROP TEMPORARY TABLE res;
END$$
DELIMITER ;
CALL expand_date_ranges();