Related
I've created a stored procedure to populate all dates of a given month and year into a table 'month_date' (date column).
-- --------------------------------------------------------------------------------
-- Routine DDL
-- Note: comments before and after the routine body will not be stored by the server
-- --------------------------------------------------------------------------------
DELIMITER $$
-- populate_date: inserts one row per calendar day of the requested month into month_date.
--   p_month  month abbreviation as parsed by '%b' (e.g. 'Sep')
--   p_year   four-digit year as parsed by '%Y' (e.g. '2015')
-- The user/host separator in DEFINER is '@'; a bare '#' starts a comment in MySQL.
CREATE DEFINER="root"@"localhost" PROCEDURE "populate_date"(IN p_month varchar(3),
IN p_year varchar(4))
MODIFIES SQL DATA
DETERMINISTIC
BEGIN
declare l_month varchar(9);
-- NOTE: this assignment is the statement the question is about and is left as asked.
-- In MySQL '+' is numeric addition, so '01' + 'Sep' + '2015' evaluates to 2016
-- (the value reported in the error message) instead of the string '01Sep2015'.
set l_month = '01'+p_month+p_year;
insert into month_date
SELECT date_field
FROM
(
-- first day of the month plus an offset of 0..39 days
SELECT
STR_TO_DATE(l_month, '%d%b%Y')+
INTERVAL daynum DAY date_field
FROM
(
-- daynum = 0..39, built from a tens digit (0..3) and a units digit (0..9)
SELECT t*10+u daynum
FROM
(SELECT 0 t UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) A,
(SELECT 0 u UNION SELECT 1 UNION SELECT 2 UNION SELECT 3
UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7
UNION SELECT 8 UNION SELECT 9) B
ORDER BY daynum
) AA
) AAA
-- discard the offsets that spill over into the following month
WHERE MONTH(date_field) = MONTH(STR_TO_DATE(l_month, '%d%b%Y'));
commit;
END$$
DELIMITER ;
when I am calling this procedure
call sms.populate_date('Sep','2015');
its throwing error
Incorrect datetime value: '2016' for function str_to_date
I've run the sql on its own and its working fine.
-- Every calendar day of September 2015: add offsets 0..39 to the 1st of the month
-- and keep only the results that still fall in the same month.
SELECT month_days.date_field
FROM (
    SELECT STR_TO_DATE('01Sep2015', '%d%b%Y') + INTERVAL offsets.daynum DAY AS date_field
    FROM (
        SELECT tens.t * 10 + units.u AS daynum
        FROM (SELECT 0 AS t UNION SELECT 1 UNION SELECT 2 UNION SELECT 3) AS tens
        CROSS JOIN (
            SELECT 0 AS u UNION SELECT 1 UNION SELECT 2 UNION SELECT 3
            UNION SELECT 4 UNION SELECT 5 UNION SELECT 6 UNION SELECT 7
            UNION SELECT 8 UNION SELECT 9
        ) AS units
        ORDER BY daynum
    ) AS offsets
) AS month_days
WHERE MONTH(month_days.date_field) = MONTH(STR_TO_DATE('01Sep2015', '%d%b%Y'));
please can you help with this error?
thanks
You need to combine your strings with the CONCAT() function. In MySQL the + operator only performs numeric addition:
....
BEGIN
declare l_month varchar(9);
/*ADDED CONCAT HERE*/
set l_month = CONCAT('01',p_month,p_year);
insert into month_date
SELECT date_field
FROM
(
....
Problem:
I want to test whether one set of values is equal to another set, where the values are not necessarily in the same order.
For example:
'a,b,c,d' must be equal to 'b,a,c,d'
What I have tried:
I have tried IN Clause and I have checked with FIND_IN_SET.
SELECT 'a,b,c,d' IN 'b,c,a,d';
Both of them can not do this work.
Will be thankful if anyone can help.
Thanks
Sandeep
This demonstrates splitting the values into multiple rows, as mentioned by GolezTrol, in combination with FIND_IN_SET, wrapped in a function so that it can be used in forms like:
SELECT are_sets_equal(col_with_set, 'a,b,d,c') FROM example;
or
SELECT * FROM example
WHERE are_sets_equal(col_with_set, 'a,b,d,c')
The idea is this:
Split the first set to a temporary table
Check how many of those values are found in the second set.
If this count is equal to the count of elements in both sets, then the sets are equal
The function will return 1, if both sets are equal and 0, if the sets differ as by requirement.
The limit for both sets is 1000 values, but could be expanded easily:
DELIMITER //
-- are_sets_equal: compares two comma-separated lists as sets (element order is ignored).
--   set_a, set_b  comma-separated value lists, at most 1000 elements each
-- Returns 1 when every element of set_a occurs in set_b AND every element of set_b
-- occurs in set_a, 0 otherwise, and NULL when either argument is NULL.
-- Checking both directions avoids a false positive when one list repeats a value
-- (e.g. 'a,a,b' against 'a,b,c' has equal element counts but is not the same set).
-- Lists with more than 1000 elements are reported as not equal, because the number
-- generator below only covers positions 1..1000.
-- An empty string never compares equal, since FIND_IN_SET returns 0 for an empty list.
-- DETERMINISTIC is declared so the function can be created with binary logging enabled.
CREATE FUNCTION are_sets_equal(set_a VARCHAR(2000), set_b VARCHAR(2000)) RETURNS BOOLEAN
DETERMINISTIC
BEGIN
    DECLARE is_equal BOOLEAN;
    DECLARE count_a INT;
    DECLARE count_b INT;

    -- number of elements = number of separators + 1
    SET count_a = 1 + LENGTH(set_a) - LENGTH(REPLACE(set_a, ',', ''));
    SET count_b = 1 + LENGTH(set_b) - LENGTH(REPLACE(set_b, ',', ''));

    SELECT
        -- count the positions whose element is missing from the other list;
        -- the sets are equal when there is no such position in either direction
        SUM(
            (nums.n <= count_a
                AND FIND_IN_SET(SUBSTRING_INDEX(SUBSTRING_INDEX(set_a, ',', nums.n), ',', -1), set_b) = 0)
            OR
            (nums.n <= count_b
                AND FIND_IN_SET(SUBSTRING_INDEX(SUBSTRING_INDEX(set_b, ',', nums.n), ',', -1), set_a) = 0)
        ) = 0
        AND GREATEST(count_a, count_b) <= 1000
    INTO is_equal
    FROM (
        -- positions 1..1000
        SELECT a.N + b.N * 10 + c.N * 100 + 1 AS n
        FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
        CROSS JOIN
             (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
        CROSS JOIN
             (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
    ) AS nums
    WHERE nums.n <= GREATEST(count_a, count_b);

    RETURN is_equal;
END //
DELIMITER ;
Explanation
Building a numbers table
-- Numbers 1..1000: units (a), tens (b) and hundreds (c) digits combined, shifted by one.
SELECT
    a.N + b.N * 10 + c.N * 100 + 1 AS n
FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
CROSS JOIN
     (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
CROSS JOIN
     (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
ORDER BY n
builds a number table with the values from 1 to 1000 on the fly. How to expand this to a greater range should be obvious.
Note Such a numbers table could be contained in your database, so there would be no need to create one on the fly.
Split a set to a table
With the help of this number table we can split the value list to a table, using nested SUBSTRING_INDEX calls to cut just one value after the other from the list as mentioned in SQL split values to multiple rows:
-- Split the comma-separated list held in the user variable @set_a into one row per element.
SELECT
    SUBSTRING_INDEX(SUBSTRING_INDEX(t.col, ',', n.n), ',', -1) AS value
FROM (SELECT @set_a AS col) AS t
CROSS JOIN (
    -- positions 1..1000, i.e. up to 1000 separated values
    SELECT
        a.N + b.N * 10 + c.N * 100 + 1 AS n
    FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
    CROSS JOIN
         (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
    CROSS JOIN
         (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
    ORDER BY n
) AS n
WHERE
    -- keep one position per element: element count = number of separators + 1
    n.n <= 1 + LENGTH(@set_a) - LENGTH(REPLACE(@set_a, ',', ''))
Count the elements of the sets
We get the count of elements in the list by the expression in the WHERE clause: there is one more value than there are occurrences of the separator.
Then we restrict the result by searching those values in the second set with FIND_IN_SET.
As a last step we check count of values in the result against the count of values in both sets and return this value.
Demo
Experiment with this demo.
FIND_IN_SET should do the trick, but the first value is an individual value and doesn't work right if it contains a comma. You would have to look for each individual value:
-- Evaluates to 1 only when each of 'a', 'b', 'c' and 'd' occurs in the list 'b,c,a,d'.
-- FIND_IN_SET takes a single value as its first argument, so every value is tested separately.
SELECT
FIND_IN_SET('a', 'b,c,a,d') AND
FIND_IN_SET('b', 'b,c,a,d') AND
FIND_IN_SET('c', 'b,c,a,d') AND
FIND_IN_SET('d', 'b,c,a,d')
If you don't have these separate values available, maybe you can split the input value into multiple values. The answers to the question 'Split values to multiple rows' might give you some inspiration.
The better solution would be not to store comma separated values at all. It's considered bad practice.
You can use a UDF (user defined function) to compare your sets. As stated in the comments, there will be no duplicate values in the sets. I have slightly customized a UDF provided by @Simon at mso.net: I calculate the count of values in the second comma-separated list and, at the end, compare it with the number of FIND_IN_SET matches stored in the numReturn variable. If both are equal the function returns 1, otherwise it returns 0 for a non-match. Please note that this will not work for repeated/duplicate values in a set.
DELIMITER $$
DROP FUNCTION IF EXISTS `countMatchingElements`$$
-- countMatchingElements: reports whether two comma-separated lists hold the same elements.
--   inFirstList, inSecondList  comma-separated lists without duplicate values
-- Returns 1 when both lists have the same number of elements and every element of
-- inFirstList is found in inSecondList; 0 otherwise; NULL when either argument is NULL.
-- Comparing only the match count against the second list's size would also accept a
-- first list that is a strict superset ('a,b,c' vs 'a,b'), so both sizes are checked.
-- Counters are INT: a 1000-character list can hold more than 255 elements, which a
-- TINYINT counter cannot represent.
CREATE DEFINER = `root`@`localhost` FUNCTION `countMatchingElements` (
  inFirstList VARCHAR (1000),
  inSecondList VARCHAR (1000)
) RETURNS TINYINT (3) UNSIGNED NO SQL DETERMINISTIC SQL SECURITY INVOKER
BEGIN
  DECLARE numReturn INT UNSIGNED DEFAULT 0 ;
  DECLARE idsInFirstList INT UNSIGNED ;
  DECLARE currentListItem VARCHAR (1000) DEFAULT '' ;
  DECLARE currentID INT UNSIGNED DEFAULT 1 ;
  DECLARE total_values_in_second INT UNSIGNED DEFAULT 0 ;

  -- A NULL list would make the loop condition NULL and the loop would never end.
  IF inFirstList IS NULL OR inSecondList IS NULL THEN
    RETURN NULL ;
  END IF ;

  -- element count = number of separators + 1
  SET total_values_in_second =
      CHAR_LENGTH(inSecondList) - CHAR_LENGTH(REPLACE(inSecondList, ',', '')) + 1 ;
  SET idsInFirstList =
      CHAR_LENGTH(inFirstList) - CHAR_LENGTH(REPLACE(inFirstList, ',', '')) + 1 ;

  -- Walk inFirstList and count the elements that also occur in inSecondList.
  WHILE currentID <= idsInFirstList DO
    SET currentListItem = SUBSTRING_INDEX(
      SUBSTRING_INDEX(inFirstList, ',', currentID),
      ',',
      - 1
    ) ;
    IF FIND_IN_SET(currentListItem, inSecondList)
    THEN SET numReturn = numReturn + 1 ;
    END IF ;
    SET currentID = currentID + 1 ;
  END WHILE ;

  IF numReturn = idsInFirstList AND idsInFirstList = total_values_in_second
  THEN RETURN 1 ;
  ELSE RETURN 0 ;
  END IF ;
END $$
DELIMITER ;
Fiddle Demo
MySQL stored procedures can be used to split the string, the following details for your usage of MySQL stored procedures, for your reference learning purposes.
Existing string, such as Apple, banana, orange, pears, grape, it should follow the comma (,) is divided into:
apple
banana
orange
pears
grape
Where in () method can then query.
1, the specific function:
Function: func_split_TotalLength
DELIMITER $$
DROP FUNCTION IF EXISTS `func_split_TotalLength`$$
-- func_split_TotalLength: number of parts f_string splits into on f_delimiter.
--   f_string     the delimited text
--   f_delimiter  the separator (may be longer than one character)
-- Returns (occurrences of the delimiter) + 1. The character difference is divided by
-- the delimiter length so that a multi-character delimiter such as ', ' is counted
-- once per occurrence. An empty delimiter yields 1.
CREATE DEFINER = `root`@`%` FUNCTION `func_split_TotalLength`
(f_string varchar(1000), f_delimiter varchar(5)) RETURNS int(11)
DETERMINISTIC
BEGIN
  RETURN 1 + COALESCE(
    (CHAR_LENGTH(f_string) - CHAR_LENGTH(REPLACE(f_string, f_delimiter, '')))
      DIV NULLIF(CHAR_LENGTH(f_delimiter), 0),
    0);
END$$
DELIMITER ;
Function: func_split
DELIMITER $$
DROP FUNCTION IF EXISTS `func_split`$$
-- func_split: returns the f_order-th part (1-based) of f_string split on f_delimiter.
--   f_string     the delimited text
--   f_delimiter  the separator (may be longer than one character)
--   f_order      1-based position of the wanted part
-- The inner SUBSTRING_INDEX keeps everything up to the f_order-th delimiter and the
-- outer one (count -1) keeps the last part of that prefix. Reversing the string instead
-- would also reverse a multi-character delimiter, so it would no longer be found.
CREATE DEFINER = `root`@`%` FUNCTION `func_split`
(f_string varchar(1000), f_delimiter varchar(5), f_order int) RETURNS varchar(255) CHARSET utf8
DETERMINISTIC
BEGIN
  RETURN SUBSTRING_INDEX(SUBSTRING_INDEX(f_string, f_delimiter, f_order), f_delimiter, -1);
END$$
DELIMITER ;
Stored procedure: SplitString
DELIMITER $$
DROP PROCEDURE IF EXISTS `splitString`$$
-- splitString: splits f_string on f_delimiter and stores one part per row in the
-- temporary table tmp_split (column `status`), which is recreated on every call.
--   f_string     the delimited text
--   f_delimiter  the separator
-- Relies on func_split_TotalLength (part count) and func_split (n-th part).
CREATE PROCEDURE `splitString`
(IN f_string varchar(1000), IN f_delimiter varchar(5))
BEGIN
  DECLARE cnt INT DEFAULT 0;
  DECLARE i INT DEFAULT 0;
  SET cnt = func_split_TotalLength(f_string, f_delimiter);
  -- TEMPORARY keeps this from ever dropping a permanent table that shares the name
  DROP TEMPORARY TABLE IF EXISTS `tmp_split`;
  CREATE TEMPORARY TABLE `tmp_split` (`status` varchar(128) NOT NULL) DEFAULT CHARSET = utf8;
  WHILE i < cnt DO
    SET i = i + 1;
    INSERT INTO tmp_split (`status`) VALUES (func_split(f_string, f_delimiter, i));
  END WHILE;
END$$
DELIMITER ;
2. Test that the split succeeds:
call splitString ("apple, banana, orange, pears, grape", ",");
select * from tmp_split;
The results are splitting success:
mysql> call splitString ("apple, banana, orange, pears, grape", ",");
select * from tmp_split;
Query OK, 1 row affected but i need to insert by column wise data...,please help me
Well I would use an other approach to this problem, to get the result in table form:
Simpler approach without a stored procedure
Assuming you have a table example with two columns: id and col1 like so:
-- Demo table: `col` holds a comma-separated list of values for each id.
-- The column is named `col` to match the sample data and the query that reads e.col.
CREATE TABLE example (
    id INT NOT NULL PRIMARY KEY,
    col VARCHAR(1000)
);
and those values
id | col
-----------------------------
1 | abcd,efgh,ijkl,ghjy,sdfg
2 | some other text
and you want to get the comma separated values from col into a column with one value at the time for the id = 1. Then I would use following sql:
-- Split example.col of the row with id = 1 into one output row per comma-separated value.
SELECT
    id,
    SUBSTRING_INDEX(SUBSTRING_INDEX(e.col, ',', t.n), ',', -1) AS value
FROM example AS e
CROSS JOIN (
    -- positions 1..1000
    SELECT
        1 + a.N + b.N * 10 + c.N * 100 AS n
    FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
    CROSS JOIN
         (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
    CROSS JOIN
         (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
          UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
    ORDER BY n
) AS t
WHERE e.id = 1
  -- one position per value: value count = number of commas + 1
  AND t.n <= (1 + LENGTH(e.col) - LENGTH(REPLACE(e.col, ',', '')));
It's the same procedure as in the stored procedure. You can use this result easily in joins or other operations.
See this Demo
First part assuming, a stored procedure was needed.
stored procedure
no other function needed
no explicit temp table
works up to 1000 items, easily extensible
Code:
-- IF EXISTS lets the script run on a database where the procedure was never created.
DROP PROCEDURE IF EXISTS splitString;
DELIMITER $$
-- splitString: returns a result set with one row (column `value`) per part of f_string
-- split on f_delimiter, in their order of appearance. Handles up to 1000 parts.
--   f_string     the delimited text
--   f_delimiter  the separator (may be longer than one character)
CREATE PROCEDURE splitString(IN f_string VARCHAR(1000), IN f_delimiter VARCHAR(5))
BEGIN
    SELECT
        -- prefix up to the n-th delimiter, then its last part = the n-th element
        SUBSTRING_INDEX(SUBSTRING_INDEX(h.haystack, f_delimiter, t.n), f_delimiter, -1) AS value
    FROM (SELECT f_string AS haystack) AS h
    CROSS JOIN (
        -- positions 1..1000
        SELECT
            1 + a.N + b.N * 10 + c.N * 100 AS n
        FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
        CROSS JOIN
             (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
        CROSS JOIN
             (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
              UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
    ) AS t
    WHERE t.n <=
        (
            1   -- there is one delimiter fewer than there are parts
            + (
                -- characters removed by the REPLACE ...
                CHAR_LENGTH(f_string)
                - CHAR_LENGTH(REPLACE(f_string, f_delimiter, ''))
              )
            -- ... divided by the delimiter length = number of delimiters
            / CHAR_LENGTH(f_delimiter)
        )
    -- an ORDER BY inside the derived table does not fix the order of the final rows
    ORDER BY t.n;
END$$
DELIMITER ;
Explanation
1. Generating a list of numbers from 1 to 1000
The most inner SELECT
-- Numbers 1..1000: units (a), tens (b) and hundreds (c) digits combined, shifted by one.
SELECT
    1 + a.N + b.N * 10 + c.N * 100 AS n
FROM (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS a
CROSS JOIN
     (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS b
CROSS JOIN
     (SELECT 0 AS N UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4
      UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) AS c
ORDER BY n
does nothing more than create a list of the numbers from 1 to 1000. If you need more, it should be clear how to extend it: add another line with the same UNION ALLs, call it d, and add + d.N * 1000 to the sum.
2. Performing the split and getting just one element per row
We use this list to do the split of your func_split in the form of (a bit more readable statements)
-- Illustration only: three separate statements, one per position.
-- position 1: the first part
SELECT SUBSTRING_INDEX(
SUBSTRING_INDEX(h.haystack, f_delimiter, 1
), f_delimiter, -1) as value
FROM (SELECT f_string as haystack) h
-- position 2: the second part
SELECT SUBSTRING_INDEX(
SUBSTRING_INDEX(h.haystack, f_delimiter, 2
), f_delimiter, -1) as value
FROM (SELECT f_string as haystack) h
-- position 3: the third part
SELECT SUBSTRING_INDEX(
SUBSTRING_INDEX(h.haystack, f_delimiter, 3
), f_delimiter, -1) as value
FROM (SELECT f_string as haystack) h
that extract the first, second, third, ... substring, separated with the f_delimiter.
Getting the number of elements
Our WHERE clause is all but self-explanatory:
-- because we begin by 1 we've got to include the upper limit
WHERE t.n <=
(
1 -- we have one comma less than parts
+ (
-- count how many characters (not bytes) we lose by the replace operation
CHAR_LENGTH(f_string)
-
CHAR_LENGTH(REPLACE(f_string, f_delimiter, ''))
)
-- and divide this by the count of characters in the delimiter string
/ CHAR_LENGTH(f_delimiter)
);
Note
LENGTH() instead of CHAR_LENGTH() would do too, because the result of the division would be the same, but I like to do it right :-)
** Check of function**
You can check the result with
CALL splitString('apple, bananas, grape, ananas, pears', ', ');
it returns
value
-----
apple
bananas
grape
ananas
pears
I have query regarding get the dates which are not exists in database table.
I have below dates in database.
2013-08-02
2013-08-02
2013-08-02
2013-08-03
2013-08-05
2013-08-08
2013-08-08
2013-08-09
2013-08-10
2013-08-13
2013-08-13
2013-08-13
and i want the result which is expected as below,
2013-08-01
2013-08-04
2013-08-06
2013-08-07
2013-08-11
2013-08-12
as you can see result has six dates which are not present into database,
i have tried below query
-- Self-join attempt: for every `working` row in the range, look for a row dated exactly
-- one day later (w2); when there is none, report "that row's date + 1 day" as missing.
-- It therefore reports only the first missing day after each existing date, and also
-- the day after the last existing date in the range.
SELECT
DISTINCT DATE(w1.start_date) + INTERVAL 1 DAY AS missing_date
FROM
working w1
LEFT JOIN
(SELECT DISTINCT start_date FROM working ) w2 ON DATE(w1.start_date) = DATE(w2.start_date) - INTERVAL 1 DAY
WHERE
w1.start_date BETWEEN '2013-08-01' AND '2013-08-13'
AND
w2.start_date IS NULL;
but above return following result.
2013-08-04
2013-08-14
2013-08-11
2013-08-06
As you can see, it gives me back four dates: of those, the 14th is not needed, and three of the expected dates are still missing because of the left join.
Now please look into my query and let me know what are the best way i can do this?
Thanks for looking and giving time.
I guess you could always generate the date sequence and just use a NOT IN to eliminate the dates that actually exist. This will max out at a 1024 day range, but is easy to shrink or extend, the date column is called "mydate" and is in the table "table1";
-- Dates from 2013-08-01 up to (not including) 2013-08-13 that have no row in Table1.
-- The five 4-value tables are base-4 digits, so the generator covers offsets 0..1023 days.
SELECT b.day
FROM (
    SELECT DATE_ADD('2013-08-01',
                    INTERVAL t4.t4 + t16.t16 + t64.t64 + t256.t256 + t1024.t1024 DAY) AS day
    FROM (SELECT 0 AS t4 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) AS t4
    CROSS JOIN (SELECT 0 AS t16 UNION ALL SELECT 4 UNION ALL SELECT 8 UNION ALL SELECT 12) AS t16
    CROSS JOIN (SELECT 0 AS t64 UNION ALL SELECT 16 UNION ALL SELECT 32 UNION ALL SELECT 48) AS t64
    CROSS JOIN (SELECT 0 AS t256 UNION ALL SELECT 64 UNION ALL SELECT 128 UNION ALL SELECT 192) AS t256
    CROSS JOIN (SELECT 0 AS t1024 UNION ALL SELECT 256 UNION ALL SELECT 512 UNION ALL SELECT 768) AS t1024
) AS b
WHERE b.day < '2013-08-13'
  -- NOT EXISTS instead of NOT IN: a single NULL mydate would make NOT IN return no rows
  AND NOT EXISTS (SELECT 1 FROM Table1 AS t WHERE t.mydate = b.day);
From the "I would add an SQLfiddle if it wasn't down" dept.
Thanks for help here is the query i am end up with and its working
-- Dates from 2013-08-01 up to (not including) 2013-08-13 on which `working` has no start_date.
-- The five 4-value tables are base-4 digits, so the generator covers offsets 0..1023 days.
SELECT b.missingDates
FROM (
    SELECT DATE_ADD('2013-08-01',
                    INTERVAL t4.t4 + t16.t16 + t64.t64 + t256.t256 + t1024.t1024 DAY) AS missingDates
    FROM (SELECT 0 AS t4 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3) AS t4
    CROSS JOIN (SELECT 0 AS t16 UNION ALL SELECT 4 UNION ALL SELECT 8 UNION ALL SELECT 12) AS t16
    CROSS JOIN (SELECT 0 AS t64 UNION ALL SELECT 16 UNION ALL SELECT 32 UNION ALL SELECT 48) AS t64
    CROSS JOIN (SELECT 0 AS t256 UNION ALL SELECT 64 UNION ALL SELECT 128 UNION ALL SELECT 192) AS t256
    CROSS JOIN (SELECT 0 AS t1024 UNION ALL SELECT 256 UNION ALL SELECT 512 UNION ALL SELECT 768) AS t1024
) AS b
WHERE b.missingDates < '2013-08-13'
  -- NOT EXISTS instead of NOT IN: a single NULL start_date would make NOT IN return no rows
  AND NOT EXISTS (
        SELECT 1
        FROM working AS w
        WHERE DATE_FORMAT(w.start_date, '%Y-%m-%d') = b.missingDates
      );
My bet would be probably to create a dedicated Calendar table just to be able to use it on a LEFT JOIN.
You could create the table on per need basis, but as it will not represent a such large amount of data, the simplest and probably most efficient approach is to create it once for all, as I do below using a stored procedure:
--
-- Create a dedicated "Calendar" table: one row per calendar day
--
CREATE TABLE Calendar (day DATE PRIMARY KEY);
DELIMITER //
-- init_calendar: inserts every date from pStart to pEnd (both inclusive) into Calendar.
--   pStart  first date to insert
--   pEnd    last date to insert
-- Nothing is inserted when pStart is after pEnd or when either argument is NULL.
CREATE PROCEDURE init_calendar(IN pStart DATE, IN pEnd DATE)
BEGIN
    -- a local variable, so no session-level user variable is left behind
    DECLARE theDate DATE DEFAULT pStart;
    WHILE theDate <= pEnd DO
        -- IGNORE makes it possible to call init_calendar again to extend the
        -- calendar range without caring about dates that are already present
        INSERT IGNORE INTO Calendar (day) VALUES (theDate);
        SET theDate = theDate + INTERVAL 1 DAY;
    END WHILE;
END //
DELIMITER ;
CALL init_calendar('2010-01-01','2015-12-31');
In this example, the Calendar holds 2191 consecutive days, which at a rough estimate represents less than 15KB. And storing all the dates of the 21st century would represent less than 300KB...
Now, this is your actual data table as described in the question:
--
-- *Your* actual data table: the twelve sample dates from the question
--
CREATE TABLE tbl (theDate DATE);
INSERT INTO tbl (theDate)
VALUES
    ('2013-08-02'), ('2013-08-02'), ('2013-08-02'),
    ('2013-08-03'),
    ('2013-08-05'),
    ('2013-08-08'), ('2013-08-08'),
    ('2013-08-09'),
    ('2013-08-10'),
    ('2013-08-13'), ('2013-08-13'), ('2013-08-13');
And finally the query:
--
-- Now the query: calendar days between @start and @end (inclusive) with no row in tbl
--
SET @start = '2013-08-01';
SET @end = '2013-08-13';
SELECT Calendar.day
FROM Calendar
LEFT JOIN tbl
    ON Calendar.day = tbl.theDate
WHERE Calendar.day BETWEEN @start AND @end
  -- no matching tbl row: the day is missing
  AND tbl.theDate IS NULL;
Producing:
+------------+
| day |
+------------+
| 2013-08-01 |
| 2013-08-04 |
| 2013-08-06 |
| 2013-08-07 |
| 2013-08-11 |
| 2013-08-12 |
+------------+
This is how i would do it:
// Dates that exist in the database (one entry per distinct date).
$db_dates = array(
    '2013-08-02',
    '2013-08-03',
    '2013-08-05',
    '2013-08-08',
    '2013-08-09',
    '2013-08-10',
    '2013-08-13'
);
$missing = array();
$month = "08";
$year = "2013";
$day_start = 1;
$day_end = 14;  // exclusive upper bound: days 1..13 are checked
for ($i = $day_start; $i < $day_end; $i++) {
    // zero-pad the day so the string matches the 'YYYY-MM-DD' entries
    $check_date = sprintf('%s-%s-%02d', $year, $month, $i);
    if (!in_array($check_date, $db_dates, true)) {
        $missing[] = $check_date;
    }
}
print_r($missing);
I made it just to that interval but you can just define another interval or make it work for the whole year.
I'm adding this to the excellent answer by Dipesh if anybody wants more than 1024 days (or hours). I generated below 279936 hours from 2015 to 2046:
-- 6^7 = 279936 consecutive hours starting at 2015-01-01: t0..t6 are base-6 digits and
-- each multiplier below is the matching power of six.
SELECT
    DATE_ADD('2015-01-01', INTERVAL
        46656 * t6.t6 + 7776 * t5.t5 + 1296 * t4.t4 + 216 * t3.t3 + 36 * t2.t2 +
        6 * t1.t1 + t0.t0
    HOUR) AS period
FROM (SELECT 0 AS t0 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t0
CROSS JOIN (SELECT 0 AS t1 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t1
CROSS JOIN (SELECT 0 AS t2 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t2
CROSS JOIN (SELECT 0 AS t3 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t3
CROSS JOIN (SELECT 0 AS t4 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t4
CROSS JOIN (SELECT 0 AS t5 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t5
CROSS JOIN (SELECT 0 AS t6 UNION SELECT 1 UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5) AS t6
ORDER BY period
just plug this into the answer query.
The way I would solve in this in a datawarehouse-type situation is to populate a "static" table with dates over an appropriate period (there are example scripts for this type of thing which are easy to google) and then left outer join or right outer join your table to it: rows where there are no matches are the missing dates.
-- T-SQL: prints every date from 2014-11-01 up to (not including) 2014-12-31
-- for which date_table has no row with a non-NULL att_id.
DECLARE @date date;
DECLARE @dt_cnt int = 0;
SET @date = '2014-11-1';
WHILE @date < '2014-12-31'
BEGIN
    SELECT @dt_cnt = COUNT(att_id) FROM date_table WHERE att_date = @date;
    IF (@dt_cnt = 0)
    BEGIN
        PRINT @date;
    END
    -- move on to the next calendar day
    SET @date = DATEADD(day, 1, @date);
END
Here is my SQL:
-- Counts ids per calendar day of created_at. Groups exist only for days that have at
-- least one row, so days without rows do not appear in the result.
SELECT
COUNT(id),
CONCAT(YEAR(created_at), '-', MONTH(created_at), '-', DAY(created_at))
FROM my_table
GROUP BY YEAR(created_at), MONTH(created_at), DAY(created_at)
I want a row to show up even for days where there was no ID created. Right now I'm missing a ton of dates for days where there was no activity.
Any thoughts on how to change this query to do that?
SQL is notoriously bad at returning data that is not in the database. You can find the beginning and ending values for gaps of dates, but getting all the dates is hard.
The solution is to create a calendar table with one record for each date and OUTER JOIN it to your query.
Here is an example assuming that created_at is type DATE:
-- One row per calendar date with the number of my_table ids created on it;
-- COUNT ignores the NULL id of unmatched dates, so those dates show 0.
SELECT
    calendar_date,
    COUNT(`id`)
FROM calendar
LEFT JOIN my_table
    ON my_table.created_at = calendar.calendar_date
GROUP BY calendar_date
(I'm guessing that created_at is really DATETIME, so you'll have to do a bit more gymnastics to JOIN the tables).
General idea
There are two main approaches to generating data in MySQL. One is to generate the data on the fly when running the query and the other one is to have it in the database and using it when necessary. Of course, the second one would be faster than the first one if you're going to run your query frequently. However, the second one will require a table in the database which only purpose will be to generate the missing data. It will also require you to have privileges enough to create that table.
Dynamic data generation
This approach involves making UNIONs to generate a fake table that can be used to join the actual table with. The awful and repetitive query is:
-- Every date between @minDate and @maxDate, generated by counting back 0..9999 days
-- from @maxDate. Derived table e assigns both user variables inside the statement.
-- NOTE(review): this depends on e being evaluated before the select list reads @maxDate,
-- and assigning user variables inside a statement is deprecated in MySQL 8.0 - confirm
-- on the target server version.
select aDate from (
select @maxDate - interval (a.a+(10*b.a)+(100*c.a)+(1000*d.a)) day aDate from
(select 0 as a union all select 1 union all select 2 union all select 3
union all select 4 union all select 5 union all select 6 union all
select 7 union all select 8 union all select 9) a, /*10 day range*/
(select 0 as a union all select 1 union all select 2 union all select 3
union all select 4 union all select 5 union all select 6 union all
select 7 union all select 8 union all select 9) b, /*100 day range*/
(select 0 as a union all select 1 union all select 2 union all select 3
union all select 4 union all select 5 union all select 6 union all
select 7 union all select 8 union all select 9) c, /*1000 day range*/
(select 0 as a union all select 1 union all select 2 union all select 3
union all select 4 union all select 5 union all select 6 union all
select 7 union all select 8 union all select 9) d, /*10000 day range*/
(select @minDate := '2001-01-01', @maxDate := '2002-02-02') e
) f
where aDate between @minDate and @maxDate
Anyway, it is simpler than it seems. It makes cartesian products of derived tables with 10 numeric values each, so the result will have 10^X rows, where X is the number of derived tables in the query. In this example there is a 10000-day range, so you would be able to represent periods of over 27 years. If you need more, add another derived table of UNIONs to the query and update the interval; if you don't need so many, you can remove derived tables or individual values from them. Just to clarify, you can fine-tune the date period by applying a filter with a WHERE clause on the @minDate and @maxDate variables (but don't use a longer period than the one you created with the cartesian products).
Static data generation
This solution will require you to generate a table in your database. The approach is similar to the previous one. You'll have to first insert data into that table: a range of integers ranging from 1 to X where X is the maximum needed range. Again, if you are unsure just insert 100000 values and you'll be able to create day ranges for over 273 years. So, once you've got the integer sequence, you can transform it into a date range like this:
-- Turn the integers in seq.value into consecutive dates starting at 2012-01-01
-- (value 1 maps to the start date) and stop after 2012-01-05.
SELECT DATE_ADD('2012-01-01', INTERVAL value - 1 DAY) AS aDay
FROM seq
HAVING aDay <= '2012-01-05'
Assuming a table named seq with a column named value. On top the from date and at the bottom the to date.
Turning this into something useful
Ok, now we have our date periods generated but we're still missing a way to query data and display the missing values as an actual 0. This is where left join comes to the rescue. To make sure we're all on the same page, a left join is similar to an inner join but with only one difference: it will preserve all records from the left table of the join, regardless of whether there is a matching record on the table of the right. In other words, an inner join will remove all non-matched rows on the join while the left join will keep the ones on the left table and, for the records on the left that have no matching record on the right table, the left join will fill that "space" with a null value.
So we should join our domain table (the one that has "missing" data) with our newly generated table putting the latter on the left part of the join and the former on the right, so that all elements are considered, regardless of their presence in the domain table.
For example, if we had a table domainTable with fields ID, birthDate and we would like to see a count of all the birthDate in the first 5 days of 2012 per day and if the count is 0 to show that value, then this query could be run:
-- Number of domainTable rows per generated day, from 2012-01-01 through 2012-01-05;
-- days without a matching birthDate are kept by the left join and count 0.
SELECT
    allDays.aDay,
    COUNT(dt.id)
FROM (
    SELECT DATE_ADD('2012-01-01', INTERVAL value - 1 DAY) AS aDay
    FROM seq
    HAVING aDay <= '2012-01-05'
) AS allDays
LEFT JOIN domainTable AS dt
    ON dt.birthDate = allDays.aDay
GROUP BY allDays.aDay
This generates a derived table with all the required days (notice I'm using the static data generation) and performs a left join against our domain table, so all days will be displayed, regardless of whether they have matching values in our domain table. Also note that the count should be done on the field that will have null values, as those are not counted.
Notes to be considered
1) The queries can be used to query other intervals (months, years) performing small changes to the code
2) Instead of hardcoding the dates you can query for min and max values from the domain tables like this:
-- Consecutive dates from the smallest to the largest aDate found in domainTable.
SELECT DATE_ADD((SELECT MIN(aDate) FROM domainTable), INTERVAL value - 1 DAY) AS aDay
FROM seq
HAVING aDay <= (SELECT MAX(aDate) FROM domainTable)
This would avoid generating more records than necessary.
Actually answering your question
I think you should have already figured out how to do what you want. Anyway, here are the steps so that others can benefit from them too. Firstly, create the integer table. Secondly, run this query:
-- Number of my_table rows per day, for every day between the first and the last
-- created_at date; days without rows are kept by the left join and count 0.
SELECT
    allDays.aDay,
    COUNT(mt.id) AS aCount
FROM (
    SELECT DATE_ADD((SELECT DATE(MIN(created_at)) FROM my_table), INTERVAL value - 1 DAY) AS aDay
    FROM seq AS s
    HAVING aDay <= (SELECT DATE(MAX(created_at)) FROM my_table)
) AS allDays
LEFT JOIN my_table AS mt
    ON DATE(mt.created_at) = allDays.aDay
GROUP BY allDays.aDay
I guess created_at is a datetime and that's why you're concatenating that way. However, that happens to be the way MySQL natively stores dates, so I'm just grouping by the date field but casting the created_at to an actual date datatype. You can play with it using this fiddle.
And here is the solution generating data dynamically:
-- Number of my_table rows per day over the (at most 10) days ending at the latest
-- created_at date. Derived table e sets @minDate / @maxDate from my_table.
-- NOTE(review): depends on e being evaluated before @maxDate is read; assigning user
-- variables inside a statement is deprecated in MySQL 8.0 - confirm on the target server.
select allDays.aDay, count(mt.id) aCount from (
select @maxDate - interval a.a day aDay from
(select 0 as a union all select 1 union all select 2 union all select 3
union all select 4 union all select 5 union all select 6 union all
select 7 union all select 8 union all select 9) a, /*10 day range*/
(select @minDate := (select date(min(created_at)) from my_table),
@maxDate := (select date(max(created_at)) from my_table)) e
where @maxDate - interval a.a day between @minDate and @maxDate
) allDays
left join my_table mt on allDays.aDay = date(mt.created_at)
group by allDays.aDay
As you can see, the skeleton of the query is the same as the previous one. The only thing that changes is how the derived table allDays is generated. Now, the way the derived table is generated is also slightly different from the one I added before. This is because in the example fiddle I only needed a 10-day range. As you can see, it is more readable than adding a 1000-day range. Here is the fiddle for the dynamic solution so that you can play with it too.
Hope this helps!
The way to do it in one query:
-- Number of my_table rows per day for 65536 consecutive days starting at 2000-01-01.
SELECT COUNT(my_table.id) AS total,
       CONCAT(YEAR(dates.ddate), '-', MONTH(dates.ddate), '-', DAY(dates.ddate))
FROM (
    -- Creates "on the fly" 65536 days beginning from 2000-01-01 (179 years):
    -- b1..b16 each contribute either 0 or one power of two, i.e. the bits of the offset.
    -- UNION ALL is enough because the two values of every pair are already distinct.
    SELECT DATE_ADD('2000-01-01', INTERVAL (b1.b + b2.b + b3.b + b4.b + b5.b + b6.b + b7.b + b8.b
                                          + b9.b + b10.b + b11.b + b12.b + b13.b + b14.b + b15.b + b16.b) DAY) AS ddate
    FROM (SELECT 0 AS b UNION ALL SELECT 1) AS b1
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 2) AS b2
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 4) AS b3
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 8) AS b4
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 16) AS b5
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 32) AS b6
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 64) AS b7
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 128) AS b8
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 256) AS b9
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 512) AS b10
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 1024) AS b11
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 2048) AS b12
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 4096) AS b13
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 8192) AS b14
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 16384) AS b15
    CROSS JOIN (SELECT 0 AS b UNION ALL SELECT 32768) AS b16
) AS dates
LEFT JOIN my_table ON dates.ddate = my_table.created_at
GROUP BY dates.ddate
ORDER BY dates.ddate
The next code is only necessary if you want to test and don't have the "my_table" indicated on the question:
-- Test fixture for the query: five rows, three of them on 2000-01-01.
CREATE TABLE `my_table` (
    `id` INT (11),
    `created_at` DATE
);
-- ids are written as integer literals so no string-to-int coercion is involved
INSERT INTO `my_table` (`id`, `created_at`)
VALUES
    (1, '2000-01-01'),
    (2, '2000-01-01'),
    (3, '2000-01-01'),
    (4, '2001-01-01'),
    (5, '2100-06-06');
Testbed:
-- Sample data: six rows spread over April 2012.
CREATE TABLE testbed (
    id INTEGER,
    created_at DATE
);
INSERT INTO testbed (id, created_at)
VALUES
    (1, '2012-04-01'),
    (1, '2012-04-30'),
    (2, '2012-04-02'),
    (3, '2012-04-03'),
    (3, '2012-04-04'),
    (4, '2012-04-04');
I also use any_table, which I created artificially like this:
-- Helper table used only as a source of "enough rows"; the id values themselves do not matter.
CREATE TABLE any_table (
    id INTEGER
);
INSERT INTO any_table (id)
VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (10);
-- each run doubles the row count
INSERT INTO any_table (id) SELECT id FROM any_table; -- repeat this insert 7-8 times
You can use any table in your database that is expected to have more rows than the max(created_at) - min(created_at) range, at least 365 to cover a year.
Query:
-- Number of testbed rows per day, for every day between the first and last created_at.
-- The day list is produced by walking any_table and bumping the user variable @dist
-- once per row; @dist is initialised to 0 in derived table r.
-- NOTE(review): depends on the order in which MySQL evaluates user-variable assignments
-- within one statement (deprecated in MySQL 8.0) - confirm on the target server.
SELECT concat(year(dr._date),'-',month(dr._date),'-',day(dr._date)),
-- or, instead of concat(), simply: dr._date
count(id)
FROM (
SELECT date_add(r.mindt, INTERVAL @dist day) _date,
@dist := @dist + 1 AS days_away
FROM any_table t
JOIN (SELECT min(created_at) mindt,
max(created_at) maxdt,
@dist := 0
FROM testbed) r
-- stop once the generated date passes the latest created_at
WHERE date_add(r.mindt, INTERVAL @dist day) <= r.maxdt) dr
LEFT JOIN testbed tb ON dr._date = tb.created_at
GROUP BY dr._date;