MySQL CASE WHEN THEN empty case values - mysql

SELECT CASE WHEN age IS NULL THEN 'Unspecified'
WHEN age < 18 THEN '<18'
WHEN age >= 18 AND age <= 24 THEN '18-24'
WHEN age >= 25 AND age <= 30 THEN '25-30'
WHEN age >= 31 AND age <= 40 THEN '31-40'
WHEN age > 40 THEN '>40'
END AS ageband,
COUNT(*)
FROM (SELECT age
FROM table) t
GROUP BY ageband
This is my query. These are the results:
However if the table.age doesn't have at least 1 age in a category, it will just flat out ignore that case in the result. Like such:
This data set didnt have any records for age < 18. So the ageband "<18" doesnt show up. How can I make it so it does show up and return a value 0??

You need a table of agebands to populate the result for entries that have no matching rows. This can be done through an actual table, or dynamically generated with a subquery like this:
SELECT a.ageband, IFNULL(t.agecount, 0)
FROM (
-- ORIGINAL QUERY
SELECT
CASE
WHEN age IS NULL THEN 'Unspecified'
WHEN age < 18 THEN '<18'
WHEN age >= 18 AND age <= 24 THEN '18-24'
WHEN age >= 25 AND age <= 30 THEN '25-30'
WHEN age >= 31 AND age <= 40 THEN '31-40'
WHEN age > 40 THEN '>40'
END AS ageband,
COUNT(*) as agecount
FROM (SELECT age FROM Table1) t
GROUP BY ageband
) t
right join (
-- TABLE OF POSSIBLE AGEBANDS
SELECT 'Unspecified' as ageband union
SELECT '<18' union
SELECT '18-24' union
SELECT '25-30' union
SELECT '31-40' union
SELECT '>40'
) a on t.ageband = a.ageband
Demo: http://www.sqlfiddle.com/#!2/7e2a9/10

I haven't tested it, but this should work.
SELECT ageband, cnt FROM (
SELECT '<18' as ageband, COUNT(*) as cnt FROMT table WHERE age < 18
UNION ALL
SELECT '18-24' as ageband, COUNT(*) as cnt FROMT table WHERE age >= 18 AND age <= 24
UNION ALL
SELECT '25-30' as ageband, COUNT(*) as cnt FROMT table WHERE age >= 25 AND age <= 30
UNION ALL
SELECT '31-40' as ageband, COUNT(*) as cnt FROMT table WHERE age >= 31 AND age <= 40
UNION ALL
SELECT '>40' as ageband, COUNT(*) as cnt FROMT table WHERE age > 40
) as A

Assuming a table AgeCat that contains your categories.
SELECT c.Cat, COUNT(*) FROM Age a
RIGHT JOIN AgeCat c ON (
(a.age < 18 AND c.Cat = '<18')
OR (a.age BETWEEN 18 AND 24 AND c.Cat = '18-24')
OR (a.age BETWEEN 26 AND 30 AND c.Cat = '25-30')
-- etc.
) GROUP BY c.Cat;

Related

Mysql add an extra column at end instead of using a union

I am trying to combine two queries so the data shows up in one table. I am using a union to combine the two queries. However, everything is added to the same column, what do I change so the results from the different queries take up a new column.
Here is an image of the query result.
Here is my code
select * from(
SELECT
CASE
WHEN age BETWEEN 18 and 25 THEN 'Under 25'
WHEN age BETWEEN 25 and 40 THEN '25 - 40'
WHEN age >= 40 THEN 'Over 40'
WHEN age IS NULL THEN 'Not Filled In (NULL)'
END as age_range,
COUNT(*) AS count,
CASE
WHEN age between 18 and 25 THEN 1
WHEN age BETWEEN 25 and 40 THEN 2
WHEN age >= 40 THEN 8
WHEN age IS NULL THEN 9
END as ordinal
FROM (SELECT TIMESTAMPDIFF(YEAR, users.birthdate_on, CURDATE()) AS age FROM users
join subscriptions on users.id = subscriptions.user_id
where users.plan <> 'domain' and users.plan <> '' and users.plan <> 'domain_cpi' and users.birthdate_on is not null
) as derived
GROUP BY age_range
union
SELECT
CASE
WHEN age BETWEEN 18 and 25 THEN 'Under 25'
WHEN age BETWEEN 25 and 40 THEN '25 - 40'
WHEN age >= 40 THEN 'Over 40'
WHEN age IS NULL THEN 'Not Filled In (NULL)'
END as age_range2,
COUNT(*) AS count2,
CASE
WHEN age between 18 and 25 THEN 1
WHEN age BETWEEN 25 and 40 THEN 2
WHEN age >= 40 THEN 8
WHEN age IS NULL THEN 9
END as ordinal
FROM (SELECT TIMESTAMPDIFF(YEAR, users.birthdate_on, CURDATE()) AS age FROM users) as derived2
GROUP BY age_range2
) as test2
ORDER BY ordinal
I want the result so only one under 25 shows, but the two results for under 25 493 and 2046 are in different columns. Same for all other ranges
Sounds like you want to put a JOIN to derived.age_range ON test2.age_range2
SELECT
CASE
WHEN age BETWEEN 18 and 25 THEN 'Under 25'
WHEN age BETWEEN 25 and 40 THEN '25 - 40'
WHEN age >= 40 THEN 'Over 40'
WHEN age IS NULL THEN 'Not Filled In (NULL)'
END as age_range,
CASE
WHEN age between 18 and 25 THEN 1
WHEN age BETWEEN 25 and 40 THEN 2
WHEN age >= 40 THEN 8
WHEN age IS NULL THEN 9
END as ordinal,
count, count2
FROM (
SELECT
derived.age,
COUNT(*) AS count
FROM (
SELECT TIMESTAMPDIFF(YEAR, users.birthdate_on, CURDATE()) AS age FROM users
join subscriptions on users.id = subscriptions.user_id
where users.plan <> 'domain' and users.plan <> '' and users.plan <> 'domain_cpi' and users.birthdate_on is not null
GROUP BY age
) as derived
JOIN
SELECT
derived2.age,
COUNT(*) AS count2
FROM (
SELECT TIMESTAMPDIFF(YEAR, users.birthdate_on, CURDATE()) AS age FROM users
GROUP BY age
) as derived2
ON derived.age = derived2.age
)
ORDER BY ordinal ASC;
I don't believe you need 2 queries just a left join instead. The count() function ONLY increments for non-null values so you can have users counted even if they don't meet the subscription criteria.
SELECT
CASE
WHEN age BETWEEN 18 and 25 THEN 'Under 25'
WHEN age BETWEEN 25 and 40 THEN '25 - 40'
WHEN age >= 40 THEN 'Over 40'
WHEN age IS NULL THEN 'Not Filled In (NULL)'
END as age_range
, CASE
WHEN age between 18 and 25 THEN 1
WHEN age BETWEEN 25 and 40 THEN 2
WHEN age >= 40 THEN 8
WHEN age IS NULL THEN 9
END as ordinal
, COUNT(DISTINCT id) AS user_count # distinct might not be needed
, COUNT(subscriber_id) AS subscriber_count
FROM (
SELECT
users.id
, TIMESTAMPDIFF(YEAR, users.birthdate_on, CURDATE()) AS age
, subscriptions.user_id AS subscriber_id
FROM users
LEFT JOIN subscriptions ON users.id = subscriptions.user_id
AND users.plan <> 'domain'
AND users.plan <> ''
AND users.plan <> 'domain_cpi'
AND users.birthdate_on IS NOT NULL
) d
GROUP BY
CASE
WHEN age BETWEEN 18 and 25 THEN 'Under 25'
WHEN age BETWEEN 25 and 40 THEN '25 - 40'
WHEN age >= 40 THEN 'Over 40'
WHEN age IS NULL THEN 'Not Filled In (NULL)'
END
, CASE
WHEN age between 18 and 25 THEN 1
WHEN age BETWEEN 25 and 40 THEN 2
WHEN age >= 40 THEN 8
WHEN age IS NULL THEN 9
END

Two different date types in column

I am trying to create a age group array in mysql.
SELECT
COUNT(*),
CASE
when age < 60 THEN '<60'
when age >= 61 AND age <= 65 then '61-65'
when age >= 66 AND age <= 70 then '66-70'
when age >= 71 AND age <= 75 then '71-75'
when age >= 76 AND age <= 80 then '76-80'
when age > 81 then '>81'
END as age_group
FROM(
SELECT YEAR(current_time()) - Year(DateBorn) AS age
FROM custs
WHERE FDID = 'ANGL01'
) as custs2
GROUP BY age_group
When i ran this query, it worked fine, except that there were 2013 null results. It turns out that there are 2 data formats in the column.
The first on is just the year: 'yyyy'
The second on includes the day and month: 'dd/mm/yyyy'
How can I modify this query to take both data formats into account?
Ideally you should store dates using the DATE data type.
Given your current schema, assuming the dates are always either in yyyy or dd/mm/yyyy format then the year will always be the 4 rightmost characters, so you can use RIGHT(DateBorn,4), like this:
SELECT
COUNT(*),
CASE
when age < 60 THEN '<60'
when age >= 61 AND age <= 65 then '61-65'
when age >= 66 AND age <= 70 then '66-70'
when age >= 71 AND age <= 75 then '71-75'
when age >= 76 AND age <= 80 then '76-80'
when age > 81 then '>81'
END as age_group
FROM(
SELECT YEAR(current_date()) - CAST(RIGHT(DateBorn,4) AS UNSIGNED) AS age
FROM custs
WHERE FDID = 'ANGL01'
) as custs2
GROUP BY age_group
You should check for values that don't match your expected date formats. A query like this will give you a sample of non-conformant rows:
select DateBorn
from custs
where DateBorn not regexp '^[0-9][0-9][0-9][0-9]$'
and DateBorn not regexp '^[0-3][0-9]\/[0-1][0-9]\/[0-9][0-9][0-9][0-9]$'
limit 25
How about MySQL's STR_TO_DATE?
SELECT STR_TO_DATE(DATE_FORMAT(YourField, '%d/%m/%Y'),'%Y')...
http://dev.mysql.com/doc/refman/5.5/en/date-and-time-functions.html#function_str-to-date
Using this you can manipulate other occurrences. I have inherited legacy data you cannot change, so this function will accommodate everything you can mask, if your data is fairly consistent.
I may have it backwards. Try this, if needed.
SELECT date_format( str_to_date( dt3, '%m-%d-%Y' ) , '%m/%d/%Y' ) AS my_date FROM dt_tb3
http://www.plus2net.com/sql_tutorial/date-string.php
Assuming that DateBorn is a text column, try this:
SELECT YEAR(now()) - Year(str_to_date(DateBorn,
CASE WHEN char_length(DateBorn) = 4
THEN '01/01/%Y'
ELSE '%d/%m/%Y'
END
)) AS age
current_time() is an alias for curtime() which returns HH:MM:SS. So YEAR(current_time()) may result in some odd results. You might want to try YEAR(NOW()) instead.
You would also want to use STR_TO_DATE(str,format) to parse your varchar as a datetime. (Of course, it is better to fix the schema than patch it like this) Make sure to update the format as you have it in your table.
SELECT
COUNT(*),
CASE
when age < 60 THEN '<60'
when age >= 61 AND age <= 65 then '61-65'
when age >= 66 AND age <= 70 then '66-70'
when age >= 71 AND age <= 75 then '71-75'
when age >= 76 AND age <= 80 then '76-80'
when age > 81 then '>81'
END as age_group
FROM(
SELECT YEAR(NOW()) - Year(str_to_date(DateBorn, '%d/%m/%Y')) AS age
FROM custs
WHERE FDID = 'ANGL01'
) as custs2
GROUP BY age_group

MYSQL select info from sum within the query

this query works but pulls all results. I would like it to only pull results that are not 0.00 which is the totaldue. This is calculated within the query but I do not know how to exclude results with 0.00?
SELECT name,
SUM(IF(timeperiod='0',totalinv-paidtotal,0)) AS p0030,
SUM(IF(timeperiod='30',totalinv-paidtotal,0)) AS p3060,
SUM(IF(timeperiod='60',totalinv-paidtotal,0)) AS p6090,
SUM(IF(timeperiod='90',totalinv-paidtotal,0)) AS p9000,
SUM(totalinv)-SUM(paidtotal) AS totaldue
FROM
(
SELECT primary_key, name, timeperiod, totalinv, SUM(paidtotal) as paidtotal
FROM
(
SELECT
a.primary_key,
a_name AS name,
CAST(totalinv AS DECIMAL(10,2)) as totalinv,
CAST(IFNULL(amount,0) AS DECIMAL(10,2)) as paidtotal,
CASE
WHEN invoicedate > DATE_SUB(STR_TO_DATE($today,'%Y%m%d'),INTERVAL 29 DAY) THEN '0'
WHEN invoicedate > DATE_SUB(STR_TO_DATE($today,'%Y%m%d'),INTERVAL 59 DAY) AND invoicedate <= DATE_SUB(STR_TO_DATE($today,'%Y%m%d'),INTERVAL 29 DAY) THEN '30'
WHEN invoicedate > DATE_SUB(STR_TO_DATE($today,'%Y%m%d'),INTERVAL 89 DAY) AND invoicedate <= DATE_SUB(STR_TO_DATE($today,'%Y%m%d'),INTERVAL 29 DAY) THEN '60'
ELSE '90'
END AS timeperiod
FROM $mysql_billing a
LEFT OUTER JOIN $mysql_billing_dates b ON a.primary_key = b.id
WHERE $today >= invoicedate
AND $totaldue!='0.00'
AND void=''
) foo
GROUP BY primary_key, name, timeperiod
) bar
GROUP BY name
ORDER BY name ASC
You are just missing a HAVING at the very end:
....
GROUP BY name
HAVING totaldue != 0
ORDER BY name ASC
That will allow you to select on your calculated/grouped column.

Give every "empty" age band 0

I have a query like the following:
SELECT
COUNT(*) AS `members`,
CASE
WHEN age >= 10 AND age <= 20 THEN '10-20'
WHEN age >=21 AND age <=30 THEN '21-30'
WHEN age >=31 AND age <=40 THEN '31-40'
WHEN age >=41 AND age <= 50 THEN '41-50'
WHEN age >=51 AND age <=60 THEN '51-60'
WHEN age >=61 AND age <=70 THEN '61-70'
WHEN age >= 71 THEN '71+'
END AS ageband
FROM `members`
GROUP BY ageband
Retrieves
How do I populate the empty age bands with 0?
This is what I am looking to achieve from my query above:
members ageband
1 10-20
0 21-30
2 31-40
0 41-50
1 51-60
0 61-70
1 71+
There are NO member in range of 41-50, hence I put 0.
The rows you wish for are not displayed because there's no data for them. Outer join (left or right) all possibilities and they will be displayed:
SELECT
COUNT(members.whatever_column_best_would_be_primary_key) AS `members`,
ages.age_range
FROM `members`
RIGHT JOIN (
SELECT '10-20' as age_range
UNION ALL
SELECT '21-30'
UNION ALL
SELECT '31-40'
UNION ALL
SELECT '41-50'
UNION ALL
SELECT '51-60'
UNION ALL
SELECT '61-70'
UNION ALL
SELECT '71+'
)ages ON ages.age_range = CASE
WHEN members.age >= 10 AND age <= 20 THEN '10-20'
WHEN members.age >=21 AND age <=30 THEN '21-30'
WHEN members.age >=31 AND age <=40 THEN '31-40'
WHEN members.age >=41 AND age <= 50 THEN '41-50'
WHEN members.age >=51 AND age <=60 THEN '51-60'
WHEN members.age >=61 AND age <=70 THEN '61-70'
WHEN members.age >= 71 THEN '71+'
END
GROUP BY ages.age_range
I don't know if I got your question right. Did you miss the part with ELSE in the manual?
SELECT
COUNT(*) AS `members`,
CASE
WHEN age >= 10 AND age <= 20 THEN '10-20'
WHEN age >=21 AND age <=30 THEN '21-30'
WHEN age >=31 AND age <=40 THEN '31-40'
WHEN age >=41 AND age <= 50 THEN '41-50'
WHEN age >=51 AND age <=60 THEN '51-60'
WHEN age >=61 AND age <=70 THEN '61-70'
WHEN age >= 71 THEN '71+'
ELSE '0'
END AS ageband
FROM `members`
GROUP BY ageband
you may use coalsce !
try this
SELECT
COALESCE(COUNT(*),0) AS `members`,
CASE
WHEN age >= 10 AND age <= 20 THEN '10-20'
WHEN age >=21 AND age <=30 THEN '21-30'
WHEN age >=31 AND age <=40 THEN '31-40'
WHEN age >=41 AND age <= 50 THEN '41-50'
WHEN age >=51 AND age <=60 THEN '51-60'
WHEN age >=61 AND age <=70 THEN '61-70'
WHEN age >= 71 THEN '71+'
END AS ageband
FROM `members`
GROUP BY ageband
You can first build a table containing your age bands. Say CREATE TABLE ages(ageband text);:
min_age | max_age | ageband
---------------------------
10 | 20 | 10-20
21 | 30 | 21-30
31 | 40 | 31-40
41 | 50 | 41-50
51 | 60 | 51-60
61 | 70 | 61-70
71 | 99 | 71+
Then you FULL OUTER JOIN:
SELECT
COUNT(members.*) AS `members`,
ageband
FROM
`members`
FULL OUTER JOIN `ages` ON
age BETWEEN min_age AND max_age
GROUP BY ageband;
(You can't use COUNT(*) but have to be more specific and use COUNT(members.*) because there will be rows with an empty ageband and NULL values coming from members and you want to count them as 0. COUNT() counts non-NULL items only.)
(Furthermore, the advantage of FULL OUTER JOIN over RIGHT JOIN is that if you have an age not covered, such as 8, 20.5, 101 or text, it will still be listed with an ageband = NULL so you see the problem.)
even #FancyPants has been provided solution, you can also try below query in different way:
SELECT IFNULL(cnt,0) AS 'members', ages.age_range AS 'ageband' FROM (
SELECT '10-20' AS age_range
UNION ALL
SELECT '21-30'
UNION ALL
SELECT '31-40'
UNION ALL
SELECT '41-50'
UNION ALL
SELECT '51-60'
UNION ALL
SELECT '61-70'
UNION ALL
SELECT '71+') AS ages
LEFT JOIN
(SELECT COUNT(*) AS cnt,
CASE WHEN age>=10 AND age<=20 THEN '10-20'
WHEN age>=21 AND age<=30 THEN '21-30'
WHEN age>=31 AND age<=40 THEN '31-40'
WHEN age>=41 AND age<=50 THEN '41-50'
WHEN age>=51 AND age<=60 THEN '51-60'
WHEN age>=61 AND age<=70 THEN '61-70'
WHEN age>=71 THEN '71+' END age_range
FROM members GROUP BY age_range) a
ON a.age_range=ages.age_range

Selecting age groups using SQL

This is a follow-up question on Get age from the birthday field with type date using SQL. I have a date field in a MySQL database for the birthday of a user and get the age using this query:
SELECT
ROUND(DATEDIFF(
Cast((SELECT NOW()) as Date),
Cast(birthday as Date)
) / 365, 0) as age
FROM member
Now, I need to select the number of people in different age groups. For example, I need to know how many people are in the age group 13-17, 18-21, 22-25, 26-35, 36-50, 51-MAX.
Is that possible using MySQL?
I have thought of UNIONs, like this:
SELECT
ROUND(DATEDIFF(
Cast((SELECT NOW()) as Date),
Cast(birthday as Date)
) / 365, 0) as age,
1 as agegroup
FROM member WHERE age >=13 AND age <=17
UNION
SELECT
ROUND(DATEDIFF(
Cast((SELECT NOW()) as Date),
Cast(birthday as Date)
) / 365, 0) as age
2 as agegroup
FROM member WHERE age >=18 AND age <=21
But that would be long and ugly. There must be a better way!
select AgeGroup
, count(*)
from (
select case
when age between 13 and 17 then 1
when age between 18 and 21 then 2
...
end as AgeGroup
from (
SELECT ROUND(DATEDIFF(Cast(NOW() as Date),
Cast(birthday as Date)) / 365, 0) as age
FROM YourTable
) as SubQueryAlias
) as SubQueryAlias2
group by
AgeGroup
Another possible solution:-
SELECT AgeRange.MinAge, AgeRange.MaxAge, COUNT(*)
FROM
(
SELECT 13 AS MinAge, 17 AS MaxAge
UNION SELECT 18, 21
UNION SELECT 22, 25
UNION SELECT 26, 35
UNION SELECT 36, 50
UNION SELECT 51, 9999
) AgeRange
INNER JOIN YourTable
ON ROUND(DATEDIFF(CAST(NOW() as DATE), CAST(birthday as DATE)) / 365, 0) BETWEEN AgeRange.MinAge AND AgeRange.MaxAge
GROUP BY AgeRange.MinAge, AgeRange.MaxAge
Possibly easier to expand if needs be, or to move to using date ranges from a table (so the resulting report could be updated by users easily if required).
If you had the age as a column in a table you would do it like this:
SELECT
SUM(CASE WHEN age < 10 THEN 1 ELSE 0 END) AS under10,
SUM(CASE WHEN 10<age AND age <19 THEN 1 ELSE 0 END) AS age10to19,
.
.
.
FROM table
There are likely to be minor changes because age isn't in its own column or if you want extra or different ranges. I'm sure you can work them out yourself!