Count Age With Distinctly in MySQL - mysql

I have a table like this
PersonID Gender Age CreatedDate
================================
1 M 32 10/09/2011
2 F 33 10/09/2011
2 F 33 10/11/2011
1 M 32 10/11/2011
3 F 33 10/11/2011
I want to count people by gender within an age range, grouped by created date. The age range is 30-34, and each person should be counted only once (distinctly).
Desired output should like this:
Gender AgeRange CreatedDate CountResult
================================
M 30_34 10/09/2011 1
F 30_34 10/09/2011 1
F 30_34 10/11/2011 1
So I tried this, but it didn't help:
SELECT t.Gender,'30_34' AS AgeRange,t.CreatedDate,
SUM(CASE WHEN t.Age BETWEEN 30 AND 34 THEN 1 ELSE 0 END) AS CountResult,
FROM (
SELECT DISTINCT PersonID,Gender,Age,CreatedDate
FROM MyTable
GROUP PersonID,Gender,Age,CreatedDate
HAVING COUNT(PersonID)=1
) t
What can I do for solution?
Thanks

If you want the earliest created date per PersonID, this might do:
drop table if exists mytable;
create table mytable(PersonID int, Gender varchar(1),Age int, CreatedDate date);
insert into mytable values
(1 , 'M', 32 , '2011-09-10'),
(2 , 'F', 33 , '2011-09-10'),
(2 , 'F', 33 , '2011-11-10'),
(1 , 'M', 32 , '2011-11-10'),
(3 , 'F', 33 , '2011-11-10');
-- Per gender and date, count the people aged 32-34, considering only the row
-- carrying each person's earliest createddate (picked by the correlated
-- subquery).
SELECT
    cur.gender,
    cur.createddate,
    SUM(CASE WHEN cur.age >= 32 AND cur.age <= 34 THEN 1 ELSE 0 END) AS Age32to34
FROM mytable AS cur
WHERE cur.createddate = (
    SELECT MIN(first_seen.createddate)
    FROM mytable AS first_seen
    WHERE first_seen.personid = cur.personid
)
GROUP BY cur.gender, cur.createddate

How about:
-- Count people aged 30-34 per gender and date, keeping each person only on
-- the earliest date on which they appear.
SELECT
    person.Gender,
    '30_34' AS AgeRange,
    person.CreatedDate,
    COUNT(*) AS CountResult
FROM MyTable AS person
INNER JOIN (
    -- Earliest CreatedDate for every person.
    SELECT PersonID, MIN(CreatedDate) AS MinCreatedDate
    FROM MyTable
    GROUP BY PersonID
) AS first_seen
    ON first_seen.PersonID = person.PersonID
    AND first_seen.MinCreatedDate = person.CreatedDate
WHERE person.Age BETWEEN 30 AND 34
GROUP BY person.Gender, person.CreatedDate
ORDER BY person.CreatedDate, person.Gender DESC

You would appear to want:
-- Count distinct people aged 30-34 for each gender and creation date.
-- COUNT(DISTINCT ...) counts a person once even if they have several rows
-- for the same date.
SELECT t.Gender, '30_34' AS AgeRange, t.CreatedDate,
       COUNT(DISTINCT t.PersonId) AS CountResult
FROM MyTable t  -- alias is required: every column reference uses the t. prefix
WHERE t.Age BETWEEN 30 AND 34
GROUP BY t.Gender, t.CreatedDate;

Related

I want to query the no of transaction done by a customer in a particular year, but the output should come year wise for each customer in table format

Output should be in below format, but I am getting wrong output:
Here the 2019, 2020 and 2021 columns contain the number of transactions made by the customer in 2019, 2020 and 2021 respectively. If the transaction counts are tied across years, Max_transaction_year is populated with the first year that has a non-zero count.
customer_name 2019 2020 2021 Max_transaction_year total_transaction
pug 2 1 0 2019 4
hari 0 1 1 2020 2
adh 0 0 1 2021 1
Sample table and data :
Also note that the first two digits in "tid" represent the year of transaction. Eg: 19597 -'19' represents 2019 and so on for 2020 and 2021.
create table client (cid int,cname char(10));
create table trans (tid int,cid int);
insert into client values(102,'pug'),(107,'ravi'),(109,'hari'),(105,'pon'),(106,'adh'),(104,'bav'),(101,'kat');
insert into trans values(19597,102),(19567,102),(20325,109),(21789,106),(17432,106),(21786,109),(20302,102),(17301,103);
Thanks in advance
Schema (MySQL v8.0)
create table client (cid int,cname char(10));
create table trans (tid int,cid int);
insert into client values(102,'pug'),(107,'ravi'),(109,'hari'),(105,'pon'),(106,'adh'),(104,'bav'),(101,'kat');
insert into trans values(19597,102),(19567,102),(20325,109),(21789,106),(17432,106),(21786,109),(20302,102),(17301,103);
Query #1
-- Pivot each customer's 2019-2021 transaction counts into one row and report
-- the year with the most transactions plus the overall total.
SELECT
    customer_name,
    SUM(CASE WHEN year = 2019 THEN no_transactions ELSE 0 END) AS '2019',
    SUM(CASE WHEN year = 2020 THEN no_transactions ELSE 0 END) AS '2020',
    SUM(CASE WHEN year = 2021 THEN no_transactions ELSE 0 END) AS '2021',
    -- rn = 1 marks the customer's busiest year (see the ranking below).
    MAX(CASE WHEN rn = 1 THEN year ELSE 0 END) AS Max_transaction_year,
    SUM(no_transactions) AS total_transaction
FROM (
    SELECT
        p1.*,
        -- Rank years by transaction count. The secondary sort on year makes
        -- ties deterministic and selects the earliest year among equals.
        ROW_NUMBER() OVER (
            PARTITION BY customer_name
            ORDER BY no_transactions DESC, year ASC
        ) AS rn
    FROM (
        -- One row per customer and year; the leading two digits of tid encode
        -- the year (19xxx -> 2019).
        SELECT
            c.cname AS customer_name,
            2000 + FLOOR(tid / 1000) AS year,
            COUNT(1) AS no_transactions
        FROM trans t
        INNER JOIN client c ON t.cid = c.cid
        WHERE FLOOR(tid / 1000) BETWEEN 19 AND 21
        GROUP BY c.cname, 2000 + FLOOR(tid / 1000)
    ) p1
) p2
GROUP BY customer_name;
customer_name
2019
2020
2021
Max_transaction_year
total_transaction
adh
0
0
1
2021
1
hari
0
1
1
2020
2
pug
2
1
0
2019
3
View on DB Fiddle
I think a somewhat simpler method just uses conditional aggregation:
-- Per-client transaction counts for 2019-2021 via conditional aggregation.
-- max_year is the first year (in 2019, 2020, 2021 order) whose count equals
-- the largest of the three counts.
SELECT
    cname,
    cnt_2019,
    cnt_2020,
    cnt_2021,
    (CASE GREATEST(cnt_2019, cnt_2020, cnt_2021)
         WHEN cnt_2019 THEN '2019'
         WHEN cnt_2020 THEN '2020'
         WHEN cnt_2021 THEN '2021'
     END) AS max_year,
    total_transactions
FROM (
    SELECT
        c.cname,
        c.cid,
        SUM(CASE WHEN year = '2019' THEN 1 ELSE 0 END) AS cnt_2019,
        SUM(CASE WHEN year = '2020' THEN 1 ELSE 0 END) AS cnt_2020,
        SUM(CASE WHEN year = '2021' THEN 1 ELSE 0 END) AS cnt_2021,
        COUNT(*) AS total_transactions
    FROM client c
    JOIN (
        -- Derive the four-digit year string from the first two digits of tid.
        SELECT tr.*, CONCAT('20', LEFT(tr.tid, 2)) AS year
        FROM trans tr
    ) tagged
        ON c.cid = tagged.cid
    WHERE year BETWEEN '2019' AND '2021'
    GROUP BY c.cname, c.cid
) per_client
ORDER BY cname, cid;
Here is a db<>fiddle.

Calculate difference between min and max for each column only if higher then 0

I need to calculate the difference between odds based on the value in the 'updated' column. At the moment I take the odds from the row where 'updated' is at its minimum and subtract them from the odds in the row where 'updated' is at its maximum. It works perfectly, but I've just realized that some columns occasionally contain 0, and I was wondering whether it's possible to still pick the minimum based on the 'updated' column, but only among rows where the value is higher than 0.
That's how the table looks like
fixture_id
H_odds
D_odds
A_odds
ev_tstamp
updated
120000
1.40
1.50
1.30
132000
12
120000
1.10
1.10
1.10
132000
11
120000
1.20
0
1.60
132000
10
And that's what I would like to get back
fixture_id
H_odds
D_odds
A_odds
ev_tstamp
updated
dif_h
dif_d
dif_a
120000
1.40
1.50
1.30
132000
12
0.2
0.4
-0.3
That's what I'm getting back at the moment
fixture_id
H_odds
D_odds
A_odds
ev_tstamp
updated
dif_h
dif_d
dif_a
120000
1.40
1.50
1.30
132000
12
0.2
1.5
-0.3
The code I'm using
select
t_max.*,
(t_max.H_odds - t_min.H_odds) as dif_h,
(t_max.D_odds - t_min.D_odds) as dif_d,
(t_max.A_odds - t_min.A_odds) as dif_a
from
(
select
fixture_id,
min(updated) min_updated,
max(updated) max_updated
from
test
group by
fixture_id
) as t1
join test as t_min on (t_min.fixture_id = t1.fixture_id and t_min.updated = t1.min_updated)
join test as t_max on (t_max.fixture_id = t1.fixture_id and t_max.updated = t1.max_updated)
Consider the following:
DROP TABLE IF EXISTS my_table;
CREATE TABLE my_table
(fixture_id INT NOT NULL
,updated INT NOT NULL
,outcome ENUM('Home win','Draw','Away win') NOT NULL
,odds DECIMAL(5,2) NOT NULL
,PRIMARY KEY(fixture_id,outcome,updated)
);
INSERT INTO my_table VALUES
(120,12,'Home win',1.40),
(120,11,'Home win',1.10),
(120,10,'Home win',1.20),
(120,12,'Draw',1.50),
(120,11,'Draw',1.10),
(120,12,'Away win',1.30),
(120,11,'Away win',1.10),
(120,10,'Away win',1.60);
Latest odds:
-- Latest odds: for every fixture and outcome, the row with the highest
-- `updated` value.
SELECT x.*
FROM my_table x
JOIN
( -- Most recent update per fixture/outcome. The alias is max_updated because
  -- it holds MAX(updated); the inner table gets its own alias so it does not
  -- shadow the outer x.
  SELECT m.fixture_id
       , m.outcome
       , MAX(m.updated) max_updated
    FROM my_table m
   GROUP
      BY m.fixture_id
       , m.outcome
) y
ON y.fixture_id = x.fixture_id
AND y.outcome = x.outcome
AND y.max_updated = x.updated;
Earliest odds:
-- Earliest odds: for every fixture and outcome, the row with the lowest
-- `updated` value.
SELECT x.*
FROM my_table AS x
INNER JOIN (
    SELECT
        fixture_id,
        outcome,
        MIN(updated) AS min_updated
    FROM my_table
    GROUP BY fixture_id, outcome
) AS y
    ON  y.fixture_id = x.fixture_id
    AND y.outcome = x.outcome
    AND y.min_updated = x.updated;
Delta:
-- Delta: latest odds minus earliest odds for every fixture and outcome.
-- Derived table a holds the latest row, b the earliest row; they are matched
-- on (fixture_id, outcome).
SELECT a.*
     , a.odds - b.odds delta
FROM
( -- a: row with the highest `updated` per fixture/outcome.
  SELECT x.*
    FROM my_table x
    JOIN
    ( SELECT m.fixture_id
           , m.outcome
           , MAX(m.updated) max_updated  -- named for what it holds
        FROM my_table m
       GROUP
          BY m.fixture_id
           , m.outcome
    ) y
      ON y.fixture_id = x.fixture_id
     AND y.outcome = x.outcome
     AND y.max_updated = x.updated
) a
JOIN
( -- b: row with the lowest `updated` per fixture/outcome.
  SELECT x.*
    FROM my_table x
    JOIN
    ( SELECT m.fixture_id
           , m.outcome
           , MIN(m.updated) min_updated
        FROM my_table m
       GROUP
          BY m.fixture_id
           , m.outcome
    ) y
      ON y.fixture_id = x.fixture_id
     AND y.outcome = x.outcome
     AND y.min_updated = x.updated
) b
ON b.fixture_id = a.fixture_id
AND b.outcome = a.outcome;
Result:
+------------+---------+----------+------+-------+
| fixture_id | updated | outcome | odds | delta |
+------------+---------+----------+------+-------+
| 120 | 12 | Home win | 1.40 | 0.20 |
| 120 | 12 | Draw | 1.50 | 0.40 |
| 120 | 12 | Away win | 1.30 | -0.30 |
+------------+---------+----------+------+-------+
This solution only works on MySQL 8+.
I would suggest window functions. The following treats each odds column separately . . . and it does not make any assumptions about the odds increasing or decreasing with each update:
-- For each fixture, compare the odds at the latest and earliest update,
-- ignoring rows where the odds are 0. Each odds column (h, d, a) gets its own
-- first/last non-zero update, computed by the window functions in the
-- derived table.
select fixture_id, ev_tstamp, max(updated),
       -- Odds at the latest non-zero update of each column.
       max(case when updated = max_h_update then h_odds end) as max_h,
       max(case when updated = max_d_update then d_odds end) as max_d,
       max(case when updated = max_a_update then a_odds end) as max_a,
       -- Latest non-zero odds minus earliest non-zero odds.
       (max(case when updated = max_h_update then h_odds end) -
        max(case when updated = min_h_update then h_odds end)
       ) as h_diff,
       (max(case when updated = max_d_update then d_odds end) -
        max(case when updated = min_d_update then d_odds end)
       ) as d_diff,
       (max(case when updated = max_a_update then a_odds end) -
        max(case when updated = min_a_update then a_odds end)
       ) as a_diff
from (select t.*,
             -- CASE yields NULL for zero odds, so MIN/MAX skip those rows.
             max(case when h_odds <> 0 then updated end) over (partition by fixture_id) as max_h_update,
             min(case when h_odds <> 0 then updated end) over (partition by fixture_id) as min_h_update,
             max(case when d_odds <> 0 then updated end) over (partition by fixture_id) as max_d_update,
             min(case when d_odds <> 0 then updated end) over (partition by fixture_id) as min_d_update,
             max(case when a_odds <> 0 then updated end) over (partition by fixture_id) as max_a_update,
             min(case when a_odds <> 0 then updated end) over (partition by fixture_id) as min_a_update
      from test t
     ) t
group by fixture_id, ev_tstamp;
I just modified the code a little bit to calculate the difference only for a specific group of odds (avg), so it looks like the query below. It worked just once, though: it took over 15 seconds to process, and the other times I tried it failed with a timeout error. Just to clarify, in my structure the market column is the 'outcome' column from your example.
explain SELECT a.*
, a.odds - b.odds delta
FROM
( SELECT x.*
FROM average_odds x
JOIN
( SELECT fix_id
, market
, MAX(updated) min_updated
FROM average_odds x where odds_type=avg
GROUP BY fix_id
, market
) y
ON y.fix_id = x.fix_id
AND y.market = x.market
AND y.min_updated = x.updated
) a
JOIN
( SELECT x.*
FROM average_odds x
JOIN
( SELECT fix_id
, market
, MIN(updated) min_updated
FROM average_odds x where odds_type=avg
GROUP BY fix_id
, market
) y
ON y.fix_id = x.fix_id
AND y.market = x.market
AND y.min_updated = x.updated
) b
ON b.fix_id = a.fix_id
AND b.market = a.market
ORDER BY `delta` ASC
That's the explain table
ID
S TYPE
table..
parti
type
pos_keys
KEY
key len
ref
rows
filtered
extra
1
PRIMARY
derived3>
null
all
null
null
null
null
17466
100.00
Using temporary; Using filesort
1
PRIMARY
x
null
ref
fix,fixi,market,updat
fix
4
y.fix_id
596
0.11
Using where
1
PRIMARY
x
null
ref
fix,fixi,market,updat
fix
4
y.fix_id
596
2.27
Using where
1
PRIMARY
derived5>
null
ref
auto_key0>
auto_key0>
31
y.fix_id,y.market,bobi.x.updated
10
100.00
using index
5
DERIVED
x
null
ref
boki
boki
4
const
17466
100.00
Using index condition; Using temporary; Using file...
3
DERIVED
x
null
ref
boki
boki
4
const
17466
100.00
Using index condition; Using temporary; Using file...

sql server 2008 running totals between 2 dates

I need to get running totals between 2 dates in my SQL Server table and update the records simultaneously. My data is as below, ordered by date, voucher_no:
DATE VOUCHER_NO OPEN_BAL DEBITS CREDITS CLOS_BAL
-------------------------------------------------------------------
10/10/2017 1 100 10 110
12/10/2017 2 110 5 105
13/10/2017 3 105 20 125
Now if i insert a record with voucher_no 4 on 12/10/2017 the output should be like
DATE VOUCHER_NO OPEN_BAL DEBITS CREDITS CLOS_BAL
------------------------------------------------------------------
10/10/2017 1 100 10 110
12/10/2017 2 110 5 105
12/10/2017 4 105 4 109
13/10/2017 3 109 20 129
I have seen several examples which find running totals up to a certain date, but not between 2 dates or from a particular date to the end of the file.
You should consider changing your database structure. I think it will be better to keep DATE, VOUCHER_NO, DEBITS, CREDITS in one table. And create view to calculate balances. In that case you will not have to update table after each insert. In this case your table will look like
create table myTable (
DATE date
, VOUCHER_NO int
, DEBITS int
, CREDITS int
)
insert into myTable values
('20171010', 1, 10, null),( '20171012', 2, null, 5)
, ('20171013', 3, 20, null), ('20171012', 4, 4, null)
And view will be
-- Running balance per voucher, ordered by DATE then VOUCHER_NO.
-- Each row is self-joined to itself and every earlier row; summing those
-- signed amounts gives the closing balance. The starting balance of 100 is
-- injected through the first row (rn = 1).
;with ledger as (
    select
        DATE,
        VOUCHER_NO,
        DEBITS,
        CREDITS,
        -- Signed movement: debits count positive, credits negative.
        case when DEBITS is null then -1 else 1 end * isnull(DEBITS, CREDITS) as bal,
        row_number() over (order by DATE, VOUCHER_NO) as rn
    from myTable
)
select
    cur.DATE,
    cur.VOUCHER_NO,
    cur.DEBITS,
    cur.CREDITS,
    sum(prior.bal + case when prior.rn = 1 then 100 else 0 end) - cur.bal as OPEN_BAL,
    sum(prior.bal + case when prior.rn = 1 then 100 else 0 end) as CLOS_BAL
from ledger cur
inner join ledger prior
    on prior.rn <= cur.rn
group by cur.DATE, cur.VOUCHER_NO, cur.rn, cur.bal, cur.DEBITS, cur.CREDITS
Here's another solution if you cannot change your db structure. In this case you must run the update statement each time after inserts. In both cases I assume that the initial balance is 100 when recalculating:
create table myTable (
DATE date
, VOUCHER_NO int
, OPEN_BAL int
, DEBITS int
, CREDITS int
, CLOS_BAL int
)
insert into myTable values
('20171010', 1, 100, 10, null, 110)
,( '20171012', 2, 110, null, 5, 105)
, ('20171013', 3, 105, 20, null, 125)
, ('20171012', 4, null, 4, null, null)
-- Recompute OPEN_BAL and CLOS_BAL for every row of myTable.
-- ledger: signed movement and position of each voucher (DATE, VOUCHER_NO order).
-- balances: running totals obtained by joining each row to itself and all
-- earlier rows; the starting balance of 100 enters through the first row.
;with ledger as (
    select
        DATE,
        VOUCHER_NO,
        DEBITS,
        CREDITS,
        case when DEBITS is null then -1 else 1 end * isnull(DEBITS, CREDITS) as bal,
        row_number() over (order by DATE, VOUCHER_NO) as rn
    from myTable
)
, balances as (
    select
        cur.DATE,
        cur.VOUCHER_NO,
        sum(prior.bal + case when prior.rn = 1 then 100 else 0 end) - cur.bal as OPEN_BAL,
        sum(prior.bal + case when prior.rn = 1 then 100 else 0 end) as CLOS_BAL
    from ledger cur
    inner join ledger prior
        on prior.rn <= cur.rn
    group by cur.DATE, cur.VOUCHER_NO, cur.rn, cur.bal
)
update tgt
set tgt.OPEN_BAL = src.OPEN_BAL,
    tgt.CLOS_BAL = src.CLOS_BAL
from myTable tgt
inner join balances src
    on tgt.DATE = src.DATE
    and tgt.VOUCHER_NO = src.VOUCHER_NO

Mysql distinct sum, group by

This is my table:
id user_id type value
1 1 type1 2
2 2 type1 1
3 1 type2 5
4 1 type1 2
I want output like this:
user_id type1 type2
1 4 5
2 1 0
please help
You can try like this:
-- One row per user with the summed value of each type in its own column;
-- a type with no rows for the user contributes 0.
SELECT
    user_id,
    SUM(CASE WHEN type = 'type1' THEN value ELSE 0 END) AS type1,
    SUM(CASE WHEN type = 'type2' THEN value ELSE 0 END) AS type2
FROM table_name
GROUP BY user_id;
I think, this is what you are searching for:
-- One row per user with the summed value of each type, using correlated
-- subqueries. COALESCE turns "no rows of that type" (NULL) into 0, and
-- GROUP BY collapses the base table to a single row per user.
select
    baseTable.user_id,
    coalesce((select sum(sub1.value)
              from <yourtablename> sub1
              where sub1.type = 'type1'
                and sub1.user_id = baseTable.user_id), 0) as type1,
    coalesce((select sum(sub2.value)
              from <yourtablename> sub2
              where sub2.type = 'type2'
                and sub2.user_id = baseTable.user_id), 0) as type2
from
    <yourtablename> baseTable
group by
    baseTable.user_id;

Trying to get the percentage of male/female who have applied to a job with an age range

Sorry about the confusing title, what I am trying to achieve is, getting the total applications for a job via the table below:
CREATE TABLE IF NOT EXISTS `applications` (
`application_id` int(11) NOT NULL AUTO_INCREMENT,
`application_user` varchar(100) NOT NULL,
`application_date` datetime NOT NULL,
`application_job` int(11) NOT NULL,
`application_status` varchar(10) DEFAULT 'pending',
`application_enabled` int(2) NOT NULL DEFAULT '1',
`application_resume` int(11) NOT NULL,
`application_description` text NOT NULL,
PRIMARY KEY (`application_id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
to get their ages, I am left joining user_personal_information on to application_user because they're the user who has applied to a job. My query:
SELECT count(*) as total,
user_gender as gender,
TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE()) AS age,
application_date
FROM applications
LEFT JOIN user_personal_information
ON user_personal_information_user = application_user
WHERE application_job = ?
My user's table with user_gender which can equal to male / female and user_birthdate which in the above statement I am converting it to an age.
I am trying to group all the applications with an age range of for example:
16 - 21
22 - 30
31 - 45
45 - 64
65+
And the male and female percentages for that age. To use for a datachart that needs data like so:
"dataProvider": [
{
"age": "85+",
"male": 25, //
"female": 25
}, {
"age": "80-54",
"male": 25,//percentage
"female": 25//percentage
}]
So from the above, there's 25 % of males have applied aged 85 and older, and 25% of females. You get the gist, so that's how I am trying to get my select statement to work.
which will create a chart like so:
So just to clarify, I want to count the total applications and work out the percentage of applications based on gender and age group. How can I do this with the select statement above?
A couple of nested group-by subqueries can do it for you. Note that the percentage calculated is gender specific:
-- For one job (bound via the two ? placeholders), the share of each gender
-- among all applications of the same age.
-- a: application count per (gender, age); b: application count per age.
select a.age, a.gender, a.cnt, 100 * a.cnt / b.sm as percentage
from
(
    SELECT user_gender as gender,
           TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE()) AS age,
           count(*) as cnt
    FROM applications
    LEFT JOIN user_personal_information
           ON user_personal_information_user = application_user
    WHERE application_job = ?
    GROUP BY user_gender, TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE())
) a
inner join
(
    SELECT TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE()) AS age,
           count(*) as sm
    FROM applications
    LEFT JOIN user_personal_information
           ON user_personal_information_user = application_user
    WHERE application_job = ?
    GROUP BY TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE())
) b
    on a.age = b.age;
If you are looking for a percentage specific to total applications, you need something like:
-- For one job (bound via the two ? placeholders), the share of each
-- (gender, age) group among all applications for that job.
select a.age,
       a.gender,
       a.cnt,
       -- Denominator: applications for the job with a computable age
       -- (COUNT over the expression skips NULL ages).
       100 * a.cnt / (
           select count(TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE()))
           from applications
           LEFT JOIN user_personal_information
                  ON user_personal_information_user = application_user
           WHERE application_job = ?
       ) as percentage
from
(
    SELECT user_gender as gender,
           TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE()) AS age,
           count(*) as cnt
    FROM applications
    LEFT JOIN user_personal_information
           ON user_personal_information_user = application_user
    WHERE application_job = ?
    GROUP BY user_gender, TIMESTAMPDIFF(YEAR, user_birthdate, CURDATE())
) a;
This should do the job:
-- Count applicants per age bucket and gender in a single row.
-- A bucket with no matching rows yields NULL because the CASE has no ELSE.
-- NOTE(review): `age` and `gender` are not columns of `applications` as
-- declared in the question; presumably the FROM clause must join
-- user_personal_information and derive age from user_birthdate - confirm.
select
sum(case when age between 16 and 21 and gender='male' then 1 end) as '[Male 16-21]',
sum(case when age between 16 and 21 and gender='female' then 1 end) as '[Female 16-21]',
sum(case when age between 22 and 30 and gender='male' then 1 end) as '[Male 22-30]',
sum(case when age between 22 and 30 and gender='female' then 1 end) as '[Female 22-30]',
sum(case when age between 31 and 45 and gender='male' then 1 end) as '[Male 31-45]',
sum(case when age between 31 and 45 and gender='female' then 1 end) as '[Female 31-45]',
sum(case when age between 46 and 64 and gender='male' then 1 end) as '[Male 46-64]',
sum(case when age between 46 and 64 and gender='female' then 1 end) as '[Female 46-64]',
sum(case when age > 64 and gender='male' then 1 end) as '[Male Over 64]',
sum(case when age > 64 and gender='female' then 1 end) as '[Female Over 64]',
-- Totals per gender; each label matches the gender it filters on.
sum(case when gender='male' then 1 end) as '[Male TOTAL]',
sum(case when gender='female' then 1 end) as '[Female TOTAL]'
FROM applications