calculate frequency using sql - mysql

I have a table in MySQL:
Col1 | Col2
a A
a B
c C
a B
i want to create a table like this:
col1 | col2 | freq
a A 0.33
a B 0.67
col1 is a specified item in Col1. col2 is distinct item that has occured with the specified item(i.e. a). freq column is the frequency of appearence of item in col2.
Can someone give me a hint of how to create such a query? Thanks a lot.

try this:
Select A.Col1, A.Col2, A.Count1 * 1.0 / B.Count2 As Freq
From (
Select Col1, Col2, Count(*) As Count1
From YourTableName
Group By Col1, Col2
) As A
Inner Join (
Select Col1, Count(*) As Count2
From YourTableName
Group By Col1
) As B
On A.Col1 = B.Col1

You can also use this which is coded in SQL server
DECLARE #Count INT;
SELECT #Count = COUNT(1) FROM YourTableName WHERE Col1 = 'a'
SELECT Col1, Col2, CAST(COUNT(1)* 1.00 /#Count AS DECIMAL(4,2) ) AS Frequency
FROM YourTableName
WHERE Col1 = 'a'
GROUP BY Col1, Col2
this way you have a better performance

With window functions
SELECT Col1, Col2, Count1*1.0 / Count2 AS freq
FROM (
SELECT
Col1,
Col2,
COUNT() OVER(PARTITION BY Col1, Col2) AS Count1,
COUNT() OVER(PARTITION BY Col1) AS Count2
FROM YourTableName
)
GROUP BY Col1, Col2

Related

Select query in IF statement MYSQL

In my MySql database, I want to create select query which should give output like this:
in my select query i want a column output as 1, if the column value present in a list returned by a select query else 0 .
Select col1,col2,
,IF col3 IN
((select col from tabl2 ),1,0)AS col5
from tbl1.
Thanks in Advance
SELECT col1, col2,
IF col3 IN ((select col from tabl2 ),1,0) AS col5
FROM tbl1
Using IF and subquery in MySQL
SELECT table1.column1, table1.column2,
(
SELECT IF (
(SELECT column3 FROM table1 WHERE column3 IN (SELECT column FROM table2)), 1, 0
)
) AS column_output
FROM table1
In general:
SELECT col1,
col2,
CASE WHEN col3 IN (select col from tabl2)
THEN 1
ELSE 0
END AS col5
FROM tbl1
Specific for MySQL:
SELECT col1,
col2,
col3 IN (select col from tabl2) AS col5
FROM tbl1

MySQL combining COUNT, MAX and SUM

In MySQL, I would like to have an extra column showing the sum of values of a particular column. However, the numbers I would like to sum come from a subquery and are not stored in a separate table, something like this:
(SELECT a.ID, MAX(a.COUNT_ID) AS MAX_COUNT FROM
(SELECT ID, COUNT(*) AS COUNT_ID
FROM my_table
GROUP BY COL1, COL2) a
GROUP BY COL1, COL2) b
And this would output something like:
ID MAX_COUNT
ABC 1
DEF 2
GHI 3
And now, I want an extra column showing the sum of MAX_COUNT, like this (repeated over all rows):
ID MAX_COUNT SUM_MAX_COUNT
ABC 1 6
DEF 2 6
GHI 3 6
The actual goal is actually to show the percentage MAX_COUNT of the total MAX_COUNT, so 1/6, 2/6 and 3/6.
How do I do this? I already tried doing a CROSS JOIN but it doesn't work:
SELECT * FROM
((SELECT a.ID, MAX(a.COUNT_ID) AS MAX_COUNT FROM
(SELECT ID, COUNT(*) AS COUNT_ID
FROM my_table
GROUP BY COL1, COL2) a
GROUP BY COL1, COL2) b
CROSS JOIN (SELECT SUM(b.MAX_COUNT)) AS c
Error: Unknown table 'b'
EXAMPLE
Example table:
CREATE TABLE TABLE1 (
COL1 varchar(255),
COL2 varchar(255),
DAY int,
HOUR int);
INSERT INTO TABLE1 VALUES
('X','Y',1,12),
('X','Y',1,13),
('X','Y',1,13),
('A','B',2,19),
('X','B',3,13),
('X','B',3,13);
Now I want to have, for each combination of COL1 and COL2, the number of lines in this table for each hour:
SELECT COL1, COL2, HOUR, COUNT(*) AS COUNT_LINES
FROM TABLE1
GROUP BY DAY, HOUR, COL1, COL2;
Which outputs this:
COL1 COL2 HOUR COUNT_LINES
X Y 12 1
X Y 13 2
A B 19 1
X B 13 2
Now I want, for each combination of COL1 and COL2, the maximum of COUNT_LINES, so I use the query above in a subquery:
SELECT a.COL1, a.COL2, MAX(a.COUNT_LINES)
FROM
(SELECT COL1, COL2, HOUR, COUNT(*) AS COUNT_LINES
FROM TABLE1
GROUP BY DAY, HOUR, COL1, COL2) a
GROUP BY COL1, COL2;
Which outputs this:
COL1 COL2 MAX(COUNT_LINES)
A B 1
X B 2
X Y 2
Now in the last step, I want the SUM of MAX(COUNT_LINES) column in a separate column, like this:
COL1 COL2 MAX(COUNT_LINES) SUM(MAX(COUNT_LINES))
A B 1 5
X B 2 5
X Y 2 5
But that is the part I don't know how to do.
Try this:
SELECT COL1,Max(COUNT_ID),Sum(SUM_MAX_COUNT) FROM(
SELECT COL1,
Count(*) AS COUNT_ID,
(SELECT Sum(Count(*))
FROM TABLE_NAME
GROUP BY COL1) AS SUM_MAX_COUNT
FROM TABLE_NAME
GROUP BY COL1)
GROUP BY COL1;
Use CTE for this
with q as
(
SELECT a.COL1, a.COL2, MAX(a.COUNT_LINES) cmax
FROM
(SELECT COL1, COL2, HOUR, COUNT(*) AS COUNT_LINES
FROM TABLE1
GROUP BY DAY, HOUR, COL1, COL2) a
GROUP BY COL1, COL2
)
SELECT q.*, (SELECT SUM(q.cmax) from q)
FROM q
dbfiddle demo
which can be rewritten (for older MySQL) as
SELECT q.*,
(
SELECT SUM(cmax) from
(
SELECT a.COL1, a.COL2, MAX(a.COUNT_LINES) cmax
FROM
(SELECT COL1, COL2, HOUR, COUNT(*) AS COUNT_LINES
FROM TABLE1
GROUP BY DAY, HOUR, COL1, COL2) a
GROUP BY COL1, COL2
) q
) as max_sum
FROM
(
SELECT a.COL1, a.COL2, MAX(a.COUNT_LINES) cmax
FROM
(SELECT COL1, COL2, HOUR, COUNT(*) AS COUNT_LINES
FROM TABLE1
GROUP BY DAY, HOUR, COL1, COL2) a
GROUP BY COL1, COL2
) q
dbfiddle demo
This seems to give the right answer:
mysql-sql> select
... x.col1,
... x.col2,
... x.max_count_lines,
... y.sum_max_count_lines
... from
... (
... select
... a.col1,
... a.col2,
... max(a.count_lines) as max_count_lines
... from
... (
... select
... col1,
... col2,
... hour,
... count(*) as count_lines
... from
... table1
... group by
... day,
... hour,
... col1,
... col2
... ) a
... group by
... col1,
... col2
... ) x,
... (
... select
... sum(max_count_lines) as sum_max_count_lines
... from
... (
... select
... b.col1,
... b.col2,
... max(b.count_lines) as max_count_lines
... from
... (
... select
... col1,
... col2,
... hour,
... count(*) as count_lines
... from
... table1
... group by
... day,
... hour,
... col1,
... col2
... ) b
... group by
... col1,
... col2
... ) c
... ) y;
+------+------+-----------------+---------------------+
| col1 | col2 | max_count_lines | sum_max_count_lines |
+------+------+-----------------+---------------------+
| A | B | 1 | 5 |
| X | B | 2 | 5 |
| X | Y | 2 | 5 |
+------+------+-----------------+---------------------+
3 rows in set (0.00 sec)

Select from select with a count

Ok i believe I've done something similar before, but don't really remember how I did it.
Searching around gives me similar answer's but not solving my issue.
Ex1, This query is fine:
SELECT col1, count(*) as col3
FROM db.table
WHERE col2 = 0 group by col1
Ex2, But I need all where col3 is bigger than 1, so I tried:
Select *
FROM
(SELECT col1, count(*) as col3
FROM db.table
WHERE col2 = 0 group by col1)
WHERE col3 > 1;
# Not working
Ex3, I dont know why but this seems to be working:
SELECT col1, count(*) as col3
FROM db.table
WHERE col2 = 0
GROUP BY col1
HAVING count(*) > 1;
Ok, I just answered my own question when I read the error message from Mysql.
Select *
FROM
(SELECT col1, count(*) as col3
FROM db.table
WHERE col2 = 0 group by col1) temp
WHERE col3 > 1;
I was missing the alias.

Selecting distinct values from multiple column of a table with their count

i want to to select distinct values from multiple columns of same table with their count
example
and the output should be like this
Since the OP says in his comment that he uses MYSQL this should work
SELECT data,COUNT(data)
FROM
(
SELECT COL1 data
FROM tableso
UNION ALL
SELECT COL2
FROM tableso
UNION ALL
SELECT COL3
FROM tableso
UNION ALL
SELECT COL4
FROM tableso
UNION ALL
SELECT COL6
FROM tableso
UNION ALL
SELECT COL7
FROM tableso
) finaltable group by data;
SQL FIDDLE: http://sqlfiddle.com/#!2/1f8cf/10
I dont think the accepted answer works on MYSQL
UPDATE:
The op has changed his mind about the database(to MSSQL) and the accepted answer has both db versions
Since you are using Sql Server too, will suggest you this way
Use Table valued constructor to convert the columns to rows then count the name
This will avoid multiple table scan's.
SELECT name,
[count]=Count(1)
FROM yourtable
CROSS apply (VALUES (col1),(COL2),(col3),(col4),(COL6),(col7)) cs (name)
GROUP BY name
Note : This will not work in Mysql
SQLFIDDLE DEMO
Get all column values to one row and find the count
SQL SERVER
;WITH CTE AS
(
SELECT COL1 Name
FROM YOURTABLE
UNION ALL
SELECT COL2
FROM YOURTABLE
UNION ALL
SELECT COL3
FROM YOURTABLE
UNION ALL
SELECT COL4
FROM YOURTABLE
UNION ALL
SELECT COL6
FROM YOURTABLE
UNION ALL
SELECT COL7
FROM YOURTABLE
)
SELECT DISTINCT Name,COUNT(Name) OVER(PARTITION BY Name) [COUNT]
FROM CTE
MYSQL
SELECT Name,COUNT(*) [COUNT]
(
SELECT COL1 Name
FROM #TEMP
UNION ALL
SELECT COL2
FROM #TEMP
UNION ALL
SELECT COL3
FROM #TEMP
UNION ALL
SELECT COL4
FROM #TEMP
UNION ALL
SELECT COL6
FROM #TEMP
UNION ALL
SELECT COL7
FROM #TEMP
)TAB
GROUP BY Name

Getting duplicate rows by several columns in MySQL

I'm trying to search duplicate rows by several columns in large table (near 18 000 rows). Problem is that queries take a lot of time, I tried this:
SELECT * FROM table_name a, table_name b
WHERE a.col1 = b.col1
AND a.col2 = b.col2
AND a.col3 = b.col3
AND a.col4 = b.col4
AND a.id <> b.id
and this:
SELECT *
FROM table_name
WHERE col1 IN (
SELECT col1
FROM table_name
GROUP BY col1
HAVING count(col1) > 1
)
AND col2 IN (
SELECT col2
FROM table_name
GROUP BY col2
HAVING count(col2) > 1
)
AND col3 IN (
SELECT col3
FROM table_name
GROUP BY col3
HAVING count(col3) > 1
)
AND col4 IN (
SELECT col4
FROM table_name
GROUP BY col4
HAVING count(col4) > 1
)
they both work, but too slow. Any ideas?
You can try using one joint GROUP BY statement like:
SELECT * FROM table_name
GROUP BY col1, col2, col3, col4
HAVING count(*) > 1
At the very least, it will look cleaner.
EDIT
To return all results as a sub-set for the previous column:
SELECT *
FROM table_name
WHERE col4 IN (
SELECT col4
FROM table_name
WHERE col3 IN (
SELECT col3
FROM table_name
WHERE col2 IN (
SELECT col2
FROM table_name
WHERE col1 IN (
SELECT col1
FROM table_name
GROUP BY col1
HAVING count(col1) > 1
)
)
)
This, in concept, should give you all results in a faster execution time.