Select N random records per group - mysql

Hallo and good sunday to everybody.
I need to select N random records from each group.
Starting from the query of Quassnoi
http://explainextended.com/2009/03/01/selecting-random-rows/
to select X random record I wrote this store procedure
delimiter //
drop procedure if exists casualiPerGruppo //
create procedure casualiPerGruppo(in tabella varchar(50),in campo varchar(50),in numPerGruppo int)
comment 'Selezione di N record casuali per gruppo'
begin
declare elenco_campi varchar(255);
declare valore int;
declare finite int default 0;
declare query1 varchar(250);
declare query2 varchar(250);
declare query3 varchar(250);
declare query4 varchar(250);
declare cur_gruppi cursor for select gruppo from tmp_view;
declare continue handler for not found set finite = 1;
drop table if exists tmp_casuali;
set #query1 = concat('create temporary table tmp_casuali like ', tabella);
prepare stmt from #query1;
execute stmt;
deallocate prepare stmt;
set #query2 = concat('create or replace view tmp_view as select ',campo,' as gruppo from ',tabella,' group by ',campo);
prepare stmt from #query2;
execute stmt;
deallocate prepare stmt;
open cur_gruppi;
mio_loop:loop
fetch cur_gruppi into valore;
if finite = 1 then
leave mio_loop;
end if;
set #query3 = concat("select group_concat(column_name) into #elenco_campi
from information_schema.columns
where table_name = '",tabella,"' and table_schema = database()");
prepare stmt from #query3;
execute stmt;
deallocate prepare stmt;
set #query4 = concat('insert into tmp_casuali select ',
#elenco_campi,' from (
select #cnt := count(*) + 1,
#lim :=', numPerGruppo,
' from ',tabella,
' where ',campo,' = ', valore,
' ) vars
straight_join
(
select r.*,
#lim := #lim - 1
from ', tabella, ' r
where (#cnt := #cnt - 1)
and rand() < #lim / #cnt and ', campo, ' = ', valore ,
') i');
prepare stmt from #query4;
execute stmt;
deallocate prepare stmt;
end loop;
close cur_gruppi;
select * from tmp_casuali;
end //
delimiter ;
that I recall in this way to give you an idea:
create table prova (
id int not null auto_increment primary key,
id_gruppo int,
altro varchar(10)
) engine = myisam;
insert into prova (id_gruppo,altro) values
(1,'aaa'),(2,'bbb'),(3,'ccc'),(1,'ddd'),(1,'eee'),(2,'fff'),
(2,'ggg'),(2,'hhh'),(3,'iii'),(3,'jjj'),(3,'kkk'),(1,'lll'),(4,'mmm');
call casualiPerGruppo('prova','id_gruppo',2);
My problem is that Quassnoi query, even though is very performant, it takes even 1 second on a large recorset. So if I apply it within my sp several times, the total time increases a lot.
Can you suggest me a better way to solve my problem?
Thanks in advance
EDIT.
create table `prova` (
`id` int(11) not null auto_increment,
`id_gruppo` int(11) default null,
`prog` int(11) default null,
primary key (`id`)
) engine=myisam charset=latin1;
delimiter //
drop procedure if exists inserisci //
create procedure inserisci(in quanti int)
begin
declare i int default 0;
while i < quanti do
insert into prova (id_gruppo,prog) values (
(floor(1 + (rand() * 100))),
(floor(1 + (rand() * 30)))
);
set i = i + 1;
end while;
end //
delimiter ;
call inserisci(1000000);
#Clodoaldo:
My stored procedure
call casualipergruppo('prova','id_gruppo',2);
gives me 200 records and takes about 23 seconds. Your stored procedure keeps on giving me Error Code : 1473 Too high level of nesting for select even though I increase varchar value to 20000. I don't know if there is any limit on unions involved in a query.

I removed the tabella and campo parameters from the procedure just to make it easier to understand. I'm sure you can bring them back.
delimiter //
drop procedure if exists casualiPerGruppo //
create procedure casualiPerGruppo(in numPerGruppo int)
begin
declare valore int;
declare finite int default 0;
declare query_part varchar(200);
declare query_union varchar(2000);
declare cur_gruppi cursor for select distinct id_gruppo from prova;
declare continue handler for not found set finite = 1;
create temporary table resultset (id int, id_gruppo int, altro varchar(10));
set #query_part = 'select id, id_gruppo, altro from (select id, id_gruppo, altro from prova where id_gruppo = #id_gruppo order by rand() limit #numPerGruppo) ss#id_gruppo';
set #query_part = replace(#query_part, '#numPerGruppo', numPerGruppo);
set #query_union = '';
open cur_gruppi;
mio_loop:loop
fetch cur_gruppi into valore;
if finite = 1 then
leave mio_loop;
end if;
set #query_union = concat(#query_union, concat(' union ', #query_part));
set #query_union = replace(#query_union, '#id_gruppo', valore);
end loop;
close cur_gruppi;
set #query_union = substr(#query_union, 8);
set #query_union = concat('insert into resultset ', #query_union);
prepare stmt from #query_union;
execute stmt;
deallocate prepare stmt;
select * from resultset order by id_gruppo, altro;
drop table resultset;
end //
delimiter ;

Wow. That's a complicated way to do something very simple. Try this:
Assuming you have sequential ids (otherwise you could get no rows).
create view random_prova as
select * from prova
where id = (select min(id) from prova) +
floor(RAND(0) * (select max(id) - min(id) from prova));
This will give you 1 random row.
To get multiple rows, either loop in a stored procedure or server program until you get enough rows, or programatically create a query that employs union.
eg, this will give you 3 random rows:
select * from random_prova
union
select * from random_prova
union
select * from random_prova;
Note that using RAND(0) instead of RAND() means getting a different random number for each invocation. RAND() will give the same value for each invocation in the one statement (so using RAND() with union won't give you multiple rows).
There are some shortcomings with using union - it is possible to get the same row twice by chance. Programatically calling this until you get enough rows is safer.
To give better performance, use something like java to randomly select the ids for a simple query like
select * from prova where id in (...)
and have java (or perl or whatever) fill in the list with random ids - you would avoid the inefficiency of having to get the id range every time.
Post if your ids are not sequential - there is an efficient way, but I its explanation is long.

Related

MySQL select from all tables with a column name

Let's say I query this:
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME like 'coduser';
it returns a list of tables which contains the column "coduser":
users
messages
passwords
photos
It's a 20+ items list.
I need to search on those tables all occurrences where "coduser" is equal to "5OEWP1BPSV".
SELECT * FROM tablenamehere WHERE coduser = "5OEWP1BPSV";
but I'm not using anything other than MySQL to do this.
Basically just search all tables where there is a column called "coduser" and coduser = "5OEWP1BPSV".
You will need to use dynamic sql and given the small numbers involved a cursor would be appropriate
drop table if exists t,t1;
create table t(id int auto_increment primary key , codeuser varchar(20));
create table t1(id int auto_increment primary key , codeuser varchar(20));
insert into t(codeuser) values
('aaa'),('5OEWP1BPSV');
insert into t(codeuser) values
('bbb');
drop procedure if exists p;
delimiter $$
create procedure p()
begin
declare tablename varchar(20);
declare finished int;
DECLARE GRD_CUR CURSOR FOR
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME like 'codeuser';
DECLARE CONTINUE HANDLER FOR NOT FOUND SET FINISHED = 0;
OPEN GRD_CUR;
LOOPROWS:LOOP
FETCH GRD_CUR INTO tablename;
IF FINISHED = 0 THEN
LEAVE LOOPROWS;
END IF;
#build and execute dynamic sql
set #sql = concat('SELECT * FROM ' , tablename, ' WHERE codeuser = "5OEWP1BPSV"');
#select #sql;
prepare sqlstmt from #sql;
execute sqlstmt;
deallocate prepare sqlstmt;
END LOOP;
close grd_cur;
end $$
delimiter ;
call p();
Loop over the returned list of tables, and for each table in that list perform a
SELECT *
WHERE columnnamehere = "valueofcolumnhere";
Bear in mind that this will give you separate lists for each table within that FOR loop, so you would have to concatenate these in some way to get a complete list.

SQL. Filter null fields in resonse

I have table users, with fields: id, name, age, gender, country, city, comment. Fields maybe null. For example:
cursor.execute('select * from users where id = 12')
cursor.fetchone()
(12, 'alex', 33, 'male', None, None, None)
How I can get back from query only not null fields?
This query must return me just
(12, 'alex', 33, 'male')
I can easily do it with a programming language, but I losing my resources by getting redundant info from tables and it also forced me to add redundant code.
So, you need to get customized table-oriented result with columns dynamically generated with SQL ANSI.
You will need a kit with two procedures:
Check if a column has only NULL values or not.
Gather all columns with NO NULL values.
With these two procedures you can check each column of a table strcuture and get an output withtout columns with NULL values.
The first procedure is generic:
set delimiter //
create procedure check_field_null(col varchar(64),
schemaname varchar(255),
tablename varchar(255),
out QN int)
BEGIN
SET #sSQL = concat('SELECT #N := COUNT(*) FROM ', schemaname, '.', tablename , ' WHERE (', col, ' <=> NULL);');
prepare stm from #sSQL;
execute stm;
set QN =#N;
deallocate prepare stm;
END
//
The second procedure means that you have the above mentioned method to identify NOT NULLS columns and put all to search.
set delimiter //
create procedure cur_cs_customer(inout allcols varchar(255), in my_schema_name varchar(64), in my_table_name varchar(64))
BEGIN
DECLARE Count_Null int default 0;
DECLARE initial INT DEFAULT 0;
DECLARE MYCOL char(64);
DECLARE ch_done INT DEFAULT 0;
DECLARE cs_cur1 CURSOR
FOR SELECT C.COLUMN_NAME
FROM information_schema.COLUMNS C
WHERE C.TABLE_SCHEMA = my_schema_name
AND C.TABLE_NAME = my_table_name;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET ch_done = true;
open cs_cur1;
read_cs_cur1:
LOOP
FETCH cs_cur1 INTO MYCOL;
IF (ch_done ) THEN
LEAVE read_cs_cur1;
END IF;
IF NOT isnull(MYCOL) THEN
call check_field_null(MYCOL,
my_schema_name,
my_table_name,
Count_Null);
if Count_Null = 0 then
/* Only it inlcudes fields with not null values */
set initial = initial + 1;
if initial = 1 then
SET allcols = MYCOL;
else
SET allcols = concat( cast(allcols as char(255)), ',', MYCOL);
end if;
end if;
END IF;
END LOOP read_cs_cur1;
close cs_cur1;
select allcols;
END
//
After, you may run as follows:
set delimiter ;
call cur_cs_customer(#my_args, 'schema_name', 'users');
select #my_args;
set #stm = concat('SELECT ', #my_args, ' FROM users;');
PREPARE stmt1 FROM #stm;
execute stmt1;
deallocate prepare stmt1;
Further explanation may be checked in an article here! .
I hope this tip may help you.

select from unknown number of tables with parameter

I want to select (union) rows from multiple tables using Parameter
I have table w with two columns:
The column table_name is referring to other tables in my DB, and condition is the 'where' that should be added to the query.
table_name | condition
---------------------
x | y=2
x | r=3
t | y=2
the query should be something like:
select * from x where y=2
union
select * from x where r=3
union
select * from t where y=2
of course that the number of unions is unknown.
Should it be stored procedure? cursor?
One way to get this done. Initial answer was SQL Server syntax. This edit has the MySQL syntax. Make sure your temp table cannot be accessed at the same time. E.g. In MySQL temp tables are unique to the connection. Also add your error checking. In MySQL set the appropriate varchar size for your needs. I used 1024 across the board just for testing purposes.
MySQL syntax
CREATE table test (
id int,
table_name varchar(1024),
where_c varchar(1024)
);
INSERT into test(id, table_name, where_c) values
(1,'x','y=2'),
(2,'x','r=3'),
(3,'t','y=2');
DROP PROCEDURE IF EXISTS generate_sql;
DELIMITER //
CREATE PROCEDURE generate_sql()
BEGIN
DECLARE v_table_name VARCHAR(1024);
DECLARE v_where_c VARCHAR(1024);
DECLARE table_id INT;
DECLARE counter INT;
DECLARE v_SQL varchar(1024);
CREATE TEMPORARY table test_copy
SELECT * FROM test;
SET v_SQL = '';
SET counter = (SELECT COUNT(1) FROM test_copy);
WHILE counter > 0 DO
SELECT id, table_name, where_c
INTO table_id, v_table_name, v_where_c
FROM test_copy LIMIT 1;
SET v_SQL = CONCAT(v_SQL, 'SELECT * FROM ', v_table_name, ' WHERE ', v_where_c);
DELETE FROM test_copy WHERE id=table_id;
SET counter = (SELECT COUNT(1) FROM test_copy);
IF counter > 0 THEN
SET v_SQL = CONCAT(v_SQL,' UNION ');
ELSE
SET v_SQL = v_SQL;
END IF;
END WHILE;
DROP table test_copy;
SELECT v_SQL;
END //
DELIMITER ;
call generate_sql()
SQL Server syntax
CREATE table test (
id int,
table_name varchar(MAX),
condition varchar(MAX)
);
INSERT into test(id, table_name, condition) values
(1,'x','y=2'),
(2,'x','r=3'),
(3,'t','y=2');
SELECT * INTO #temp FROM test;
DECLARE #SQL varchar(MAX);
SET #SQL='';
while exists (select * from #temp)
begin
DECLARE #table_name varchar(MAX);
DECLARE #condition varchar(MAX);
DECLARE #table_id int;
SELECT top 1 #table_id=id, #table_name=table_name, #condition=condition FROM #temp;
SET #SQL += 'SELECT * FROM ' + #table_name + ' WHERE ' + #condition;
delete #temp where id = #table_id;
if exists (select * from #temp) SET #SQL += ' UNION ';
end
SELECT #SQL;
drop table #temp;
Assuming that tables x and t have the same definition and that you want to ignore duplicate results by using UNION rather than UNION ALL, the following should work:
SET #sql = '';
SELECT GROUP_CONCAT(
CONCAT('SELECT * FROM `', `table_name`, '` WHERE ', `condition`)
SEPARATOR ' UNION ') INTO #sql
FROM w;
SET #sql = CONCAT('SELECT * FROM ( ', #sql, ' ) a;');
PREPARE stmt FROM #sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
(I've edited your question slightly because you had two different definitions for table x)

Dynamic table name variable in stored procedure

I want to create a function that will create a unique random id. The parameters will simply be min (the minimum number), max (the maximum number), and tablename (the name of the table to check to see if the id produced by the rand() function already exists).
I have discovered through other posts that you can't pass table names into functions, because functions can't execute dynamic SQL, but you can pass them into stored procedures. I have found numerous examples on StackOverflow of how to pass table names into stored procedures, and they all boil down to using prepared statements.
I have created a stored procedure as shown below:
DELIMITER $$
CREATE DEFINER=`user`#`localhost` PROCEDURE `rand_id`(IN `min` INT, IN `max` INT, IN `tablename` VARCHAR(20) CHARSET utf8, OUT `uid` INT)
BEGIN
DECLARE count_id int;
SET count_id = 1;
SET #s = CONCAT('COUNT(`id`) INTO count_id FROM `', tablename, '` WHERE `id` = ', uid);
WHILE count_id > 0 DO
SET uid = FLOOR(rand() * max + min);
PREPARE stmt from #s;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END WHILE;
END$$
DELIMITER ;
Whenever I run the following code:
CALL rand_id(1000000000, 9999999999, 'test', #id);
SELECT #id;
I get this error:
#1064 - You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'NULL' at line 1
I'm at a loss for what's wrong. I saw somewhere that you can't use user variables inside a stored procedure, but that seems to be incorrect because there are a lot of examples on StackOverflow where the correct solutions do just that.
Sorry for my low level of MySQL understanding. I'm sure my code is fraught with syntax errors and poor design. I appreciate any help I can get. I researched this for quite a while and tried many things but to no avail. The above portion of code is the closest I've been able to get, and yields the least errors, but it's still not working.
Thank you.
EDIT: As per the second example in #Barmar's answer, I changed my code to look like this:
BEGIN
DECLARE count_id int;
SET count_id = 1;
SET #s = CONCAT('SELECT COUNT(`id`) INTO count_id FROM `', tablename, '` WHERE `id` = ?');
PREPARE stmt from #s;
WHILE count_id > 0 DO
SET #uid = FLOOR(rand() * max + min);
EXECUTE stmt USING #uid;
END WHILE;
DEALLOCATE PREPARE stmt;
SET uid = #uid;
END
It seems to have fixed my initial problem but now I get this error:
#1327 - Undeclared variable: count_id
EDIT: Here is my code changed to fit #slaakso's answer, and add in what #Barmar said about using #count_id:
DELIMITER $$
CREATE DEFINER=`mjrinker`#`localhost` PROCEDURE `rand_id`(IN `min` BIGINT, IN `max` BIGINT, IN `tablename` VARCHAR(128) CHARSET utf8, OUT `uid` BIGINT)
BEGIN
SET #count_id = 1;
SET uid = 0;
SET #s = CONCAT('SELECT COUNT(`id`) INTO #count_id FROM `', tablename, '` WHERE `id` = ?');
PREPARE stmt from #s;
WHILE #count_id > 0 DO
SET #uid = FLOOR(rand() * max + min);
EXECUTE stmt USING #uid;
END WHILE;
DEALLOCATE PREPARE stmt;
SET uid = #uid;
END$$
DELIMITER ;
You need to assign #s after you assign the uid variable.
You're also missing the SELECT keyword in your query.
SET #count_id = 1
WHILE #count_id > 0 DO
SET uid = FLOOR(rand() * max + min);
SET #s = CONCAT('SELECT COUNT(`id`) INTO #count_id FROM `', tablename, '` WHERE `id` = ', uid);
PREPARE stmt from #s;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
END WHILE;
But you should actually just prepare the statement once, using a placeholder, which you fill in when using EXECUTE.
SET #count_id = 1
SET #s = CONCAT('SELECT COUNT(`id`) INTO #count_id FROM `', tablename, '` WHERE `id` = ?');
PREPARE stmt from #s;
WHILE #count_id > 0 DO
SET #uid = FLOOR(rand() * max + min);
EXECUTE stmt USING #uid;
END WHILE;
DEALLOCATE PREPARE stmt;
SET uid = #uid;
Note that the parameters to EXECUTE have to be user variables, that's why I changed uid to #uid there. Then we set the output parameter at the end of the loop.
You also need to use a user variable for INTO #count_id.
First of all it is highly unusual to use random numbers as ID's for tables. Mabye you should consider using AUTO_INCREMENT columns.
If you really want to use random numbers, couple of fixes for the code:
You should use value for uid for the first time you run the query
(without it it will be NULL, therefore the error).
You are missing SELECT in your dynamic query
The "INTO count_id" syntax will not work as count_id is not visible inside the dynamic SQL (use #var variable instead)
Your min and max values are declared as INT's, but your passed parameters exceed the INT range (-2147483648 - 2147483647)

MySql, split a string and insert into table

I have two inputs for my stored procedure. One is the 'RoledID' and second one is the 'MenuIDs'. 'MenusIDs' is a list of comma separated menus ids that need to be inserted with RoledID. RoleId is just an INT and we need to put this RoledID against each MenuID. My table 'RolesMenus' contains two columns one for MenuID and one for RoleID.
Now I need to split MenuIDs and insert each MenuID with RoleID.
How can I write a stored procedure for it?
You can build one INSERT query (because statement allows to insert multiple records) and run it with prepared statements, e.g. -
SET #MenuIDs = '1,2,3';
SET #RoledID = 100;
SET #values = REPLACE(#MenuIDs, ',', CONCAT(', ', #RoledID, '),('));
SET #values = CONCAT('(', #values, ', ', #RoledID, ')'); -- This produces a string like this -> (1, 100),(2, 100),(3, 100)
SET #insert = CONCAT('INSERT INTO RolesMenus VALUES', #values); -- Build INSERT statement like this -> INSERT INTO RolesMenus VALUES(1, 100),(2, 100),(3, 100)
-- Execute INSERT statement
PREPARE stmt FROM #insert;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;
As you see, it can be done without stored procedure.
Give this a go. It may need some tweaking if the MenuIDs string does not conform to 'menuId,menuId,menuId'.
Also I do not know what data type the menuId column is in your target table (INT?) so you may have to put some numeric checking in too (in case '1,2,3,banana,4,5' is passed in as the MenuIds input parameter).
DELIMITER $$
DROP PROCEDURE IF EXISTS `insert_role_menuids`$$
CREATE PROCEDURE `insert_role_menuids`(IN RoleID INT,IN MenuIDs varchar(500))
BEGIN
declare idx,prev_idx int;
declare v_id varchar(10);
set idx := locate(',',MenuIDs,1);
set prev_idx := 1;
WHILE idx > 0 DO
set v_id := substr(MenuIDs,prev_idx,idx-prev_idx);
insert into RolesMenus (RoleId,MenuId) values (RoleID,v_id);
set prev_idx := idx+1;
set idx := locate(',',MenuIDs,prev_idx);
END WHILE;
set v_id := substr(MenuIDs,prev_idx);
insert into RolesMenus (RoleId,MenuId) values (RoleID,v_id);
END$$
DELIMITER ;
for this solution, you must create a table with the name split_table, it can have a id(autoincrement) if you need it and must have a column where to store the value (I call it valor)
DELIMITER $$
USE `dbaname`$$
DROP PROCEDURE IF EXISTS `Split`$$
CREATE DEFINER=`root`#`localhost` PROCEDURE `Split`(
IN cadena VARCHAR(8000),
IN delimitador VARCHAR(10)
)
BEGIN
TRUNCATE split_table;
SET #posicion = 1;
SET #ldel = LENGTH(delimitador);
SET #valor = SUBSTRING_INDEX(cadena, delimitador, 1);
WHILE #valor <> '' AND #posicion > 0 DO
SET #valor = SUBSTRING_INDEX(cadena, delimitador, 1);
INSERT INTO split_table(valor) VALUES (#valor);
SET #posicion = POSITION(delimitador IN cadena);
SET #largo = LENGTH(cadena);
IF #largo >= #posicion THEN
SET cadena = SUBSTR(cadena, #posicion + #ldel, #largo - #posicion);
SET #valor = SUBSTRING_INDEX(cadena, delimitador, 1);
ELSE
SET #posicion = 0;
END IF;
END WHILE;
END$$
DELIMITER ;
First create procedure
CREATE DEFINER=`root`#`localhost` PROCEDURE `split_str_save_to_tmp_table`(
IN _str TEXT,
IN _table_name VARCHAR(80)
)
BEGIN
#DROP FIRST OLD TABLE
SET #q = CONCAT('DROP TEMPORARY TABLE IF EXISTS ', _table_name);
PREPARE st FROM #q;
EXECUTE st;
#CREATE TABLE
SET #q = CONCAT('CREATE TEMPORARY TABLE ', _table_name, '(id INT UNSIGNED NOT NULL PRIMARY KEY (id) )' );
PREPARE st FROM #q;
EXECUTE st;
SET #ids = REPLACE(_str, ',', '),(');
SET #ids = CONCAT('(', #ids, ')');
#INSERT INTO TABLE
SET #q = CONCAT('INSERT INTO ' , _table_name ,' VALUES');
SET #q = CONCAT(#q, #ids);
PREPARE st FROM #q;
EXECUTE st;
DEALLOCATE PREPARE st;
END
Then call
call split_str_save_to_tmp_table('1,2,3,4,5', 'tmp_split_product');
SELECT * FROM tmp_split_product
AFAIK MySQL does not have a function to split strings. Here is the MySQL manual for string related functions. In the comments section should be some information about workarounds for splitting string with substring-functions but not really usable:
MySQL manual