I have some string output which contain alphanumeric value. I want to get only Digits from that string. how can I fetch this by query? which MySql function can I Use?
My query is like :
select DISTINCT SUBSTRING(referrerURL,71,6)
from hotshotsdblog1.annonymoustracking
where advertiserid = 10
limit 10;
Output :
100683
101313
19924&
9072&h
12368&
5888&h
10308&
100664
1&hash
101104
And I Want output like :
100683
101313
19924
9072
12368
5888
10308
100664
1
101104
If the string starts with a number, then contains non-numeric characters, you can use the CAST() function or convert it to a numeric implicitly by adding a 0:
SELECT CAST('1234abc' AS UNSIGNED); -- 1234
SELECT '1234abc'+0; -- 1234
To extract numbers out of an arbitrary string you could add a custom function like this:
DELIMITER $$
CREATE FUNCTION `ExtractNumber`(in_string VARCHAR(50))
RETURNS INT
NO SQL
BEGIN
DECLARE ctrNumber VARCHAR(50);
DECLARE finNumber VARCHAR(50) DEFAULT '';
DECLARE sChar VARCHAR(1);
DECLARE inti INTEGER DEFAULT 1;
IF LENGTH(in_string) > 0 THEN
WHILE(inti <= LENGTH(in_string)) DO
SET sChar = SUBSTRING(in_string, inti, 1);
SET ctrNumber = FIND_IN_SET(sChar, '0,1,2,3,4,5,6,7,8,9');
IF ctrNumber > 0 THEN
SET finNumber = CONCAT(finNumber, sChar);
END IF;
SET inti = inti + 1;
END WHILE;
RETURN CAST(finNumber AS UNSIGNED);
ELSE
RETURN 0;
END IF;
END$$
DELIMITER ;
Once the function is defined, you can use it in your query:
SELECT ExtractNumber("abc1234def") AS number; -- 1234
To whoever is still looking, use regex:
select REGEXP_SUBSTR(name,"[0-9]+") as amount from `subscriptions`
Here I got success with this function:
select REGEXP_REPLACE('abc12.34.56-ghj^-_~##!', '[^0-9]+', '')
output: 123456
Explaining: basically I'm asking for mysql replace all 'not numbers' in interval from 0 to 9 to ''.
Based on Eugene Yarmash Answer. Here is a version of the custom function that extracts a decimal with two decimal places. Good for price extraction.
DELIMITER $$
CREATE FUNCTION `ExtractDecimal`(in_string VARCHAR(255))
RETURNS decimal(15,2)
NO SQL
BEGIN
DECLARE ctrNumber VARCHAR(255);
DECLARE in_string_parsed VARCHAR(255);
DECLARE digitsAndDotsNumber VARCHAR(255) DEFAULT '';
DECLARE finalNumber VARCHAR(255) DEFAULT '';
DECLARE sChar VARCHAR(1);
DECLARE inti INTEGER DEFAULT 1;
DECLARE digitSequenceStarted boolean DEFAULT false;
DECLARE negativeNumber boolean DEFAULT false;
-- FIX FIND_IN_SET cannot find a comma ","
SET in_string_parsed = replace(in_string,',','.');
IF LENGTH(in_string_parsed) > 0 THEN
-- extract digits and dots
WHILE(inti <= LENGTH(in_string_parsed)) DO
SET sChar = SUBSTRING(in_string_parsed, inti, 1);
SET ctrNumber = FIND_IN_SET(sChar, '0,1,2,3,4,5,6,7,8,9,.');
IF ctrNumber > 0 AND (sChar != '.' OR LENGTH(digitsAndDotsNumber) > 0) THEN
-- add first minus if needed
IF digitSequenceStarted = false AND inti > 1 AND SUBSTRING(in_string_parsed, inti-1, 1) = '-' THEN
SET negativeNumber = true;
END IF;
SET digitSequenceStarted = true;
SET digitsAndDotsNumber = CONCAT(digitsAndDotsNumber, sChar);
ELSEIF digitSequenceStarted = true THEN
SET inti = LENGTH(in_string_parsed);
END IF;
SET inti = inti + 1;
END WHILE;
-- remove dots from the end of number list
SET inti = LENGTH(digitsAndDotsNumber);
WHILE(inti > 0) DO
IF(SUBSTRING(digitsAndDotsNumber, inti, 1) = '.') THEN
SET digitsAndDotsNumber = SUBSTRING(digitsAndDotsNumber, 1, inti-1);
SET inti = inti - 1;
ELSE
SET inti = 0;
END IF;
END WHILE;
-- extract decimal
SET inti = 1;
WHILE(inti <= LENGTH(digitsAndDotsNumber)-3) DO
SET sChar = SUBSTRING(digitsAndDotsNumber, inti, 1);
SET ctrNumber = FIND_IN_SET(sChar, '0,1,2,3,4,5,6,7,8,9');
IF ctrNumber > 0 THEN
SET finalNumber = CONCAT(finalNumber, sChar);
END IF;
SET inti = inti + 1;
END WHILE;
SET finalNumber = CONCAT(finalNumber, RIGHT(digitsAndDotsNumber, 3));
IF negativeNumber = true AND LENGTH(finalNumber) > 0 THEN
SET finalNumber = CONCAT('-', finalNumber);
END IF;
IF LENGTH(finalNumber) = 0 THEN
RETURN 0;
END IF;
RETURN CAST(finalNumber AS decimal(15,2));
ELSE
RETURN 0;
END IF;
END$$
DELIMITER ;
Tests:
select ExtractDecimal("1234"); -- 1234.00
select ExtractDecimal("12.34"); -- 12.34
select ExtractDecimal("1.234"); -- 1234.00
select ExtractDecimal("1,234"); -- 1234.00
select ExtractDecimal("1,111,234"); -- 11111234.00
select ExtractDecimal("11,112,34"); -- 11112.34
select ExtractDecimal("11,112,34 and 123123"); -- 11112.34
select ExtractDecimal("-1"); -- -1.00
select ExtractDecimal("hello. price is 123"); -- 123.00
select ExtractDecimal("123,45,-"); -- 123.45
Here is my improvement over ExtractNumber function by Eugene Yarmash.
It strips not only non-digit characters, but also HTML entities like &#[0-9];, which should be considered as non-digit unicode characters too.
Here is the code without UDP on pure MySQL <8.
CREATE DEFINER = 'user'#'host' FUNCTION `extract_number`(
str CHAR(255)
)
RETURNS char(255) CHARSET utf8mb4 COLLATE utf8mb4_unicode_ci
DETERMINISTIC
NO SQL
SQL SECURITY DEFINER
COMMENT ''
BEGIN
DECLARE tmp VARCHAR(255);
DECLARE res VARCHAR(255) DEFAULT "";
DECLARE chr VARCHAR(1);
DECLARE len INTEGER UNSIGNED DEFAULT LENGTH(str);
DECLARE i INTEGER DEFAULT 1;
IF len > 0 THEN
WHILE i <= len DO
SET chr = SUBSTRING(str, i, 1);
/* remove &#...; */
IF "&" = chr AND "#" = SUBSTRING(str, i+1, 1) THEN
WHILE (i <= len) AND (";" != SUBSTRING(str, i, 1)) DO
SET i = i + 1;
END WHILE;
END IF;
SET tmp = FIND_IN_SET(chr, "0,1,2,3,4,5,6,7,8,9");
IF tmp > 0 THEN
SET res = CONCAT(res, chr);
END IF;
SET i = i + 1;
END WHILE;
RETURN res;
END IF;
RETURN 0;
END;
But if you are using UDP's PREG_REPLACE, you can use just following line:
RETURN PREG_REPLACE("/[^0-9]/", "", PREG_REPLACE("/&#[0-9]+;/", "", str));
I have rewritten this for MemSQL Syntax:
DROP FUNCTION IF EXISTS GetNumeric;
DELIMITER //
CREATE FUNCTION GetNumeric(str CHAR(255)) RETURNS CHAR(255) AS
DECLARE i SMALLINT = 1;
DECLARE len SMALLINT = 1;
DECLARE ret CHAR(255) = '';
DECLARE c CHAR(1);
BEGIN
IF str IS NULL
THEN
RETURN "";
END IF;
WHILE i < CHAR_LENGTH( str ) + 1 LOOP
BEGIN
c = SUBSTRING( str, i, 1 );
IF c BETWEEN '0' AND '9' THEN
ret = CONCAT(ret,c);
END IF;
i = i + 1;
END;
END LOOP;
RETURN ret;
END //
DELIMITER ;
SELECT GetNumeric('abc123def456xyz789') as test;
Based on Eugene Yarmash and Martins Balodis answers.
In my case, I didn't know whether the source string contains dot as a decimal separator. Although, I knew how the specific column should be treated. E.g. in case value came up as "10,00" hours but not as "1.00" we know that the last delimiter character should be treated as a dot decimal separator. For this purposes, we can rely on a secondary boolean param that specifies how the last comma separator should behave.
DELIMITER $$
CREATE FUNCTION EXTRACT_DECIMAL(
inString VARCHAR(255)
, treatLastCommaAsDot BOOLEAN
) RETURNS varchar(255) CHARSET utf8mb4
NO SQL
DETERMINISTIC
BEGIN
DECLARE ctrNumber VARCHAR(255);
DECLARE inStringParsed VARCHAR(255);
DECLARE digitsAndDotsNumber VARCHAR(255) DEFAULT '';
DECLARE digitsBeforeDotNumber VARCHAR(255) DEFAULT '';
DECLARE digitsAfterDotNumber VARCHAR(255) DEFAULT '';
DECLARE finalNumber VARCHAR(255) DEFAULT '';
DECLARE separatorChar VARCHAR(1) DEFAULT '_';
DECLARE iterChar VARCHAR(1);
DECLARE inti INT DEFAULT 1;
DECLARE digitSequenceStarted BOOLEAN DEFAULT false;
DECLARE negativeNumber BOOLEAN DEFAULT false;
-- FIX FIND_IN_SET cannot find a comma ","
-- We need to separate entered dot from another delimiter characters.
SET inStringParsed = TRIM(REPLACE(REPLACE(inString, ',', separatorChar), ' ', ''));
IF LENGTH(inStringParsed) > 0 THEN
-- Extract digits, dots and delimiter character.
WHILE(inti <= LENGTH(inStringParsed)) DO
-- Might contain MINUS as the first character.
SET iterChar = SUBSTRING(inStringParsed, inti, 1);
SET ctrNumber = FIND_IN_SET(iterChar, CONCAT('0,1,2,3,4,5,6,7,8,9,.,', separatorChar));
-- In case the first extracted character is not '.' and `digitsAndDotsNumber` is set.
IF ctrNumber > 0 AND (iterChar != '.' OR LENGTH(digitsAndDotsNumber) > 0) THEN
-- Add first minus if needed. Note: `inti` at this point will be higher than 1.
IF digitSequenceStarted = FALSE AND inti > 1 AND SUBSTRING(inStringParsed, inti - 1, 1) = '-' THEN
SET negativeNumber = TRUE;
END IF;
SET digitSequenceStarted = TRUE;
SET digitsAndDotsNumber = CONCAT(digitsAndDotsNumber, iterChar);
ELSEIF digitSequenceStarted = true THEN
SET inti = LENGTH(inStringParsed);
END IF;
SET inti = inti + 1;
END WHILE;
-- Search the left part of string until the separator.
-- https://stackoverflow.com/a/43699586
IF (
-- Calculates the amount of delimiter characters.
CHAR_LENGTH(digitsAndDotsNumber)
- CHAR_LENGTH(REPLACE(digitsAndDotsNumber, separatorChar, SPACE(LENGTH(separatorChar)-1)))
) + (
-- Calculates the amount of dot characters.
CHAR_LENGTH(digitsAndDotsNumber)
- CHAR_LENGTH(REPLACE(digitsAndDotsNumber, '.', SPACE(LENGTH(separatorChar)-1)))
) > 0 THEN
-- If dot is present in the string. It doesn't matter for the other characters.
IF LOCATE('.', digitsAndDotsNumber) != FALSE THEN
-- Replace all special characters before the dot.
SET inti = LOCATE('.', digitsAndDotsNumber) - 1;
-- Return the first half of numbers before the last dot.
SET digitsBeforeDotNumber = SUBSTRING(digitsAndDotsNumber, 1, inti);
SET digitsBeforeDotNumber = REPLACE(digitsBeforeDotNumber, separatorChar, '');
SET digitsAfterDotNumber = SUBSTRING(digitsAndDotsNumber, inti + 2, LENGTH(digitsAndDotsNumber) - LENGTH(digitsBeforeDotNumber));
SET digitsAndDotsNumber = CONCAT(digitsBeforeDotNumber, '.', digitsAfterDotNumber);
ELSE
IF treatLastCommaAsDot = TRUE THEN
-- Find occurence of the last delimiter within the string.
SET inti = CHAR_LENGTH(digitsAndDotsNumber) - LOCATE(separatorChar, REVERSE(digitsAndDotsNumber));
-- Break the string into left part until the last occurrence of separator character.
SET digitsBeforeDotNumber = SUBSTRING(digitsAndDotsNumber, 1, inti);
SET digitsBeforeDotNumber = REPLACE(digitsBeforeDotNumber, separatorChar, '');
SET digitsAfterDotNumber = SUBSTRING(digitsAndDotsNumber, inti + 2, LENGTH(digitsAndDotsNumber) - LENGTH(digitsBeforeDotNumber));
-- Remove any dot occurence from the right part.
SET digitsAndDotsNumber = CONCAT(digitsBeforeDotNumber, '.', REPLACE(digitsAfterDotNumber, '.', ''));
ELSE
SET digitsAndDotsNumber = REPLACE(digitsAndDotsNumber, separatorChar, '');
END IF;
END IF;
END IF;
SET finalNumber = digitsAndDotsNumber;
IF negativeNumber = TRUE AND LENGTH(finalNumber) > 0 THEN
SET finalNumber = CONCAT('-', finalNumber);
END IF;
IF LENGTH(finalNumber) = 0 THEN
RETURN 0;
END IF;
RETURN CAST(finalNumber AS DECIMAL(25,5));
ELSE
RETURN 0;
END IF;
END$$
DELIMITER ;
Here are some examples of usage:
--
-- SELECT EXTRACT_DECIMAL('-711,712,34 and 123123', FALSE); -- -71171234.00000
-- SELECT EXTRACT_DECIMAL('1.234', FALSE); -- 1.23400
-- SELECT EXTRACT_DECIMAL('1,234.00', FALSE); -- 1234.00000
-- SELECT EXTRACT_DECIMAL('14 9999,99', FALSE); -- 14999999.00000
-- SELECT EXTRACT_DECIMAL('-149,999.99', FALSE); -- -149999.99000
-- SELECT EXTRACT_DECIMAL('3 536 500.53', TRUE); -- 3536500.53000
-- SELECT EXTRACT_DECIMAL('3,536,500,53', TRUE); -- 3536500.53000
-- SELECT EXTRACT_DECIMAL("-1"); -- -1.00000
-- SELECT EXTRACT_DECIMAL('2,233,536,50053', TRUE); -- 2233536.50053
-- SELECT EXTRACT_DECIMAL('13.01666667', TRUE); -- 13.01667
-- SELECT EXTRACT_DECIMAL('1,00000000', FALSE); -- 100000000.00000
-- SELECT EXTRACT_DECIMAL('1000', FALSE); -- 1000.00000
-- ==================================================================================
Try,
Query level,
SELECT CAST('1&hash' AS UNSIGNED);
for PHP,
echo intval('13213&hash');
For any newcomers with a similar request this should be exactly what you need.
select DISTINCT CONVERT(SUBSTRING(referrerURL,71,6), SIGNED) as `foo`
from hotshotsdblog1.annonymoustracking
where advertiserid = 10
limit 10;
I suggest using a pivot table (e.g., a table that only contains a vector of ordered numbers from 1 to at least the length of the string) and then doing the following:
SELECT group_concat(c.elem separator '')
from (
select b.elem
from
(
select substr('PAUL123f3211',iter.pos,1) as elem
from (select id as pos from t10) as iter
where iter.pos <= LENGTH('PAUL123f3211')
) b
where b.elem REGEXP '^[0-9]+$') c
It can be done in PHP instead.
foreach ($query_result as &$row) {
$row['column_with_numbers'] = (int) filter_var($query_result['column_with_numbers'], FILTER_SANITIZE_NUMBER_INT);
}
Try this in php
$string = '9072&h';
echo preg_replace("/[^0-9]/", '', $string);// output: 9072
or follow this link to do this in MySql
Refer the link