Split address column in streetname and number using SELECT statement

Split address column in streetname and number using SELECT statement - mysql

I have a mysql table with an address column.
Now I need to SELECT the streetname and number separately.
Address
Wallstreet 20
New Yorkavenue 30
New London Street 40
Needs to be:
Street: Number:
Wallstreet 20
New Yorkavenue 30
New London Street 40
Any ideas?
Thanks in advance!

If you assume that the number is the final "word" and separated by a space:
-- Street = everything before the last space-separated word. Cutting by length
-- (instead of REPLACE) leaves earlier occurrences of the same token intact,
-- and TRIM drops the separating space.
select trim(left(address, char_length(address) - char_length(substring_index(address, ' ', -1)))) as street,
substring_index(address, ' ', -1) as number
I happen to think that those two assumptions are very big assumptions, meaning that this might not work on all your rows.

For MySQL you could probably use SUBSTRING_INDEX to separate the fields, provided that digits appear only in the house number and the street name itself contains no numbers.
Example
-- Cut the street by length rather than with REPLACE(): REPLACE() would also
-- delete any earlier occurrence of the number token inside the street name
-- and would leave the separating space at the end.
SELECT
TRIM(LEFT(address, CHAR_LENGTH(address) - CHAR_LENGTH(SUBSTRING_INDEX(address, ' ', -1)))) AS ADDRESS,
SUBSTRING_INDEX(address, ' ', -1) AS NUMBER
FROM
ADDRESSES
This is not a particularly good method performance-wise, and it could probably be done in other ways, but if the data always follows the pattern in the example it will work.
It is probably also faster to do the split on the client side, in the language that fetches the data.

You could use some string functions, like SUBSTRING_INDEX and LEFT.
Getting the Number is easy:
SELECT
SUBSTRING_INDEX(Street, ' ', -1)
(yes, it's not actually a number, but I suppose it's the last part of the string after the last space, it can also be a string as 20/C).
Getting the street name is a little more tricky:
-- Splits Street at its last space: street_number is the text after the last
-- space, street_name is everything before that space.
SELECT
LEFT(Street,
CHAR_LENGTH(Street)
-- minus the length of the last word ...
-CHAR_LENGTH(SUBSTRING_INDEX(Street, ' ', -1))
-- ... and minus one more character for the space in front of it
-1
) AS street_name,
SUBSTRING_INDEX(Street, ' ', -1) AS street_number
FROM
tablename

I may be somewhat late, but the issue is still the same nowadays :-)
The question was how to split/select the street name (Straßenname) and the house number (Hausnummer) out of an address string.
Therefore I created a function that implements German DIN 5008:
DROP FUNCTION IF EXISTS HAUSNUMMER_DIN_5008;
delimiter //
-- HAUSNUMMER_DIN_5008(oldStreet, formating)
--   Extracts the house-number part of an address such as "Auf dem Brande 19a".
--   oldStreet : full address string (street name followed by the number part)
--   formating : TRUE  -> additionally reformat the number part
--                        (e.g. "19a" becomes "19 a");
--               FALSE -> return the number part exactly as written
--   Returns the number part, or "" when no word starting with a digit 1-9
--   can be found.
--   The function works on the session variables @oldString, @newString,
--   @splitPoint and @i, so it overwrites user variables of those names.
CREATE DEFINER=`vlw`@`%` FUNCTION `HAUSNUMMER_DIN_5008`(
    oldStreet VARCHAR(255), formating BOOL
) RETURNS varchar(16) CHARSET utf8
BEGIN
    SET @oldString := oldStreet;
    SET @newString := "";
    -- Each pass either strips one leading word from @oldString or decides
    -- that the remainder is the house number and leaves the loop.
    tokenLoop: LOOP
        -- are there no figures at the beginning of street name?
        IF NOT @oldString REGEXP '^[1-9]' THEN
            -- must be a word, to jump over
            SET @splitPoint := LOCATE(" ", @oldString);
            IF @splitPoint = 0 THEN
                -- No further word to skip to: without this guard the string
                -- would never shrink and the loop would never end.
                LEAVE tokenLoop;
            END IF;
            SET @oldString := SUBSTRING(@oldString, @splitPoint+1);
        ELSE
            -- Okay, we found the first figure
            -- Are there any chars inside the string including "."
            IF @oldString REGEXP '[a-z,A-Z,.,--,/," "]' THEN
                -- Are there any char directly behind a figure
                IF @oldString REGEXP '[0-9][a-z,A-Z,.,--,/," "]' THEN
                    -- now we have to check step by step
                    SET @i := 1;
                    tokenPos: LOOP
                        -- jump over the first figures
                        IF NOT SUBSTRING(@oldString, @i, 1) REGEXP '[0-9]' THEN
                            -- this is the first non figure
                            IF formating THEN
                                IF SUBSTRING(@oldString, @i, 1) REGEXP '[a-z,A-Z]' THEN
                                    -- If a char is directly written after figures, then add a blank between
                                    SET @oldString := CONCAT(SUBSTRING(@oldString,1,@i-1)," ",SUBSTRING(@oldString,@i));
                                    LEAVE tokenPos;
                                ELSE
                                    IF SUBSTRING(@oldString, @i, 1) = "." THEN
                                        -- this must be part of the street name, so we will loop some what
                                        LEAVE tokenPos;
                                    END IF;
                                    SET @newString := REPLACE(@oldString," ","");
                                    LEAVE tokenLoop;
                                END IF;
                                LEAVE tokenPos;
                            ELSE
                                IF SUBSTRING(@oldString, @i, 1) = "." THEN
                                    -- this must be part of the street name, so we will loop some what
                                    LEAVE tokenPos;
                                ELSE
                                    -- the street number is found
                                    SET @newString := @oldString;
                                    LEAVE tokenLoop;
                                END IF;
                            END IF;
                        END IF;
                        SET @i := @i+1;
                    END LOOP tokenPos;
                END IF;
                SET @splitPoint := LOCATE(" ", @oldString);
                -- @splitPoint > 0 guard: with no blank left, SUBSTRING(..., 1)
                -- is the unchanged string and the loop would repeat forever.
                IF @splitPoint > 0 AND SUBSTRING(@oldString, @splitPoint+1) REGEXP '[1-9]' THEN
                    -- we have to split one more word
                    SET @oldString := SUBSTRING(@oldString, @splitPoint+1);
                ELSE
                    SET @newString := @oldString;
                    LEAVE tokenLoop;
                END IF;
            ELSE
                -- NOTE(review): this branch is reached only when the string
                -- contains no "/", so the first condition looks unreachable,
                -- and LOCATE() is called as LOCATE(@oldString, "//") rather
                -- than LOCATE("//", @oldString) - confirm the intent before
                -- relying on it.
                IF formating AND @oldString REGEXP '[//][1-9]' THEN
                    SET @i := LOCATE(@oldString,"//")+4;
                    SET @oldString := CONCAT(SUBSTRING(@oldString,1,@i)," ",SUBSTRING(@oldString,@i+1));
                ELSEIF formating THEN
                    SET @oldString := REPLACE(@oldString," ","");
                END IF;
                SET @newString := @oldString;
                LEAVE tokenLoop;
            END IF;
        END IF;
    END LOOP tokenLoop;
    RETURN @newString;
END //
delimiter ;
There is an additional BOOLEAN parameter to reformat the number part of the address according to DIN 5008. The reformatting part isn't finished yet, though.
Now we can test it with some examples:
-- Example calls; expected results, where known, are given as trailing comments.
select HAUSNUMMER_DIN_5008("Mörikestr. 28/3",TRUE);
select HAUSNUMMER_DIN_5008("Nettelbeckstraße 6 a",TRUE);
select HAUSNUMMER_DIN_5008("Auf dem Brande 19a",TRUE); -- ==>> "19 a"
select HAUSNUMMER_DIN_5008("Auf dem Brande 19a",FALSE); -- ==>> "19a"
select HAUSNUMMER_DIN_5008("Anger 1-3",TRUE);
select HAUSNUMMER_DIN_5008("Straße des 17. Juni 12-16",TRUE);
select HAUSNUMMER_DIN_5008("L11 2",TRUE); -- e.g. in Mannheim
select HAUSNUMMER_DIN_5008("111. 2",TRUE);
select HAUSNUMMER_DIN_5008("Züricher Straße 17// 28",TRUE);
-- Some special formating tests
SELECT HAUSNUMMER_DIN_5008("Mörikestr. 28 / 3",TRUE);
SELECT HAUSNUMMER_DIN_5008("8 - 6",TRUE);
SELECT HAUSNUMMER_DIN_5008("Straße des 17. Juni 12 - 16",TRUE);
SELECT HAUSNUMMER_DIN_5008("Straße des 17. Juni 12- 16",TRUE);
SELECT HAUSNUMMER_DIN_5008("Straße des 17. Juni 12 -16",FALSE);
-- Next one is not DIN 5008, but was a loop issue inside function
SELECT HAUSNUMMER_DIN_5008("8 /App.6",FALSE);
SELECT HAUSNUMMER_DIN_5008("8 /App.6",TRUE);
SELECT HAUSNUMMER_DIN_5008("8/App.6",TRUE);
If you need only the street name, call HAUSNUMMER_DIN_5008() with formating set to FALSE; otherwise the (reformatted) number part cannot be found within your address string.
-- Street name = everything in @address before the position at which the
-- unformatted house number (formating = FALSE) starts.
SET @address := "Auf dem Brande 19a";
SELECT SUBSTRING(@address, 1, LOCATE(HAUSNUMMER_DIN_5008(@address, FALSE),@address)-1);
SET @address := "Straße des 17. Juni 12-16";
SELECT SUBSTRING(@address, 1, LOCATE(HAUSNUMMER_DIN_5008(@address, FALSE),@address)-1);
That are my 5 cent
Christian Eickhoff

ERROR 1064 (42000) at line 5: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'END LOOP tokenLoop;
IF NOT @oldString REGEXP '^[1-9]' THEN
SET @sp' at line 9

For everyone coming from Google: As you can't trust humans, I chose this way:
-- street: first run of letters, umlauts, quotes, hyphens, dots and blanks;
-- number: from the first digit to the end of the string.
-- The backslashes are doubled so that the regex engine, not the SQL string
-- parser, receives "\-" and "\."; with single backslashes the class contained
-- an unintended range from '"' to '.'.
SELECT
REGEXP_SUBSTR(address, '[a-z"äöüß"\\-\\. ]+') AS street,
REGEXP_SUBSTR(address, '[0-9]+.*') AS number
FROM
ADDRESSES
Because this will also work on:
Streetname 5
Streetname 5B
Streetname 5 B
Streetn.5B
Street-Name5
Streetname 5 (+trailing Space)
Street name5

Related

How to replace multiple characters with a specific character in mysql with regexp_replace?

I am using regexp_replace to replace a field of type string having some special characters with '_' where ever those characters are present.
I am using
SELECT regexp_replace('name', ' |\-|\(|\)|\.', '_') from db.table;
Some values from the field 'name':
Pune Municipal Corp - Water
Kerala State Electricity Board Ltd. (KSEBL)
Paschim Gujarat Vij Company Limited (PGVCL)
What I want:
Pune_Municipal_Corp___Water
Kerala_State_Electricity_Board_Ltd___KSEBL_
Paschim_Gujarat_Vij_Company_Limited__PGVCL_

Try this
SELECT regexp_replace(name, '[^a-zA-Z0-9_]', '_')
db<>fiddle

I don't know which version of MySQL you're using, but on MySQL 8+ you can use the native REGEXP_REPLACE function.
Otherwise, if the version you're using doesn't support REGEXP_REPLACE, you can create a function to do that.
Here is the function code:
DELIMITER $$
-- regex_replace(pattern, replacement, original)
--   Walks through `original` one character at a time and substitutes
--   `replacement` for every single character that matches `pattern`.
--   If `original` as a whole does not match `pattern`, it is returned as is.
CREATE FUNCTION `regex_replace`(pattern VARCHAR(1000),replacement VARCHAR(1000),original VARCHAR(1000))
RETURNS VARCHAR(1000)
DETERMINISTIC
BEGIN
    DECLARE result VARCHAR(1000) DEFAULT '';
    DECLARE current_char VARCHAR(1);
    DECLARE pos INT DEFAULT 1;

    IF original REGEXP pattern THEN
        -- at least one match somewhere: rebuild the string char by char
        WHILE pos <= CHAR_LENGTH(original) DO
            SET current_char = SUBSTRING(original, pos, 1);
            IF current_char REGEXP pattern THEN
                SET result = CONCAT(result, replacement);
            ELSE
                SET result = CONCAT(result, current_char);
            END IF;
            SET pos = pos + 1;
        END WHILE;
    ELSE
        -- nothing to replace
        SET result = original;
    END IF;

    RETURN result;
END$$
DELIMITER ;
Example using your regex and text:
mysql> select regex_replace('[ |\-|\(|\)|\.-]', '_', 'Pune Municipal Corp - Water Kerala State Electricity Board Ltd. (KSEBL) Paschim Gujarat Vij Company Limited (PGVCL)');
I got the code of the function here
Hope this helps!

You can simply use the translate to replace space and - with underscore(_) as follows:
Select translate(your_column, ' -', '__') from your_table
Db<>fiddle demo

SQL InitCap that Uppercase some special words

How can I modify this InitCap function so that it always uppercases AB when it is at the end of the string and has a space before it?
test Ab <-- should display AB
testab <-- should display ab
abtest <-- should display ab
DELIMITER $$
DROP FUNCTION IF EXISTS `CapitializeFirstCharInEveryWord`$$
-- CapitializeFirstCharInEveryWord(x)
--   Returns x with the first character of every space-separated word in
--   upper case and the remaining characters of each word in lower case.
--   Uses the session variables @str and @l_str as scratch space.
CREATE FUNCTION `CapitializeFirstCharInEveryWord`(x char(100)) RETURNS char(100) CHARSET utf8
BEGIN
    SET @str='';
    SET @l_str='';
    -- consume x word by word while a blank is left in it
    WHILE x REGEXP ' ' DO
        SELECT SUBSTRING_INDEX(x, ' ', 1) INTO @l_str;
        SELECT SUBSTRING(x, LOCATE(' ', x)+1) INTO x;
        SELECT CONCAT(@str, ' ', CONCAT(UPPER(SUBSTRING(@l_str,1,1)),LOWER(SUBSTRING(@l_str,2)))) INTO @str;
    END WHILE;
    -- x now holds the last word; LTRIM removes the blank added before the first word
    RETURN LTRIM(CONCAT(@str, ' ', CONCAT(UPPER(SUBSTRING(x,1,1)),LOWER(SUBSTRING(x,2)))));
END$$

You want to capitalize last word in string? You can do it in simple way. Description in comments.
-- CapitalizeLastWord(x)
--   Returns x with everything after its last space converted to upper case.
--   A value without any space is returned unchanged.
--   Uses the session variables @space_pos_reverse and @last_space_pos.
CREATE FUNCTION `CapitalizeLastWord`(x char(100)) RETURNS char(100) CHARSET utf8
BEGIN
    -- position of the last space, counted from the end of x (0 = no space)
    SET @space_pos_reverse = LOCATE(' ', REVERSE(x));
    -- no space: nothing to capitalize
    IF @space_pos_reverse = 0 THEN RETURN x;
    END IF;
    -- Position of the last space counted from the start. CHAR_LENGTH (not
    -- LENGTH) is required: LOCATE and SUBSTRING count characters, whereas
    -- LENGTH counts bytes and is off for multibyte utf8 characters.
    SET @last_space_pos = CHAR_LENGTH(x)-@space_pos_reverse+1;
    -- split x into two parts; the second part is upper-cased
    RETURN CONCAT(SUBSTRING(x, 1, @last_space_pos), UPPER(SUBSTRING(x, @last_space_pos+1)) );
END$$

how can I convert a comma separated varchar to be used in an "IN" Clause in pl/sql?

I have a comma separated varchar which will be determined dynamically. for example:
varchar cHighRank := (1,2,3,4,5,6,7,8)
I would like to use this in the following IN Clause, but system produces an error since IN clause is only for integers for example:
if (rank in cHighRank) then
--do the high rank...
elsif (rank in cLowRank) then
-- do the low rank
end if;
the base of this issue is that I have to break a list of integers into half. it could be 16, 12, etc. the point is that i don't know it is dynamic. example:
16: Lowrank: 1,2,3,4,5,6,7,8 and HighRank: 9,10,11,12,13,14,15,16
I can create my lists of dynamic values as a varchar, but it won't work in an IN Clause.
Please help.
Thanks,

Not sure about MySQL, but with Oracle, you could use a regex check instead of the IN clause, with some work on border cases
If REGEXP_LIKE(cHighRank, "rank" + ",") or REGEXP_LIKE(cHighRank, "," + "rank") or REGEXP_LIKE(cHighRank, "(" + "rank" + ")")
Should be possible with MySQL as well, only I haven't worked with regexes there

One way to check whether an "item" is in a comma separate list is to use the INSTR function.
Assuming you don't have any extra spaces in the list, one trick is to add a leading and trailing comma e.g.
',1,2,3,'
And then search for a given element such as ',2,'
DECLARE
    cHighRank VARCHAR2(100) := '1,2,3,4,5,6,7,8';
BEGIN
    -- Wrap both the list and the searched value in commas so that only whole
    -- elements match (',2,' is found in ',1,2,3,' but not in ',12,3,').
    -- NOTE(review): rank is not declared in this block; presumably it comes
    -- from the asker's surrounding code - confirm.
    IF INSTR( ','||cHighRank||',' , ','||rank||',' ) > 0 THEN
        -- matched
        NULL;  -- PL/SQL needs at least one executable statement here
    END IF;
END;

-- Splits a comma-separated list of ranks into a low and a high half and
-- prints both halves via dbms_output.
declare
v_ranks constant varchar2(32767):= '1,2,3,4,5,6,7,8,9,10,11';
-- number of ranks is number of commas + 1
v_number_of_ranks constant number := regexp_count(v_ranks, ',') + 1;
-- find the middle point: position of the comma that ends the low half
-- your definition of how to split an odd number of ranks into high/low might differ
-- (for the 11 sample ranks the occurrence argument is 5.5; the sample output
-- shows the split landing after the 6th rank)
v_pos constant number := instr(v_ranks, ',', 1, v_number_of_ranks / 2);
begin
-- split around the middle point
-- the low half runs up to and including position v_pos, i.e. it keeps the
-- separating comma (visible as the trailing comma in the sample output)
dbms_output.put_line(' lowrank: ' || substr(v_ranks, 0, v_pos));
-- the high half starts right after that comma
dbms_output.put_line('highrank: ' || substr(v_ranks, v_pos + 1));
end;
/
Output:
lowrank: 1,2,3,4,5,6,
highrank: 7,8,9,10,11

-- Prints which of the two comma-separated lists contains rank.
DECLARE
    Lowrank VARCHAR2(30) := '1,2,3,4,5,6,7,8';
    HighRank VARCHAR2(30) := '9,10,11,12,13,14,15,16';
    rank VARCHAR2(30) := '16';
BEGIN
    -- The element must be delimited by a comma or by the start/end of the
    -- list. Optional commas (',{0,1}') would also accept substrings, so that
    -- rank 6 matched the 6 in 16 and rank 1 matched the 1 in 10.
    IF REGEXP_INSTR(Lowrank, '(^|,)' || rank || '(,|$)') > 0 THEN
        DBMS_OUTPUT.PUT_LINE('Lowrank');
    ELSIF REGEXP_INSTR(HighRank, '(^|,)' || rank || '(,|$)') > 0 THEN
        DBMS_OUTPUT.PUT_LINE('HighRank');
    END IF;
END;

MySQL nested case procedure using concat

I'm doing a nested case. However, I am getting an error near this line:
else
select concat('The month parameter ', p_month, ' is invalid; cannot proceed.');
This is actually the else case for the most inner case. p_month is an IN parameter and also an integer. Could this be the error?
Any thoughts will be helpful. Thank you.
I played around with it a little bit more right now. So I decided to SELECT on the outside block. However, I know now I have an error for the Select statement in the inner block. How can I fix that? Thanks.
entire code:
Create procedure ExamFeesMonth(in p_cl_id int, in p_month int)
begin
declare count_cl_id int;
declare num_exam int;
declare count_exam int;
declare v_msg varchar(200);
-- check if p_cl_id is in vt_clients
select count(*) into count_cl_id from vt_clients where cl_id = p_cl_id;
-- count the number of exams that has happened in p_month of previous year
select count(*) into num_exam
from vt_clients cl
join vt_headers h on cl.cl_id = h.cl_id
join vt_details d on h.ex_id = d.ex_id
where cl.cl_id = p_cl_id
and month(ex_date) = p_month
and year(ex_date) = (year(current_date())-1)
;
select
-- first case block starts
case
-- client valid
when count_cl_id = 1 then
-- second case block starts
case
-- p_month valid
when p_month in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) then
-- third case block starts
case
-- existing exams
when count_exam >= 1 then
select concat( 'Client ', p_cl_id, ' has ', count(h.ex_id),
' exam(s) with total fees of ', sum(ex_fee),
' in ', year(current_date())-1, '-', p_month)
from vt_clients cl
join vt_exam_headers h on cl.an_id = h.an_id
join vt_exam_details d on h.ex_id = d.ex_id
where cl_id = p_cl_id
and year(ex_date) = (year(current_date())-1)
and month(ex_date) = p_month
;
-- no exams
when count_exam = 0 then
concat( 'No exams for client ', p_cl_id, ' in 2011-' , p_month, '.');
-- third case block ends
end
-- p_month invalid
else
concat('The month parameter ', p_month, ' is invalid; cannot proceed.');
-- second case block ends
end
-- client invalid
when count_cl_id = 0 then
concat('We have no client with id ', p_cl_id, '; cannot proceed.') ;
-- first case block ends
end case;
end;
#

I think the issue is in the END statements of the nested CASEs. You should use END CASE (CASE ... END CASE):
-- third case block ends
end case; -- <-------------here
-- p_month invalid
else
concat('The month parameter ', p_month, ' is invalid; cannot proceed.');
-- second case block ends
end case; -- <-------------and here

T-SQL strip all non-alpha and non-numeric characters

Is there a smarter way to remove all special characters rather than having a series of about 15 nested replace statements?
The following works, but only handles three characters (ampersand, blank and period).
select CustomerID, CustomerName,
Replace(Replace(Replace(CustomerName,'&',''),' ',''),'.','') as CustomerNameStripped
from Customer

One flexible-ish way;
-- fnRemovePatternFromString(@BUFFER, @PATTERN)
--   Repeatedly deletes the single character at the position where PATINDEX
--   finds @PATTERN in @BUFFER, until the pattern no longer matches.
--   @PATTERN is a PATINDEX pattern, e.g. '%[$&.!?]%'.
--   Returns the reduced string.
CREATE FUNCTION [dbo].[fnRemovePatternFromString](@BUFFER VARCHAR(MAX), @PATTERN VARCHAR(128)) RETURNS VARCHAR(MAX) AS
BEGIN
    DECLARE @POS INT = PATINDEX(@PATTERN, @BUFFER)
    WHILE @POS > 0 BEGIN
        -- drop the one character at the match position, then search again
        SET @BUFFER = STUFF(@BUFFER, @POS, 1, '')
        SET @POS = PATINDEX(@PATTERN, @BUFFER)
    END
    RETURN @BUFFER
END
select dbo.fnRemovePatternFromString('cake & beer $3.99!?c', '%[$&.!?]%')
(No column name)
cake beer 399c

Create a function:
-- dbo.StripNonAlphaNumerics(@s)
--   Returns @s reduced to the characters that match [A-Za-z0-9];
--   every other character is dropped.
CREATE FUNCTION dbo.StripNonAlphaNumerics
(
    @s VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
    -- @p: current position in @s, @n: result built so far
    DECLARE @p INT = 1, @n VARCHAR(255) = '';
    WHILE @p <= LEN(@s)
    BEGIN
        IF SUBSTRING(@s, @p, 1) LIKE '[A-Za-z0-9]'
        BEGIN
            SET @n += SUBSTRING(@s, @p, 1);
        END
        SET @p += 1;
    END
    RETURN(@n);
END
GO
Then:
SELECT Result = dbo.StripNonAlphaNumerics
('My Customer''s dog & #1 friend are dope, yo!');
Results:
Result
------
MyCustomersdog1friendaredopeyo
To make it more flexible, you could pass in the pattern you want to allow:
-- dbo.StripNonAlphaNumerics(@s, @pattern)
--   Returns @s reduced to the characters that match @pattern.
--   @pattern is a LIKE pattern for one character, e.g. '[A-Za-z0-9]'.
CREATE FUNCTION dbo.StripNonAlphaNumerics
(
    @s VARCHAR(255),
    @pattern VARCHAR(255)
)
RETURNS VARCHAR(255)
AS
BEGIN
    -- @p: current position in @s, @n: result built so far
    DECLARE @p INT = 1, @n VARCHAR(255) = '';
    WHILE @p <= LEN(@s)
    BEGIN
        IF SUBSTRING(@s, @p, 1) LIKE @pattern
        BEGIN
            SET @n += SUBSTRING(@s, @p, 1);
        END
        SET @p += 1;
    END
    RETURN(@n);
END
GO
Then:
SELECT r = dbo.StripNonAlphaNumerics
('Bob''s dog & #1 friend are dope, yo!', '[A-Za-z0-9]');
Results:
r
------
Bobsdog1friendaredopeyo

I faced this problem several years ago, so I wrote a SQL function to do the trick. Here is the original article (was used to scrape text out of HTML). I have since updated the function, as follows:
IF (object_id('dbo.fn_CleanString') IS NOT NULL)
BEGIN
PRINT 'Dropping: dbo.fn_CleanString'
DROP function dbo.fn_CleanString
END
GO
PRINT 'Creating: dbo.fn_CleanString'
GO
CREATE FUNCTION dbo.fn_CleanString
(
    @string varchar(8000)
)
returns varchar(8000)
AS
BEGIN
    ---------------------------------------------------------------------------------------------------
    -- Title: CleanString
    -- Date Created: March 26, 2011
    -- Author: William McEvoy
    --
    -- Description: Cleans a string: backspace, tab, line feed and carriage
    --              return become a blank, the string is trimmed and runs of
    --              blanks are condensed, and every remaining character that
    --              is not a letter or digit is replaced by a blank.
    -- Parameter:   @string - the text to clean
    -- Returns:     the cleaned text
    ---------------------------------------------------------------------------------------------------
    declare @char char(1),
            @len int,
            @count int,
            @newstring varchar(8000),
            @replacement char(1)
    select @count = 1,
           @len = 0,
           @newstring = '',
           @replacement = ' '
    ---------------------------------------------------------------------------------------------------
    -- M A I N P R O C E S S I N G
    ---------------------------------------------------------------------------------------------------
    -- Replace Backspace characters
    select @string = replace(@string,char(8),@replacement)
    -- Replace Tabs
    select @string = replace(@string,char(9),@replacement)
    -- Replace line feed
    select @string = replace(@string,char(10),@replacement)
    -- Replace carriage return
    select @string = replace(@string,char(13),@replacement)
    -- Condense multiple spaces into a single space
    -- This works by changing the spaces to be OX where O = a space, and X = a special character (char(7)),
    -- then all occurrences of XO are changed to nothing,
    -- then all occurrences of X are changed to nothing, leaving just the O which is actually a single space
    select @string = replace(replace(replace(ltrim(rtrim(@string)),' ', ' ' + char(7)),char(7)+' ',''),char(7),'')
    -- Parse each character, replace non alpha-numeric ones
    select @len = len(@string)
    WHILE (@count <= @len)
    BEGIN
        -- Examine the character
        select @char = substring(@string,@count,1)
        IF (@char like '[a-z]') or (@char like '[A-Z]') or (@char like '[0-9]')
            select @newstring = @newstring + @char
        ELSE
            select @newstring = @newstring + @replacement
        select @count = @count + 1
    END
    return @newstring
END
GO
IF (object_id('dbo.fn_CleanString') IS NOT NULL)
PRINT 'Function created.'
ELSE
PRINT 'Function NOT created.'
GO

I know this is an old thread, but still, might be handy for others.
Here's a quick and dirty (Which I've done inversely - stripping out non-numerics) - using a recursive CTE.
What makes this one nice for me is that it's an inline function - so gets around the nasty RBAR effect of the usual scalar and table-valued functions.
Adjust your filter as needs be to include or exclude whatever char types.
-- fncV1_iStripAlphasFromData(@iString)
--   Inline table-valued function returning one row with one column, oString:
--   @iString reduced to its digits 0-9.
--   A recursive CTE appends one character per recursion level, so strings
--   longer than SQL Server's recursion limit (100 by default) need
--   OPTION (MAXRECURSION n) on the calling query.
Create Function fncV1_iStripAlphasFromData (
    @iString Varchar(max)
)
Returns
Table With Schemabinding
As
Return(
    with RawData as
    (
        Select @iString as iString
    )
    ,
    Anchor as
    (
        -- LIKE '[0-9]' instead of IsNumeric(): IsNumeric() also accepts
        -- characters such as '$', '+', '-', '.' and ',', which are not digits.
        Select Case when substring(iString, 1, 1) like '[0-9]' then substring(iString, 1, 1) else '' End as oString, 2 as CharPos from RawData
        UNION ALL
        Select a.oString + Case when substring(@iString, a.CharPos, 1) like '[0-9]' then substring(@iString, a.CharPos, 1) else '' End, a.CharPos + 1
        from RawData r
        -- Bound by the length of the untrimmed string: positions index into
        -- @iString itself, so trimming leading blanks first would cut off the
        -- last characters of the input.
        Inner Join Anchor a on a.CharPos <= len(@iString)
    )
    -- the row with the highest CharPos holds the fully accumulated result
    Select top 1 oString from Anchor order by CharPos Desc
)
Go
select * from dbo.fncV1_iStripAlphasFromData ('00000')
select * from dbo.fncV1_iStripAlphasFromData ('00A00')
select * from dbo.fncV1_iStripAlphasFromData ('12345ABC6789!&*0')

If you can use SQL CLR you can use .NET regular expressions for this.
There is a third party (free) package that includes this and more - SQL Sharp .

We Keep Coding

html mysql json google-apps-script actionscript-3 ms-access google-chrome google-maps reporting-services sql-server-2008

Split address column in streetname and number using SELECT statement - mysql

I have a mysql table with an address column. Now I need to SELECT the streetname and number separately. Address Wallstreet 20 New Yorkavenue 30 New London Street 40 Needs to be: Street: Number: Wallstreet 20 New Yorkavenue 30 New London Street 40 Any ideas? Thanks in advance!

ERROR 1064 (42000) at line 5: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'END LOOP tokenLoop; IF NOT @oldString REGEXP '^[1-9]' THEN SET @sp' at line 9

Related

How to replace multiple characters with a specific character in mysql with regexp_replace?

SQL InitCap that Uppercase some special words

how can I convert a comma separated varchar to be used in an "IN" Clause in pl/sql?

MySQL nested case procedure using concat

T-SQL strip all non-alpha and non-numeric characters

Categories

Resources