SQL Strip Longest Common Prefix - mysql

I have a table tbl1 with two columns col1 and col2 containing strings:
col1 | col2
--------+--------
bar | foo
foo | foobar
bar1foo | bar2foo
Corresponding SQL dump:
-- Sample schema: two NOT NULL varchar(20) columns, latin1 with the
-- case-insensitive latin1_general_ci collation.
CREATE TABLE `tbl1` (
    `col1` varchar(20) COLLATE latin1_general_ci NOT NULL,
    `col2` varchar(20) COLLATE latin1_general_ci NOT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci;
-- Sample rows: no common prefix, col1 is a full prefix of col2, and a
-- partial common prefix ('bar').
INSERT INTO `tbl1` (`col1`, `col2`)
VALUES
    ('bar', 'foo'),
    ('foo', 'foobar'),
    ('bar1foo', 'bar2foo');
The strings of an entry share a common prefix in most cases. I need a query that strips those common prefixes. Expected result:
bar | foo
| bar
1foo | 2foo
My approach, so far:
-- Strips the common prefix of col1 and col2 by comparing the reversed strings as integers.
-- Reading each expression from the inside out:
--   1. REVERSE both strings, then HEX + CONV(.., 16, 10) turn each into an integer;
--   2. the two integers are subtracted and ABS is taken;
--   3. the difference is converted back with HEX and trailing '0' digits are trimmed;
--   4. CEIL(LENGTH(..) / 2) of what remains is subtracted from 1 + the length of the
--      longer string, and the result is used as the SUBSTR start position.
-- Limitations stated by the author: breaks for strings longer than 64 bits and for
-- multi-byte characters.
SELECT
SUBSTR(`col1`, 1+GREATEST(LENGTH(`col1`), LENGTH(`col2`)) - CEIL(LENGTH(TRIM(TRAILING '0' FROM HEX(ABS(CONV(HEX(REVERSE(`col1`)),16,10) - CONV(HEX(REVERSE(`col2`)),16,10)))))/2)),
SUBSTR(`col2`, 1+GREATEST(LENGTH(`col1`), LENGTH(`col2`)) - CEIL(LENGTH(TRIM(TRAILING '0' FROM HEX(ABS(CONV(HEX(REVERSE(`col1`)),16,10) - CONV(HEX(REVERSE(`col2`)),16,10)))))/2))
FROM tbl1
Short explanation: The strings are reversed (REVERSE), converted into integers (HEX and CONV), subtracted from each other (- and ABS), converted into hexadecimal representation (HEX), 0's are trimmed from the end (TRIM), the length of this result is subtracted from the length of the longest string (-, LENGTH and GREATEST) and then used by SUBSTR to get the result.
Problems with my approach:
Does not work with strings longer than 64 bits (8 bytes), because CONV works with 64-bit precision.
Does not work with strings containing multi-byte characters
Very lengthy and ugly
Does not have good performance.

Sadly, the most general and best-performing method is probably a giant case expression. However, this works only up to a certain prefix length:
-- Computes prefix_length in a derived table with a descending CASE ladder, then
-- removes that many leading characters from both columns.
select substr(col1, prefix_length + 1),
substr(col2, prefix_length + 1)
from (select tbl1.*,
-- CASE returns the first matching branch, so the branches are listed from the
-- longest candidate prefix (10) downwards; common prefixes longer than the top
-- branch are only stripped up to that length.
(case when left(col1, 10) = left(col2, 10) then 10
when left(col1, 9) = left(col2, 9) then 9
-- Placeholder, not SQL: the remaining branches (presumably 8 down to 1) are elided here.
. . .
else 0
end) as prefix_length
from tbl1
) t;
Actually, you can do this with a recursive CTE, which is the most general approach:
-- Strips the longest common prefix from col1/col2, one character per recursion step.
-- Each step drops the first character of both strings while they still agree;
-- the deepest level reached for a given original pair holds the stripped result.
with recursive cte as (
    -- Anchor: every row unchanged, remembering the original values for grouping.
    select col1, col2, 1 as lev, col1 as orig_col1, col2 as orig_col2
    from tbl1
    union all
    select substr(col1, 2), substr(col2, 2), lev + 1, orig_col1, orig_col2
    from cte
    -- Stop once either string is exhausted. Without these guards two identical
    -- strings both shrink to '' and left('', 1) = left('', 1) stays true forever,
    -- so the recursion only ends when MySQL aborts it at cte_max_recursion_depth.
    where char_length(col1) > 0
      and char_length(col2) > 0
      and left(col1, 1) = left(col2, 1)
)
select col1, col2
from (select cte.*,
             -- Highest lev = most characters stripped; dense_rank keeps duplicate
             -- input rows, which all reach the same level.
             dense_rank() over (partition by orig_col1, orig_col2 order by lev desc) as seqnum
      from cte
     ) x
where seqnum = 1;
Although the performance will definitely be worse than your solution or the massive case expression, it is probably not that bad, and you might find it sufficient for your purposes.
Here is a db<>fiddle with both solutions.

This code works, although it is lengthy, ugly and possibly slow:
-- Strips the longest common prefix using a hard-coded list of candidate lengths 1..20
-- (20 matches the varchar(20) columns of tbl1).
select
    substring(t.col1, g.maxlen + 1) col1,
    substring(t.col2, g.maxlen + 1) col2
from tbl1 t inner join (
    -- maxlen = the largest n for which the first n characters of both columns agree.
    select t.col1, t.col2,
        max(case when left(t.col1, tt.n) = left(t.col2, tt.n) then tt.n else 0 end) maxlen
    -- LEFT JOIN (not INNER): a row where either string is empty matches no n at all
    -- and would otherwise disappear from the result; with no match tt.n is NULL and
    -- the CASE falls through to 0.
    from tbl1 t left join (
        select 1 n union all select 2 union all select 3 union all select 4 union all
        select 5 union all select 6 union all select 7 union all select 8 union all
        select 9 union all select 10 union all select 11 union all select 12 union all
        select 13 union all select 14 union all select 15 union all select 16 union all
        select 17 union all select 18 union all select 19 union all select 20
    -- char_length counts characters, which is the unit left() works in;
    -- length() counts bytes.
    ) tt on least(char_length(t.col1), char_length(t.col2)) >= tt.n
    group by t.col1, t.col2
) g on g.col1 = t.col1 and g.col2 = t.col2
See the demo.
For MySQL 8.0+ you can use a recursive CTE, in which case no prior knowledge of the column lengths is needed:
-- Strips the longest common prefix without hard-coding the maximum string length.
with
recursive lengths as (
    -- Candidate prefix lengths 1..N, where N is the longest possible common prefix
    -- in the table (the shorter string of each row, maximised over all rows).
    select 1 n
    union all
    select n + 1
    from lengths
    where n < (select max(least(char_length(col1), char_length(col2))) from tbl1)
),
cte as (
    -- maxlen = the largest n for which the first n characters of both columns agree.
    select t.col1, t.col2,
        max(case when left(t.col1, l.n) = left(t.col2, l.n) then l.n else 0 end) maxlen
    -- LEFT JOIN (not INNER): a row where either string is empty matches no n at all
    -- and would otherwise be dropped; with no match l.n is NULL and the CASE yields 0.
    from tbl1 t left join lengths l
        on least(char_length(t.col1), char_length(t.col2)) >= l.n
    group by t.col1, t.col2
)
select
    substring(t.col1, c.maxlen + 1) col1,
    substring(t.col2, c.maxlen + 1) col2
from tbl1 t inner join cte c
    on c.col1 = t.col1 and c.col2 = t.col2
See the demo.
Results:
| col1 | col2 |
| ---- | ---- |
| | bar |
| bar | foo |
| 1foo | 2foo |

Related

Convert Excel formula (PRODUCT) to MySQL query

I am wondering if anyone can help me convert the following Excel formula to a MySQL query:
Excel: =PRODUCT(1+A1:A7)-1
MySQL data:
id | data
---------------
1 | -1.64
2 | 1.38
3 | 0
4 | 0
5 | -1.52
6 | 0
7 | -1.78
Result should equal -0.207936
This is how you can do it. I sure hope someone can improve on it, because it is awful.
-- Oracle: computes PRODUCT(1 + val) - 1 by building the multiplication as a text
-- expression and evaluating it through dynamic SQL.
-- "data" holds the sample rows inline.
with data(id, val) as(
select 1,-1.64 from dual union all
select 2,1.38 from dual union all
select 3,0.00 from dual union all
select 4,0.00 from dual union all
select 5,-1.52 from dual union all
select 6,0.00 from dual union all
select 7,-1.78 from dual
),
-- "products" yields one row: every (val + 1) joined with '*' into a single string.
-- Replace swaps ',' for '.' in that string (presumably to undo a comma decimal
-- separator coming from the session's NLS settings; confirm).
products as(
select Replace(LISTAGG(val+1, '*') within group (order by val),',','.') chain
from data)
-- The evaluated product comes back as o.val; subtracting 1 completes the formula.
SELECT o.val-1 AS product_value
FROM products p
OUTER APPLY(
SELECT *
FROM XMLTABLE('/ROWSET/ROW/*'
-- dbms_xmlgen.getXMLType runs the generated 'SELECT <chain> FROM dual' and returns
-- its result as XML; XMLTABLE reads the single value back as a NUMBER.
-- NOTE(review): this concatenates data into SQL text; it looks safe only while
-- chain is built from numeric values. Confirm before reusing with other input.
passing dbms_xmlgen.getXMLType('SELECT ' || p.chain || ' FROM dual')
COLUMNS val NUMBER PATH '.')
WHERE p.chain IS NOT NULL
) o;
https://dbfiddle.uk/?rdbms=oracle_18&fiddle=c12284b5165da61de8ed418cdf1d6cd7
A prettier solution:
-- Oracle: PRODUCT(1 + val) - 1 via exp(sum(ln(...))).
-- ln() only accepts positive arguments, so the magnitude and the sign of the
-- product are computed separately:
--   * magnitude: exp(sum(ln(abs(factor)))) over the non-zero factors;
--   * sign: negative when the number of negative factors is odd;
--   * any factor equal to 0 makes the whole product 0 (ln(0) would raise an error).
-- Doing it in one aggregate also covers the case where there are no negative (or no
-- non-negative) factors, which makes a separate neg/pos subquery return NULL.
-- With no input rows at all the result is NULL.
with data(id, val) as (
    select 1, -1.64 from dual union all
    select 2, 1.38 from dual union all
    select 3, 0.00 from dual union all
    select 4, 0.00 from dual union all
    select 5, -1.52 from dual union all
    select 6, 0.00 from dual union all
    select 7, -1.78 from dual
),
-- One multiplicative factor (1 + val) per input row.
factors(f) as (
    select val + 1 from data
)
select case
           when count(case when f = 0 then 1 end) > 0 then 0
           -- nullif turns a 0 factor into NULL so ln() never sees it; sum() skips NULLs.
           else exp(sum(ln(abs(nullif(f, 0)))))
                * case when mod(count(case when f < 0 then 1 end), 2) = 0 then 1 else -1 end
       end - 1 as product_value
from factors

get the range of sequence values in table column

I have a list of values in my column, and I want to query the ranges of consecutive values.
Eg. If values are 1,2,3,4,5,9,11,12,13,14,17,18,19
I want to display
1-5,9,11-14,17-19
Assuming that each value is stored on a separate row, you can use some gaps-and-island technique here:
-- Gaps-and-islands: consecutive values share the same (val - rn) difference, so
-- grouping by it yields one row per run of consecutive values.
with numbered as (
    select val,
           row_number() over (order by val) as rn
    from mytable
)
select case when min(val) = max(val)
            then min(val)                          -- single value: print it alone
            else concat(min(val), '-', max(val))   -- run: print "first-last"
       end as val_range
from numbered
group by val - rn
order by min(val)
The idea is to build groups of consecutive values by taking the difference between the value and an incrementing rank, which is computed using row_number() (available in MySQL 8.0):
Demo on DB Fiddle:
| val_range |
| :-------- |
| 1-5 |
| 9 |
| 11-14 |
| 17-19 |
In earlier versions, you can emulate row_number() with a correlated subquery, or a user variable. The second option goes like:
-- Gaps-and-islands for MySQL versions without row_number(): a user variable
-- numbers the rows instead.
-- User variables are written @name; a leading '#' starts a comment in MySQL and
-- would cut the statement short.
-- NOTE(review): this relies on the inner ORDER BY being honoured while @rn is
-- incremented, which MySQL does not guarantee; verify on the target version.
select case when min(val) <> max(val)
            then concat(min(val), '-', max(val))
            else min(val)
       end val_range
from (select @rn := 0) x            -- initialise the counter
cross join (
    select val, @rn := @rn + 1 rn   -- running row number in val order
    from (select val from mytable order by val) t
) t
group by val - rn                   -- constant within a run of consecutive values
order by min(val)
As a complement to other answers:
-- Pairs every range start with the nearest range end at or after it.
--   lo: a value with no predecessor (val - 1 is absent), so it starts a range.
--   hi: a value with no successor (val + 1 is absent), so it ends a range.
select lo.val as dnval,
       min(hi.val) as upval
from mytable lo
inner join mytable hi
    on hi.val >= lo.val
where not exists (select 1 from mytable prev where prev.val = lo.val - 1)
  and not exists (select 1 from mytable nxt where nxt.val = hi.val + 1)
group by lo.val
order by lo.val;
1 5
9 9
11 14
17 19
Needless to say, using an OLAP function, as @GNB does, is orders of magnitude more efficient.
A short article on how to mimic OLAP functions in MySQL < 8 can be found at:
mysql-row_number
Fiddle
EDIT:
If another dimension is introduced (in this case p), something like:
-- Same start/end pairing as the single-dimension query, evaluated separately
-- within each value of p.
--   lo: a value whose predecessor (val - 1) is absent within the same p.
--   hi: a value whose successor (val + 1) is absent within the same p.
select lo.p,
       lo.val as dnval,
       min(hi.val) as upval
from mytable lo
inner join mytable hi
    on hi.p = lo.p
   and hi.val >= lo.val
where not exists (select 1 from mytable prev where prev.val = lo.val - 1 and prev.p = lo.p)
  and not exists (select 1 from mytable nxt where nxt.val = hi.val + 1 and nxt.p = hi.p)
group by lo.p, lo.val
order by lo.p, lo.val;
can be used, see Fiddle2

mysql move the last N characters of each line of data to the next line

I want to move the last 3 characters of each row's value to the next row; the first row is filled with xxx.
example:
Now I have TableOne and I want to get TableTwo. Thanks!
[update]
mysql version is 5.7.22, not support lag function
If your version of MySQL/MariaDB supports window functions and you have already created TableTwo, you can insert the new rows like this:
-- Shifts the last 3 characters of every num to the following row (in id order).
-- Each output row = last 3 characters of the previous row + this row's value
-- without its own last 3 characters; the first row is padded with 'xxx'.
insert into TableTwo (id, num)
select
    id,
    concat(
        coalesce(lag(right(num, 3)) over (order by id), 'xxx'),
        -- Everything except the last 3 characters (2 characters for 5-character
        -- values); a non-positive length makes left() return ''.
        coalesce(left(num, char_length(num) - 3), '')
    ) as num
from (
    select id, num from TableOne
    union all
    -- Extra trailing row that receives the last row's final 3 characters.
    -- HAVING skips it when TableOne is empty, so no (NULL, 'xxx') row is inserted.
    select max(id) + 1, null from TableOne having count(*) > 0
) t;
See the demo.
Without window functions you can do it with a self join:
-- Shifts the last 3 characters of every num to the following row without window
-- functions: each row is joined to the row whose id is exactly one lower.
-- NOTE(review): the join assumes ids are consecutive; after a gap in id the row
-- finds no predecessor and is padded with 'xxx'. Confirm against the data.
insert into TableTwo (id, num)
select
    t.id,
    concat(
        coalesce(right(t1.num, 3), 'xxx'),
        -- Everything except the last 3 characters (2 characters for 5-character
        -- values); a non-positive length makes left() return ''.
        coalesce(left(t.num, char_length(t.num) - 3), '')
    ) as num
from (
    select id, num from TableOne
    union all
    -- Extra trailing row that receives the last row's final 3 characters.
    -- HAVING skips it when TableOne is empty, so no (NULL, 'xxx') row is inserted.
    select max(id) + 1, null from TableOne having count(*) > 0
) t left join TableOne t1
    on t1.id = t.id - 1;
See the demo.
Results:
> id | num
> -: | :----
> 1 | xxxab
> 2 | cde01
> 3 | 23456
> 4 | 789

mysql - Select the 3 smallest values from multiple columns

ID COL1 COL2 COL3 COL4 COL5 COL6 COL7
-------------------------------------
1 4 13 8 0 9 11 2
2 12 3 3 10 17 12 9
3 17 0 0 19 3 1 3
4 5 0 16 0 9 11 2
Here is an example table of data.
What I need to be able to do is to select and label the three smallest values in each row so I can identify each one.
For instance I want to know that in Row2 the three smallest values are 3,3,9 and that they are in COL2,COL3,COL7
I am thinking that I need to incorporate the LEAST() command provided in mysql but it only seems to return one value (the smallest one).
SELECT LEAST(COL1,COL2,COL3,COL4,COL5,COL6,COL7)
I can't seem to figure out how to get the 3 smallest values instead of just one.
Unfortunately your table is not normalized :(
In this case a possible solution is to unpivot the table using a query like this:
-- Unpivots Table1: one output row per (id, source column), carrying the column's
-- name in colname and its content in value.
CREATE VIEW unpivoted (id, colname, value) AS
    SELECT id, 'col1', col1 FROM Table1
    UNION ALL
    SELECT id, 'col2', col2 FROM Table1
    UNION ALL
    SELECT id, 'col3', col3 FROM Table1
    UNION ALL
    SELECT id, 'col4', col4 FROM Table1
    UNION ALL
    SELECT id, 'col5', col5 FROM Table1
    UNION ALL
    SELECT id, 'col6', col6 FROM Table1
    UNION ALL
    SELECT id, 'col7', col7 FROM Table1
and then use a query like the below to find 3 minimum values and then pivot results back:
-- Finds the 3 smallest values per id from the unpivoted view and pivots them back
-- into one row per id, using user variables as a per-id row counter.
-- User variables are written @name; a leading '#' starts a comment in MySQL.
-- NOTE(review): this relies on the inner ORDER BY being honoured while the
-- variables are updated, which MySQL does not guarantee; verify on the target version.
SET @x = 0;         -- position of the current row within its id (0-based)
SET @lastid = -999; -- id of the previously processed row; -999 is a start sentinel
SELECT id,
    min( IF( x = 0, colname, null )) As Colname1,
    min( IF( x = 0, value, null )) As value1,
    min( IF( x = 1, colname, null )) As Colname2,
    min( IF( x = 1, value, null )) As value2,
    min( IF( x = 2, colname, null )) As Colname3,
    min( IF( x = 2, value, null )) As value3
FROM (
    SELECT id, colname, value,
        -- Same id as the previous row: advance the counter. New id: remember it and
        -- restart at 0 (both branches of the inner IF reset, because the result of
        -- the assignment depends on the id value).
        IF( @lastid = id, @x:=@x+1,
            IF( (@lastid:=id), @x:=0, @x:=0 )
        ) As x
    FROM unpivoted
    ORDER BY id, value
) q
WHERE x < 3 -- keep only the three smallest values of each id
GROUP BY id
Demo: http://sqlfiddle.com/#!2/f20ee4/5
But the speed of these queries will be horribly slow, don't even try them on a big table.
You need to normalize the table.
Kordirko's solution may be the fastest way to do it (without a whole lot of comparison logic on each row). However, the following is more intuitive, in my opinion:
-- Lists, per id, the names and the values of the 3 smallest columns as
-- comma-separated strings.
-- colname is a secondary sort key so that equal values always come out in the
-- same order; both lists then line up position by position and the result does
-- not change between runs.
select id,
    substring_index(group_concat(colname order by value, colname), ',', 3) as Top3Columns,
    substring_index(group_concat(value order by value, colname), ',', 3) as Top3Values
from unpivoted
group by id;
This puts the names and values each in one column, as a concatenated list. You can use a similar idea if you want them in separate columns.

how to get the table column names as entries in result column set?

The database schema consists of:
Table1(code, col1, col2, col3, col4, col5)
What to do is:
For the Table1 with the maximal code value from Table1 table, obtain all its characteristics (except for a code) in two columns:
The name of the characteristic (a name of a corresponding column in the PC table);
Value of the characteristic.
I don't have any idea how to get the table column names in my result column set.
The final result will look like:
chr value
col1 133
col2 80
col3 28
col4 2
col5 50
This is an unpivot operation. The simplest way is using union all. However, the following is generally more efficient:
-- Unpivots col1..col5 of Table1 into (chr, value) rows by cross-joining every row
-- with the numbers 1..5 and picking the matching column for each number.
-- The table is read once instead of once per column. "table" is a reserved word
-- and cannot be used as a table name unquoted, so the real table is named here.
select (case n.n
            when 1 then 'col1'
            when 2 then 'col2'
            when 3 then 'col3'
            when 4 then 'col4'
            when 5 then 'col5'
        end) as chr,
       (case n.n
            when 1 then col1
            when 2 then col2
            when 3 then col3
            when 4 then col4
            when 5 then col5
        end) as value
from Table1 t cross join
     (select 1 as n union all select 2 union all select 3 union all select 4 union all select 5
     ) n;
This is more efficient when your table is big or is itself a complicated subquery, because the source is read only once.
The union all version is:
-- Unpivots col1..col5 of Table1 with one branch per column; each branch reads the
-- table again. "table" is a reserved word and cannot be used as a table name
-- unquoted, so the real table is named here.
select 'col1' as chr, col1 as value from Table1 t union all
select 'col2', col2 from Table1 t union all
select 'col3', col3 from Table1 t union all
select 'col4', col4 from Table1 t union all
select 'col5', col5 from Table1 t;
-- Sketch only: one branch per column, each returning the column's name and the
-- maximum of that column over table1.
SELECT 'col1', MAX(col1) FROM table1
UNION
SELECT 'col2', MAX(col2) FROM table1
UNION
-- Placeholder, not SQL: the branches for the remaining columns are elided here.
...
-- Returns the characteristics of the pc row(s) with the highest code as
-- (chr, value) pairs, one row per characteristic.
-- The row is looked up once in "latest" instead of once per branch.
-- UNION ALL: the chr labels already make the branches disjoint, so UNION's
-- de-duplication step is unnecessary.
-- NOTE(review): in SQL Server, CAST(... AS varchar) without a length means
-- varchar(30) and silently truncates longer values. 50 is assumed to be wide
-- enough; confirm against pc's column definitions.
WITH latest AS (
    SELECT cd, model, speed, ram, hd, price
    FROM pc
    WHERE code = (SELECT max(code) FROM pc)
)
SELECT 'cd' AS chr, cast(cd AS varchar(50)) AS value FROM latest
UNION ALL
SELECT 'model', cast(model AS varchar(50)) FROM latest
UNION ALL
SELECT 'speed', cast(speed AS varchar(50)) FROM latest
UNION ALL
SELECT 'ram', cast(ram AS varchar(50)) FROM latest
UNION ALL
SELECT 'hd', cast(hd AS varchar(50)) FROM latest
UNION ALL
SELECT 'price', cast(price AS varchar(50)) FROM latest
-- Returns the characteristics of the pc row(s) with the highest code as
-- (char, value) pairs using the UNPIVOT operator.
-- Every unpivoted column is cast to the same type first, because UNPIVOT needs
-- all source columns to share one type.
Select char, value
From
    (Select code,
        cast(speed as varchar(20)) speed,
        cast(ram as varchar(20)) ram,
        cast(hd as varchar(20)) hd,
        cast(model as varchar(20)) model,
        cast(cd as varchar(20)) cd,
        cast(price as varchar(20)) price
    FROM pc
    ) src
UNPIVOT
    (value For char in (speed,ram,hd,model,cd,price)
    ) As unpvt
where
    code in (Select max(code) from PC)