Related
Update / Answer
To fix this issue, I added the keyword BINARY to each side of the comparison operator. This generated the expected results. Also note that I set the variable to empty string. Setting this to NULL did not resolve the issue.
-- Start from an empty string, not NULL: a comparison with NULL yields NULL,
-- so IF() would never take the "value changed" branch.
SET @previousstate = '';
SELECT
-- BINARY on both operands forces a byte-wise comparison, which sidesteps the
-- "Illegal mix of collations" error between the variable and the column.
if(BINARY @previousstate != BINARY frm.fi_details_temp.PortfolioCode,
End Update / Answer
This is somewhat related to MYSQL Stored Procedure - Concat Row Value. That thread was based on Mysql 5.7.
The following statement worked under MySQL 5.7. I am in the process of trying to get it to work in MySQL 8. I had to update the SET statement to 'SET @previousstate = NULL;', otherwise I get "Illegal mix of collations (utf8mb4_0900_ai_ci,IMPLICIT) and (utf8mb4_unicode_ci,IMPLICIT) for operation '<>'". This error was way above my understanding; I am just a marketing guy working in a small firm. So that is why I set it to NULL.
With that said, after updating the SET statement the result set returns but the if() statement is not working like it did under 5.7. The very first row returned should have the first column of data marked with '*' at the beginning and end, and as the data in the column changes it should mark it too.
So it should look like this:
*brownfdn*
brownfdn
*brownfmi*
brownfmi
But I am getting this:
brownfdn
brownfdn
brownfmi
brownfmi
Do you have any thoughts about what I am doing wrong? I appreciate your help!! Getting to MySql 8 has been a struggle.
5.7
-- MySQL 5.7 version of the report (trimmed): the first row of each
-- PortfolioCode is returned with the code wrapped in '*'.
CREATE table fi_details_temp AS (
    SELECT
        frm.fi_details.PortfolioCode,
        frm.fi_details.MaturityDate_Final
    From
        frm.fi_details
    Where
        frm.fi_details.manager = 'Bartz' And
        frm.fi_details.PortfolioCode Like ('Crown%')
    Group By
        frm.fi_details.PortfolioCode,
        frm.fi_details.MaturityDate_Final
    Order By
        frm.fi_details.PortfolioCode );  -- closes the parenthesis opened by "AS ("
-- User variables take the '@' sigil ('#' would start a comment in MySQL).
-- @previousstate holds the PortfolioCode of the row processed before this one.
SET @previousstate = '';
SELECT
    -- When the code differs from the previous row's, remember it and emit it starred.
    if(@previousstate != frm.fi_details_temp.PortfolioCode, concat('*',@previousstate:= frm.fi_details_temp.PortfolioCode,'*'), frm.fi_details_temp.PortfolioCode) as 'Portfolio Code',
    frm.fi_details_temp.MaturityDate_Final as 'Maturity Date'
From frm.fi_details_temp;
DROP TABLE IF EXISTS fi_details_temp;
END
*** Create Statements - the above was a trimmed down version ***
-- Fixed-income holdings detail. Each *_Final DATE column sits next to a
-- VARCHAR(10) column of the same base name.
-- NOTE(review): idx_fi_details_PortfolioCode and idx_fi_details_MaturityDate_Final
-- are left prefixes of the composite indexes below - confirm whether both are needed.
CREATE TABLE `fi_details` (
    `id`                    INT NOT NULL AUTO_INCREMENT,
    `RecordDate`            DATETIME DEFAULT CURRENT_TIMESTAMP,
    `PortfolioCode`         VARCHAR(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `manager`               VARCHAR(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `RunDate`               VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `RunDate_Final`         DATE DEFAULT NULL,
    `AsOfDate`              VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `AsOfDate_Final`        DATE DEFAULT NULL,
    `Symbol`                VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `Coupon`                DOUBLE DEFAULT NULL,
    `MaturityDate`          VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `MaturityDate_Final`    DATE DEFAULT NULL,
    `ParValue`              DOUBLE DEFAULT NULL,
    `MarketValue`           DOUBLE DEFAULT NULL,
    `AdjustedUnitCost`      DOUBLE DEFAULT NULL,
    `TotalAdjustedCost`     DOUBLE DEFAULT NULL,
    `SPRating`              VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `MoodyRating`           VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `YieldCost`             DOUBLE DEFAULT NULL,
    `YieldMarket`           DOUBLE DEFAULT NULL,
    `YieldCall`             DOUBLE DEFAULT NULL,
    `YieldPut`              DOUBLE DEFAULT NULL,
    `YieldWorst`            DOUBLE DEFAULT NULL,
    `WorstDate`             VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `WorstDate_Final`       DATE DEFAULT NULL,
    `CallDate`              VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `CallDate_Final`        DATE DEFAULT NULL,
    `CallPrice`             DOUBLE DEFAULT NULL,
    `PutDate`               VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `PutDate_Final`         DATE DEFAULT NULL,
    `PutPrice`              DOUBLE DEFAULT NULL,
    `FirstCouponDate`       VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `FirstCouponDate_Final` DATE DEFAULT NULL,
    `LastCouponDate`        VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `LastCouponDate_Final`  DATE DEFAULT NULL,
    `Freq`                  INT DEFAULT NULL,
    `BondCalendarCode`      CHAR(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `SecTypeCode`           VARCHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    `Security`              VARCHAR(80) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
    PRIMARY KEY (`id`),
    KEY `idx_fi_details_MaturityDate_Final` (`MaturityDate_Final`),
    KEY `idx_fi_details_PortfolioCode` (`PortfolioCode`),
    KEY `idx_fi_details_PortfolioCode_MaturityDate_Final` (`PortfolioCode`, `MaturityDate_Final`),
    KEY `idx_fi_details_manager_MaturityDate_Final` (`manager`, `MaturityDate_Final`),
    KEY `idx_fi_details_MaturityDate_Final_manager` (`MaturityDate_Final`, `manager`)
) ENGINE=InnoDB AUTO_INCREMENT=234850 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Worst-date report for manager 'Hartz', restricted to portfolios whose code
-- starts with 'Brown'. Returns a single result set, ordered by portfolio and
-- then worst date; the first row of every portfolio has its code wrapped in
-- '*' (e.g. *brownfdn*), later rows of the same portfolio show the bare code.
--
-- Why this no longer uses a user variable and a scratch table:
--   * The marker used to compare a user variable initialised to NULL with the
--     column. Any comparison with NULL yields NULL, so IF() always fell through
--     to the unmarked branch and no row was ever starred.
--   * Initialising the variable to '' instead triggers "Illegal mix of
--     collations", because the variable and the utf8mb4_unicode_ci column
--     carry different implicit collations.
--   * The marker also relied on rows coming back from the scratch table in
--     insertion order, which a SELECT without ORDER BY does not guarantee.
-- ROW_NUMBER() makes "first row of each portfolio" explicit, has no collation
-- mismatch to trip over, and lets the final ORDER BY fix the output order.
-- The scratch table fi_details_temp is therefore no longer created or dropped.
CREATE DEFINER=`root`@`localhost` PROCEDURE `fi_details_Worst_Date_Hartz_Brown`()
BEGIN
    WITH ranked_holdings AS (
        -- Grouping by every selected column collapses exact duplicate rows.
        -- Window functions run after GROUP BY, so the numbering is applied to
        -- the de-duplicated rows.
        SELECT
            d.PortfolioCode,
            d.MaturityDate_Final,
            d.YieldWorst,
            d.WorstDate_Final,
            d.YieldMarket,
            d.Symbol,
            d.Security,
            d.Coupon,
            d.ParValue,
            d.MarketValue,
            d.TotalAdjustedCost,
            d.AdjustedUnitCost,
            d.YieldCost,
            d.YieldCall,
            d.YieldPut,
            d.CallDate_Final,
            ROW_NUMBER() OVER (
                PARTITION BY d.PortfolioCode
                ORDER BY d.WorstDate_Final
            ) AS row_in_portfolio
        FROM frm.fi_details AS d
        WHERE d.manager = 'Hartz'
          AND d.PortfolioCode LIKE 'Brown%'
        GROUP BY
            d.PortfolioCode,
            d.MaturityDate_Final,
            d.YieldWorst,
            d.WorstDate_Final,
            d.YieldMarket,
            d.Symbol,
            d.Security,
            d.Coupon,
            d.ParValue,
            d.MarketValue,
            d.TotalAdjustedCost,
            d.AdjustedUnitCost,
            d.YieldCost,
            d.YieldCall,
            d.YieldPut,
            d.CallDate_Final
    )
    SELECT
        CASE
            WHEN r.row_in_portfolio = 1 THEN CONCAT('*', r.PortfolioCode, '*')
            ELSE r.PortfolioCode
        END                 AS `Portfolio Code`,
        r.MaturityDate_Final AS `Maturity Date`,
        r.YieldWorst         AS `Worst Yield`,
        r.WorstDate_Final    AS `Worst Date`,
        r.YieldMarket        AS `Yield to Market`,
        r.Symbol,
        r.Security           AS `Description`,
        r.Coupon,
        r.ParValue           AS `Par Value`,
        r.MarketValue        AS `Market Value xAI`,
        r.TotalAdjustedCost  AS `Adjusted Cost`,
        r.AdjustedUnitCost   AS `Unit Adjusted Cost`,
        r.YieldCost          AS `Yield to Cost`,
        r.YieldCall          AS `Yield to Call`,
        r.YieldPut           AS `Yield to Put`,
        r.CallDate_Final     AS `Next Call Date`
    FROM ranked_holdings AS r
    -- row_in_portfolio already follows WorstDate_Final inside each portfolio,
    -- so the starred row is always the first one shown for its portfolio.
    ORDER BY
        r.PortfolioCode,
        r.row_in_portfolio;
END
I have following table - 'element'
-- Element table from the question; every character column uses utf8_bin,
-- and the only index is the primary key on eid.
CREATE TABLE `element` (
    `eid`                  BIGINT(22) NOT NULL AUTO_INCREMENT,
    `tag_name`             VARCHAR(45) COLLATE utf8_bin DEFAULT NULL,
    `text`                 TEXT COLLATE utf8_bin,
    `depth`                TINYINT(2) DEFAULT NULL,
    `classes`              TINYTEXT COLLATE utf8_bin,
    `webarchiver_uniqueid` INT(11) DEFAULT NULL,
    `created`              DATETIME DEFAULT NULL,
    `updated`              DATETIME DEFAULT NULL,
    `rowstatus`            CHAR(1) COLLATE utf8_bin DEFAULT 'A',
    PRIMARY KEY (`eid`)
) ENGINE=InnoDB AUTO_INCREMENT=12090 DEFAULT CHARSET=utf8 COLLATE=utf8_bin;
Column details and current index details are given above. Almost 90% of queries on this table are like:
-- Typical lookup: equality filters on five columns of element.
SELECT *
FROM element
WHERE tag_name = 'XXX'
  AND text = 'YYYY'
  AND depth = 20
  AND classes = 'ZZZZZ'
  AND rowstatus = 'A'
What would be the most optimal way to create index on this table? The table has around 60k rows.
Change classes from TINYTEXT to VARCHAR(255) (or some more reasonable size), then have
INDEX(tag_name, depth, classes)
with the columns in any order. I left out rowstatus because it smells like a column that is likely to change. (Anyway, a flag does not add much to an index.)
You can't include TEXT or BLOB columns in an index. And it is not worth it to do a 'prefix' index.
Since a PRIMARY KEY is a UNIQUE key, DROP INDEX eid_UNIQUE.
Is there some reason for picking "binary" / "utf8_bin" for all the character fields?
I need some help getting a MySQL table to store and output characters from the following languages:
English
French
Russian
Turkish
German
These are the languages I know of in the data. It also uses mathematical symbols such as this:
b ∈ A. Define s(A):= supn≥0 r A (n) for each A ⊆ ? ∪ {0}.
I'm using htmlentities to encode the text. The ? above is meant to display as a ℕ.
It displays this way when I look at the data in PhpMyAdmin. The other characters are encoded as expected.
The table is set to utf8_unicode_ci and all aspects of the website have been set to UTF-8 (including via the .htaccess file, a PHP header and a meta tag).
Please help?
Additional info:
Hosting environment:
Linux, Apache
Mysql 5.5.38
PHP Version 5.4.4-14
Connection string :
// PHP-side default charset for output.
ini_set('default_charset', 'UTF-8');

// Connect without a default schema; the schema is selected below.
$mysqli = new mysqli($DB_host, $DB_username, $DB_password);

// Connection charset is set before the schema is selected.
$mysqli->set_charset('utf8');
$mysqli->select_db($DB_name);
SHOW CREATE TABLE mydatabase.mytable output:
-- SHOW CREATE TABLE output as posted; `numerous_other_tinytext_cols` is the
-- poster's placeholder for several similar columns.
CREATE TABLE `tablename` (
    `id`                           INT(11) NOT NULL AUTO_INCREMENT,
    `created`                      DATETIME NOT NULL,
    `updated`                      DATETIME NOT NULL,
    `product`                      INT(11) NOT NULL,
    `ppub`                         TINYTEXT COLLATE utf8_unicode_ci NOT NULL,
    `pubdate`                      DATE NOT NULL,
    `numerous_other_tinytext_cols` TINYTEXT COLLATE utf8_unicode_ci NOT NULL,
    `numerous_other_tinytext_cols` TINYTEXT COLLATE utf8_unicode_ci NOT NULL,
    `text`                         TEXT COLLATE utf8_unicode_ci NOT NULL,
    `keywords`                     TINYTEXT COLLATE utf8_unicode_ci NOT NULL,
    `active`                       INT(11) NOT NULL DEFAULT '1',
    `orderid`                      INT(11) NOT NULL,
    `src`                          TINYTEXT CHARACTER SET latin1 NOT NULL,
    `views`                        INT(11) NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=17780 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
SELECT DEFAULT_CHARACTER_SET_NAME FROM information_schema.SCHEMATA output:
DEFAULT_CHARACTER_SET_NAME
utf8 [->UTF-8 Unicode]
utf8mb4 [->UTF-8 Unicode]
Fonts used:
Arial
Sample of text in the database:
Let <em>A</em> be a subset of the set of nonnegative integers ℕ ∪ {0}, and let <em>r</em><sub><em>A</em></sub> (<em>n</em>) be the number of representations of <em>n</em> ≥ 0 by the sum <em>a</em> + <em>b</em> with <em>a, b</em> ∈ <em>A</em>.
Output on web page:
Let <em>A</em> be a subset of the set of nonnegative integers ? ∪ {0}, and let <em>r</em><sub><em>A</em></sub> (<em>n</em>) be the number of representations of <em>n</em> ≥ 0 by the sum <em>a</em> + <em>b</em> with <em>a, b</em> ∈ <em>A</em>.
Which becomes
Let A be a subset of the set of nonnegative integers ? ∪ {0}, and let rA (n) be the number of representations of n ≥ 0 by the sum a + b with a, b ∈ A.
While your database and table are configured to use UTF-8, one of your columns still isn't:
-- Same definition as in the question, with the one non-UTF-8 column flagged.
-- `numerous_other_tinytext_cols` is the poster's placeholder for several
-- similar columns.
CREATE TABLE `tablename` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `created` datetime NOT NULL,
    `updated` datetime NOT NULL,
    `product` int(11) NOT NULL,
    `ppub` tinytext COLLATE utf8_unicode_ci NOT NULL,
    `pubdate` date NOT NULL,
    `numerous_other_tinytext_cols` tinytext COLLATE utf8_unicode_ci NOT NULL,
    `numerous_other_tinytext_cols` tinytext COLLATE utf8_unicode_ci NOT NULL,
    `text` text COLLATE utf8_unicode_ci NOT NULL,
    `keywords` tinytext COLLATE utf8_unicode_ci NOT NULL,
    `active` int(11) NOT NULL DEFAULT '1',
    `orderid` int(11) NOT NULL,
    `src` tinytext CHARACTER SET latin1 NOT NULL,  -- <--------- This one: still latin1
    `views` int(11) NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=17780 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
Since all the other symbols got HTML-encoded, they will survive all charsets, but not ℕ, which doesn't have a named entity reference.
You need to convert your column:
-- MODIFY replaces the whole column definition, so every attribute that should
-- survive must be restated: NOT NULL (the column was declared NOT NULL) and the
-- table's utf8_unicode_ci collation (utf8 alone would default to utf8_general_ci).
ALTER TABLE tablename
    MODIFY src TINYTEXT CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL;
NOTE: I noticed you like mathematical symbols. Some of them are outside of the Basic Multilingual Plane, ie. have codepoints > 0xFFFF, for example mathematical letter variants (fraktur, double-struck, semantic italic etc.).
If you want to support them, you need to switch encoding in MySQL everywhere (table, columns, connection) to utf8mb4, which is the true UTF-8 (utf8 in MySQL means the subset of UTF-8 with BMP only), with utf8mb4_unicode_ci collation. Here is how to do the migration.
Also, I have noticed that you are HTML-encoding HTML. Maybe you have a reason, but in my opinion storing this doesn't make sense:
&lt;em&gt;A&lt;/em&gt;
If you want to put it into an HTML document, now you need to HTML-decode it at least once, sometimes twice. I'd rather store what almost everybody else does:
<em>A</em>
This way, you will store Unicode characters natively, in optimal way.
I already tried This solution which says
-- Sets the default character set and collation of table `title`.
ALTER TABLE title CHARACTER SET utf8 COLLATE utf8_unicode_ci;
Ok here are some screen shots which might help you.
Update
here's what happens when i insert Japanese characters.
Update 2
Show create table gives this
-- SHOW CREATE TABLE output: the table default is utf8 / utf8_unicode_ci, but
-- each character column carries its own explicit latin1 character set.
CREATE TABLE `productInfo` (
    `pID`        INT(11) NOT NULL AUTO_INCREMENT,
    `pOperation` VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `year`       YEAR(4) DEFAULT NULL,
    `season`     VARCHAR(10) CHARACTER SET latin1 DEFAULT NULL,
    `pName`      VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `category`   VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `margin1`    TEXT CHARACTER SET latin1,
    `margin2`    TEXT CHARACTER SET latin1,
    PRIMARY KEY (`pID`)
) ENGINE=InnoDB AUTO_INCREMENT=12 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
just see that
DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
But now see that the query
-- Effective character set and collation of trac_data.productInfo.pOperation.
SELECT
    character_set_name,
    collation_name
FROM information_schema.columns
WHERE table_schema = 'trac_data'
  AND table_name = 'productInfo'
  AND column_name = 'pOperation';
gives
character_set_name collation_name
'latin1' 'latin1_swedish_ci'
That's weird!
Update 3
-- Show the stored bytes of pOperation next to the value itself.
SELECT
    hex(pOperation),
    pOperation
FROM trac_data.productInfo;
gave 3F3F3F3F3F, which is the hex code for literal '?' characters and not for any Japanese characters, so that means no Japanese characters are being stored.
You have a mix of charsets in your table structure. The table itself uses utf8, but the column in question uses latin1. You have it defined that way. As long as the column has its own charset, you can change the table's or the schema's charset a thousand times; it won't have any effect on your column. So, instead, change the column's charset either to the default (to use that of the table) or to utf8 explicitly.
When you alter the column's charset existing data will be converted (if possible). Your wrong input however stays wrong, so you have to fill the data again.
Ok i found the cause
-- Same definition as shown earlier: every character column is declared with
-- CHARACTER SET latin1 while the table default is utf8 / utf8_unicode_ci.
CREATE TABLE `productInfo` (
    `pID`        INT(11) NOT NULL AUTO_INCREMENT,
    `pOperation` VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `year`       YEAR(4) DEFAULT NULL,
    `season`     VARCHAR(10) CHARACTER SET latin1 DEFAULT NULL,
    `pName`      VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `category`   VARCHAR(40) CHARACTER SET latin1 DEFAULT NULL,
    `margin1`    TEXT CHARACTER SET latin1,
    `margin2`    TEXT CHARACTER SET latin1,
    PRIMARY KEY (`pID`)
) ENGINE=InnoDB AUTO_INCREMENT=12 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci
I noticed how in front of each column SET latin1 was present.
So I Just changed to sjis and problem solved.
You have to set the database collation to UTF-8, not only the table collation :
Here is the SQL script result :
Let's take an example hotels table:
-- Example hotels table from the question: MyISAM, table default collation
-- latin1_german1_ci, one FULLTEXT index spanning all twelve columns.
CREATE TABLE `hotels` (
    `HotelNo`         VARCHAR(4) CHARACTER SET latin1 NOT NULL DEFAULT '0000',
    `Hotel`           VARCHAR(80) CHARACTER SET latin1 NOT NULL DEFAULT '',
    `City`            VARCHAR(100) CHARACTER SET latin1 DEFAULT NULL,
    `CityFR`          VARCHAR(100) CHARACTER SET latin1 DEFAULT NULL,
    `Region`          VARCHAR(50) CHARACTER SET latin1 DEFAULT NULL,
    `RegionFR`        VARCHAR(100) CHARACTER SET latin1 DEFAULT NULL,
    `Country`         VARCHAR(50) CHARACTER SET latin1 DEFAULT NULL,
    `CountryFR`       VARCHAR(50) CHARACTER SET latin1 DEFAULT NULL,
    `HotelText`       TEXT CHARACTER SET latin1,
    `HotelTextFR`     TEXT CHARACTER SET latin1,
    `tagsforsearch`   TEXT CHARACTER SET latin1,
    `tagsforsearchFR` TEXT CHARACTER SET latin1,
    PRIMARY KEY (`HotelNo`),
    FULLTEXT KEY `fulltextHotelSearch` (
        `HotelNo`, `Hotel`, `City`, `CityFR`, `Region`, `RegionFR`,
        `Country`, `CountryFR`, `HotelText`, `HotelTextFR`,
        `tagsforsearch`, `tagsforsearchFR`
    )
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_german1_ci;
In this table for example we have only one hotel with Region name = "Graubünden" (please note umlaut ü character)
And now I want to achieve same search match for phrases:
'graubunden' and
'graubünden'
This is simple with use of MySql built in
collations in regular searches as follows:
-- LIKE search on Region: the utf8 literal is converted to latin1 and the
-- latin1_german1_ci collation is applied.
SELECT *
FROM `hotels`
WHERE `Region` LIKE CONVERT(_utf8 '%graubunden%' USING latin1) COLLATE latin1_german1_ci
This works fine for 'graubunden' and 'graubünden' and
as a result I receive proper result, but problem is
when we make MySQL full text search
What's wrong with this SQL statement?
-- Boolean-mode full-text search for 'graubunden' across the hotels columns.
-- `Address` was removed from the MATCH list: the hotels table defines no such
-- column and it is not part of the fulltextHotelSearch index, so the list now
-- names exactly the indexed columns.
SELECT
    *
FROM
    hotels
WHERE
    MATCH (`HotelNo`, `Hotel`, `City`, `CityFR`, `Region`, `RegionFR`, `Country`, `CountryFR`, `HotelText`, `HotelTextFR`, `tagsforsearch`, `tagsforsearchFR`)
    AGAINST (CONVERT('+graubunden' USING latin1) COLLATE latin1_german1_ci IN BOOLEAN MODE)
ORDER BY Country ASC, Region ASC, City ASC;
This doesn't return any result.
Any ideas where the dog is buried ?
When you define individual CHARACTER SETS for your columns, you override the collation you set default on table level.
Each of your columns has default latin1 collation (which is latin1_swedish_ci). You can see it by running SHOW CREATE TABLE.
In FULLTEXT queries, indexed columns have COERCIBILITY of 0, that is all fulltext queries are converted to the collation used in the index, not vice versa.
You need to remove the CHARACTER SET definitions from your columns or explicitly set all columns to latin1_german1_ci:
-- hotels without per-column CHARACTER SET clauses; the table-level default is
-- latin1 / latin1_german1_ci.
CREATE TABLE `hotels` (
    `HotelNo`         VARCHAR(4) NOT NULL DEFAULT '0000',
    `Hotel`           VARCHAR(80) NOT NULL DEFAULT '',
    `City`            VARCHAR(100) DEFAULT NULL,
    `CityFR`          VARCHAR(100) DEFAULT NULL,
    `Region`          VARCHAR(50) DEFAULT NULL,
    `RegionFR`        VARCHAR(100) DEFAULT NULL,
    `Country`         VARCHAR(50) DEFAULT NULL,
    `CountryFR`       VARCHAR(50) DEFAULT NULL,
    `HotelText`       TEXT,
    `HotelTextFR`     TEXT,
    `tagsforsearch`   TEXT,
    `tagsforsearchFR` TEXT,
    PRIMARY KEY (`HotelNo`),
    FULLTEXT KEY `fulltextHotelSearch` (
        `HotelNo`, `Hotel`, `City`, `CityFR`, `Region`, `RegionFR`,
        `Country`, `CountryFR`, `HotelText`, `HotelTextFR`,
        `tagsforsearch`, `tagsforsearchFR`
    )
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_german1_ci;
-- One sample row; the umlaut spelling goes into tagsforsearch.
INSERT INTO hotels
    (hotelText, HotelTextFR, tagsforsearch, tagsforsearchFR)
VALUES
    ('text', 'text', 'graubünden', 'tags');
-- Boolean-mode full-text search over the twelve columns of fulltextHotelSearch,
-- using the unaccented spelling 'graubunden'.
SELECT *
FROM hotels
WHERE
    MATCH (`HotelNo`, `Hotel`, `City`, `CityFR`, `Region`, `RegionFR`, `Country`, `CountryFR`, `HotelText`, `HotelTextFR`, `tagsforsearch`, `tagsforsearchFR`)
    AGAINST (CONVERT('+graubunden' USING latin1) COLLATE latin1_german1_ci IN BOOLEAN MODE)
ORDER BY
    Country ASC,
    Region ASC,
    City ASC;