Mysql csv file selecting multiple value in one column - mysql

,PMID,LastName,ForeName,Initials,Affiliation
0,1,"['Makar', 'McMartin', 'Palese', 'Tephly']","['A B', 'K E', 'M', 'T R']","['AB', 'KE', 'M', 'TR']",
,,,,,
enter image description here
this is the csv file
I want to divide this to look like this
PMID Name Affiliation
1 Makar_A B_AB
1 MCMartin_K E_KE
1 Palese_M_M
1 Tephly_T R_TR
below is the code I wrote in mysql workbench
-- Staging table for the author CSV. The list-valued fields (LastName,
-- ForeName, Initials) are stored exactly as they appear in the file,
-- e.g. "['Makar', 'McMartin', 'Palese', 'Tephly']".
DROP TABLE IF EXISTS AuthorTBL;
CREATE TABLE IF NOT EXISTS AuthorTBL
(
    PMID        varchar(100),
    LastName    varchar(50),
    ForeName    varchar(50),
    Initials    varchar(50),
    Affiliation varchar(250)
);

-- Load the CSV. Each field is first read into a user variable (@name) and
-- then assigned to its column. User variables must be written with '@':
-- a '#' starts a comment in MySQL and would swallow the rest of the line.
-- @num receives the unnamed leading index column of the file and is discarded.
LOAD DATA INFILE 'abcd.csv'
INTO TABLE AuthorTBL
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(@num, @PMID, @LastName, @ForeName, @Initials, @Affiliation)
SET
    PMID        = @PMID,
    LastName    = @LastName,
    ForeName    = @ForeName,
    Initials    = @Initials,
    Affiliation = @Affiliation;
How can I change my code to make it work?

Related

Error importing csv dates into a mysql server

The following code fails when importing dates, and I can't figure out why. Dates in the CSV are formatted as DD/MM/YYYY; the import loads all the other data but leaves every date NULL. It also reports this error:
ER_UNKNOWN_SYSTEM_VARIABLE: Unknown system variable 'FECHA_POSICION'
Lines in the csv file look like:
EDC00001,66600/7089855,21/01/2021,21/01/2021,"DEPOSIT Deposit",4000,4000
EDC00002,66600/7089855,29/01/2021,29/01/2021,CFDs,"-9,94","3990,06"
-- Script as posted in the question: creates the statement table, then tries
-- to load the CSV while converting the two DD/MM/YYYY fields to DATE.
USE DATA_BASE;
CREATE TABLE ESTADO_DE_CUENTA (
ID_OPERACION VARCHAR(20) NOT NULL PRIMARY KEY,
ID_CUENTA VARCHAR(20),
FECHA_POSICION DATE,
FECHA_VALOR DATE,
CONCEPTO VARCHAR(100),
IMPORTE FLOAT(12, 2),
SALDO_EN_EFECTIVO FLOAT(12, 2)
);
-- NOTE(review): the table created above is ESTADO_DE_CUENTA, but the load
-- below targets ESTADO_DE_CUENTA2.
-- NOTE(review): the statement ends with two separate SET clauses; this is the
-- construct that produces the reported error.
-- NOTE(review): the '#'-prefixed names look like user variables that should be
-- written with '@' - confirm against the original post.
-- NOTE(review): the sample rows contain double-quoted fields with embedded
-- commas, but no ENCLOSED BY option is given - verify field splitting.
LOAD DATA LOCAL INFILE 'PATH.csv' INTO TABLE ESTADO_DE_CUENTA2
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
IGNORE 1 LINES
(ID_OPERACION, ID_CUENTA, #FECHA_POSICION, #FECHA_VALOR, CONCEPTO, IMPORTE,
SALDO_EN_EFECTIVO)
SET FECHA_POSICION = STR_TO_DATE(#FECHA_POSICION, '%d/%m/%Y')
SET FECHA_VALOR = STR_TO_DATE(#FECHA_VALOR, '%d/%m/%Y')
You can use SET only once, and all column assignments have to be separated by commas.
Like
USE DATA_BASE;

-- Load the account-statement CSV into ESTADO_DE_CUENTA2.
--   * A single SET clause carries all assignments, separated by commas.
--   * Fields that need conversion are read into user variables (@name);
--     '#' would start a comment in MySQL.
--   * OPTIONALLY ENCLOSED BY '"' keeps quoted fields such as "-9,94" or
--     "DEPOSIT Deposit" together instead of splitting them at the inner comma.
--   * Amounts use a decimal comma in the file, so it is replaced by a dot
--     before the value is stored in the numeric column.
-- NOTE(review): assumes ESTADO_DE_CUENTA2 has the same columns as the
-- ESTADO_DE_CUENTA definition shown in the question - confirm.
LOAD DATA LOCAL INFILE 'PATH.csv' INTO TABLE ESTADO_DE_CUENTA2
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 LINES
(ID_OPERACION, ID_CUENTA, @FECHA_POSICION, @FECHA_VALOR, CONCEPTO, @IMPORTE,
 @SALDO_EN_EFECTIVO)
SET
    `FECHA_POSICION`    = STR_TO_DATE(@FECHA_POSICION, '%d/%m/%Y'),
    `FECHA_VALOR`       = STR_TO_DATE(@FECHA_VALOR, '%d/%m/%Y'),
    `IMPORTE`           = REPLACE(@IMPORTE, ',', '.'),
    `SALDO_EN_EFECTIVO` = REPLACE(@SALDO_EN_EFECTIVO, ',', '.');

mysql map comma separated column values with new values by table lookup

I am using MySQL database.
I have a CUST_INV_DET table where I have data in below format
---------------------------
CUSTOMER_ID CUSTOMER_NO
---------------------------
1 1983,1988,1989
2 2014,2011,2010
3 3012,3059,3045
---------------------------
And there is another table. NEW_CUSTOMER_NO_FORMAT
--------------------------------------
OLD_CUSTOMER_NO NEW_CUSTOMER_NO
--------------------------------------
1983 C1983-01
1988 C1988-03
1989 C1989-06
2014 C2014-01
2011 C2011-02
2010 C2010-02
3012 C3012-03
3059 C3059-23
3045 C3045-09
Can anybody suggest me on how to map CUST_INV_DET.CUSTOMER_NO in a new format by doing lookup from NEW_CUSTOMER_NO_FORMAT Table.
I know I need to use cursor but inside cursor below section I am clueless how to map since it's a comma separated value.
Do I need one more cursor inside NO_MORE_DATA =0 ?
IF (NO_MORE_DATA = 0) THEN
//MAP OLD TO NEW FORMAT HERE
END IF;
Below are the insert script
-- Sample schema and data for the lookup problem.
-- CUST_INV_DET.CUSTOMER_NO holds a comma-separated list of old customer numbers.
CREATE TABLE CUST_INV_DET (
    CUSTOMER_ID INT(11) AUTO_INCREMENT PRIMARY KEY,
    CUSTOMER_NO VARCHAR(500)
);

INSERT INTO CUST_INV_DET (CUSTOMER_NO) VALUES
    ('1983,1988,1989'),
    ('2014,2011,2010'),
    ('3012,3059,3045');

-- Lookup table: one row per old customer number with its new-format value.
CREATE TABLE NEW_CUSTOMER_NO_FORMAT (
    OLD_CUSTOMER_NO VARCHAR(500),
    NEW_CUSTOMER_NO VARCHAR(500)
);

-- Columns are named explicitly so the insert does not depend on column order.
INSERT INTO NEW_CUSTOMER_NO_FORMAT (OLD_CUSTOMER_NO, NEW_CUSTOMER_NO) VALUES
    ('1983', 'C1983-01'),
    ('1988', 'C1988-03'),
    ('1989', 'C1989-06'),
    ('2014', 'C2014-01'),
    ('2011', 'C2011-02'),
    ('2010', 'C2010-02'),
    ('3012', 'C3012-03'),
    ('3059', 'C3059-23'),
    ('3045', 'C3045-09');
Below is the final output I am looking for
CUSTOMER_ID CUSTOMER_NO
1 'C1983-01,C1988-03,C1989-06'
2 'C2014-01,C2011-02,C2010-02'
3 'C3012-03,C3059-23,C3045-09'
-- Map every old customer number in CUST_INV_DET.CUSTOMER_NO to its new format
-- and re-assemble the list, one output row per CUSTOMER_ID (MySQL syntax).
--
-- FIND_IN_SET(item, list) returns the 1-based position of item in a
-- comma-separated list (0 when absent). It is used twice:
--   * in the join, to pair each lookup row with the lists that contain it;
--   * in GROUP_CONCAT's ORDER BY, to keep the original order of the list.
-- This works for any number of items per row, not just three.
--
-- Caveats: list items must not have surrounding spaces, and an old number
-- with no row in NEW_CUSTOMER_NO_FORMAT is dropped from the output list.
SELECT
    d.CUSTOMER_ID,
    GROUP_CONCAT(
        f.NEW_CUSTOMER_NO
        ORDER BY FIND_IN_SET(f.OLD_CUSTOMER_NO, d.CUSTOMER_NO)
        SEPARATOR ','
    ) AS CUSTOMER_NO
FROM CUST_INV_DET AS d
INNER JOIN NEW_CUSTOMER_NO_FORMAT AS f
    ON FIND_IN_SET(f.OLD_CUSTOMER_NO, d.CUSTOMER_NO) > 0
GROUP BY d.CUSTOMER_ID
ORDER BY d.CUSTOMER_ID;

CSV into MySQL using load data

I'm hoping to insert the contents of a CSV into my table using MySQL LOAD DATA INFILE; however, every time I do so with the following command, a number of rows are dropped.
-- Bulk-load new.csv into Example: comma-separated, double-quoted fields,
-- newline-terminated rows, header row skipped.
LOAD DATA INFILE 'new.csv'
    INTO TABLE Example
    FIELDS
        TERMINATED BY ','
        ENCLOSED BY '"'
    LINES
        TERMINATED BY '\n'
    IGNORE 1 LINES;
The CSV data I am hoping to insert is the Free Company Product Data supplied by Companies House (http://download.companieshouse.gov.uk/en_output.html)
I'd greatly appreciate any help.
Thanks!
Given this table definition, from the provided data specification here:
-- Target table for the Companies House basic company data CSV.
-- One column per CSV field, declared in file order.
CREATE TABLE companieshouse (
    -- Company identity and registered address
    CompanyName                       VARCHAR(160),
    CompanyNumber                     VARCHAR(8),
    RegAddressCareOf                  VARCHAR(100),
    RegAddressPOBox                   VARCHAR(10),
    RegAddressAddressLine1            VARCHAR(300),
    RegAddressAddressLine2            VARCHAR(300),
    RegAddressPostTown                VARCHAR(50),
    RegAddressCounty                  VARCHAR(60),
    RegAddressCountry                 VARCHAR(50),
    RegAddressPostCode                VARCHAR(20),
    -- Classification and lifecycle
    CompanyCategory                   VARCHAR(100),
    CompanyStatus                     VARCHAR(70),
    CountryOfOrigin                   VARCHAR(50),
    DissolutionDate                   DATE,
    IncorporationDate                 DATE,
    -- Accounts
    AccountsAccountRefDay             INTEGER,
    AccountsAccountRefMonth           INTEGER,
    AccountsNextDueDate               DATE,
    AccountsLastMadeUpDate            DATE,
    AccountsAccountCategory           VARCHAR(30),
    -- Returns
    ReturnsNextDueDate                DATE,
    ReturnsLastMadeUpDate             DATE,
    -- Mortgages
    MortgagesNumMortCharges           INTEGER,
    MortgagesNumMortOutstanding       INTEGER,
    MortgagesNumMortPartSatisfied     INTEGER,
    MortgagesNumMortSatisfied         INTEGER,
    -- SIC codes
    SICCodeSicText_1                  VARCHAR(170),
    SICCodeSicText_2                  VARCHAR(170),
    SICCodeSicText_3                  VARCHAR(170),
    SICCodeSicText_4                  VARCHAR(170),
    -- Limited partnerships
    LimitedPartnershipsNumGenPartners INTEGER,
    LimitedPartnershipsNumLimPartners INTEGER,
    URI                               VARCHAR(47),
    -- Previous names (change date + name), up to ten
    PreviousName1CONDATE              DATE,
    PreviousName1CompanyName          VARCHAR(160),
    PreviousName2CONDATE              DATE,
    PreviousName2CompanyName          VARCHAR(160),
    PreviousName3CONDATE              DATE,
    PreviousName3CompanyName          VARCHAR(160),
    PreviousName4CONDATE              DATE,
    PreviousName4CompanyName          VARCHAR(160),
    PreviousName5CONDATE              DATE,
    PreviousName5CompanyName          VARCHAR(160),
    PreviousName6CONDATE              DATE,
    PreviousName6CompanyName          VARCHAR(160),
    PreviousName7CONDATE              DATE,
    PreviousName7CompanyName          VARCHAR(160),
    PreviousName8CONDATE              DATE,
    PreviousName8CompanyName          VARCHAR(160),
    PreviousName9CONDATE              DATE,
    PreviousName9CompanyName          VARCHAR(160),
    PreviousName10CONDATE             DATE,
    PreviousName10CompanyName         VARCHAR(160),
    -- Confirmation statement
    ConfStmtNextDueDate               DATE,
    ConfStmtLastMadeUpDate            DATE
);
This will load data from the provided .csv files into the table;
-- Bulk-load one Companies House CSV part into companieshouse.
--
-- Text columns are loaded directly. Date and integer fields are first read
-- into user variables (@name) so that empty strings can be turned into NULL
-- and DD/MM/YYYY text can be parsed into DATE values in the SET clause.
-- User variables must be written with '@'; '#' starts a comment in MySQL.
--
-- ESCAPED BY '' disables backslash escaping, so backslashes in the data are
-- loaded literally.
--
-- STR_TO_DATE(NULLIF(@v, ''), fmt) yields NULL for an empty field, because
-- NULLIF returns NULL and STR_TO_DATE(NULL, fmt) is NULL.
LOAD DATA INFILE '/var/lib/mysql-files/BasicCompanyData-2017-03-06-part1_5.csv'
INTO TABLE companieshouse
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
ESCAPED BY ''
LINES TERMINATED BY '\n'
IGNORE 1 LINES
(
    CompanyName,
    CompanyNumber,
    RegAddressCareOf,
    RegAddressPOBox,
    RegAddressAddressLine1,
    RegAddressAddressLine2,
    RegAddressPostTown,
    RegAddressCounty,
    RegAddressCountry,
    RegAddressPostCode,
    CompanyCategory,
    CompanyStatus,
    CountryOfOrigin,
    @DissolutionDate,
    @IncorporationDate,
    @AccountsAccountRefDay,
    @AccountsAccountRefMonth,
    @AccountsNextDueDate,
    @AccountsLastMadeUpDate,
    AccountsAccountCategory,
    @ReturnsNextDueDate,
    @ReturnsLastMadeUpDate,
    @MortgagesNumMortCharges,
    @MortgagesNumMortOutstanding,
    @MortgagesNumMortPartSatisfied,
    @MortgagesNumMortSatisfied,
    SICCodeSicText_1,
    SICCodeSicText_2,
    SICCodeSicText_3,
    SICCodeSicText_4,
    @LimitedPartnershipsNumGenPartners,
    @LimitedPartnershipsNumLimPartners,
    URI,
    @PreviousName1CONDATE,
    PreviousName1CompanyName,
    @PreviousName2CONDATE,
    PreviousName2CompanyName,
    @PreviousName3CONDATE,
    PreviousName3CompanyName,
    @PreviousName4CONDATE,
    PreviousName4CompanyName,
    @PreviousName5CONDATE,
    PreviousName5CompanyName,
    @PreviousName6CONDATE,
    PreviousName6CompanyName,
    @PreviousName7CONDATE,
    PreviousName7CompanyName,
    @PreviousName8CONDATE,
    PreviousName8CompanyName,
    @PreviousName9CONDATE,
    PreviousName9CompanyName,
    @PreviousName10CONDATE,
    PreviousName10CompanyName,
    @ConfStmtNextDueDate,
    @ConfStmtLastMadeUpDate
)
SET
    -- Dates: empty field -> NULL, otherwise parse DD/MM/YYYY.
    DissolutionDate        = STR_TO_DATE(NULLIF(@DissolutionDate, ''), '%d/%m/%Y'),
    IncorporationDate      = STR_TO_DATE(NULLIF(@IncorporationDate, ''), '%d/%m/%Y'),
    AccountsNextDueDate    = STR_TO_DATE(NULLIF(@AccountsNextDueDate, ''), '%d/%m/%Y'),
    AccountsLastMadeUpDate = STR_TO_DATE(NULLIF(@AccountsLastMadeUpDate, ''), '%d/%m/%Y'),
    ReturnsNextDueDate     = STR_TO_DATE(NULLIF(@ReturnsNextDueDate, ''), '%d/%m/%Y'),
    ReturnsLastMadeUpDate  = STR_TO_DATE(NULLIF(@ReturnsLastMadeUpDate, ''), '%d/%m/%Y'),
    PreviousName1CONDATE   = STR_TO_DATE(NULLIF(@PreviousName1CONDATE, ''), '%d/%m/%Y'),
    PreviousName2CONDATE   = STR_TO_DATE(NULLIF(@PreviousName2CONDATE, ''), '%d/%m/%Y'),
    PreviousName3CONDATE   = STR_TO_DATE(NULLIF(@PreviousName3CONDATE, ''), '%d/%m/%Y'),
    PreviousName4CONDATE   = STR_TO_DATE(NULLIF(@PreviousName4CONDATE, ''), '%d/%m/%Y'),
    PreviousName5CONDATE   = STR_TO_DATE(NULLIF(@PreviousName5CONDATE, ''), '%d/%m/%Y'),
    PreviousName6CONDATE   = STR_TO_DATE(NULLIF(@PreviousName6CONDATE, ''), '%d/%m/%Y'),
    PreviousName7CONDATE   = STR_TO_DATE(NULLIF(@PreviousName7CONDATE, ''), '%d/%m/%Y'),
    PreviousName8CONDATE   = STR_TO_DATE(NULLIF(@PreviousName8CONDATE, ''), '%d/%m/%Y'),
    PreviousName9CONDATE   = STR_TO_DATE(NULLIF(@PreviousName9CONDATE, ''), '%d/%m/%Y'),
    PreviousName10CONDATE  = STR_TO_DATE(NULLIF(@PreviousName10CONDATE, ''), '%d/%m/%Y'),
    -- These two fields were read into variables but previously never assigned,
    -- which left both columns NULL for every row.
    ConfStmtNextDueDate    = STR_TO_DATE(NULLIF(@ConfStmtNextDueDate, ''), '%d/%m/%Y'),
    ConfStmtLastMadeUpDate = STR_TO_DATE(NULLIF(@ConfStmtLastMadeUpDate, ''), '%d/%m/%Y'),
    -- Integers: empty field -> NULL.
    AccountsAccountRefDay             = NULLIF(@AccountsAccountRefDay, ''),
    AccountsAccountRefMonth           = NULLIF(@AccountsAccountRefMonth, ''),
    MortgagesNumMortCharges           = NULLIF(@MortgagesNumMortCharges, ''),
    MortgagesNumMortOutstanding       = NULLIF(@MortgagesNumMortOutstanding, ''),
    MortgagesNumMortPartSatisfied     = NULLIF(@MortgagesNumMortPartSatisfied, ''),
    MortgagesNumMortSatisfied         = NULLIF(@MortgagesNumMortSatisfied, ''),
    LimitedPartnershipsNumGenPartners = NULLIF(@LimitedPartnershipsNumGenPartners, ''),
    LimitedPartnershipsNumLimPartners = NULLIF(@LimitedPartnershipsNumLimPartners, '')
;
... loaded the data without any errors or warnings:
Query OK, 849999 rows affected (19.43 sec)
Records: 849999 Deleted: 0 Skipped: 0 Warnings: 0
On the one hand, LOAD DATA INFILE is the fastest method, but real-life imports always require some checks, transformations and other business logic.
The normal practice is to use ETL tools rather than a direct import,
or a multi-stage process: first clean the data (check, log errors, transform, add calculated columns, etc.), then import the final result.
Now there are many excellent OpenSource tools for this:
Talend - http://www.talend.com
StreamSets - http://www.streamsets.com
Apache NiFi - https://nifi.apache.org, https://hortonworks.com/apache/nifi/
With these tools you can implement every step of your import/export logic, such as:
download files
parse them
make any changes and lookups
and afterwards load directly from the tool, or call a bulk load if you prefer
A good example is the recommendations in the comments on your other question - https://dba.stackexchange.com/questions/168194/mysql-select-and-sort-performance-v-large-table - which show how to easily add a date column from wrongly formatted files

"Missing comma" error while inserting clob value in table

-- Consistency-check catalogue (Oracle syntax). cons_query holds the check's
-- SQL text as a CLOB; the remaining columns are VARCHAR2 attributes.
CREATE TABLE fcc_consistency_check (
    cons_id       VARCHAR2(30),
    cons_desc     VARCHAR2(4000),
    cons_query    CLOB,
    module_id     VARCHAR2(2),
    main_tab_name VARCHAR2(30),
    hist_tab_name VARCHAR2(30),
    col_name      VARCHAR2(4000),
    col_type      VARCHAR2(4000),
    check_reqd    VARCHAR2(1)
);
-- Insert as posted in the question; it fails with ORA-00917.
-- NOTE(review): the third value (the cons_query text) contains unescaped
-- single quotes around BC and 000, so the string literal ends early at
-- "module_code = '". Each embedded quote needs to be doubled ('') for the
-- statement to parse.
INSERT INTO fcc_consistency_check
VALUES ('CHK_BC003','Missing records in contract_event_log','select a.CONTRACT_REF_NO ,a.Latest_Event_Seq_No,
c.PREV_WORKING_DAY from cstb_contract A ,sttm_dates c
where module_code = 'BC'
and c.Branch_code='000'
and not exists (select * from cstb_contract_event_log B
where a.contract_ref_no = b.contract_ref_no
and latest_event_seq_no = event_seq_no);',
'BC','BCCC_EVENT_LOG_MISREC','BCCC_EVENT_LOG_MISREC_HISTORY','CONTRACT_REF_NO,LATEST_EVENT_SEQ_NO,EOD_DATE','VARCHAR2(16),NUMBER,DATE','Y');
Not able to insert clob value, I'm getting this error
ORA-00917: missing comma
When I try to insert individual column value then I found that, error is throwing for column cons_query.
The problem is that you have quotes within your query:
'select a.CONTRACT_REF_NO
,a.Latest_Event_Seq_No,
c.PREV_WORKING_DAY from cstb_contract A ,sttm_dates
where module_code = 'BC'
^string starts here:
^ends here, there's a comma missing
However, the actual issue is not that a comma is missing but that you have quotes you forgot to escape. You need to write module_code = ''BC'' for example to escape those quotes (you have additional quotes in there, not just at 'BC').

How do I remove unknown characters in a string?

I would like to delete parts of a string.
We have a Table: Locations
mk-MK=New York; sq-AL=Nej York; en-US=New York
mk-MK=London; sq-AL=London; en-US=London
mk-MK=Paris; sq-AL=Paris; en-US=Paris
I Want to remove everything and keep only sq-AL=LocationName.
I want the result to be:
sq-AL=Nej York;
sq-AL=London;
This is yet another example of the importance of normalized databases.
In a normalized database you would have a table with 2 columns, one for the culture (sq-AL, en-US, etc.) and one for the value. I would go a step further and have the cultures in a lookup table.
However, since this is not the case, you have to use string manipulation to get the value of a specific culture. You can use SUBSTRING and CHARINDEX to find the specific pattern you want.
This will work in any of the cases represented by the sample data I've listed.
-- Demo (T-SQL): build a throwaway table covering each position the
-- 'sq-AL=' entry can take, extract that entry, then drop the table.
CREATE TABLE Location ([Name] varchar(100));

INSERT INTO Location ([Name])
VALUES
    ('en-US=Huston; mk-MK=Huston; sq-AL=Huston;'),       -- last entry, trailing ';' present
    ('en-US=New York; mk-MK=New York; sq-AL=Nej York'),  -- last entry, no trailing ';'
    ('mk-MK=London; sq-AL=London; en-US=London'),        -- middle entry
    ('sq-AL=Paris; en-US=Paris; mk-MK=Paris');           -- first entry

-- key_pos : index where 'sq-AL=' starts.
-- semi_pos: index of the first ';' at or after key_pos (0 when there is none).
-- When a ';' follows, the length is semi_pos - key_pos; otherwise LEN(Name)
-- is passed as the length so the substring runs to the end of the string.
SELECT
    SUBSTRING(
        loc.[Name],
        k.key_pos,
        CASE
            WHEN s.semi_pos > 0 THEN s.semi_pos - k.key_pos
            ELSE LEN(loc.[Name])
        END
    ) + ';'
FROM Location AS loc
CROSS APPLY (SELECT CHARINDEX('sq-AL=', loc.[Name]) AS key_pos) AS k
CROSS APPLY (SELECT CHARINDEX(';', loc.[Name], k.key_pos) AS semi_pos) AS s;

-- Remove the demo table.
DROP TABLE Location;
You can use CHARINDEX function. I've tried same with a variable as,
-- Demo (T-SQL) on a single variable. Local variables are declared with '@'.
declare @locations varchar(100) = 'mk-MK=New York; sq-AL=Nej York; en-US=New York'

-- Locate 'sq-AL=' directly instead of assuming it is the second entry and
-- exactly as long as the first one.
-- A ';' is appended to the searched string as a sentinel, so an end position
-- is always found even when the sq-AL entry is last and has no trailing ';'.
-- Yields NULL when the string has no 'sq-AL=' entry.
select case
         when CHARINDEX('sq-AL=', @locations) > 0 then
           SUBSTRING(
             @locations,
             CHARINDEX('sq-AL=', @locations),
             CHARINDEX(';', @locations + ';', CHARINDEX('sq-AL=', @locations))
               - CHARINDEX('sq-AL=', @locations)
           ) + ';'
       end
--Final Output : sq-AL=Nej York;
In your case: Query will be as,
-- Extract the 'sq-AL=...;' entry from every row of Locations (T-SQL).
-- The entry is found by searching for 'sq-AL=' itself, so it works wherever
-- the entry sits in the list and whatever the lengths of the other entries.
-- A ';' is appended to the searched string as a sentinel, so an end position
-- is always found even when the entry is last and has no trailing ';'.
-- Rows without an 'sq-AL=' entry yield NULL.
select case
         when CHARINDEX('sq-AL=', Name) > 0 then
           SUBSTRING(
             Name,
             CHARINDEX('sq-AL=', Name),
             CHARINDEX(';', Name + ';', CHARINDEX('sq-AL=', Name))
               - CHARINDEX('sq-AL=', Name)
           ) + ';'
       end
FROM Locations