How to index nested mysql object into elasticsearch using logstash? - mysql

I'm trying to index mysql database with elasticsearch. Consider the example mapping:
{"blog":
{"properties":
{"id": "string"}
{"author": "string"}
{"time_created": }
{"author_info":
{"author_name":}
{"author_sex":}
}
{"posts":
{"post_author":}
{"post_time":}
}
}
}
I have three tables: author_info, blog and post. How can I index these records into Elasticsearch with a nested structure? I cannot find any documentation about it. Thanks.

# Reads one row per (user, post, comment) combination from MySQL.
# Users without posts are dropped by the INNER JOIN; posts without comments
# are kept by the LEFT JOIN and arrive with NULL comment columns.
input {
  jdbc {
    jdbc_validate_connection => true
    jdbc_connection_string => "jdbc:mysql://172.17.0.2:3306/_db"
    jdbc_user => "root"
    jdbc_password => "admin"
    jdbc_driver_library => "/home/ilsa/mysql-connector-java-5.1.36-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    clean_run => true
    # Rows are sorted by user first so that every row of one employee_number is
    # contiguous. The aggregate filter in this pipeline keys on employee_number
    # with push_previous_map_as_event, which emits the map as soon as the key
    # changes; sorting by post id alone would split a user across several maps.
    statement => "SELECT
        u.id AS employee_number,
        u.email AS email,
        u.username AS username,
        up.id AS post_id,
        up.text_content AS content,
        pc.id AS comment_id,
        pc.user_post_id AS comment_post_id,
        pc.comment AS comment_text
      FROM users u
      INNER JOIN user_posts up ON up.user_id = u.id
      LEFT JOIN post_comments pc ON pc.user_post_id = up.id
      ORDER BY u.id ASC, up.id ASC"
  }
}
# Folds the flat JOIN rows into one event per user shaped as
#   { employee_number, email, username, posts: [ { post_id, content, comments: [...] } ] }
filter {
  aggregate {
    # All rows sharing an employee_number are merged into the same map.
    task_id => "%{employee_number}"
    # Each post is looked up by post_id and created only once, so a post with
    # several comments yields one entry with several comments instead of one
    # duplicated entry per row. Rows whose comment_id is nil (posts without
    # comments, produced by the LEFT JOIN) add no comment, leaving an empty list.
    # The original row event is cancelled; only the aggregated map is emitted.
    code => "
      map['employee_number'] = event.get('employee_number')
      map['email'] = event.get('email')
      map['username'] = event.get('username')
      map['posts'] ||= []

      post_id = event.get('post_id')
      post = map['posts'].find { |p| p['post_id'] == post_id }
      if post.nil?
        post = {
          'post_id' => post_id,
          'content' => event.get('content'),
          'comments' => []
        }
        map['posts'] << post
      end

      comment_id = event.get('comment_id')
      unless comment_id.nil?
        post['comments'] << {
          'comment_id' => comment_id,
          'comment_post_id' => event.get('comment_post_id'),
          'comment_text' => event.get('comment_text')
        }
      end

      event.cancel()
    "
    # Emit the finished map as a new event whenever the task id changes.
    push_previous_map_as_event => true
    # Seconds after which a map is considered finished; this is what flushes
    # the last user, since no further task id follows it.
    timeout => 30
  }
}
# Prints every event for debugging and indexes it into Elasticsearch.
output {
stdout{ codec => rubydebug }
elasticsearch{
action => "index"
# NOTE(review): Elasticsearch index names may not start with "_"; "_dev" looks like a redacted placeholder — confirm the real index name.
index => "_dev"
document_type => "_doc"
# One document per user: the id is taken from the employee_number field.
document_id => "%{employee_number}"
hosts => "localhost:9200"
}
}

In the sql part of logstash input you might try to select the fields with the nested names you want in elasticsearch. Below is a small sample of how it might look.
# Sketch only: the column list is elided ("....."), the joins carry no ON
# conditions and no connection settings are shown.
# NOTE(review): aliases containing dots presumably need backtick quoting in MySQL — confirm.
input {
jdbc {
statement => "SELECT id as blog.properties.id, author as blog.properties.author,..... from blog inner join properties inner join posts"
}
}

Related

How to set many-to-many relationship in elasticsearch, logstash

I'm trying to set up a many-to-many relationship using elasticSearch.
I searched and found that there is no way to set it up like a relational database.
So I want to set it like parent-children.
How should I write the statement in the code below?
#logstash.conf
# jdbc input for the `user` table. Each row carries an extra `cat` column holding
# the user's cats, collected through the user_cats_cat link table.
jdbc{
jdbc_driver_library => "/Users/kun/Downloads/mysql-connector-java-8.0.29/mysql-connector-java-8.0.29.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/dbtest"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
# Incremental sync: the statement exposes UNIX_TIMESTAMP(updatedAt) as
# unix_ts_in_secs_user and filters on it with :sql_last_value.
tracking_column => "unix_ts_in_secs_user"
use_column_value => true
tracking_column_type => "numeric"
# NOTE(review): six-field cron expression; presumably the first field is seconds (every 3 seconds) — confirm.
schedule => "*/3 * * * * *"
# NOTE(review): the JSON_ARRAYAGG(...) result presumably reaches Logstash as a
# JSON-encoded string rather than an array of objects, which would explain why
# cat.name is not queryable; a json filter on `cat` may be needed — confirm.
statement => "SELECT *,UNIX_TIMESTAMP(updatedAt) as unix_ts_in_secs_user,
(select
JSON_ARRAYAGG(
JSON_OBJECT(
'id', `id`,
'name', `name`,
'name2', `name2`
)
)
from cat inner join user_cats_cat on user_cats_cat.catId = cat.id and user_cats_cat.userId = user.id) as cat
FROM user WHERE (UNIX_TIMESTAMP(updatedAt) > :sql_last_value AND updatedAt < NOW())"
# File in which the last tracked value is stored between runs.
last_run_metadata_path => "./logstash_jdbc_last_run_user"
type => "user"
}
With the configuration above, I cannot query cat.name in Elasticsearch.
GET index_user_test/_search
{
  "track_total_hits": true,
  "from": 0,
  "size": 10,
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "fields": ["name.ngram", "name2.ngram"],
            "query": "kun"
          }
        },
        {
          "terms": {
            "cat.name": ["blue"]
          }
        }
      ]
    }
  }
}
cat and user have a many-to-many relationship.
user_cats_cat is an intermediate table between user and cat.
Thank you!

logstash-input-jdbc how to use utf-8 chars in statement

I use logstash-input-jdbc to sync my database to elasticsearch.
Env: (logstash 7.5, elasticsearch 7.5,mysql-connector-java-5.1.48.jar, logstash-input-jdbc-4.3.16)
materials.conf:
# Pipeline that reads materials from MySQL with the query stored in
# ./materials.sql and indexes them into the "materials" index.
input {
jdbc {
jdbc_connection_string => "jdbc:mysql://localhost:3306/sc_education"
jdbc_driver_library => "connector/mysql-connector-java-5.1.48.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_user => "dauser"
jdbc_password => "daname"
jdbc_paging_enabled => "true"
jdbc_page_size => "50"
# The SQL text is loaded from this file instead of an inline statement.
statement_filepath => "./materials.sql"
schedule => "* * * * *"
last_run_metadata_path => "./materials.info"
record_last_run => true
# NOTE(review): use_column_value is not set here, so :sql_last_value is presumably
# the time of the last run rather than the last updated_at value — confirm.
tracking_column => updated_at
codec => plain { charset => "UTF-8"}
# parameters => { "favorite_artist" => "Beethoven" }
# statement => "SELECT * from songs where artist = :favorite_artist"
}
}
filter {
# NOTE(review): the sample events printed by this pipeline show no "message"
# field, so this json filter may have nothing to parse — confirm.
json {
source => "message"
remove_field => ["message"]
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "materials"
# One document per material: the id is taken from the material_id column.
document_id => "%{material_id}"
}
stdout {
codec => json_lines
}
}
materials.sql:
-- Materials changed since the last Logstash run (:sql_last_value), with the
-- grade, subject and term ids translated into display labels.
--
-- Each CASE uses the simple form: the WHEN operands are plain values that are
-- compared with the column. Writing "CASE grade_id WHEN grade_id = 1" would
-- instead compare the column with the 0/1 result of that comparison, so only
-- ids 0 and 1 could ever match and every other id would fall through to ELSE.
SELECT
    material_name,
    material_id,
    CASE grade_id
        WHEN 1 THEN '一年级'
        WHEN 2 THEN '二年级'
        WHEN 3 THEN '三年级'
        WHEN 4 THEN '四年级'
        WHEN 5 THEN '五年级'
        WHEN 6 THEN '六年级'
        WHEN 7 THEN '初一'
        WHEN 8 THEN '初二'
        WHEN 9 THEN '初三'
        WHEN 10 THEN '高一'
        WHEN 11 THEN '高二'
        WHEN 12 THEN '高三'
        ELSE ''
    END AS grade,
    CASE subject_id
        WHEN 1 THEN '数学'
        WHEN 2 THEN '物理'
        WHEN 3 THEN '化学'
        WHEN 4 THEN '语文'
        WHEN 5 THEN '英语'
        WHEN 6 THEN '科学'
        WHEN 7 THEN '音乐'
        WHEN 8 THEN '绘画'
        WHEN 9 THEN '政治'
        WHEN 10 THEN '历史'
        WHEN 11 THEN '地理'
        WHEN 12 THEN '生物'
        WHEN 13 THEN '奥数'
        ELSE ''
    END AS subject,
    CASE course_term_id
        WHEN 1 THEN '春'
        WHEN 2 THEN '暑'
        WHEN 3 THEN '秋'
        WHEN 4 THEN '寒'
        ELSE ''
    END AS season,
    created_at,
    updated_at
FROM sc_materials
WHERE updated_at > :sql_last_value
    AND material_id IN (2025, 317, 2050);
./bin/logstash -f materials.conf
{"#version":"1","updated_at":"2019-08-19T02:04:54.000Z","season":"?","grade":"","created_at":"2019-08-19T02:04:54.000Z","#timestamp":"2019-12-13T01:02:01.907Z","material_name":"test material seri''al","material_id":2025,"subject":"??"}
{"#version":"1","updated_at":"2019-08-26T09:25:35.000Z","season":"","grade":"","created_at":"2019-08-26T09:25:35.000Z","#timestamp":"2019-12-13T01:02:01.908Z","material_name":"人教版高中英语必修三第10讲Unit5 Canada The True North语法篇A学生版2.pdf","material_id":2050,"subject":""}
{"#version":"1","updated_at":"2019-08-10T06:50:48.000Z","season":"?","grade":"","created_at":"2019-05-27T06:26:44.000Z","#timestamp":"2019-12-13T01:02:01.880Z","material_name":"90aca2238832143fb75dcf0fe6dbbfa9.pdf","material_id":317,"subject":""}
The Chinese characters stored in the database come through correctly, but the Chinese characters written in the statement itself are turned into "?".
For me, characterEncoding=utf8 alone did not work.
After I added this to the input section:
stdin {
codec => plain { charset => "UTF-8"}
}
it works well.
Here is my working conf file.
It has been a while since the question was asked, but I hope this helps someone.
# Pipeline that reads the naver_city table from PostgreSQL once a minute and
# upserts each row into Elasticsearch.
input {
jdbc {
# NOTE(review): useTimezone, useLegacyDatetimeCode, serverTimezone, useSSL, useUnicode and
# characterEncoding look like MySQL Connector/J options on a PostgreSQL URL — confirm the driver accepts them.
jdbc_connection_string => "jdbc:postgresql://localhost:5432/atlasdb?useTimezone=true&useLegacyDatetimeCode=false&serverTimezone=UTC&useSSL=false&useUnicode=true&characterEncoding=utf8"
jdbc_user => "atlas"
jdbc_password => "atlas"
jdbc_validate_connection => true
jdbc_driver_library => "/lib/postgres-42-test.jar"
jdbc_driver_class => "org.postgresql.Driver"
schedule => "* * * * *"
statement => "SELECT * from naver_city"
}
# Second input: standard input decoded as UTF-8 text.
stdin {
codec => plain { charset => "UTF-8"}
}
}
output {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "2020-04-23-2"
# Update the document whose id is the row's `code`; doc_as_upsert is enabled
# together with the "update" action.
doc_as_upsert => true
action => "update"
document_id => "%{code}"
}
stdout { codec => rubydebug }
}
I ran into this problem when my query contained Japanese characters.
You can fix it by changing jdbc_connection_string in materials.conf:
jdbc_connection_string => "jdbc:mysql://localhost:3306/sc_education?useSSL=false&useUnicode=true&characterEncoding=utf8"
Then restart Logstash.

How sync data from mysql database to elasticsearch with logstash: only indexing new data and index by country?

Can I specify the id from which synchronization should start, so that all the data is not indexed again? And can I write to different indices in different scenarios, for example one index per country?
This is my Logstash conf:
# file: contacts-index-logstash.conf
# Reads published job ads (ANUNCIO_FORM) joined with their country, province
# and city, and indexes them into the "anuncios" index.
input {
jdbc {
jdbc_driver_library => "/home/peter/Downloads/mysql-connector-java-5.1.40-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost/MYJOBS"
jdbc_user => "readuser"
jdbc_password => "xxxx"
# NOTE(review): with the schedule commented out the statement presumably runs once per start — confirm.
# schedule => "* * * * *"
# NOTE(review): IdPais in the first JOIN is the only unqualified column; presumably af.IdPais — confirm.
# The statement has no :sql_last_value filter, so every run selects the full result set again.
statement => "SELECT af.IdAnuncio as idanuncio, af.Titulo, af.Descripcion, af.Empresa, p.Abreviatura,
pr.Nombre as Provincia, cd.Nombre as Ciudad, af.Localidad
FROM `ANUNCIO_FORM` af
INNER JOIN PAIS p ON p.Id = IdPais
INNER JOIN PROVINCIA pr ON pr.Id = af.IdProvincia
INNER JOIN CIUDAD cd ON cd.Id = af.IdCiudad
WHERE af.IdAnuncio >0
AND af.Fecha_de_publicacion > '2016-12-01'
AND af.Estado =1"
}
}
output {
stdout { codec => json_lines }
elasticsearch {
index => "anuncios"
document_type => "internos"
# One document per ad: the id is taken from the idanuncio column alias.
document_id => "%{idanuncio}"
hosts => "localhost:9200"
}
}
Thanks in advance.
P.S. English is not my first language, so please excuse any mistakes.

YII2 creating relations in models between tables from 2 databases

I have defined 2 databases , for example
// Application configuration registering two independent database connections
// as the components 'db1' and 'db2'.
return [
'components' => [
// First connection: MySQL database "db1name" on localhost.
'db1' => [
'class' => 'yii\db\Connection',
'dsn' => 'mysql:host=localhost;dbname=db1name',
'username' => 'db1username',
'password' => 'db1password',
],
// Second connection: MySQL database "db2name" on localhost.
'db2' => [
'class' => 'yii\db\Connection',
'dsn' => 'mysql:host=localhost;dbname=db2name',
'username' => 'db2username',
'password' => 'db2password',
],
],
];
Now i have a table as 'users' in 'db1' and table 'countries' in 'db2'
users
id , country_code , username , password
1 , DE , xyz , 12345
2 , FR , abc , 12345
countries
code , name
DE , Germany
FR , France
IN , India
I have defined the foreign key relation between users.country_code & countries.code
ISSUE
But when I try to create the model for the 'users' table using Gii, it gives an error, possibly because the related tables are in 2 different databases.
How can I use tables from different databases in the relations of a model?
Any suggestions are welcome.
This works in my case for listing items in GridView::widget.
-> bd_sisarc is my second database
-> deposito_sondagem is a table from my first database
// NOTE(review): two methods named getDb() cannot live in the same class; presumably
// each snippet belongs to a different model, one per database — confirm.

/**
 * Returns the application component registered under the id 'db1'.
 */
public static function getDb() // on your model
{
return Yii::$app->get('db1');
}
/**
 * Returns the application component registered under the id 'db2'.
 */
public static function getDb() // on your model
{
return Yii::$app->get('db2');
}
/**
 * Has-one relation to EmpresaSondagem, linking its idEmpSondagem column to
 * this model's entidade_deposito column.
 */
public function getEmpresaSondagem() // Relation on you model
{
    $relatedClass = EmpresaSondagem::className();
    $link = ['idEmpSondagem' => 'entidade_deposito'];

    return $this->hasOne($relatedClass, $link);
}
/**
 * Builds a data provider listing active deposito_sondagem rows joined with
 * active companies from `bd_sisarc`.`tbempresasondagem`, filtered by the
 * search attributes loaded from $params.
 *
 * The filter values are passed as bound parameters instead of being
 * interpolated into the SQL text, so user input cannot alter the statement.
 *
 * @param array $params request data loaded into this search model
 * @return ActiveDataProvider provider wrapping the raw-SQL query
 */
public function search($params)
{
    $this->load($params);

    // Explicit INNER JOIN across the two databases; the join condition and the
    // filters are the same as in the comma-join form.
    $sql = "SELECT deposito_sondagem.*
        FROM deposito_sondagem
        INNER JOIN `bd_sisarc`.`tbempresasondagem`
            ON `bd_sisarc`.`tbempresasondagem`.`idEmpSondagem` = `deposito_sondagem`.`entidade_deposito`
        WHERE deposito_sondagem.estado = 1
            AND tbempresasondagem.estado = 1
            AND numero_registo LIKE :numero_registo
            AND nomeempsondagem LIKE :nomeempsondagem
            AND dono_sondagem LIKE :dono_sondagem
            AND data_deposito LIKE :data_deposito";

    // Substring match on every attribute; an empty attribute yields '%%',
    // which matches any non-NULL value, exactly as the interpolated form did.
    $bindings = [
        ':numero_registo' => '%' . $this->numero_registo . '%',
        ':nomeempsondagem' => '%' . $this->nomeEntidade . '%',
        ':dono_sondagem' => '%' . $this->dono_sondagem . '%',
        ':data_deposito' => '%' . $this->data_deposito . '%',
    ];

    $query = DepositoSondagem::findBySql($sql, $bindings);
    $dataProvider = new ActiveDataProvider([
        'query' => $query,
    ]);

    if (!$this->validate()) {
        // uncomment the following line if you do not want to return any records when validation fails
        // $query->where('0=1');
        return $dataProvider;
    }

    return $dataProvider;
}
Try this one
-- Users joined with the countries table that lives in the second database
-- (db2name). The quoted column name must be exactly `code`: whitespace inside
-- backticks is part of the identifier.
SELECT `users`.*
FROM `users`
LEFT JOIN `db2name`.`countries`
    ON `users`.`country_code` = `db2name`.`countries`.`code`

elastic search: Advanced Filter Query

I am working with Elasticsearch on my current project. I need to filter users by their industries. Please take a look at my code; the equivalent MySQL query is as follows:
-- Users linked to any of the listed industries; a user linked to several of
-- them is returned once per matching link row.
SELECT `U`.*
FROM `users` AS `U`
INNER JOIN `user_industries` AS `UI`
    ON `U`.`id` = `UI`.`user_id`
WHERE `UI`.`industry_id` IN ('1', '3', '5');
// Elasticsearch search body: a page of users ($start / $recordslimit), newest
// id first, matching everything and then narrowed by a boolean filter.
$query = array("from" => $start,
"size" => $recordslimit,
"sort" => array(array('id' => 'desc')),
"query" => array(
"filtered" => array(
"query" => array("match_all" => array()),
"filter" => array(
"bool" => array(
// All three term clauses must match; industries.id is compared with the single value 1.
'must' => array(array('term' => array('user_type' => 'v')),
array('term' => array('status' => 'a')),
array('term' => array('industries.id' => 1))
),
// Users whose subscription_type is 'n' are excluded.
'must_not' => array(
array('term' => array('subscription_type' => 'n'))
)
))
)));
I passed a single industry value. How can I pass multiple industry values?
Great start! You can achieve what you want by using a terms filter instead of a term filter and passing the values 1, 3, 5 in an array():
...
array('terms' => array('industries.id' => array(1, 3, 5)))
...