How to set up a many-to-many relationship in Elasticsearch with Logstash and MySQL

I'm trying to set up a many-to-many relationship using Elasticsearch.
I searched and found that there is no way to set it up like in a relational database.
So I want to model it like parent-children.
How should I write the statement in the configuration below?
#logstash.conf
jdbc{
jdbc_driver_library => "/Users/kun/Downloads/mysql-connector-java-8.0.29/mysql-connector-java-8.0.29.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://127.0.0.1:3306/dbtest"
jdbc_user => "xxxx"
jdbc_password => "xxxx"
tracking_column => "unix_ts_in_secs_user"
use_column_value => true
tracking_column_type => "numeric"
schedule => "*/3 * * * * *"
statement => "SELECT *,UNIX_TIMESTAMP(updatedAt) as unix_ts_in_secs_user,
(select
JSON_ARRAYAGG(
JSON_OBJECT(
'id', `id`,
'name', `name`,
'name2', `name2`
)
)
from cat inner join user_cats_cat on user_cats_cat.catId = cat.id and user_cats_cat.userId = user.id) as cat
FROM user WHERE (UNIX_TIMESTAMP(updatedAt) > :sql_last_value AND updatedAt < NOW())"
last_run_metadata_path => "./logstash_jdbc_last_run_user"
type => "user"
}
With the configuration above, I could not query cat.name in Elasticsearch.
GET index_user_test/_search
{
"track_total_hits": true,
"from":0,
"size":10,
"query" : {
"bool" : {
"must" : [
{
"query_string":{
"fields": ["name.ngram","name2.ngram"],
"query": "kun"
}
},
{
"terms" : {
"cat.name" : "blue"
}
}
]
}
}
}
cat and user have a many-to-many relationship.
user_cats_cat is the join table between user and cat.
Thank you!
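One approach that may work here (a rough, untested sketch): the cat column produced by JSON_ARRAYAGG reaches Logstash as a plain JSON string, so it can be parsed into a real array of objects with a json filter, and the field can then be mapped as nested so that cat.name becomes queryable. The filter conditional and the nested mapping below are assumptions, not something from the original setup.

filter {
  if [type] == "user" {
    # parse the JSON_ARRAYAGG string into an array of objects
    json {
      source => "cat"
      target => "cat"
    }
  }
}

With cat mapped as nested (typeless mapping, so Elasticsearch 7+), the filter clause becomes a nested query and terms takes an array of values; depending on how name is analyzed you may need cat.name.keyword instead:

PUT index_user_test
{
  "mappings": {
    "properties": {
      "cat": { "type": "nested" }
    }
  }
}

GET index_user_test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "cat",
            "query": { "terms": { "cat.name": ["blue"] } }
          }
        }
      ]
    }
  }
}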

Related

logstash-input-jdbc: how to use UTF-8 chars in the statement

I use logstash-input-jdbc to sync my database to Elasticsearch.
Env: (Logstash 7.5, Elasticsearch 7.5, mysql-connector-java-5.1.48.jar, logstash-input-jdbc-4.3.16)
materials.conf:
input {
jdbc {
jdbc_connection_string => "jdbc:mysql://localhost:3306/sc_education"
jdbc_driver_library => "connector/mysql-connector-java-5.1.48.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_user => "dauser"
jdbc_password => "daname"
jdbc_paging_enabled => "true"
jdbc_page_size => "50"
statement_filepath => "./materials.sql"
schedule => "* * * * *"
last_run_metadata_path => "./materials.info"
record_last_run => true
tracking_column => "updated_at"
codec => plain { charset => "UTF-8"}
# parameters => { "favorite_artist" => "Beethoven" }
# statement => "SELECT * from songs where artist = :favorite_artist"
}
}
filter {
json {
source => "message"
remove_field => ["message"]
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "materials"
document_id => "%{material_id}"
}
stdout {
codec => json_lines
}
}
materials.sql:
SELECT material_name,material_id,
CASE grade_id
WHEN grade_id = 1 THEN "一年级"
WHEN grade_id = 2 THEN "二年级"
WHEN grade_id = 3 THEN "三年级"
WHEN grade_id = 4 THEN "四年级"
WHEN grade_id = 5 THEN "五年级"
WHEN grade_id = 6 THEN "六年级"
WHEN grade_id = 7 THEN "初一"
WHEN grade_id = 8 THEN "初二"
WHEN grade_id = 9 THEN "初三"
WHEN grade_id = 10 THEN "高一"
WHEN grade_id = 11 THEN "高二"
WHEN grade_id = 12 THEN "高三"
ELSE "" END as grade,
CASE subject_id
WHEN subject_id = 1 THEN "数学"
WHEN subject_id = 2 THEN "物理"
WHEN subject_id = 3 THEN "化学"
WHEN subject_id = 4 THEN "语文"
WHEN subject_id = 5 THEN "英语"
WHEN subject_id = 6 THEN "科学"
WHEN subject_id = 7 THEN "音乐"
WHEN subject_id = 8 THEN "绘画"
WHEN subject_id = 9 THEN "政治"
WHEN subject_id = 10 THEN "历史"
WHEN subject_id = 11 THEN "地理"
WHEN subject_id = 12 THEN "生物"
WHEN subject_id = 13 THEN "奥数"
ELSE "" END as subject,
CASE course_term_id
WHEN course_term_id = 1 THEN "春"
WHEN course_term_id = 2 THEN "暑"
WHEN course_term_id = 3 THEN "秋"
WHEN course_term_id = 4 THEN "寒"
ELSE "" END as season,
created_at, updated_at from sc_materials where updated_at > :sql_last_value and material_id in (2025,317,2050);
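(Side note: the simple CASE grade_id WHEN grade_id = 1 THEN ... form above compares grade_id with the boolean result of grade_id = 1, i.e. with 0 or 1, so only grade_id = 1 maps correctly and everything else falls through to ELSE. If the empty grade values matter, the usual forms are either of these:)

CASE WHEN grade_id = 1 THEN '一年级'
     WHEN grade_id = 2 THEN '二年级'
     ELSE '' END AS grade
-- or the simple CASE form:
CASE grade_id WHEN 1 THEN '一年级'
              WHEN 2 THEN '二年级'
              ELSE '' END AS grade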
./bin/logstash -f materials.conf
{"#version":"1","updated_at":"2019-08-19T02:04:54.000Z","season":"?","grade":"","created_at":"2019-08-19T02:04:54.000Z","#timestamp":"2019-12-13T01:02:01.907Z","material_name":"test material seri''al","material_id":2025,"subject":"??"}
{"#version":"1","updated_at":"2019-08-26T09:25:35.000Z","season":"","grade":"","created_at":"2019-08-26T09:25:35.000Z","#timestamp":"2019-12-13T01:02:01.908Z","material_name":"人教版高中英语必修三第10讲Unit5 Canada The True North语法篇A学生版2.pdf","material_id":2050,"subject":""}
{"#version":"1","updated_at":"2019-08-10T06:50:48.000Z","season":"?","grade":"","created_at":"2019-05-27T06:26:44.000Z","#timestamp":"2019-12-13T01:02:01.880Z","material_name":"90aca2238832143fb75dcf0fe6dbbfa9.pdf","material_id":317,"subject":""}
The Chinese characters coming from the database work fine, but the Chinese characters in the statement become ? characters.
For me, characterEncoding=utf8 was not working.
After adding this,
stdin {
codec => plain { charset => "UTF-8"}
}
it works well.
Here is my working conf file.
It's a bit late to post an answer, but I hope it helps someone.
input {
jdbc {
jdbc_connection_string => "jdbc:postgresql://localhost:5432/atlasdb?useTimezone=true&useLegacyDatetimeCode=false&serverTimezone=UTC&useSSL=false&useUnicode=true&characterEncoding=utf8"
jdbc_user => "atlas"
jdbc_password => "atlas"
jdbc_validate_connection => true
jdbc_driver_library => "/lib/postgres-42-test.jar"
jdbc_driver_class => "org.postgresql.Driver"
schedule => "* * * * *"
statement => "SELECT * from naver_city"
}
stdin {
codec => plain { charset => "UTF-8"}
}
}
output {
elasticsearch {
hosts => [ "localhost:9200" ]
index => "2020-04-23-2"
doc_as_upsert => true
action => "update"
document_id => "%{code}"
}
stdout { codec => rubydebug }
}
I have encountered this problem when using a query containing Japanese characters.
You could change jdbc_connection_string in materials.conf:
jdbc_connection_string => "jdbc:mysql://localhost:3306/sc_education?useSSL=false&useUnicode=true&characterEncoding=utf8"
Then restart Logstash.
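Putting the two suggestions together, the relevant part of materials.conf would look roughly like this (untested sketch, reusing the settings from the question):

input {
  jdbc {
    jdbc_connection_string => "jdbc:mysql://localhost:3306/sc_education?useSSL=false&useUnicode=true&characterEncoding=utf8"
    jdbc_driver_library => "connector/mysql-connector-java-5.1.48.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_user => "dauser"
    jdbc_password => "daname"
    statement_filepath => "./materials.sql"
    schedule => "* * * * *"
    # decode the statement and its results as UTF-8
    codec => plain { charset => "UTF-8" }
  }
}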

How to sync data from a MySQL database to Elasticsearch with Logstash: only indexing new data, and one index per country?

Can I specify the ID from which synchronization should start, so that all the data isn't indexed again? And can I specify different indices in different scenarios, for example an index per country?
This is my Logstash conf:
# file: contacts-index-logstash.conf
input {
jdbc {
jdbc_driver_library => "/home/peter/Downloads/mysql-connector-java-5.1.40-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost/MYJOBS"
jdbc_user => "readuser"
jdbc_password => "xxxx"
# schedule => "* * * * *"
statement => "SELECT af.IdAnuncio as idanuncio, af.Titulo, af.Descripcion, af.Empresa, p.Abreviatura,
pr.Nombre as Provincia, cd.Nombre as Ciudad, af.Localidad
FROM `ANUNCIO_FORM` af
INNER JOIN PAIS p ON p.Id = IdPais
INNER JOIN PROVINCIA pr ON pr.Id = af.IdProvincia
INNER JOIN CIUDAD cd ON cd.Id = af.IdCiudad
WHERE af.IdAnuncio >0
AND af.Fecha_de_publicacion > '2016-12-01'
AND af.Estado =1"
}
}
output {
stdout { codec => json_lines }
elasticsearch {
index => "anuncios"
document_type => "internos"
document_id => "%{idanuncio}"
hosts => "localhost:9200"
}
}
Thanks in advance.
PS: English is not my first language, so please excuse any mistakes.
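A rough sketch of both behaviours (untested; the tracking file path and the pais alias are made up for this example, and the country abbreviation is lowercased because index names must be lowercase): the jdbc input can remember the last IdAnuncio it indexed via tracking_column and :sql_last_value, and the elasticsearch output can build the index name from an event field.

input {
  jdbc {
    jdbc_driver_library => "/home/peter/Downloads/mysql-connector-java-5.1.40-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost/MYJOBS"
    jdbc_user => "readuser"
    jdbc_password => "xxxx"
    schedule => "* * * * *"
    use_column_value => true
    tracking_column => "idanuncio"            # remember the highest id already indexed
    tracking_column_type => "numeric"
    last_run_metadata_path => "./anuncios_last_run"
    statement => "SELECT af.IdAnuncio as idanuncio, af.Titulo, af.Descripcion, af.Empresa, LOWER(p.Abreviatura) as pais,
                  pr.Nombre as Provincia, cd.Nombre as Ciudad, af.Localidad
                  FROM ANUNCIO_FORM af
                  INNER JOIN PAIS p ON p.Id = af.IdPais
                  INNER JOIN PROVINCIA pr ON pr.Id = af.IdProvincia
                  INNER JOIN CIUDAD cd ON cd.Id = af.IdCiudad
                  WHERE af.IdAnuncio > :sql_last_value AND af.Estado = 1"
  }
}
output {
  elasticsearch {
    hosts => "localhost:9200"
    index => "anuncios-%{pais}"               # one index per country, e.g. anuncios-de
    document_id => "%{idanuncio}"
  }
}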

How to index a nested MySQL object into Elasticsearch using Logstash?

I'm trying to index a MySQL database with Elasticsearch. Consider the example mapping:
{"blog":
{"properties":
{"id": "string"}
{"author": "string"}
{"time_created": }
{"author_info":
{"author_name":}
{"author_sex":}
}
{"posts":
{"post_author":}
{"post_time":}
}
}
}
I have three tables: author_info, blog and post. How can I index these records into Elasticsearch with a nested structure? I cannot find documentation about it. Thanks
input {
jdbc{
jdbc_validate_connection => true
jdbc_connection_string => "jdbc:mysql://172.17.0.2:3306/_db"
jdbc_user => "root"
jdbc_password => "admin"
jdbc_driver_library => "/home/ilsa/mysql-connector-java-5.1.36-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
clean_run => true
statement => "SELECT
u.id as employee_number, u.email as email, u.username as username,
up.id as post_id, up.text_content as content,
pc.id as comment_id , pc.user_post_id as comment_post_id, pc.comment as comment_text
FROM users u join user_posts up on up.user_id = u.id
LEFT JOIN post_comments pc ON pc.user_post_id = up.id
ORDER BY up.id ASC"
}
}
filter {
aggregate {
task_id => "%{employee_number}"
code => "
map['employee_number'] = event.get('employee_number')
map['email'] = event.get('email')
map['username'] = event.get('username')
map['posts'] ||= []
map['posts'] << {
'post_id' => event.get('post_id'),
'content' => event.get('content'),
'comments' => [] << {
'comment_id' => event.get('comment_id'),
'comment_post_id' => event.get('comment_post_id'),
'comment_text' => event.get('comment_text')
}
}
event.cancel()"
push_previous_map_as_event => true
timeout => 30
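# note: the aggregate filter relies on event order, so run Logstash with a single pipeline worker (-w 1)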
}
}
output {
stdout{ codec => rubydebug }
elasticsearch{
action => "index"
index => "_dev"
document_type => "_doc"
document_id => "%{employee_number}"
hosts => "localhost:9200"
}
}
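For the posts and comments arrays built by the aggregate filter to be searchable as independent objects rather than flattened value arrays, the index usually needs an explicit nested mapping created before the first run. A sketch, assuming Elasticsearch 6.x since the output still sets document_type (the _dev and _doc names are taken from the output above; real index names cannot start with an underscore, so substitute your own):

PUT _dev
{
  "mappings": {
    "_doc": {
      "properties": {
        "posts": {
          "type": "nested",
          "properties": {
            "comments": { "type": "nested" }
          }
        }
      }
    }
  }
}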
In the SQL part of the Logstash input you might try to select the fields with the nested names you want in Elasticsearch. Below is a small sample of how it might look.
input {
jdbc {
statement => "SELECT id as blog.properties.id, author as blog.properties.author,..... from blog inner join properties inner join posts"
}
}

Yii2: creating relations in models between tables from 2 databases

I have defined 2 databases, for example:
return [
'components' => [
'db1' => [
'class' => 'yii\db\Connection',
'dsn' => 'mysql:host=localhost;dbname=db1name',
'username' => 'db1username',
'password' => 'db1password',
],
'db2' => [
'class' => 'yii\db\Connection',
'dsn' => 'mysql:host=localhost;dbname=db2name',
'username' => 'db2username',
'password' => 'db2password',
],
],
];
Now I have a table 'users' in 'db1' and a table 'countries' in 'db2'.
users
id , country_code , username , password
1 , DE , xyz , 12345
2 , FR , abc , 12345
countries
code , name
DE , Germany
FR , France
IN , India
I have defined the foreign key relation between users.country_code and countries.code.
ISSUE
But when I try to create the model for the 'users' table using Gii, it gives an error, possibly because the related tables are in 2 different databases.
How can I use tables from different databases in a model's relations?
Any suggestions are welcome.
This works in my case to list items in a GridView::widget.
-> bd_sisarc is my second database
-> deposito_sondagem is a table from my first database
public static function getDb() // on the model that uses db1
{
return Yii::$app->get('db1');
}
public static function getDb() // on the model that uses db2
{
return Yii::$app->get('db2');
}
public function getEmpresaSondagem() // relation on your model
{
return $this->hasOne(EmpresaSondagem::className(), ['idEmpSondagem' => 'entidade_deposito']);
}
public function search($params)
{
$this->load($params);
$sql = "SELECT deposito_sondagem.*
FROM
deposito_sondagem,
`bd_sisarc`.`tbempresasondagem`
WHERE
`bd_sisarc`.`tbempresasondagem`.`idEmpSondagem`=`deposito_sondagem`.`entidade_deposito`
and deposito_sondagem.estado=1
and tbempresasondagem.estado=1
and numero_registo LIKE '%$this->numero_registo%'
and nomeempsondagem LIKE '%$this->nomeEntidade%'
and dono_sondagem LIKE '%$this->dono_sondagem%'
and data_deposito LIKE '%$this->data_deposito%'";
$query = DepositoSondagem::findBySql($sql);
$dataProvider = new ActiveDataProvider([
'query' => $query,
]);
if (!$this->validate()) {
// uncomment the following line if you do not want to return any records when validation fails
// $query->where('0=1');
return $dataProvider;
}
return $dataProvider;
}
Try this one:
SELECT `users`.* FROM `users` LEFT JOIN `db2name`.`countries` ON `users`.`country_code` = `db2name`.`countries`.`code`

Elasticsearch: Advanced Filter Query

I am working on Elasticsearch for my current project. I need a filter for users based on their industries. Please look at my code; the equivalent MySQL query is as follows:
SELECT U.* FROM `users` `U`
JOIN `user_industries` `UI` ON `UI`.`user_id`=`U`.`id`
WHERE `UI`.`industry_id` IN('1','3','5');
$query = array("from" => $start,
"size" => $recordslimit,
"sort" => array(array('id' => 'desc')),
"query" => array(
"filtered" => array(
"query" => array("match_all" => array()),
"filter" => array(
"bool" => array(
'must' => array(array('term' => array('user_type' => 'v')),
array('term' => array('status' => 'a')),
array('term' => array('industries.id' => 1))
),
'must_not' => array(
array('term' => array('subscription_type' => 'n'))
)
))
)));
I passed one industry value. How can I pass multiple industry values?
Great start! You can achieve what you want by using a terms filter instead of a term one and specifying the values 1, 3, 5 in an array():
...
array('terms' => array('industries.id' => array(1, 3, 5)))
...
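For reference, the same filter expressed as raw query DSL (this matches the pre-5.x filtered syntax the PHP array builds; on newer Elasticsearch versions a plain bool query with filter clauses replaces filtered):

{
  "query": {
    "filtered": {
      "query": { "match_all": {} },
      "filter": {
        "bool": {
          "must": [
            { "term": { "user_type": "v" } },
            { "term": { "status": "a" } },
            { "terms": { "industries.id": [1, 3, 5] } }
          ],
          "must_not": [
            { "term": { "subscription_type": "n" } }
          ]
        }
      }
    }
  }
}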