Importing Geo Point (Lat, Lng) from MySQL into Elasticsearch - mysql

I am trying to import data from MySQL into an Elastic index using following Logstash script (ELK v 6.22):
input {
jdbc {
jdbc_driver_library => "E:\ELK 6.22\logstash-6.2.2\bin\mysql-connector-java-5.1.45-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/fbk"
jdbc_user => "root"
jdbc_password => ""
statement => "SELECT fbk_repeat._URI AS URI, _SUBMISSION_DATE AS SUBMISSION_DATE, DEVICEID, LOCATION_LAT, LOCATION_LNG, SECTOR, COMMENTS, ACTION_TAKEN, PURPOSE
FROM
fbk_core
INNER JOIN fbk_repeat ON fbk_core._URI = fbk_repeat._PARENT_AURI"
}
}
filter {
# mutate { convert => {"LOCATION_LAT" => "float"} }
# mutate { convert => {"LOCATION_LNG" => "float"} }
# mutate { rename => {"LOCATION_LAT" => "[location][lat]"} }
# mutate { rename => {"LOCATION_LNG" => "[location][lon]"} }
mutate {
# Location and lat/lon should be used as is, this is as per logstash documentation
# Here we are tying to create a two-dimensional array in order to save data as per Logstash documentation
add_field => { "[location][lat]" => [ "%{LOCATION_LAT}" ] }
add_field => { "[location][lon]" => [ "%{LOCATION_LNG}" ] }
convert => [ "[location]", "float" ]
}
# date {
# locale => "eng"
# match => ["_SUBMISSION_DATE", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
# target => "SUBMISSION_DATE"
# }
}
output{
elasticsearch {
hosts => ["localhost:9200"]
index => "feedback"
document_id => "%{URI}"
document_type => "feedbackdata"
manage_template => true
# user => "elastic"
# password => "changeme"
}
stdout { codec => rubydebug { metadata => true } }
# stdout { codec => dots }
}
Once data is imported, I couldn't find any Geo Point field in Kibana to be able to plot data into a map, can anyone guide what must be going wrong.
Thanks!
Data

Elasticsearch can automatically do the mapping, but not for al fields.
You should set your mapping like this for example :
PUT index
{
"mappings": {
"type": {
"properties": {
"location": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"field": {
"properties": {
"date": {
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
"type": "date"
}
}
}
}
}
}
}
Adapt this to handle your data.
Don't forget to create the index pattern in Kibana.

Related

How to create grok/json filter to parse the below json format

I want to parse this JSON to Kibana using Logstash
{
"Format": "IDEA0",
"ID": "2b03eb1f-fc4c-4f67-94e5-31c9fb32dccc",
"DetectTime": "2022-01-31T08:16:12.600470+07:00",
"EventTime": "2022-01-31T01:23:01.637438+00:00",
"Category": ['Intrusion.Botnet'],
"Confidence": 0.03,
"Note": "C&C channel, destination IP: 192.168.1.24 port: 8007/tcp score: 0.9324",
"Source": [{'IP4': ['192.168.1.25'], 'Type': ['CC']}]
}
I want that ID, Detect Time, Event Time, Category, Confidence, Note, Source is a single field so later i can do visualization in kibana.
Here's what I'm already trying to do
input {
file {
path => "/home/ubuntu/Downloads/StratosphereLinuxIPS/output/*.json"
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
json {
source => "message"
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "test-test"
user => "***"
password => "***"
}
stdout{}
}
But the field is not separated correctly
Any help will be meaningful.
Thanks.
:::UPDATE:::
I already found the solution (Help by other guys from Elastic forum but not 100% optimize need to tweak it a little more)
Here's the Logstash Conf I'm using if someone needs it in the future
input {
file {
path => "/home/ubuntu/Downloads/StratosphereLinuxIPS/output/alerts.json"
start_position => "beginning"
sincedb_path => "/dev/null"
codec => multiline { pattern => "^{$" negate => "true" what => "previous" }
}
}
filter {
mutate {
gsub => ["message", "'", '"']
}
json {
source => "message"
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "test-keempat"
user => "xxx"
password => "xxx"
}
stdout{ codec => rubydebug }
}
Thanks !

Parse date inside input logs file

I'm new using ELK stack and i'm trying to create an index from an S3 file. This S3 file's format CSV and has the following schema:
date: Date field with format yyyy-MM-dd HH:mm:ss
filename: Name of the input file that triggers some events
input_registers: count with num of lines for the file
wrong_registers: count with num of wrong registers
result_registers: count with num of result registers (validated)
I need to set date as the #timestamp field on ELK.
I already tried some things with date filter plugin, here i show my current configuration:
input{
s3 {
"id" => "rim-pfinal"
"access_key_id" => ""
"secret_access_key" => ""
"region" => "eu-west-3"
"bucket" => "practica.final.rim.elk"
"prefix" => "logs"
"interval" => "3600"
"additional_settings" => {
"force_path_style" => true
"follow_redirects" => false
}
sincedb_path => "/dev/null"
}
}
filter {
date {
match => [ "date", "ISO8601", "yyyy-MM-dd HH:mm:ss" ]
target => "date"
add_field => { "DummyField" => "Fecha cambiada" }
}
csv{
columns => ["date", "filename", "input_registers", "wrong_registers", "result_registers", "err_type"]
separator => ";"
}
mutate { convert => [ "input_registers", "integer"] }
mutate { convert => [ "wrong_registers", "integer"] }
mutate { convert => [ "result_registers", "integer"] }
#Remove first header line to insert in elasticsearch
if [PK] =~ "PK"{
drop {}
}
}
output{
elasticsearch {
hosts => ["localhost:9200"]
index => "practica-rim"
}
}
I tried to set target to timestamp and match too but seems not working.
Thank you for the help!
{
"query":{
"range":{
"#timestamp":{
"gte":"2015-08-04T11:00:00",
"lt":"2015-08-04T12:00:00"
}
}
}
}
datetimes will be serialized
es.index(index="my-index", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()})
{u'_id': u'42', u'_index': u'my-index', u'_type': u'test-type', u'_version': 1, u'ok': True}
# but not deserialized
>>> es.get(index="my-index", doc_type="test-type", id=42)['_source']
{u'any': u'data', u'timestamp': u'2013-05-12T19:45:31.804229'}
https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html

Logstash Parsing and Calculations with CSV

I am having trouble parsing and calculating performance Navigation Timing data I have in a csv.
I was able to parse the fields but not sure how to approach the calculations (below) properly. Some points to keep in mind:
Data sets are grouped together by the bolded value (it is the ts of when the 21 datapoints were taken
ACMEPage-1486643427973,unloadEventEnd,1486643372422
2.Calculations need to be done with data points within the group
I am assuming some tagging and grouping will need to be done but I don't have a clear vision on how to implement it. Any help would be greatly appreciated.
Thanks,
---------------Calculations-----------------
Total First byte Time = responseStart - navigationStart
Latency = responseStart – fetchStart
DNS / Domain Lookup Time = domainLookupEnd - domainLookupStart
Server connect Time = connectEnd - connectStart
Server Response Time = responseStart - requestStart
Page Load time = loadEventStart - navigationStart
Transfer/Page Download Time = responseEnd - responseStart
DOM Interactive Time = domInteractive - navigationStart
DOM Content Load Time = domContentLoadedEventEnd - navigationStart
DOM Processing to Interactive =domInteractive - domLoading
DOM Interactive to Complete = domComplete - domInteractive
Onload = loadEventEnd - loadEventStart
-------Data in CSV-----------
ACMEPage-1486643427973,unloadEventEnd,1486643372422
ACMEPage-1486643427973,responseEnd,1486643372533
ACMEPage-1486643427973,responseStart,1486643372416
ACMEPage-1486643427973,domInteractive,1486643373030
ACMEPage-1486643427973,domainLookupEnd,1486643372194
ACMEPage-1486643427973,unloadEventStart,1486643372422
ACMEPage-1486643427973,domComplete,1486643373512
ACMEPage-1486643427973,domContentLoadedEventStart,1486643373030
ACMEPage-1486643427973,domainLookupStart,1486643372194
ACMEPage-1486643427973,redirectEnd,0
ACMEPage-1486643427973,redirectStart,0
ACMEPage-1486643427973,connectEnd,1486643372194
ACMEPage-1486643427973,toJSON,{}
ACMEPage-1486643427973,connectStart,1486643372194
ACMEPage-1486643427973,loadEventStart,1486643373512
ACMEPage-1486643427973,navigationStart,1486643372193
ACMEPage-1486643427973,requestStart,1486643372203
ACMEPage-1486643427973,secureConnectionStart,0
ACMEPage-1486643427973,fetchStart,1486643372194
ACMEPage-1486643427973,domContentLoadedEventEnd,1486643373058
ACMEPage-1486643427973,domLoading,1486643372433
ACMEPage-1486643427973,loadEventEnd,1486643373514
----------Output---------------
"path" => "/Users/philipp/Downloads/build2/logDataPoints_com.concur.automation.cge.ui.admin.ADCLookup_1486643340910.csv",
"#timestamp" => 2017-02-09T12:29:57.763Z,
"navigationTimer" => "connectStart",
"#version" => "1",
"host" => "15mbp-09796.local",
"elapsed_time" => "1486643372194",
"pid" => "1486643397763",
"page" => "ADCLookupDataPage",
"message" => "ADCLookupDataPage-1486643397763,connectStart,1486643372194",
"type" => "csv"
}
--------------logstash.conf----------------
input {
file {
type => "csv"
path => "/Users/path/logDataPoints_com.concur.automation.acme.ui.admin.acme_1486643340910.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id", "navigationTimer", "elapsed_time"]
}
if (["elapsed_time"] == "{}" ) {
drop{}
}
else {
grok {
match => { "page_id" => "%{WORD:page}-%{INT:pid}"
}
remove_field => [ "page_id" ]
}
}
date {
match => [ "pid", "UNIX_MS" ]
target => "#timestamp"
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
I the following to get trend my data:
-I found it easier to pivot the data, rather than going down the column, to have the data go along the rows per each "event" or "document"
-Each field needed to be mapped accordingly as an integer or string
Once the data was in Kibana properly I had problems using the ruby code filter to make simple math calculations so I ended up using the "scripted fields" to make the calculations in Kibana.
input {
file {
type => "csv"
path => "/Users/philipp/perf_csv_pivot2.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id","unloadEventEnd","responseEnd","responseStart","domInteractive","domainLookupEnd","unloadEventStart","domComplete","domContentLoadedEventStart","domainLookupstart","redirectEnd","redirectStart","connectEnd","toJSON","connectStart","loadEventStart","navigationStart","requestStart","secureConnectionStart","fetchStart","domContentLoadedEventEnd","domLoading","loadEventEnd"]
}
grok {
match => { "page_id" => "%{WORD:page}-%{INT:page_ts}" }
remove_field => [ "page_id", "message", "path" ]
}
mutate {
convert => { "unloadEventEnd" => "integer" }
convert => { "responseEnd" => "integer" }
convert => { "responseStart" => "integer" }
convert => { "domInteractive" => "integer" }
convert => { "domainLookupEnd" => "integer" }
convert => { "unloadEventStart" => "integer" }
convert => { "domComplete" => "integer" }
convert => { "domContentLoadedEventStart" => "integer" }
convert => { "domainLookupstart" => "integer" }
convert => { "redirectEnd" => "integer" }
convert => { "redirectStart" => "integer" }
convert => { "connectEnd" => "integer" }
convert => { "toJSON" => "string" }
convert => { "connectStart" => "integer" }
convert => { "loadEventStart" => "integer" }
convert => { "navigationStart" => "integer" }
convert => { "requestStart" => "integer" }
convert => { "secureConnectionStart" => "integer" }
convert => { "fetchStart" => "integer" }
convert => { "domContentLoadedEventEnd" => "integer" }
convert => { "domLoading" => "integer" }
convert => { "loadEventEnd" => "integer" }
}
date {
match => [ "page_ts", "UNIX_MS" ]
target => "#timestamp"
remove_field => [ "page_ts", "timestamp", "host", "toJSON" ]
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
Hope this can help someone else,

logstash mysql general_logs CSV format

I want to add the mysql general_log to the logstash. I have managed to make the mysql log in CSV format and with the CSV pattern there should be no easier thing to do. Here is my general_log entry:
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SET NAMES utf8"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SELECT ##SESSION.sql_mode"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SET SESSION sql_mode='NO_ENGINE_SUBSTITUTION'"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Init DB","mrr"
and here is my logstash.conf:
input {
lumberjack {
port => 5000
type => "logs"
ssl_certificate => "/etc/pki/tls/certs/logstash_forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash_forwarder.key"
}
}
filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host", "%{WORD:remoteuser}\[%{WORD:localuser}\] \# %{IPORHOST:dbhost} \[(?:%{IPORHOST:qhost}|-)\]" }
}
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
host => "172.17.0.5"
cluster => "z0z0.tk-1.5"
flush_size => 2000
}
}
however the user_host column has this format:
"mrr[mrr] # localhost []" and I would like to split it into at least two different values one for the user and the otherone for the host.
I have run this configuration on logstash and it ends up in _grokparsefailure due to the grok parse
when I am running the checktest option on the config file I am getting the following output:
Error: Expected one of #, => at line 36, column 26 (byte 1058) after filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host"
Can you give me an idea what is wrong?
The csv{} filter is only parsing, um, comma-separated values. If you'd like to parse fields of other formats, use grok{} on the user_host column after the csv{} filter has created it.
EDIT: to be more explicit.
Run the csv filter:
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id". "server_id", "ctype", "query" ]
separator => ","
}
which should create you a field called "user_host".
You can then run this field through a grok filter, like this (untested) one:
grok {
match => [ "user_host", "%{WORD:myUser}\[%{WORD}\] # %{WORD:myHost} \[\]" ]
}
This will create two more fields for you: myUser and myHost.
Got it working. The error was in fact in the grok patters since the first user and the last host was at some point emtpy the grok did failed in parsing so I had to add some brackets to accept also empty strings. The current logstash.conf looks like this:
input {
lumberjack {
port => 5000
type => "logs"
ssl_certificate => "/etc/pki/tls/certs/logstash_forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash_forwarder.key"
}
}
filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host", "(?:%{WORD:remoteuser}|)\[%{WORD:localuser}\] \# %{IPORHOST:dbhost} \[(?:%{IPORHOST:qhost}|)\]" }
}
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
host => "172.17.0.5"
cluster => "clustername"
flush_size => 2000
}
}
Thanks for your help and suggestions

Logstash: Parse Complicated Multiline JSON from log file into ElasticSearch

Let me first say that I have gone through as many examples on here as I could that still do not work. I am not sure if it's because of the complicated nature of the JSON in the log file or not.
I am looking to take the example log entry, have Logstash read it in, and send the JSON as JSON to ElasticSearch.
Here is what the (shortened) example looks:
[0m[0m16:02:08,685 INFO [org.jboss.as.server] (ServerService Thread Pool -- 28) JBAS018559: {
"appName": "SomeApp",
"freeMemReqStartBytes": 544577648,
"freeMemReqEndBytes": 513355408,
"totalMem": 839385088,
"maxMem": 1864368128,
"anonymousUser": false,
"sessionId": "zz90g0dFQkACVao4ZZL34uAb",
"swAction": {
"clock": 0,
"clockStart": 1437766438950,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"remoteHost": "127.0.0.1",
"remoteAddr": "127.0.0.1",
"requestMethod": "GET",
"mapLocalObjectCount": {
"FinanceEmployee": {
"x": 1,
"singleton": false
},
"QuoteProcessPolicyRef": {
"x": 10,
"singleton": false
},
"LocationRef": {
"x": 2,
"singleton": false
}
},
"theSqlStats": {
"lstStat": [
{
"sql": "select * FROM DUAL",
"truncated": false,
"truncatedSize": -1,
"recordCount": 1,
"foundInCache": false,
"putInCache": false,
"isUpdate": false,
"sqlFrom": "DUAL",
"usingPreparedStatement": true,
"isLoad": false,
"sw": {
"clock": 104,
"clockStart": 1437766438970,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"count": 0
},
{
"sql": "select * FROM DUAL2",
"truncated": false,
"truncatedSize": -1,
"recordCount": 0,
"foundInCache": false,
"putInCache": false,
"isUpdate": false,
"sqlFrom": "DUAL2",
"usingPreparedStatement": true,
"isLoad": false,
"sw": {
"clock": 93,
"clockStart": 1437766439111,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"count": 0
}
]
}
}
The Logstash configs I have tried have not worked. The one closest so far is:
input {
file {
codec => multiline {
pattern => '\{(.*)\}'
negate => true
what => previous
}
path => [ '/var/log/logstash.log' ]
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
json {
source => message
}
}
output {
stdout { codec => rubydebug }
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}
I have also tried:
input {
file {
type => "json"
path => "/var/log/logstash.log"
codec => json #also tried json_lines
}
}
filter {
json {
source => "message"
}
}
output {
stdout { codec => rubydebug }
elasticsearch {
cluster => "logstash"
codec => "json" #also tried json_lines
index => "logstashjson"
}
}
I just want to take the JSON posted above and send it "as is" to ElasticSearch just as if I did a cURL PUT with that file. I appreciate any help, thank you!
UPDATE
After help from Leonid, here is the configuration I have right now:
input {
file {
codec => multiline {
pattern => "^\["
negate => true
what => previous
}
path => [ '/var/log/logstash.log' ]
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
grok {
match => { "message" => "^(?<rubbish>.*?)(?<logged_json>{.*)" }
}
json {
source => "logged_json"
target => "parsed_json"
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}
Sorry, I can't yet make comments, so will post an answer. You are missing a document_type in the elaticsearch config, how would it be otherwise deduced?
All right, after looking into the logstash reference and working closely with #Ascalonian we came up with the following config:
input {
file {
# in the input you need to properly configure the multiline codec.
# You need to match the line that has the timestamp at the start,
# and then say 'everything that is NOT this line should go to the previous line'.
# the pattern may be improved to handle case when json array starts at the first
# char of the line, but it is sufficient currently
codec => multiline {
pattern => "^\["
negate => true
what => previous
max_lines => 2000
}
path => [ '/var/log/logstash.log']
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
# extract the json part of the message string into a separate field
grok {
match => { "message" => "^.*?(?<logged_json>{.*)" }
}
# replace newlines in the json string since the json filter below
# can not deal with those. Also it is time to delete unwanted fields
mutate {
gsub => [ 'logged_json', '\n', '' ]
remove_field => [ "message", "#timestamp", "host", "path", "#version", "tags"]
}
# parse the json and remove the string field upon success
json {
source => "logged_json"
remove_field => [ "logged_json" ]
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}