logstash mysql general_log CSV format

I want to add the MySQL general_log to Logstash. I have managed to get the MySQL log written in CSV format, and with the csv filter this should be straightforward. Here are my general_log entries:
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SET NAMES utf8"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SELECT ##SESSION.sql_mode"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Query","SET SESSION sql_mode='NO_ENGINE_SUBSTITUTION'"
"2015-08-15 11:52:57","mrr[mrr] # localhost []",4703,0,"Init DB","mrr"
and here is my logstash.conf:
input {
lumberjack {
port => 5000
type => "logs"
ssl_certificate => "/etc/pki/tls/certs/logstash_forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash_forwarder.key"
}
}
filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host", "%{WORD:remoteuser}\[%{WORD:localuser}\] \# %{IPORHOST:dbhost} \[(?:%{IPORHOST:qhost}|-)\]" }
}
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
host => "172.17.0.5"
cluster => "z0z0.tk-1.5"
flush_size => 2000
}
}
However, the user_host column has this format:
"mrr[mrr] @ localhost []" and I would like to split it into at least two different values, one for the user and the other one for the host.
I have run this configuration in Logstash and it ends up with a _grokparsefailure because of the grok parsing.
When I run the configtest option on the config file I get the following output:
Error: Expected one of #, => at line 36, column 26 (byte 1058) after filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host"
Can you give me an idea what is wrong?

The csv{} filter is only parsing, um, comma-separated values. If you'd like to parse fields of other formats, use grok{} on the user_host column after the csv{} filter has created it.
EDIT: to be more explicit.
Run the csv filter:
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id". "server_id", "ctype", "query" ]
separator => ","
}
which should create you a field called "user_host".
You can then run this field through a grok filter, like this (untested) one:
grok {
match => [ "user_host", "%{WORD:myUser}\[%{WORD}\] # %{WORD:myHost} \[\]" ]
}
This will create two more fields for you: myUser and myHost.
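Once those two fields exist you can also drop the combined column if you no longer need it; a minimal (untested) follow-up would be:
mutate {
remove_field => [ "user_host" ]
}
The names myUser and myHost above are only placeholders, so rename them to whatever fits your mapping.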

Got it working. The error was in fact in the grok pattern: since the first user and the last host were empty at some points, grok failed to parse, so I had to add some optional groups to also accept empty strings. The current logstash.conf looks like this:
input {
lumberjack {
port => 5000
type => "logs"
ssl_certificate => "/etc/pki/tls/certs/logstash_forwarder.crt"
ssl_key => "/etc/pki/tls/private/logstash_forwarder.key"
}
}
filter {
if [type] == "nginx-access" {
grok {
match => { 'message' => '%{IPORHOST:clientip} %{NGUSER:indent} %{NGUSER:agent} \[%{HTTPDATE:timestamp}\] \"(?:%{WORD:verb} %{URIPATHPARAM:request}(?: HTTP/%{NUMBER:httpversion})?|)\" %{NUMBER:answer} (?:%{NUMBER:byte}|-) (?:\"(?:%{URI:referrer}|-))\" (?:%{QS:referree}) %{QS:agent}' }
}
geoip {
source => "clientip"
target => "geoip"
database => "/etc/logstash/GeoLiteCity.dat"
add_field => [ "[geoip][coordinates]", "%{[geoip][longitude]}" ]
add_field => [ "[geoip][coordinates]", "%{[geoip][latitude]}" ]
}
mutate {
convert => [ "[geoip][coordinates]", "float" ]
}
}
if [type] == "mysql-general" {
csv {
columns => [ "#timestamp(6)", "user_host", "thready_id", "server_id", "ctype", "query" ]
separator => ","
}
grok {
match => { "user_host", "(?:%{WORD:remoteuser}|)\[%{WORD:localuser}\] \# %{IPORHOST:dbhost} \[(?:%{IPORHOST:qhost}|)\]" }
}
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
host => "172.17.0.5"
cluster => "clustername"
flush_size => 2000
}
}
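A possible further refinement (untested): a date filter so the time column from the CSV drives the event timestamp instead of the ingest time. The sketch below assumes the first CSV column is first renamed to something plain like log_time (an illustrative name, not what the config above uses):
date {
# assumes the general_log writes times in this format
match => [ "log_time", "yyyy-MM-dd HH:mm:ss" ]
}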
Thanks for your help and suggestions

Related

Parsing nested JSON log file into ELK - Shodan.io Logs

I'm trying to parse a nested JSON log file (from Shodan.io).
I have parsed a few values, but I am not able to parse the values mentioned below:
hostnames
smb
smb_version
shares
temporary
type
name
comments
anonymous
transport
It would also be good if I could get rid of the value of 'raw': [0000, 0000].
You can check my sample log file here.
Below is my existing logstash filter configuration:
input {
file {
path => [ "/path/to/shodan-logs.json" ]
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
json {
source => "message"
target => "json_parse"
add_tag => ["json_filter"]
tag_on_failure => ["json"]
}
grok {
break_on_match => false
add_tag => ["filtered"]
tag_on_failure => ["no_match_found"]
match => {
"message" => [
"%{IP:client_ip}",
"%{TIMESTAMP_ISO8601:timestamp}"
]
}
}
geoip {
source => "client_ip"
add_tag => ["geo_ip_found"]
tag_on_failure => ["geo_ip_not_found"]
}
useragent {
source => "message"
add_tag => ["user_details_found"]
}
# ruby {
# add_tag => ["ruby_filter"]
# code => '
# props = event.get("message")
# if props
# props.each { |x|
# key = x["key"]
# event.set("message.#{key}", x["value"])
# }
# end
# '
# }
mutate {
remove_field => [ "#timestamp", "path", "host", "#version" ]
}
}
output {
elasticsearch {
hosts => ["http://localhost:9200"]
user => "elastic"
password => "password"
index => "shodan-demo-%{+dd-MM-YYYY}"
}
stdout {
codec => rubydebug
}
}
Here are snapshots of my output on ELK
Note: I have already tried the methods/filters mentioned below:
The commented-out ruby filter above, which is not working.
Multiline input
json codec in input
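Since the json filter above already parses the document under json_parse, one direction worth trying (untested, and the nested paths below are assumptions based on the field names listed above) is to lift the nested keys with mutate and drop the unwanted raw array:
mutate {
# hypothetical paths - check the rubydebug output for the real nesting
rename => {
"[json_parse][hostnames]" => "hostnames"
"[json_parse][smb][smb_version]" => "smb_version"
"[json_parse][smb][shares]" => "smb_shares"
}
remove_field => [ "[json_parse][smb][raw]" ]
}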

How to create grok/json filter to parse the below json format

I want to parse this JSON and send it to Kibana using Logstash:
{
"Format": "IDEA0",
"ID": "2b03eb1f-fc4c-4f67-94e5-31c9fb32dccc",
"DetectTime": "2022-01-31T08:16:12.600470+07:00",
"EventTime": "2022-01-31T01:23:01.637438+00:00",
"Category": ['Intrusion.Botnet'],
"Confidence": 0.03,
"Note": "C&C channel, destination IP: 192.168.1.24 port: 8007/tcp score: 0.9324",
"Source": [{'IP4': ['192.168.1.25'], 'Type': ['CC']}]
}
I want ID, DetectTime, EventTime, Category, Confidence, Note, and Source each to be its own field so that I can later build visualizations in Kibana.
Here is what I have already tried:
input {
file {
path => "/home/ubuntu/Downloads/StratosphereLinuxIPS/output/*.json"
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
json {
source => "message"
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "test-test"
user => "***"
password => "***"
}
stdout{}
}
But the fields are not separated correctly.
Any help would be appreciated.
Thanks.
:::UPDATE:::
I have already found the solution (with help from people on the Elastic forum; it is not 100% optimized yet and needs a little more tweaking).
Here's the Logstash conf I'm using, in case someone needs it in the future:
input {
file {
path => "/home/ubuntu/Downloads/StratosphereLinuxIPS/output/alerts.json"
start_position => "beginning"
sincedb_path => "/dev/null"
codec => multiline { pattern => "^{$" negate => "true" what => "previous" }
}
}
filter {
mutate {
gsub => ["message", "'", '"']
}
json {
source => "message"
}
}
output {
elasticsearch {
hosts => ["localhost:9200"]
index => "test-keempat"
user => "xxx"
password => "xxx"
}
stdout{ codec => rubydebug }
}
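A possible further refinement (untested): a date filter so that DetectTime, which is already ISO8601 in these alerts, drives the event timestamp:
date {
match => [ "DetectTime", "ISO8601" ]
}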
Thanks !

Logstash cannot extract json key

I need help with a Logstash filter that extracts a JSON key/value into a new field. The following is my logstash conf:
input {
tcp {
port => 5044
}
}
filter {
json {
source => "message"
add_field => {
"data" => "%{[message][data]}"
}
}
}
output {
stdout { codec => rubydebug }
}
I have tried with mutate:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{[message][data]}"
}
}
}
I have tried with . instead of []:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{message.data}"
}
}
}
I have tried with index number:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{[message][0]}"
}
}
}
All with no luck. :(
The following json is sent to port 5044:
{"data": "blablabla"}
The problem is that the new field does not pick up the value from the JSON key:
"data" => "%{[message][data]}"
The following is my stdout:
{
"#version" => "1",
"host" => "localhost",
"type" => "logstash",
"data" => "%{[message][data]}",
"path" => "/path/from/my/app",
"#timestamp" => 2019-01-11T20:39:10.845Z,
"message" => "{\"data\": \"blablabla\"}"
}
However if I use "data" => "%{[message]}" instead:
filter {
json {
source => "message"
add_field => {
"data" => "%{[message]}"
}
}
}
I will get the whole json from stdout.
{
"#version" => "1",
"host" => "localhost",
"type" => "logstash",
"data" => "{\"data\": \"blablabla\"}",
"path" => "/path/from/my/app",
"#timestamp" => 2019-01-11T20:39:10.845Z,
"message" => "{\"data\": \"blablabla\"}"
}
Can anyone please tell me what I did wrong?
Thank you in advance.
I am using the docker-elk stack, ELK_VERSION=6.5.4.
add_field is used to add extra fields once the filter succeeds; many filters have this option. If you want to parse the JSON into a field, you should use target:
filter {
json {
source => "message"
target => "data" // parse into data field
}
}
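Note that with target => "data" the value for this particular payload ends up nested, i.e. reachable as [data][data]. If the JSON keys are already the field names you want, a plain json filter without target (untested sketch) creates a top-level data field directly:
filter {
json {
source => "message"
}
}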

Importing Geo Point (Lat, Lng) from MySQL into Elasticsearch

I am trying to import data from MySQL into an Elasticsearch index using the following Logstash script (ELK v6.2.2):
input {
jdbc {
jdbc_driver_library => "E:\ELK 6.22\logstash-6.2.2\bin\mysql-connector-java-5.1.45-bin.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/fbk"
jdbc_user => "root"
jdbc_password => ""
statement => "SELECT fbk_repeat._URI AS URI, _SUBMISSION_DATE AS SUBMISSION_DATE, DEVICEID, LOCATION_LAT, LOCATION_LNG, SECTOR, COMMENTS, ACTION_TAKEN, PURPOSE
FROM
fbk_core
INNER JOIN fbk_repeat ON fbk_core._URI = fbk_repeat._PARENT_AURI"
}
}
filter {
# mutate { convert => {"LOCATION_LAT" => "float"} }
# mutate { convert => {"LOCATION_LNG" => "float"} }
# mutate { rename => {"LOCATION_LAT" => "[location][lat]"} }
# mutate { rename => {"LOCATION_LNG" => "[location][lon]"} }
mutate {
# Location and lat/lon should be used as is, as per the Logstash documentation
# Here we are trying to create a two-dimensional array in order to save the data as per the Logstash documentation
add_field => { "[location][lat]" => [ "%{LOCATION_LAT}" ] }
add_field => { "[location][lon]" => [ "%{LOCATION_LNG}" ] }
convert => [ "[location]", "float" ]
}
# date {
# locale => "eng"
# match => ["_SUBMISSION_DATE", "yyyy-MM-dd HH:mm:ss", "ISO8601"]
# target => "SUBMISSION_DATE"
# }
}
output{
elasticsearch {
hosts => ["localhost:9200"]
index => "feedback"
document_id => "%{URI}"
document_type => "feedbackdata"
manage_template => true
# user => "elastic"
# password => "changeme"
}
stdout { codec => rubydebug { metadata => true } }
# stdout { codec => dots }
}
Once the data is imported, I cannot find any geo_point field in Kibana that would let me plot the data on a map. Can anyone advise what might be going wrong?
Thanks!
Elasticsearch can automatically do the mapping, but not for all fields.
You should set your mapping explicitly, for example:
PUT index
{
"mappings": {
"type": {
"properties": {
"location": {
"properties": {
"coordinates": {
"type": "geo_point"
}
}
},
"field": {
"properties": {
"date": {
"format": "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
"type": "date"
}
}
}
}
}
}
}
Adapt this to handle your data.
Don't forget to create the index pattern in Kibana.
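On the Logstash side, the commented-out mutate lines in the question are roughly the right shape; an untested sketch, assuming the index mapping declares location itself (rather than location.coordinates) as geo_point:
mutate { convert => { "LOCATION_LAT" => "float" } }
mutate { convert => { "LOCATION_LNG" => "float" } }
mutate { rename => { "LOCATION_LAT" => "[location][lat]" } }
mutate { rename => { "LOCATION_LNG" => "[location][lon]" } }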

Error in logstash configuration file tomcat

I have a problem with my Logstash configuration.
My log lines look like this:
2017-07-26 14:31:03,644 INFO [http-bio-10.60.2.21-10267-exec-92] jsch.DeployManagerFileUSImpl (DeployManagerFileUSImpl.java:132) - passage par ficher temporaire .bindings.20170726-143103.tmp
My current pattern is
match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log-level} \(%{DATA:class}\):%{GREEDYDATA:message}" }
Which pattern should I use for [http-bio-10.60.2.21-10267-exec-92] and which for jsch.DeployManagerFileUSImpl?
Doesn't seem like the current pattern you've shown would work, as you don't have anything in your sample message that matches \(%{DATA:class}\):%{GREEDYDATA:message} and you're not dealing with the double space after the loglevel.
If you want to match some random stuff in the middle of a line, use %{DATA}, e.g.:
\[%{DATA:myfield}\]
and then you can use %{GREEDYDATA} to get the stuff at the end of the line:
\[%{DATA:myfield1}\] %{GREEDYDATA:myfield2}
If you need to break these items down into fields of their own, then be more specific with the pattern or use a second grok{} block.
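For the two pieces you asked about, the stock grok patterns already cover them; an untested fragment for just that middle section could be:
grok {
match => [ "message", "\[%{DATA:thread}\] %{JAVACLASS:class}" ]
}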
In my logstash.conf I have changed my pattern to:
match => [ "message", "%{TIMESTAMP_ISO8601:logdate},%{INT} %{LOGLEVEL:log-level} \[(?<threadname>[^\]]+)\] %{JAVACLASS:package} \(%{JAVAFILE:file}:%{INT:line}\) - %{GREEDYDATA:message}" ]
I did this with the help of the site https://grokdebug.herokuapp.com/.
But I cannot see, in Kibana 5.4.3, the contents of my static log files from the /home/elasticsearch/static_logs/ directory. Why not?
My Logstash configuration file with the "static" section:
input {
file {
type => "access-log"
path => "/home/elasticsearch/tomcat/logs/*.txt"
}
file {
type => "tomcat"
path => "/home/elasticsearch/tomcat/logs/*.log" exclude => "*.zip"
codec => multiline {
negate => true
pattern => "(^%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM))"
what => "previous"
}
}
file {
type => "static"
path => "/home/elasticsearch/static_logs/*.log" exclude => "*.zip"
}
}
filter {
if [type] == "access-log" {
grok {
# Access log pattern is %a %{waffle.servlet.NegotiateSecurityFilter.PRINCIPAL}s %t %m %U%q %s %B %T "%{Referer}i" "%{User-Agent}i"
match => [ "message" , "%{IPV4:clientIP} %{NOTSPACE:user} \[%{DATA:timestamp}\] %{WORD:method} %{NOTSPACE:request} %{NUMBER:status} %{NUMBER:bytesSent} %{NUMBER:duration} \"%{NOTSPACE:referer}\" \"%{DATA:userAgent}\"" ]
remove_field => [ "message" ]
}
grok{
match => [ "request", "/%{USERNAME:app}/" ]
tag_on_failure => [ ]
}
date {
match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z" ]
remove_field => [ "timestamp" ]
}
geoip {
source => ["clientIP"]
}
dns {
reverse => [ "clientIP" ]
}
mutate {
lowercase => [ "user" ]
convert => [ "bytesSent", "integer", "duration", "float" ]
}
if [referer] == "-" {
mutate {
remove_field => [ "referer" ]
}
}
if [user] == "-" {
mutate {
remove_field => [ "user" ]
}
}
}
if [type] == "tomcat" {
if [message] !~ /(.+)/ {
drop { }
}
grok{
patterns_dir => "./patterns"
overwrite => [ "message" ]
# oK Catalina normal
match => [ "message", "%{CATALINA_DATESTAMP:timestamp} %{NOTSPACE:className} %{WORD:methodName}\r\n%{LOGLEVEL: logLevel}: %{GREEDYDATA:message}" ]
}
grok{
match => [ "path", "/%{USERNAME:app}.20%{NOTSPACE}.log"]
tag_on_failure => [ ]
}
# Aug 25, 2014 11:23:31 AM
date{
match => [ "timestamp", "MMM dd, YYYY hh:mm:ss a" ]
remove_field => [ "timestamp" ]
}
}
if [type] == "static" {
if [message] !~ /(.+)/ {
drop { }
}
grok{
patterns_dir => "./patterns"
overwrite => [ "message" ]
# 2017-08-03 16:01:11,352 WARN [Thread-552] pcf2.AbstractObjetMQDAO (AbstractObjetMQDAO.java:137) - Descripteur de
match => [ "message", "%{TIMESTAMP_ISO8601:logdate},%{INT} %{LOGLEVEL:log-level} \[(?<threadname>[^\]]+)\] %{JAVACLASS:package} \(%{JAVAFILE:file}:%{INT:line}\) - %{GREEDYDATA:message}" ]
}
# 2017-08-03 16:01:11,352
date{
match => [ "timestamp", "YYYY-MM-dd hh:mm:ss,SSS" ]
remove_field => [ "timestamp" ]
}
}
}
output {
elasticsearch { hosts => ["192.168.99.100:9200"]}
}
Where is my mistake ?
Regards
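One detail that stands out when comparing the filters above (an untested observation): in the "static" block the grok captures the time into logdate, but the date filter looks for a timestamp field, so that date parse can never succeed. Pointing it at logdate, roughly like this, might help:
date {
match => [ "logdate", "yyyy-MM-dd HH:mm:ss" ]
}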