Logstash: How to split a multiline JSON object and add_field into Kibana?

I have the following JSON data. What I need is to split each object into a separate message and add_field, but with my current configuration the whole JSON ends up in one message. I'm not sure what I'm doing wrong; any help or pointer in the right direction would be really helpful.
[
{
"SOURCE": "Source A",
"Model": "ModelABC",
"Qty": "3"
},
{
"SOURCE": "Source B",
"Model": "MoBC",
"Qty": "31"
},
{
"SOURCE": "Source C",
"Model": "MoBCSss",
"Qty": "3qq"
}
]
logstash.config
input {
file {
path => "/usr/share/logstash/sample-log/Test-Log-For-Kibana.json"
start_position => "beginning"
codec => multiline {
pattern => "^}"
negate => true
what => previous
auto_flush_interval => 1
multiline_tag => ""
}
}
}
filter {
json {
source => "message"
target => "someField"
}
mutate {
add_field => {
"SOURCE" => "%{[someField][SOURCE]}"
"Model" => "%{[someField][Model]}"
"Qty" => "%{[someField][Qty]}"
}
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
hosts => "elasticsearch:9200"
user => "elastic"
password => "changeme"
}
}
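One direction that might help (a minimal sketch, not tested against this exact setup): once the whole array has arrived as a single event, parse it with the json filter and then use the split filter, which emits one event per element of the array field. The field and option names below are taken from your config:
filter {
  json {
    source => "message"
    target => "someField"
  }
  # split turns the array in someField into one event per element
  split {
    field => "someField"
  }
  mutate {
    add_field => {
      "SOURCE" => "%{[someField][SOURCE]}"
      "Model" => "%{[someField][Model]}"
      "Qty" => "%{[someField][Qty]}"
    }
  }
}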

Related

Logstash Grok JSON error - mapper of different type

I have this log file:
2020-08-05 09:11:19 INFO-flask.model-{"version": "1.2.1", "time": 0.651745080947876, "output": {...}}
This is my Logstash filter setting:
filter {
grok{
match => {
"message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log.level}-%{DATA:model}-%{GREEDYDATA:log.message}"}
}
date {
timezone => "UTC"
match => ["timestamp" , "ISO8601", "yyyy-MM-dd HH:mm:ss"]
target => "#timestamp"
remove_field => [ "timestamp" ]
}
json{
source => "log.message"
target => "log.message"
}
mutate {
add_field => {
"execution.time" => "%{[log.message][time]}"
}
}
}
I want to extract the "time" value from the message. But I receive this error:
[2020-08-05T09:11:32,688][WARN ][logstash.outputs.elasticsearch][main][81ad4d5f6359b99ec4e52c93e518567c1fe91de303faf6fa1a4d905a73d3c334] Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"index-2020.08.05", :routing=>nil, :_type=>"_doc"}, #<LogStash::Event:0xbe6a80>], :response=>{"index"=>{"_index"=>"index-2020.08.05", "_type"=>"_doc", "_id"=>"ywPjvXMByEqBCvLy1871", "status"=>400, "error"=>{"type"=>"illegal_argument_exception", "reason"=>"mapper [log.message.input.values] of different type, current_type [long], merged_type [text]"}}}}
Please find the filter part for your logstash configuration:
filter {
grok {
match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log-level}-%{DATA:model}-%{GREEDYDATA:KV}" }
overwrite => [ "message" ]
}
kv {
source => "KV"
value_split => ": "
field_split => ", "
target => "msg"
}
}
Hope this will solve your problem.
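If the kv approach does not fit (the payload here is JSON rather than plain key/value pairs), another direction might be to parse the JSON into a temporary field, copy out only the value that is needed, and drop the rest so the inconsistently typed nested fields never reach Elasticsearch. A rough sketch, assuming the goal is just the time value (the field names raw_json, payload and execution_time are my own choices):
filter {
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:loglevel}-%{DATA:model}-%{GREEDYDATA:raw_json}" }
  }
  json {
    source => "raw_json"
    target => "payload"
  }
  mutate {
    # keep only the extracted value; removing the parsed object avoids the
    # type conflict on the deeply nested fields at index time
    add_field => { "execution_time" => "%{[payload][time]}" }
    remove_field => [ "payload", "raw_json" ]
  }
}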

Error in logstash configuration file tomcat

I have a problem with my Logstash configuration.
My log lines look like this:
2017-07-26 14:31:03,644 INFO [http-bio-10.60.2.21-10267-exec-92] jsch.DeployManagerFileUSImpl (DeployManagerFileUSImpl.java:132) - passage par ficher temporaire .bindings.20170726-143103.tmp
My current pattern is
match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log-level} \(%{DATA:class}\):%{GREEDYDATA:message}" }
Which pattern should I use for [http-bio-10.60.2.21-10267-exec-92] and which for jsch.DeployManagerFileUSImpl?
Doesn't seem like the current pattern you've shown would work, as you don't have anything in your sample message that matches \(%{DATA:class}\):%{GREEDYDATA:message} and you're not dealing with the double space after the loglevel.
If you want to match some random stuff in the middle of a line, use %{DATA}, e.g.:
\[%{DATA:myfield}\]
and then you can use %{GREEDYDATA} to get the stuff at the end of the line:
\[%{DATA:myfield1}\] %{GREEDYDATA:myfield2}
If you need to break these items down into fields of their own, then be more specific with the pattern or use a second grok{} block.
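Applied to the sample line above, a second grok{} block along these lines could pull out the two pieces asked about (a sketch; the field names thread, class, file and line are arbitrary):
grok {
  # [http-bio-10.60.2.21-10267-exec-92] jsch.DeployManagerFileUSImpl (DeployManagerFileUSImpl.java:132)
  match => { "message" => "\[%{DATA:thread}\] %{JAVACLASS:class} \(%{JAVAFILE:file}:%{INT:line}\)" }
}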
In my logstash.conf I have changed my pattern to
match => [ "message", "%{TIMESTAMP_ISO8601:logdate},%{INT} %{LOGLEVEL:log-level} \[(?<threadname>[^\]]+)\] %{JAVACLASS:package} \(%{JAVAFILE:file}:%{INT:line}\) - %{GREEDYDATA:message}" ]
with the help of the site https://grokdebug.herokuapp.com/.
But I cannot see in Kibana 5.4.3 the contents of my static log files from the /home/elasticsearch/static_logs/ directory.
My Logstash configuration file with the "static" section:
input {
file {
type => "access-log"
path => "/home/elasticsearch/tomcat/logs/*.txt"
}
file {
type => "tomcat"
path => "/home/elasticsearch/tomcat/logs/*.log" exclude => "*.zip"
codec => multiline {
negate => true
pattern => "(^%{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM))"
what => "previous"
}
}
file {
type => "static"
path => "/home/elasticsearch/static_logs/*.log" exclude => "*.zip"
}
}
filter {
if [type] == "access-log" {
grok {
# Access log pattern is %a %{waffle.servlet.NegotiateSecurityFilter.PRINCIPAL}s %t %m %U%q %s %B %T "%{Referer}i" "%{User-Agent}i"
match => [ "message" , "%{IPV4:clientIP} %{NOTSPACE:user} \[%{DATA:timestamp}\] %{WORD:method} %{NOTSPACE:request} %{NUMBER:status} %{NUMBER:bytesSent} %{NUMBER:duration} \"%{NOTSPACE:referer}\" \"%{DATA:userAgent}\"" ]
remove_field => [ "message" ]
}
grok{
match => [ "request", "/%{USERNAME:app}/" ]
tag_on_failure => [ ]
}
date {
match => [ "timestamp", "dd/MMM/YYYY:HH:mm:ss Z" ]
remove_field => [ "timestamp" ]
}
geoip {
source => ["clientIP"]
}
dns {
reverse => [ "clientIP" ]
}
mutate {
lowercase => [ "user" ]
convert => [ "bytesSent", "integer", "duration", "float" ]
}
if [referer] == "-" {
mutate {
remove_field => [ "referer" ]
}
}
if [user] == "-" {
mutate {
remove_field => [ "user" ]
}
}
}
if [type] == "tomcat" {
if [message] !~ /(.+)/ {
drop { }
}
grok{
patterns_dir => "./patterns"
overwrite => [ "message" ]
# oK Catalina normal
match => [ "message", "%{CATALINA_DATESTAMP:timestamp} %{NOTSPACE:className} %{WORD:methodName}\r\n%{LOGLEVEL: logLevel}: %{GREEDYDATA:message}" ]
}
grok{
match => [ "path", "/%{USERNAME:app}.20%{NOTSPACE}.log"]
tag_on_failure => [ ]
}
# Aug 25, 2014 11:23:31 AM
date{
match => [ "timestamp", "MMM dd, YYYY hh:mm:ss a" ]
remove_field => [ "timestamp" ]
}
}
if [type] == "static" {
if [message] !~ /(.+)/ {
drop { }
}
grok{
patterns_dir => "./patterns"
overwrite => [ "message" ]
# 2017-08-03 16:01:11,352 WARN [Thread-552] pcf2.AbstractObjetMQDAO (AbstractObjetMQDAO.java:137) - Descripteur de
match => [ "message", "%{TIMESTAMP_ISO8601:logdate},%{INT} %{LOGLEVEL:log-level} \[(?<threadname>[^\]]+)\] %{JAVACLASS:package} \(%{JAVAFILE:file}:%{INT:line}\) - %{GREEDYDATA:message}" ]
}
# 2017-08-03 16:01:11,352
date{
match => [ "timestamp", "YYYY-MM-dd hh:mm:ss,SSS" ]
remove_field => [ "timestamp" ]
}
}
}
output {
elasticsearch { hosts => ["192.168.99.100:9200"]}
}
Where is my mistake?
Regards

Logstash Parsing and Calculations with CSV

I am having trouble parsing and calculating performance Navigation Timing data I have in a csv.
I was able to parse the fields, but I am not sure how to approach the calculations (below) properly. Some points to keep in mind:
1. Data sets are grouped together by the leading value (it is the timestamp of when the 21 data points were taken), e.g.:
ACMEPage-1486643427973,unloadEventEnd,1486643372422
2. Calculations need to be done with data points within the group.
I am assuming some tagging and grouping will need to be done, but I don't have a clear vision of how to implement it. Any help would be greatly appreciated.
Thanks,
---------------Calculations-----------------
Total First byte Time = responseStart - navigationStart
Latency = responseStart - fetchStart
DNS / Domain Lookup Time = domainLookupEnd - domainLookupStart
Server connect Time = connectEnd - connectStart
Server Response Time = responseStart - requestStart
Page Load time = loadEventStart - navigationStart
Transfer/Page Download Time = responseEnd - responseStart
DOM Interactive Time = domInteractive - navigationStart
DOM Content Load Time = domContentLoadedEventEnd - navigationStart
DOM Processing to Interactive = domInteractive - domLoading
DOM Interactive to Complete = domComplete - domInteractive
Onload = loadEventEnd - loadEventStart
-------Data in CSV-----------
ACMEPage-1486643427973,unloadEventEnd,1486643372422
ACMEPage-1486643427973,responseEnd,1486643372533
ACMEPage-1486643427973,responseStart,1486643372416
ACMEPage-1486643427973,domInteractive,1486643373030
ACMEPage-1486643427973,domainLookupEnd,1486643372194
ACMEPage-1486643427973,unloadEventStart,1486643372422
ACMEPage-1486643427973,domComplete,1486643373512
ACMEPage-1486643427973,domContentLoadedEventStart,1486643373030
ACMEPage-1486643427973,domainLookupStart,1486643372194
ACMEPage-1486643427973,redirectEnd,0
ACMEPage-1486643427973,redirectStart,0
ACMEPage-1486643427973,connectEnd,1486643372194
ACMEPage-1486643427973,toJSON,{}
ACMEPage-1486643427973,connectStart,1486643372194
ACMEPage-1486643427973,loadEventStart,1486643373512
ACMEPage-1486643427973,navigationStart,1486643372193
ACMEPage-1486643427973,requestStart,1486643372203
ACMEPage-1486643427973,secureConnectionStart,0
ACMEPage-1486643427973,fetchStart,1486643372194
ACMEPage-1486643427973,domContentLoadedEventEnd,1486643373058
ACMEPage-1486643427973,domLoading,1486643372433
ACMEPage-1486643427973,loadEventEnd,1486643373514
----------Output---------------
"path" => "/Users/philipp/Downloads/build2/logDataPoints_com.concur.automation.cge.ui.admin.ADCLookup_1486643340910.csv",
"#timestamp" => 2017-02-09T12:29:57.763Z,
"navigationTimer" => "connectStart",
"#version" => "1",
"host" => "15mbp-09796.local",
"elapsed_time" => "1486643372194",
"pid" => "1486643397763",
"page" => "ADCLookupDataPage",
"message" => "ADCLookupDataPage-1486643397763,connectStart,1486643372194",
"type" => "csv"
}
--------------logstash.conf----------------
input {
file {
type => "csv"
path => "/Users/path/logDataPoints_com.concur.automation.acme.ui.admin.acme_1486643340910.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id", "navigationTimer", "elapsed_time"]
}
if (["elapsed_time"] == "{}" ) {
drop{}
}
else {
grok {
match => { "page_id" => "%{WORD:page}-%{INT:pid}"
}
remove_field => [ "page_id" ]
}
}
date {
match => [ "pid", "UNIX_MS" ]
target => "#timestamp"
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
I did the following to get my data to trend:
- I found it easier to pivot the data, rather than going down the column, so that the data goes along the rows per each "event" or "document".
- Each field needed to be mapped accordingly as an integer or string.
Once the data was in Kibana properly, I had problems using the ruby code filter to make simple math calculations, so I ended up using "scripted fields" to do the calculations in Kibana.
input {
file {
type => "csv"
path => "/Users/philipp/perf_csv_pivot2.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id","unloadEventEnd","responseEnd","responseStart","domInteractive","domainLookupEnd","unloadEventStart","domComplete","domContentLoadedEventStart","domainLookupstart","redirectEnd","redirectStart","connectEnd","toJSON","connectStart","loadEventStart","navigationStart","requestStart","secureConnectionStart","fetchStart","domContentLoadedEventEnd","domLoading","loadEventEnd"]
}
grok {
match => { "page_id" => "%{WORD:page}-%{INT:page_ts}" }
remove_field => [ "page_id", "message", "path" ]
}
mutate {
convert => { "unloadEventEnd" => "integer" }
convert => { "responseEnd" => "integer" }
convert => { "responseStart" => "integer" }
convert => { "domInteractive" => "integer" }
convert => { "domainLookupEnd" => "integer" }
convert => { "unloadEventStart" => "integer" }
convert => { "domComplete" => "integer" }
convert => { "domContentLoadedEventStart" => "integer" }
convert => { "domainLookupstart" => "integer" }
convert => { "redirectEnd" => "integer" }
convert => { "redirectStart" => "integer" }
convert => { "connectEnd" => "integer" }
convert => { "toJSON" => "string" }
convert => { "connectStart" => "integer" }
convert => { "loadEventStart" => "integer" }
convert => { "navigationStart" => "integer" }
convert => { "requestStart" => "integer" }
convert => { "secureConnectionStart" => "integer" }
convert => { "fetchStart" => "integer" }
convert => { "domContentLoadedEventEnd" => "integer" }
convert => { "domLoading" => "integer" }
convert => { "loadEventEnd" => "integer" }
}
date {
match => [ "page_ts", "UNIX_MS" ]
target => "#timestamp"
remove_field => [ "page_ts", "timestamp", "host", "toJSON" ]
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
Hope this can help someone else,
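As a side note, if the calculations do need to happen inside Logstash rather than in Kibana scripted fields, a ruby filter along these lines could be a starting point (an untested sketch; it assumes the integer conversions above and only shows two of the metrics):
ruby {
  code => "
    # Page Load time = loadEventStart - navigationStart
    if event.get('loadEventStart') && event.get('navigationStart')
      event.set('page_load_time', event.get('loadEventStart') - event.get('navigationStart'))
    end
    # Total First Byte Time = responseStart - navigationStart
    if event.get('responseStart') && event.get('navigationStart')
      event.set('total_first_byte_time', event.get('responseStart') - event.get('navigationStart'))
    end
  "
}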

Access nested JSON Field in Logstash

I have a problem with accessing a nested JSON field in Logstash (latest version).
My config file is the following:
input {
http {
port => 5001
codec => "json"
}
}
filter {
mutate {
add_field => {"es_index" => "%{[statements][authority][name]}"}
}
mutate {
gsub => [
"es_index", " ", "_"
]
}
mutate {
lowercase => ["es_index"]
}
ruby {
init => "
def remove_dots hash
new = Hash.new
hash.each { |k,v|
if v.is_a? Hash
v = remove_dots(v)
end
new[ k.gsub('.','_') ] = v
if v.is_a? Array
v.each { |elem|
if elem.is_a? Hash
elem = remove_dots(elem)
end
new[ k.gsub('.','_') ] = elem
} unless v.nil?
end
} unless hash.nil?
return new
end
"
code => "
event.instance_variable_set(:@data, remove_dots(event.to_hash))
"
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
hosts => "elasticsearch:9200"
index => "golab-%{+YYYY.MM.dd}"
}
}
I have a filter with mutate. I want to add a field that I can use as part of the index name. When I use "%{[statements][authority][name]}", the content in the brackets is treated as a string: the literal text %{[statements][authority][name]} is saved in the es_index field. Logstash seems to think this is a string, but why?
I've also tried the expression "%{statements}". It works as expected: everything in the statements field is passed to es_index. But if I use "%{[statements][authority]}", strange things happen: es_index is filled with the exact same output that "%{statements}" produces. What am I missing?
Logstash Output with "%{[statements][authority]}":
{
"statements" => {
"verb" => {
"id" => "http://adlnet.gov/expapi/verbs/answered",
"display" => {
"en-US" => "answered"
}
},
"version" => "1.0.1",
"timestamp" => "2016-07-21T07:41:18.013880+00:00",
"object" => {
"definition" => {
"name" => {
"en-US" => "Example Activity"
},
"description" => {
"en-US" => "Example activity description"
}
},
"id" => "http://adlnet.gov/expapi/activities/example"
},
"actor" => {
"account" => {
"homePage" => "http://example.com",
"name" => "xapiguy"
},
"objectType" => "Agent"
},
"stored" => "2016-07-21T07:41:18.013880+00:00",
"authority" => {
"mbox" => "mailto:info#golab.eu",
"name" => "GoLab",
"objectType" => "Agent"
},
"id" => "0771b9bc-b1b8-4cb7-898e-93e8e5a9c550"
},
"id" => "a7e31874-780e-438a-874c-964373d219af",
"#version" => "1",
"#timestamp" => "2016-07-21T07:41:19.061Z",
"host" => "172.23.0.3",
"headers" => {
"request_method" => "POST",
"request_path" => "/",
"request_uri" => "/",
"http_version" => "HTTP/1.1",
"http_host" => "logstasher:5001",
"content_length" => "709",
"http_accept_encoding" => "gzip, deflate",
"http_accept" => "*/*",
"http_user_agent" => "python-requests/2.9.1",
"http_connection" => "close",
"content_type" => "application/json"
},
"es_index" => "{\"verb\":{\"id\":\"http://adlnet.gov/expapi/verbs/answered\",\"display\":{\"en-us\":\"answered\"}},\"version\":\"1.0.1\",\"timestamp\":\"2016-07-21t07:41:18.013880+00:00\",\"object\":{\"definition\":{\"name\":{\"en-us\":\"example_activity\"},\"description\":{\"en-us\":\"example_activity_description\"}},\"id\":\"http://adlnet.gov/expapi/activities/example\",\"objecttype\":\"activity\"},\"actor\":{\"account\":{\"homepage\":\"http://example.com\",\"name\":\"xapiguy\"},\"objecttype\":\"agent\"},\"stored\":\"2016-07-21t07:41:18.013880+00:00\",\"authority\":{\"mbox\":\"mailto:info#golab.eu\",\"name\":\"golab\",\"objecttype\":\"agent\"},\"id\":\"0771b9bc-b1b8-4cb7-898e-93e8e5a9c550\"}"
}
You can see that authority is part of es_index. So it was not chosen as a field.
Many thanks in advance
I found a solution. Credits go to jpcarey (Elasticsearch Forum)
I had to remove codec => "json". That leads to a different data structure: statements is now an array and not an object, so I needed to change %{[statements][authority][name]} to %{[statements][0][authority][name]}. That works without problems.
If you follow the given link you'll also find a better implementation of my mutate filters.
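For reference, the relevant part of the filter after that change would look something like this (a sketch pieced together from the config and the fix described above):
filter {
  mutate {
    # statements arrives as an array once the json codec is removed,
    # so the first element has to be addressed explicitly
    add_field => { "es_index" => "%{[statements][0][authority][name]}" }
  }
  mutate {
    gsub => [ "es_index", " ", "_" ]
  }
  mutate {
    lowercase => [ "es_index" ]
  }
}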

Logstash: Parse Complicated Multiline JSON from log file into ElasticSearch

Let me first say that I have gone through as many examples on here as I could, and they still do not work. I am not sure if it's because of the complicated nature of the JSON in the log file or not.
I am looking to take the example log entry, have Logstash read it in, and send the JSON as JSON to ElasticSearch.
Here is what the (shortened) example looks like:
[0m[0m16:02:08,685 INFO [org.jboss.as.server] (ServerService Thread Pool -- 28) JBAS018559: {
"appName": "SomeApp",
"freeMemReqStartBytes": 544577648,
"freeMemReqEndBytes": 513355408,
"totalMem": 839385088,
"maxMem": 1864368128,
"anonymousUser": false,
"sessionId": "zz90g0dFQkACVao4ZZL34uAb",
"swAction": {
"clock": 0,
"clockStart": 1437766438950,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"remoteHost": "127.0.0.1",
"remoteAddr": "127.0.0.1",
"requestMethod": "GET",
"mapLocalObjectCount": {
"FinanceEmployee": {
"x": 1,
"singleton": false
},
"QuoteProcessPolicyRef": {
"x": 10,
"singleton": false
},
"LocationRef": {
"x": 2,
"singleton": false
}
},
"theSqlStats": {
"lstStat": [
{
"sql": "select * FROM DUAL",
"truncated": false,
"truncatedSize": -1,
"recordCount": 1,
"foundInCache": false,
"putInCache": false,
"isUpdate": false,
"sqlFrom": "DUAL",
"usingPreparedStatement": true,
"isLoad": false,
"sw": {
"clock": 104,
"clockStart": 1437766438970,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"count": 0
},
{
"sql": "select * FROM DUAL2",
"truncated": false,
"truncatedSize": -1,
"recordCount": 0,
"foundInCache": false,
"putInCache": false,
"isUpdate": false,
"sqlFrom": "DUAL2",
"usingPreparedStatement": true,
"isLoad": false,
"sw": {
"clock": 93,
"clockStart": 1437766439111,
"name": "General",
"trackingMemory": false,
"trackingMemoryGcFirst": true,
"memLast": 0,
"memOrig": 0
},
"count": 0
}
]
}
}
The Logstash configs I have tried have not worked. The one closest so far is:
input {
file {
codec => multiline {
pattern => '\{(.*)\}'
negate => true
what => previous
}
path => [ '/var/log/logstash.log' ]
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
json {
source => message
}
}
output {
stdout { codec => rubydebug }
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}
I have also tried:
input {
file {
type => "json"
path => "/var/log/logstash.log"
codec => json #also tried json_lines
}
}
filter {
json {
source => "message"
}
}
output {
stdout { codec => rubydebug }
elasticsearch {
cluster => "logstash"
codec => "json" #also tried json_lines
index => "logstashjson"
}
}
I just want to take the JSON posted above and send it "as is" to ElasticSearch just as if I did a cURL PUT with that file. I appreciate any help, thank you!
UPDATE
After help from Leonid, here is the configuration I have right now:
input {
file {
codec => multiline {
pattern => "^\["
negate => true
what => previous
}
path => [ '/var/log/logstash.log' ]
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
grok {
match => { "message" => "^(?<rubbish>.*?)(?<logged_json>{.*)" }
}
json {
source => "logged_json"
target => "parsed_json"
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}
Sorry, I can't make comments yet, so I will post an answer. You are missing a document_type in the elasticsearch config; how would it otherwise be deduced?
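For instance, something along these lines (only a sketch: depending on the Logstash version the setting is document_type, or index_type in older releases, and the type name jboss-log is just an example):
elasticsearch {
  cluster => "logstash"
  index => "logstashjson"
  document_type => "jboss-log"
}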
All right, after looking into the logstash reference and working closely with @Ascalonian we came up with the following config:
input {
file {
# in the input you need to properly configure the multiline codec.
# You need to match the line that has the timestamp at the start,
# and then say 'everything that is NOT this line should go to the previous line'.
# the pattern may be improved to handle case when json array starts at the first
# char of the line, but it is sufficient currently
codec => multiline {
pattern => "^\["
negate => true
what => previous
max_lines => 2000
}
path => [ '/var/log/logstash.log']
start_position => "beginning"
sincedb_path => "/dev/null"
}
}
filter {
# extract the json part of the message string into a separate field
grok {
match => { "message" => "^.*?(?<logged_json>{.*)" }
}
# replace newlines in the json string since the json filter below
# can not deal with those. Also it is time to delete unwanted fields
mutate {
gsub => [ 'logged_json', '\n', '' ]
remove_field => [ "message", "#timestamp", "host", "path", "#version", "tags"]
}
# parse the json and remove the string field upon success
json {
source => "logged_json"
remove_field => [ "logged_json" ]
}
}
output {
stdout {
codec => rubydebug
}
elasticsearch {
cluster => "logstash"
index => "logstashjson"
}
}
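As a closing note, one way to sanity-check a multiline pipeline like this before pointing it at the real log file is to swap the file input for stdin, keep the same filter block, and paste a sample entry (a sketch; auto_flush_interval just forces the last buffered event out when reading interactively):
input {
  stdin {
    codec => multiline {
      pattern => "^\["
      negate => true
      what => previous
      max_lines => 2000
      # flush the last buffered event after 2 seconds of silence
      auto_flush_interval => 2
    }
  }
}
output {
  stdout { codec => rubydebug }
}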