Logstash: dynamically parse JSON to add new fields in the output

I have the following JSON INPUT:
{
"ts": "1459504800000",
"data": "30.7",
"sid": "1"
}
With this filter:
filter {
mutate {
convert => {
"data" => "float"
"ts" => "integer"
}
}
date {
match => [ "ts", "UNIX_MS"]
target => "ts_date"
}
}
I get the following result:
{
"ts" => 1459504800000,
"data" => 30.7,
"sid" => "1",
"#version" => "1",
"#timestamp" => "2016-04-21T14:29:54.241Z",
"type" => "redis-input",
"ts_date" => "2016-04-01T10:00:00.000Z"
}
I would like to add a new field to the result, composed dynamically from the "sid" and "data" values of the input (1 and 30.7). The field should be something like "somestring" + "1" => 30.7.
Thanks!

This is what add_field is for. For tasks like this that are unrelated to other filters, I'd use it in mutate:
mutate {
add_field => { "something%{sid}" => "%{data}" }
}
The value would be a string at this point. If you want it to be numeric, you'd need a second mutate filter that uses the convert option.
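A minimal sketch of that second mutate, assuming sid is "1" so the generated field is named something1 (convert takes literal field names):
mutate {
  # the dynamically named field holds a string; convert it to a number
  convert => { "something1" => "float" }
}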

Related

Logstash: getting a timestamp from MySQL doesn't work, but converting it to a string does?

I'm using this conf file to overwrite the @timestamp field in Elasticsearch, but I automatically get a _dateparsefailure tag:
input {
jdbc {
jdbc_driver_library => "C:/path/to/mariadb-java-client.jar"
statement => "SELECT '${FIELD}' as field, from ${TABLE_NAME}"
tracking_column => "timestamp"
tracking_column_type => "timestamp"
}
}
filter {
grok {
match => ["timestamp","%{TIMESTAMP_ISO8601}"]
}
date {
match => ["timestamp", "ISO8601"]
}
}
Note that with or without the grok filter I get the same result.
The result:
{
"#timestamp" => 2022-12-13T09:16:10.365Z,
"timestamp" => 2022-11-23T10:36:13.000Z,
"#version" => "1",
"tags" => [
[0] "_dateparsefailure"
],
"type" => "mytype",
}
But when I extract the timestamp with this conf:
input {
*same input*
}
filter {
grok {
match => ["timestamp","%{TIMESTAMP_ISO8601:tmp}"]
tag_on_failure => [ "_grokparsefailure"]
}
date {
match => ["tmp", "ISO8601"]
}
}
then it gives me the expected result:
{
"#timestamp" => 2022-11-23T11:16:36.000Z,
"#version" => "1",
"timestamp" => 2022-11-23T11:16:36.000Z,
"tmp" => "2022-11-23T11:16:36.000Z",
}
Can anyone explain why that is, and how I can avoid creating this extra field?
Thanks
OK, the first one parses a string, I guess, but timestamp already has the right type, so a copy is enough to save the original and overwrite the @timestamp field:
filter {
mutate {
copy => { "#timestamp" => "insertion_timestamp" }
copy => { "timestamp" => "#timestamp" }
remove_field => [ "timestamp" ]
}
}
If the database column type is a timestamp then the jdbc input will automatically convert the field to a LogStash::Timestamp object, not a string. A date filter cannot parse a Timestamp object, and will add a _dateparsefailure tag.
A grok filter calls .to_s to convert everything to a string before matching it, so if you grok the timestamp from the Timestamp object it will be a string that the date filter can parse.
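If you want to avoid the extra field entirely, another option is to stringify the field before the date filter instead of grokking it; this is a sketch, assuming mutate's convert-to-string yields an ISO8601 representation of the Timestamp:
filter {
  mutate {
    # turn the LogStash::Timestamp object into a string the date filter can parse
    convert => { "timestamp" => "string" }
  }
  date {
    match => [ "timestamp", "ISO8601" ]
  }
}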

Logstash Grok JSON error - mapper of different type

I have this log file:
2020-08-05 09:11:19 INFO-flask.model-{"version": "1.2.1", "time": 0.651745080947876, "output": {...}}
This is my Logstash filter setting:
filter {
grok {
match => {
"message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log.level}-%{DATA:model}-%{GREEDYDATA:log.message}"}
}
date {
timezone => "UTC"
match => ["timestamp" , "ISO8601", "yyyy-MM-dd HH:mm:ss"]
target => "#timestamp"
remove_field => [ "timestamp" ]
}
json{
source => "log.message"
target => "log.message"
}
mutate {
add_field => {
"execution.time" => "%{[log.message][time]}"
}
}
}
I want to extract the "time" value from the message. But I receive this error:
[2020-08-05T09:11:32,688][WARN ][logstash.outputs.elasticsearch][main][81ad4d5f6359b99ec4e52c93e518567c1fe91de303faf6fa1a4d905a73d3c334] Could not index event to Elasticsearch. {:status=>400, :action=>["index", {:_id=>nil, :_index=>"index-2020.08.05", :routing=>nil, :_type=>"_doc"}, #<LogStash::Event:0xbe6a80>], :response=>{"index"=>{"_index"=>"index-2020.08.05", "_type"=>"_doc", "_id"=>"ywPjvXMByEqBCvLy1871", "status"=>400, "error"=>{"type"=>"illegal_argument_exception", "reason"=>"mapper [log.message.input.values] of different type, current_type [long], merged_type [text]"}}}}
Please find the filter part for your logstash configuration:
filter {
grok {
match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:log-level}-%{DATA:model}-%{GREEDYDATA:KV}" }
overwrite => [ "message" ]
}
kv {
source => "KV"
value_split => ": "
field_split => ", "
target => "msg"
}
}
Hope this will solve your problem.
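Alternatively, since the payload after the last dash is valid JSON, here is a sketch that parses it with the json filter and avoids dotted field names (which Elasticsearch maps as nested objects); the log_message and execution_time names are just illustrative:
filter {
  grok {
    match => { "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:loglevel}-%{DATA:model}-%{GREEDYDATA:log_message}" }
  }
  json {
    # parse the JSON payload into its own object field
    source => "log_message"
    target => "log_message"
  }
  mutate {
    # pull the numeric value out of the parsed object
    add_field => { "execution_time" => "%{[log_message][time]}" }
  }
  mutate {
    convert => { "execution_time" => "float" }
    remove_field => [ "log_message" ]
  }
}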

Parse date inside input logs file

I'm new to the ELK stack and I'm trying to create an index from an S3 file. This S3 file is in CSV format and has the following schema:
date: Date field with format yyyy-MM-dd HH:mm:ss
filename: Name of the input file that triggers some events
input_registers: count of lines in the file
wrong_registers: count of wrong registers
result_registers: count of result registers (validated)
I need to set date as the @timestamp field in ELK.
I already tried some things with the date filter plugin; here is my current configuration:
input{
s3 {
"id" => "rim-pfinal"
"access_key_id" => ""
"secret_access_key" => ""
"region" => "eu-west-3"
"bucket" => "practica.final.rim.elk"
"prefix" => "logs"
"interval" => "3600"
"additional_settings" => {
"force_path_style" => true
"follow_redirects" => false
}
sincedb_path => "/dev/null"
}
}
filter {
date {
match => [ "date", "ISO8601", "yyyy-MM-dd HH:mm:ss" ]
target => "date"
add_field => { "DummyField" => "Fecha cambiada" }
}
csv{
columns => ["date", "filename", "input_registers", "wrong_registers", "result_registers", "err_type"]
separator => ";"
}
mutate { convert => [ "input_registers", "integer"] }
mutate { convert => [ "wrong_registers", "integer"] }
mutate { convert => [ "result_registers", "integer"] }
#Remove first header line to insert in elasticsearch
if [PK] =~ "PK"{
drop {}
}
}
output{
elasticsearch {
hosts => ["localhost:9200"]
index => "practica-rim"
}
}
I tried setting target to the timestamp field and adjusting match too, but it doesn't seem to work.
Thank you for the help!
{
"query":{
"range":{
"#timestamp":{
"gte":"2015-08-04T11:00:00",
"lt":"2015-08-04T12:00:00"
}
}
}
}
# datetimes will be serialized
>>> es.index(index="my-index", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()})
{u'_id': u'42', u'_index': u'my-index', u'_type': u'test-type', u'_version': 1, u'ok': True}
# but not deserialized
>>> es.get(index="my-index", doc_type="test-type", id=42)['_source']
{u'any': u'data', u'timestamp': u'2013-05-12T19:45:31.804229'}
https://www.elastic.co/guide/en/elasticsearch/reference/current/date.html
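As for the Logstash side of the question above: the date filter runs before the csv filter has created the date column, and its target is date rather than @timestamp, so @timestamp is never overwritten. A minimal sketch of a reordered filter block, assuming the column layout described above:
filter {
  csv {
    columns => ["date", "filename", "input_registers", "wrong_registers", "result_registers", "err_type"]
    separator => ";"
  }
  mutate {
    convert => {
      "input_registers" => "integer"
      "wrong_registers" => "integer"
      "result_registers" => "integer"
    }
  }
  date {
    # parse the CSV date column and write it to @timestamp
    match => [ "date", "yyyy-MM-dd HH:mm:ss" ]
    target => "@timestamp"
  }
}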

Logstash cannot extract json key

I need help with a Logstash filter to extract a JSON key/value into a new field. The following is my Logstash conf:
input {
tcp {
port => 5044
}
}
filter {
json {
source => "message"
add_field => {
"data" => "%{[message][data]}"
}
}
}
output {
stdout { codec => rubydebug }
}
I have tried with mutate:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{[message][data]}"
}
}
}
I have tried with . instead of []:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{message.data}"
}
}
}
I have tried with index number:
filter {
json {
source => "message"
}
mutate {
add_field => {
"data" => "%{[message][0]}"
}
}
}
All with no luck. :(
The following json is sent to port 5044:
{"data": "blablabla"}
The problem is that the new field is not able to extract the value from the key of the JSON:
"data" => "%{[message][data]}"
The following is my stdout:
{
"#version" => "1",
"host" => "localhost",
"type" => "logstash",
"data" => "%{[message][data]}",
"path" => "/path/from/my/app",
"#timestamp" => 2019-01-11T20:39:10.845Z,
"message" => "{\"data\": \"blablabla\"}"
}
However if I use "data" => "%{[message]}" instead:
filter {
json {
source => "message"
add_field => {
"data" => "%{[message]}"
}
}
}
I will get the whole json from stdout.
{
"#version" => "1",
"host" => "localhost",
"type" => "logstash",
"data" => "{\"data\": \"blablabla\"}",
"path" => "/path/from/my/app",
"#timestamp" => 2019-01-11T20:39:10.845Z,
"message" => "{\"data\": \"blablabla\"}"
}
Can anyone please tell me what I did wrong?
Thank you in advance.
I use docker-elk stack, ELK_VERSION=6.5.4
add_field is used to add fields when the filter succeeds; many filters have this option. The json filter puts the keys it extracts at the top level of the event, not under [message], so %{[message][data]} never resolves. If you want the parsed JSON placed in a field of its own, you should use target:
filter {
json {
source => "message"
target => "data" // parse into data field
}
}
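If you then want the plain value in its own top-level field, a minimal sketch that references the parsed key at [data][data] (the extracted_data name is just illustrative):
filter {
  json {
    source => "message"
    target => "data"
  }
  mutate {
    # the parsed key now lives at [data][data]
    add_field => { "extracted_data" => "%{[data][data]}" }
  }
}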

Logstash Parsing and Calculations with CSV

I am having trouble parsing and calculating performance Navigation Timing data I have in a CSV.
I was able to parse the fields, but I'm not sure how to approach the calculations (below) properly. Some points to keep in mind:
1. Data sets are grouped together by the bolded value (it is the ts of when the 21 data points were taken), e.g.:
ACMEPage-1486643427973,unloadEventEnd,1486643372422
2. Calculations need to be done with data points within the group.
I am assuming some tagging and grouping will need to be done, but I don't have a clear vision of how to implement it. Any help would be greatly appreciated.
Thanks,
---------------Calculations-----------------
Total First byte Time = responseStart - navigationStart
Latency = responseStart - fetchStart
DNS / Domain Lookup Time = domainLookupEnd - domainLookupStart
Server connect Time = connectEnd - connectStart
Server Response Time = responseStart - requestStart
Page Load time = loadEventStart - navigationStart
Transfer/Page Download Time = responseEnd - responseStart
DOM Interactive Time = domInteractive - navigationStart
DOM Content Load Time = domContentLoadedEventEnd - navigationStart
DOM Processing to Interactive = domInteractive - domLoading
DOM Interactive to Complete = domComplete - domInteractive
Onload = loadEventEnd - loadEventStart
-------Data in CSV-----------
ACMEPage-1486643427973,unloadEventEnd,1486643372422
ACMEPage-1486643427973,responseEnd,1486643372533
ACMEPage-1486643427973,responseStart,1486643372416
ACMEPage-1486643427973,domInteractive,1486643373030
ACMEPage-1486643427973,domainLookupEnd,1486643372194
ACMEPage-1486643427973,unloadEventStart,1486643372422
ACMEPage-1486643427973,domComplete,1486643373512
ACMEPage-1486643427973,domContentLoadedEventStart,1486643373030
ACMEPage-1486643427973,domainLookupStart,1486643372194
ACMEPage-1486643427973,redirectEnd,0
ACMEPage-1486643427973,redirectStart,0
ACMEPage-1486643427973,connectEnd,1486643372194
ACMEPage-1486643427973,toJSON,{}
ACMEPage-1486643427973,connectStart,1486643372194
ACMEPage-1486643427973,loadEventStart,1486643373512
ACMEPage-1486643427973,navigationStart,1486643372193
ACMEPage-1486643427973,requestStart,1486643372203
ACMEPage-1486643427973,secureConnectionStart,0
ACMEPage-1486643427973,fetchStart,1486643372194
ACMEPage-1486643427973,domContentLoadedEventEnd,1486643373058
ACMEPage-1486643427973,domLoading,1486643372433
ACMEPage-1486643427973,loadEventEnd,1486643373514
----------Output---------------
"path" => "/Users/philipp/Downloads/build2/logDataPoints_com.concur.automation.cge.ui.admin.ADCLookup_1486643340910.csv",
"#timestamp" => 2017-02-09T12:29:57.763Z,
"navigationTimer" => "connectStart",
"#version" => "1",
"host" => "15mbp-09796.local",
"elapsed_time" => "1486643372194",
"pid" => "1486643397763",
"page" => "ADCLookupDataPage",
"message" => "ADCLookupDataPage-1486643397763,connectStart,1486643372194",
"type" => "csv"
}
--------------logstash.conf----------------
input {
file {
type => "csv"
path => "/Users/path/logDataPoints_com.concur.automation.acme.ui.admin.acme_1486643340910.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id", "navigationTimer", "elapsed_time"]
}
if (["elapsed_time"] == "{}" ) {
drop{}
}
else {
grok {
match => { "page_id" => "%{WORD:page}-%{INT:pid}"
}
remove_field => [ "page_id" ]
}
}
date {
match => [ "pid", "UNIX_MS" ]
target => "#timestamp"
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
I did the following to get my data to trend:
- I found it easier to pivot the data, rather than going down the column, so that the data goes along the rows for each "event" or "document".
- Each field needed to be mapped accordingly as an integer or string.
Once the data was in Kibana properly, I had problems using the ruby code filter to make simple math calculations, so I ended up using "scripted fields" to make the calculations in Kibana.
input {
file {
type => "csv"
path => "/Users/philipp/perf_csv_pivot2.csv"
start_position => beginning
# to read from the beginning of file
sincedb_path => "/dev/null"
}
}
filter {
csv {
columns => ["page_id","unloadEventEnd","responseEnd","responseStart","domInteractive","domainLookupEnd","unloadEventStart","domComplete","domContentLoadedEventStart","domainLookupstart","redirectEnd","redirectStart","connectEnd","toJSON","connectStart","loadEventStart","navigationStart","requestStart","secureConnectionStart","fetchStart","domContentLoadedEventEnd","domLoading","loadEventEnd"]
}
grok {
match => { "page_id" => "%{WORD:page}-%{INT:page_ts}" }
remove_field => [ "page_id", "message", "path" ]
}
mutate {
convert => { "unloadEventEnd" => "integer" }
convert => { "responseEnd" => "integer" }
convert => { "responseStart" => "integer" }
convert => { "domInteractive" => "integer" }
convert => { "domainLookupEnd" => "integer" }
convert => { "unloadEventStart" => "integer" }
convert => { "domComplete" => "integer" }
convert => { "domContentLoadedEventStart" => "integer" }
convert => { "domainLookupstart" => "integer" }
convert => { "redirectEnd" => "integer" }
convert => { "redirectStart" => "integer" }
convert => { "connectEnd" => "integer" }
convert => { "toJSON" => "string" }
convert => { "connectStart" => "integer" }
convert => { "loadEventStart" => "integer" }
convert => { "navigationStart" => "integer" }
convert => { "requestStart" => "integer" }
convert => { "secureConnectionStart" => "integer" }
convert => { "fetchStart" => "integer" }
convert => { "domContentLoadedEventEnd" => "integer" }
convert => { "domLoading" => "integer" }
convert => { "loadEventEnd" => "integer" }
}
date {
match => [ "page_ts", "UNIX_MS" ]
target => "#timestamp"
remove_field => [ "page_ts", "timestamp", "host", "toJSON" ]
}
}
output {
elasticsearch { hosts => ["localhost:9200"] }
stdout { codec => rubydebug }
}
Hope this can help someone else,
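As an aside, once the fields are integers, the original calculations could also be done inside Logstash with a ruby filter added to the filter block above; a minimal sketch covering a few of the metrics, assuming the pivoted field names shown:
ruby {
  code => '
    nav = event.get("navigationStart")
    if nav
      # Page Load time = loadEventStart - navigationStart
      # e.g. for the sample group above: 1486643373512 - 1486643372193 = 1319 ms
      event.set("page_load_time", event.get("loadEventStart") - nav) if event.get("loadEventStart")
      # Total First byte Time = responseStart - navigationStart
      event.set("total_first_byte_time", event.get("responseStart") - nav) if event.get("responseStart")
      # Latency = responseStart - fetchStart
      event.set("latency", event.get("responseStart") - event.get("fetchStart")) if event.get("responseStart") && event.get("fetchStart")
    end
  '
}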