Process multi-level nested escaped JSON strings inside JSON with fluentd - json

I'm new to fluentd and I would like to parse multi-level nested, escaped JSON strings inside JSON.
My messages look like:
{"log":"HELLO WORLD\n","stream":"stdout","time":"2019-05-23T15:40:54.298531098Z"}
{"log":"{\"appName\":\"adapter\",\"time\":\"2019-05-23T15:40:54.299\",\"message\":\"{\\\"level\\\":\\\"info\\\",\\\"message\\\":\\\"Awaiting Messages from queue...\\\"}\"}\n","stream":"stdout","time":"2019-05-23T15:40:54.2996761Z"}
The first message gets parsed correctly, but the second one is ignored, and I guess it's because of an error in the parsing format.
Here is my source:
# Tail the container log files; each line is tried against the patterns
# below in order (JSON first, then the regexp fallback).
<source>
  @id fluentd-containers.log
  @type tail
  path /var/log/containers/*.log
  pos_file /var/log/containers.log.pos
  tag raw.kubernetes.*
  read_from_head true
  <parse>
    @type multi_format
    <pattern>
      format json
      time_key time
      time_format %Y-%m-%dT%H:%M:%S.%NZ
    </pattern>
    <pattern>
      format /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
      time_format %Y-%m-%dT%H:%M:%S.%N%:z
    </pattern>
  </parse>
</source>
Here is what I tried:
# Parse the string under "log" as JSON and store the result under
# "parsed_log", keeping the other fields of the record.
<filter **>
  @type parser
  key_name log
  reserve_data true
  remove_key_name_field true
  hash_value_field parsed_log
  <parse>
    @type json
  </parse>
</filter>
I actually just want to parse this log message:
{
"log":"{\"appName\":\"dedge-adapter\",\"time\":\"2019-05-24T02:39:12.242\",\"message\":\"{\\\"level\\\":\\\"warn\\\",\\\"status\\\":401,\\\"method\\\":\\\"GET\\\",\\\"path\\\":\\\"/api/v1/bookings\\\",\\\"requestId\\\":\\\"782a470b-9d62-43d3-9865-1b67397717d4\\\",\\\"ip\\\":\\\"90.79.204.18\\\",\\\"latency\\\":0.097897,\\\"user-agent\\\":\\\"PostmanRuntime/7.11.0\\\",\\\"message\\\":\\\"Request\\\"}\"}\n",
"stream":"stdout",
"time":"2019-05-24T02:39:12.242383376Z"
}

Does your log field contain multiple formats?
If so, you can use https://github.com/repeatedly/fluent-plugin-multi-format-parser
# Emit the two sample records from the question so the filters can be tested.
<source>
  @type dummy
  tag dummy
  dummy [
    {"log":"HELLO WORLD\n","stream":"stdout","time":"2019-05-23T15:40:54.298531098Z"},
    {"log":"{\"appName\":\"adapter\",\"time\":\"2019-05-23T15:40:54.299\",\"message\":\"{\\\"level\\\":\\\"info\\\",\\\"message\\\":\\\"Awaiting Messages from queue...\\\"}\"}\n","stream":"stdout","time":"2019-05-23T15:40:54.2996761Z"}
  ]
</source>

# First level: parse "log". JSON is tried first; plain text falls through to
# "none", which stores the raw line under "message".
<filter dummy>
  @type parser
  key_name log
  reserve_data true
  remove_key_name_field true
  <parse>
    @type multi_format
    <pattern>
      format json
    </pattern>
    <pattern>
      format none
    </pattern>
  </parse>
</filter>

# Second level: parse the "message" field produced by the first filter,
# using the same JSON-then-none fallback.
<filter dummy>
  @type parser
  key_name message
  reserve_data true
  remove_key_name_field true
  <parse>
    @type multi_format
    <pattern>
      format json
    </pattern>
    <pattern>
      format none
    </pattern>
  </parse>
</filter>

<match dummy>
  @type stdout
</match>
Output:
2019-06-03 11:41:13.022468253 +0900 dummy: {"stream":"stdout","time":"2019-05-23T15:40:54.298531098Z","message":"HELLO WORLD\n"}
2019-06-03 11:41:14.024253824 +0900 dummy: {"stream":"stdout","time":"2019-05-23T15:40:54.2996761Z","appName":"adapter","level":"info","message":"Awaiting Messages from queue..."}

Related

How to treat nested json using Fluentd

I used the fluentd parser filter after looking at the official documentation, but I keep getting a 'pattern not matched' error. I don't know why.
Can someone please help me? Thanks.
log
{"audit_record":{"name":"Query","record":"2406760_2022-05-16T05:15:00","timestamp":"2022-05-19T03:52:25Z","command_class":"select","connection_id":"77","status":0,"sqltext":"select @@version_comment limit 1","user":"root[root] @ localhost []","host":"localhost","os_user":"","ip":"","db":"Enchante"}}
td-agent.conf
# Tail the audit log and parse each line as JSON.
<source>
  @type tail
  path /var/log/td-agent/audit.log-20220521
  <parse>
    @type json
  </parse>
  pos_file /var/log/td-agent/audit_temp.pos
  tag audit.test
</source>

# Attempt to parse the "audit_record" field as JSON, dropping the other fields.
<filter audit.test>
  @type parser
  key_name audit_record
  reserve_data false
  <parse>
    @type json
  </parse>
</filter>

<match audit.test>
  # NOTE(review): two @type lines appear in this section as posted; the copy
  # output normally wraps each destination in a <store> section - confirm intent.
  @type copy
  @type mysql_bulk
  host localhost
  port 3306
  database test
  username root
  password 1234
  column_names log_type,log_time,command,sql_text,log_user,db_name
  key_names name,timestamp,command_class,sqltext,user,db
  table audit_log_temp
  flush_interval 10s
</match>
error text
[warn]: #0 dump an error event: error_class=Fluent::Plugin::Parser::ParserError error="pattern not matched with data '{\"name\"=>\"Query\", \"record\"=>\"2406760_2022-05-16T05:15:00\", \"timestamp\"=>\"2022-05-19T03:52:25Z\", \"command_class\"=>\"select\", \"connection_id\"=>\"77\", \"status\"=>0, \"sqltext\"=>\"select @@version_comment limit 1\", \"user\"=>\"root[root] @ localhost []\", \"host\"=>\"localhost\", \"os_user\"=>\"\", \"ip\"=>\"\", \"db\"=>\"Enchante\"}'" location=nil tag="audit.test" time=2022-08-08 13:14:58.465969695 +0900 record={"audit_record"=>{"name"=>"Query", "record"=>"2406760_2022-05-16T05:15:00", "timestamp"=>"2022-05-19T03:52:25Z", "command_class"=>"select", "connection_id"=>"77", "status"=>0, "sqltext"=>"select @@version_comment limit 1", "user"=>"root[root] @ localhost []", "host"=>"localhost", "os_user"=>"", "ip"=>"", "db"=>"Enchante"}}

Fluentd formatting json

I have a json:
{
"message": "2022-04-21T10:40:24.261996286+02:00 stdout F {\"timestamp\":\"2022-04-21T08:40:24,261Z\",\"level\":\"WARN\",\"logger\":\"SoupCoreClientContext\",\"thread\":\"ThreadServiceProvider\",\"message\":\"Lost connection / cannot connect to server. \",\"instance\":\"api\"}",
"hostname": "fluentd",
"@log_name": "api"
}
and my source looks like this:
# Tail the API log; the "none" parser does not split the line into fields.
<source>
  @type tail
  path /data/api.log
  tag api
  pos_file /data/api-file.log.pos
  read_from_head true
  <parse>
    @type none
  </parse>
</source>
I'm also using this filter to try to parse the value of the message key, but it doesn't work:
# Try to parse "message" as JSON; fall back to keeping it as plain text.
<filter *>
  @type parser
  key_name message
  reserve_data true
  remove_key_name_field true
  <parse>
    @type multi_format
    <pattern>
      format json
    </pattern>
    <pattern>
      format none
    </pattern>
  </parse>
</filter>
How can I parse the escaped JSON inside the "message" key?

Fluentd does not automatically add the current system time in the JSON parser

Fluentd Experts and Users!
We have run into an issue using Fluentd to parse JSON-format logs. Fluentd does not automatically add the current system time to the parsing result, although I have configured time_key and keep_time_key according to the documentation.
The example of our log is,
{"host": "204.48.112.175", "user-identifier": "-", "method": "POST", "request": "/synthesize/initiatives/integrated", "protocol": "HTTP/2.0", "status": 502, "bytes": 10272}
and you can see that there is no time field in it.
But there is no system current time in the parsed log output (the output is in stdout (debug mode) ):
loghub_s3: {"host":"204.48.112.175","user-identifier":"-","method":"POST","request":"/synthesize/initiatives/integrated","protocol":"HTTP/2.0","status":502,"bytes":10272,"referer":"http://www.centralenable.name/user-centric/reintermediate/synergistic/e-business","s3_bucket":"loghub-logs-691546483958","s3_key":"json/json-notime.json"}
And my config file is:
<system>
  log_level debug
</system>

# Print every parsed record to stdout for debugging.
<match loghub_s3>
  @type stdout
  @id output_stdout
</match>

# Read objects from S3 and parse their contents as JSON.
<source>
  @type s3
  tag loghub_s3
  s3_bucket loghub-logs-691546483958
  s3_region us-east-1
  store_as json
  add_object_metadata true
  <instance_profile_credentials>
    ip_address 169.254.169.254
    port 80
  </instance_profile_credentials>
  <sqs>
    queue_name loghub-fluentd-dev
  </sqs>
  <parse>
    @type json
    time_type string
    time_format %d/%b/%Y:%H:%M:%S %z
    # The sample records contain no "time" field for these settings to read.
    time_key time
    keep_time_key true
  </parse>
</source>
Other information:
Fluentd version: 1.14.3
TD Agent version: 4.3.0
fluent-plugin-s3 version: 1.6.1
Operating system: Amazon Linux2
Kernel version: 5.10.102-99.473.amzn2.x86_64
And we have used the s3-input-plugin: https://github.com/fluent/fluent-plugin-s3
Can anyone help us check whether our configuration is wrong? I'm not sure if this is a Fluentd issue or a plugin issue.
Thanks a lot in advance!
As mentioned in the comments, fluentd does not create a time/timestamp field unless configured otherwise. You can inject this field under filter or match section.
Here's an example with the sample input and stdout output plugins:
fluentd: 1.12.3
fluent.conf
# Generate test records with the sample input plugin.
<source>
  @type sample
  @id in_sample
  sample {"k":"v"}
  tag sample
</source>

# Print records to stdout, injecting the event time as a "timestamp" field.
<match sample>
  @type stdout
  @id out_stdout
  <inject>
    time_key timestamp
    time_type string
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </inject>
</match>
Run fluentd:
fluentd -c ./fluent.conf
fluentd logs
2022-04-10 08:46:26.053278947 +0500 sample: {"k":"v","timestamp":"2022-04-10T08:46:26.053278947Z"}
2022-04-10 08:46:27.056770340 +0500 sample: {"k":"v","timestamp":"2022-04-10T08:46:27.056770340Z"}
2022-04-10 08:46:28.059998159 +0500 sample: {"k":"v","timestamp":"2022-04-10T08:46:28.059998159Z"}

How to access json elements in fluentd config match directive

I have set up fluentd in my Kubernetes cluster (AKS) to send logs to Azure Blob Storage using the Microsoft plugin azure-storage-append-blob. Currently my logs are stored under the path containername/logs/file.log, but I want them stored as containername/logs/podname/file.log. I've used the fluent-plugin-kubernetes_metadata_filter plugin to extract the Kubernetes metadata. Below is the configuration I tried, but it did not work for me. I'm also posting a sample JSON output from the logs. I know this is possible; I just need a little help or guidance to finish this off.
Current configuration:
# Discard fluentd's own internal events.
<match fluent.**>
  @type null
</match>

# Tail the container logs and parse each line as JSON.
<source>
  @type tail
  path /var/log/containers/*.log
  pos_file /var/log/td-agent/tmp/access.log.pos
  tag container.*
  #format json
  format json
  time_key time
  time_format %Y-%m-%dT%H:%M:%S.%NZ
  read_from_head true
</source>

# Discard the logs of the fluentd containers themselves.
<match container.var.log.containers.**fluentd**.log>
  @type null
</match>

<filter container.**>
  @type kubernetes_metadata
</filter>

<match **>
  @type azure-storage-append-blob
  azure_storage_account mysaname
  azure_storage_access_key mysaaccesskey
  azure_container fluentdtest
  auto_create_container true
  path logs/
  append false
  azure_object_key_format %{path}%{tag}%{time_slice}_%{index}.log
  time_slice_format %Y%m%d-%H-%M
  # if you want to use %{tag} or %Y/%m/%d/ like syntax in path / azure_blob_name_format,
  # need to specify tag for %{tag} and time for %Y/%m/%d in <buffer> argument.
  <buffer tag,time,timekey>
    @type file
    path /var/log/fluent/azurestorageappendblob
    timekey 300s
    timekey_wait 10s
    timekey_use_utc true # use utc
    chunk_limit_size 5MB
    queued_chunks_limit_size 1
  </buffer>
</match>
Sample Json from the logs
container.var.log.containers.nginx-connector-deployment-5bbfdf4f86-p86dq_mynamespace_nginx-ee437ca90cb3924e1def9bdaa7f682577fc16fb023c00975963a105b26591bfb.log:
{
"log": "2020-07-16 17:12:56,761 INFO spawned: 'consumer' with pid 87068\n",
"stream": "stdout",
"docker": {
"container_id": "ee437ca90cb3924e1def9bdaa7f682577fc16fb023c00975963a105b26591bfb"
},
"kubernetes": {
"container_name": "nginx",
"namespace_name": "mynamespace",
"pod_name": "nginx-connector-deployment-5bbfdf4f86-p86dq",
"container_image": "docker.io/nginx",
"container_image_id": "docker-pullable://docker.io/nginx:f908584cf96053e50862e27ac40534bbd57ca3241d4175c9576dd89741b4926",
"pod_id": "93a630f9-0442-44ed-a8d2-9a7173880a3b",
"host": "aks-nodepoolkube-15824989-vmss00000j",
"labels": {
"app": "nginx",
"pod-template-hash": "5bbfdf4f86"
},
"master_url": "https://docker.io:443/api",
"namespace_id": "87092784-26b4-4dd5-a9d2-4833b72a1366"
}
}
Below is the official github link for the append-blob plugin https://github.com/microsoft/fluent-plugin-azure-storage-append-blob
Please refer to the link below for a fluentd configuration for reading JSON/non-JSON multiline logs. Try this configuration; it should work.
How to get ${kubernetes.namespace_name} for index_name in fluentd?

fluentd not parsing JSON log file entry

I've seen a number of similar questions on Stackoverflow, including this one. But none address my particular issue.
The application is deployed in a Kubernetes (v1.15) cluster. I'm using a docker image based on the fluent/fluentd-docker-image GitHub repo, v1.9/armhf, modified to include the elasticsearch plugin. Elasticsearch and Kibana are both version 7.6.0.
The logs are going to stdout and look like:
{"Application":"customer","HTTPMethod":"GET","HostName":"","RemoteAddr":"10.244.4.154:51776","URLPath":"/customers","level":"info","msg":"HTTP request received","time":"2020-03-10T20:17:32Z"}
In Kibana I'm seeing something like this:
{
"_index": "logstash-2020.03.10",
"_type": "_doc",
"_id": "p-UZxnABBcooPsDQMBy_",
"_version": 1,
"_score": null,
"_source": {
"log": "{\"Application\":\"customer\",\"HTTPMethod\":\"GET\",\"HostName\":\"\",\"RemoteAddr\":\"10.244.4.154:46160\",\"URLPath\":\"/customers\",\"level\":\"info\",\"msg\":\"HTTP request received\",\"time\":\"2020-03-10T20:18:18Z\"}\n",
"stream": "stdout",
"docker": {
"container_id": "cd1634b0ce410f3c89fe63f508fe6208396be87adf1f27fa9d47a01d81ff7904"
},
"kubernetes": {
I'm expecting to see the JSON pulled from the log: value somewhat like this (abbreviated):
{
"_index": "logstash-2020.03.10",
...
"_source": {
"log": "...",
"Application":"customer",
"HTTPMethod":"GET",
"HostName":"",
"RemoteAddr":"10.244.4.154:46160",
"URLPath":"/customers",
"level":"info",
"msg":"HTTP request received",
"time":"2020-03-10T20:18:18Z",
"stream": "stdout",
"docker": {
"container_id": "cd1634b0ce410f3c89fe63f508fe6208396be87adf1f27fa9d47a01d81ff7904"
},
"kubernetes": {
My fluentd config is:
# Discard fluentd's own internal events.
<match fluent.**>
  @type null
</match>

# Tail the container logs and parse each line as JSON.
<source>
  @type tail
  path /var/log/containers/*.log
  pos_file /var/log/fluentd-containers.log.pos
  time_format %Y-%m-%dT%H:%M:%S.%NZ
  tag kubernetes.*
  format json
  read_from_head true
</source>

# Discard logs from the fluentd and kube-system containers.
<match kubernetes.var.log.containers.**fluentd**.log>
  @type null
</match>
<match kubernetes.var.log.containers.**kube-system**.log>
  @type null
</match>

<filter kubernetes.**>
  @type kubernetes_metadata
</filter>

# Ship everything else to Elasticsearch; connection settings come from the
# environment.
<match **>
  @type elasticsearch
  @id out_es
  @log_level info
  include_tag_key true
  host "#{ENV['FLUENT_ELASTICSEARCH_HOST']}"
  port "#{ENV['FLUENT_ELASTICSEARCH_PORT']}"
  path "#{ENV['FLUENT_ELASTICSEARCH_PATH']}"
  <format>
    @type json
  </format>
</match>
I'm sure I'm missing something. Can anyone point me in the right direction?
Thanks,
Rich
This config worked for me:
# Tail the container logs and parse each line as Docker JSON.
<source>
  @type tail
  path /var/log/containers/*.log
  pos_file /opt/bitnami/fluentd/logs/buffers/fluentd-docker.pos
  tag kubernetes.*
  read_from_head true
  <parse>
    @type json
    time_key time
    time_format %iso8601
  </parse>
</source>

# Parse the JSON string stored under "log" and put the resulting hash back
# under "log", keeping the other fields of the record.
<filter kubernetes.**>
  @type parser
  key_name "$.log"
  hash_value_field "log"
  reserve_data true
  <parse>
    @type json
  </parse>
</filter>

<filter kubernetes.**>
  @type kubernetes_metadata
</filter>
Make sure to edit path so that it matches your use case.
This happens because Docker writes container STDOUT to /var/log/containers/*.log as a string under the 'log' key, so JSON logs are serialized to strings before being stored there. What you need to do is add an additional step that parses the string under the 'log' key:
# Parse the JSON string stored under "log" and put the resulting hash back
# under "log", keeping the other fields of the record.
<filter kubernetes.**>
  @type parser
  key_name "$.log"
  hash_value_field "log"
  reserve_data true
  <parse>
    @type json
  </parse>
</filter>
I solved it with this parser filter.
Test it over HTTP first to make sure the record gets parsed, then log from your container.
fluentd.conf
# Accept test records over HTTP on port 5170.
<source>
  @type http
  port 5170
  bind 0.0.0.0
</source>

# Parse the JSON string stored under "log" and put the resulting hash back
# under "log", keeping the other fields of the record.
<filter *>
  @type parser
  key_name "$.log"
  hash_value_field "log"
  reserve_data true
  <parse>
    @type json
  </parse>
</filter>

<match **>
  @type stdout
</match>
Then test the HTTP input from your terminal with curl:
curl -i -X POST -d 'json={"source":"stderr","log":"{\"applicationName\":\"api-producer-go\",\"level\":\"info\",\"msg\":\"Development is Running\",\"time\":\"2020-09-04T14:32:29Z\"}","container_id":"f9975c6a7bc6dcc21dbdacca8ff98152cd04ae28b3bc36707eba5453f6ff9960","container_name":"/api-producer-golang"}' http://localhost:5170/test.cycle
I had JSON being emitted from my container like this:
{"asctime": "2020-06-28 23:40:37,184", "filename": "streaming_pull_manager.py", "funcName": "_should_recover", "lineno": 648, "processName": "MainProcess", "threadName": "Thread-6", "message": "Observed recoverable stream error 504 Deadline Exceeded", "severity": "INFO"}
And Kibana was showing "failed to find message". I googled around and fixed it by appending the following configuration to my kubernetes.conf:
# Copy "log" into a scratch field so the original "log" value is kept.
<filter **>
  @type record_transformer
  <record>
    log_json ${record["log"]}
  </record>
</filter>

# Parse the scratch field as JSON and merge the result into the record.
# Records whose "log" is not JSON are not routed to the error stream.
<filter **>
  @type parser
  @log_level debug
  key_name log_json
  reserve_data true
  remove_key_name_field true
  emit_invalid_record_to_error false
  <parse>
    @type json
  </parse>
</filter>
The final kubernetes.conf file looks like this:
# Discard fluentd's own internal log events.
<label @FLUENT_LOG>
  <match fluent.**>
    @type null
  </match>
</label>
# Container logs; tag, excluded paths and parser type can be overridden
# through environment variables.
<source>
  @type tail
  @id in_tail_container_logs
  path /var/log/containers/*.log
  pos_file /var/log/fluentd-containers.log.pos
  tag "#{ENV['FLUENT_CONTAINER_TAIL_TAG'] || 'kubernetes.*'}"
  exclude_path "#{ENV['FLUENT_CONTAINER_TAIL_EXCLUDE_PATH'] || use_default}"
  read_from_head true
  <parse>
    @type "#{ENV['FLUENT_CONTAINER_TAIL_PARSER_TYPE'] || 'json'}"
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </parse>
</source>

# Salt minion log.
<source>
  @type tail
  @id in_tail_minion
  path /var/log/salt/minion
  pos_file /var/log/fluentd-salt.pos
  tag salt
  <parse>
    @type regexp
    expression /^(?<time>[^ ]* [^ ,]*)[^\[]*\[[^\]]*\]\[(?<severity>[^ \]]*) *\] (?<message>.*)$/
    time_format %Y-%m-%d %H:%M:%S
  </parse>
</source>

# Startup script log, in syslog format.
<source>
  @type tail
  @id in_tail_startupscript
  path /var/log/startupscript.log
  pos_file /var/log/fluentd-startupscript.log.pos
  tag startupscript
  <parse>
    @type syslog
  </parse>
</source>

# Docker daemon log (key="value" pairs).
<source>
  @type tail
  @id in_tail_docker
  path /var/log/docker.log
  pos_file /var/log/fluentd-docker.log.pos
  tag docker
  <parse>
    @type regexp
    # status_code uses the (?<name>...) named-group syntax like the other
    # fields, so an optional trailing statusCode=NNN is captured.
    expression /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=(?<status_code>\d+))?/
  </parse>
</source>
# etcd log, kept as unparsed lines.
<source>
  @type tail
  @id in_tail_etcd
  path /var/log/etcd.log
  pos_file /var/log/fluentd-etcd.log.pos
  tag etcd
  <parse>
    @type none
  </parse>
</source>

# The remaining sources all use the "kubernetes" parser type and differ only
# in id, path, pos_file and tag.
<source>
  @type tail
  @id in_tail_kubelet
  multiline_flush_interval 5s
  path /var/log/kubelet.log
  pos_file /var/log/fluentd-kubelet.log.pos
  tag kubelet
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_proxy
  multiline_flush_interval 5s
  path /var/log/kube-proxy.log
  pos_file /var/log/fluentd-kube-proxy.log.pos
  tag kube-proxy
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_apiserver
  multiline_flush_interval 5s
  path /var/log/kube-apiserver.log
  pos_file /var/log/fluentd-kube-apiserver.log.pos
  tag kube-apiserver
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_controller_manager
  multiline_flush_interval 5s
  path /var/log/kube-controller-manager.log
  pos_file /var/log/fluentd-kube-controller-manager.log.pos
  tag kube-controller-manager
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_scheduler
  multiline_flush_interval 5s
  path /var/log/kube-scheduler.log
  pos_file /var/log/fluentd-kube-scheduler.log.pos
  tag kube-scheduler
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_rescheduler
  multiline_flush_interval 5s
  path /var/log/rescheduler.log
  pos_file /var/log/fluentd-rescheduler.log.pos
  tag rescheduler
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_glbc
  multiline_flush_interval 5s
  path /var/log/glbc.log
  pos_file /var/log/fluentd-glbc.log.pos
  tag glbc
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_cluster_autoscaler
  multiline_flush_interval 5s
  path /var/log/cluster-autoscaler.log
  pos_file /var/log/fluentd-cluster-autoscaler.log.pos
  tag cluster-autoscaler
  <parse>
    @type kubernetes
  </parse>
</source>
# Example:
# 2017-02-09T00:15:57.992775796Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" ip="104.132.1.72" method="GET" user="kubecfg" as="<self>" asgroups="<lookup>" namespace="default" uri="/api/v1/namespaces/default/pods"
# 2017-02-09T00:15:57.993528822Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" response="200"
<source>
  @type tail
  @id in_tail_kube_apiserver_audit
  multiline_flush_interval 5s
  path /var/log/kubernetes/kube-apiserver-audit.log
  pos_file /var/log/kube-apiserver-audit.log.pos
  tag kube-apiserver-audit
  <parse>
    @type multiline
    # A new event starts at every line of the form "<timestamp> AUDIT:".
    format_firstline /^\S+\s+AUDIT:/
    # Fields must be explicitly captured by name to be parsed into the record.
    # Fields may not always be present, and order may change, so this just looks
    # for a list of key="\"quoted\" value" pairs separated by spaces.
    # Unknown fields are ignored.
    # Note: We can't separate query/response lines as format1/format2 because
    # they don't always come one after the other for a given query.
    format1 /^(?<time>\S+) AUDIT:(?: (?:id="(?<id>(?:[^"\\]|\\.)*)"|ip="(?<ip>(?:[^"\\]|\\.)*)"|method="(?<method>(?:[^"\\]|\\.)*)"|user="(?<user>(?:[^"\\]|\\.)*)"|groups="(?<groups>(?:[^"\\]|\\.)*)"|as="(?<as>(?:[^"\\]|\\.)*)"|asgroups="(?<asgroups>(?:[^"\\]|\\.)*)"|namespace="(?<namespace>(?:[^"\\]|\\.)*)"|uri="(?<uri>(?:[^"\\]|\\.)*)"|response="(?<response>(?:[^"\\]|\\.)*)"|\w+="(?:[^"\\]|\\.)*"))*/
    time_format %Y-%m-%dT%T.%L%Z
  </parse>
</source>
# Kubernetes metadata filter for container records. The API URL, TLS settings
# and the skip_* switches are read from environment variables, with the
# defaults shown inline.
<filter kubernetes.**>
  @type kubernetes_metadata
  @id filter_kube_metadata
  kubernetes_url "#{ENV['FLUENT_FILTER_KUBERNETES_URL'] || 'https://' + ENV.fetch('KUBERNETES_SERVICE_HOST') + ':' + ENV.fetch('KUBERNETES_SERVICE_PORT') + '/api'}"
  verify_ssl "#{ENV['KUBERNETES_VERIFY_SSL'] || true}"
  ca_file "#{ENV['KUBERNETES_CA_FILE']}"
  skip_labels "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_LABELS'] || 'false'}"
  skip_container_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_CONTAINER_METADATA'] || 'false'}"
  skip_master_url "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_MASTER_URL'] || 'false'}"
  skip_namespace_metadata "#{ENV['FLUENT_KUBERNETES_METADATA_SKIP_NAMESPACE_METADATA'] || 'false'}"
</filter>
# Copy "log" into a scratch field so the original "log" value is kept.
<filter **>
  @type record_transformer
  <record>
    log_json ${record["log"]}
  </record>
</filter>

# Parse the scratch field as JSON and merge the result into the record.
# Records whose "log" is not JSON are not routed to the error stream.
<filter **>
  @type parser
  @log_level debug
  key_name log_json
  reserve_data true
  remove_key_name_field true
  emit_invalid_record_to_error false
  <parse>
    @type json
  </parse>
</filter>
EDIT: If anyone is looking for how to override the fluentd .conf files, especially kubernetes.conf, there is an amazing tutorial here.