I have read all the documentation about how Cygnus works, and I specifically tested this one successfully. I have also finished reading this tutorial, but I am sure I haven't configured something correctly.
In cygnus_instance_1.conf I created:
CYGNUS_USER=root
CONFIG_FOLDER=/usr/cygnus/conf
CONFIG_FILE=/usr/cygnus/conf/agent_1.conf
AGENT_NAME=cygnusagent
LOGFILE_NAME=cygnus.log
ADMIN_PORT=8081
and in agent_1.conf I created:
#=============================================
# To be put in APACHE_FLUME_HOME/conf/cygnus.conf
#
# General configuration template explaining how to setup a sink of each of the available types (HDFS, CKAN, MySQL).
#=============================================
# The next three fields set the sources, sinks and channels used by Cygnus. You could use different names than the
# ones suggested below, but in that case make sure you keep coherence in properties names along the configuration file.
# Regarding sinks, you can use multiple types at the same time; the only requirement is to provide a channel for each
# one of them (this example shows how to configure 3 sink types at the same time). Even, you can define more than one
# sink of the same type and sharing the channel in order to improve the performance (this is like having
# multi-threading).
cygnusagent.sources = http-source
cygnusagent.sinks = hdfs-sink mysql-sink ckan-sink
cygnusagent.channels = hdfs-channel mysql-channel ckan-channel
#=============================================
# source configuration
# channel name where to write the notification events
cygnusagent.sources.http-source.channels = hdfs-channel mysql-channel ckan-channel
# source class, must not be changed
cygnusagent.sources.http-source.type = org.apache.flume.source.http.HTTPSource
# listening port the Flume source will use for receiving incoming notifications
cygnusagent.sources.http-source.port = 5050
# Flume handler that will parse the notifications, must not be changed
cygnusagent.sources.http-source.handler = es.tid.fiware.fiwareconnectors.cygnus.handlers.OrionRestHandler
# URL target
cygnusagent.sources.http-source.handler.notification_target = /notify
# Default service (service semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service = def_serv
# Default service path (service path semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service_path = def_servpath
# Number of channel re-injection retries before a Flume event is definitely discarded (-1 means infinite retries)
cygnusagent.sources.http-source.handler.events_ttl = 10
# Source interceptors, do not change
cygnusagent.sources.http-source.interceptors = ts de
# Interceptor type, do not change
cygnusagent.sources.http-source.interceptors.ts.type = timestamp
# Destination extractor interceptor, do not change
cygnusagent.sources.http-source.interceptors.de.type = es.tid.fiware.fiwareconnectors.cygnus.interceptors.DestinationExtractor$Builder
# Matching table for the destination extractor interceptor, put the right absolute path to the file if necessary
# See the doc/design/interceptors document for more details
cygnusagent.sources.http-source.interceptors.de.matching_table = /usr/cygnus/conf/matching_table.conf
# ============================================
# OrionHDFSSink configuration
# channel name from where to read notification events
cygnusagent.sinks.hdfs-sink.channel = hdfs-channel
# sink class, must not be changed
cygnusagent.sinks.hdfs-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionHDFSSink
# Comma-separated list of FQDN/IP address regarding the Cosmos Namenode endpoints
# If you are using Kerberos authentication, then the usage of FQDNs instead of IP addresses is mandatory
cygnusagent.sinks.hdfs-sink.cosmos_host = x1.y1.z1.w1,x2.y2.z2.w2
# port of the Cosmos service listening for persistence operations; 14000 for httpfs, 50070 for webhdfs and free choice for infinity
cygnusagent.sinks.hdfs-sink.cosmos_port = 14000
# default username allowed to write in HDFS
cygnusagent.sinks.hdfs-sink.cosmos_default_username = cosmos_username
# default password for the default username
cygnusagent.sinks.hdfs-sink.cosmos_default_password = xxxxxxxxxxxxx
# HDFS backend type (webhdfs, httpfs or infinity)
cygnusagent.sinks.hdfs-sink.hdfs_api = httpfs
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.hdfs-sink.attr_persistence = column
# Hive FQDN/IP address of the Hive server
cygnusagent.sinks.hdfs-sink.hive_host = x.y.z.w
# Hive port for Hive external table provisioning
cygnusagent.sinks.hdfs-sink.hive_port = 10000
# Kerberos-based authentication enabling
cygnusagent.sinks.hdfs-sink.krb5_auth = false
# Kerberos username
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_user = krb5_username
# Kerberos password
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_password = xxxxxxxxxxxxx
# Kerberos login file
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_login_conf_file = /usr/cygnus/conf/krb5_login.conf
# Kerberos configuration file
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_conf_file = /usr/cygnus/conf/krb5.conf
# ============================================
# OrionCKANSink configuration
# channel name from where to read notification events
cygnusagent.sinks.ckan-sink.channel = ckan-channel
# sink class, must not be changed
cygnusagent.sinks.ckan-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionCKANSink
# the CKAN API key to use
cygnusagent.sinks.ckan-sink.api_key = ckanapikey
# the FQDN/IP address for the CKAN API endpoint
cygnusagent.sinks.ckan-sink.ckan_host = x.y.z.w
# the port for the CKAN API endpoint
cygnusagent.sinks.ckan-sink.ckan_port = 80
# Orion URL used to compose the resource URL with the convenience operation URL to query it
cygnusagent.sinks.ckan-sink.orion_url = http://localhost:1026
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.ckan-sink.attr_persistence = row
# enable SSL for secure Http transportation; 'true' or 'false'
cygnusagent.sinks.ckan-sink.ssl = false
# ============================================
# OrionMySQLSink configuration
# channel name from where to read notification events
cygnusagent.sinks.mysql-sink.channel = mysql-channel
# sink class, must not be changed
cygnusagent.sinks.mysql-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionMySQLSink
# the FQDN/IP address where the MySQL server runs
cygnusagent.sinks.mysql-sink.mysql_host = localhost
# the port where the MySQL server listens for incoming connections
cygnusagent.sinks.mysql-sink.mysql_port = 3306
# a valid user in the MySQL server
cygnusagent.sinks.mysql-sink.mysql_username = root
# password for the user above
cygnusagent.sinks.mysql-sink.mysql_password = klasika
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.mysql-sink.attr_persistence = column
#=============================================
# hdfs-channel configuration
# channel type (must not be changed)
cygnusagent.channels.hdfs-channel.type = memory
# capacity of the channel
cygnusagent.channels.hdfs-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.hdfs-channel.transactionCapacity = 100
#=============================================
# ckan-channel configuration
# channel type (must not be changed)
cygnusagent.channels.ckan-channel.type = memory
# capacity of the channel
cygnusagent.channels.ckan-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.ckan-channel.transactionCapacity = 100
#=============================================
# mysql-channel configuration
# channel type (must not be changed)
cygnusagent.channels.mysql-channel.type = memory
# capacity of the channel
cygnusagent.channels.mysql-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.mysql-channel.transactionCapacity = 100
Although I don't use OrionHDFSSink and OrionCKANSink, I didn't touch those configurations because I really am not sure whether I should.
When I finally run subscribeContext targeting Cygnus on its default port 5050, I get a normal response, but nothing is created in my database.
What am I doing wrong here?
First of all, feel free to remove the HDFS and CKAN configuration parts. You will avoid unnecessary logs related to those components when running Cygnus. Of course, remember to delete all references to sinks and channels; specifically:
cygnusagent.sources = http-source
cygnusagent.sinks = mysql-sink
cygnusagent.channels = mysql-channel
...
cygnusagent.sources.http-source.channels = mysql-channel
Second, the answer to your question can be found in the documentation:
Within tables, we can find two options:
Fixed 8-field rows, as usual: recvTimeTs, recvTime, entityId, entityType, attrName, attrType, attrValue and attrMd. These tables (and the databases) are created at execution time if the table does not exist prior to the row insertion. Regarding attrValue, in its simplest form this value is just a string, but since Orion 0.11.0 it can be a JSON object or JSON array. Regarding attrMd, it contains a string serialization of the metadata array for the attribute in JSON (if the attribute has no metadata, an empty array [] is inserted).
Two columns per entity attribute (one for the value and another for the metadata), plus an additional column for the reception time of the data (recv_time). These tables (and the databases) must be provisioned before Cygnus runs, because each entity may have a different number of attributes, and the notifications must ensure a value is notified for each attribute.
The behaviour of the connector regarding the internal representation of the data is governed through a configuration parameter, attr_persistence, whose values can be row or column.
Maybe there is a problem with the wording; I think the last paragraph should end as "... whose values can be row or column, and whose behaviour corresponds to the options described above, respectively".
I.e. if you are using the column mode, then the database and the tables must be provisioned in advance.
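For instance, with the default service and service path shown in your agent_1.conf, the column-mode provisioning would look something like the following. This is only a sketch: the database and table names must match the ones Cygnus derives from your service, service path and entity (you can see them in the Cygnus log), and temperature is just a hypothetical attribute; the column types are up to you as long as the inserted values fit.
mysql -u root -p -e "
  CREATE DATABASE IF NOT EXISTS def_serv;
  CREATE TABLE IF NOT EXISTS def_serv.def_servpath_Room1_Room (
    recv_time      text,
    temperature    text,
    temperature_md text
  );"
In row mode, on the contrary, Cygnus creates the databases and tables itself on the first insertion.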
There is a similar question where I explain this behaviour in more detail.
HTH!
Hello, I am very new to the Elastic Stack and I am trying to extract useful information from system logs (in JSON format) in Kibana. I am using Filebeat to ship them to Kibana in Elastic Cloud.
Problem: basically, I cannot view the data I send to Kibana when it is in the following format:
{"code":"28000","file":"auth.c","length":164,"level":"error","line":"496","message":"no pg_hba.conf entry",everity":"FATAL","timestamp":"2022-10-05 08:40:14.308"}
Kibana is ignoring all the lines that start with "code":"000000".
However, logs in the following formats have no problem:
{"code":"SELF_SIGNED_CERT_IN_CHAIN","level":"error","message":"self signed certificate in certificate chain,"requestId":"166666666623piq9lzd2ngllllll","timestamp":"2022-10-05 08:40:39.908"}
and
{"level":"error","message":"Login was refused using authentication mechanism PLAIN.","requestId":"199999999udchsuz8d2s5aoszqw6w7","stack":"Error: Handshake terminated by server","timestamp":"2022-10-05 08:57:37.330"}
Please see the filebeat.yml below.
###################### Filebeat Configuration Example #########################
# This file is an example configuration file highlighting only the most common
# options. The filebeat.reference.yml file from the same directory contains all the
# supported options with more comments. You can use it as a reference.
#
# You can find the full configuration reference here:
# https://www.elastic.co/guide/en/beats/filebeat/index.html
# For more available modules and options, please see the filebeat.reference.yml sample
# configuration file.
# ============================== Filebeat inputs ===============================
filebeat.inputs:
# Each - is an input. Most options can be set at the input level, so
# you can use different inputs for various configurations.
# Below are the input specific configurations.
# filestream is an input for collecting log messages from files.
- type: log
  # Unique ID among all inputs, an ID is required.
  # id: my-filestream-id
  # Change to true to enable this input configuration.
  enabled: true
  # Paths that should be crawled and fetched. Glob based paths.
  paths:
    - C:\bi\hd-be-logs\hd-be-logs\combined\*.log
    #- c:\programdata\elasticsearch\logs\*
  json.keys_under_root: true
  json.overwrite_keys: true
  json.add_error_key: true
  json.expand_keys: true
  json.close_inactive: true
  json.clean_removed: true
# Exclude lines. A list of regular expressions to match. It drops the lines that are
# matching any regular expression from the list.
# Line filtering happens after the parsers pipeline. If you would like to filter lines
# before parsers, use include_message parser.
#exclude_lines: ['^DBG']
# Include lines. A list of regular expressions to match. It exports the lines that are
# matching any regular expression from the list.
# Line filtering happens after the parsers pipeline. If you would like to filter lines
# before parsers, use include_message parser.
#include_lines: ['^ERR', '^WARN']
# Exclude files. A list of regular expressions to match. Filebeat drops the files that
# are matching any regular expression from the list. By default, no files are dropped.
#prospector.scanner.exclude_files: ['.gz$']
# Optional additional fields. These fields can be freely picked
# to add additional information to the crawled log files for filtering
#fields:
# level: debug
# review: 1
# ============================== Filebeat modules ==============================
filebeat.config.modules:
  # Glob pattern for configuration loading
  path: ${path.config}/modules.d/*.yml
  # Set to true to enable config reloading
  reload.enabled: false
  # Period on which files under path should be checked for changes
  #reload.period: 10s
# ======================= Elasticsearch template setting =======================
setup.template.settings:
  index.number_of_shards: 1
  #index.codec: best_compression
  #_source.enabled: false
# ================================== General ===================================
# The name of the shipper that publishes the network data. It can be used to group
# all the transactions sent by a single shipper in the web interface.
#name:
# The tags of the shipper are included in their own field with each
# transaction published.
#tags: ["service-X", "web-tier"]
# Optional fields that you can specify to add additional information to the
# output.
#fields:
# env: staging
# ================================= Dashboards =================================
# These settings control loading the sample dashboards to the Kibana index. Loading
# the dashboards is disabled by default and can be enabled either by setting the
# options here or by using the `setup` command.
#setup.dashboards.enabled: false
# The URL from where to download the dashboards archive. By default this URL
# has a value which is computed based on the Beat name and version. For released
# versions, this URL points to the dashboard archive on the artifacts.elastic.co
# website.
#setup.dashboards.url:
# =================================== Kibana ===================================
# Starting with Beats version 6.0.0, the dashboards are loaded via the Kibana API.
# This requires a Kibana endpoint configuration.
setup.kibana:
# Kibana Host
# Scheme and port can be left out and will be set to the default (http and 5601)
# In case you specify and additional path, the scheme is required: http://localhost:5601/path
# IPv6 addresses should always be defined as: https://[2001:db8::1]:5601
#host: "localhost:5601"
# Kibana Space ID
# ID of the Kibana Space into which the dashboards should be loaded. By default,
# the Default Space will be used.
#space.id:
# =============================== Elastic Cloud ================================
# These settings simplify using Filebeat with the Elastic Cloud (https://cloud.elastic.co/).
# The cloud.id setting overwrites the `output.elasticsearch.hosts` and
# `setup.kibana.host` options.
# You can find the `cloud.id` in the Elastic Cloud web UI.
cloud.id: My_deployment:cloudid
# The cloud.auth setting overwrites the `output.elasticsearch.username` and
# `output.elasticsearch.password` settings. The format is `<user>:<pass>`.
cloud.auth: elastic:4----------F
# ================================== Outputs ===================================
# Configure what output to use when sending the data collected by the beat.
# ---------------------------- Elasticsearch Output ----------------------------
output.elasticsearch:
# Array of hosts to connect to.
# hosts: ["localhost:9200"]
# Protocol - either `http` (default) or `https`.
#protocol: "https"
# Authentication credentials - either API key or username/password.
# api_key: "changeme"
#username: "elastic"
#password: "changeme"
# ------------------------------ Logstash Output -------------------------------
#output.logstash:
# The Logstash hosts
#hosts: ["localhost:5044"]
# Optional SSL. By default is off.
# List of root certificates for HTTPS server verifications
#ssl.certificate_authorities: ["/etc/pki/root/ca.pem"]
# Certificate for SSL client authentication
#ssl.certificate: "/etc/pki/client/cert.pem"
# Client Certificate Key
#ssl.key: "/etc/pki/client/cert.key"
# ================================= Processors =================================
processors:
  - add_host_metadata:
      when.not.contains.tags: forwarded
  - add_cloud_metadata: ~
  - add_docker_metadata: ~
  - add_kubernetes_metadata: ~
# ================================== Logging ===================================
# Sets log level. The default log level is info.
# Available log levels are: error, warning, info, debug
#logging.level: debug
# At debug level, you can selectively enable logging only for some components.
# To enable all selectors use ["*"]. Examples of other selectors are "beat",
# "publisher", "service".
#logging.selectors: ["*"]
# ============================= X-Pack Monitoring ==============================
# Filebeat can export internal metrics to a central Elasticsearch monitoring
# cluster. This requires xpack monitoring to be enabled in Elasticsearch. The
# reporting is disabled by default.
# Set to true to enable the monitoring reporter.
#monitoring.enabled: false
# Sets the UUID of the Elasticsearch cluster under which monitoring data for this
# Filebeat instance will appear in the Stack Monitoring UI. If output.elasticsearch
# is enabled, the UUID is derived from the Elasticsearch cluster referenced by output.elasticsearch.
#monitoring.cluster_uuid:
# Uncomment to send the metrics to Elasticsearch. Most settings from the
# Elasticsearch output are accepted here as well.
# Note that the settings should point to your Elasticsearch *monitoring* cluster.
# Any setting that is not set is automatically inherited from the Elasticsearch
# output configuration, so if you have the Elasticsearch output configured such
# that it is pointing to your Elasticsearch monitoring cluster, you can simply
# uncomment the following line.
#monitoring.elasticsearch:
# ============================== Instrumentation ===============================
# Instrumentation support for the filebeat.
#instrumentation:
# Set to true to enable instrumentation of filebeat.
#enabled: false
# Environment in which filebeat is running on (eg: staging, production, etc.)
#environment: ""
# APM Server hosts to report instrumentation results to.
#hosts:
# - http://localhost:8200
# API Key for the APM Server(s).
# If api_key is set then secret_token will be ignored.
#api_key:
# Secret token for the APM Server(s).
#secret_token:
# ================================= Migration ==================================
# This allows to enable 6.7 migration aliases
#migration.6_to_7.enabled: true
TL;DR: Time Machine cannot create a new backup on my shared drive, but can add to an existing backup.
I'm running macOS Catalina and my Time Machine backs up to a Debian 10 server running Netatalk and Avahi. Since Mavericks, macOS has preferred SMB, and given that SMB is marginally faster, I decided to switch to SMB for the Time Machine shares. On a fresh AFP share I can start a new Time Machine backup in System Preferences and it creates a new .sparseimage without complaint.
If I use the exact same directory (/usr/local/smb), with the same permissions etc., and create a Samba share, Time Machine gives the following error when it attempts to create a new backup: "Time Machine couldn’t complete the backup to SERVER.local. The backup disk image could not be created."
If I first connect to the share with AFP and do the initial backup, I can then connect with SMB and add subsequent incremental backups without error. I thought it might be a permissions issue, but for debugging purposes I have /usr/local/smb set to 0777 and I still get the error.
ls -la showing permissions of the share point:
drwxrwxrwx 5 root smbusers 4096 Apr 3 12:35 smb
I find the following possibly helpful error in the log:
Failed to create '/Volumes/.timemachine/SERVER._smb._tcp.local/DDE06691-7411-41DD-8419-24FEFC21CE29/TimeMachine Set A - SMB/8E394711-7E3F-520B-800C-192D4F680177.sparsebundle', results: {
}, error: 13 Permission denied
afp.conf:
[Global]
; Global server settings
vol preset = default_for_all
log file = /var/log/netatalk.log
uam list = uams_dhx2.so,uams_clrtxt.so
save password = no
[default_for_all]
file perm = 0664
directory perm = 0774
cnid scheme = dbd
[Time Machine Set A - AFP]
path = /usr/local/smb
time machine = yes
vol size limit = 4000000
I'm using some smb.conf options suggested in this GitHub gist: https://gist.github.com/ChloeTigre/4c2022c0d1a281deedba6f7539a2e3ae
smb.conf:
[global]
## Browsing/Identification ###
# Change this to the workgroup/NT-domain name your Samba server will be part of
workgroup = WORKGROUP
wins support = yes
#### Debugging/Accounting ####
# This tells Samba to use a separate log file for each machine
# that connects
log file = /var/log/samba/log.%m
# Cap the size of the individual log files (in KiB).
max log size = 1000
# We want Samba to only log to /var/log/samba/log.{smbd,nmbd}.
# Append syslog#1 if you want important messages to be sent to syslog too.
logging = file
# Do something sensible when Samba crashes: mail the admin a backtrace
panic action = /usr/share/samba/panic-action %d
### MacOS compatibility stuff
guest account = smbguest
min protocol = SMB2
map acl inherit = yes
vfs objects = catia fruit streams_xattr
fruit:metadata = netatalk
fruit:model = MacSamba
fruit:posix_rename = yes
fruit:veto_appledouble = yes
durable handles = yes
kernel oplocks = no
kernel share modes = no
posix locking = no
smb2 leases = yes
#Turned off for testing compatibility
#fruit:wipe_intentionally_left_blank_rfork = yes
#fruit:delete_empty_adfiles = yes
####### Authentication #######
# Server role. Defines in which mode Samba will operate. Possible
# values are "standalone server", "member server", "classic primary
# domain controller", "classic backup domain controller", "active
# directory domain controller".
#
# Most people will want "standalone server" or "member server".
# Running as "active directory domain controller" will require first
# running "samba-tool domain provision" to wipe databases and create a
# new domain.
server role = standalone server
obey pam restrictions = yes
# This boolean parameter controls whether Samba attempts to sync the Unix
# password with the SMB password when the encrypted SMB password in the
# passdb is changed.
unix password sync = yes
# For Unix password sync to work on a Debian GNU/Linux system, the following
# parameters must be set (thanks to Ian Kahan <<kahan#informatik.tu-muenchen.de> for
# sending the correct chat script for the passwd program in Debian Sarge).
passwd program = /usr/bin/passwd %u
passwd chat = *Enter\snew\s*\spassword:* %n\n *Retype\snew\s*\spassword:* %n\n *password\supdated\ssuccessfully* .
# This boolean controls whether PAM will be used for password changes
# when requested by an SMB client instead of the program listed in
# 'passwd program'. The default is 'no'.
pam password change = yes
# This option controls how unsuccessful authentication attempts are mapped
# to anonymous connections
map to guest = bad user
######Security#######
security = user
valid users = #smbusers
username map = /etc/samba/users.map
guest ok = no
# Allow users who've been granted usershare privileges to create
# public shares, not just authenticated ones
usershare allow guests = yes
#======================= Share Definitions =======================
[TimeMachine Set A - SMB]
path = /usr/local/smb
comment = SMB Time Machine Destination Set A
browsable = yes
writeable = yes
create mode = 0664 #tried turning this off, no fix
directory mode = 0777 #tried turning this off, no fix
vfs objects = catia fruit streams_xattr
fruit:aapl = yes
fruit:time machine = yes
#guest ok = yes
fruit:time machine max size = 3.9T #tried turning this off, no fix
inherit acls = yes
As I was so helpfully informed here, the issue boiled down to one variable: I needed to change fruit:metadata = netatalk to fruit:metadata = stream.
Thought I'd post some additional info for people. My currently working smb.conf is as follows. Note that this is on Debian 10 (Buster), so things like the password change command may differ on other distributions. Also note that the order of modules in the setting vfs objects = catia fruit streams_xattr is significant.
# NOTE: Whenever you modify this file you should run the command
# "testparm" to check that you have not made any basic syntactic
# errors.
[global]
workgroup = WORKGROUP
min protocol = SMB2
log file = /var/log/samba/log.%m
max log size = 5000
logging = file
security = USER
panic action = /usr/share/samba/panic-action %d
server role = standalone server
obey pam restrictions = yes
unix password sync = yes
passwd program = /usr/bin/passwd %u
passwd chat = *Enter\snew\s*\spassword:* %n\n *Retype\snew\s*\spassword:* %n\n *password\supdated\ssuccessfully* .
pam password change = yes
username map = /etc/samba/users.map
map to guest = bad user
guest account = XXXYOURGUESTACCOUNT
# Time Machine settings
vfs objects = catia fruit streams_xattr
fruit:model = MacSamba
fruit:advertise_fullsync = true
fruit:metadata = stream
fruit:veto_appledouble = no
#default is yes, not necessary to specify fruit:posix_rename = no
#default is yes, not necessary to specify fruit:zero_file_id = yes
fruit:wipe_intentionally_left_blank_rfork = yes
fruit:delete_empty_adfiles = yes
ea support = yes
#default is yes, not necessary to specify fruit:aapl = yes
# Make share visible to Windows
#Disabled for a Mac-Only network
# lanman auth = no
# ntlm auth = yes
# wins support = yes
# local master = yes
# preferred master = yes
# Allow symlinks
# follow symlinks = yes
# wide links = yes
# unix extensions = no
#======================= Share Definitions =======================
[SHARED DRIVE]
path = /PATH/TO/YOUR/SHARED/FOLDER
valid users = #YOURGUESTUSERGROUP
writable = yes
durable handles = yes
kernel oplocks = no
kernel share modes = no
posix locking = no
vfs objects = catia fruit streams_xattr
#default is yes, not necessary to specify ea support = yes
#default is yes, not necessary to specify browseable = yes
read only = no
inherit acls = yes
fruit:time machine = yes
fruit:metadata = stream
fruit:locking = netatalk
guest ok = yes
[Time Machine]
path = /PATH/TO/YOUR/SHARED/FOLDER
valid users = #YOURGUESTUSERGROUP
writable = yes
durable handles = yes
kernel oplocks = no
kernel share modes = no
posix locking = no
vfs objects = catia fruit streams_xattr
#default is yes, not necessary to specify ea support = yes
browseable = no
read only = no
inherit acls = yes
fruit:time machine = yes
fruit:metadata = stream
fruit:locking = netatalk
fruit:time machine max size = 1.9T
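After editing smb.conf it is worth validating the syntax and restarting Samba so the new shares and options take effect (standard Debian 10 commands; unit names may differ on other distributions):
testparm -s                        # syntax-check smb.conf and print the effective configuration
sudo systemctl restart smbd nmbd   # restart the Samba daemons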
Also note that the Samba build shipped with Debian 10 currently does not have built-in support for advertising Time Machine shares. Normally, specifying fruit:time machine = yes triggers Samba to advertise a Time Machine destination via Avahi, but this specific support is not built into the Samba package included in Debian 10. You'll need to configure a .service definition in /etc/avahi/services/; I created time_machine_advert.service in that folder. You'll need to restart Avahi to be sure it gets applied, with sudo systemctl restart avahi-daemon. I have two Time Machine destinations; multiple destinations can be advertised within a single file by simply duplicating the txt-record and incrementing dkX, i.e. dk1, dk2, etc.
<?xml version="1.0" standalone='no'?><!--*-nxml-*-->
<!DOCTYPE service-group SYSTEM "avahi-service.dtd">
<service-group>
  <name replace-wildcards="yes">%h</name>
  <service>
    <type>_adisk._tcp</type>
    <txt-record>sys=waMa=0,adVF=0x100</txt-record>
    <txt-record>dk0=adVN=SAMBA SHARE NAME EXACTLY,adVF=0x82</txt-record>
    <txt-record>dk1=adVN=SAMBA SHARE NAME #2 EXACTLY,adVF=0x82</txt-record>
  </service>
  <service>
    <type>_smb._tcp</type>
    <port>445</port>
  </service>
</service-group>
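With the service file in place, restart Avahi and confirm the Time Machine advertisement is visible on the network (avahi-browse comes from the avahi-utils package):
sudo systemctl restart avahi-daemon
avahi-browse -rt _adisk._tcp   # should list the adVN share names from the txt-records above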
There are some additional useful sites for Samba configuration with regard to Time Machine here:
Samba team's official guide to Time Machine configuration, although this alone didn't get a working config for me: https://wiki.samba.org/index.php/Configure_Samba_to_Work_Better_with_Mac_OS_X
smb.conf reference: https://www.samba.org/samba/docs/current/man-html/smb.conf.5.html
vfs_fruit (the Apple compatibility module for Samba) reference: https://www.samba.org/samba/docs/current/man-html/vfs_fruit.8.html
Ken Murphy's smb.conf on GitHub that finally pushed me over the edge and got my setup working: https://github.com/KenMurphy/SambaConfigs/blob/master/smb.conf
A general Samba on Debian guide: https://www.antoneliasson.se/journal/time-machine-compatible-samba-on-debian-buster/
While looking into why my information was not persisting in Cosmos, I found this error in the Cygnus log, repeated hundreds of times:
03 Sep 2015 02:20:18,491 ERROR [SinkRunner-PollingRunner-DefaultSinkProcessor] (com.telefonica.iot.cygnus.sinks.OrionSink.process:103) - Channel error (The Flume transaction could not be started. Details=begin() called when transaction is OPEN!)
03 Sep 2015 02:20:18,491 ERROR [SinkRunner-PollingRunner-DefaultSinkProcessor] (org.apache.flume.SinkRunner$PollingRunner.run:160) - Unable to deliver event. Exception follows.
org.apache.flume.EventDeliveryException: java.lang.IllegalStateException: begin() called when transaction is OPEN!
at com.telefonica.iot.cygnus.sinks.OrionSink.process(OrionSink.java:104)
at org.apache.flume.sink.DefaultSinkProcessor.process(DefaultSinkProcessor.java:68)
at org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:147)
at java.lang.Thread.run(Thread.java:701)
Caused by: java.lang.IllegalStateException: begin() called when transaction is OPEN!
at com.google.common.base.Preconditions.checkState(Preconditions.java:145)
at org.apache.flume.channel.BasicTransactionSemantics.begin(BasicTransactionSemantics.java:131)
at com.telefonica.iot.cygnus.sinks.OrionSink.process(OrionSink.java:101)
... 3 more
Is it a configuration error or a runtime error?
Thanks in advance.
Edit: this is the configuration file content of the agent:
# Copyright 2014 Telefónica Investigación y Desarrollo, S.A.U
#
# This file is part of fiware-cygnus (FI-WARE project).
#
# fiware-cygnus is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General
# Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any
# later version.
# fiware-cygnus is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along with fiware-cygnus. If not, see
# http://www.gnu.org/licenses/.
#
# For those usages not covered by the GNU Affero General Public License please contact with iot_support at tid dot es
#=============================================
# To be put in APACHE_FLUME_HOME/conf/agent.conf
#
# General configuration template explaining how to setup a sink of each of the available types (HDFS, CKAN, MySQL).
#=============================================
# The next three fields set the sources, sinks and channels used by Cygnus. You could use different names than the
# ones suggested below, but in that case make sure you keep coherence in properties names along the configuration file.
# Regarding sinks, you can use multiple types at the same time; the only requirement is to provide a channel for each
# one of them (this example shows how to configure 3 sink types at the same time). Even, you can define more than one
# sink of the same type and sharing the channel in order to improve the performance (this is like having
# multi-threading).
cygnusagent.sources = http-source
cygnusagent.sinks = hdfs-sink
cygnusagent.channels = hdfs-channel
#=============================================
# source configuration
# channel name where to write the notification events
cygnusagent.sources.http-source.channels = hdfs-channel
# source class, must not be changed
cygnusagent.sources.http-source.type = org.apache.flume.source.http.HTTPSource
# listening port the Flume source will use for receiving incoming notifications
cygnusagent.sources.http-source.port = 5050
# Flume handler that will parse the notifications, must not be changed
cygnusagent.sources.http-source.handler = com.telefonica.iot.cygnus.handlers.OrionRestHandler
# URL target
cygnusagent.sources.http-source.handler.notification_target = /notify
# Default service (service semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service = my_service
# Default service path (service path semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service_path = my_path
# Number of channel re-injection retries before a Flume event is definitely discarded (-1 means infinite retries)
cygnusagent.sources.http-source.handler.events_ttl = 0
# Source interceptors, do not change
cygnusagent.sources.http-source.interceptors = ts
# TimestampInterceptor, do not change
cygnusagent.sources.http-source.interceptors.ts.type = timestamp
# GroupingInterceptor, do not change
#cygnusagent.sources.http-source.interceptors.gi.type = com.telefonica.iot.cygnus.interceptors.GroupingInterceptor$Builder
# Grouping rules for the GroupingInterceptor, put the right absolute path to the file if necessary
# See the doc/design/interceptors document for more details
#cygnusagent.sources.http-source.interceptors.gi.grouping_rules_conf_file = /usr/cygnus/conf/grouping_rules.conf
# ============================================
# OrionHDFSSink configuration
# channel name from where to read notification events
cygnusagent.sinks.hdfs-sink.channel = hdfs-channel
# sink class, must not be changed
cygnusagent.sinks.hdfs-sink.type = com.telefonica.iot.cygnus.sinks.OrionHDFSSink
# Comma-separated list of FQDN/IP address regarding the HDFS Namenode endpoints
# If you are using Kerberos authentication, then the usage of FQDNs instead of IP addresses is mandatory
cygnusagent.sinks.hdfs-sink.hdfs_host = cosmos.lab.fiware.org
# port of the HDFS service listening for persistence operations; 14000 for httpfs, 50070 for webhdfs
cygnusagent.sinks.hdfs-sink.hdfs_port = 14000
# username allowed to write in HDFS
cygnusagent.sinks.hdfs-sink.hdfs_username = my_username
# OAuth2 token
cygnusagent.sinks.hdfs-sink.oauth2_token = my_token
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.hdfs-sink.attr_persistence = column
# Hive FQDN/IP address of the Hive server
cygnusagent.sinks.hdfs-sink.hive_host = cosmos.lab.fiware.org
# Hive port for Hive external table provisioning
cygnusagent.sinks.hdfs-sink.hive_port = 10000
# Kerberos-based authentication enabling
cygnusagent.sinks.hdfs-sink.krb5_auth = false
# Kerberos username
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_user = krb5_username
# Kerberos password
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_password = xxxxxxxxxxxxx
# Kerberos login file
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_login_conf_file = /usr/cygnus/conf/krb5_login.conf
# Kerberos configuration file
cygnusagent.sinks.hdfs-sink.krb5_auth.krb5_conf_file = /usr/cygnus/conf/krb5.conf
#=============================================
# hdfs-channel configuration
# channel type (must not be changed)
cygnusagent.channels.hdfs-channel.type = memory
# capacity of the channel
cygnusagent.channels.hdfs-channel.capacity = 10000
# amount of bytes that can be sent per transaction
cygnusagent.channels.hdfs-channel.transactionCapacity = 1000
After configuring Cygnus in my FI-Lab VM, I'm getting this error:
[ERROR - es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionSink.process(OrionSink.java:140)] Persistence error (The default_org/XYZ directory could not be created in HDFS. HttpFS response: 400 Bad Request)
A concrete example would be this one:
(SinkRunner-PollingRunner-DefaultSinkProcessor) [ERROR - es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionSink.process(OrionSink.java:140)] Persistence error (The default_org/<XX:YY:ZZ:AA:BB>-<entity-type> directory could not be created in HDFS. HttpFS response: 400 Bad Request)
I am following this manual to run Cygnus:
https://github.com/telefonicaid/fiware-connectors/tree/develop/flume but the problem persists.
My cygnus.conf is the following:
#=============================================
# To be put in APACHE_FLUME_HOME/conf/cygnus.conf
#
# General configuration template explaining how to setup a sink of each of the available types (HDFS, CKAN, MySQL).
#=============================================
# The next three fields set the sources, sinks and channels used by Cygnus. You could use different names than the
# ones suggested below, but in that case make sure you keep coherence in properties names along the configuration file.
# Regarding sinks, you can use multiple types at the same time; the only requirement is to provide a channel for each
# one of them (this example shows how to configure 3 sink types at the same time). Even, you can define more than one
# sink of the same type and sharing the channel in order to improve the performance (this is like having
# multi-threading).
cygnusagent.sources = http-source
cygnusagent.sinks = hdfs-sink
cygnusagent.channels = hdfs-channel
#=============================================
# source configuration
# channel name where to write the notification events
cygnusagent.sources.http-source.channels = hdfs-channel
# source class, must not be changed
cygnusagent.sources.http-source.type = org.apache.flume.source.http.HTTPSource
# listening port the Flume source will use for receiving incoming notifications
cygnusagent.sources.http-source.port = 5050
# Flume handler that will parse the notifications, must not be changed
cygnusagent.sources.http-source.handler = es.tid.fiware.fiwareconnectors.cygnus.handlers.OrionRestHandler
# URL target
cygnusagent.sources.http-source.handler.notification_target = /notify
# Default service (service semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service = def_serv
# Default service path (service path semantic depends on the persistence sink)
cygnusagent.sources.http-source.handler.default_service_path = def_servpath
# Number of channel re-injection retries before a Flume event is definitely discarded
cygnusagent.sources.http-source.handler.events_ttl = 10
# Management interface port (temporal location for this parameter)
cygnusagent.sources.http-source.handler.management_port = 8081
# Source interceptors, do not change
cygnusagent.sources.http-source.interceptors = ts de
# Interceptor type, do not change
cygnusagent.sources.http-source.interceptors.ts.type = timestamp
# Destination extractor interceptor, do not change
cygnusagent.sources.http-source.interceptors.de.type = es.tid.fiware.fiwareconnectors.cygnus.interceptors.DestinationExtractor$Builder
# Matching table for the destination extractor interceptor, put the right absolute path to the file if necessary
# See the doc/design/interceptors document for more details
cygnusagent.sources.http-source.interceptors.de.matching_table = /usr/cygnus/conf/matching_table.conf
# ============================================
# OrionHDFSSink configuration
# channel name from where to read notification events
cygnusagent.sinks.hdfs-sink.channel = hdfs-channel
# sink class, must not be changed
cygnusagent.sinks.hdfs-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionHDFSSink
# Comma-separated list of FQDN/IP address regarding the Cosmos Namenode endpoints
cygnusagent.sinks.hdfs-sink.cosmos_host = cosmos.lab.fi-ware.org
# port of the Cosmos service listening for persistence operations; 14000 for httpfs, 50070 for webhdfs and free choice for infinity
cygnusagent.sinks.hdfs-sink.cosmos_port = 14000
# default username allowed to write in HDFS
cygnusagent.sinks.hdfs-sink.cosmos_default_username = <fiware_user>
# default password for the default username
cygnusagent.sinks.hdfs-sink.cosmos_default_password = <fiware_password>
# HDFS backend type (webhdfs, httpfs or infinity)
cygnusagent.sinks.hdfs-sink.hdfs_api = httpfs
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.hdfs-sink.attr_persistence = column
# Hive FQDN/IP address of the Hive server
cygnusagent.sinks.hdfs-sink.hive_host = <fiware_user>#cosmos.lab.fi-ware.org
# Hive port for Hive external table provisioning
cygnusagent.sinks.hdfs-sink.hive_port = 10000
#=============================================
# hdfs-channel configuration
# channel type (must not be changed)
cygnusagent.channels.hdfs-channel.type = memory
# capacity of the channel
cygnusagent.channels.hdfs-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.hdfs-channel.transactionCapacity = 100
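To check whether the 400 Bad Request comes from HttpFS itself rather than from Cygnus, one option is to try creating a directory by hand through the standard WebHDFS/HttpFS REST API (a sketch; <fiware_user> and the directory name are placeholders, and the exact authentication the Cosmos endpoint expects may differ):
curl -i -X PUT "http://cosmos.lab.fi-ware.org:14000/webhdfs/v1/user/<fiware_user>/def_serv_test?op=MKDIRS&user.name=<fiware_user>"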
I need to persist data in Cosmos in SQL tables instead of HDFS files.
I've deployed a VM in the Cloud section of FI-Lab, where I've installed Orion 0.14.0 and Cygnus 0.3. I've configured Cygnus to store data in HDFS and SQL... The problem is, persistence in HDFS files works fine but it is not possible in SQL tables, despite the fact that I got it working in the past. That's why I'm confused.
Since HDFS persistence works, I guess the problem must be in the cygnus.conf file, so I show it below:
# APACHE_FLUME_HOME/conf/cygnus.conf
# The next three fields set the sources, sinks and channels used by Cygnus. You could use different names than the
# ones suggested below, but in that case make sure you keep coherence in properties names along the configuration file.
# Regarding sinks, you can use multiple ones at the same time; the only requirement is to provide a channel for each
# one of them (this example shows how to configure 3 sinks at the same time).
cygnusagent.sources = http-source
cygnusagent.sinks = hdfs-sink mysql-sink
cygnusagent.channels = hdfs-channel mysql-channel
#=============================================
# source configuration
# channel name where to write the notification events
cygnusagent.sources.http-source.channels = hdfs-channel mysql-channel
# source class, must not be changed
cygnusagent.sources.http-source.type = org.apache.flume.source.http.HTTPSource
# listening port the Flume source will use for receiving incoming notifications
cygnusagent.sources.http-source.port = 5050
# Flume handler that will parse the notifications, must not be changed
cygnusagent.sources.http-source.handler = es.tid.fiware.fiwareconnectors.cygnus.handlers.OrionRestHandler
# URL target
cygnusagent.sources.http-source.handler.notification_target = /notify
# Default organization (organization semantic depend on the persistence sink)
cygnusagent.sources.http-source.handler.default_organization = org42
# ============================================
# OrionHDFSSink configuration
# channel name from where to read notification events
cygnusagent.sinks.hdfs-sink.channel = hdfs-channel
# sink class, must not be changed
cygnusagent.sinks.hdfs-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionHDFSSink
# The FQDN/IP address of the Cosmos deployment where the notification events will be persisted
cygnusagent.sinks.hdfs-sink.cosmos_host = 130.206.80.46
# port of the Cosmos service listening for persistence operations; 14000 for httpfs, 50070 for webhdfs and free choice for inifinty
cygnusagent.sinks.hdfs-sink.cosmos_port = 14000
# default username allowed to write in HDFS
cygnusagent.sinks.hdfs-sink.cosmos_default_username = quiquehz
# default password for the default username
cygnusagent.sinks.hdfs-sink.cosmos_default_password = 'password'
# HDFS backend type (webhdfs, httpfs or infinity)
cygnusagent.sinks.hdfs-sink.hdfs_api = httpfs
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.hdfs-sink.attr_persistence = column
# prefix for the database and table names, empty if no prefix is desired
cygnusagent.sinks.hdfs-sink.naming_prefix =
# Hive port for Hive external table provisioning
cygnusagent.sinks.hdfs-sink.hive_port = 10000
# ============================================
# OrionMySQLSink configuration
# channel name from where to read notification events
cygnusagent.sinks.mysql-sink.channel = mysql-channel
# sink class, must not be changed
cygnusagent.sinks.mysql-sink.type = es.tid.fiware.fiwareconnectors.cygnus.sinks.OrionMySQLSink
# the FQDN/IP address where the MySQL server runs
cygnusagent.sinks.mysql-sink.mysql_host = 130.206.80.46
# the port where the MySQL server listens for incoming connections
cygnusagent.sinks.mysql-sink.mysql_port = 3306
# a valid user in the MySQL server
cygnusagent.sinks.mysql-sink.mysql_username = quiquehz
# password for the user above
cygnusagent.sinks.mysql-sink.mysql_password = 'password'
# how the attributes are stored, either per row either per column (row, column)
cygnusagent.sinks.mysql-sink.attr_persistence = column
# prefix for the database and table names, empty if no prefix is desired
cygnusagent.sinks.mysql-sink.naming_prefix =
#=============================================
# hdfs-channel configuration
# channel type (must not be changed)
cygnusagent.channels.hdfs-channel.type = memory
# capacity of the channel
cygnusagent.channels.hdfs-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.hdfs-channel.transactionCapacity = 100
#=============================================
# mysql-channel configuration
# channel type (must not be changed)
cygnusagent.channels.mysql-channel.type = memory
# capacity of the channel
cygnusagent.channels.mysql-channel.capacity = 1000
# amount of bytes that can be sent per transaction
cygnusagent.channels.mysql-channel.transactionCapacity = 100
Having a look at your configuration, you are pointing to a MySQL server running in the Cosmos Namenode. I do not know if this is a mistake (in which case it could be the reason it is not working properly), or it is your real aim (in that case I must warn you that external connections to that database will automatically be dropped). In any case, the MySQL server should live outside Cosmos, in a VM of yours, since Cosmos is only intended for HDFS storage and MapReduce analysis.
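For example, once you have a MySQL server running in a VM of yours, the only change needed in the sink configuration is to point it there (the host below is a placeholder):
# the FQDN/IP address where the MySQL server runs (a VM of yours, not the Cosmos Namenode)
cygnusagent.sinks.mysql-sink.mysql_host = <your_vm_ip>
# the port where the MySQL server listens for incoming connections
cygnusagent.sinks.mysql-sink.mysql_port = 3306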