All my servers are running Ubuntu 18, 20 or 22 LTS versions. The below is specifically about Ubuntu 22.04 LTS, with the nightly builds of netdata running.
I have a working parent setup that accepts streaming for a uuidgen generated API Key. I say “working”, because the parent is streaming to netdata.cloud…
I have a production server that was connected directly to app.netdata.cloud, and since it is production, I wanted to introduce first one and then later two or even three parents - so that I have a better handle on data retention and so that I can lessen the burden of netdata on the child system.
When I install netdata from the Room I want the server to show up in, netdata works (of course!). When I then reconfigure it to stream via the parent, netdata starts and stops within one second:
root@divine-meadow:/var/log/netdata# systemctl restart netdata
root@divine-meadow:/var/log/netdata# systemctl status netdata
● netdata.service - Real time performance monitoring
Loaded: loaded (/lib/systemd/system/netdata.service; enabled; vendor preset: enabled)
Active: activating (auto-restart) (Result: exit-code) since Fri 2023-11-24 06:05:06 UTC; 2s ago
Process: 6290 ExecStart=/usr/sbin/netdata -D $EXTRA_OPTS (code=exited, status=1/FAILURE)
Main PID: 6290 (code=exited, status=1/FAILURE)
CPU: 31ms
Nov 24 06:05:06 divine-meadow netdata[6290]: All threads finished.
Nov 24 06:05:06 divine-meadow netdata[6290]: DIGEST-MD5 common mech free
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 100 ms, cancel main threads - next: close SQL con
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 0 ms, close SQL context db - next: closed SQL m
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 0 ms, closed SQL main db - next: remove pid file
Nov 24 06:05:06 divine-meadow netdata[6290]: EXIT: cannot unlink pidfile '/var/run/netdata/netdata.pid'.
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 0 ms, remove pid file - next: free openssl stru
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 0 ms, free openssl structures - next: remove in
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: in 0 ms, remove incomplete shutdown file - next: e>
Nov 24 06:05:06 divine-meadow netdata[6290]: NETDATA SHUTDOWN: completed in 100 ms - netdata is now exiting - bye bye...
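I’ll attach my parent and child configs below: first the parent’s netdata.conf and stream.conf, then the child’s netdata.conf and stream.conf.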
I would like to receive some help with this issue, as I have the need to monitor my production environment.
[global]
access log = none
[db]
mode = dbengine
storage tiers = 3
# To allow memory pressure to offload index from ram
dbengine page descriptors in file mapped memory = yes
# storage tier 0
update every = 1
dbengine multihost disk space MB = 12000
dbengine page cache size MB = 1400
# storage tier 1
dbengine tier 1 page cache size MB = 512
dbengine tier 1 multihost disk space MB = 4096
dbengine tier 1 update every iterations = 60
dbengine tier 1 backfill = new
# storage tier 2
dbengine tier 2 page cache size MB = 128
dbengine tier 2 multihost disk space MB = 2048
dbengine tier 2 update every iterations = 60
dbengine tier 2 backfill = new
[ml]
# Enabled by default
# enabled = yes
[health]
# Enabled by default
# enabled = yes
[web]
# Enabled by default
ssl key = /etc/nginx/ssl/balmy-sunset.itpassion.watch/1985710/server.key
ssl certificate = /etc/nginx/ssl/balmy-sunset.itpassion.watch/1985710/server.crt
tls version = 1.3
tls ciphers = TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256:TLS_AES_128_GCM_SHA256
bind to = *=dashboard|registry|badges|management|streaming|netdata.conf^SSL=force
stream.conf:
[stream]
# Stream metrics to another Netdata
enabled = yes
# The Private IP and PORT of all parents
destination = PARENT_2_IP_ADDRESS:19999
# This is the API key for the outgoing connection to Parent 2 as generated in Step 5
api key = f2b6d8cf-4c88-431c-a3d5-f0225d5adeed
[f2b6d8cf-4c88-431c-a3d5-f0225d5adeed]
# Accept metrics streams from Parent 2 and Child Agents
enabled = yes
[db]
# https://learn.netdata.cloud/docs/agent/database
# none = no retention, ram = some retention in ram
mode = ram
# The retention in seconds.
# This provides some tolerance to the time the child has to find a parent in
# order to transfer the data. For IoT this can be lowered to 120.
retention = 1200
# The granularity of metrics, in seconds.
# You may increase this to lower CPU resources.
update every = 1
[ml]
# Disable Machine Learning
enabled = no
[health]
# Disable Health Checks (Alerting)
enabled = no
[web]
# Disable remote access to the local dashboard
bind to = lo
[plugins]
# Uncomment the following line to disable all external plugins on extreme
# IoT cases by default.
# enable running new plugins = no
stream.conf:
[stream]
# Stream metrics to another Netdata
enabled = yes
# The IP and PORT of the parent
destination = d-fra1-mon0001p.itpassion.watch:19999
# The shared API key, generated by uuidgen
api key = f2b6d8cf-4c88-431c-a3d5-f0225d5adeed
Is there anybody in the community who could help me out with this, please? I’m surely not the first to try and make parent-child connections work even via TLS?!