Netdata on AWS EKS PVC Error

I have deployed Netdata via Helm. All child pods deploy successfully, but the parent pod fails to schedule with the following error:

0/12 nodes are available: 12 node(s) didn't match PersistentVolume's node affinity. preemption: 0/12 nodes are available: 12 Preemption is not helpful for scheduling.
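
For reference, this message means that the EBS-backed PersistentVolume bound to the parent's PVC carries a node affinity pinning it to a single availability zone, and none of the 12 nodes satisfies it. Comparing kubectl get pv <pv-name> -o yaml with the zone labels from kubectl get nodes --show-labels should show the mismatch. A dynamically provisioned EBS volume typically ends up with a node affinity along the lines of the sketch below (illustrative only; the topology key and zone value depend on whether the in-tree provisioner or the EBS CSI driver created the volume):

# Illustrative PV node affinity (not taken from the cluster above)
nodeAffinity:
  required:
    nodeSelectorTerms:
    - matchExpressions:
      - key: topology.ebs.csi.aws.com/zone
        operator: In
        values:
        - us-east-2c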

My storage class looks like this:

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: resource-storage
  annotations:
    storageclass.kubernetes.io/is-default-class: 'true'
provisioner: kubernetes.io/aws-ebs
parameters:
  fsType: ext4
  type: gp2
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: Immediate

Helm values used:

replicaCount: 1

service:
  type: ClusterIP
  port: 19999
  annotations: { }

ingress:
  enabled: false

serviceAccount:
  create: true
  name: netdata

restarter:
  enabled: true
  schedule: "00 06 * * *"
  image:
    repository: bitnami/kubectl
    tag: 1.25
    pullPolicy: Always
  restartPolicy: Never
  resources: { }
  concurrencyPolicy: Forbid
  startingDeadlineSeconds: 60
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3

notifications:
  slack:
    webhook_url: ""
    recipient: ""

parent:
  hostname: "netdata-parent"
  enabled: true
  port: 19999
  resources: { }
    # limits:
    #  cpu: 4
    #  memory: 4096Mi
    # requests:
    #  cpu: 4
    #  memory: 4096Mi
  livenessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1
  securityContext:
    runAsUser: 201
    runAsGroup: 201
    fsGroup: 201
  terminationGracePeriodSeconds: 300
  nodeSelector: { }
  tolerations: [ ]
  affinity: { }
  priorityClassName: ""
  env: { }
  ## To disable anonymous statistics:
  # DO_NOT_TRACK: 1
  envFrom: [ ]
    ## E.g. to read Netdata Cloud claim token from an existing secret "netdata" set this to:
    # - secretRef:
    #     name: netdata
    ## And create it with: kubectl create secret generic netdata --from-literal="NETDATA_CLAIM_TOKEN=<token>"
    ## Also ensure that claim.token is empty

  podLabels: { }
  podAnnotations: { }
  dnsPolicy: Default
  database:
    persistence: true
    storageclass: "resource-storage"
    volumesize: 50Gi
  alarms:
    persistence: true
    storageclass: "resource-storage"
    volumesize: 5Gi
  configs:
    netdata:
      enabled: true
      path: /etc/netdata/netdata.conf
      data: |
        [global]
          hostname = {{ .Values.parent.hostname }}
        [db]
          mode = dbengine

        [plugins]
          cgroups = no
          tc = no
          enable running new plugins = no
          check for new plugins every = 72000
          python.d = no
          charts.d = no
          go.d = no
          node.d = no
          apps = no
          proc = no
          idlejitter = no
          diskspace = no
    stream:
      enabled: true
      path: /etc/netdata/stream.conf
      data: |
        [11111111-2222-3333-4444-555555555555]
          enabled = yes
          history = 3600
          default memory mode = dbengine
          health enabled by default = auto
          allow from = *
    health:
      enabled: true
      path: /etc/netdata/health_alarm_notify.conf
      data: |
        SEND_EMAIL="NO"
        SEND_SLACK="YES"
        SLACK_WEBHOOK_URL="{{ .Values.notifications.slack.webhook_url }}"
        DEFAULT_RECIPIENT_SLACK="{{ .Values.notifications.slack.recipient }}"
    exporting:
      enabled: false
      path: /etc/netdata/exporting.conf
      data: ""
    example:
      enabled: false
      path: /etc/netdata/health.d/example.conf
      data: |
        alarm: example_alarm1
        on: example.random
        every: 2s
        warn: $random1 > (($status >= $WARNING)  ? (70) : (80))
        crit: $random1 > (($status == $CRITICAL) ? (80) : (90))
        info: random
        to: sysadmin
  extraVolumeMounts: [ ]
  extraVolumes: [ ]
  extraInitContainers: [ ]

child:
  enabled: true
  port: "{{ .Values.parent.port }}"
  updateStrategy: { }
  resources: { }
  livenessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1
  terminationGracePeriodSeconds: 30
  nodeSelector: { }
  tolerations:
    - operator: Exists
      effect: NoSchedule
  affinity: { }
  priorityClassName: ""
  podLabels: { }
  podAnnotationAppArmor:
    enabled: true
  podAnnotations: { }
  hostNetwork: true
  dnsPolicy: ClusterFirstWithHostNet
  persistence:
    enabled: true
    storageclass: "resource-storage"
    hostPath: /var/lib/netdata-k8s-child
  podsMetadata:
    useKubelet: false
    kubeletUrl: "https://localhost:10250"
  configs:
    netdata:
      enabled: true
      path: /etc/netdata/netdata.conf
      data: |
        [db]
          mode = ram
        [web]
          bind to = localhost:19999
        [health]
          enabled = no
        [ml]
          enabled = no
    stream:
      enabled: true
      path: /etc/netdata/stream.conf
      data: |
        [stream]
          enabled = {{ ternary "yes" "no" .Values.parent.enabled }}
          destination = netdata:{{ .Values.service.port }}
          api key = 11111111-2222-3333-4444-555555555555
          timeout seconds = 60
          buffer size bytes = 1048576
          reconnect delay seconds = 5
          initial clock resync iterations = 60
    exporting:
      enabled: false
      path: /etc/netdata/exporting.conf
      data: ""
    go.d:
      enabled: true
      path: /etc/netdata/go.d.conf
      data: |
        modules:
          pulsar: no
          prometheus: yes
    kubelet:
      enabled: true
      path: /etc/netdata/go.d/k8s_kubelet.conf
      data: |
        update_every: 1
        autodetection_retry: 0
        jobs:
          - url: http://127.0.0.1:10255/metrics
          - url: https://localhost:10250/metrics
            tls_skip_verify: yes
    kubeproxy:
      enabled: true
      path: /etc/netdata/go.d/k8s_kubeproxy.conf
      data: |
        update_every: 1
        autodetection_retry: 0
        jobs:
          - url: http://127.0.0.1:10249/metrics
  env: { }
  ## To disable anonymous statistics:
  # DO_NOT_TRACK: 1

  envFrom: [ ]
    ## E.g. to read Netdata Cloud claim token from an existing secret "netdata" set this to:
    # - secretRef:
    #     name: netdata
    ## And create it with: kubectl create secret generic netdata --from-literal="NETDATA_CLAIM_TOKEN=<token>"
    ## Also ensure that claim.token is empty

  claiming:
    enabled: false
    token: ""
    rooms: ""
    url: "https://api.netdata.cloud"

  extraVolumeMounts: [ ]
    ## Additional volume mounts for netdata child
    ## E.g to mount all disks under / to be monitored via the diskspace plugin
    # - name: hostroot
    #   mountPath: /host/root
    #   readOnly: true
    #   mountPropagation: HostToContainer
  extraVolumes: [ ]
    ## Additional volumes for netdata child
    ## E.g to mount all disks under / to be monitored via the diskspace plugin
    # - name: hostroot
    #   hostPath:
    #     path: /

k8sState:
  hostname: "netdata-k8s-state"
  enabled: true
  port: "{{ .Values.parent.port }}"

  resources: { }
    # limits:
    #  cpu: 4
    #  memory: 4096Mi
    # requests:
    #  cpu: 4
    #  memory: 4096Mi

  livenessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1
  readinessProbe:
    initialDelaySeconds: 0
    failureThreshold: 3
    periodSeconds: 30
    successThreshold: 1
    timeoutSeconds: 1

  terminationGracePeriodSeconds: 30

  nodeSelector: { }

  tolerations: [ ]

  affinity: { }

  priorityClassName: ""

  podLabels: { }

  podAnnotationAppArmor:
    enabled: true

  podAnnotations: { }

  dnsPolicy: ClusterFirstWithHostNet

  persistence:
    enabled: true
    storageclass: "resource-storage"
    volumesize: 5Gi

  configs:
    netdata:
      enabled: true
      path: /etc/netdata/netdata.conf
      data: |
        [global]
          hostname = {{ .Values.k8sState.hostname }}
        [db]
          mode = ram
        [web]
          bind to = localhost:19999
        [health]
          enabled = no
        [ml]
          enabled = no
        [plugins]
          timex = no
          checks = no
          idlejitter = no
          tc = no
          diskspace = no
          proc = no
          cgroups = no
          enable running new plugins = no
          slabinfo = no
          perf = no
          go.d = yes
          ioping = no
          ebpf = no
          charts.d = no
          apps = no
          python.d = no
          fping = no
    stream:
      enabled: true
      path: /etc/netdata/stream.conf
      data: |
        [stream]
          enabled = {{ ternary "yes" "no" .Values.parent.enabled }}
          destination = netdata:{{ .Values.service.port }}
          api key = 11111111-2222-3333-4444-555555555555
          timeout seconds = 60
          buffer size bytes = 1048576
          reconnect delay seconds = 5
          initial clock resync iterations = 60
    exporting:
      enabled: false
      path: /etc/netdata/exporting.conf
      data: ""
    go.d:
      enabled: true
      path: /etc/netdata/go.d.conf
      data: |
        default_run: no
        modules:
          k8s_state: yes
    go.d-k8s_state:
      enabled: true
      path: /etc/netdata/go.d/k8s_state.conf
      data: |
        jobs:
          - name: k8s_state
            update_every: 1

  env: { }
  ## To disable anonymous statistics:
  # DO_NOT_TRACK: 1

  envFrom: [ ]
    ## E.g. to read Netdata Cloud claim token from an existing secret "netdata" set this to:
    # - secretRef:
    #     name: netdata
    ## And create it with: kubectl create secret generic netdata --from-literal="NETDATA_CLAIM_TOKEN=<token>"
    ## Also ensure that claim.token is empty

  claiming:
    enabled: false
    token: ""
    rooms: ""
    url: "https://api.netdata.cloud"

  extraVolumeMounts: [ ]

  extraVolumes: [ ]

Any suggestions?

  • I also tried with the default Helm values, but that did not work either.

Solution:
You need to pin the availability zone in the storage class by adding an allowedTopologies section:

allowedTopologies:
- matchLabelExpressions:
  - key: topology.ebs.csi.aws.com/zone
    values:
    - us-east-2c
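
Putting it together, the storage class would look something like the sketch below (assuming us-east-2c is the zone where the parent's EBS volume and pod should land, and that the EBS CSI driver handles provisioning; with the legacy in-tree provisioner the zone key may need to be topology.kubernetes.io/zone instead):

apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: resource-storage
  annotations:
    storageclass.kubernetes.io/is-default-class: 'true'
provisioner: kubernetes.io/aws-ebs
parameters:
  fsType: ext4
  type: gp2
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: Immediate
allowedTopologies:
- matchLabelExpressions:
  - key: topology.ebs.csi.aws.com/zone
    values:
    - us-east-2c

Since most StorageClass fields are immutable, you will likely have to delete and recreate the resource-storage class (and the parent's PVC, so that a new volume is provisioned in the pinned zone) for the change to take effect.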