diff --git a/argocd/applications/victoria-metrics.yaml b/argocd/applications/victoria-metrics.yaml new file mode 100644 index 0000000..6b38202 --- /dev/null +++ b/argocd/applications/victoria-metrics.yaml @@ -0,0 +1,13 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: victoria-metrics + namespace: argocd +spec: + destination: + server: https://kubernetes.default.svc + project: default + source: + path: victoria-metrics + repoURL: https://git.pyrocufflink.blue/infra/kubernetes.git + targetRevision: master diff --git a/victoria-metrics/README.md b/victoria-metrics/README.md new file mode 100644 index 0000000..4f80e67 --- /dev/null +++ b/victoria-metrics/README.md @@ -0,0 +1,68 @@ +# Victoria Metrics + +[Victoria Metrics] is a powerful, scalable time-series database compatible +with Prometheus and its ecosystem of metrics exporters. + + +## Clustered Deployment + +*Victoria Metrics* can run in a high-availability cluster, with the various +functions of the TSDB split into independently-scalable processes: + +* `vmstorage`: Stores time series data. +* `vminsert`: Ingests metrics in various formats (e.g. Prometheus) and sends + them to one or more `vmstorage` nodes. +* `vmselect`: Performs metrics queries, retrieving results from one or more + `vmstorage` nodes. + +The `vmstorage` processes are managed by a StatefulSet with a volume claim +template for persistent storage. The number of replicas in the StatefulSet +must be at least $2n-1$ where $n$ is the value of the `replicationFactor` setting for +`vminsert`. + +`vminsert` and `vmselect` processes are stateless and thus each managed by a +Deployment. There should be at least 2 replicas of each of these, so that +restarts, etc. can be performed without any downtime. + + +## vmagent + +In a typical Victoria Metrics ecosystem, collecting metrics is handled +separately from the TSDB. The [vmagent] process handles scraping and receiving +metrics and passing them to `vminsert`. 
`vmagent` can cache received metrics +locally, in case no `vminsert` process is available, so it requires persistent +storage and is therefore managed by a StatefulSet. Because there are multiple +`vmagent` processes scraping the same targets, the `vmselect` and `vmstorage` +processes MUST have the `dedup.minScrapeInterval` setting set to match the +`vmagent` scrape interval. Jobs with scrape intervals longer than the +default will unfortunately have duplicate data points. + + +## Blackbox Exporter + +Many applications and web sites are monitored via the [Blackbox Exporter], +which makes arbitrary HTTP, TCP, ICMP, etc. requests and reports Prometheus +metrics about them. This is a stateless process, managed by a Deployment. + + +## vmalert + +Victoria Metrics has a separate process for alerting, [vmalert]. This process +periodically executes the queries defined in its alerting rules and creates +alerts for matching results. Alerts are stored in the Victoria Metrics TSDB. +Rules are defined in a YAML document, managed by a ConfigMap. Notifications +are sent to Alertmanager. + + +## Alertmanager + +[Alertmanager] receives notifications from `vmalert` and sends e.g. email +messages. Multiple instances can be run in a cluster; each node needs to know +the host and port of every node in the cluster. 
+ + +[Victoria Metrics]: https://new.docs.victoriametrics.com/ +[vmagent]: https://new.docs.victoriametrics.com/vmagent/ +[Blackbox Exporter]: https://github.com/prometheus/blackbox_exporter +[vmalert]: https://new.docs.victoriametrics.com/vmalert/ +[Alertmanager]: https://prometheus.io/docs/alerting/latest/alertmanager/ diff --git a/victoria-metrics/alertmanager.config.yml b/victoria-metrics/alertmanager.config.yml new file mode 100644 index 0000000..5a1e64e --- /dev/null +++ b/victoria-metrics/alertmanager.config.yml @@ -0,0 +1,23 @@ +global: + smtp_from: prometheus@pyrocufflink.blue + smtp_require_tls: false + smtp_smarthost: mail.pyrocufflink.blue:25 + +receivers: +- email_configs: + - send_resolved: true + to: gyrfalcon@ebonfire.com + name: default-email + +route: + group_by: + - '...' + receiver: default-email + routes: + - group_by: + - alertname + group_wait: 1m + match: + job: homeassistant + receiver: default-email + repeat_interval: 120h diff --git a/victoria-metrics/alertmanager.yaml b/victoria-metrics/alertmanager.yaml new file mode 100644 index 0000000..172ca02 --- /dev/null +++ b/victoria-metrics/alertmanager.yaml @@ -0,0 +1,87 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager +spec: + ports: + - port: 9093 + name: alertmanager + selector: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager + clusterIP: None # headless, so the per-pod names (alertmanager-0.alertmanager, ...) used as cluster peers and notifier URLs resolve + +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager +spec: + serviceName: alertmanager + selector: + matchLabels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager + template: + metadata: + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager + spec: + containers: + - name: alertmanager + image: docker.io/prom/alertmanager:v0.26.0 + 
ports: + - containerPort: 9093 + name: http + readinessProbe: &probe + httpGet: + port: http + path: /-/ready + periodSeconds: 60 + startupProbe: + <<: *probe + periodSeconds: 1 + successThreshold: 1 + failureThreshold: 30 + timeoutSeconds: 1 + securityContext: + runAsNonRoot: true + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /etc/alertmanager + name: config + readOnly: true + - mountPath: /alertmanager + name: alertmanager + subPath: data + securityContext: + fsGroup: 2093 + runAsGroup: 2093 + runAsNonRoot: true + runAsUser: 2093 + volumes: + - name: config + configMap: + name: alertmanager + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4G diff --git a/victoria-metrics/alerts.yml b/victoria-metrics/alerts.yml new file mode 100644 index 0000000..ee02e57 --- /dev/null +++ b/victoria-metrics/alerts.yml @@ -0,0 +1,129 @@ +groups: +- name: default alert + rules: + - alert: DiskUsage + expr: >- + sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df!="var-log", df!="var-lib-frigate"}) by (instance, df) > .75 + or sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df="var-log"}) by (instance, df) > .95 + or sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df="var-lib-frigate"}) by (instance, df) > .95 + for: 2h + - alert: TheWebsiteIsDown + expr: >- + probe_success{job="websites"} == 0 + for: 10m + - alert: Missing Metrics + expr: >- + up{instance!~"vmhost.*"} == 0 + for: 10m + - alert: NUT is offline + expr: >- + absent(collectd_nut_percent) + +- name: Bitwarden + rules: + - alert: vaultwarden is not running + expr: >- + collectd_processes_ps_count_processes{processes="vaultwarden"} < 1 + for: 5m + +- name: Active 
Directory + rules: + - alert: samba is not running + expr: >- + collectd_processes_ps_count_processes{processes=~"samba|smbd|winbindd|krb5kdc"} < 1 + for: 5m + +- name: Graylog + rules: + - alert: unprocessed messages + expr: >- + org_graylog2_journal_entries_uncommitted > 100 + for: 1h + +- name: mdraid + rules: + - alert: mdraid missing disk + expr: collectd_md_md_disks{type="missing", instance!~"burp.*"} != 0 + - alert: mdraid failed disk + expr: collectd_md_md_disks{type="failed"} != 0 + +- name: BURP + rules: + - alert: no recent backups + expr: absent(burp_client_last_backup_timestamp) + for: 8h + annotations: + summary: No clients have been backed up recently + description: >- + This alert indicates that NO clients have been backed up within the + last day. There is likely a problem with the BURP server. + - alert: missed client backup + expr: + time() - (burp_client_last_backup_timestamp > now() - 86400 * 90) > 86400 * 2 + for: 3h + annotations: + summary: A client has not backed up today + description: >- + A client has not been backed up for more than a day. This may be + because the client is offline, or because the backup process has + failed. Clients that have not been backed up for more than 90 days + will not trigger this alert. + - alert: disks need swapped + expr: + time() - tlast_change_over_time( + ( + collectd_md_md_disks{instance="burp1.pyrocufflink.blue", type="active"} + or last_over_time(collectd_md_md_disks{instance="burp1.pyrocufflink.blue", type="active"})[1d] + )[90d] + ) > 86400 * 30 + annotations: + summary: The disks in the BURP array need swapped + description: >- + The disks in the BURP RAID-1 (mirror) array should be swapped + periodically. One disk should be online and mounted while the other + is stored in the fireproof safe. Switching them ensures that even if + something happens to the active disk, such as hardware failure, power + surge, fire, or accidental `rm -rf`, the offline disk is only out of + date by a few weeks. 
+ - alert: disk needs archived + expr: + sum( + collectd_md_md_disks{instance="burp1.pyrocufflink.blue", type=~"missing|spare"} + ) < 1 + annotations: + summary: One of the disks in the BURP array should be archived + description: >- + The disks in the BURP RAID-1 (mirror) array should be swapped + periodically. One disk should be online and mounted while the other + is stored in the fireproof safe. All of the disks are currently + online; one needs to be disconnected and moved to the safe as soon as + possible. + +- name: certificates + rules: + - alert: certificate will expire soon + expr: + probe_ssl_last_chain_expiry_timestamp_seconds - time() < 29 * 86400 + annotations: + summary: A certificate will expire in less than 29 days + description: >- + Generally, certificates are renewed automatically, approximately 30 + days before their expiration (NotAfter) date. There may be a problem + with the certificate renewal process that prevented this certificate + from being renewed. + - alert: certificate will expire very soon + expr: + probe_ssl_last_chain_expiry_timestamp_seconds - time() < 14 * 86400 + annotations: + summary: A certificate will expire in less than 14 days + description: >- + Generally, certificates are renewed automatically, approximately 30 + days before their expiration (NotAfter) date. There is most likely a + problem with the certificate renewal process that prevented this + certificate from being renewed. 
+ +- name: Frigate + rules: + - alert: Frigate is Unavailable + expr: + homeassistant_entity_available{entity="sensor.frigate_status"} != 1 diff --git a/victoria-metrics/blackbox-exporter.yaml b/victoria-metrics/blackbox-exporter.yaml new file mode 100644 index 0000000..3568cea --- /dev/null +++ b/victoria-metrics/blackbox-exporter.yaml @@ -0,0 +1,74 @@ +apiVersion: v1 +kind: Service +metadata: + name: blackbox-exporter + labels: + app.kubernetes.io/name: blackbox-exporter + app.kubernetes.io/component: blackbox-exporter +spec: + ports: + - port: 9115 + name: blackbox-exporter + selector: + app.kubernetes.io/name: blackbox-exporter + app.kubernetes.io/component: blackbox-exporter + +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: blackbox-exporter + labels: + app.kubernetes.io/name: blackbox-exporter + app.kubernetes.io/component: blackbox-exporter +spec: + selector: + matchLabels: + app.kubernetes.io/name: blackbox-exporter + app.kubernetes.io/component: blackbox-exporter + template: + metadata: + labels: + app.kubernetes.io/name: blackbox-exporter + app.kubernetes.io/component: blackbox-exporter + spec: + containers: + - name: blackbox-exporter + image: docker.io/bitnami/blackbox-exporter:0.24.0 + args: + - --config.file=/etc/blackbox-exporter/blackbox.yml + ports: + - containerPort: 9115 + name: http + readinessProbe: &probe + httpGet: + port: http + path: / + periodSeconds: 60 + startupProbe: + <<: *probe + periodSeconds: 1 + successThreshold: 1 + failureThreshold: 30 + timeoutSeconds: 1 + securityContext: + runAsNonRoot: true + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /etc/blackbox-exporter + name: config + readOnly: true + - mountPath: /tmp + name: tmp + subPath: tmp + securityContext: + runAsNonRoot: true + sysctls: + - name: net.ipv4.ping_group_range + value: 0 65536 + volumes: + - name: config + configMap: + name: blackbox + - name: tmp + emptyDir: {} diff --git a/victoria-metrics/blackbox.yml 
b/victoria-metrics/blackbox.yml new file mode 100644 index 0000000..edb21d5 --- /dev/null +++ b/victoria-metrics/blackbox.yml @@ -0,0 +1,40 @@ +modules: + dns_pyrocufflink: + dns: + query_name: pyrocufflink.blue + query_type: SOA + validate_answer_rrs: + fail_if_not_matches_regexp: + - pyrocufflink\.blue\.\t\d+\tIN\tSOA\tdc.+\.pyrocufflink\.blue.* + prober: dns + timeout: 2s + dns_recursive: + dns: + query_name: news.ycombinator.com + query_type: A + prober: dns + timeout: 5s + http: + http: + headers: + Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 + Accept-Charset: utf-8 + Accept-Language: en-US + method: GET + prober: http + timeout: 5s + icmp: + prober: icmp + timeout: 5s + smtp: + prober: tcp + tcp: + query_response: + - expect: ^220 ([^ ]+) ESMTP (.+)$ + - send: EHLO prober\r + - expect: ^250[ -]SMTPUTF8 + - send: QUIT\r + timeout: 5s + tcp: + prober: tcp + timeout: 5s diff --git a/victoria-metrics/ingress.yaml b/victoria-metrics/ingress.yaml new file mode 100644 index 0000000..3b0bf1e --- /dev/null +++ b/victoria-metrics/ingress.yaml @@ -0,0 +1,70 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: victoria-metrics + labels: + app.kubernetes.io/name: victoria-metrics + app.kubernetes.io/component: victoria-metrics + annotations: + nginx.ingress.kubernetes.io/proxy-body-size: 40m + nginx.ingress.kubernetes.io/auth-method: GET + nginx.ingress.kubernetes.io/auth-url: http://authelia.authelia.svc.cluster.local:9091/api/verify + nginx.ingress.kubernetes.io/auth-signin: https://auth.pyrocufflink.blue/?rm=$request_method + nginx.ingress.kubernetes.io/auth-snippet: | + proxy_set_header X-Forwarded-Method $request_method; +spec: + rules: + - host: metrics.pyrocufflink.blue + http: + paths: + - path: /insert + pathType: Prefix + backend: + service: + name: vminsert + port: + name: vminsert + - path: /select + pathType: Prefix + backend: + service: + name: vmselect + port: + name: vmselect + - path: /vmalert + 
pathType: Prefix + backend: + service: + name: vmalert + port: + name: vmalert + +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: alertmanager + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/component: alertmanager + annotations: + nginx.ingress.kubernetes.io/use-regex: 'true' + nginx.ingress.kubernetes.io/rewrite-target: /$2 + nginx.ingress.kubernetes.io/proxy-body-size: 40m + nginx.ingress.kubernetes.io/auth-method: GET + nginx.ingress.kubernetes.io/auth-url: http://authelia.authelia.svc.cluster.local:9091/api/verify + nginx.ingress.kubernetes.io/auth-signin: https://auth.pyrocufflink.blue/?rm=$request_method + nginx.ingress.kubernetes.io/auth-snippet: | + proxy_set_header X-Forwarded-Method $request_method; +spec: + rules: + - host: metrics.pyrocufflink.blue + http: + paths: + - path: /alertmanager(/|$)(.*) + pathType: ImplementationSpecific + backend: + service: + name: alertmanager + port: + name: alertmanager diff --git a/victoria-metrics/kustomization.yaml b/victoria-metrics/kustomization.yaml new file mode 100644 index 0000000..4a7c8cd --- /dev/null +++ b/victoria-metrics/kustomization.yaml @@ -0,0 +1,191 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: victoria-metrics + +labels: +- pairs: + app.kubernetes.io/instance: victoria-metrics + includeSelectors: true +- pairs: + app.kubernetes.io/part-of: victoria-metrics + includeSelectors: false + +resources: +- namespace.yaml +- secrets.yaml +- vmstorage.yaml +- vmselect.yaml +- vminsert.yaml +- vmagent.yaml +- vmalert.yaml +- alertmanager.yaml +- blackbox-exporter.yaml +- ingress.yaml + +configMapGenerator: +- name: vmagent + files: + - scrape.yml + options: + disableNameSuffixHash: true + +- name: vmalert-rules + files: + - alerts.yml + options: + disableNameSuffixHash: true + +- name: alertmanager + files: + - alertmanager.yml=alertmanager.config.yml + options: + disableNameSuffixHash: true + +- name: blackbox + files: + - 
blackbox.yml + options: + disableNameSuffixHash: true + +replicas: +# When changing the number of vmstorage replicas, be sure to update +# the storageNode value for vmselect and vminsert. Also, the +# replicationFactor setting may need adjusted. +- name: vmstorage + count: 3 +- name: vmselect + count: 2 +- name: vminsert + count: 2 +- name: vmagent + count: 2 +- name: vmalert + count: 2 +# When changing the number of alertmanager replicas, be sure to update +# the notifier URL value for vmalert and the peer addresses provided to +# Alertmanager itself. +- name: alertmanager + count: 2 + +patches: +- patch: | + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: vmstorage + spec: + template: + spec: + containers: + - name: vmstorage + env: + - name: vmstorage_dedup_minScrapeInterval + value: 1m + - name: vmstorage_retentionPeriod + value: 5y + +- patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: vmselect + spec: + template: + spec: + containers: + - name: vmselect + env: + - name: vmselect_storageNode + value: vmstorage-0.vmstorage,vmstorage-1.vmstorage,vmstorage-2.vmstorage + - name: vmselect_dedup_minScrapeInterval # must match the vmstorage value; de-duplicates samples from the redundant vmagents and from replication at query time + value: 1m + - name: vmselect_replicationFactor + value: '2' + +- patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: vminsert + spec: + template: + spec: + containers: + - name: vminsert + env: + - name: vminsert_storageNode + value: vmstorage-0.vmstorage,vmstorage-1.vmstorage,vmstorage-2.vmstorage + - name: vminsert_replicationFactor + value: '2' + +- patch: | + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: vmagent + spec: + template: + spec: + containers: + - name: vmagent + env: + - name: SCRAPE_GRAYLOG_TOKEN + valueFrom: + secretKeyRef: + name: vmagent + key: graylog.token + optional: true + volumeMounts: + - mountPath: /run/secrets/vmagent + name: secrets + readOnly: true + - mountPath: /scrape/collectd + name: scrape-collectd + readOnly: true + volumes: + - name: scrape-collectd + 
configMap: + name: scrape-collectd + optional: true + - name: secrets + secret: + secretName: vmagent + +- patch: | + apiVersion: apps/v1 + kind: Deployment + metadata: + name: vmalert + spec: + template: + spec: + containers: + - name: vmalert + env: + - name: vmalert_http_pathPrefix + value: /vmalert + - name: vmalert_notifier_url + value: http://alertmanager-0.alertmanager:9093,http://alertmanager-1.alertmanager:9093 + startupProbe: + httpGet: + path: /vmalert/health + readinessProbe: + httpGet: + path: /vmalert/health + +- patch: | + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: alertmanager + spec: + template: + spec: + containers: + - name: alertmanager + args: + - --config.file=/etc/alertmanager/alertmanager.yml + - --storage.path=/alertmanager + - --cluster.peer=alertmanager-0.alertmanager:9094 + - --cluster.peer=alertmanager-1.alertmanager:9094 diff --git a/victoria-metrics/namespace.yaml b/victoria-metrics/namespace.yaml new file mode 100644 index 0000000..d8d0b8b --- /dev/null +++ b/victoria-metrics/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: victoria-metrics diff --git a/victoria-metrics/scrape.yml b/victoria-metrics/scrape.yml new file mode 100644 index 0000000..ea130d0 --- /dev/null +++ b/victoria-metrics/scrape.yml @@ -0,0 +1,258 @@ +global: + scrape_interval: 1m + +scrape_configs: +- job_name: vmagent + kubernetes_sd_configs: + - role: pod + namespaces: + own_namespace: true + selectors: + - role: pod + label: app.kubernetes.io/name=vmagent + relabel_configs: + - target_label: instance + source_labels: + - __meta_kubernetes_pod_name + +- job_name: blackbox + metrics_path: /probe + params: + module: + - icmp + static_configs: + - targets: + - 1.1.1.1 + - 8.8.8.8 + - 9.9.9.9 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: 
websites + scrape_interval: 5m + metrics_path: /probe + params: + module: + - http + static_configs: + - targets: + - http://dustin.hatch.name/ + - https://darkchestofwonders.us/ + - http://nratonpass.com/ + - http://pyrocufflink.net/ + - http://ebonfire.com/ + - http://chmod777.sh/ + - https://hatch.chat/_matrix/client/versions + - https://nextcloud.pyrocufflink.net/ + - https://bitwarden.pyrocufflink.blue/ + - https://git.pyrocufflink.blue/ + - https://jenkins.pyrocufflink.blue/login + - https://tabitha.biz/ + - https://dustinandtabitha.com/ + - https://hatchlearningcenter.org/ + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: collectd + honor_labels: true + static_configs: + - targets: + - gw1.pyrocufflink.blue + - k8s-aarch64-n0.pyrocufflink.blue + - k8s-aarch64-n1.pyrocufflink.blue + - nvr1.pyrocufflink.blue + - vmhost0.pyrocufflink.blue + - vmhost1.pyrocufflink.blue + file_sd_configs: + - files: + - /scrape/collectd/scrape-collectd.yml + relabel_configs: + - source_labels: [__address__] + target_label: __address__ + replacement: '$1:9103' + +- job_name: sambadc + scrape_interval: 1m + metrics_path: /probe + params: + module: + - tcp + dns_sd_configs: + - names: + - _ldap._tcp.pyrocufflink.blue + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: dns_recursive + scrape_interval: 1m + metrics_path: /probe + params: + module: + - dns_recursive + static_configs: + - targets: + - 172.30.0.1 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: 
dns_pyrocufflink + scrape_interval: 1m + metrics_path: /probe + params: + module: + - dns_pyrocufflink + static_configs: + - targets: + - 172.30.0.10 + - 172.30.0.9 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: smtp + scrape_interval: 1m + metrics_path: /probe + params: + module: + - smtp + dns_sd_configs: + - names: + - mail.pyrocufflink.blue + type: A + port: 25 + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: blackbox-exporter:9115 + +- job_name: homeassistant + scheme: https + metrics_path: /api/prometheus + authorization: + type: Bearer + credentials_file: /run/secrets/vmagent/homeassistant.token + static_configs: + - targets: + - homeassistant.pyrocufflink.blue + +- job_name: graylog + scheme: https + metrics_path: /api/plugins/org.graylog.plugins.metrics.prometheus/metrics + basic_auth: + username: %{SCRAPE_GRAYLOG_TOKEN} + password: token + static_configs: + - targets: + - graylog.pyrocufflink.blue:443 + +- job_name: gitea + scheme: https + static_configs: + - targets: + - git.pyrocufflink.blue + +- job_name: synapse + metrics_path: /_synapse/metrics + static_configs: + - targets: + - matrix0.pyrocufflink.blue + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__address__] + target_label: __address__ + replacement: '$1:9000' + +- job_name: unifi + static_configs: + - targets: + - unifi.pyrocufflink.blue:9130 + +- job_name: jenkins + metrics_path: /prometheus/ + scheme: https + static_configs: + - targets: + - jenkins.pyrocufflink.blue + +- job_name: burp + scrape_interval: 270s + scrape_timeout: 30s + static_configs: + - targets: + - burp.pyrocufflink.blue:9645 + +- job_name: minio-backups + 
metrics_path: /minio/v2/metrics/cluster + scheme: https + static_configs: + - targets: + - burp.pyrocufflink.blue:9000 + +- job_name: kubernetes + scheme: https + tls_config: + ca_file: /run/secrets/kubernetes.io/serviceaccount/ca.crt + static_configs: + - targets: + - kubernetes.pyrocufflink.blue:6443 + +- job_name: kubelet + scheme: https + tls_config: + ca_file: /run/secrets/kubernetes.io/serviceaccount/ca.crt + authorization: + type: Bearer + credentials_file: /run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: %{KUBERNETES_SERVICE_HOST}:%{KUBERNETES_SERVICE_PORT} + - target_label: __metrics_path__ + source_labels: + - __meta_kubernetes_node_name + replacement: /api/v1/nodes/$1/proxy/metrics + +- job_name: zincati + metrics_path: /bridge?selector=zincati + static_configs: + - targets: + - k8s-aarch64-n0.pyrocufflink.blue + - k8s-aarch64-n1.pyrocufflink.blue + - nvr1.pyrocufflink.blue + relabel_configs: + - source_labels: [__address__] + target_label: instance + - source_labels: [__address__] + target_label: __address__ + replacement: '$1:9598' diff --git a/victoria-metrics/secrets.yaml b/victoria-metrics/secrets.yaml new file mode 100644 index 0000000..4f8bf92 --- /dev/null +++ b/victoria-metrics/secrets.yaml @@ -0,0 +1,18 @@ +apiVersion: bitnami.com/v1alpha1 +kind: SealedSecret +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +spec: + encryptedData: + graylog.token: 
AgAhkcueTYekWV1i71xu97dP8WkDczpuSaQzP/HBDLvAIOs+n15aS8vk/6iLKcovSdf7tBTpj2ft1zf1oLqYL6q2jpakF6HYCIRSMDGOTkp6hBJyTup+bafqaNgzY2D9i7D/KMdCahVPnrriyNVAgCl2zlrMny5C882IvGNwS4fhaHFdLm+waTRHdZZJJzObvXI4nWDO7fOqIEEzoOF6pBwuTXU6t38bK72RxUWHQjOx9XP+MJbfB64kPHul8w+kS94LMLq/6LxofMs54YtSOLavPUo+OZhcW53XQROHKKJqAm23FE9HOVdnggGMHnXIDnhSBu4rOGt0OMn/7X9MaKO25Ey+jx/+K3tj17tu6OlvUJ49x3u03cmvC2BXokl2Dnj9+at6gC5Zuj4bGvquxsF8/uPAfZSasFFWr5p5HVfPUOqriSbMZ8tmn7ZAnWhaJrxc91Vv0raHeXMjJTu36r3QJtNpt2UNoY23pxH7QS6KxSB/3QXOZb2l1I3S7EoHddiu8MuZxAhWkmsqZDHWZPWsYPO0bA7NZlM+7XwhU2vqloH79tLTLdIlzubFXGW70VrsDm2bJOrftlcxkHG8j5NqNbOHuYGMZ+7m9cIpzB5ilmuv6k5Et9P0Vo++Awt5534VDpw6+vm2a5O/YwZjjP3VOtrp59Y8HFI3V/MQYpO3CpaYTOQGELt3tWpfIKKHnfqmYI8hFVdOj9kR76OFxOBXoyFagM9Th12NGHUkNZTbAhu/BFxVl5wC2ObGgjlcwQSvRo5m + homeassistant.token: AgB23TUFBNpyu0RmdYkUZEYWy3+cMcffZFKVjAlvbkGOWs3f5NwWbJEZ7wBP5+s9NHNYzGx5VtGRQKSg/MjUs47gtaRN4uHLzdH7ziWrRurG4GIl36mCYeivqdf7Z+PCi/E/O+0ShCmaF9u8rYcb/ECWJDWIMt1m5+QBWLXdPJgKpw4bbS8AlIzsSvjwZs4axeZvMp4gpzjPhtl4XziKiptQ25miXtX5GtNOqmI9QwMcOmXVLXg6ZC93AqToUWiiiGKvFSob6VUvQlEJaJZNhaDwN52fjglwus6B4pnrOnck21T56IviD5Lh6LI5EKltTAJ7TwPjIySbhASp/dGOna1wanSspQZ9ZhAUMiSxrBLlv5fB+Sg70k++5jftbqqNi+X+m36UEcwQBISZNaGgQlutzJ/ghDEwXfPZOsImVT5qPz62wV8bfxJ5G2CRw1S5BfFXjJ0AyzulKLE5GGucUk03R0MUYLz0oS2mX4xsHG3qlc5WvzHNc/YsOm8bKVd/Wa/wGt3U/4bNX8BF5IUznuc22+4oxdvEeOidMB4LjLaYpYzBwpRRMLupb9sSiqKCaGUfb6kQGDHkcwZDpCVf492IUIbeihc50J2w3J9eQfbyJMmc0v0GblpzNG0OVGmMy5Z+ThZnTm/Rfmrsh18zYQGr4sBZFjXJZA2gETQY1gSTsS1z2KkhvzQwrZEiMp4ZgIzDxUwGPLH0BqnXHz7KfxJW7V1BUsk2fkD2UA51CVf52VUlGGY0CYQRf8Sgq/qqAHtz6MpQ7Gc4Z1RzXlALLTK8hii7Uv6aGNFj8iIpbRLG7d9R6y+Rlp8JUd1PC6a+uFLASx2M/q036/bBNbUDgxs3qHJmq1SaHEObFur7dpIbd9XP2FWWmk8x67gQs1Nqb73KVHbWJ1kdGMmfJr42aO6+ZPVZvlJ3l1RL4a1lrAk7Y0FlWjMvOg0r + template: + metadata: + name: vmagent + namespace: victoria-metrics + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent diff --git a/victoria-metrics/vmagent.yaml b/victoria-metrics/vmagent.yaml new file mode 
100644 index 0000000..4367eac --- /dev/null +++ b/victoria-metrics/vmagent.yaml @@ -0,0 +1,185 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +rules: +- apiGroups: + - '' + resources: + - nodes + verbs: + - get + - list + - watch +- apiGroups: + - '' + resources: + - nodes/proxy + verbs: + - get +- nonResourceURLs: + - /metrics + verbs: + - get + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vmagent +subjects: +- kind: ServiceAccount + name: vmagent + namespace: victoria-metrics + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +rules: +- apiGroups: + - '' + resources: + - pods + verbs: + - get + - list + - watch + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: vmagent +subjects: +- kind: ServiceAccount + name: vmagent + +--- +apiVersion: v1 +kind: Service +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent +spec: + ports: + - port: 8429 + name: vmagent + selector: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent + clusterIP: None + +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vmagent + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent 
+spec: + serviceName: vmagent + selector: + matchLabels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent + template: + metadata: + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent + spec: + containers: + - name: vmagent + image: docker.io/victoriametrics/vmagent:v1.96.0 + args: + - -envflag.enable=true + - -envflag.prefix=vmagent_ + - -remoteWrite.tmpDataPath=/data + - -httpListenAddr=0.0.0.0:8429 + - -promscrape.config=/config/scrape.yml + - -promscrape.configCheckInterval=30s + env: + - name: vmagent_remoteWrite_url + value: http://vminsert:8480/insert/1/prometheus/api/v1/write + ports: + - containerPort: 8429 + name: http + readinessProbe: &probe + httpGet: + port: http + path: /health + periodSeconds: 60 + startupProbe: + <<: *probe + periodSeconds: 1 + successThreshold: 1 + failureThreshold: 30 + timeoutSeconds: 1 + securityContext: + runAsNonRoot: true + readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /config + name: config + readOnly: true + - mountPath: /data + name: tmpdata + subPath: data + serviceAccountName: vmagent + securityContext: + fsGroup: 2093 + runAsGroup: 2093 + runAsNonRoot: true + runAsUser: 2093 + volumes: + - name: config + configMap: + name: vmagent + volumeClaimTemplates: + - apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tmpdata + labels: + app.kubernetes.io/name: vmagent + app.kubernetes.io/component: vmagent + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 4G diff --git a/victoria-metrics/vmalert.yaml b/victoria-metrics/vmalert.yaml new file mode 100644 index 0000000..e0ef386 --- /dev/null +++ b/victoria-metrics/vmalert.yaml @@ -0,0 +1,88 @@ +apiVersion: v1 +kind: Service +metadata: + name: vmalert + labels: + app.kubernetes.io/name: vmalert + app.kubernetes.io/component: vmalert +spec: + ports: + - port: 8880 + name: vmalert + selector: + app.kubernetes.io/name: vmalert + app.kubernetes.io/component: vmalert + +--- 
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vmalert
+  labels:
+    app.kubernetes.io/name: vmalert
+    app.kubernetes.io/component: vmalert
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vmalert
+      app.kubernetes.io/component: vmalert
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vmalert
+        app.kubernetes.io/component: vmalert
+    spec:
+      containers:
+        - name: vmalert
+          image: docker.io/victoriametrics/vmalert:v1.96.0
+          args:
+            - -envflag.enable=true
+            - -envflag.prefix=vmalert_  # env vars below map to flags
+            - -httpListenAddr=0.0.0.0:8880
+            - -configCheckInterval=30s
+          env:
+            - name: vmalert_rule
+              value: /rules/*.yml
+            - name: vmalert_datasource_url
+              value: http://vmselect:8481/select/1/prometheus
+            - name: vmalert_remoteRead_url  # -remoteRead.url (case-sensitive)
+              value: http://vmselect:8481/select/1/prometheus
+            - name: vmalert_remoteWrite_url  # -remoteWrite.url
+              value: http://vminsert:8480/insert/1/prometheus  # write path
+            - name: vmalert_notifier_url
+              value: http://alertmanager:9093
+          ports:
+            - containerPort: 8880
+              name: http
+          readinessProbe: &probe
+            httpGet:
+              port: http
+              path: /health
+            periodSeconds: 60
+          startupProbe:
+            <<: *probe
+            periodSeconds: 1
+            successThreshold: 1
+            failureThreshold: 30
+            timeoutSeconds: 1
+          securityContext:
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+          volumeMounts:
+            - mountPath: /rules
+              name: rules
+              readOnly: true
+            - mountPath: /tmp
+              name: tmp
+              subPath: tmp
+      securityContext:
+        runAsGroup: 2093
+        runAsNonRoot: true
+        runAsUser: 2093
+      volumes:
+        - name: rules
+          configMap:
+            name: vmalert-rules
+            optional: true
+        - name: tmp
+          emptyDir: {}
diff --git a/victoria-metrics/vminsert.yaml b/victoria-metrics/vminsert.yaml
new file mode 100644
index 0000000..60ae57f
--- /dev/null
+++ b/victoria-metrics/vminsert.yaml
@@ -0,0 +1,72 @@
+# Service fronting the vminsert Deployment on port 8480.
+apiVersion: v1
+kind: Service
+metadata:
+  name: vminsert
+  labels:
+    app.kubernetes.io/name: vminsert
+    app.kubernetes.io/component: vminsert
+spec:
+  ports:
+    - port: 8480
+      name: vminsert
+  selector:
+    app.kubernetes.io/name: vminsert
+    app.kubernetes.io/component: vminsert
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vminsert
+  labels:
+    app.kubernetes.io/name: vminsert
+    app.kubernetes.io/component: vminsert
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vminsert
+      app.kubernetes.io/component: vminsert
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vminsert
+        app.kubernetes.io/component: vminsert
+    spec:
+      containers:
+        - name: vminsert
+          image: docker.io/victoriametrics/vminsert:v1.96.0-cluster
+          args:
+            # NOTE(review): no -storageNode flag or vminsert_storageNode env
+            # var is set in this manifest; presumably supplied elsewhere.
+            - -envflag.enable=true
+            - -envflag.prefix=vminsert_
+            - -httpListenAddr=0.0.0.0:8480
+          ports:
+            - containerPort: 8480
+              name: http
+          readinessProbe: &probe
+            httpGet:
+              port: http
+              path: /health
+            periodSeconds: 60
+          startupProbe:
+            <<: *probe
+            periodSeconds: 1
+            successThreshold: 1
+            failureThreshold: 30
+            timeoutSeconds: 1
+          securityContext:
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+              subPath: tmp
+      securityContext:
+        runAsGroup: 2093
+        runAsNonRoot: true
+        runAsUser: 2093
+      volumes:
+        - name: tmp
+          emptyDir: {}
diff --git a/victoria-metrics/vmselect.yaml b/victoria-metrics/vmselect.yaml
new file mode 100644
index 0000000..91d60ab
--- /dev/null
+++ b/victoria-metrics/vmselect.yaml
@@ -0,0 +1,72 @@
+# Service fronting the vmselect Deployment on port 8481.
+apiVersion: v1
+kind: Service
+metadata:
+  name: vmselect
+  labels:
+    app.kubernetes.io/name: vmselect
+    app.kubernetes.io/component: vmselect
+spec:
+  ports:
+    - port: 8481
+      name: vmselect
+  selector:
+    app.kubernetes.io/name: vmselect
+    app.kubernetes.io/component: vmselect
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vmselect
+  labels:
+    app.kubernetes.io/name: vmselect
+    app.kubernetes.io/component: vmselect
+spec:
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vmselect
+      app.kubernetes.io/component: vmselect
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vmselect
+        app.kubernetes.io/component: vmselect
+    spec:
+      containers:
+        - name: vmselect
+          image: docker.io/victoriametrics/vmselect:v1.96.0-cluster
+          args:
+            # NOTE(review): no -storageNode flag or vmselect_storageNode env
+            # var is set in this manifest; presumably supplied elsewhere.
+            - -envflag.enable=true
+            - -envflag.prefix=vmselect_
+            - -httpListenAddr=0.0.0.0:8481
+          ports:
+            - containerPort: 8481
+              name: http
+          readinessProbe: &probe
+            httpGet:
+              port: http
+              path: /health
+            periodSeconds: 60
+          startupProbe:
+            <<: *probe
+            periodSeconds: 1
+            successThreshold: 1
+            failureThreshold: 30
+            timeoutSeconds: 1
+          securityContext:
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+          volumeMounts:
+            - mountPath: /tmp
+              name: tmp
+              subPath: tmp
+      securityContext:
+        runAsGroup: 2093
+        runAsNonRoot: true
+        runAsUser: 2093
+      volumes:
+        - name: tmp
+          emptyDir: {}
diff --git a/victoria-metrics/vmstorage.yaml b/victoria-metrics/vmstorage.yaml
new file mode 100644
index 0000000..50c2d4a
--- /dev/null
+++ b/victoria-metrics/vmstorage.yaml
@@ -0,0 +1,92 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: vmstorage
+  labels:
+    app.kubernetes.io/name: vmstorage
+    app.kubernetes.io/component: vmstorage
+spec:
+  ports:
+    - port: 8400
+      name: vminsert
+    - port: 8401
+      name: vmselect
+  selector:
+    app.kubernetes.io/name: vmstorage
+    app.kubernetes.io/component: vmstorage
+  # Headless: this Service is the StatefulSet's serviceName, and per-pod DNS
+  # records are only published for a headless governing Service.
+  clusterIP: None
+
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: vmstorage
+  labels:
+    app.kubernetes.io/name: vmstorage
+    app.kubernetes.io/component: vmstorage
+spec:
+  serviceName: vmstorage
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: vmstorage
+      app.kubernetes.io/component: vmstorage
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: vmstorage
+        app.kubernetes.io/component: vmstorage
+    spec:
+      containers:
+        - name: vmstorage
+          image: docker.io/victoriametrics/vmstorage:v1.96.0-cluster
+          args:
+            - -envflag.enable=true
+            - -envflag.prefix=vmstorage_
+            - -storageDataPath=/data
+            - -httpListenAddr=0.0.0.0:8482
+          ports:
+            - containerPort: 8400
+              name: vminsert
+            - containerPort: 8401
+              name: vmselect
+            - containerPort: 8482
+              name: http
+          readinessProbe: &probe
+            httpGet:
+              port: http
+              path: /health
+            periodSeconds: 60
+          startupProbe:
+            <<: *probe
+            periodSeconds: 1
+            successThreshold: 1
+            failureThreshold: 30
+            timeoutSeconds: 1
+          securityContext:
+            runAsNonRoot: true
+            readOnlyRootFilesystem: true
+          volumeMounts:
+            - mountPath: /data
+              name: data
+              subPath: data
+      securityContext:
+        fsGroup: 2093
+        runAsGroup: 2093
+        runAsNonRoot: true
+        runAsUser: 2093
+  volumeClaimTemplates:
+    - apiVersion: v1
+      kind: PersistentVolumeClaim
+      metadata:
+        name: data
+        labels:
+          app.kubernetes.io/name: vmstorage
+          app.kubernetes.io/component: vmstorage
+      spec:
+        accessModes:
+          - ReadWriteOnce
+        resources:
+          requests:
+            storage: 40G