mirror of
https://github.com/fluxcd/flux2-monitoring-example.git
synced 2024-12-14 10:47:31 +00:00
Merge pull request #1 from fluxcd/monitoring-config
Add kube-prometheus-stack, loki-stack & monitoring-config
This commit is contained in:
commit
acd76e11c6
12 changed files with 3529 additions and 0 deletions
7
kube-prometheus-stack/kustomization.yaml
Normal file
7
kube-prometheus-stack/kustomization.yaml
Normal file
|
@ -0,0 +1,7 @@
|
|||
---
# Kustomize entry point for the kube-prometheus-stack component.
# Every resource listed below is placed in the `monitoring` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
  # The namespace itself, the Helm chart source, and the Helm release.
  - namespace.yaml
  - repository.yaml
  - release.yaml
|
6
kube-prometheus-stack/namespace.yaml
Normal file
6
kube-prometheus-stack/namespace.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
# Namespace that hosts the monitoring stack.
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
  labels:
    # Same label value the Prometheus podMonitorSelector in release.yaml
    # matches on.
    app.kubernetes.io/component: monitoring
|
162
kube-prometheus-stack/release.yaml
Normal file
162
kube-prometheus-stack/release.yaml
Normal file
|
@ -0,0 +1,162 @@
|
|||
---
# Flux HelmRelease for the kube-prometheus-stack chart.
#
# Alertmanager and the chart's default Grafana dashboards are switched off,
# and kube-state-metrics is configured to export only custom-resource-state
# metrics for the Flux (GitOps Toolkit) custom resources.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: kube-prometheus-stack
spec:
  interval: 5m
  chart:
    spec:
      chart: kube-prometheus-stack
      version: '48.x'
      sourceRef:
        kind: HelmRepository
        name: prometheus-community
      interval: 60m
  # CRD handling policy for the chart's CRDs on install and on upgrade.
  install:
    crds: Create
  upgrade:
    crds: CreateReplace
  # https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-prometheus-stack/values.yaml
  values:
    alertmanager:
      enabled: false
    prometheus:
      prometheusSpec:
        retention: 24h
        resources:
          requests:
            cpu: 200m
            memory: 200Mi
        # Empty namespace selector; PodMonitors are filtered by the label
        # selector below.
        podMonitorNamespaceSelector: {}
        podMonitorSelector:
          matchLabels:
            app.kubernetes.io/component: monitoring
    grafana:
      defaultDashboardsEnabled: false
    kube-state-metrics:
      # No built-in collectors; only the custom resource state metrics
      # configured under `customResourceState` are exported.
      collectors: []
      extraArgs:
        - --custom-resource-state-only=true
      rbac:
        # Read-only access (list/watch) to the Flux custom resources that
        # the metric definitions below refer to.
        extraRules:
          - apiGroups:
              - source.toolkit.fluxcd.io
              - kustomize.toolkit.fluxcd.io
              - helm.toolkit.fluxcd.io
              - image.toolkit.fluxcd.io
              - notification.toolkit.fluxcd.io
            resources:
              - gitrepositories
              - buckets
              - helmrepositories
              - helmcharts
              - ocirepositories
              - kustomizations
              - helmreleases
              - imagerepositories
              - imagepolicies
              - imageupdateautomations
              - alerts
              - providers
              - receivers
            verbs:
              - list
              - watch
      customResourceState:
        enabled: true
        config:
          spec:
            resources:
              # Template entry: defines the `gotk`-prefixed `resource_info`
              # metric for GitRepository. Every following entry merges this
              # mapping via `<<: *metric` and overrides only
              # `groupVersionKind` (YAML merge keys are shallow).
              - &metric
                groupVersionKind:
                  group: source.toolkit.fluxcd.io
                  version: 'v1'
                  kind: GitRepository
                metricNamePrefix: gotk
                metrics:
                  - name: 'resource_info'
                    help: 'The current state of a GitOps Toolkit resource.'
                    each:
                      type: Info
                      info:
                        labelsFromPath:
                          name: [metadata, name]
                    labelsFromPath:
                      exported_namespace: [metadata, namespace]
                      ready: [status, conditions, "[type=Ready]", status]
              # --- source.toolkit.fluxcd.io ---
              - <<: *metric
                groupVersionKind:
                  group: source.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: Bucket
              - <<: *metric
                groupVersionKind:
                  group: source.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: HelmRepository
              - <<: *metric
                groupVersionKind:
                  group: source.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: HelmChart
              - <<: *metric
                groupVersionKind:
                  group: source.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: OCIRepository
              # --- kustomize.toolkit.fluxcd.io ---
              - <<: *metric
                groupVersionKind:
                  group: kustomize.toolkit.fluxcd.io
                  version: 'v1'
                  kind: Kustomization
              # --- helm.toolkit.fluxcd.io ---
              - <<: *metric
                groupVersionKind:
                  group: helm.toolkit.fluxcd.io
                  version: 'v2beta1'
                  kind: HelmRelease
              # --- image.toolkit.fluxcd.io ---
              - <<: *metric
                groupVersionKind:
                  group: image.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: ImageRepository
              - <<: *metric
                groupVersionKind:
                  group: image.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: ImagePolicy
              - <<: *metric
                groupVersionKind:
                  group: image.toolkit.fluxcd.io
                  version: 'v1beta1'
                  kind: ImageUpdateAutomation
              # --- notification.toolkit.fluxcd.io ---
              - <<: *metric
                groupVersionKind:
                  group: notification.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: Alert
              - <<: *metric
                groupVersionKind:
                  group: notification.toolkit.fluxcd.io
                  version: 'v1beta2'
                  kind: Provider
              - <<: *metric
                groupVersionKind:
                  group: notification.toolkit.fluxcd.io
                  version: 'v1'
                  kind: Receiver
  # Post-render patches: annotate selected rendered objects so that Flux
  # drift detection is disabled for them.
  postRenderers:
    - kustomize:
        patches:
          # Admission webhook configurations: excluded from the Flux diff
          # because chart hooks mutate them.
          - target:
              kind: (ValidatingWebhookConfiguration|MutatingWebhookConfiguration)
              name: kube-prometheus-stack-admission
            patch: |
              - op: add
                path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection
                value: disabled
          # PrometheusRule objects: excluded from the Flux diff because they
          # are mutated at apply time but not at dry-run time.
          - target:
              kind: PrometheusRule
            patch: |
              - op: add
                path: /metadata/annotations/helm.toolkit.fluxcd.io~1driftDetection
                value: disabled
|
8
kube-prometheus-stack/repository.yaml
Normal file
8
kube-prometheus-stack/repository.yaml
Normal file
|
@ -0,0 +1,8 @@
|
|||
---
# Flux source for the prometheus-community Helm chart repository.
# Referenced by the kube-prometheus-stack HelmRelease via `sourceRef`.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: prometheus-community
spec:
  type: default
  url: https://prometheus-community.github.io/helm-charts
  # The repository interval is 120m.
  interval: 120m
|
6
loki-stack/kustomization.yaml
Normal file
6
loki-stack/kustomization.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
---
# Kustomize entry point for the loki-stack component.
# Resources are placed in the `monitoring` namespace; no namespace manifest
# is listed among this overlay's resources.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
  - repository.yaml
  - release.yaml
|
34
loki-stack/release.yaml
Normal file
34
loki-stack/release.yaml
Normal file
|
@ -0,0 +1,34 @@
|
|||
---
# Flux HelmRelease for the grafana/loki-stack chart (Loki + Promtail).
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: loki-stack
spec:
  interval: 5m
  # Declares a dependency on the kube-prometheus-stack HelmRelease.
  dependsOn:
    - name: kube-prometheus-stack
  chart:
    spec:
      chart: loki-stack
      version: '2.x'
      sourceRef:
        kind: HelmRepository
        name: grafana-charts
      interval: 60m
  # https://github.com/grafana/helm-charts/blob/main/charts/loki-stack/values.yaml
  # https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml
  values:
    promtail:
      enabled: true
    loki:
      enabled: true
      isDefault: false
      serviceMonitor:
        enabled: true
        # NOTE(review): kube-prometheus-stack's default serviceMonitorSelector
        # is believed to match on the `release` label - confirm that this
        # label alone gets the ServiceMonitor selected by Prometheus.
        additionalLabels:
          app.kubernetes.io/part-of: kube-prometheus-stack
      config:
        chunk_store_config:
          max_look_back_period: 0s
        # Retention deletes are enabled with a 12h retention period.
        table_manager:
          retention_deletes_enabled: true
          retention_period: 12h
|
7
loki-stack/repository.yaml
Normal file
7
loki-stack/repository.yaml
Normal file
|
@ -0,0 +1,7 @@
|
|||
---
# Flux source for the Grafana Helm chart repository.
# Referenced by the loki-stack HelmRelease via `sourceRef`.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: grafana-charts
spec:
  # Same duration as before (120m0s), written in the short form used by the
  # prometheus-community HelmRepository so both sources read alike.
  interval: 120m
  # Explicit HTTP/S repository type, matching the prometheus-community
  # HelmRepository; `default` is also the value used when the field is unset.
  type: default
  url: https://grafana.github.io/helm-charts
|
1198
monitoring-config/dashboards/cluster.json
Normal file
1198
monitoring-config/dashboards/cluster.json
Normal file
File diff suppressed because it is too large
Load diff
1724
monitoring-config/dashboards/control-plane.json
Normal file
1724
monitoring-config/dashboards/control-plane.json
Normal file
File diff suppressed because it is too large
Load diff
332
monitoring-config/dashboards/logs.json
Normal file
332
monitoring-config/dashboards/logs.json
Normal file
|
@ -0,0 +1,332 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_LOKI",
|
||||
"label": "Loki",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "loki",
|
||||
"pluginName": "Loki"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": "-- Grafana --",
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [],
|
||||
"type": "dashboard"
|
||||
},
|
||||
"type": "dashboard"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"iconColor": "red",
|
||||
"name": "flux events",
|
||||
"target": {
|
||||
"limit": 100,
|
||||
"matchAny": false,
|
||||
"tags": [
|
||||
"flux"
|
||||
],
|
||||
"type": "tags"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"description": "Flux logs collected from Kubernetes, stored in Loki",
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 29,
|
||||
"iteration": 1653748775696,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "bars",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "hidden",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"expr": "sum(count_over_time({namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\" [$__interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "Log count",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"description": "Logs from services running in Kubernetes",
|
||||
"gridPos": {
|
||||
"h": 25,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "numbers",
|
||||
"enableLogDetails": false,
|
||||
"prettifyLogMessage": true,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": false,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"expr": "{namespace=~\"$namespace\", stream=~\"$stream\", app =~\"$controller\"} | json | __error__!=\"JSONParserErr\" | level=~\"$level\" |= \"$query\"",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"refresh": "10s",
|
||||
"schemaVersion": 36,
|
||||
"style": "light",
|
||||
"tags": [
|
||||
"flux"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "",
|
||||
"value": ""
|
||||
},
|
||||
"description": "String to search for",
|
||||
"hide": 0,
|
||||
"label": "Search Query",
|
||||
"name": "query",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "",
|
||||
"value": ""
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "textbox"
|
||||
},
|
||||
{
|
||||
"allValue": "info|error",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": false,
|
||||
"name": "level",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "info",
|
||||
"value": "info"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "error",
|
||||
"value": "error"
|
||||
}
|
||||
],
|
||||
"query": "info,error",
|
||||
"queryValue": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "custom"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"All"
|
||||
],
|
||||
"value": [
|
||||
"$__all"
|
||||
]
|
||||
},
|
||||
"datasource": "${DS_LOKI}",
|
||||
"definition": "label_values(app)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "controller",
|
||||
"options": [],
|
||||
"query": "label_values(app)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": true,
|
||||
"text": [
|
||||
"flux-system"
|
||||
],
|
||||
"value": [
|
||||
"flux-system"
|
||||
]
|
||||
},
|
||||
"datasource": "${DS_LOKI}",
|
||||
"definition": "label_values(namespace)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": "label_values(namespace)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"allValue": ".+",
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": "${DS_LOKI}",
|
||||
"definition": "label_values(stream)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "stream",
|
||||
"options": [],
|
||||
"query": "label_values(stream)",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Loki",
|
||||
"value": "Loki"
|
||||
},
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Datasource",
|
||||
"multi": false,
|
||||
"name": "DS_LOKI",
|
||||
"options": [],
|
||||
"query": "loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"type": "datasource"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "Flux Logs",
|
||||
"uid": "flux-logs",
|
||||
"version": 2
|
||||
}
|
16
monitoring-config/kustomization.yaml
Normal file
16
monitoring-config/kustomization.yaml
Normal file
|
@ -0,0 +1,16 @@
|
|||
---
# Kustomize entry point for the Flux monitoring configuration: the PodMonitor
# for the Flux controllers plus a generated ConfigMap holding the Grafana
# dashboards.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: monitoring
resources:
  - podmonitor.yaml
configMapGenerator:
  # One ConfigMap containing every dashboard JSON file listed below.
  - name: flux-grafana-dashboards
    files:
      - dashboards/control-plane.json
      - dashboards/cluster.json
      - dashboards/logs.json
    options:
      labels:
        # Quoted so the label value stays the string "1", not an integer.
        grafana_dashboard: "1"
        app.kubernetes.io/part-of: flux
        app.kubernetes.io/component: monitoring
|
29
monitoring-config/podmonitor.yaml
Normal file
29
monitoring-config/podmonitor.yaml
Normal file
|
@ -0,0 +1,29 @@
|
|||
---
# PodMonitor that scrapes the Flux controllers running in `flux-system`.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: flux-system
  labels:
    app.kubernetes.io/part-of: flux
    app.kubernetes.io/component: monitoring
spec:
  # Only look at pods in the flux-system namespace ...
  namespaceSelector:
    matchNames:
      - flux-system
  # ... whose `app` label names one of the Flux controllers.
  selector:
    matchExpressions:
      - key: app
        operator: In
        values:
          - helm-controller
          - source-controller
          - kustomize-controller
          - notification-controller
          - image-automation-controller
          - image-reflector-controller
  podMetricsEndpoints:
    - port: http-prom
      relabelings:
        # Keep only targets whose pod phase is Running.
        # https://github.com/prometheus-operator/prometheus-operator/issues/4816
        - action: keep
          sourceLabels:
            - __meta_kubernetes_pod_phase
          regex: Running
|
Loading…
Reference in a new issue