mirror of
https://github.com/monitoring-mixins/website.git
synced 2024-12-14 11:37:31 +00:00
94 lines
4.7 KiB
YAML
94 lines
4.7 KiB
YAML
|
# Prometheus alerting rules for Apache Solr.
# All rules live in a single rule group named "apache-solr".
groups:
- name: apache-solr
  rules:
  # Fires when solr_zookeeper_ensemble_size changed value at least once inside
  # a 5m window, and that condition has held for 5m.
  - alert: ApacheSolrZookeeperChangeInEnsembleSize
    annotations:
      description: Zookeeper host {{$labels.zk_host}} has had an ensemble change of
        {{ printf "%.0f" $value }} over the last 5 minutes
      summary: Changes in the ZooKeeper ensemble size can affect the stability and
        performance of the cluster.
    expr: |
      changes(solr_zookeeper_ensemble_size[5m]) > 0
    for: 5m
    labels:
      severity: warning
  # System CPU load (5m average, scaled by 100) above 85 for 5m.
  # The base_url and item labels are aggregated away before comparison.
  - alert: ApacheSolrHighCPUUsageCritical
    annotations:
      description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
        a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
        of 85.'
      summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
        impacting performance.
    expr: |
      100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 85
    for: 5m
    labels:
      severity: critical
  # Same expression as the critical CPU alert, with the lower threshold of 75.
  - alert: ApacheSolrHighCPUUsageWarning
    annotations:
      description: '{{$labels.instance}} on cluster {{$labels.solr_cluster}} has had
        a system CPU load of {{ printf "%.0f" $value }}%, which is above the threshold
        of 75.'
      summary: High CPU load can indicate that Solr nodes are under heavy load, potentially
        impacting performance.
    expr: |
      100 * sum without (base_url, item) (avg_over_time(solr_metrics_jvm_os_cpu_load{item="systemCpuLoad"}[5m])) > 75
    for: 5m
    labels:
      severity: critical
  # Used heap as a percentage of max heap, above 85 for 5m.
  # clamp_min(..., 1) keeps the denominator at 1 or more so a zero/absent max
  # value cannot cause a division by zero.
  # The critical threshold (85) is deliberately higher than the warning
  # threshold (75), matching the CPU alert pair above.
  - alert: ApacheSolrHighHeapMemoryUsageCritical
    annotations:
      description: |
        {{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 85.
      summary: High heap memory usage can lead to garbage collection issues, out-of-memory
        errors, and overall system instability.
    expr: |
      100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 85
    for: 5m
    labels:
      severity: critical
  # Same expression as the critical heap alert, with the lower threshold of 75,
  # so the warning fires before the critical alert does.
  - alert: ApacheSolrHighHeapMemoryUsageWarning
    annotations:
      description: |
        {{$labels.instance}} on cluster {{$labels.solr_cluster}} has had high memory usage of {{ printf "%.0f" $value }}%, which is above the threshold of 75.
      summary: High heap memory usage can lead to garbage collection issues, out-of-memory
        errors, and overall system instability.
    expr: |
      100 * sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="used"}) / clamp_min(sum without(item, base_url)(solr_metrics_jvm_memory_heap_bytes{item="max"}), 1) > 75
    for: 5m
    labels:
      severity: warning
  # Searcher cache hit ratio (scaled by 100) below 75 for 10m.
  # Only the documentCache, filterCache and queryResultCache types are matched.
  - alert: ApacheSolrLowCacheHitRatio
    annotations:
      description: |
        {{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a low cache hit ratio of {{ printf "%.0f" $value }}% on core {{$labels.core}} of type {{$labels.type}}, which is under the threshold of 75.
      summary: Low cache hit ratios can lead to increased disk I/O and slower query
        response times.
    expr: |
      100 * sum without(base_url, category, collection, item, replica, shard) (solr_metrics_core_searcher_cache_ratio{item="hitratio", type=~"documentCache|filterCache|queryResultCache"}) < 75
    for: 10m
    labels:
      severity: warning
  # 10m increase of the core error counter divided by the counter's 10m average
  # (denominator clamped to at least 1), scaled by 100, above 15 for 10m.
  - alert: ApacheSolrHighCoreErrors
    annotations:
      description: |
        {{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high amount of core errors {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 15.
      summary: A spike in core errors can indicate serious issues at the core level,
        affecting data integrity and availability.
    expr: |
      100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_errors_total[10m]) / clamp_min(avg_over_time(solr_metrics_core_errors_total[10m]), 1)) > 15
    for: 10m
    labels:
      severity: warning
  # 15m increase of the update-handler "adds" counter divided by the counter's
  # 15m average (denominator clamped to at least 1), scaled by 100, above 30
  # for 15m.
  - alert: ApacheSolrHighDocumentIndexing
    annotations:
      description: |
        {{$labels.instance}} on cluster {{$labels.solr_cluster}} has had a high document indexing value of {{ printf "%.0f" $value }}% on core {{$labels.core}}, which is above the threshold of 30.
      summary: A sudden spike in document indexing could indicate unintended or malicious
        bulk updates.
    expr: |
      100 * sum without(base_url, category, collection, handler, replica, shard) (increase(solr_metrics_core_update_handler_adds_total[15m]) / clamp_min(avg_over_time(solr_metrics_core_update_handler_adds_total[15m]), 1)) > 30
    for: 15m
    labels:
      severity: warning