mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Feature] Add Reason in OOM Metric (#1308)
This commit is contained in:
parent
8fb93b85f9
commit
9107c053a7
7 changed files with 98 additions and 60 deletions
|
@ -2,6 +2,7 @@
|
|||
|
||||
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
|
||||
- (Feature) ArangoBackup create retries and MaxIterations limit
|
||||
- (Feature) Add Reason in OOM Metric
|
||||
|
||||
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
|
||||
- (Feature) Add InSync Cache
|
||||
|
|
|
@ -14,3 +14,4 @@ Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContai
|
|||
| container | Container Name |
|
||||
| container_type | Container/InitContainer/EphemeralContainer |
|
||||
| code | ExitCode |
|
||||
| reason | Container termination reason |
|
||||
|
|
|
@ -219,6 +219,8 @@ namespaces:
|
|||
description: "Container/InitContainer/EphemeralContainer"
|
||||
- key: code
|
||||
description: "ExitCode"
|
||||
- key: reason
|
||||
description: "Container termination reason"
|
||||
engine:
|
||||
panics_recovered:
|
||||
shortDescription: "Number of Panics recovered inside Operator reconciliation loop"
|
||||
|
|
|
@ -39,7 +39,7 @@ type Metrics struct {
|
|||
Members map[string]MetricMember
|
||||
}
|
||||
|
||||
func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
|
||||
func (m *Metrics) IncMemberContainerRestarts(id, container, reason string, code int32) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
@ -63,14 +63,22 @@ func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
|
|||
cr = MetricMemberRestarts{}
|
||||
}
|
||||
|
||||
cr[code]++
|
||||
cd := cr[code]
|
||||
|
||||
if cd == nil {
|
||||
cd = MetricMemberRestartReason{}
|
||||
}
|
||||
|
||||
cd[reason]++
|
||||
|
||||
cr[code] = cd
|
||||
|
||||
v.ContainerRestarts[container] = cr
|
||||
|
||||
m.Members[id] = v
|
||||
}
|
||||
|
||||
func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int32) {
|
||||
func (m *Metrics) IncMemberInitContainerRestarts(id, container, reason string, code int32) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
@ -94,14 +102,22 @@ func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int3
|
|||
cr = MetricMemberRestarts{}
|
||||
}
|
||||
|
||||
cr[code]++
|
||||
cd := cr[code]
|
||||
|
||||
if cd == nil {
|
||||
cd = MetricMemberRestartReason{}
|
||||
}
|
||||
|
||||
cd[reason]++
|
||||
|
||||
cr[code] = cd
|
||||
|
||||
v.InitContainerRestarts[container] = cr
|
||||
|
||||
m.Members[id] = v
|
||||
}
|
||||
|
||||
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code int32) {
|
||||
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container, reason string, code int32) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
@ -125,7 +141,15 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
|
|||
cr = MetricMemberRestarts{}
|
||||
}
|
||||
|
||||
cr[code]++
|
||||
cd := cr[code]
|
||||
|
||||
if cd == nil {
|
||||
cd = MetricMemberRestartReason{}
|
||||
}
|
||||
|
||||
cd[reason]++
|
||||
|
||||
cr[code] = cd
|
||||
|
||||
v.EphemeralContainerRestarts[container] = cr
|
||||
|
||||
|
@ -133,31 +157,41 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
|
|||
}
|
||||
|
||||
type MetricMember struct {
|
||||
ContainerRestarts map[string]MetricMemberRestarts
|
||||
InitContainerRestarts map[string]MetricMemberRestarts
|
||||
EphemeralContainerRestarts map[string]MetricMemberRestarts
|
||||
ContainerRestarts MetricContainerRestarts
|
||||
InitContainerRestarts MetricContainerRestarts
|
||||
EphemeralContainerRestarts MetricContainerRestarts
|
||||
}
|
||||
|
||||
type MetricMemberRestarts map[int32]uint64
|
||||
type MetricContainerRestarts map[string]MetricMemberRestarts
|
||||
|
||||
type MetricMemberRestarts map[int32]MetricMemberRestartReason
|
||||
|
||||
type MetricMemberRestartReason map[string]uint64
|
||||
|
||||
func (d *Resources) CollectMetrics(m metrics.PushMetric) {
|
||||
for member, info := range d.metrics.Members {
|
||||
// Containers
|
||||
for container, restarts := range info.ContainerRestarts {
|
||||
for code, count := range restarts {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code)))
|
||||
for code, reasons := range restarts {
|
||||
for reason, count := range reasons {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code), reason))
|
||||
}
|
||||
}
|
||||
}
|
||||
// InitContainers
|
||||
for container, restarts := range info.InitContainerRestarts {
|
||||
for code, count := range restarts {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code)))
|
||||
for code, reasons := range restarts {
|
||||
for reason, count := range reasons {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code), reason))
|
||||
}
|
||||
}
|
||||
}
|
||||
// EphemeralContainers
|
||||
for container, restarts := range info.EphemeralContainerRestarts {
|
||||
for code, count := range restarts {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code)))
|
||||
for code, reasons := range restarts {
|
||||
for reason, count := range reasons {
|
||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code), reason))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,65 +29,65 @@ import (
|
|||
func Test_MetricsInc_Container(t *testing.T) {
|
||||
var m Metrics
|
||||
|
||||
m.IncMemberContainerRestarts("ID", "server", 137)
|
||||
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberContainerRestarts("ID", "server", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
||||
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberContainerRestarts("ID", "server2", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
|
||||
m.IncMemberContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberContainerRestarts("ID2", "server", 137)
|
||||
m.IncMemberContainerRestarts("ID", "server", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138])
|
||||
m.IncMemberContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138]["OOMKill"])
|
||||
}
|
||||
|
||||
func Test_MetricsInc_InitContainer(t *testing.T) {
|
||||
var m Metrics
|
||||
|
||||
m.IncMemberInitContainerRestarts("ID", "server", 137)
|
||||
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberInitContainerRestarts("ID", "server", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
||||
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberInitContainerRestarts("ID", "server2", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
|
||||
m.IncMemberInitContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberInitContainerRestarts("ID2", "server", 137)
|
||||
m.IncMemberInitContainerRestarts("ID", "server", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138])
|
||||
m.IncMemberInitContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138]["OOMKill"])
|
||||
}
|
||||
|
||||
func Test_MetricsInc_EphemeralContainer(t *testing.T) {
|
||||
var m Metrics
|
||||
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server2", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
|
||||
|
||||
m.IncMemberEphemeralContainerRestarts("ID2", "server", 137)
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138])
|
||||
m.IncMemberEphemeralContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 138)
|
||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138]["OOMKill"])
|
||||
}
|
||||
|
|
|
@ -205,7 +205,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
|
|||
Time("finished", t.FinishedAt.Time).
|
||||
Warn("Pod failed in unexpected way: Init Container failed")
|
||||
|
||||
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.ExitCode)
|
||||
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -227,7 +227,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
|
|||
Time("finished", t.FinishedAt.Time).
|
||||
Warn("Pod failed in unexpected way: Core Container failed")
|
||||
|
||||
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.ExitCode)
|
||||
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ package metric_descriptions
|
|||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||
|
||||
var (
|
||||
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`}, nil)
|
||||
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`, `reason`}, nil)
|
||||
)
|
||||
|
||||
func init() {
|
||||
|
@ -34,6 +34,6 @@ func ArangodbOperatorMembersUnexpectedContainerExitCodes() metrics.Description {
|
|||
return arangodbOperatorMembersUnexpectedContainerExitCodes
|
||||
}
|
||||
|
||||
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string) metrics.Metric {
|
||||
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code)
|
||||
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string, reason string) metrics.Metric {
|
||||
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code, reason)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue