1
0
Fork 0
mirror of https://github.com/arangodb/kube-arangodb.git synced 2024-12-14 11:57:37 +00:00

[Feature] Add Reason in OOM Metric (#1308)

This commit is contained in:
Adam Janikowski 2023-05-08 17:18:45 +07:00 committed by GitHub
parent 8fb93b85f9
commit 9107c053a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 98 additions and 60 deletions

View file

@ -2,6 +2,7 @@
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
- (Feature) ArangoBackup create retries and MaxIterations limit
- (Feature) Add Reason in OOM Metric
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
- (Feature) Add InSync Cache

View file

@ -14,3 +14,4 @@ Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContai
| container | Container Name |
| container_type | Container/InitContainer/EphemeralContainer |
| code | ExitCode |
| reason | Reason |

View file

@ -219,6 +219,8 @@ namespaces:
description: "Container/InitContainer/EphemeralContainer"
- key: code
description: "ExitCode"
- key: reason
description: "Reason"
engine:
panics_recovered:
shortDescription: "Number of Panics recovered inside Operator reconciliation loop"

View file

@ -39,7 +39,7 @@ type Metrics struct {
Members map[string]MetricMember
}
func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
func (m *Metrics) IncMemberContainerRestarts(id, container, reason string, code int32) {
if m == nil {
return
}
@ -63,14 +63,22 @@ func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
cr = MetricMemberRestarts{}
}
cr[code]++
cd := cr[code]
if cd == nil {
cd = MetricMemberRestartReason{}
}
cd[reason]++
cr[code] = cd
v.ContainerRestarts[container] = cr
m.Members[id] = v
}
func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int32) {
func (m *Metrics) IncMemberInitContainerRestarts(id, container, reason string, code int32) {
if m == nil {
return
}
@ -94,14 +102,22 @@ func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int3
cr = MetricMemberRestarts{}
}
cr[code]++
cd := cr[code]
if cd == nil {
cd = MetricMemberRestartReason{}
}
cd[reason]++
cr[code] = cd
v.InitContainerRestarts[container] = cr
m.Members[id] = v
}
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code int32) {
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container, reason string, code int32) {
if m == nil {
return
}
@ -125,7 +141,15 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
cr = MetricMemberRestarts{}
}
cr[code]++
cd := cr[code]
if cd == nil {
cd = MetricMemberRestartReason{}
}
cd[reason]++
cr[code] = cd
v.EphemeralContainerRestarts[container] = cr
@ -133,31 +157,41 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
}
// MetricMember groups the restart counters kept for a single member, split by
// container kind: regular containers, init containers and ephemeral containers.
type MetricMember struct {
ContainerRestarts map[string]MetricMemberRestarts
InitContainerRestarts map[string]MetricMemberRestarts
EphemeralContainerRestarts map[string]MetricMemberRestarts
ContainerRestarts MetricContainerRestarts
InitContainerRestarts MetricContainerRestarts
EphemeralContainerRestarts MetricContainerRestarts
}
type MetricMemberRestarts map[int32]uint64
// MetricContainerRestarts maps a container name to the restart counters
// recorded for that container.
type MetricContainerRestarts map[string]MetricMemberRestarts
// MetricMemberRestarts maps a container exit code to the per-reason restart
// counters recorded for that exit code.
type MetricMemberRestarts map[int32]MetricMemberRestartReason
// MetricMemberRestartReason maps a reason string to the number of restarts
// counted with that reason.
type MetricMemberRestartReason map[string]uint64
func (d *Resources) CollectMetrics(m metrics.PushMetric) {
for member, info := range d.metrics.Members {
// Containers
for container, restarts := range info.ContainerRestarts {
for code, count := range restarts {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code)))
for code, reasons := range restarts {
for reason, count := range reasons {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code), reason))
}
}
}
// InitContainers
for container, restarts := range info.InitContainerRestarts {
for code, count := range restarts {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code)))
for code, reasons := range restarts {
for reason, count := range reasons {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code), reason))
}
}
}
// EphemeralContainers
for container, restarts := range info.EphemeralContainerRestarts {
for code, count := range restarts {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code)))
for code, reasons := range restarts {
for reason, count := range reasons {
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code), reason))
}
}
}
}

View file

@ -29,65 +29,65 @@ import (
// Test_MetricsInc_Container starts from a zero-value Metrics and checks that
// IncMemberContainerRestarts keeps independent counters per member ID,
// container name and exit code.
// NOTE(review): every call passes the same reason ("OOMKill"), so the
// per-reason split is never exercised with two different reasons.
func Test_MetricsInc_Container(t *testing.T) {
var m Metrics
m.IncMemberContainerRestarts("ID", "server", 137)
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
m.IncMemberContainerRestarts("ID", "server", 137)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
m.IncMemberContainerRestarts("ID", "server2", 137)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
m.IncMemberContainerRestarts("ID", "server2", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
m.IncMemberContainerRestarts("ID2", "server", 137)
m.IncMemberContainerRestarts("ID", "server", 138)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138])
m.IncMemberContainerRestarts("ID2", "server", "OOMKill", 137)
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 138)
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138]["OOMKill"])
}
// Test_MetricsInc_InitContainer starts from a zero-value Metrics and checks
// that IncMemberInitContainerRestarts keeps independent counters per member
// ID, container name and exit code.
// NOTE(review): every call passes the same reason ("OOMKill"), so the
// per-reason split is never exercised with two different reasons.
func Test_MetricsInc_InitContainer(t *testing.T) {
var m Metrics
m.IncMemberInitContainerRestarts("ID", "server", 137)
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
m.IncMemberInitContainerRestarts("ID", "server", 137)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
m.IncMemberInitContainerRestarts("ID", "server2", 137)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
m.IncMemberInitContainerRestarts("ID", "server2", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
m.IncMemberInitContainerRestarts("ID2", "server", 137)
m.IncMemberInitContainerRestarts("ID", "server", 138)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138])
m.IncMemberInitContainerRestarts("ID2", "server", "OOMKill", 137)
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 138)
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138]["OOMKill"])
}
// Test_MetricsInc_EphemeralContainer starts from a zero-value Metrics and
// checks that IncMemberEphemeralContainerRestarts keeps independent counters
// per member ID, container name and exit code.
// NOTE(review): every call passes the same reason ("OOMKill"), so the
// per-reason split is never exercised with two different reasons.
func Test_MetricsInc_EphemeralContainer(t *testing.T) {
var m Metrics
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
m.IncMemberEphemeralContainerRestarts("ID", "server2", 137)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
m.IncMemberEphemeralContainerRestarts("ID", "server2", "OOMKill", 137)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
m.IncMemberEphemeralContainerRestarts("ID2", "server", 137)
m.IncMemberEphemeralContainerRestarts("ID", "server", 138)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138])
m.IncMemberEphemeralContainerRestarts("ID2", "server", "OOMKill", 137)
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 138)
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137]["OOMKill"])
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138]["OOMKill"])
}

View file

@ -205,7 +205,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
Time("finished", t.FinishedAt.Time).
Warn("Pod failed in unexpected way: Init Container failed")
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.ExitCode)
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
}
}
}
@ -227,7 +227,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
Time("finished", t.FinishedAt.Time).
Warn("Pod failed in unexpected way: Core Container failed")
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.ExitCode)
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
}
}
}

View file

@ -23,7 +23,7 @@ package metric_descriptions
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
var (
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`}, nil)
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`, `reason`}, nil)
)
func init() {
@ -34,6 +34,6 @@ func ArangodbOperatorMembersUnexpectedContainerExitCodes() metrics.Description {
return arangodbOperatorMembersUnexpectedContainerExitCodes
}
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string) metrics.Metric {
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code)
// ArangodbOperatorMembersUnexpectedContainerExitCodesCounter builds one sample
// of the arangodb_operator_members_unexpected_container_exit_codes metric with
// the given value. The remaining arguments are the label values, passed in the
// order namespace, name, member, container, container_type, code, reason.
// NOTE(review): despite the "Counter" suffix the sample is produced through
// Gauge() — confirm this is intended.
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string, reason string) metrics.Metric {
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code, reason)
}