mirror of
https://github.com/arangodb/kube-arangodb.git
synced 2024-12-14 11:57:37 +00:00
[Feature] Add Reason in OOM Metric (#1308)
This commit is contained in:
parent
8fb93b85f9
commit
9107c053a7
7 changed files with 98 additions and 60 deletions
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
|
## [master](https://github.com/arangodb/kube-arangodb/tree/master) (N/A)
|
||||||
- (Feature) ArangoBackup create retries and MaxIterations limit
|
- (Feature) ArangoBackup create retries and MaxIterations limit
|
||||||
|
- (Feature) Add Reason in OOM Metric
|
||||||
|
|
||||||
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
|
## [1.2.27](https://github.com/arangodb/kube-arangodb/tree/1.2.27) (2023-04-27)
|
||||||
- (Feature) Add InSync Cache
|
- (Feature) Add InSync Cache
|
||||||
|
|
|
@ -14,3 +14,4 @@ Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContai
|
||||||
| container | Container Name |
|
| container | Container Name |
|
||||||
| container_type | Container/InitContainer/EphemeralContainer |
|
| container_type | Container/InitContainer/EphemeralContainer |
|
||||||
| code | ExitCode |
|
| code | ExitCode |
|
||||||
|
| reason | Termination reason reported for the container (e.g. OOMKilled) |
|
||||||
|
|
|
@ -219,6 +219,8 @@ namespaces:
|
||||||
description: "Container/InitContainer/EphemeralContainer"
|
description: "Container/InitContainer/EphemeralContainer"
|
||||||
- key: code
|
- key: code
|
||||||
description: "ExitCode"
|
description: "ExitCode"
|
||||||
|
- key: reason
|
||||||
|
description: "Termination reason reported for the container (e.g. OOMKilled)"
|
||||||
engine:
|
engine:
|
||||||
panics_recovered:
|
panics_recovered:
|
||||||
shortDescription: "Number of Panics recovered inside Operator reconciliation loop"
|
shortDescription: "Number of Panics recovered inside Operator reconciliation loop"
|
||||||
|
|
|
@ -39,7 +39,7 @@ type Metrics struct {
|
||||||
Members map[string]MetricMember
|
Members map[string]MetricMember
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
|
func (m *Metrics) IncMemberContainerRestarts(id, container, reason string, code int32) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -63,14 +63,22 @@ func (m *Metrics) IncMemberContainerRestarts(id, container string, code int32) {
|
||||||
cr = MetricMemberRestarts{}
|
cr = MetricMemberRestarts{}
|
||||||
}
|
}
|
||||||
|
|
||||||
cr[code]++
|
cd := cr[code]
|
||||||
|
|
||||||
|
if cd == nil {
|
||||||
|
cd = MetricMemberRestartReason{}
|
||||||
|
}
|
||||||
|
|
||||||
|
cd[reason]++
|
||||||
|
|
||||||
|
cr[code] = cd
|
||||||
|
|
||||||
v.ContainerRestarts[container] = cr
|
v.ContainerRestarts[container] = cr
|
||||||
|
|
||||||
m.Members[id] = v
|
m.Members[id] = v
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int32) {
|
func (m *Metrics) IncMemberInitContainerRestarts(id, container, reason string, code int32) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -94,14 +102,22 @@ func (m *Metrics) IncMemberInitContainerRestarts(id, container string, code int3
|
||||||
cr = MetricMemberRestarts{}
|
cr = MetricMemberRestarts{}
|
||||||
}
|
}
|
||||||
|
|
||||||
cr[code]++
|
cd := cr[code]
|
||||||
|
|
||||||
|
if cd == nil {
|
||||||
|
cd = MetricMemberRestartReason{}
|
||||||
|
}
|
||||||
|
|
||||||
|
cd[reason]++
|
||||||
|
|
||||||
|
cr[code] = cd
|
||||||
|
|
||||||
v.InitContainerRestarts[container] = cr
|
v.InitContainerRestarts[container] = cr
|
||||||
|
|
||||||
m.Members[id] = v
|
m.Members[id] = v
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code int32) {
|
func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container, reason string, code int32) {
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -125,7 +141,15 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
|
||||||
cr = MetricMemberRestarts{}
|
cr = MetricMemberRestarts{}
|
||||||
}
|
}
|
||||||
|
|
||||||
cr[code]++
|
cd := cr[code]
|
||||||
|
|
||||||
|
if cd == nil {
|
||||||
|
cd = MetricMemberRestartReason{}
|
||||||
|
}
|
||||||
|
|
||||||
|
cd[reason]++
|
||||||
|
|
||||||
|
cr[code] = cd
|
||||||
|
|
||||||
v.EphemeralContainerRestarts[container] = cr
|
v.EphemeralContainerRestarts[container] = cr
|
||||||
|
|
||||||
|
@ -133,31 +157,41 @@ func (m *Metrics) IncMemberEphemeralContainerRestarts(id, container string, code
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricMember struct {
|
type MetricMember struct {
|
||||||
ContainerRestarts map[string]MetricMemberRestarts
|
ContainerRestarts MetricContainerRestarts
|
||||||
InitContainerRestarts map[string]MetricMemberRestarts
|
InitContainerRestarts MetricContainerRestarts
|
||||||
EphemeralContainerRestarts map[string]MetricMemberRestarts
|
EphemeralContainerRestarts MetricContainerRestarts
|
||||||
}
|
}
|
||||||
|
|
||||||
type MetricMemberRestarts map[int32]uint64
|
type MetricContainerRestarts map[string]MetricMemberRestarts
|
||||||
|
|
||||||
|
type MetricMemberRestarts map[int32]MetricMemberRestartReason
|
||||||
|
|
||||||
|
type MetricMemberRestartReason map[string]uint64
|
||||||
|
|
||||||
func (d *Resources) CollectMetrics(m metrics.PushMetric) {
|
func (d *Resources) CollectMetrics(m metrics.PushMetric) {
|
||||||
for member, info := range d.metrics.Members {
|
for member, info := range d.metrics.Members {
|
||||||
// Containers
|
// Containers
|
||||||
for container, restarts := range info.ContainerRestarts {
|
for container, restarts := range info.ContainerRestarts {
|
||||||
for code, count := range restarts {
|
for code, reasons := range restarts {
|
||||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code)))
|
for reason, count := range reasons {
|
||||||
|
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "container", fmt.Sprintf("%d", code), reason))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// InitContainers
|
// InitContainers
|
||||||
for container, restarts := range info.InitContainerRestarts {
|
for container, restarts := range info.InitContainerRestarts {
|
||||||
for code, count := range restarts {
|
for code, reasons := range restarts {
|
||||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code)))
|
for reason, count := range reasons {
|
||||||
|
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "initContainer", fmt.Sprintf("%d", code), reason))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// EphemeralContainers
|
// EphemeralContainers
|
||||||
for container, restarts := range info.EphemeralContainerRestarts {
|
for container, restarts := range info.EphemeralContainerRestarts {
|
||||||
for code, count := range restarts {
|
for code, reasons := range restarts {
|
||||||
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code)))
|
for reason, count := range reasons {
|
||||||
|
m.Push(metric_descriptions.ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(float64(count), d.namespace, d.name, member, container, "ephemeralContainer", fmt.Sprintf("%d", code), reason))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,65 +29,65 @@ import (
|
||||||
func Test_MetricsInc_Container(t *testing.T) {
|
func Test_MetricsInc_Container(t *testing.T) {
|
||||||
var m Metrics
|
var m Metrics
|
||||||
|
|
||||||
m.IncMemberContainerRestarts("ID", "server", 137)
|
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
|
|
||||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberContainerRestarts("ID", "server", 137)
|
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberContainerRestarts("ID", "server2", 137)
|
m.IncMemberContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberContainerRestarts("ID2", "server", 137)
|
m.IncMemberContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||||
m.IncMemberContainerRestarts("ID", "server", 138)
|
m.IncMemberContainerRestarts("ID", "server", "OOMKill", 138)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].ContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server2"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID2"].ContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138])
|
require.EqualValues(t, 1, m.Members["ID"].ContainerRestarts["server"][138]["OOMKill"])
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_MetricsInc_InitContainer(t *testing.T) {
|
func Test_MetricsInc_InitContainer(t *testing.T) {
|
||||||
var m Metrics
|
var m Metrics
|
||||||
|
|
||||||
m.IncMemberInitContainerRestarts("ID", "server", 137)
|
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
|
|
||||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberInitContainerRestarts("ID", "server", 137)
|
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberInitContainerRestarts("ID", "server2", 137)
|
m.IncMemberInitContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberInitContainerRestarts("ID2", "server", 137)
|
m.IncMemberInitContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||||
m.IncMemberInitContainerRestarts("ID", "server", 138)
|
m.IncMemberInitContainerRestarts("ID", "server", "OOMKill", 138)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server2"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID2"].InitContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138])
|
require.EqualValues(t, 1, m.Members["ID"].InitContainerRestarts["server"][138]["OOMKill"])
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_MetricsInc_EphemeralContainer(t *testing.T) {
|
func Test_MetricsInc_EphemeralContainer(t *testing.T) {
|
||||||
var m Metrics
|
var m Metrics
|
||||||
|
|
||||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
|
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
|
|
||||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 137)
|
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberEphemeralContainerRestarts("ID", "server2", 137)
|
m.IncMemberEphemeralContainerRestarts("ID", "server2", "OOMKill", 137)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
|
||||||
|
|
||||||
m.IncMemberEphemeralContainerRestarts("ID2", "server", 137)
|
m.IncMemberEphemeralContainerRestarts("ID2", "server", "OOMKill", 137)
|
||||||
m.IncMemberEphemeralContainerRestarts("ID", "server", 138)
|
m.IncMemberEphemeralContainerRestarts("ID", "server", "OOMKill", 138)
|
||||||
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137])
|
require.EqualValues(t, 2, m.Members["ID"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137])
|
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server2"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137])
|
require.EqualValues(t, 1, m.Members["ID2"].EphemeralContainerRestarts["server"][137]["OOMKill"])
|
||||||
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138])
|
require.EqualValues(t, 1, m.Members["ID"].EphemeralContainerRestarts["server"][138]["OOMKill"])
|
||||||
}
|
}
|
||||||
|
|
|
@ -205,7 +205,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
|
||||||
Time("finished", t.FinishedAt.Time).
|
Time("finished", t.FinishedAt.Time).
|
||||||
Warn("Pod failed in unexpected way: Init Container failed")
|
Warn("Pod failed in unexpected way: Init Container failed")
|
||||||
|
|
||||||
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.ExitCode)
|
r.metrics.IncMemberInitContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -227,7 +227,7 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
|
||||||
Time("finished", t.FinishedAt.Time).
|
Time("finished", t.FinishedAt.Time).
|
||||||
Warn("Pod failed in unexpected way: Core Container failed")
|
Warn("Pod failed in unexpected way: Core Container failed")
|
||||||
|
|
||||||
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.ExitCode)
|
r.metrics.IncMemberContainerRestarts(memberStatus.ID, container, t.Reason, t.ExitCode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,7 @@ package metric_descriptions
|
||||||
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
import "github.com/arangodb/kube-arangodb/pkg/util/metrics"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`}, nil)
|
arangodbOperatorMembersUnexpectedContainerExitCodes = metrics.NewDescription("arangodb_operator_members_unexpected_container_exit_codes", "Counter of unexpected restarts in pod (Containers/InitContainers/EphemeralContainers)", []string{`namespace`, `name`, `member`, `container`, `container_type`, `code`, `reason`}, nil)
|
||||||
)
|
)
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -34,6 +34,6 @@ func ArangodbOperatorMembersUnexpectedContainerExitCodes() metrics.Description {
|
||||||
return arangodbOperatorMembersUnexpectedContainerExitCodes
|
return arangodbOperatorMembersUnexpectedContainerExitCodes
|
||||||
}
|
}
|
||||||
|
|
||||||
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string) metrics.Metric {
|
func ArangodbOperatorMembersUnexpectedContainerExitCodesCounter(value float64, namespace string, name string, member string, container string, containerType string, code string, reason string) metrics.Metric {
|
||||||
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code)
|
return ArangodbOperatorMembersUnexpectedContainerExitCodes().Gauge(value, namespace, name, member, container, containerType, code, reason)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue