mirror of
https://github.com/prometheus-operator/prometheus-operator.git
synced 2025-04-08 18:14:14 +00:00
chore: allow to save e2e diagnostics to disk
When the E2E_DIAGNOSTIC_DIRECTORY environment variable is set, the test framework will save all collected information (logs, events, resources, ...) to this directory. Related to #6973 Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
parent
293c16c854
commit
e8db5dd39a
9 changed files with 179 additions and 190 deletions
28
.github/workflows/e2e-feature-gated.yaml
vendored
28
.github/workflows/e2e-feature-gated.yaml
vendored
|
@ -1,5 +1,6 @@
|
|||
name: e2e-feature-gated
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
|
@ -23,7 +24,9 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Import environment variables from file
|
||||
run: cat ".github/env" >> $GITHUB_ENV
|
||||
run: |
|
||||
cat ".github/env" >> $GITHUB_ENV
|
||||
echo E2E_DIAGNOSTIC_DIRECTORY=$(mktemp -d) >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
|
@ -53,16 +56,17 @@ jobs:
|
|||
kubectl cluster-info
|
||||
- name: Load images
|
||||
run: |
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-operator:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-config-reloader:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/admission-webhook:$(git rev-parse --short HEAD)
|
||||
make test-e2e-images
|
||||
kubectl apply -f scripts/kind-rbac.yaml
|
||||
- name: Run tests
|
||||
run: >
|
||||
EXCLUDE_ALERTMANAGER_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_ALL_NS_TESTS=exclude
|
||||
EXCLUDE_THANOSRULER_TESTS=exclude
|
||||
EXCLUDE_OPERATOR_UPGRADE_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_UPGRADE_TESTS=exclude
|
||||
make test-e2e
|
||||
env:
|
||||
E2E_DIAGNOSTIC_DIRECTORY: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
run: |
|
||||
make test-e2e-feature-gates
|
||||
- name: Upload diagnostics artifact
|
||||
if: ${{ failure() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cluster-state
|
||||
path: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
retention-days: 15
|
||||
|
|
54
.github/workflows/e2e-prometheus3.yaml
vendored
54
.github/workflows/e2e-prometheus3.yaml
vendored
|
@ -1,6 +1,7 @@
|
|||
name: e2e-prometheus3
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '30 14 * * *' # Every day 14:30
|
||||
|
||||
|
@ -15,39 +16,21 @@ jobs:
|
|||
suite: [alertmanager, prometheus, prometheusAllNS, thanosruler, operatorUpgrade]
|
||||
include:
|
||||
- suite: alertmanager
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: ""
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-alertmanager
|
||||
- suite: prometheus
|
||||
prometheus: ""
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-prometheus
|
||||
- suite: prometheusAllNS
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: ""
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-prometheus-all-namespaces
|
||||
- suite: thanosruler
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: ""
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-thanos-ruler
|
||||
- suite: operatorUpgrade
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: ""
|
||||
target: test-e2e-operator-upgrade # the operatorUpgrade suite runs the operator upgrade tests; Prometheus upgrades have a dedicated workflow
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Import environment variables from file
|
||||
run: cat ".github/env" >> $GITHUB_ENV
|
||||
run: |
|
||||
cat ".github/env" >> $GITHUB_ENV
|
||||
echo E2E_DIAGNOSTIC_DIRECTORY=$(mktemp -d) >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
|
@ -77,21 +60,14 @@ jobs:
|
|||
kubectl cluster-info
|
||||
- name: Load images
|
||||
run: |
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-operator:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-config-reloader:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/admission-webhook:$(git rev-parse --short HEAD)
|
||||
make test-e2e-images
|
||||
kubectl apply -f scripts/kind-rbac.yaml
|
||||
- name: Run tests
|
||||
run: >
|
||||
TEST_EXPERIMENTAL_PROMETHEUS=true
|
||||
EXCLUDE_ALERTMANAGER_TESTS=${{ matrix.alertmanager }}
|
||||
EXCLUDE_PROMETHEUS_TESTS=${{ matrix.prometheus }}
|
||||
EXCLUDE_PROMETHEUS_ALL_NS_TESTS=${{ matrix.prometheusAllNS }}
|
||||
EXCLUDE_THANOSRULER_TESTS=${{ matrix.thanosruler }}
|
||||
EXCLUDE_OPERATOR_UPGRADE_TESTS=${{ matrix.operatorUpgrade }}
|
||||
EXCLUDE_FEATURE_GATED_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_UPGRADE_TESTS=exclude
|
||||
make test-e2e
|
||||
env:
|
||||
E2E_DIAGNOSTIC_DIRECTORY: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
TEST_EXPERIMENTAL_PROMETHEUS: "true"
|
||||
run: |
|
||||
make ${{ matrix.target }}
|
||||
|
||||
# Added to summarize the matrix and allow easy branch protection rules setup
|
||||
e2e-tests-result:
|
||||
|
|
59
.github/workflows/e2e.yaml
vendored
59
.github/workflows/e2e.yaml
vendored
|
@ -1,5 +1,6 @@
|
|||
name: e2e
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
|
@ -25,39 +26,21 @@ jobs:
|
|||
suite: [alertmanager, prometheus, prometheusAllNS, thanosruler, operatorUpgrade]
|
||||
include:
|
||||
- suite: alertmanager
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: ""
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-alertmanager
|
||||
- suite: prometheus
|
||||
prometheus: ""
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-prometheus
|
||||
- suite: prometheusAllNS
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: ""
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-prometheus-all-namespaces
|
||||
- suite: thanosruler
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: ""
|
||||
operatorUpgrade: "exclude"
|
||||
target: test-e2e-thanos-ruler
|
||||
- suite: operatorUpgrade
|
||||
prometheus: "exclude"
|
||||
prometheusAllNS: "exclude"
|
||||
alertmanager: "exclude"
|
||||
thanosruler: "exclude"
|
||||
operatorUpgrade: ""
|
||||
target: test-e2e-operator-upgrade
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Import environment variables from file
|
||||
run: cat ".github/env" >> $GITHUB_ENV
|
||||
run: |
|
||||
cat ".github/env" >> $GITHUB_ENV
|
||||
echo E2E_DIAGNOSTIC_DIRECTORY=$(mktemp -d) >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
|
@ -87,20 +70,20 @@ jobs:
|
|||
kubectl cluster-info
|
||||
- name: Load images
|
||||
run: |
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-operator:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-config-reloader:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/admission-webhook:$(git rev-parse --short HEAD)
|
||||
make test-e2e-images
|
||||
kubectl apply -f scripts/kind-rbac.yaml
|
||||
- name: Run tests
|
||||
run: >
|
||||
EXCLUDE_ALERTMANAGER_TESTS=${{ matrix.alertmanager }}
|
||||
EXCLUDE_PROMETHEUS_TESTS=${{ matrix.prometheus }}
|
||||
EXCLUDE_PROMETHEUS_ALL_NS_TESTS=${{ matrix.prometheusAllNS }}
|
||||
EXCLUDE_THANOSRULER_TESTS=${{ matrix.thanosruler }}
|
||||
EXCLUDE_OPERATOR_UPGRADE_TESTS=${{ matrix.operatorUpgrade }}
|
||||
EXCLUDE_FEATURE_GATED_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_UPGRADE_TESTS=exclude
|
||||
make test-e2e
|
||||
env:
|
||||
E2E_DIAGNOSTIC_DIRECTORY: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
run: |
|
||||
make ${{ matrix.target }}
|
||||
- name: Upload diagnostics artifact
|
||||
if: ${{ failure() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cluster-state
|
||||
path: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
retention-days: 15
|
||||
|
||||
# Added to summarize the matrix and allow easy branch protection rules setup
|
||||
e2e-tests-result:
|
||||
|
|
30
.github/workflows/test-prom-version-upgrade.yaml
vendored
30
.github/workflows/test-prom-version-upgrade.yaml
vendored
|
@ -1,5 +1,6 @@
|
|||
name: Test Prometheus upgrades
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
- cron: '37 15 * * *' # Every day 15:37
|
||||
|
||||
|
@ -10,7 +11,9 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Import environment variables from file
|
||||
run: cat ".github/env" >> $GITHUB_ENV
|
||||
run: |
|
||||
cat ".github/env" >> $GITHUB_ENV
|
||||
echo E2E_DIAGNOSTIC_DIRECTORY=$(mktemp -d) >> $GITHUB_ENV
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
|
@ -35,18 +38,17 @@ jobs:
|
|||
kubectl get pods -A
|
||||
- name: Load images
|
||||
run: |
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-operator:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/prometheus-config-reloader:$(git rev-parse --short HEAD)
|
||||
kind load docker-image -n e2e quay.io/prometheus-operator/admission-webhook:$(git rev-parse --short HEAD)
|
||||
make test-e2e-images
|
||||
kubectl apply -f scripts/kind-rbac.yaml
|
||||
- name: Run tests
|
||||
run: >
|
||||
TEST_EXPERIMENTAL_PROMETHEUS=false
|
||||
EXCLUDE_ALL_NS_TESTS=exclude
|
||||
EXCLUDE_ALERTMANAGER_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_TESTS=exclude
|
||||
EXCLUDE_PROMETHEUS_ALL_NS_TESTS=exclude
|
||||
EXCLUDE_THANOSRULER_TESTS=exclude
|
||||
EXCLUDE_OPERATOR_UPGRADE_TESTS=exclude
|
||||
EXCLUDE_FEATURE_GATED_TESTS=exclude
|
||||
make test-e2e
|
||||
env:
|
||||
E2E_DIAGNOSTIC_DIRECTORY: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
run: |
|
||||
make test-e2e-prometheus-upgrade
|
||||
- name: Upload diagnostics artifact
|
||||
if: ${{ failure() }}
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: cluster-state
|
||||
path: ${{ env.E2E_DIAGNOSTIC_DIRECTORY }}
|
||||
retention-days: 15
|
||||
|
|
|
@ -323,23 +323,6 @@ func testAMClusterInitialization(t *testing.T) {
|
|||
alertmanager := framework.MakeBasicAlertmanager(ns, "test", int32(amClusterSize))
|
||||
alertmanagerService := framework.MakeAlertmanagerService(alertmanager.Name, "alertmanager-service", v1.ServiceTypeClusterIP)
|
||||
|
||||
// Print Alertmanager logs on failure.
|
||||
defer func() {
|
||||
if !t.Failed() {
|
||||
return
|
||||
}
|
||||
|
||||
for i := 0; i < amClusterSize; i++ {
|
||||
b := &bytes.Buffer{}
|
||||
err := framework.WritePodLogs(context.Background(), b, ns, fmt.Sprintf("alertmanager-test-%v", strconv.Itoa(i)), testFramework.LogOptions{})
|
||||
if err != nil {
|
||||
t.Logf("failed to get logs: %v", err)
|
||||
}
|
||||
|
||||
t.Log(b.String())
|
||||
}
|
||||
}()
|
||||
|
||||
_, err := framework.CreateAlertmanagerAndWaitUntilReady(context.Background(), alertmanager)
|
||||
require.NoError(t, err)
|
||||
|
||||
|
|
|
@ -178,36 +178,36 @@ func deployInstrumentedApplicationWithTLS(name, ns string) error {
|
|||
|
||||
// createRemoteWriteStack creates a pair of Prometheus objects with the first
|
||||
// instance scraping targets and remote-writing samples to the second one.
|
||||
// The 1st and 2nd returned values are the scraping Prometheus object and its service.
|
||||
// The 3rd and 4th returned values are the receiver Prometheus object and its service.
|
||||
func createRemoteWriteStack(name, ns string, prwtc testFramework.PromRemoteWriteTestConfig) (*monitoringv1.Prometheus, *v1.Service, *monitoringv1.Prometheus, *v1.Service, error) {
|
||||
// The 1st returned value is the scraping Prometheus service.
|
||||
// The 2nd returned value is the receiver Prometheus service.
|
||||
func createRemoteWriteStack(name, ns string, prwtc testFramework.PromRemoteWriteTestConfig) (*v1.Service, *v1.Service, error) {
|
||||
// Prometheus instance with remote-write receiver enabled.
|
||||
receiverName := fmt.Sprintf("%s-%s", name, "receiver")
|
||||
rwReceiver := framework.MakeBasicPrometheus(ns, receiverName, receiverName, 1)
|
||||
framework.EnableRemoteWriteReceiverWithTLS(rwReceiver)
|
||||
|
||||
if _, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, rwReceiver); err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
rwReceiverService := framework.MakePrometheusService(receiverName, receiverName, v1.ServiceTypeClusterIP)
|
||||
if _, err := framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, rwReceiverService); err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Prometheus instance scraping targets.
|
||||
prometheus := framework.MakeBasicPrometheus(ns, name, name, 1)
|
||||
prwtc.AddRemoteWriteWithTLSToPrometheus(prometheus, "https://"+rwReceiverService.Name+":9090/api/v1/write")
|
||||
if _, err := framework.CreatePrometheusAndWaitUntilReady(context.Background(), ns, prometheus); err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
prometheusService := framework.MakePrometheusService(name, name, v1.ServiceTypeClusterIP)
|
||||
if _, err := framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, prometheusService); err != nil {
|
||||
return nil, nil, nil, nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
return prometheus, prometheusService, rwReceiver, rwReceiverService, nil
|
||||
return prometheusService, rwReceiverService, nil
|
||||
}
|
||||
|
||||
func createServiceAccountSecret(t *testing.T, saName, ns string) {
|
||||
|
@ -737,14 +737,13 @@ func testPromRemoteWriteWithTLS(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
prometheus, svc, receiver, receiverSvc, err := createRemoteWriteStack(name, ns, tc.rwConfig)
|
||||
svc, receiverSvc, err := createRemoteWriteStack(name, ns, tc.rwConfig)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Wait for the instrumented application to be scraped.
|
||||
if err := framework.WaitForHealthyTargets(context.Background(), ns, svc.Name, 1); err != nil {
|
||||
framework.PrintPrometheusLogs(context.Background(), t, prometheus)
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
@ -769,8 +768,6 @@ func testPromRemoteWriteWithTLS(t *testing.T) {
|
|||
}
|
||||
|
||||
if len(response) != 1 {
|
||||
framework.PrintPrometheusLogs(context.Background(), t, prometheus)
|
||||
framework.PrintPrometheusLogs(context.Background(), t, receiver)
|
||||
t.Fatalf("(%s, %s, %s): query %q failed: %v", tc.rwConfig.ClientKey.Filename, tc.rwConfig.ClientCert.Filename, tc.rwConfig.CA.Filename, q, response)
|
||||
}
|
||||
})
|
||||
|
@ -1574,18 +1571,6 @@ func testPromRulesExceedingConfigMapLimit(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if !t.Failed() {
|
||||
return
|
||||
}
|
||||
|
||||
b := &bytes.Buffer{}
|
||||
if err := framework.WritePodLogs(context.Background(), b, ns, "prometheus-"+p.Name+"-0", testFramework.LogOptions{}); err != nil {
|
||||
t.Logf("failed to get logs: %v", err)
|
||||
}
|
||||
t.Log(b.String())
|
||||
}()
|
||||
|
||||
pSVC := framework.MakePrometheusService(p.Name, "not-relevant", v1.ServiceTypeClusterIP)
|
||||
if finalizerFn, err := framework.CreateOrUpdateServiceAndWaitUntilReady(context.Background(), ns, pSVC); err != nil {
|
||||
t.Fatal(fmt.Errorf("creating Prometheus service failed: %w", err))
|
||||
|
|
|
@ -19,6 +19,8 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -38,6 +40,64 @@ type TestCtx struct {
|
|||
|
||||
type FinalizerFn func() error
|
||||
|
||||
// diagnosticWriter is the sink receiving the diagnostics gathered by the test
// framework. Data is pushed through Write, the sink is released with Close,
// and StartCollection is invoked with the name of a collection before the
// data belonging to that collection is written.
type diagnosticWriter interface {
	io.WriteCloser

	// StartCollection announces that subsequent writes belong to the
	// collection identified by name.
	StartCollection(name string)
}
|
||||
|
||||
// stdoutDiagnosticWriter accumulates the collected information in memory.
// Nothing is printed by the writer itself: the code owning it reads the
// buffer back once the collection is over and logs its content.
type stdoutDiagnosticWriter struct {
	// b holds everything written so far, collection headers included.
	b bytes.Buffer
}

// Write appends data to the in-memory buffer.
func (w *stdoutDiagnosticWriter) Write(data []byte) (int, error) {
	return w.b.Write(data)
}

// Close is a no-op, there is no resource to release.
func (w *stdoutDiagnosticWriter) Close() error {
	return nil
}

// StartCollection adds a header line carrying the collection name to the
// buffer so that the different collections can be told apart in the output.
func (w *stdoutDiagnosticWriter) StartCollection(name string) {
	fmt.Fprintf(&w.b, "=== %s\n", name)
}
|
||||
|
||||
// fileDiagnosticWriter writes collected information to disk.
//
// Every collection started with StartCollection is stored in its own file
// below dir. Collecting diagnostics is best-effort: failures to create or
// close a file are reported on stderr and are never returned to the code
// producing the data.
type fileDiagnosticWriter struct {
	// dir is the root directory under which the collection files are created.
	dir string
	// f is the file of the current collection, nil when there is none.
	f *os.File
}

// Write appends b to the file of the current collection.
//
// When there is no current collection (StartCollection has not been called,
// its file could not be created, or the writer has been closed), the data is
// discarded and reported as fully written: io.Writer requires a non-nil error
// whenever fewer than len(b) bytes are reported.
func (fdw *fileDiagnosticWriter) Write(b []byte) (int, error) {
	if fdw.f == nil {
		return len(b), nil
	}

	return fdw.f.Write(b)
}

// Close closes the file of the current collection, if any, and returns the
// error of that operation. The file is forgotten afterwards, which makes
// calling Close several times safe.
func (fdw *fileDiagnosticWriter) Close() error {
	if fdw.f == nil {
		return nil
	}

	err := fdw.f.Close()
	fdw.f = nil

	return err
}

// StartCollection closes the file of the previous collection and creates the
// file receiving the next one at <dir>/<name>. name may contain path
// separators, missing parent directories are created. On failure the error is
// reported on stderr and subsequent writes are discarded until the next
// successful StartCollection call.
func (fdw *fileDiagnosticWriter) StartCollection(name string) {
	if err := fdw.Close(); err != nil {
		fmt.Fprintf(os.Stderr, "%s: failed to close previous diagnostic file: %v\n", name, err)
	}

	fullpath := filepath.Join(fdw.dir, name)
	if err := os.MkdirAll(filepath.Dir(fullpath), 0755); err != nil {
		fmt.Fprintf(os.Stderr, "%s: failed to create diagnostic directory: %v\n", name, err)
		return
	}

	f, err := os.Create(fullpath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%s: failed to create diagnostic file: %v\n", name, err)
		return
	}

	fdw.f = f
}
|
||||
|
||||
func (f *Framework) NewTestCtx(t *testing.T) *TestCtx {
|
||||
// TestCtx is used among others for namespace names where '/' is forbidden
|
||||
prefix := strings.TrimPrefix(
|
||||
|
@ -62,18 +122,39 @@ func (f *Framework) NewTestCtx(t *testing.T) *TestCtx {
|
|||
}
|
||||
|
||||
// We can collect more information as we see fit over time.
|
||||
b := &bytes.Buffer{}
|
||||
tc.collectAlertmanagers(b, f)
|
||||
tc.collectPrometheuses(b, f)
|
||||
tc.collectThanosRulers(b, f)
|
||||
tc.collectPrometheusAgents(b, f)
|
||||
tc.collectLogs(b, f)
|
||||
tc.collectEvents(b, f)
|
||||
var (
|
||||
dw diagnosticWriter
|
||||
dir = os.Getenv("E2E_DIAGNOSTIC_DIRECTORY")
|
||||
)
|
||||
|
||||
t.Logf("=== %s (start)", t.Name())
|
||||
t.Log("")
|
||||
t.Log(b.String())
|
||||
t.Logf("=== %s (end)", t.Name())
|
||||
if dir != "" {
|
||||
dw = &fileDiagnosticWriter{
|
||||
dir: filepath.Join(dir, t.Name()),
|
||||
}
|
||||
} else {
|
||||
dw = &stdoutDiagnosticWriter{}
|
||||
}
|
||||
defer dw.Close()
|
||||
|
||||
dw.StartCollection("alertmanagers")
|
||||
tc.collectAlertmanagers(dw, f)
|
||||
dw.StartCollection("prometheuses")
|
||||
tc.collectPrometheuses(dw, f)
|
||||
dw.StartCollection("thanosrulers")
|
||||
tc.collectThanosRulers(dw, f)
|
||||
dw.StartCollection("prometheusagents")
|
||||
tc.collectPrometheusAgents(dw, f)
|
||||
|
||||
tc.collectLogs(dw, f)
|
||||
|
||||
tc.collectEvents(dw, f)
|
||||
|
||||
if sdw, ok := dw.(*stdoutDiagnosticWriter); ok {
|
||||
t.Logf("== %s (start)", t.Name())
|
||||
t.Log("")
|
||||
t.Log(sdw.b.String())
|
||||
t.Logf("== %s (end)", t.Name())
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
|
@ -82,31 +163,31 @@ func (f *Framework) NewTestCtx(t *testing.T) *TestCtx {
|
|||
return tc
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectLogs(w io.Writer, f *Framework) {
|
||||
func (ctx *TestCtx) collectLogs(dw diagnosticWriter, f *Framework) {
|
||||
for _, ns := range ctx.namespaces {
|
||||
pods, err := f.KubeClient.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get pods: %v\n", ns, err)
|
||||
fmt.Fprintf(os.Stderr, "%s: failed to get pods: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, pod := range pods.Items {
|
||||
err := f.WritePodLogs(context.Background(), w, ns, pod.Name, LogOptions{})
|
||||
dw.StartCollection(filepath.Join("logs", pod.Namespace, pod.Name))
|
||||
err := f.WritePodLogs(context.Background(), dw, ns, pod.Name, LogOptions{})
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get pod logs: %v\n", ns, err)
|
||||
fmt.Fprintf(os.Stderr, "%s: failed to get pod logs: %v\n", ns, err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (ctx *TestCtx) collectEvents(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Events")
|
||||
func (ctx *TestCtx) collectEvents(dw diagnosticWriter, f *Framework) {
|
||||
for _, ns := range ctx.namespaces {
|
||||
b := &bytes.Buffer{}
|
||||
err := f.WriteEvents(context.Background(), b, ns)
|
||||
dw.StartCollection(filepath.Join("events", ns))
|
||||
err := f.WriteEvents(context.Background(), dw, ns)
|
||||
if err != nil {
|
||||
fmt.Fprintf(w, "%s: failed to get events: %v\n", ns, err)
|
||||
fmt.Fprintf(os.Stderr, "%s: failed to get events: %v\n", ns, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -126,7 +207,6 @@ func collectConditions(w io.Writer, prefix string, conditions []monitoringv1.Con
|
|||
}
|
||||
|
||||
func (ctx *TestCtx) collectAlertmanagers(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Alertmanagers")
|
||||
for _, ns := range ctx.namespaces {
|
||||
ams, err := f.MonClientV1.Alertmanagers(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
|
@ -141,7 +221,6 @@ func (ctx *TestCtx) collectAlertmanagers(w io.Writer, f *Framework) {
|
|||
}
|
||||
|
||||
func (ctx *TestCtx) collectPrometheuses(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== Prometheuses")
|
||||
for _, ns := range ctx.namespaces {
|
||||
ps, err := f.MonClientV1.Prometheuses(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
|
@ -156,7 +235,6 @@ func (ctx *TestCtx) collectPrometheuses(w io.Writer, f *Framework) {
|
|||
}
|
||||
|
||||
func (ctx *TestCtx) collectPrometheusAgents(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== PrometheusAgents")
|
||||
for _, ns := range ctx.namespaces {
|
||||
ps, err := f.MonClientV1alpha1.PrometheusAgents(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
|
@ -171,7 +249,6 @@ func (ctx *TestCtx) collectPrometheusAgents(w io.Writer, f *Framework) {
|
|||
}
|
||||
|
||||
func (ctx *TestCtx) collectThanosRulers(w io.Writer, f *Framework) {
|
||||
fmt.Fprintln(w, "=== ThanosRulers")
|
||||
for _, ns := range ctx.namespaces {
|
||||
trs, err := f.MonClientV1.ThanosRulers(ns).List(context.Background(), metav1.ListOptions{})
|
||||
if err != nil {
|
||||
|
|
|
@ -38,7 +38,7 @@ type LogOptions struct {
|
|||
SinceSeconds int64
|
||||
}
|
||||
|
||||
// PrintPodLogs prints the logs of a specified Pod.
|
||||
// WritePodLogs writes the logs of a specified Pod.
|
||||
func (f *Framework) WritePodLogs(ctx context.Context, w io.Writer, ns, pod string, opts LogOptions) error {
|
||||
p, err := f.KubeClient.CoreV1().Pods(ns).Get(ctx, pod, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
|
@ -69,7 +69,6 @@ func (f *Framework) WritePodLogs(ctx context.Context, w io.Writer, ns, pod strin
|
|||
return fmt.Errorf("failed to retrieve logs of container %q (pod %s/%s): %w", c, ns, pod, err)
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, "=== Logs (pod=%s/%s container=%s)\n", ns, pod, c)
|
||||
_, err = w.Write(resp)
|
||||
fmt.Fprint(w, "\n")
|
||||
if err != nil {
|
||||
|
|
|
@ -23,7 +23,6 @@ import (
|
|||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
|
@ -798,25 +797,6 @@ func (f *Framework) PrometheusQuery(ns, svcName, scheme, query string) ([]Promet
|
|||
return q.Data.Result, nil
|
||||
}
|
||||
|
||||
// PrintPrometheusLogs prints the logs for each Prometheus replica.
|
||||
func (f *Framework) PrintPrometheusLogs(ctx context.Context, t *testing.T, p *monitoringv1.Prometheus) {
|
||||
if p == nil {
|
||||
return
|
||||
}
|
||||
|
||||
replicas := int(*p.Spec.Replicas)
|
||||
for i := 0; i < replicas; i++ {
|
||||
b := &bytes.Buffer{}
|
||||
err := f.WritePodLogs(ctx, b, p.Namespace, fmt.Sprintf("prometheus-%s-%d", p.Name, i), LogOptions{Container: "prometheus"})
|
||||
if err != nil {
|
||||
t.Logf("failed to retrieve logs for replica[%d]: %v", i, err)
|
||||
continue
|
||||
}
|
||||
t.Logf("Prometheus %q/%q (replica #%d) logs:", p.Namespace, p.Name, i)
|
||||
t.Log(b.String())
|
||||
}
|
||||
}
|
||||
|
||||
func (f *Framework) WaitForPrometheusFiringAlert(ctx context.Context, ns, svcName, alertName string) error {
|
||||
var loopError error
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue