2021-02-08 07:45:21 +00:00
|
|
|
// The error kernel shall handle errors for a given process.
|
2021-08-16 11:01:12 +00:00
|
|
|
// This will be cases where the process itself was unable
|
2021-02-08 07:45:21 +00:00
|
|
|
// to handle the error on its own, and we might need to
|
|
|
|
// restart the process, or send a message back to the operator
|
2021-08-16 11:01:12 +00:00
|
|
|
// that the action which the message was supposed to trigger
|
|
|
|
// failed, or that an event was unable to be processed.
|
2021-02-08 07:45:21 +00:00
|
|
|
|
2021-02-08 05:02:54 +00:00
|
|
|
package steward
|
|
|
|
|
|
|
|
import (
|
2021-08-03 11:43:05 +00:00
|
|
|
"context"
|
2021-02-08 05:02:54 +00:00
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
)
|
|
|
|
|
|
|
|
// errorKernel is the structure that will hold all the error
|
|
|
|
// handling values and logic.
|
|
|
|
type errorKernel struct {
|
2021-03-12 08:38:19 +00:00
|
|
|
// NOTE: The errorKernel should probably have a concept
|
2021-02-08 07:45:21 +00:00
|
|
|
// of error-state which is a map of all the processes,
|
|
|
|
// how many times a process have failed over the same
|
|
|
|
// message etc...
|
2021-02-24 09:58:02 +00:00
|
|
|
|
|
|
|
// errorCh is used to report errors from a process
|
|
|
|
errorCh chan errProcess
|
2021-08-04 06:35:35 +00:00
|
|
|
|
|
|
|
ctx context.Context
|
|
|
|
cancel context.CancelFunc
|
2021-02-08 05:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// newErrorKernel will initialize and return a new error kernel
|
2021-08-04 06:35:35 +00:00
|
|
|
func newErrorKernel(ctx context.Context) *errorKernel {
|
|
|
|
ctxC, cancel := context.WithCancel(ctx)
|
|
|
|
|
2021-02-08 05:02:54 +00:00
|
|
|
return &errorKernel{
|
2021-02-24 09:58:02 +00:00
|
|
|
errorCh: make(chan errProcess, 2),
|
2021-08-04 06:35:35 +00:00
|
|
|
ctx: ctxC,
|
|
|
|
cancel: cancel,
|
2021-02-08 05:02:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// startErrorKernel will start the error kernel and check if there
|
|
|
|
// have been reveived any errors from any of the processes, and
|
|
|
|
// handle them appropriately.
|
2021-03-12 08:38:19 +00:00
|
|
|
//
|
|
|
|
// NOTE: Since a process will be locked while waiting to send the error
|
2021-02-08 05:02:54 +00:00
|
|
|
// on the errorCh maybe it makes sense to have a channel inside the
|
|
|
|
// processes error handling with a select so we can send back to the
|
|
|
|
// process if it should continue or not based not based on how severe
|
|
|
|
// the error where. This should be right after sending the error
|
|
|
|
// sending in the process.
|
2021-08-04 06:35:35 +00:00
|
|
|
func (e *errorKernel) start(newMessagesCh chan<- []subjectAndMessage) error {
|
2021-03-12 08:38:19 +00:00
|
|
|
// NOTE: For now it will just print the error messages to the
|
2021-02-08 05:02:54 +00:00
|
|
|
// console.
|
|
|
|
|
2021-08-03 11:43:05 +00:00
|
|
|
for {
|
|
|
|
var er errProcess
|
|
|
|
select {
|
|
|
|
case er = <-e.errorCh:
|
2021-08-04 06:35:35 +00:00
|
|
|
case <-e.ctx.Done():
|
2021-08-03 11:43:05 +00:00
|
|
|
return fmt.Errorf("info: stopping errorKernel")
|
|
|
|
}
|
2021-02-08 05:02:54 +00:00
|
|
|
|
2021-08-03 11:43:05 +00:00
|
|
|
// We should be able to handle each error individually and
|
|
|
|
// also concurrently, so the handler is started in it's
|
|
|
|
// own go routine
|
|
|
|
go func() {
|
|
|
|
// NOTE: Here we should check the severity of the error,
|
|
|
|
// and also possibly the the error-state of the process
|
|
|
|
// that fails, so we can decide if we should stop and
|
|
|
|
// start a new process to replace to old one, or if we
|
|
|
|
// should just kill the process and send message back to
|
|
|
|
// the operator....or other ?
|
|
|
|
//
|
|
|
|
// Just print the error, and tell the process to continue
|
2021-02-15 10:28:27 +00:00
|
|
|
|
2021-08-03 11:43:05 +00:00
|
|
|
// log.Printf("*** error_kernel: %#v, type=%T\n", er, er)
|
|
|
|
log.Printf("TESTING, we received and error from the process, but we're telling the process back to continue\n")
|
2021-02-24 14:43:31 +00:00
|
|
|
|
2021-08-03 11:43:05 +00:00
|
|
|
// // TESTING: Creating an error message to send to errorCentral
|
|
|
|
// fmt.Printf(" --- Sending error message to central !!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
|
|
|
|
// sam := subjectAndMessage{
|
|
|
|
// Subject: Subject{
|
|
|
|
// ToNode: "errorCentral",
|
|
|
|
// CommandOrEvent: EventNACK,
|
|
|
|
// Method: ErrorLog,
|
|
|
|
// },
|
|
|
|
// Message: Message{
|
|
|
|
// ToNode: "errorCentral",
|
|
|
|
// Data: []string{"some tull here .............."},
|
|
|
|
// CommandOrEvent: EventNACK,
|
|
|
|
// Method: ErrorLog,
|
|
|
|
// },
|
|
|
|
// }
|
|
|
|
// newMessagesCh <- []subjectAndMessage{sam}
|
2021-02-24 14:43:31 +00:00
|
|
|
|
2021-08-03 11:43:05 +00:00
|
|
|
select {
|
|
|
|
case er.errorActionCh <- errActionContinue:
|
2021-08-04 06:35:35 +00:00
|
|
|
case <-e.ctx.Done():
|
2021-08-03 11:43:05 +00:00
|
|
|
log.Printf("info: errorKernel: got ctx.Done, will stop waiting for errAction\n")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2021-02-08 05:02:54 +00:00
|
|
|
}
|
|
|
|
|
2021-08-04 06:35:35 +00:00
|
|
|
func (e *errorKernel) stop() {
|
|
|
|
e.cancel()
|
|
|
|
}
|
|
|
|
|
2021-02-08 05:02:54 +00:00
|
|
|
// errorAction is the action the error kernel sends back to a process
// on its errorActionCh after the process has reported an error.
type errorAction int

const (
	// errActionContinue tells the worker process to continue. The
	// error is only printed.
	errActionContinue errorAction = iota
	// errActionKill tells the worker process to stop.
	// NOTE(review): the earlier comment described this as "log the
	// error, stop the current worker process, and spawn a new"; what
	// the receiving side actually does is not decided in this file.
	errActionKill

	// NOTE: A third action, "kill and die", is planned but not
	// declared yet. It should log the error, stop the current worker
	// process, and send a message back to the master supervisor that
	// it was unable to complete the action of the current message.
	// The error message should contain a copy of the original message.
)
|
|
|
|
|
|
|
|
type errProcess struct {
|
2021-02-08 07:45:21 +00:00
|
|
|
// Channel for communicating the action to take back to
|
|
|
|
// to the process who triggered the error
|
2021-02-08 05:02:54 +00:00
|
|
|
errorActionCh chan errorAction
|
2021-02-08 07:45:21 +00:00
|
|
|
// Some informational text
|
|
|
|
infoText string
|
|
|
|
// The process structure that belongs to a given process
|
|
|
|
process process
|
|
|
|
// The message that where in progress when error occured
|
|
|
|
message Message
|
2021-02-08 05:02:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (e errProcess) Error() string {
|
|
|
|
return fmt.Sprintf("worker error: proc = %#v, message = %#v", e.process, e.message)
|
|
|
|
}
|