mirror of
https://github.com/postmannen/ctrl.git
synced 2025-01-07 04:49:17 +00:00
sending errors to central
This commit is contained in:
parent
6400645283
commit
ca295d97d8
6 changed files with 90 additions and 62 deletions
|
@ -15,7 +15,7 @@ import (
|
||||||
// errorKernel is the structure that will hold all the error
|
// errorKernel is the structure that will hold all the error
|
||||||
// handling values and logic.
|
// handling values and logic.
|
||||||
type errorKernel struct {
|
type errorKernel struct {
|
||||||
// TODO: The errorKernel should probably have a concept
|
// NOTE: The errorKernel should probably have a concept
|
||||||
// of error-state which is a map of all the processes,
|
// of error-state which is a map of all the processes,
|
||||||
// how many times a process have failed over the same
|
// how many times a process have failed over the same
|
||||||
// message etc...
|
// message etc...
|
||||||
|
@ -34,14 +34,15 @@ func newErrorKernel() *errorKernel {
|
||||||
// startErrorKernel will start the error kernel and check if there
|
// startErrorKernel will start the error kernel and check if there
|
||||||
// have been reveived any errors from any of the processes, and
|
// have been reveived any errors from any of the processes, and
|
||||||
// handle them appropriately.
|
// handle them appropriately.
|
||||||
// TODO: Since a process will be locked while waiting to send the error
|
//
|
||||||
|
// NOTE: Since a process will be locked while waiting to send the error
|
||||||
// on the errorCh maybe it makes sense to have a channel inside the
|
// on the errorCh maybe it makes sense to have a channel inside the
|
||||||
// processes error handling with a select so we can send back to the
|
// processes error handling with a select so we can send back to the
|
||||||
// process if it should continue or not based not based on how severe
|
// process if it should continue or not based not based on how severe
|
||||||
// the error where. This should be right after sending the error
|
// the error where. This should be right after sending the error
|
||||||
// sending in the process.
|
// sending in the process.
|
||||||
func (e *errorKernel) startErrorKernel(newMessagesCh chan<- []subjectAndMessage) {
|
func (e *errorKernel) startErrorKernel(newMessagesCh chan<- []subjectAndMessage) {
|
||||||
// TODO: For now it will just print the error messages to the
|
// NOTE: For now it will just print the error messages to the
|
||||||
// console.
|
// console.
|
||||||
go func() {
|
go func() {
|
||||||
|
|
||||||
|
@ -52,7 +53,7 @@ func (e *errorKernel) startErrorKernel(newMessagesCh chan<- []subjectAndMessage)
|
||||||
// also concurrently, so the handler is started in it's
|
// also concurrently, so the handler is started in it's
|
||||||
// own go routine
|
// own go routine
|
||||||
go func() {
|
go func() {
|
||||||
// TODO: Here we should check the severity of the error,
|
// NOTE: Here we should check the severity of the error,
|
||||||
// and also possibly the the error-state of the process
|
// and also possibly the the error-state of the process
|
||||||
// that fails, so we can decide if we should stop and
|
// that fails, so we can decide if we should stop and
|
||||||
// start a new process to replace to old one, or if we
|
// start a new process to replace to old one, or if we
|
||||||
|
|
Binary file not shown.
|
@ -36,7 +36,6 @@ type Message struct {
|
||||||
|
|
||||||
// gobEncodePayload will encode the message structure along with its
|
// gobEncodePayload will encode the message structure along with its
|
||||||
// valued in gob binary format.
|
// valued in gob binary format.
|
||||||
// TODO: Check if it adds value to compress with gzip.
|
|
||||||
func gobEncodeMessage(m Message) ([]byte, error) {
|
func gobEncodeMessage(m Message) ([]byte, error) {
|
||||||
var buf bytes.Buffer
|
var buf bytes.Buffer
|
||||||
gobEnc := gob.NewEncoder(&buf)
|
gobEnc := gob.NewEncoder(&buf)
|
||||||
|
|
118
process.go
118
process.go
|
@ -32,8 +32,11 @@ type process struct {
|
||||||
node node
|
node node
|
||||||
// The processID for the current process
|
// The processID for the current process
|
||||||
processID int
|
processID int
|
||||||
// errorCh is used to report errors from a process
|
// errorCh is the same channel the errorKernel uses to
|
||||||
// NB: Implementing this as an int to report for testing
|
// read incomming errors. By having this channel available
|
||||||
|
// within a process we can send errors to the error kernel,
|
||||||
|
// the EK desided what to do, and sends the action about
|
||||||
|
// what to do back to the process where the error came from.
|
||||||
errorCh chan errProcess
|
errorCh chan errProcess
|
||||||
processKind processKind
|
processKind processKind
|
||||||
// Who are we allowed to receive from ?
|
// Who are we allowed to receive from ?
|
||||||
|
@ -130,7 +133,9 @@ func (p process) spawnWorker(s *server) {
|
||||||
go func() {
|
go func() {
|
||||||
err := p.procFunc()
|
err := p.procFunc()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: spawnWorker: procFunc failed: %v\n", err)
|
er := fmt.Errorf("error: spawnWorker: procFunc failed: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(p.newMessagesCh, node(p.node), err)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -149,7 +154,9 @@ func (p process) spawnWorker(s *server) {
|
||||||
go func() {
|
go func() {
|
||||||
err := p.procFunc()
|
err := p.procFunc()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: spawnWorker: procFunc failed: %v\n", err)
|
er := fmt.Errorf("error: spawnWorker: procFunc failed: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(p.newMessagesCh, node(p.node), err)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
@ -168,7 +175,10 @@ func (p process) messageDeliverNats(natsConn *nats.Conn, message Message) {
|
||||||
for {
|
for {
|
||||||
dataPayload, err := gobEncodeMessage(message)
|
dataPayload, err := gobEncodeMessage(message)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: createDataPayload: %v\n", err)
|
er := fmt.Errorf("error: createDataPayload: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(p.newMessagesCh, node(p.node), err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
msg := &nats.Msg{
|
msg := &nats.Msg{
|
||||||
|
@ -187,14 +197,18 @@ func (p process) messageDeliverNats(natsConn *nats.Conn, message Message) {
|
||||||
// Create a subscriber for the reply message.
|
// Create a subscriber for the reply message.
|
||||||
subReply, err := natsConn.SubscribeSync(msg.Reply)
|
subReply, err := natsConn.SubscribeSync(msg.Reply)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: nc.SubscribeSync failed: failed to create reply message: %v\n", err)
|
er := fmt.Errorf("error: nc.SubscribeSync failed: failed to create reply message: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(p.newMessagesCh, node(p.node), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Publish message
|
// Publish message
|
||||||
err = natsConn.PublishMsg(msg)
|
err = natsConn.PublishMsg(msg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: publish failed: %v\n", err)
|
er := fmt.Errorf("error: publish failed: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(p.newMessagesCh, node(p.node), err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -251,24 +265,22 @@ func (p process) subscriberHandler(natsConn *nats.Conn, thisNode string, msg *na
|
||||||
gobDec := gob.NewDecoder(buf)
|
gobDec := gob.NewDecoder(buf)
|
||||||
err := gobDec.Decode(&message)
|
err := gobDec.Decode(&message)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: gob decoding failed: %v\n", err)
|
er := fmt.Errorf("error: gob decoding failed: %v", err)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Maybe the handling of the errors within the subscriber
|
|
||||||
// should also involve the error-kernel to report back centrally
|
|
||||||
// that there was a problem like missing method to handle a specific
|
|
||||||
// method etc.
|
|
||||||
switch {
|
switch {
|
||||||
case p.subject.CommandOrEvent == CommandACK || p.subject.CommandOrEvent == EventACK:
|
case p.subject.CommandOrEvent == CommandACK || p.subject.CommandOrEvent == EventACK:
|
||||||
// REMOVED: log.Printf("info: subscriberHandler: ACK Message received received, preparing to call handler: %v\n", p.subject.name())
|
|
||||||
mh, ok := p.methodsAvailable.CheckIfExists(message.Method)
|
mh, ok := p.methodsAvailable.CheckIfExists(message.Method)
|
||||||
if !ok {
|
if !ok {
|
||||||
// TODO: Check how errors should be handled here!!!
|
er := fmt.Errorf("error: subscriberHandler: method type not available: %v", p.subject.CommandOrEvent)
|
||||||
log.Printf("error: subscriberHandler: method type not available: %v\n", p.subject.CommandOrEvent)
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
out := []byte("not allowed from " + message.FromNode)
|
out := []byte("not allowed from " + message.FromNode)
|
||||||
var err error
|
//var err error
|
||||||
|
|
||||||
// Check if we are allowed to receive from that host
|
// Check if we are allowed to receive from that host
|
||||||
_, arOK1 := p.allowedReceivers[message.FromNode]
|
_, arOK1 := p.allowedReceivers[message.FromNode]
|
||||||
|
@ -282,8 +294,9 @@ func (p process) subscriberHandler(natsConn *nats.Conn, thisNode string, msg *na
|
||||||
out, err = mh.handler(p, message, thisNode)
|
out, err = mh.handler(p, message, thisNode)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: Send to error kernel ?
|
er := fmt.Errorf("error: subscriberHandler: failed to execute event: %v", err)
|
||||||
log.Printf("error: subscriberHandler: failed to execute event: %v\n", err)
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Printf("info: we don't allow receiving from: %v, %v\n", message.FromNode, p.subject)
|
log.Printf("info: we don't allow receiving from: %v, %v\n", message.FromNode, p.subject)
|
||||||
|
@ -292,18 +305,13 @@ func (p process) subscriberHandler(natsConn *nats.Conn, thisNode string, msg *na
|
||||||
// Send a confirmation message back to the publisher
|
// Send a confirmation message back to the publisher
|
||||||
natsConn.Publish(msg.Reply, out)
|
natsConn.Publish(msg.Reply, out)
|
||||||
|
|
||||||
// TESTING: Simulate that we also want to send some error that occured
|
|
||||||
// to the errorCentral
|
|
||||||
{
|
|
||||||
err := fmt.Errorf("error: some testing error we want to send out from %v", p.node)
|
|
||||||
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
|
||||||
}
|
|
||||||
case p.subject.CommandOrEvent == CommandNACK || p.subject.CommandOrEvent == EventNACK:
|
case p.subject.CommandOrEvent == CommandNACK || p.subject.CommandOrEvent == EventNACK:
|
||||||
// REMOVED: log.Printf("info: subscriberHandler: ACK Message received received, preparing to call handler: %v\n", p.subject.name())
|
// REMOVED: log.Printf("info: subscriberHandler: ACK Message received received, preparing to call handler: %v\n", p.subject.name())
|
||||||
mf, ok := p.methodsAvailable.CheckIfExists(message.Method)
|
mf, ok := p.methodsAvailable.CheckIfExists(message.Method)
|
||||||
if !ok {
|
if !ok {
|
||||||
// TODO: Check how errors should be handled here!!!
|
er := fmt.Errorf("error: subscriberHandler: method type not available: %v", p.subject.CommandOrEvent)
|
||||||
log.Printf("error: subscriberHandler: method type not available: %v\n", p.subject.CommandOrEvent)
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the method handler for that specific subject type.
|
// Start the method handler for that specific subject type.
|
||||||
|
@ -317,11 +325,15 @@ func (p process) subscriberHandler(natsConn *nats.Conn, thisNode string, msg *na
|
||||||
_, err := mf.handler(p, message, thisNode)
|
_, err := mf.handler(p, message, thisNode)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: Send to error kernel ?
|
er := fmt.Errorf("error: subscriberHandler: failed to execute event: %v", err)
|
||||||
log.Printf("error: subscriberHandler: failed to execute event: %v\n", err)
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
log.Printf("info: did not find that specific type of command: %#v\n", p.subject.CommandOrEvent)
|
er := fmt.Errorf("info: did not find that specific type of command: %#v", p.subject.CommandOrEvent)
|
||||||
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,6 +358,8 @@ func (p process) subscribeMessages(s *server) {
|
||||||
// process.
|
// process.
|
||||||
func (p process) publishMessages(natsConn *nats.Conn, processes *processes) {
|
func (p process) publishMessages(natsConn *nats.Conn, processes *processes) {
|
||||||
for {
|
for {
|
||||||
|
var err error
|
||||||
|
|
||||||
// Wait and read the next message on the message channel
|
// Wait and read the next message on the message channel
|
||||||
m := <-p.subject.messageCh
|
m := <-p.subject.messageCh
|
||||||
|
|
||||||
|
@ -365,25 +379,35 @@ func (p process) publishMessages(natsConn *nats.Conn, processes *processes) {
|
||||||
processes.mu.Lock()
|
processes.mu.Lock()
|
||||||
processes.active[pn] = p
|
processes.active[pn] = p
|
||||||
processes.mu.Unlock()
|
processes.mu.Unlock()
|
||||||
// REMOVED: sleep
|
|
||||||
//time.Sleep(time.Second * 1)
|
|
||||||
|
|
||||||
// NB: simulate that we get an error, and that we can send that
|
// Handle the error.
|
||||||
// out of the process and receive it in another thread.
|
|
||||||
// REMOVED: Error simulation
|
|
||||||
// ep := errProcess{
|
|
||||||
// infoText: "process failed",
|
|
||||||
// process: p,
|
|
||||||
// message: m,
|
|
||||||
// errorActionCh: make(chan errorAction),
|
|
||||||
// }
|
|
||||||
// s.errorKernel.errorCh <- ep
|
|
||||||
//
|
//
|
||||||
// // Wait for the response action back from the error kernel, and
|
// NOTE: None of the processes above generate an error, so the the
|
||||||
// // decide what to do. Should we continue, quit, or .... ?
|
// if clause will never be triggered. But keeping it here as an example
|
||||||
// switch <-ep.errorActionCh {
|
// for now for how to handle errors.
|
||||||
// case errActionContinue:
|
if err != nil {
|
||||||
// log.Printf("The errAction was continue...so we're continuing\n")
|
// Create an error type which also creates a channel which the
|
||||||
// }
|
// errorKernel will send back the action about what to do.
|
||||||
|
ep := errProcess{
|
||||||
|
infoText: "process failed",
|
||||||
|
process: p,
|
||||||
|
message: m,
|
||||||
|
errorActionCh: make(chan errorAction),
|
||||||
|
}
|
||||||
|
p.errorCh <- ep
|
||||||
|
|
||||||
|
// Wait for the response action back from the error kernel, and
|
||||||
|
// decide what to do. Should we continue, quit, or .... ?
|
||||||
|
switch <-ep.errorActionCh {
|
||||||
|
case errActionContinue:
|
||||||
|
// Just log and continue
|
||||||
|
log.Printf("The errAction was continue...so we're continuing\n")
|
||||||
|
case errActionKill:
|
||||||
|
log.Printf("The errAction was kill...so we're killing\n")
|
||||||
|
// ....
|
||||||
|
default:
|
||||||
|
log.Printf("Info: publishMessages: The errAction was not defined, so we're doing nothing\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,18 +37,22 @@ type ringBuffer struct {
|
||||||
totalMessagesIndex int
|
totalMessagesIndex int
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
permStore chan string
|
permStore chan string
|
||||||
|
nodeName node
|
||||||
|
newMessagesCh chan []subjectAndMessage
|
||||||
}
|
}
|
||||||
|
|
||||||
// newringBuffer is a push/pop storage for values.
|
// newringBuffer is a push/pop storage for values.
|
||||||
func newringBuffer(size int, dbFileName string) *ringBuffer {
|
func newringBuffer(size int, dbFileName string, nodeName node, newMessagesCh chan []subjectAndMessage) *ringBuffer {
|
||||||
db, err := bolt.Open(dbFileName, 0600, nil)
|
db, err := bolt.Open(dbFileName, 0600, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: failed to open db: %v\n", err)
|
log.Printf("error: failed to open db: %v\n", err)
|
||||||
}
|
}
|
||||||
return &ringBuffer{
|
return &ringBuffer{
|
||||||
bufData: make(chan samDBValue, size),
|
bufData: make(chan samDBValue, size),
|
||||||
db: db,
|
db: db,
|
||||||
permStore: make(chan string),
|
permStore: make(chan string),
|
||||||
|
nodeName: nodeName,
|
||||||
|
newMessagesCh: newMessagesCh,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,12 +60,10 @@ func newringBuffer(size int, dbFileName string) *ringBuffer {
|
||||||
// put the messages on a buffered channel
|
// put the messages on a buffered channel
|
||||||
// and deliver messages out when requested on the outCh.
|
// and deliver messages out when requested on the outCh.
|
||||||
func (r *ringBuffer) start(inCh chan subjectAndMessage, outCh chan samDBValue, defaultMessageTimeout int, defaultMessageRetries int) {
|
func (r *ringBuffer) start(inCh chan subjectAndMessage, outCh chan samDBValue, defaultMessageTimeout int, defaultMessageRetries int) {
|
||||||
|
|
||||||
// Starting both writing and reading in separate go routines so we
|
// Starting both writing and reading in separate go routines so we
|
||||||
// can write and read concurrently.
|
// can write and read concurrently.
|
||||||
|
|
||||||
// TODO: At startup, check if there are unprocessed messages in
|
|
||||||
// the K/V store, and process them.
|
|
||||||
|
|
||||||
const samValueBucket string = "samValueBucket"
|
const samValueBucket string = "samValueBucket"
|
||||||
const indexValueBucket string = "indexValueBucket"
|
const indexValueBucket string = "indexValueBucket"
|
||||||
|
|
||||||
|
@ -149,8 +151,10 @@ func (r *ringBuffer) fillBuffer(inCh chan subjectAndMessage, samValueBucket stri
|
||||||
// Store the incomming message in key/value store
|
// Store the incomming message in key/value store
|
||||||
err = r.dbUpdate(r.db, samValueBucket, strconv.Itoa(dbID), js)
|
err = r.dbUpdate(r.db, samValueBucket, strconv.Itoa(dbID), js)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// TODO: Handle error
|
er := fmt.Errorf("error: dbUpdate samValue failed: %v", err)
|
||||||
log.Printf("error: dbUpdate samValue failed: %v\n", err)
|
log.Printf("%v\n", er)
|
||||||
|
sendErrorLogMessage(r.newMessagesCh, node(r.nodeName), err)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put the message on the inmemory buffer.
|
// Put the message on the inmemory buffer.
|
||||||
|
|
|
@ -204,7 +204,7 @@ func createErrorMsgContent(FromNode node, theError error) subjectAndMessage {
|
||||||
func (s *server) routeMessagesToProcess(dbFileName string, newSAM chan []subjectAndMessage) {
|
func (s *server) routeMessagesToProcess(dbFileName string, newSAM chan []subjectAndMessage) {
|
||||||
// Prepare and start a new ring buffer
|
// Prepare and start a new ring buffer
|
||||||
const bufferSize int = 1000
|
const bufferSize int = 1000
|
||||||
rb := newringBuffer(bufferSize, dbFileName)
|
rb := newringBuffer(bufferSize, dbFileName, node(s.nodeName), s.newMessagesCh)
|
||||||
inCh := make(chan subjectAndMessage)
|
inCh := make(chan subjectAndMessage)
|
||||||
ringBufferOutCh := make(chan samDBValue)
|
ringBufferOutCh := make(chan samDBValue)
|
||||||
// start the ringbuffer.
|
// start the ringbuffer.
|
||||||
|
|
Loading…
Reference in a new issue