2021-03-03 13:14:32 +00:00
package steward
import (
"bytes"
2022-01-03 11:30:28 +00:00
"compress/gzip"
2021-04-07 16:05:07 +00:00
"context"
2022-02-03 06:04:10 +00:00
"crypto/ed25519"
2021-03-03 13:14:32 +00:00
"encoding/gob"
"fmt"
2022-01-03 11:30:28 +00:00
"io"
2021-03-03 13:14:32 +00:00
"log"
2022-01-03 10:36:05 +00:00
"os"
2022-02-03 06:04:10 +00:00
"strings"
2021-12-27 10:40:29 +00:00
"sync"
2021-03-03 13:14:32 +00:00
"time"
2021-12-29 06:28:09 +00:00
"github.com/fxamacker/cbor/v2"
2021-12-27 10:40:29 +00:00
"github.com/klauspost/compress/zstd"
2021-03-03 13:14:32 +00:00
"github.com/nats-io/nats.go"
2021-04-12 13:35:20 +00:00
"github.com/prometheus/client_golang/prometheus"
2021-03-03 13:14:32 +00:00
)
// processKind tells whether a process is a subscriber or a publisher.
// It is used to decide what kind of worker to spawn, and it is part of
// what identifies a process in the process map.
type processKind string

const (
	// processKindSubscriber marks a process that receives messages.
	processKindSubscriber processKind = "subscriber"
	// processKindPublisher marks a process that sends messages.
	processKindPublisher processKind = "publisher"
)
2021-08-16 11:01:12 +00:00
// process holds all the logic to handle a message type and it's
// method, subscription/publishin messages for a subject, and more.
2021-03-03 13:14:32 +00:00
type process struct {
messageID int
// the subject used for the specific process. One process
// can contain only one sender on a message bus, hence
// also one subject
subject Subject
// Put a node here to be able know the node a process is at.
// NB: Might not be needed later on.
2021-06-29 06:21:42 +00:00
node Node
2021-03-03 13:14:32 +00:00
// The processID for the current process
processID int
2021-03-12 08:38:19 +00:00
// errorCh is the same channel the errorKernel uses to
// read incomming errors. By having this channel available
// within a process we can send errors to the error kernel,
// the EK desided what to do, and sends the action about
// what to do back to the process where the error came from.
2022-01-18 18:26:36 +00:00
errorCh chan errorEvent
2021-03-03 13:14:32 +00:00
processKind processKind
// Who are we allowed to receive from ?
2021-09-20 10:22:34 +00:00
// allowedReceivers map[Node]struct{}
2021-03-03 14:44:32 +00:00
// methodsAvailable
methodsAvailable MethodsAvailable
2021-03-04 15:27:55 +00:00
// Helper or service function that can do some kind of work
// for the process.
// The idea is that this can hold for example the map of the
// the hello nodes to limit shared resources in the system as
// a whole for sharing a map from the *server level.
2021-03-10 13:14:09 +00:00
procFunc procFunc
2021-03-04 15:27:55 +00:00
// The channel to send a messages to the procFunc go routine.
2022-01-02 10:57:25 +00:00
// This is typically used within the methodHandler for so we
// can pass messages between the procFunc and the handler.
2021-03-04 15:27:55 +00:00
procFuncCh chan Message
2021-03-08 13:09:14 +00:00
// copy of the configuration from server
configuration * Configuration
2021-03-09 03:55:51 +00:00
// The new messages channel copied from *Server
2021-03-29 11:36:30 +00:00
toRingbufferCh chan <- [ ] subjectAndMessage
2021-03-31 06:56:13 +00:00
// The structure who holds all processes information
processes * processes
2021-04-07 14:45:51 +00:00
// nats connection
natsConn * nats . Conn
2021-04-08 05:07:13 +00:00
// natsSubscription returned when calling natsConn.Subscribe
2021-04-08 10:51:54 +00:00
natsSubscription * nats . Subscription
2021-04-07 16:05:07 +00:00
// context
ctx context . Context
// context cancelFunc
ctxCancel context . CancelFunc
2021-04-08 05:07:13 +00:00
// Process name
processName processName
2021-04-09 09:30:40 +00:00
// startup holds the startup functions for starting up publisher
// or subscriber processes
2021-08-18 10:16:21 +00:00
startup * startup
2022-02-04 09:33:31 +00:00
// Signatures
signatures * signatures
2021-03-03 13:14:32 +00:00
}
// prepareNewProcess will set the the provided values and the default
// values for a process.
2022-02-04 09:33:31 +00:00
func newProcess ( ctx context . Context , metrics * metrics , natsConn * nats . Conn , processes * processes , toRingbufferCh chan <- [ ] subjectAndMessage , configuration * Configuration , subject Subject , errCh chan errorEvent , processKind processKind , procFunc func ( ) error , signatures * signatures ) process {
2021-03-03 13:14:32 +00:00
// create the initial configuration for a sessions communicating with 1 host process.
2021-03-04 05:53:03 +00:00
processes . lastProcessID ++
2021-03-03 13:14:32 +00:00
2021-07-02 09:26:52 +00:00
ctx , cancel := context . WithCancel ( ctx )
2021-04-07 16:05:07 +00:00
2021-03-03 14:44:32 +00:00
var method Method
2021-03-03 13:14:32 +00:00
proc := process {
messageID : 0 ,
subject : subject ,
2021-06-29 06:21:42 +00:00
node : Node ( configuration . NodeName ) ,
2021-03-04 05:53:03 +00:00
processID : processes . lastProcessID ,
2021-03-03 13:14:32 +00:00
errorCh : errCh ,
processKind : processKind ,
2021-03-03 14:44:32 +00:00
methodsAvailable : method . GetMethodsAvailable ( ) ,
2021-03-29 11:36:30 +00:00
toRingbufferCh : toRingbufferCh ,
2021-03-09 03:55:51 +00:00
configuration : configuration ,
2021-03-31 06:56:13 +00:00
processes : processes ,
2021-04-07 14:45:51 +00:00
natsConn : natsConn ,
2021-04-07 16:05:07 +00:00
ctx : ctx ,
ctxCancel : cancel ,
2022-02-04 09:33:31 +00:00
startup : newStartup ( metrics , signatures ) ,
2022-02-07 03:23:13 +00:00
signatures : signatures ,
2021-03-03 13:14:32 +00:00
}
return proc
}
2022-01-01 09:13:35 +00:00
// procFunc is a function that will be started when a worker process
// is started. If a procFunc is registered when creating a new process
// the procFunc will be started as a go routine when the process is started,
// and stopped when the process is stopped.
//
// A procFunc can be started both for publishing and subscriber processes.
//
// When used with a subscriber process the usecase is most likely to handle
2022-01-01 09:26:16 +00:00
// some kind of state needed for a request type. The handlers themselves
// can not hold state since they are only called once per message received,
// and exits when the message is handled leaving no state behind. With a procfunc
// we can have a process function running at all times tied to the process, and
// this function can be able to hold the state needed in a certain scenario.
//
2022-01-01 09:13:35 +00:00
// With a subscriber handler you generally take the message in the handler and
// pass it on to the procFunc by putting it on the procFuncCh<-, and the
// message can then be read from the procFuncCh inside the procFunc, and we
// can do some further work on it, for example update registry for metrics that
// is needed for that specific request type.
//
2022-01-01 09:26:16 +00:00
// With a publisher process you can attach a static function that will do some
// work to a request type, and publish the result.
//
2022-01-01 09:13:35 +00:00
// procFunc's can also be used to wrap in other types which we want to
// work with. An example can be handling of metrics which the message
// have no notion of, but a procFunc can have that wrapped in from when it was constructed.
2022-01-02 10:57:25 +00:00
type procFunc func ( ctx context . Context , procFuncCh chan Message ) error
2021-03-10 13:14:09 +00:00
2021-03-03 13:14:32 +00:00
// The purpose of this function is to check if we should start a
// publisher or subscriber process, where a process is a go routine
// that will handle either sending or receiving messages on one
// subject.
//
// It will give the process the next available ID, and also add the
// process to the processes map in the server structure.
2021-04-07 14:45:51 +00:00
func ( p process ) spawnWorker ( procs * processes , natsConn * nats . Conn ) {
2021-03-03 13:14:32 +00:00
// We use the full name of the subject to identify a unique
// process. We can do that since a process can only handle
// one message queue.
var pn processName
if p . processKind == processKindPublisher {
pn = processNameGet ( p . subject . name ( ) , processKindPublisher )
}
if p . processKind == processKindSubscriber {
pn = processNameGet ( p . subject . name ( ) , processKindSubscriber )
}
2021-04-12 13:35:20 +00:00
processName := processNameGet ( p . subject . name ( ) , p . processKind )
2021-06-08 04:02:08 +00:00
// Add prometheus metrics for the process.
2021-08-18 13:41:53 +00:00
p . processes . metrics . promProcessesAllRunning . With ( prometheus . Labels { "processName" : string ( processName ) } )
2021-04-12 13:35:20 +00:00
2021-03-03 13:14:32 +00:00
// Start a publisher worker, which will start a go routine (process)
// That will take care of all the messages for the subject it owns.
if p . processKind == processKindPublisher {
2021-03-25 13:30:39 +00:00
2021-03-09 06:43:55 +00:00
// If there is a procFunc for the process, start it.
if p . procFunc != nil {
2022-01-02 10:57:25 +00:00
// Initialize the channel for communication between the proc and
// the procFunc.
p . procFuncCh = make ( chan Message )
2021-03-09 06:43:55 +00:00
// Start the procFunc in it's own anonymous func so we are able
// to get the return error.
go func ( ) {
2022-01-02 10:57:25 +00:00
err := p . procFunc ( p . ctx , p . procFuncCh )
2021-03-09 06:43:55 +00:00
if err != nil {
2021-09-23 06:19:53 +00:00
er := fmt . Errorf ( "error: spawnWorker: start procFunc failed: %v" , err )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , Message { } , er )
2021-03-09 06:43:55 +00:00
}
} ( )
}
2021-04-08 10:51:54 +00:00
go p . publishMessages ( natsConn )
2021-03-03 13:14:32 +00:00
}
// Start a subscriber worker, which will start a go routine (process)
// That will take care of all the messages for the subject it owns.
if p . processKind == processKindSubscriber {
2021-03-04 15:27:55 +00:00
// If there is a procFunc for the process, start it.
if p . procFunc != nil {
2022-01-02 10:57:25 +00:00
// Initialize the channel for communication between the proc and
// the procFunc.
p . procFuncCh = make ( chan Message )
2021-03-25 13:30:39 +00:00
2021-03-04 15:27:55 +00:00
// Start the procFunc in it's own anonymous func so we are able
// to get the return error.
go func ( ) {
2022-01-02 10:57:25 +00:00
err := p . procFunc ( p . ctx , p . procFuncCh )
2021-03-04 15:27:55 +00:00
if err != nil {
2021-09-23 06:19:53 +00:00
er := fmt . Errorf ( "error: spawnWorker: start procFunc failed: %v" , err )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , Message { } , er )
2021-03-04 15:27:55 +00:00
}
} ( )
}
2021-03-05 12:10:46 +00:00
2021-04-08 10:51:54 +00:00
p . natsSubscription = p . subscribeMessages ( )
2021-03-03 13:14:32 +00:00
}
2021-04-08 10:51:54 +00:00
p . processName = pn
// Add information about the new process to the started processes map.
2021-11-16 09:21:44 +00:00
procs . active . mu . Lock ( )
2021-11-16 18:07:24 +00:00
procs . active . procNames [ pn ] = p
2021-11-16 09:21:44 +00:00
procs . active . mu . Unlock ( )
2021-03-03 13:14:32 +00:00
}
2021-12-27 10:40:29 +00:00
// messageDeliverNats will create the Nats message with headers and payload.
// It will also take care of the delivering the message that is converted to
2021-12-30 05:28:21 +00:00
// gob or cbor format as a nats.Message. It will also take care of checking
// timeouts and retries specified for the message.
2021-12-29 05:40:42 +00:00
func ( p process ) messageDeliverNats ( natsMsgPayload [ ] byte , natsMsgHeader nats . Header , natsConn * nats . Conn , message Message ) {
2021-03-03 13:14:32 +00:00
retryAttempts := 0
2021-04-16 11:18:10 +00:00
const publishTimer time . Duration = 5
const subscribeSyncTimer time . Duration = 5
2021-09-07 07:43:54 +00:00
// The for loop will run until the message is delivered successfully,
// or that retries are reached.
2021-03-03 13:14:32 +00:00
for {
msg := & nats . Msg {
2021-03-09 06:43:55 +00:00
Subject : string ( p . subject . name ( ) ) ,
2021-04-04 05:33:18 +00:00
// Subject: fmt.Sprintf("%s.%s.%s", proc.node, "command", "CLICommandRequest"),
2021-03-03 13:14:32 +00:00
// Structure of the reply message are:
2021-06-08 02:45:01 +00:00
// <nodename>.<message type>.<method>.reply
2021-12-29 05:40:42 +00:00
Reply : fmt . Sprintf ( "%s.reply" , p . subject . name ( ) ) ,
Data : natsMsgPayload ,
Header : natsMsgHeader ,
2021-12-28 11:05:09 +00:00
}
2022-01-27 06:19:04 +00:00
// If it is a NACK message we just deliver the message and return
// here so we don't create a ACK message and then stop waiting for it.
2022-01-27 09:06:06 +00:00
if p . subject . Event == EventNACK {
2021-12-30 16:03:42 +00:00
err := natsConn . PublishMsg ( msg )
if err != nil {
er := fmt . Errorf ( "error: nats publish of hello failed: %v" , err )
log . Printf ( "%v\n" , er )
return
}
p . processes . metrics . promNatsDeliveredTotal . Inc ( )
return
}
2021-03-03 13:14:32 +00:00
// The SubscribeSync used in the subscriber, will get messages that
2021-04-16 11:18:10 +00:00
// are sent after it started subscribing.
2021-03-03 13:14:32 +00:00
//
2022-01-27 06:19:04 +00:00
// Create a subscriber for the ACK reply message.
2021-03-09 06:43:55 +00:00
subReply , err := natsConn . SubscribeSync ( msg . Reply )
2021-03-03 13:14:32 +00:00
if err != nil {
2021-09-23 06:19:53 +00:00
er := fmt . Errorf ( "error: nats SubscribeSync failed: failed to create reply message for subject: %v, error: %v" , msg . Reply , err )
2021-04-16 11:18:10 +00:00
// sendErrorLogMessage(p.toRingbufferCh, node(p.node), er)
2021-04-16 11:43:58 +00:00
log . Printf ( "%v, waiting %ds before retrying\n" , er , subscribeSyncTimer )
2021-04-16 11:18:10 +00:00
time . Sleep ( time . Second * subscribeSyncTimer )
2021-09-16 06:13:24 +00:00
subReply . Unsubscribe ( )
2021-03-03 13:14:32 +00:00
continue
}
// Publish message
2021-03-09 06:43:55 +00:00
err = natsConn . PublishMsg ( msg )
2021-03-03 13:14:32 +00:00
if err != nil {
2021-09-23 06:19:53 +00:00
er := fmt . Errorf ( "error: nats publish failed: %v" , err )
2021-04-16 11:18:10 +00:00
// sendErrorLogMessage(p.toRingbufferCh, node(p.node), er)
2021-04-16 11:43:58 +00:00
log . Printf ( "%v, waiting %ds before retrying\n" , er , publishTimer )
2021-04-16 11:18:10 +00:00
time . Sleep ( time . Second * publishTimer )
2021-03-03 13:14:32 +00:00
continue
}
// If the message is an ACK type of message we must check that a
// reply, and if it is not we don't wait here at all.
2022-01-27 09:06:06 +00:00
if p . subject . Event == EventACK {
2021-04-15 08:33:44 +00:00
// Wait up until ACKTimeout specified for a reply,
2021-09-07 07:43:54 +00:00
// continue and resend if no reply received,
2021-03-03 13:14:32 +00:00
// or exit if max retries for the message reached.
2021-12-31 05:59:09 +00:00
_ , err := subReply . NextMsg ( time . Second * time . Duration ( message . ACKTimeout ) )
2021-03-03 13:14:32 +00:00
if err != nil {
2021-09-23 06:19:53 +00:00
er := fmt . Errorf ( "error: ack receive failed: subject=%v: %v" , p . subject . name ( ) , err )
2021-04-16 10:38:48 +00:00
// sendErrorLogMessage(p.toRingbufferCh, p.node, er)
log . Printf ( " ** %v\n" , er )
2021-03-03 13:14:32 +00:00
// did not receive a reply, decide what to do..
retryAttempts ++
2021-09-16 06:13:24 +00:00
log . Printf ( "Retry attempt:%v, retries: %v, ACKTimeout: %v, message.ID: %v\n" , retryAttempts , message . Retries , message . ACKTimeout , message . ID )
2021-08-16 11:01:12 +00:00
2021-03-03 13:14:32 +00:00
switch {
2021-09-16 09:20:08 +00:00
//case message.Retries == 0:
// // 0 indicates unlimited retries
// continue
2021-03-03 13:14:32 +00:00
case retryAttempts >= message . Retries :
// max retries reached
2021-10-01 06:37:00 +00:00
er := fmt . Errorf ( "info: toNode: %v, fromNode: %v, subject: %v, methodArgs: %v: max retries reached, check if node is up and running and if it got a subscriber started for the given REQ type" , message . ToNode , message . FromNode , msg . Subject , message . MethodArgs )
2021-09-07 07:43:54 +00:00
// We do not want to send errorLogs for REQErrorLog type since
// it will just cause an endless loop.
if message . Method != REQErrorLog {
2022-01-21 05:15:26 +00:00
p . processes . errorKernel . infoSend ( p , message , er )
2021-09-07 07:43:54 +00:00
}
log . Printf ( "%v\n" , er )
2021-08-26 09:41:46 +00:00
2021-09-16 06:13:24 +00:00
subReply . Unsubscribe ( )
2021-08-26 09:41:46 +00:00
p . processes . metrics . promNatsMessagesFailedACKsTotal . Inc ( )
2021-03-03 13:14:32 +00:00
return
2021-03-26 06:55:42 +00:00
2021-03-03 13:14:32 +00:00
default :
// none of the above matched, so we've not reached max retries yet
2021-09-07 07:43:54 +00:00
log . Printf ( "max retries for message not reached, retrying sending of message with ID %v\n" , message . ID )
2021-08-26 09:41:46 +00:00
p . processes . metrics . promNatsMessagesMissedACKsTotal . Inc ( )
2021-03-03 13:14:32 +00:00
continue
}
}
2021-12-31 05:59:09 +00:00
// REMOVED: log.Printf("<--- publisher: received ACK from:%v, for: %v, data: %s\n", message.ToNode, message.Method, msgReply.Data)
2021-03-03 13:14:32 +00:00
}
2021-08-26 08:50:40 +00:00
2021-09-16 06:13:24 +00:00
subReply . Unsubscribe ( )
2021-08-26 08:50:40 +00:00
p . processes . metrics . promNatsDeliveredTotal . Inc ( )
2021-03-03 13:14:32 +00:00
return
}
}
2021-12-28 11:05:09 +00:00
// messageSubscriberHandler will deserialize the message when a new message is
2021-03-03 13:14:32 +00:00
// received, check the MessageType field in the message to decide what
// kind of message it is and then it will check how to handle that message type,
// and then call the correct method handler for it.
//
// This handler function should be started in it's own go routine,so
// one individual handler is started per message received so we can keep
// the state of the message being processed, and then reply back to the
// correct sending process's reply, meaning so we ACK back to the correct
// publisher.
2021-12-28 11:05:09 +00:00
func ( p process ) messageSubscriberHandler ( natsConn * nats . Conn , thisNode string , msg * nats . Msg ) {
// Variable to hold a copy of the message data, so we don't mess with
// the original data since the original is a pointer value.
msgData := make ( [ ] byte , len ( msg . Data ) )
copy ( msgData , msg . Data )
// fmt.Printf(" * DEBUG: header value on subscriberHandler: %v\n", msg.Header)
// If compression is used, decompress it to get the gob data. If
// compression is not used it is the gob encoded data we already
// got in msgData so we do nothing with it.
if val , ok := msg . Header [ "cmp" ] ; ok {
// fmt.Printf(" * DEBUG: ok = %v, map = %v, len of val = %v\n", ok, msg.Header, len(val))
switch val [ 0 ] {
case "z" :
zr , err := zstd . NewReader ( nil )
if err != nil {
log . Printf ( "error: zstd NewReader failed: %v\n" , err )
return
}
msgData , err = zr . DecodeAll ( msg . Data , nil )
if err != nil {
er := fmt . Errorf ( "error: zstd decoding failed: %v" , err )
log . Printf ( "%v\n" , er )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , Message { } , er )
2022-01-03 11:30:28 +00:00
zr . Close ( )
2021-12-28 11:05:09 +00:00
return
}
2022-01-03 11:30:28 +00:00
zr . Close ( )
case "g" :
r := bytes . NewReader ( msgData )
gr , err := gzip . NewReader ( r )
if err != nil {
log . Printf ( "error: gzip NewReader failed: %v\n" , err )
return
}
b , err := io . ReadAll ( gr )
if err != nil {
log . Printf ( "error: gzip ReadAll failed: %v\n" , err )
return
}
gr . Close ( )
msgData = b
2021-12-28 11:05:09 +00:00
}
}
2021-03-03 13:14:32 +00:00
message := Message { }
2021-12-29 07:29:11 +00:00
// Check if serialization is specified.
2021-12-30 05:28:21 +00:00
// Will default to gob serialization if nothing or non existing value is specified is specified.
2021-12-29 07:11:43 +00:00
if val , ok := msg . Header [ "serial" ] ; ok {
// fmt.Printf(" * DEBUG: ok = %v, map = %v, len of val = %v\n", ok, msg.Header, len(val))
switch val [ 0 ] {
case "cbor" :
err := cbor . Unmarshal ( msgData , & message )
if err != nil {
er := fmt . Errorf ( "error: cbor decoding failed: %v" , err )
log . Printf ( "%v\n" , er )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-12-29 07:11:43 +00:00
return
}
default : // Deaults to gob if no match was found.
2021-12-29 07:29:11 +00:00
r := bytes . NewReader ( msgData )
gobDec := gob . NewDecoder ( r )
2021-12-29 07:11:43 +00:00
err := gobDec . Decode ( & message )
if err != nil {
er := fmt . Errorf ( "error: gob decoding failed: %v" , err )
log . Printf ( "%v\n" , er )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-12-29 07:11:43 +00:00
return
}
}
} else {
// Default to gob if serialization flag was not specified.
2021-12-29 07:29:11 +00:00
r := bytes . NewReader ( msgData )
gobDec := gob . NewDecoder ( r )
2021-12-29 07:11:43 +00:00
err := gobDec . Decode ( & message )
if err != nil {
er := fmt . Errorf ( "error: gob decoding failed: %v" , err )
log . Printf ( "%v\n" , er )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-12-29 07:11:43 +00:00
return
}
2021-03-03 13:14:32 +00:00
}
2021-11-22 07:32:45 +00:00
// Send final reply for a relayed message back to the originating node.
//
2021-11-22 03:24:15 +00:00
// Check if the previous message was a relayed message, and if true
2021-11-11 08:35:01 +00:00
// make a copy of the current message where the to field is set to
// the value of the previous message's RelayFromNode field, so we
// also can send the a copy of the reply back to where it originated.
2021-11-22 03:24:15 +00:00
if message . PreviousMessage != nil && message . PreviousMessage . RelayOriginalViaNode != "" {
2021-11-11 08:35:01 +00:00
// make a copy of the message
msgCopy := message
msgCopy . ToNode = msgCopy . PreviousMessage . RelayFromNode
2021-11-22 03:24:15 +00:00
// We set the replyMethod of the initial message.
// If no RelayReplyMethod was found, we default to the reply
2021-11-22 07:32:45 +00:00
// method of the previous message.
2021-11-11 12:43:32 +00:00
switch {
case msgCopy . PreviousMessage . RelayReplyMethod == "" :
2021-11-22 07:32:45 +00:00
er := fmt . Errorf ( "error: subscriberHandler: no PreviousMessage.RelayReplyMethod found, defaulting to the reply method of previous message: %v " , msgCopy )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-11-22 03:24:15 +00:00
log . Printf ( "%v\n" , er )
2021-11-11 12:43:32 +00:00
msgCopy . Method = msgCopy . PreviousMessage . ReplyMethod
case msgCopy . PreviousMessage . RelayReplyMethod != "" :
msgCopy . Method = msgCopy . PreviousMessage . RelayReplyMethod
}
2021-11-22 07:32:45 +00:00
// Reset the previousMessage relay fields so the message don't loop.
2021-11-11 08:35:01 +00:00
message . PreviousMessage . RelayViaNode = ""
2021-11-22 03:24:15 +00:00
message . PreviousMessage . RelayOriginalViaNode = ""
2021-11-11 08:35:01 +00:00
2021-11-22 03:24:15 +00:00
// Create a SAM for the msg copy that will be sent back the where the
// relayed message originated from.
2021-11-11 08:35:01 +00:00
sam , err := newSubjectAndMessage ( msgCopy )
if err != nil {
2021-11-22 03:24:15 +00:00
er := fmt . Errorf ( "error: subscriberHandler: newSubjectAndMessage : %v, message copy: %v" , err , msgCopy )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-11-11 08:35:01 +00:00
log . Printf ( "%v\n" , er )
}
p . toRingbufferCh <- [ ] subjectAndMessage { sam }
}
2021-08-16 11:01:12 +00:00
// Check if it is an ACK or NACK message, and do the appropriate action accordingly.
2021-12-30 05:28:21 +00:00
//
// With ACK messages Steward will keep the state of the message delivery, and try to
// resend the message if an ACK is not received within the timeout/retries specified
// in the message.
// When a process sends an ACK message, it will stop and wait for the nats-reply message
// for the time specified in the replyTimeout value. If no reply message is received
// within the given timeout the publishing process will try to resend the message for
// number of times specified in the retries field of the Steward message.
// When receiving a Steward-message with ACK enabled we send a message back the the
// node where the message originated using the msg.Reply subject field of the nats-message.
//
// With NACK messages we do not send a nats reply message, so the message will only be
// sent from the publisher once, and if it is not delivered it will not be retried.
2021-03-03 13:14:32 +00:00
switch {
2021-12-30 05:28:21 +00:00
2022-01-27 09:06:06 +00:00
// Check for ACK type Event.
case p . subject . Event == EventACK :
2021-12-30 05:28:21 +00:00
// Look up the method handler for the specified method.
2021-03-09 06:43:55 +00:00
mh , ok := p . methodsAvailable . CheckIfExists ( message . Method )
2021-03-03 13:14:32 +00:00
if ! ok {
2022-01-27 09:06:06 +00:00
er := fmt . Errorf ( "error: subscriberHandler: no such method type: %v" , p . subject . Event )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-03-03 13:14:32 +00:00
}
2022-02-03 10:02:35 +00:00
out := [ ] byte { }
2021-12-29 07:11:43 +00:00
var err error
2021-03-03 13:14:32 +00:00
2022-02-07 03:23:13 +00:00
if p . signatures . verifySignature ( message ) {
2022-02-03 10:02:35 +00:00
// Call the method handler for the specified method.
out , err = mh . handler ( p , message , thisNode )
2021-03-03 13:14:32 +00:00
2022-02-03 10:02:35 +00:00
if err != nil {
er := fmt . Errorf ( "error: subscriberHandler: handler method failed: %v" , err )
p . processes . errorKernel . errSend ( p , message , er )
}
2021-03-03 13:14:32 +00:00
}
// Send a confirmation message back to the publisher
natsConn . Publish ( msg . Reply , out )
2022-01-27 09:06:06 +00:00
// Check for NACK type Event.
case p . subject . Event == EventNACK :
2021-03-03 14:44:32 +00:00
mf , ok := p . methodsAvailable . CheckIfExists ( message . Method )
2021-03-03 13:14:32 +00:00
if ! ok {
2022-01-27 09:06:06 +00:00
er := fmt . Errorf ( "error: subscriberHandler: method type not available: %v" , p . subject . Event )
2022-01-19 09:55:03 +00:00
p . processes . errorKernel . errSend ( p , message , er )
2021-03-03 13:14:32 +00:00
}
2022-02-07 03:23:13 +00:00
if p . signatures . verifySignature ( message ) {
2021-03-25 12:39:59 +00:00
2022-02-03 10:02:35 +00:00
_ , err := mf . handler ( p , message , thisNode )
if err != nil {
er := fmt . Errorf ( "error: subscriberHandler: handler method failed: %v" , err )
p . processes . errorKernel . errSend ( p , message , er )
}
2021-03-03 13:14:32 +00:00
}
2021-08-16 11:01:12 +00:00
2021-03-03 13:14:32 +00:00
default :
2022-01-27 13:25:24 +00:00
er := fmt . Errorf ( "info: did not find that specific type of event: %#v" , p . subject . Event )
2022-01-21 05:15:26 +00:00
p . processes . errorKernel . infoSend ( p , message , er )
2021-03-12 08:38:19 +00:00
2021-03-03 13:14:32 +00:00
}
}
2022-02-04 05:24:34 +00:00
// argsToString flattens a list of arguments into one string, with the
// arguments separated by a single space. A nil or empty list gives an
// empty string.
func argsToString(args []string) string {
	const separator = " "

	joined := strings.Join(args, separator)
	return joined
}
2021-08-16 11:01:12 +00:00
// SubscribeMessage will register the Nats callback function for the specified
// nats subject. This allows us to receive Nats messages for a given subject
// on a node.
2021-04-08 10:51:54 +00:00
func ( p process ) subscribeMessages ( ) * nats . Subscription {
2021-03-03 13:14:32 +00:00
subject := string ( p . subject . name ( ) )
2021-04-08 10:51:54 +00:00
natsSubscription , err := p . natsConn . Subscribe ( subject , func ( msg * nats . Msg ) {
//_, err := p.natsConn.Subscribe(subject, func(msg *nats.Msg) {
2021-03-25 13:30:39 +00:00
2021-08-16 11:01:12 +00:00
// Start up the subscriber handler.
2021-12-28 11:05:09 +00:00
go p . messageSubscriberHandler ( p . natsConn , p . configuration . NodeName , msg )
2021-03-03 13:14:32 +00:00
} )
if err != nil {
log . Printf ( "error: Subscribe failed: %v\n" , err )
2021-04-08 10:51:54 +00:00
return nil
2021-03-03 13:14:32 +00:00
}
2021-04-08 10:51:54 +00:00
return natsSubscription
2021-03-03 13:14:32 +00:00
}
2021-03-09 06:43:55 +00:00
// publishMessages will do the publishing of messages for one single
2021-12-25 18:21:10 +00:00
// process. The function should be run as a goroutine, and will run
// as long as the process it belongs to is running.
2021-04-08 10:51:54 +00:00
func ( p process ) publishMessages ( natsConn * nats . Conn ) {
2021-12-27 10:40:29 +00:00
var once sync . Once
2022-01-03 10:36:05 +00:00
var zEnc * zstd . Encoder
// Prepare a zstd encoder if enabled. By enabling it here before
// looping over the messages to send below, we can reuse the zstd
// encoder for all messages.
switch p . configuration . Compression {
case "z" : // zstd
2022-01-03 12:00:23 +00:00
// enc, err := zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
enc , err := zstd . NewWriter ( nil )
2022-01-03 10:36:05 +00:00
if err != nil {
log . Printf ( "error: zstd new encoder failed: %v\n" , err )
os . Exit ( 1 )
}
zEnc = enc
defer zEnc . Close ( )
}
2022-01-03 09:40:27 +00:00
// Loop and handle 1 message at a time. If some part of the code
// fails in the loop we should throw an error and use `continue`
// to jump back here to the beginning of the loop and continue
// with the next message.
2021-03-03 13:14:32 +00:00
for {
2021-04-07 16:05:07 +00:00
// Wait and read the next message on the message channel, or
// exit this function if Cancel are received via ctx.
select {
2022-02-01 06:22:06 +00:00
case m := <- p . subject . messageCh :
2022-02-03 06:04:10 +00:00
// Sign the methodArgs, and add the signature to the message.
m . ArgSignature = p . addMethodArgSignature ( m )
2022-02-04 05:24:34 +00:00
fmt . Printf ( " * DEBUG: add signature, fromNode: %v, method: %v, signature: %s\n" , m . FromNode , m . Method , m . ArgSignature )
2022-02-03 06:04:10 +00:00
2022-02-01 06:22:06 +00:00
p . publishAMessage ( m , zEnc , once , natsConn )
2021-04-07 16:05:07 +00:00
case <- p . ctx . Done ( ) :
er := fmt . Errorf ( "info: canceling publisher: %v" , p . subject . name ( ) )
2021-07-02 16:32:01 +00:00
//sendErrorLogMessage(p.toRingbufferCh, Node(p.node), er)
log . Printf ( "%v\n" , er )
2021-04-07 16:05:07 +00:00
return
}
2022-02-01 06:22:06 +00:00
}
}
2021-09-13 11:15:21 +00:00
2022-02-03 06:04:10 +00:00
func ( p process ) addMethodArgSignature ( m Message ) [ ] byte {
2022-02-04 05:24:34 +00:00
argsString := argsToString ( m . MethodArgs )
2022-02-07 03:23:13 +00:00
sign := ed25519 . Sign ( p . signatures . SignPrivateKey , [ ] byte ( argsString ) )
2022-02-03 06:04:10 +00:00
return sign
}
2022-02-01 06:22:06 +00:00
func ( p process ) publishAMessage ( m Message , zEnc * zstd . Encoder , once sync . Once , natsConn * nats . Conn ) {
// Create the initial header, and set values below depending on the
// various configuration options chosen.
natsMsgHeader := nats . Header { }
2021-12-29 05:40:42 +00:00
2022-02-01 06:22:06 +00:00
// The serialized value of the nats message payload
var natsMsgPayloadSerialized [ ] byte
2021-12-29 06:18:11 +00:00
2022-02-01 06:22:06 +00:00
// encode the message structure into gob binary format before putting
// it into a nats message.
// Prepare a gob encoder with a buffer before we start the loop
switch p . configuration . Serialization {
case "cbor" :
b , err := cbor . Marshal ( m )
if err != nil {
er := fmt . Errorf ( "error: messageDeliverNats: cbor encode message failed: %v" , err )
p . processes . errorKernel . errSend ( p , m , er )
return
}
2021-12-29 06:28:09 +00:00
2022-02-01 06:22:06 +00:00
natsMsgPayloadSerialized = b
natsMsgHeader [ "serial" ] = [ ] string { p . configuration . Serialization }
2021-12-29 06:28:09 +00:00
2022-02-01 06:22:06 +00:00
default :
var bufGob bytes . Buffer
gobEnc := gob . NewEncoder ( & bufGob )
err := gobEnc . Encode ( m )
if err != nil {
er := fmt . Errorf ( "error: messageDeliverNats: gob encode message failed: %v" , err )
p . processes . errorKernel . errSend ( p , m , er )
return
2021-12-25 18:21:10 +00:00
}
2022-02-01 06:22:06 +00:00
natsMsgPayloadSerialized = bufGob . Bytes ( )
}
2022-01-03 11:30:28 +00:00
2022-02-01 06:22:06 +00:00
// Get the process name so we can look up the process in the
// processes map, and increment the message counter.
pn := processNameGet ( p . subject . name ( ) , processKindPublisher )
m . ID = p . messageID
2021-12-27 10:40:29 +00:00
2022-02-01 06:22:06 +00:00
// The compressed value of the nats message payload. The content
// can either be compressed or in it's original form depening on
// the outcome of the switch below, and if compression were chosen
// or not.
var natsMsgPayloadCompressed [ ] byte
2021-12-27 10:40:29 +00:00
2022-02-01 06:22:06 +00:00
// Compress the data payload if selected with configuration flag.
// The compression chosen is later set in the nats msg header when
// calling p.messageDeliverNats below.
switch p . configuration . Compression {
case "z" : // zstd
natsMsgPayloadCompressed = zEnc . EncodeAll ( natsMsgPayloadSerialized , nil )
natsMsgHeader [ "cmp" ] = [ ] string { p . configuration . Compression }
2021-12-27 10:40:29 +00:00
2022-02-01 06:22:06 +00:00
zEnc . Reset ( nil )
case "g" : // gzip
var buf bytes . Buffer
gzipW := gzip . NewWriter ( & buf )
_ , err := gzipW . Write ( natsMsgPayloadSerialized )
if err != nil {
log . Printf ( "error: failed to write gzip: %v\n" , err )
gzipW . Close ( )
return
2021-12-27 10:40:29 +00:00
}
2022-02-01 06:22:06 +00:00
gzipW . Close ( )
2021-12-27 10:40:29 +00:00
2022-02-01 06:22:06 +00:00
natsMsgPayloadCompressed = buf . Bytes ( )
natsMsgHeader [ "cmp" ] = [ ] string { p . configuration . Compression }
case "" : // no compression
natsMsgPayloadCompressed = natsMsgPayloadSerialized
2021-03-09 06:43:55 +00:00
2022-02-01 06:22:06 +00:00
default : // no compression
// Allways log the error to console.
er := fmt . Errorf ( "error: compression type not defined, setting default to zero compression" )
log . Printf ( "%v\n" , er )
2021-03-03 13:14:32 +00:00
2022-02-01 06:22:06 +00:00
// We only wan't to send the error message to errorCentral once.
once . Do ( func ( ) {
p . processes . errorKernel . errSend ( p , m , er )
} )
2021-10-08 10:07:10 +00:00
2022-02-01 06:22:06 +00:00
natsMsgPayloadCompressed = natsMsgPayloadSerialized
}
2021-03-12 08:38:19 +00:00
2022-02-01 06:22:06 +00:00
// Create the Nats message with headers and payload, and do the
// sending of the message.
p . messageDeliverNats ( natsMsgPayloadCompressed , natsMsgHeader , natsConn , m )
// Signaling back to the ringbuffer that we are done with the
// current message, and it can remove it from the ringbuffer.
m . done <- struct { } { }
// Increment the counter for the next message to be sent.
p . messageID ++
{
p . processes . active . mu . Lock ( )
p . processes . active . procNames [ pn ] = p
p . processes . active . mu . Unlock ( )
2021-03-03 13:14:32 +00:00
}
2022-02-01 06:22:06 +00:00
// // Handle the error.
// //
// // NOTE: None of the processes above generate an error, so the the
// // if clause will never be triggered. But keeping it here as an example
// // for now for how to handle errors.
// if err != nil {
// // Create an error type which also creates a channel which the
// // errorKernel will send back the action about what to do.
// ep := errorEvent{
// //errorType: logOnly,
// process: p,
// message: m,
// errorActionCh: make(chan errorAction),
// }
// p.errorCh <- ep
//
// // Wait for the response action back from the error kernel, and
// // decide what to do. Should we continue, quit, or .... ?
// switch <-ep.errorActionCh {
// case errActionContinue:
// // Just log and continue
// log.Printf("The errAction was continue...so we're continuing\n")
// case errActionKill:
// log.Printf("The errAction was kill...so we're killing\n")
// // ....
// default:
// log.Printf("Info: publishMessages: The errAction was not defined, so we're doing nothing\n")
// }
// }
2021-03-03 13:14:32 +00:00
}