2023-10-04 20:58:42 +00:00
package ctrl
2021-03-03 13:14:32 +00:00
import (
2021-04-07 16:05:07 +00:00
"context"
2022-02-03 06:04:10 +00:00
"crypto/ed25519"
2022-06-17 07:54:20 +00:00
"errors"
2021-03-03 13:14:32 +00:00
"fmt"
2024-11-21 03:54:55 +00:00
"log"
2021-03-03 13:14:32 +00:00
"time"
2021-12-29 06:28:09 +00:00
"github.com/fxamacker/cbor/v2"
2021-12-27 10:40:29 +00:00
"github.com/klauspost/compress/zstd"
2021-03-03 13:14:32 +00:00
"github.com/nats-io/nats.go"
2024-11-21 03:54:55 +00:00
"github.com/nats-io/nats.go/jetstream"
2021-04-12 13:35:20 +00:00
"github.com/prometheus/client_golang/prometheus"
2022-06-17 07:54:20 +00:00
// "google.golang.org/protobuf/internal/errors"
2021-03-03 13:14:32 +00:00
)
// processKind names the kind of worker a process is. It is used to decide
// what kind of process to spawn, and it is part of the information used
// when the process is put in the process map.
type processKind string

// The process kinds: plain NATS subscribers and publishers, and JetStream
// consumers and publishers.
const (
	processKindSubscriberNats     = processKind("subscriberNats")
	processKindPublisherNats      = processKind("publisherNats")
	processKindConsumerJetstream  = processKind("consumerJetstream")
	processKindPublisherJetstream = processKind("publisherJetstream")
)
2021-08-16 11:01:12 +00:00
// process holds all the logic to handle a message type and it's
// method, subscription/publishin messages for a subject, and more.
2021-03-03 13:14:32 +00:00
type process struct {
2022-06-17 22:03:25 +00:00
// isSubProcess is used to indentify subprocesses spawned by other processes.
isSubProcess bool
2023-01-13 14:09:23 +00:00
// isLongRunningPublisher is set to true for a publisher service that should not
// be auto terminated like a normal autospawned publisher would be when the the
// inactivity timeout have expired
isLongRunningPublisher bool
2022-04-01 05:09:55 +00:00
// server
server * server
// messageID
2021-03-03 13:14:32 +00:00
messageID int
// the subject used for the specific process. One process
// can contain only one sender on a message bus, hence
2024-11-21 09:09:49 +00:00
// also one subject.
2021-03-03 13:14:32 +00:00
subject Subject
2024-11-21 09:09:49 +00:00
// The jetstram stream.
streamInfo streamInfo
2021-03-03 13:14:32 +00:00
// Put a node here to be able know the node a process is at.
2021-06-29 06:21:42 +00:00
node Node
2021-03-03 13:14:32 +00:00
// The processID for the current process
2022-02-07 05:42:17 +00:00
processID int
2021-03-03 13:14:32 +00:00
processKind processKind
2021-03-03 14:44:32 +00:00
// methodsAvailable
methodsAvailable MethodsAvailable
2022-03-31 12:57:30 +00:00
// procFunc is a function that will be started when a worker process
// is started. If a procFunc is registered when creating a new process
// the procFunc will be started as a go routine when the process is started,
// and stopped when the process is stopped.
//
// A procFunc can be started both for publishing and subscriber processes.
//
// When used with a subscriber process the usecase is most likely to handle
// some kind of state needed for a request type. The handlers themselves
// can not hold state since they are only called once per message received,
// and exits when the message is handled leaving no state behind. With a procfunc
// we can have a process function running at all times tied to the process, and
// this function can be able to hold the state needed in a certain scenario.
//
// With a subscriber handler you generally take the message in the handler and
// pass it on to the procFunc by putting it on the procFuncCh<-, and the
// message can then be read from the procFuncCh inside the procFunc, and we
// can do some further work on it, for example update registry for metrics that
// is needed for that specific request type.
//
// With a publisher process you can attach a static function that will do some
// work to a request type, and publish the result.
//
// procFunc's can also be used to wrap in other types which we want to
// work with. An example can be handling of metrics which the message
// have no notion of, but a procFunc can have that wrapped in from when it was constructed.
procFunc func ( ctx context . Context , procFuncCh chan Message ) error
2021-03-04 15:27:55 +00:00
// The channel to send a messages to the procFunc go routine.
2022-01-02 10:57:25 +00:00
// This is typically used within the methodHandler for so we
// can pass messages between the procFunc and the handler.
2021-03-04 15:27:55 +00:00
procFuncCh chan Message
2021-03-08 13:09:14 +00:00
// copy of the configuration from server
configuration * Configuration
2021-03-09 03:55:51 +00:00
// The new messages channel copied from *Server
2024-11-19 19:28:26 +00:00
newMessagesCh chan <- [ ] subjectAndMessage
2024-11-22 10:31:07 +00:00
// JetstreamOut channel
jetstreamOut chan Message
2021-03-31 06:56:13 +00:00
// The structure who holds all processes information
processes * processes
2021-04-07 14:45:51 +00:00
// nats connection
natsConn * nats . Conn
2021-04-08 05:07:13 +00:00
// natsSubscription returned when calling natsConn.Subscribe
2021-04-08 10:51:54 +00:00
natsSubscription * nats . Subscription
2021-04-07 16:05:07 +00:00
// context
ctx context . Context
// context cancelFunc
ctxCancel context . CancelFunc
2021-04-08 05:07:13 +00:00
// Process name
processName processName
2021-04-09 09:30:40 +00:00
2022-06-11 04:30:58 +00:00
// handler is used to directly attach a handler to a process upon
// creation of the process, like when a process is spawning a sub
// process like REQCopySrc do. If we're not spawning a sub process
// and it is a regular process the handler to use is found with the
// getHandler method
handler func ( proc process , message Message , node string ) ( [ ] byte , error )
2021-04-09 09:30:40 +00:00
// startup holds the startup functions for starting up publisher
// or subscriber processes
2021-08-18 10:16:21 +00:00
startup * startup
2022-02-04 09:33:31 +00:00
// Signatures
2022-04-21 11:21:36 +00:00
nodeAuth * nodeAuth
2022-04-07 07:34:06 +00:00
// centralAuth
centralAuth * centralAuth
2022-04-01 06:43:14 +00:00
// errorKernel
errorKernel * errorKernel
2022-04-01 06:51:14 +00:00
// metrics
metrics * metrics
2024-11-21 03:54:55 +00:00
// jetstream
js jetstream . JetStream
2024-11-22 19:36:55 +00:00
// zstd encoder
zstdEncoder * zstd . Encoder
2021-03-03 13:14:32 +00:00
}
// prepareNewProcess will set the the provided values and the default
// values for a process.
2024-11-21 09:09:49 +00:00
func newProcess ( ctx context . Context , server * server , subject Subject , stream streamInfo , processKind processKind ) process {
2021-03-03 13:14:32 +00:00
// create the initial configuration for a sessions communicating with 1 host process.
2022-06-22 03:03:11 +00:00
server . processes . mu . Lock ( )
2022-04-01 05:09:55 +00:00
server . processes . lastProcessID ++
2022-06-22 03:03:11 +00:00
pid := server . processes . lastProcessID
server . processes . mu . Unlock ( )
2021-03-03 13:14:32 +00:00
2021-07-02 09:26:52 +00:00
ctx , cancel := context . WithCancel ( ctx )
2021-04-07 16:05:07 +00:00
2021-03-03 14:44:32 +00:00
var method Method
2024-11-21 03:54:55 +00:00
js , err := jetstream . New ( server . natsConn )
if err != nil {
log . Fatalf ( "error: failed to create jetstream.New: %v\n" , err )
}
2021-03-03 13:14:32 +00:00
proc := process {
2022-04-01 05:09:55 +00:00
server : server ,
2021-03-03 13:14:32 +00:00
messageID : 0 ,
subject : subject ,
2022-04-01 05:09:55 +00:00
node : Node ( server . configuration . NodeName ) ,
2022-06-22 03:03:11 +00:00
processID : pid ,
2021-03-03 13:14:32 +00:00
processKind : processKind ,
2021-03-03 14:44:32 +00:00
methodsAvailable : method . GetMethodsAvailable ( ) ,
2024-11-19 19:28:26 +00:00
newMessagesCh : server . newMessagesCh ,
2024-11-22 10:31:07 +00:00
jetstreamOut : server . jetstreamOutCh ,
2022-04-01 05:09:55 +00:00
configuration : server . configuration ,
processes : server . processes ,
natsConn : server . natsConn ,
2021-04-07 16:05:07 +00:00
ctx : ctx ,
ctxCancel : cancel ,
2022-04-01 05:09:55 +00:00
startup : newStartup ( server ) ,
2022-04-21 11:21:36 +00:00
nodeAuth : server . nodeAuth ,
2022-04-07 07:34:06 +00:00
centralAuth : server . centralAuth ,
2022-04-01 06:43:14 +00:00
errorKernel : server . errorKernel ,
2022-04-01 06:51:14 +00:00
metrics : server . metrics ,
2024-11-21 03:54:55 +00:00
js : js ,
2024-11-22 19:36:55 +00:00
zstdEncoder : server . zstdEncoder ,
2021-03-03 13:14:32 +00:00
}
2024-11-21 09:09:49 +00:00
// We use the name of the subject to identify a unique process.
2022-06-09 08:18:09 +00:00
2024-11-21 09:09:49 +00:00
switch proc . processKind {
case processKindPublisherNats :
2024-11-21 04:28:56 +00:00
proc . processName = processNameGet ( proc . subject . name ( ) , processKindPublisherNats )
2024-11-21 09:09:49 +00:00
case processKindSubscriberNats :
2024-11-21 04:28:56 +00:00
proc . processName = processNameGet ( proc . subject . name ( ) , processKindSubscriberNats )
2024-11-21 09:09:49 +00:00
case processKindConsumerJetstream :
proc . processName = processNameGet ( subjectName ( proc . streamInfo . name ) , processKindConsumerJetstream )
case processKindPublisherJetstream :
proc . processName = processNameGet ( subjectName ( proc . streamInfo . name ) , processKindPublisherJetstream )
2022-06-09 08:18:09 +00:00
}
2021-03-03 13:14:32 +00:00
return proc
}
2024-11-21 09:09:49 +00:00
// Start a publisher or subscriber process, where a process is a go routine
// that will handle either sending or receiving messages on one subject.
2021-03-03 13:14:32 +00:00
//
// It will give the process the next available ID, and also add the
// process to the processes map in the server structure.
2024-11-21 03:54:55 +00:00
func ( p process ) Start ( ) {
2021-03-03 13:14:32 +00:00
2021-06-08 04:02:08 +00:00
// Add prometheus metrics for the process.
2022-11-30 05:18:19 +00:00
if ! p . isSubProcess {
p . metrics . promProcessesAllRunning . With ( prometheus . Labels { "processName" : string ( p . processName ) } )
}
2021-04-12 13:35:20 +00:00
2021-03-03 13:14:32 +00:00
// Start a publisher worker, which will start a go routine (process)
// That will take care of all the messages for the subject it owns.
2024-11-21 04:28:56 +00:00
if p . processKind == processKindPublisherNats {
p . startPublisherNats ( )
2021-03-03 13:14:32 +00:00
}
// Start a subscriber worker, which will start a go routine (process)
// That will take care of all the messages for the subject it owns.
2024-11-21 04:28:56 +00:00
if p . processKind == processKindSubscriberNats {
2024-11-21 03:54:55 +00:00
p . startSubscriberNats ( )
2023-01-11 06:03:01 +00:00
}
2021-03-05 12:10:46 +00:00
2023-01-11 06:03:01 +00:00
// Add information about the new process to the started processes map.
p . processes . active . mu . Lock ( )
p . processes . active . procNames [ p . processName ] = p
p . processes . active . mu . Unlock ( )
2022-06-23 09:40:17 +00:00
2023-01-11 06:03:01 +00:00
er := fmt . Errorf ( "successfully started process: %v" , p . processName )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2023-01-11 06:03:01 +00:00
}
2024-11-21 04:28:56 +00:00
func ( p process ) startPublisherNats ( ) {
2023-01-11 06:03:01 +00:00
// If there is a procFunc for the process, start it.
if p . procFunc != nil {
// Initialize the channel for communication between the proc and
// the procFunc.
p . procFuncCh = make ( chan Message )
// Start the procFunc in it's own anonymous func so we are able
// to get the return error.
2022-06-23 09:40:17 +00:00
go func ( ) {
2023-01-11 06:03:01 +00:00
err := p . procFunc ( p . ctx , p . procFuncCh )
2022-06-23 09:40:17 +00:00
if err != nil {
2023-01-11 06:03:01 +00:00
er := fmt . Errorf ( "error: spawnWorker: start procFunc failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , Message { } , er , logError )
2022-06-23 09:40:17 +00:00
}
2023-01-11 06:03:01 +00:00
} ( )
}
2022-06-23 09:40:17 +00:00
2024-11-21 04:28:56 +00:00
go p . publishMessagesNats ( p . natsConn )
2023-01-11 06:03:01 +00:00
}
2022-06-23 09:40:17 +00:00
2024-11-21 03:54:55 +00:00
func ( p process ) startSubscriberNats ( ) {
2023-01-11 06:03:01 +00:00
// If there is a procFunc for the process, start it.
if p . procFunc != nil {
// Initialize the channel for communication between the proc and
// the procFunc.
p . procFuncCh = make ( chan Message )
2022-06-23 09:40:17 +00:00
2023-01-11 06:03:01 +00:00
// Start the procFunc in it's own anonymous func so we are able
// to get the return error.
go func ( ) {
err := p . procFunc ( p . ctx , p . procFuncCh )
if err != nil {
er := fmt . Errorf ( "error: spawnWorker: start procFunc failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , Message { } , er , logError )
2023-01-11 06:03:01 +00:00
}
2022-06-23 09:40:17 +00:00
} ( )
2021-03-03 13:14:32 +00:00
}
2021-04-08 10:51:54 +00:00
2024-11-21 03:54:55 +00:00
p . natsSubscription = p . subscribeMessagesNats ( )
2023-01-11 06:03:01 +00:00
// We also need to be able to remove all the information about this process
// when the process context is canceled.
go func ( ) {
<- p . ctx . Done ( )
err := p . natsSubscription . Unsubscribe ( )
if err != nil {
er := fmt . Errorf ( "error: spawnWorker: got <-ctx.Done, but unable to unsubscribe natsSubscription failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , Message { } , er , logError )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2023-01-11 06:03:01 +00:00
}
2022-06-09 08:18:09 +00:00
2023-01-11 06:03:01 +00:00
p . processes . active . mu . Lock ( )
delete ( p . processes . active . procNames , p . processName )
p . processes . active . mu . Unlock ( )
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "successfully stopped process: %v" , p . processName )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2023-01-11 06:03:01 +00:00
} ( )
2021-03-03 13:14:32 +00:00
}
2022-06-17 07:54:20 +00:00
// ErrACKSubscribeRetry is the sentinel error used to signal that delivering
// a message should be attempted again.
var ErrACKSubscribeRetry = errors.New("ctrl: retrying to subscribe for ack message")
2021-12-27 10:40:29 +00:00
// messageDeliverNats will create the Nats message with headers and payload.
// It will also take care of the delivering the message that is converted to
2021-12-30 05:28:21 +00:00
// gob or cbor format as a nats.Message. It will also take care of checking
// timeouts and retries specified for the message.
2021-12-29 05:40:42 +00:00
func ( p process ) messageDeliverNats ( natsMsgPayload [ ] byte , natsMsgHeader nats . Header , natsConn * nats . Conn , message Message ) {
2021-03-03 13:14:32 +00:00
retryAttempts := 0
2022-12-30 10:54:10 +00:00
if message . RetryWait <= 0 {
message . RetryWait = 0
}
2021-09-07 07:43:54 +00:00
// The for loop will run until the message is delivered successfully,
// or that retries are reached.
2021-03-03 13:14:32 +00:00
for {
msg := & nats . Msg {
2021-03-09 06:43:55 +00:00
Subject : string ( p . subject . name ( ) ) ,
2021-04-04 05:33:18 +00:00
// Subject: fmt.Sprintf("%s.%s.%s", proc.node, "command", "CLICommandRequest"),
2021-03-03 13:14:32 +00:00
// Structure of the reply message are:
2021-06-08 02:45:01 +00:00
// <nodename>.<message type>.<method>.reply
2021-12-29 05:40:42 +00:00
Reply : fmt . Sprintf ( "%s.reply" , p . subject . name ( ) ) ,
Data : natsMsgPayload ,
Header : natsMsgHeader ,
2021-12-28 11:05:09 +00:00
}
2022-12-31 06:31:18 +00:00
er := fmt . Errorf ( "info: preparing to send nats message with subject %v, id: %v" , msg . Subject , message . ID )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-21 07:12:41 +00:00
2022-12-29 21:49:47 +00:00
var err error
switch {
2022-01-27 06:19:04 +00:00
// If it is a NACK message we just deliver the message and return
// here so we don't create a ACK message and then stop waiting for it.
2023-01-05 00:55:52 +00:00
case message . ACKTimeout < 1 :
2022-12-29 21:49:47 +00:00
err = func ( ) error {
err := natsConn . PublishMsg ( msg )
if err != nil {
er := fmt . Errorf ( "error: nats publish for message with subject failed: %v" , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-29 21:49:47 +00:00
return ErrACKSubscribeRetry
}
p . metrics . promNatsDeliveredTotal . Inc ( )
2022-12-21 07:12:41 +00:00
2022-12-29 21:49:47 +00:00
// The remaining logic is for handling ACK messages, so we return here
// since it was a NACK message, and all or now done.
2021-12-30 16:03:42 +00:00
2022-12-29 06:53:25 +00:00
return nil
2022-12-29 21:49:47 +00:00
} ( )
2022-12-29 06:53:25 +00:00
2023-01-05 00:55:52 +00:00
case message . ACKTimeout >= 1 :
2022-12-29 21:49:47 +00:00
// The function below will return nil if the message should not be retried.
2022-06-17 07:54:20 +00:00
//
2022-12-29 21:49:47 +00:00
// All other errors happening will return ErrACKSubscribeRetry which will lead
// to a 'continue' for the for loop when checking the error directly after this
// function is called
err = func ( ) error {
defer func ( ) { retryAttempts ++ } ( )
if retryAttempts > message . Retries {
// max retries reached
er := fmt . Errorf ( "info: toNode: %v, fromNode: %v, subject: %v, methodArgs: %v: max retries reached, check if node is up and running and if it got a subscriber started for the given REQ type" , message . ToNode , message . FromNode , msg . Subject , message . MethodArgs )
// We do not want to send errorLogs for REQErrorLog type since
// it will just cause an endless loop.
2024-11-19 02:48:42 +00:00
if message . Method != ErrorLog {
2022-12-29 21:49:47 +00:00
p . errorKernel . infoSend ( p , message , er )
}
p . metrics . promNatsMessagesFailedACKsTotal . Inc ( )
return nil
2022-06-18 06:12:14 +00:00
}
2021-03-03 13:14:32 +00:00
2022-12-29 21:49:47 +00:00
er := fmt . Errorf ( "send attempt:%v, max retries: %v, ack timeout: %v, message.ID: %v, method: %v, toNode: %v" , retryAttempts , message . Retries , message . ACKTimeout , message . ID , message . Method , message . ToNode )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-17 07:54:20 +00:00
2022-12-29 21:49:47 +00:00
// The SubscribeSync used in the subscriber, will get messages that
// are sent after it started subscribing.
//
// Create a subscriber for the ACK reply message.
subReply , err := natsConn . SubscribeSync ( msg . Reply )
defer func ( ) {
err := subReply . Unsubscribe ( )
if err != nil {
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "error: nats SubscribeSync: failed when unsubscribing for ACK: %v" , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-29 21:49:47 +00:00
}
} ( )
if err != nil {
er := fmt . Errorf ( "error: nats SubscribeSync failed: failed to create reply message for subject: %v, error: %v" , msg . Reply , err )
// sendErrorLogMessage(p.toRingbufferCh, node(p.node), er)
2023-01-12 06:44:28 +00:00
er = fmt . Errorf ( "%v, waiting equal to RetryWait %ds before retrying" , er , message . RetryWait )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-17 07:54:20 +00:00
2022-12-30 10:54:10 +00:00
time . Sleep ( time . Second * time . Duration ( message . RetryWait ) )
2022-06-17 07:54:20 +00:00
2022-12-29 21:49:47 +00:00
return ErrACKSubscribeRetry
}
2021-03-03 13:14:32 +00:00
2022-12-29 21:49:47 +00:00
// Publish message
err = natsConn . PublishMsg ( msg )
if err != nil {
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "error: nats publish failed: %v, waiting equal to RetryWait of %ds before retrying" , err , message . RetryWait )
2022-12-29 21:49:47 +00:00
// sendErrorLogMessage(p.toRingbufferCh, node(p.node), er)
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-29 21:49:47 +00:00
time . Sleep ( time . Second * time . Duration ( message . RetryWait ) )
return ErrACKSubscribeRetry
2022-06-17 06:22:19 +00:00
}
2021-03-03 13:14:32 +00:00
2022-12-29 21:49:47 +00:00
// Wait up until ACKTimeout specified for a reply,
// continue and resend if no reply received,
// or exit if max retries for the message reached.
//
// The nats.Msg returned is discarded with '_' since
// we don't use it.
_ , err = subReply . NextMsg ( time . Second * time . Duration ( message . ACKTimeout ) )
if err != nil {
2022-06-16 22:39:15 +00:00
2022-12-29 21:49:47 +00:00
switch {
case err == nats . ErrNoResponders || err == nats . ErrTimeout :
er := fmt . Errorf ( "error: ack receive failed: waiting for %v seconds before retrying: subject=%v: %v" , message . RetryWait , p . subject . name ( ) , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-17 07:54:20 +00:00
2022-12-29 21:49:47 +00:00
time . Sleep ( time . Second * time . Duration ( message . RetryWait ) )
2022-12-30 10:54:10 +00:00
p . metrics . promNatsMessagesMissedACKsTotal . Inc ( )
2022-06-17 07:54:20 +00:00
2022-12-30 10:54:10 +00:00
return ErrACKSubscribeRetry
2022-06-17 06:22:19 +00:00
2022-12-29 21:49:47 +00:00
case err == nats . ErrBadSubscription || err == nats . ErrConnectionClosed :
er := fmt . Errorf ( "error: ack receive failed: conneciton closed or bad subscription, will not retry message: subject=%v: %v" , p . subject . name ( ) , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-17 06:22:19 +00:00
2022-12-29 21:49:47 +00:00
return er
2022-06-17 06:22:19 +00:00
2022-12-29 21:49:47 +00:00
default :
2023-10-04 20:58:42 +00:00
er := fmt . Errorf ( "error: ack receive failed: the error was not defined, check if nats client have been updated with new error values, and update ctrl to handle the new error type: subject=%v: %v" , p . subject . name ( ) , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-03-09 06:11:48 +00:00
2022-12-29 21:49:47 +00:00
return er
}
2021-08-16 11:01:12 +00:00
2022-12-29 21:49:47 +00:00
}
return nil
} ( )
}
2022-06-17 07:54:20 +00:00
if err == ErrACKSubscribeRetry {
continue
}
if err != nil {
// All error printing are handled within the function that returns
// the error, so we do nothing and return.
// No more trying to deliver the message
return
}
2021-09-16 06:13:24 +00:00
2022-06-17 07:54:20 +00:00
// Message were delivered successfully.
2022-04-01 07:21:50 +00:00
p . metrics . promNatsDeliveredTotal . Inc ( )
2021-08-26 08:50:40 +00:00
2022-12-31 06:31:18 +00:00
er = fmt . Errorf ( "info: sent nats message with subject %v, id: %v" , msg . Subject , message . ID )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-21 07:12:41 +00:00
2021-03-03 13:14:32 +00:00
return
}
}
2021-12-28 11:05:09 +00:00
// messageSubscriberHandler will deserialize the message when a new message is
2021-03-03 13:14:32 +00:00
// received, check the MessageType field in the message to decide what
// kind of message it is and then it will check how to handle that message type,
// and then call the correct method handler for it.
//
// This handler function should be started in it's own go routine,so
// one individual handler is started per message received so we can keep
// the state of the message being processed, and then reply back to the
// correct sending process's reply, meaning so we ACK back to the correct
// publisher.
2024-11-20 16:43:26 +00:00
func ( p process ) messageSubscriberHandlerNats ( natsConn * nats . Conn , thisNode string , msg * nats . Msg , subject string ) {
2021-12-28 11:05:09 +00:00
// Variable to hold a copy of the message data, so we don't mess with
// the original data since the original is a pointer value.
msgData := make ( [ ] byte , len ( msg . Data ) )
copy ( msgData , msg . Data )
2022-02-18 09:46:26 +00:00
// If debugging is enabled, print the source node name of the nats messages received.
if val , ok := msg . Header [ "fromNode" ] ; ok {
er := fmt . Errorf ( "info: nats message received from %v, with subject %v " , val , subject )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-02-18 09:46:26 +00:00
}
2024-11-20 16:43:26 +00:00
zr , err := zstd . NewReader ( nil )
if err != nil {
er := fmt . Errorf ( "error: zstd NewReader failed: %v" , err )
p . errorKernel . errSend ( p , Message { } , er , logWarning )
return
}
msgData , err = zr . DecodeAll ( msg . Data , nil )
if err != nil {
er := fmt . Errorf ( "error: zstd decoding failed: %v" , err )
p . errorKernel . errSend ( p , Message { } , er , logWarning )
zr . Close ( )
return
2021-12-28 11:05:09 +00:00
}
2021-03-03 13:14:32 +00:00
2024-11-20 16:43:26 +00:00
zr . Close ( )
2021-12-29 07:11:43 +00:00
2024-11-20 16:43:26 +00:00
message := Message { }
2021-12-29 07:29:11 +00:00
2024-11-20 16:43:26 +00:00
err = cbor . Unmarshal ( msgData , & message )
if err != nil {
er := fmt . Errorf ( "error: cbor decoding failed, subject: %v, header: %v, error: %v" , subject , msg . Header , err )
p . errorKernel . errSend ( p , message , er , logError )
return
2021-03-03 13:14:32 +00:00
}
2021-08-16 11:01:12 +00:00
// Check if it is an ACK or NACK message, and do the appropriate action accordingly.
2021-12-30 05:28:21 +00:00
//
2023-10-04 20:58:42 +00:00
// With ACK messages ctrl will keep the state of the message delivery, and try to
2021-12-30 05:28:21 +00:00
// resend the message if an ACK is not received within the timeout/retries specified
// in the message.
// When a process sends an ACK message, it will stop and wait for the nats-reply message
// for the time specified in the replyTimeout value. If no reply message is received
// within the given timeout the publishing process will try to resend the message for
2023-10-04 20:58:42 +00:00
// number of times specified in the retries field of the ctrl message.
// When receiving a ctrl-message with ACK enabled we send a message back the the
2021-12-30 05:28:21 +00:00
// node where the message originated using the msg.Reply subject field of the nats-message.
//
// With NACK messages we do not send a nats reply message, so the message will only be
// sent from the publisher once, and if it is not delivered it will not be retried.
2021-03-03 13:14:32 +00:00
switch {
2021-12-30 05:28:21 +00:00
2022-01-27 09:06:06 +00:00
// Check for ACK type Event.
2023-01-05 00:55:52 +00:00
case message . ACKTimeout >= 1 :
2023-01-06 07:32:42 +00:00
er := fmt . Errorf ( "subscriberHandler: received ACK message: %v, from: %v, id:%v" , message . Method , message . FromNode , message . ID )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-11 04:30:58 +00:00
// When spawning sub processes we can directly assign handlers to the process upon
// creation. We here check if a handler is already assigned, and if it is nil, we
// lookup and find the correct handler to use if available.
if p . handler == nil {
// Look up the method handler for the specified method.
mh , ok := p . methodsAvailable . CheckIfExists ( message . Method )
2023-10-04 20:58:42 +00:00
p . handler = mh
2022-06-11 04:30:58 +00:00
if ! ok {
2023-10-04 20:58:42 +00:00
er := fmt . Errorf ( "error: subscriberHandler: no such method type: %v" , p . subject . Method )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logWarning )
2022-06-11 04:30:58 +00:00
}
2021-03-03 13:14:32 +00:00
}
2022-05-30 04:25:15 +00:00
//var err error
2021-03-03 13:14:32 +00:00
2022-06-20 09:17:23 +00:00
_ = p . callHandler ( message , thisNode )
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
// Send a confirmation message back to the publisher to ACK that the
// message was received by the subscriber. The reply should be sent
2023-01-05 00:55:52 +00:00
// no matter if the handler was executed successfully or not
2022-06-20 09:17:23 +00:00
natsConn . Publish ( msg . Reply , [ ] byte { } )
2022-05-28 05:10:19 +00:00
2023-01-05 00:55:52 +00:00
case message . ACKTimeout < 1 :
2023-01-06 07:32:42 +00:00
er := fmt . Errorf ( "subscriberHandler: received NACK message: %v, from: %v, id:%v" , message . Method , message . FromNode , message . ID )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-11 04:30:58 +00:00
// When spawning sub processes we can directly assign handlers to the process upon
// creation. We here check if a handler is already assigned, and if it is nil, we
// lookup and find the correct handler to use if available.
if p . handler == nil {
// Look up the method handler for the specified method.
mh , ok := p . methodsAvailable . CheckIfExists ( message . Method )
2023-10-04 20:58:42 +00:00
p . handler = mh
2022-06-11 04:30:58 +00:00
if ! ok {
2023-10-04 20:58:42 +00:00
er := fmt . Errorf ( "error: subscriberHandler: no such method type: %v" , p . subject . Method )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logWarning )
2022-06-11 04:30:58 +00:00
}
2022-05-30 04:25:15 +00:00
}
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
// We do not send reply messages for EventNACL, so we can discard the output.
2022-06-11 04:30:58 +00:00
_ = p . callHandler ( message , thisNode )
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
default :
2023-10-04 20:58:42 +00:00
er := fmt . Errorf ( "info: did not find that specific type of event: %#v" , p . subject . Method )
2022-05-30 04:25:15 +00:00
p . errorKernel . infoSend ( p , message , er )
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
}
}
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
// callHandler will call the handler for the Request type defined in the message.
// If checking signatures and/or acl's are enabled the signatures they will be
// verified, and if OK the handler is called.
2022-06-11 04:30:58 +00:00
func ( p process ) callHandler ( message Message , thisNode string ) [ ] byte {
2022-06-20 09:17:23 +00:00
//out := []byte{}
2022-06-20 11:34:20 +00:00
// Call the handler if ACL/signature checking returns true.
// If the handler is to be called in a scheduled manner, we we take care of that too.
go func ( ) {
switch p . verifySigOrAclFlag ( message ) {
case true :
executeHandler ( p , message , thisNode )
case false :
// ACL/Signature checking failed.
er := fmt . Errorf ( "error: subscriberHandler: ACL were verified not-OK, doing nothing" )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logWarning )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 11:34:20 +00:00
}
} ( )
return [ ] byte { }
}
// executeHandler will call the handler for the Request type defined in the message.
func executeHandler ( p process , message Message , thisNode string ) {
2022-05-30 04:25:15 +00:00
var err error
2024-11-25 17:03:12 +00:00
if message . ToNode != "errorCentral" {
fmt . Printf ( "??????? DEBUG: executeHandler: got message: %v\n" , message )
fmt . Printf ( "??????? DEBUG: executeHandler: got thisNode: %v\n" , thisNode )
fmt . Printf ( "??????? DEBUG: executeHandler: got process: %+v\n" , p )
}
2022-05-30 04:25:15 +00:00
2022-06-20 09:17:23 +00:00
// Check if it is a message to run scheduled.
var interval int
var totalTime int
var runAsScheduled bool
switch {
case len ( message . Schedule ) < 2 :
// Not at scheduled message,
case len ( message . Schedule ) == 2 :
interval = message . Schedule [ 0 ]
totalTime = message . Schedule [ 1 ]
fallthrough
case interval > 0 && totalTime > 0 :
runAsScheduled = true
}
2022-12-29 21:49:47 +00:00
if p . configuration . EnableAclCheck {
// Either ACL were verified OK, or ACL/Signature check was not enabled, so we call the handler.
er := fmt . Errorf ( "info: subscriberHandler: Either ACL were verified OK, or ACL/Signature check was not enabled, so we call the handler: %v" , true )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-12-29 21:49:47 +00:00
}
2022-06-20 09:17:23 +00:00
2022-06-20 11:34:20 +00:00
switch {
case ! runAsScheduled :
2022-06-20 09:17:23 +00:00
2022-06-20 11:34:20 +00:00
go func ( ) {
_ , err = p . handler ( p , message , thisNode )
if err != nil {
er := fmt . Errorf ( "error: subscriberHandler: handler method failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logError )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 11:34:20 +00:00
}
} ( )
2022-06-20 09:17:23 +00:00
2022-06-20 11:34:20 +00:00
case runAsScheduled :
// Create two tickers to use for the scheduling.
intervalTicker := time . NewTicker ( time . Second * time . Duration ( interval ) )
totalTimeTicker := time . NewTicker ( time . Second * time . Duration ( totalTime ) )
2022-12-26 09:52:43 +00:00
defer intervalTicker . Stop ( )
defer totalTimeTicker . Stop ( )
2022-06-20 11:34:20 +00:00
// Run the handler once, so we don't have to wait for the first ticker.
go func ( ) {
_ , err := p . handler ( p , message , thisNode )
if err != nil {
er := fmt . Errorf ( "error: subscriberHandler: handler method failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logError )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 11:34:20 +00:00
}
} ( )
for {
select {
case <- p . ctx . Done ( ) :
er := fmt . Errorf ( "info: subscriberHandler: proc ctx done: toNode=%v, fromNode=%v, method=%v, methodArgs=%v" , message . ToNode , message . FromNode , message . Method , message . MethodArgs )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 09:17:23 +00:00
2022-06-20 11:34:20 +00:00
//cancel()
return
case <- totalTimeTicker . C :
// Total time reached. End the process.
//cancel()
er := fmt . Errorf ( "info: subscriberHandler: schedule totalTime done: toNode=%v, fromNode=%v, method=%v, methodArgs=%v" , message . ToNode , message . FromNode , message . Method , message . MethodArgs )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 09:17:23 +00:00
2022-06-20 11:34:20 +00:00
return
2022-06-20 10:28:28 +00:00
2022-06-20 11:34:20 +00:00
case <- intervalTicker . C :
2022-06-20 10:28:28 +00:00
go func ( ) {
_ , err := p . handler ( p , message , thisNode )
if err != nil {
er := fmt . Errorf ( "error: subscriberHandler: handler method failed: %v" , err )
2023-01-11 07:38:15 +00:00
p . errorKernel . errSend ( p , message , er , logError )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-06-20 10:28:28 +00:00
}
} ( )
2022-06-20 09:17:23 +00:00
}
2022-05-30 04:25:15 +00:00
}
2022-06-20 11:34:20 +00:00
}
2022-05-30 04:25:15 +00:00
}
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
// verifySigOrAclFlag will do signature and/or acl checking based on which of
// those features are enabled, and then call the handler.
// The handler will also be called if neither signature or acl checking is enabled
// since it is up to the subscriber to decide if it want to use the auth features
// or not.
func ( p process ) verifySigOrAclFlag ( message Message ) bool {
doHandler := false
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
switch {
2022-05-28 05:10:19 +00:00
2022-05-30 04:25:15 +00:00
// If no checking enabled we should just allow the message.
case ! p . nodeAuth . configuration . EnableSignatureCheck && ! p . nodeAuth . configuration . EnableAclCheck :
2022-06-20 09:17:23 +00:00
//log.Printf(" * DEBUG: verify acl/sig: no acl or signature checking at all is enabled, ALLOW the message, method=%v\n", message.Method)
2022-05-30 04:25:15 +00:00
doHandler = true
2021-03-03 13:14:32 +00:00
2022-05-30 04:25:15 +00:00
// If only sig check enabled, and sig OK, we should allow the message.
case p . nodeAuth . configuration . EnableSignatureCheck && ! p . nodeAuth . configuration . EnableAclCheck :
sigOK := p . nodeAuth . verifySignature ( message )
2022-06-02 04:29:37 +00:00
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "verifySigOrAclFlag: verify acl/sig: Only signature checking enabled, ALLOW the message if sigOK, sigOK=%v, method %v" , sigOK , message . Method )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-05-30 04:25:15 +00:00
if sigOK {
doHandler = true
2021-03-03 13:14:32 +00:00
}
2022-05-30 04:25:15 +00:00
// If both sig and acl check enabled, and sig and acl OK, we should allow the message.
case p . nodeAuth . configuration . EnableSignatureCheck && p . nodeAuth . configuration . EnableAclCheck :
2022-05-27 10:17:15 +00:00
sigOK := p . nodeAuth . verifySignature ( message )
aclOK := p . nodeAuth . verifyAcl ( message )
2022-06-02 04:29:37 +00:00
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "verifySigOrAclFlag: verify acl/sig:both signature and acl checking enabled, allow the message if sigOK and aclOK, or method is not REQCliCommand, sigOK=%v, aclOK=%v, method=%v" , sigOK , aclOK , message . Method )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2022-05-27 10:17:15 +00:00
2022-05-30 04:25:15 +00:00
if sigOK && aclOK {
doHandler = true
2021-03-03 13:14:32 +00:00
}
2021-08-16 11:01:12 +00:00
2022-05-30 04:25:15 +00:00
// none of the verification options matched, we should keep the default value
// of doHandler=false, so the handler is not done.
2021-03-03 13:14:32 +00:00
default :
2023-01-12 06:44:28 +00:00
er := fmt . Errorf ( "verifySigOrAclFlag: verify acl/sig: None of the verify flags matched, not doing handler for message, method=%v" , message . Method )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2021-03-03 13:14:32 +00:00
}
2022-05-30 04:25:15 +00:00
return doHandler
2021-03-03 13:14:32 +00:00
}
2021-08-16 11:01:12 +00:00
// SubscribeMessage will register the Nats callback function for the specified
// nats subject. This allows us to receive Nats messages for a given subject
// on a node.
2024-11-21 03:54:55 +00:00
func ( p process ) subscribeMessagesNats ( ) * nats . Subscription {
2021-03-03 13:14:32 +00:00
subject := string ( p . subject . name ( ) )
2021-03-25 13:30:39 +00:00
2024-11-21 03:54:55 +00:00
// Register the callback function that NATS will use when new messages arrive.
natsSubscription , err := p . natsConn . QueueSubscribe ( subject , subject , func ( msg * nats . Msg ) {
2021-08-16 11:01:12 +00:00
// Start up the subscriber handler.
2024-11-20 16:43:26 +00:00
go p . messageSubscriberHandlerNats ( p . natsConn , p . configuration . NodeName , msg , subject )
2021-03-03 13:14:32 +00:00
} )
2024-11-21 03:54:55 +00:00
2021-03-03 13:14:32 +00:00
if err != nil {
2024-11-21 03:54:55 +00:00
er := fmt . Errorf ( "error: nats queue subscribe failed: %v" , err )
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2021-04-08 10:51:54 +00:00
return nil
2021-03-03 13:14:32 +00:00
}
2021-04-08 10:51:54 +00:00
return natsSubscription
2021-03-03 13:14:32 +00:00
}
2021-03-09 06:43:55 +00:00
// publishMessages will do the publishing of messages for one single
2021-12-25 18:21:10 +00:00
// process. The function should be run as a goroutine, and will run
// as long as the process it belongs to is running.
2024-11-21 04:28:56 +00:00
func ( p process ) publishMessagesNats ( natsConn * nats . Conn ) {
2021-12-27 10:40:29 +00:00
2022-06-17 22:03:25 +00:00
// Adding a timer that will be used for when to remove the sub process
// publisher. The timer is reset each time a message is published with
// the process, so the sub process publisher will not be removed until
// it have not received any messages for the given amount of time.
ticker := time . NewTicker ( time . Second * time . Duration ( p . configuration . KeepPublishersAliveFor ) )
2022-12-26 09:52:43 +00:00
defer ticker . Stop ( )
2022-06-17 22:03:25 +00:00
2021-03-03 13:14:32 +00:00
for {
2021-04-07 16:05:07 +00:00
// Wait and read the next message on the message channel, or
// exit this function if Cancel are received via ctx.
select {
2022-06-17 22:03:25 +00:00
case <- ticker . C :
2023-01-13 14:09:23 +00:00
if p . isLongRunningPublisher {
2024-11-24 18:12:57 +00:00
// er := fmt.Errorf("info: isLongRunningPublisher, will not cancel publisher: %v", p.processName)
2023-01-13 14:09:23 +00:00
//sendErrorLogMessage(p.toRingbufferCh, Node(p.node), er)
2024-11-24 18:12:57 +00:00
// p.errorKernel.logDebug(er)
2023-01-13 14:09:23 +00:00
continue
}
2022-06-17 22:03:25 +00:00
// We only want to remove subprocesses
2023-01-12 06:12:35 +00:00
// REMOVED 120123: Removed if so all publishers should be canceled if inactive.
//if p.isSubProcess {
p . processes . active . mu . Lock ( )
p . ctxCancel ( )
delete ( p . processes . active . procNames , p . processName )
p . processes . active . mu . Unlock ( )
er := fmt . Errorf ( "info: canceled publisher: %v" , p . processName )
//sendErrorLogMessage(p.toRingbufferCh, Node(p.node), er)
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2023-01-03 10:41:18 +00:00
2023-01-12 06:12:35 +00:00
return
//}
2022-06-17 22:03:25 +00:00
2022-02-01 06:22:06 +00:00
case m := <- p . subject . messageCh :
2022-06-17 22:03:25 +00:00
ticker . Reset ( time . Second * time . Duration ( p . configuration . KeepPublishersAliveFor ) )
2022-02-03 06:04:10 +00:00
// Sign the methodArgs, and add the signature to the message.
m . ArgSignature = p . addMethodArgSignature ( m )
2022-02-11 08:04:14 +00:00
// fmt.Printf(" * DEBUG: add signature, fromNode: %v, method: %v, len of signature: %v\n", m.FromNode, m.Method, len(m.ArgSignature))
2022-02-03 06:04:10 +00:00
2024-11-22 19:36:55 +00:00
go p . publishAMessageNats ( m , natsConn )
2021-04-07 16:05:07 +00:00
case <- p . ctx . Done ( ) :
2022-06-14 05:05:38 +00:00
er := fmt . Errorf ( "info: canceling publisher: %v" , p . processName )
2021-07-02 16:32:01 +00:00
//sendErrorLogMessage(p.toRingbufferCh, Node(p.node), er)
2024-03-27 11:48:17 +00:00
p . errorKernel . logDebug ( er )
2021-04-07 16:05:07 +00:00
return
}
2022-02-01 06:22:06 +00:00
}
}
2021-09-13 11:15:21 +00:00
2022-02-03 06:04:10 +00:00
func ( p process ) addMethodArgSignature ( m Message ) [ ] byte {
2022-02-04 05:24:34 +00:00
argsString := argsToString ( m . MethodArgs )
2022-04-21 11:21:36 +00:00
sign := ed25519 . Sign ( p . nodeAuth . SignPrivateKey , [ ] byte ( argsString ) )
2022-02-03 06:04:10 +00:00
return sign
}
2024-11-22 19:36:55 +00:00
func ( p process ) publishAMessageNats ( m Message , natsConn * nats . Conn ) {
2022-02-01 06:22:06 +00:00
// Create the initial header, and set values below depending on the
// various configuration options chosen.
2022-02-17 07:14:56 +00:00
natsMsgHeader := make ( nats . Header )
2022-02-18 04:22:53 +00:00
natsMsgHeader [ "fromNode" ] = [ ] string { string ( p . node ) }
2021-12-29 05:40:42 +00:00
2022-02-01 06:22:06 +00:00
// Get the process name so we can look up the process in the
// processes map, and increment the message counter.
2024-11-21 04:28:56 +00:00
pn := processNameGet ( p . subject . name ( ) , processKindPublisherNats )
2022-12-31 06:31:18 +00:00
2024-11-22 19:36:55 +00:00
serCmp , err := p . messageSerializeAndCompress ( m )
if err != nil {
log . Fatalf ( "messageSerializeAndCompress: error: %v\n" , err )
}
2021-03-12 08:38:19 +00:00
2022-02-01 06:22:06 +00:00
// Create the Nats message with headers and payload, and do the
// sending of the message.
2024-11-22 19:36:55 +00:00
p . messageDeliverNats ( serCmp , natsMsgHeader , natsConn , m )
2022-02-01 06:22:06 +00:00
// Increment the counter for the next message to be sent.
p . messageID ++
{
p . processes . active . mu . Lock ( )
p . processes . active . procNames [ pn ] = p
p . processes . active . mu . Unlock ( )
2021-03-03 13:14:32 +00:00
}
}