1
0
Fork 0
mirror of https://github.com/postmannen/ctrl.git synced 2025-01-18 21:59:30 +00:00
ctrl/server.go

680 lines
20 KiB
Go
Raw Normal View History

// Notes:
package ctrl
2021-01-25 15:23:00 +01:00
import (
"context"
"encoding/json"
2021-01-25 15:23:00 +01:00
"fmt"
"io"
2021-01-25 15:23:00 +01:00
"log"
2021-03-30 10:37:16 +02:00
"net"
2021-08-23 12:47:33 +02:00
"net/http"
"os"
2021-05-12 09:50:03 +02:00
"path/filepath"
"regexp"
"strings"
"sync"
2021-01-25 15:23:00 +01:00
"time"
"github.com/fxamacker/cbor/v2"
"github.com/jinzhu/copier"
"github.com/klauspost/compress/zstd"
2021-01-25 15:23:00 +01:00
"github.com/nats-io/nats.go"
"github.com/prometheus/client_golang/prometheus"
2021-01-25 15:23:00 +01:00
)
2021-02-26 07:55:28 +01:00
// processName is the name a process is registered under. It is the type
// returned by processNameGet.
type processName string
// Will return a process name made up of subjectName+processKind
func processNameGet(sn subjectName) processName {
return processName(sn)
2021-02-26 07:55:28 +01:00
}
// server is the structure that will hold the state about spawned
// processes on a local instance.
2021-01-27 14:02:57 +01:00
type server struct {
// The main background context
ctx context.Context
// The CancelFunc for the main context
2021-08-11 12:23:37 +02:00
cancel context.CancelFunc
// Configuration options used for running the server
configuration *Configuration
// The nats connection to the broker
2021-01-27 14:02:57 +01:00
natsConn *nats.Conn
// net listener for communicating via the ctrl socket
ctrlSocket net.Listener
// processes holds all the information about running processes
processes *processes
// The name of the node
nodeName string
2022-04-01 07:09:55 +02:00
// toRingBufferCh are the channel where new messages in a bulk
// format (slice) are put into the system.
2021-11-09 13:18:58 +01:00
//
// In general the ringbuffer will read this
// channel, unfold each slice, and put single messages on the buffer.
newMessagesCh chan Message
// messageDeliverLocalCh
messageDeliverLocalCh chan []Message
// Channel for messages to publish with Jetstream.
jetstreamPublishCh chan Message
2021-02-24 10:58:02 +01:00
// errorKernel is doing all the error handling like what to do if
// an error occurs.
errorKernel *errorKernel
2021-02-18 12:29:14 +01:00
// metric exporter
metrics *metrics
// Version of package
version string
// processInitial is the initial process that all other processes are tied to.
processInitial process
2022-04-21 13:21:36 +02:00
// nodeAuth holds all the signatures, the public keys and other components
// related to authentication on an individual node.
nodeAuth *nodeAuth
// helloRegister is a register of all the nodes that have sent hello messages
// to the central server
helloRegister *helloRegister
// holds the logic for the central auth services
centralAuth *centralAuth
// message ID
messageID messageID
// audit logging
auditLogCh chan []Message
zstdEncoder *zstd.Encoder
}
// messageID is a counter for numbering messages. The mutex is held while
// the counter is read or incremented.
type messageID struct {
	// id is the last ID that was handed out.
	id int
	// mu guards id.
	mu sync.Mutex
}
// newServer will prepare and return a server type
2022-02-07 04:23:13 +01:00
func NewServer(configuration *Configuration, version string) (*server, error) {
2021-08-11 12:23:37 +02:00
// Set up the main background context.
ctx, cancel := context.WithCancel(context.Background())
2022-02-07 04:23:13 +01:00
metrics := newMetrics(configuration.PromHostAndPort)
// Start the error kernel that will do all the error handling
// that is not done within a process.
2023-01-11 06:09:42 +01:00
errorKernel := newErrorKernel(ctx, metrics, configuration)
2021-04-19 21:06:37 +02:00
var opt nats.Option
2021-08-16 13:01:12 +02:00
2022-02-07 04:23:13 +01:00
if configuration.RootCAPath != "" {
opt = nats.RootCAs(configuration.RootCAPath)
2021-04-19 21:06:37 +02:00
}
switch {
case configuration.NkeySeed != "":
cwd, err := os.Getwd()
if err != nil {
2024-03-08 22:55:21 +01:00
cancel()
return nil, fmt.Errorf("error: failed to get current working directory when creating tmp seed file: %v", err)
}
pth := filepath.Join(cwd, "seed.txt")
// f, err := os.CreateTemp(pth, "")
// if err != nil {
// return nil, fmt.Errorf("error: failed to create tmp seed file: %v", err)
// }
err = os.WriteFile(pth, []byte(configuration.NkeySeed), 0600)
if err != nil {
2024-03-08 22:55:21 +01:00
cancel()
return nil, fmt.Errorf("error: failed to write temp seed file: %v", err)
}
opt, err = nats.NkeyOptionFromSeed(pth)
if err != nil {
cancel()
return nil, fmt.Errorf("error: failed to read temp nkey seed file: %v", err)
}
// // TODO: REMOVED for testing
//defer func() {
// err = os.Remove(pth)
// if err != nil {
// cancel()
// log.Fatalf("error: failed to remove temp seed file: %v\n", err)
// }
//}()
case configuration.NkeySeedFile != "" && configuration.NkeyFromED25519SSHKeyFile == "":
2021-05-20 12:27:25 +02:00
var err error
2021-08-11 12:23:37 +02:00
2022-02-07 04:23:13 +01:00
opt, err = nats.NkeyOptionFromSeed(configuration.NkeySeedFile)
2021-05-20 12:27:25 +02:00
if err != nil {
cancel()
2021-05-20 12:27:25 +02:00
return nil, fmt.Errorf("error: failed to read nkey seed file: %v", err)
}
case configuration.NkeyFromED25519SSHKeyFile != "":
var err error
opt, err = configuration.nkeyOptFromSSHKey()
if err != nil {
cancel()
return nil, fmt.Errorf("error: failed to read nkey seed file: %v", err)
}
}
var conn *nats.Conn
2021-08-16 13:01:12 +02:00
// Connect to the nats server, and retry until succesful.
for {
var err error
// Setting MaxReconnects to -1 which equals unlimited.
2022-02-07 04:23:13 +01:00
conn, err = nats.Connect(configuration.BrokerAddress,
2021-12-17 17:28:57 +01:00
opt,
2022-06-21 10:40:32 +02:00
//nats.FlusherTimeout(time.Second*10),
2021-12-17 17:28:57 +01:00
nats.MaxReconnects(-1),
2022-02-07 04:23:13 +01:00
nats.ReconnectJitter(time.Duration(configuration.NatsReconnectJitter)*time.Millisecond, time.Duration(configuration.NatsReconnectJitterTLS)*time.Second),
nats.Timeout(time.Second*time.Duration(configuration.NatsConnOptTimeout)),
2021-12-17 17:28:57 +01:00
)
// If no servers where available, we loop and retry until succesful.
if err != nil {
2022-02-07 04:23:13 +01:00
log.Printf("error: could not connect, waiting %v seconds, and retrying: %v\n", configuration.NatsConnectRetryInterval, err)
time.Sleep(time.Duration(time.Second * time.Duration(configuration.NatsConnectRetryInterval)))
continue
}
2021-02-01 11:13:38 +01:00
break
}
2021-08-11 12:23:37 +02:00
2021-12-16 11:01:01 +01:00
log.Printf(" * conn.Opts.ReconnectJitterTLS: %v\n", conn.Opts.ReconnectJitterTLS)
log.Printf(" * conn.Opts.ReconnectJitter: %v\n", conn.Opts.ReconnectJitter)
var ctrlSocket net.Listener
2022-01-07 07:36:19 +01:00
var err error
2022-12-21 07:17:47 +01:00
// Check if tmp folder for socket exists, if not create it
if _, err := os.Stat(configuration.SocketFolder); os.IsNotExist(err) {
err := os.MkdirAll(configuration.SocketFolder, 0770)
2022-12-21 07:17:47 +01:00
if err != nil {
cancel()
return nil, fmt.Errorf("error: failed to create socket folder directory %v: %v", configuration.SocketFolder, err)
}
}
// Open the ctrl socket file, and start the listener if enabled.
2022-02-07 04:23:13 +01:00
if configuration.EnableSocket {
ctrlSocket, err = createSocket(configuration.SocketFolder, "ctrl.sock")
2022-01-07 07:36:19 +01:00
if err != nil {
cancel()
return nil, err
}
2021-03-30 10:37:16 +02:00
}
//var nodeAuth *nodeAuth
//if configuration.EnableSignatureCheck {
2022-04-21 13:21:36 +02:00
nodeAuth := newNodeAuth(configuration, errorKernel)
2022-02-11 09:04:14 +01:00
// fmt.Printf(" * DEBUG: newServer: signatures contains: %+v\n", signatures)
//}
//var centralAuth *centralAuth
//if configuration.IsCentralAuth {
centralAuth := newCentralAuth(configuration, errorKernel)
//}
zstdEncoder, err := zstd.NewWriter(nil, zstd.WithEncoderConcurrency(1))
if err != nil {
log.Fatalf("error: zstd new encoder failed: %v", err)
}
defer func() {
go func() {
<-ctx.Done()
zstdEncoder.Close()
}()
}()
2022-04-01 07:09:55 +02:00
s := server{
ctx: ctx,
cancel: cancel,
configuration: configuration,
nodeName: configuration.NodeName,
natsConn: conn,
ctrlSocket: ctrlSocket,
newMessagesCh: make(chan Message),
messageDeliverLocalCh: make(chan []Message),
jetstreamPublishCh: make(chan Message),
metrics: metrics,
version: version,
errorKernel: errorKernel,
nodeAuth: nodeAuth,
helloRegister: newHelloRegister(),
centralAuth: centralAuth,
auditLogCh: make(chan []Message),
zstdEncoder: zstdEncoder,
}
2022-04-01 07:09:55 +02:00
s.processes = newProcesses(ctx, &s)
// Create the default data folder for where subscribers should
2021-03-25 12:50:58 +01:00
// write it's data, check if data folder exist, and create it if needed.
2022-02-07 04:23:13 +01:00
if _, err := os.Stat(configuration.SubscribersDataFolder); os.IsNotExist(err) {
if configuration.SubscribersDataFolder == "" {
return nil, fmt.Errorf("error: subscribersDataFolder value is empty, you need to provide the config or the flag value at startup %v: %v", configuration.SubscribersDataFolder, err)
}
err := os.Mkdir(configuration.SubscribersDataFolder, 0770)
if err != nil {
2022-02-07 04:23:13 +01:00
return nil, fmt.Errorf("error: failed to create data folder directory %v: %v", configuration.SubscribersDataFolder, err)
}
er := fmt.Errorf("info: creating subscribers data folder at %v", configuration.SubscribersDataFolder)
s.errorKernel.logDebug(er)
}
2022-04-01 07:09:55 +02:00
return &s, nil
2021-02-05 07:25:12 +01:00
}
// helloRegister is the register of the nodes that have sent hello messages.
// The type has no fields.
type helloRegister struct{}
// newHelloRegister returns a pointer to a new, empty helloRegister.
func newHelloRegister() *helloRegister {
	return &helloRegister{}
}
// createSocket opens a unix socket listener on the path made up of
// socketFolder and socketFileName.
//
// A file that is already present on that path is deleted first. The
// listener is returned, or an error if the existing file could not be
// deleted or the socket could not be opened.
func createSocket(socketFolder string, socketFileName string) (net.Listener, error) {
	sockPath := filepath.Join(socketFolder, socketFileName)

	// Every Stat result except "does not exist" leads to a delete attempt,
	// so a left over socket file will not be in the way for the listen below.
	_, statErr := os.Stat(sockPath)
	if !os.IsNotExist(statErr) {
		if rmErr := os.Remove(sockPath); rmErr != nil {
			return nil, fmt.Errorf("error: could not delete sock file: %v", rmErr)
		}
	}

	listener, listenErr := net.Listen("unix", sockPath)
	if listenErr != nil {
		return nil, fmt.Errorf("error: failed to open socket: %v", listenErr)
	}

	return listener, nil
}
2021-02-24 10:58:02 +01:00
// Start will spawn up all the predefined subscriber processes.
2021-02-10 07:25:44 +01:00
// Spawning of publisher processes is done on the fly by checking
2021-02-24 10:58:02 +01:00
// if there is publisher process for a given message subject, and
2021-08-16 13:01:12 +02:00
// if it does not exist it will spawn one.
func (s *server) Start() {
log.Printf("Starting ctrl, version=%+v\n", s.version)
s.metrics.promVersion.With(prometheus.Labels{"version": string(s.version)})
2021-08-04 10:37:24 +02:00
go func() {
err := s.errorKernel.start(s.newMessagesCh)
2021-08-04 10:37:24 +02:00
if err != nil {
log.Printf("%v\n", err)
}
}()
2021-02-18 12:29:14 +01:00
// Start collecting the metrics
2021-08-03 13:57:29 +02:00
go func() {
2021-08-04 10:37:24 +02:00
err := s.metrics.start()
2021-08-03 13:57:29 +02:00
if err != nil {
log.Printf("%v\n", err)
os.Exit(1)
}
}()
2021-02-18 12:29:14 +01:00
// Start the checking the input socket for new messages from operator.
2022-01-07 07:36:19 +01:00
if s.configuration.EnableSocket {
go s.readSocket()
}
2023-01-08 08:32:58 +01:00
// Start the checking the readfolder for new messages from operator.
if s.configuration.EnableReadFolder {
go s.readFolder()
}
2021-09-10 05:26:16 +02:00
// Check if we should start the tcp listener for new messages from operator.
if s.configuration.TCPListener != "" {
go s.readTCPListener()
}
2021-09-10 05:26:16 +02:00
// Check if we should start the http listener for new messages from operator.
if s.configuration.HTTPListener != "" {
go s.readHttpListener()
}
// Start audit logger.
go s.startAuditLog(s.ctx)
// Start up the predefined subscribers.
//
// Since all the logic to handle processes are tied to the process
// struct, we need to create an initial process to start the rest.
2021-08-11 12:23:37 +02:00
//
// The context of the initial process are set in processes.Start.
sub := newSubject(Initial, s.nodeName)
s.processInitial = newProcess(context.TODO(), s, sub)
// Start all wanted subscriber processes.
s.processes.Start(s.processInitial)
2021-08-09 09:18:30 +02:00
time.Sleep(time.Second * 1)
s.processes.printProcessesMap()
// Start Jetstream publisher and consumer.
go s.jetstreamPublish()
go s.jetstreamConsume()
2021-08-23 12:47:33 +02:00
// Start exposing the the data folder via HTTP if flag is set.
if s.configuration.ExposeDataFolder != "" {
log.Printf("info: Starting expose of data folder via HTTP\n")
2024-03-08 22:55:21 +01:00
go s.exposeDataFolder()
2021-08-23 12:47:33 +02:00
}
2021-08-09 09:18:30 +02:00
// Start the processing of new messages from an input channel.
s.routeMessagesToPublisherProcess()
2021-02-10 07:25:44 +01:00
// Start reading the channel for injecting direct messages that should
// not be sent via the message broker.
s.directSAMSChRead()
2022-01-26 09:23:02 +01:00
// Check and enable read the messages specified in the startup folder.
s.readStartupFolder()
2021-08-09 09:18:30 +02:00
}
// startAuditLog writes an entry to the audit file for every message received
// on s.auditLogCh, until ctx is done.
//
// The audit file is store.log in the configured database folder. Each entry
// is a timestamp followed by the message as JSON, with the content of the
// Data field removed.
//
// If the file can not be opened the error is logged once. The channel is
// still read after that, so senders are not blocked, but nothing is written.
// A message that can not be copied or marshaled is reported to the error
// kernel and skipped.
func (s *server) startAuditLog(ctx context.Context) {
	storeFile := filepath.Join(s.configuration.DatabaseFolder, "store.log")
	f, err := os.OpenFile(storeFile, os.O_APPEND|os.O_RDWR|os.O_CREATE, 0660)
	if err != nil {
		log.Printf("error: startPermanentStore: failed to open file: %v\n", err)
		f = nil
	} else {
		defer f.Close()
	}

	for {
		select {
		case <-ctx.Done():
			return

		case messages := <-s.auditLogCh:
			if f == nil {
				// There is no file to write to. The messages are received
				// and dropped to avoid blocking the senders.
				continue
			}

			for _, message := range messages {
				msgForPermStore := Message{}
				if err := copier.Copy(&msgForPermStore, message); err != nil {
					er := fmt.Errorf("error:fillBuffer: copying message: %v", err)
					s.errorKernel.errSend(s.processInitial, Message{}, er, logError)
					continue
				}
				// Remove the content of the data field.
				msgForPermStore.Data = nil

				js, err := json.Marshal(msgForPermStore)
				if err != nil {
					er := fmt.Errorf("error:fillBuffer: json marshaling: %v", err)
					s.errorKernel.errSend(s.processInitial, Message{}, er, logError)
					// Skip the entry instead of writing a line with an
					// empty payload.
					continue
				}

				d := time.Now().Format("Mon Jan _2 15:04:05 2006") + ", " + string(js) + "\n"

				if _, err := f.WriteString(d); err != nil {
					log.Printf("error:failed to write entry: %v\n", err)
				}
			}
		}
	}
}
// directSAMSChRead for injecting messages directly in to the local system
// without sending them via the message broker.
func (s *server) directSAMSChRead() {
go func() {
for {
select {
case <-s.ctx.Done():
log.Printf("info: stopped the directSAMSCh reader\n\n")
return
case messages := <-s.messageDeliverLocalCh:
2022-11-01 09:54:35 +01:00
// fmt.Printf(" * DEBUG: directSAMSChRead: <- sams = %v\n", sams)
// Range over all the sams, find the process, check if the method exists, and
// handle the message by starting the correct method handler.
for i := range messages {
// TODO: !!!!!! Shoud the node here be the fromNode ???????
subject := newSubject(messages[i].Method, string(messages[i].ToNode))
processName := processNameGet(subject.name())
s.processes.active.mu.Lock()
p := s.processes.active.procNames[processName]
s.processes.active.mu.Unlock()
mh, ok := p.methodsAvailable.CheckIfExists(messages[i].Method)
if !ok {
er := fmt.Errorf("error: subscriberHandler: method type not available: %v", p.subject.Method)
p.errorKernel.errSend(p, messages[i], er, logError)
continue
}
p.handler = mh
go executeHandler(p, messages[i], s.nodeName)
}
}
}
}()
}
2021-08-09 09:18:30 +02:00
// Will stop all processes started during startup.
func (s *server) Stop() {
// Stop the started pub/sub message processes.
2021-08-11 12:23:37 +02:00
s.processes.Stop()
2021-08-09 09:18:30 +02:00
log.Printf("info: stopped all subscribers\n")
2021-07-02 08:38:44 +02:00
2021-08-09 09:18:30 +02:00
// Stop the errorKernel.
s.errorKernel.stop()
log.Printf("info: stopped the errorKernel\n")
2021-07-02 13:26:30 +02:00
2021-08-09 09:18:30 +02:00
// Stop the main context.
2021-08-11 12:23:37 +02:00
s.cancel()
2021-08-09 09:18:30 +02:00
log.Printf("info: stopped the main context\n")
2021-08-09 14:41:31 +02:00
// Delete the ctrl socket file when the program exits.
socketFilepath := filepath.Join(s.configuration.SocketFolder, "ctrl.sock")
2021-08-09 14:41:31 +02:00
if _, err := os.Stat(socketFilepath); !os.IsNotExist(err) {
err = os.Remove(socketFilepath)
if err != nil {
er := fmt.Errorf("error: could not delete sock file: %v", err)
log.Printf("%v\n", er)
}
}
}
// routeMessagesToPublisherProcess takes a database name it's input argument.
2021-08-25 08:50:24 +02:00
// The database will be used as the persistent k/v store for the work
// queue which is implemented as a ring buffer.
// The ringBufferInCh are where we get new messages to publish.
2021-03-09 07:43:55 +01:00
// Incomming messages will be routed to the correct subject process, where
// the handling of each nats subject is handled within it's own separate
// worker process.
// It will also handle the process of spawning more worker processes
// for publisher subjects if it does not exist.
func (s *server) routeMessagesToPublisherProcess() {
2021-03-09 07:43:55 +01:00
// Start reading new fresh messages received on the incomming message
// pipe/file.
2021-03-09 07:43:55 +01:00
// Process the messages that are in the ring buffer. Check and
// send if there are a specific subject for it, and if no subject
// exist throw an error.
var method Method
methodsAvailable := method.GetMethodsAvailable()
go func() {
for message := range s.newMessagesCh {
2021-03-09 07:43:55 +01:00
go func(message Message) {
2021-11-09 14:01:42 +01:00
s.messageID.mu.Lock()
s.messageID.id++
message.ID = s.messageID.id
s.messageID.mu.Unlock()
s.metrics.promMessagesProcessedIDLast.Set(float64(message.ID))
2021-09-21 23:29:42 +02:00
// Check if the format of the message is correct.
if _, ok := methodsAvailable.CheckIfExists(message.Method); !ok {
er := fmt.Errorf("error: routeMessagesToProcess: the method do not exist, message dropped: %v", message.Method)
s.errorKernel.errSend(s.processInitial, message, er, logError)
return
}
switch {
case message.Retries < 0:
message.Retries = s.configuration.DefaultMessageRetries
}
if message.MethodTimeout < 1 && message.MethodTimeout != -1 {
message.MethodTimeout = s.configuration.DefaultMethodTimeout
}
// ---
// Check for {{CTRL_FILE}} and if we should read and load a local file into
// the message before sending.
var filePathToOpen string
foundFile := false
var argPos int
for i, v := range message.MethodArgs {
if strings.Contains(v, "{{CTRL_FILE:") {
foundFile = true
argPos = i
// Example to split:
// echo {{CTRL_FILE:/somedir/msg_file.yaml}}>ctrlfile.txt
//
// Split at colon. We want the part after.
ss := strings.Split(v, ":")
// Split at "}}",so pos [0] in the result contains just the file path.
sss := strings.Split(ss[1], "}}")
filePathToOpen = sss[0]
}
}
if foundFile {
fh, err := os.Open(filePathToOpen)
if err != nil {
er := fmt.Errorf("error: routeMessagesToPublisherProcess: failed to open file given as CTRL_FILE argument: %v", err)
s.errorKernel.logError(er)
return
}
defer fh.Close()
b, err := io.ReadAll(fh)
if err != nil {
er := fmt.Errorf("error: routeMessagesToPublisherProcess: failed to read file %v given as CTRL_FILE argument: %v", filePathToOpen, err)
s.errorKernel.logError(er)
return
}
// Replace the {{CTRL_FILE}} with the actual content read from file.
re := regexp.MustCompile(`(.*)({{CTRL_FILE.*}})(.*)`)
message.MethodArgs[argPos] = re.ReplaceAllString(message.MethodArgs[argPos], `${1}`+string(b)+`${3}`)
// ---
}
message.ArgSignature = s.processInitial.addMethodArgSignature(message)
go s.processInitial.publishAMessage(message, s.natsConn)
}(message)
2021-03-09 07:43:55 +01:00
}
}()
}
2021-08-23 12:47:33 +02:00
2024-03-08 22:55:21 +01:00
func (s *server) exposeDataFolder() {
2021-08-23 17:05:56 +02:00
fileHandler := func(w http.ResponseWriter, r *http.Request) {
// w.Header().Set("Content-Type", "text/html")
http.FileServer(http.Dir(s.configuration.SubscribersDataFolder)).ServeHTTP(w, r)
}
2021-08-23 12:47:33 +02:00
//create a file server, and serve the files found in ./
2021-08-23 17:05:56 +02:00
//fd := http.FileServer(http.Dir(s.configuration.SubscribersDataFolder))
http.HandleFunc("/", fileHandler)
2021-08-23 12:47:33 +02:00
// we create a net.Listen type to use later with the http.Serve function.
nl, err := net.Listen("tcp", s.configuration.ExposeDataFolder)
if err != nil {
log.Println("error: starting net.Listen: ", err)
}
// start the web server with http.Serve instead of the usual http.ListenAndServe
err = http.Serve(nl, nil)
if err != nil {
log.Printf("Error: failed to start web server: %v\n", err)
}
os.Exit(1)
}
// messageSerializeAndCompress encodes msg as CBOR, compresses the encoded
// bytes with the zstd encoder of the server, and returns the compressed bytes.
//
// If the CBOR encoding fails the error is given to errorKernel.logDebug and
// returned together with a nil slice.
func (s *server) messageSerializeAndCompress(msg Message) ([]byte, error) {
	encoded, err := cbor.Marshal(msg)
	if err != nil {
		wrapped := fmt.Errorf("error: messageDeliverNats: cbor encode message failed: %v", err)
		s.errorKernel.logDebug(wrapped)
		return nil, wrapped
	}

	// The compression is always done here; no configuration value is checked.
	return s.zstdEncoder.EncodeAll(encoded, nil), nil
}
// messageDeserializeAndUncompress decompresses msgData with zstd, decodes
// the result as CBOR, and returns the decoded Message.
//
// An empty Message and an error is returned if the zstd reader can not be
// created, or if the decompression or the CBOR decoding fails.
//
// NOTE(review): A new zstd reader is created and closed for every call.
func (s *server) messageDeserializeAndUncompress(msgData []byte) (Message, error) {
	decoder, err := zstd.NewReader(nil)
	if err != nil {
		return Message{}, fmt.Errorf("error: subscriberHandlerJetstream: zstd NewReader failed: %v", err)
	}
	defer decoder.Close()

	raw, err := decoder.DecodeAll(msgData, nil)
	if err != nil {
		return Message{}, fmt.Errorf("error: subscriberHandlerJetstream: zstd decoding failed: %v", err)
	}

	var decoded Message
	if err := cbor.Unmarshal(raw, &decoded); err != nil {
		return Message{}, fmt.Errorf("error: subscriberHandlerJetstream: cbor decoding failed, error: %v", err)
	}

	return decoded, nil
}