1
0
Fork 0
mirror of https://github.com/postmannen/ctrl.git synced 2024-12-14 12:37:31 +00:00
ctrl/server.go

386 lines
13 KiB
Go
Raw Normal View History

// Notes:
2021-02-01 10:13:38 +00:00
package steward
2021-01-25 14:23:00 +00:00
import (
"bytes"
"encoding/gob"
"fmt"
"log"
2021-01-28 13:58:16 +00:00
"sync"
2021-01-25 14:23:00 +00:00
"time"
"github.com/nats-io/nats.go"
"github.com/prometheus/client_golang/prometheus"
2021-01-25 14:23:00 +00:00
)
// server is the structure that will hold the state about spawned
// processes on a local instance.
2021-01-27 13:02:57 +00:00
type server struct {
natsConn *nats.Conn
// TODO: sessions should probably hold a slice/map of processes ?
processes map[subjectName]process
// The last processID created
lastProcessID int
// The name of the node
nodeName string
2021-02-24 09:58:02 +00:00
// Mutex for locking when writing to the process map
mu sync.Mutex
// The channel where we put new messages read from file,
// or some other process who wants to send something via the
// system
2021-02-24 09:58:02 +00:00
// We can than range this channel for new messages to process.
newMessagesCh chan []subjectAndMessage
2021-02-24 09:58:02 +00:00
// errorKernel is doing all the error handling like what to do if
// an error occurs.
// TODO: Will also send error messages to cental error subscriber.
errorKernel *errorKernel
// used to check if the methods specified in message is valid
methodsAvailable MethodsAvailable
2021-02-17 17:59:49 +00:00
// Map who holds the command and event types available.
// Used to check if the commandOrEvent specified in message is valid
commandOrEventAvailable CommandOrEventAvailable
2021-02-18 11:29:14 +00:00
// metric exporter
metrics *metrics
2021-02-24 09:58:02 +00:00
// subscriberServices are where we find the services and the API to
// use services needed by subscriber.
// For example, this can be a service that knows
// how to forward the data for a received message of type log to a
// central logger.
subscriberServices *subscriberServices
// Is this the central error logger ?
centralErrorLogger bool
// default message timeout in seconds. This can be overridden on the message level
defaultMessageTimeout int
// default amount of retries that will be done before a message is thrown away, and out of the system
defaultMessageRetries int
2021-01-27 13:02:57 +00:00
}
// newServer will prepare and return a server type
func NewServer(brokerAddress string, nodeName string, promHostAndPort string, centralErrorLogger bool, defaultMessageTimeout int, defaultMessageRetries int) (*server, error) {
2021-02-01 10:13:38 +00:00
conn, err := nats.Connect(brokerAddress, nil)
if err != nil {
log.Printf("error: nats.Connect failed: %v\n", err)
}
var m Method
2021-02-24 09:58:02 +00:00
var coe CommandOrEvent
s := &server{
nodeName: nodeName,
natsConn: conn,
processes: make(map[subjectName]process),
newMessagesCh: make(chan []subjectAndMessage),
methodsAvailable: m.GetMethodsAvailable(),
2021-02-24 09:58:02 +00:00
commandOrEventAvailable: coe.GetCommandOrEventAvailable(),
metrics: newMetrics(promHostAndPort),
2021-02-24 09:58:02 +00:00
subscriberServices: newSubscriberServices(),
centralErrorLogger: centralErrorLogger,
defaultMessageTimeout: defaultMessageTimeout,
defaultMessageRetries: defaultMessageRetries,
}
2021-02-05 06:25:12 +00:00
return s, nil
}
2021-02-24 09:58:02 +00:00
// Start will spawn up all the predefined subscriber processes.
2021-02-10 06:25:44 +00:00
// Spawning of publisher processes is done on the fly by checking
2021-02-24 09:58:02 +00:00
// if there is publisher process for a given message subject, and
// not exist it will spawn one.
func (s *server) Start() {
// Start the error kernel that will do all the error handling
// not done within a process.
s.errorKernel = newErrorKernel()
s.errorKernel.startErrorKernel(s.newMessagesCh)
2021-02-18 11:29:14 +00:00
// Start collecting the metrics
go s.startMetrics()
// Start the checking the input file for new messages from operator.
go s.getMessagesFromFile("./", "inmsg.txt", s.newMessagesCh)
// Start the textLogging service that will run on the subscribers
2021-02-24 09:58:02 +00:00
// TODO: This should only be started if the flag value provided when
// starting asks to subscribe to TextLogging events.
go s.subscriberServices.startTextLogging()
2021-02-24 09:58:02 +00:00
// Start up the predefined subscribers.
// TODO: What to subscribe on should be handled via flags, or config
// files.
s.subscribersStart()
time.Sleep(time.Second * 2)
2021-02-10 06:25:44 +00:00
s.printProcessesMap()
2021-02-24 09:58:02 +00:00
// Start the processing of new messaging from an input channel.
s.processNewMessages("./incommmingBuffer.db", s.newMessagesCh)
select {}
}
2021-02-10 06:25:44 +00:00
func (s *server) printProcessesMap() {
fmt.Println("--------------------------------------------------------------------------------------------")
fmt.Printf("*** Output of processes map :\n")
for _, v := range s.processes {
fmt.Printf("*** - : %v\n", v)
}
s.metrics.metricsCh <- metricType{
metric: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "total_running_processes",
Help: "The current number of total running processes",
}),
value: float64(len(s.processes)),
}
2021-02-10 06:25:44 +00:00
fmt.Println("--------------------------------------------------------------------------------------------")
}
2021-02-09 10:52:08 +00:00
// processKind is either processKindSubscriber or processKindPublisher.
// It is used to distinguish the kind of process to spawn, and to know
// the kind of a process put in the process map.
type processKind string

const (
	// processKindSubscriber is the kind of a subscriber process.
	processKindSubscriber processKind = "subscriber"
	// processKindPublisher is the kind of a publisher process.
	processKindPublisher processKind = "publisher"
)
2021-01-28 13:58:16 +00:00
// process are represent the communication to one individual host
type process struct {
messageID int
2021-02-03 07:28:21 +00:00
// the subject used for the specific process. One process
// can contain only one sender on a message bus, hence
// also one subject
subject Subject
2021-01-28 13:58:16 +00:00
// Put a node here to be able know the node a process is at.
// NB: Might not be needed later on.
node node
// The processID for the current process
processID int
// errorCh is used to report errors from a process
// NB: Implementing this as an int to report for testing
2021-02-09 10:52:08 +00:00
errorCh chan errProcess
processKind processKind
}
// prepareNewProcess will set the the provided values and the default
// values for a process.
2021-02-09 10:52:08 +00:00
func (s *server) processPrepareNew(subject Subject, errCh chan errProcess, processKind processKind) process {
2021-02-03 08:06:37 +00:00
// create the initial configuration for a sessions communicating with 1 host process.
s.lastProcessID++
proc := process{
2021-02-09 10:52:08 +00:00
messageID: 0,
subject: subject,
2021-02-24 06:39:14 +00:00
node: node(subject.ToNode),
2021-02-09 10:52:08 +00:00
processID: s.lastProcessID,
errorCh: errCh,
processKind: processKind,
//messageCh: make(chan Message),
2021-01-27 13:02:57 +00:00
}
return proc
}
2021-02-24 09:58:02 +00:00
// spawnWorkerProcess will spawn take care of spawning both publisher
// and subscriber proesses.
//It will give the process the next available ID, and also add the
// process to the processes map.
func (s *server) spawnWorkerProcess(proc process) {
s.mu.Lock()
// We use the full name of the subject to identify a unique
// process. We can do that since a process can only handle
// one message queue.
s.processes[proc.subject.name()] = proc
s.mu.Unlock()
2021-01-27 13:02:57 +00:00
// TODO: I think it makes most sense that the messages would come to
// here from some other message-pickup-process, and that process will
// give the message to the correct publisher process. A channel that
// is listened on in the for loop below could be used to receive the
// messages from the message-pickup-process.
2021-02-10 06:25:44 +00:00
//
// Handle publisher workers
2021-02-09 10:52:08 +00:00
if proc.processKind == processKindPublisher {
2021-02-24 09:58:02 +00:00
s.publishMessages(proc)
2021-01-27 08:45:52 +00:00
}
2021-02-10 06:25:44 +00:00
// handle subscriber workers
if proc.processKind == processKindSubscriber {
2021-02-24 09:58:02 +00:00
s.subscribeMessages(proc)
}
2021-01-27 08:45:52 +00:00
}
func (s *server) messageDeliverNats(proc process, message Message) {
retryAttempts := 0
2021-01-27 08:45:52 +00:00
for {
dataPayload, err := gobEncodeMessage(message)
2021-01-27 08:45:52 +00:00
if err != nil {
log.Printf("error: createDataPayload: %v\n", err)
}
msg := &nats.Msg{
Subject: string(proc.subject.name()),
// Subject: fmt.Sprintf("%s.%s.%s", proc.node, "command", "shellCommand"),
// Structure of the reply message are:
// reply.<nodename>.<message type>.<method>
Reply: fmt.Sprintf("reply.%s", proc.subject.name()),
Data: dataPayload,
2021-01-25 14:23:00 +00:00
}
2021-01-27 08:45:52 +00:00
// The SubscribeSync used in the subscriber, will get messages that
// are sent after it started subscribing, so we start a publisher
// that sends out a message every second.
//
// Create a subscriber for the reply message.
subReply, err := s.natsConn.SubscribeSync(msg.Reply)
2021-01-27 08:45:52 +00:00
if err != nil {
2021-02-25 10:08:05 +00:00
log.Printf("error: nc.SubscribeSync failed: failed to create reply message: %v\n", err)
2021-01-27 08:45:52 +00:00
continue
}
// Publish message
err = s.natsConn.PublishMsg(msg)
2021-01-27 08:45:52 +00:00
if err != nil {
log.Printf("error: publish failed: %v\n", err)
continue
}
// If the message is an ACK type of message we must check that a
// reply, and if it is not we don't wait here at all.
fmt.Printf("---- MESSAGE : %v\n", message)
if message.CommandOrEvent == CommandACK || message.CommandOrEvent == EventACK {
// Wait up until 10 seconds for a reply,
// continue and resend if to reply received.
msgReply, err := subReply.NextMsg(time.Second * time.Duration(message.Timeout))
if err != nil {
log.Printf("error: subReply.NextMsg failed for node=%v, subject=%v: %v\n", proc.node, proc.subject.name(), err)
// did not receive a reply, decide what to do..
retryAttempts++
fmt.Printf("Retry attempts:%v, retries: %v, timeout: %v\n", retryAttempts, message.Retries, message.Timeout)
switch {
case message.Retries == 0:
// 0 indicates unlimited retries
continue
case retryAttempts >= message.Retries:
// max retries reached
log.Printf("info: max retries for message reached, breaking out: %v", retryAttempts)
return
default:
// none of the above matched, so we've not reached max retries yet
continue
}
}
2021-02-24 09:58:02 +00:00
log.Printf("info: publisher: received ACK for message: %s\n", msgReply.Data)
2021-01-27 08:45:52 +00:00
}
return
}
}
2021-02-10 06:25:44 +00:00
// handler will deserialize the message when a new message is received,
// check the MessageType field in the message to decide what kind of
// message it is and then it will check how to handle that message type,
// and handle it.
// This handler function should be started in it's own go routine,so
// one individual handler is started per message received so we can keep
// the state of the message being processed, and then reply back to the
// correct sending process's reply, meaning so we ACK back to the correct
// publisher.
func (s *server) subscriberHandler(natsConn *nats.Conn, thisNode string, msg *nats.Msg) {
2021-02-10 06:25:44 +00:00
message := Message{}
// Create a buffer to decode the gob encoded binary data back
// to it's original structure.
buf := bytes.NewBuffer(msg.Data)
gobDec := gob.NewDecoder(buf)
err := gobDec.Decode(&message)
if err != nil {
log.Printf("error: gob decoding failed: %v\n", err)
}
//fmt.Printf("%v\n", msg)
// TODO: Maybe the handling of the errors within the subscriber
// should also involve the error-kernel to report back centrally
// that there was a problem like missing method to handle a specific
// method etc.
switch {
2021-02-17 17:59:49 +00:00
case message.CommandOrEvent == CommandACK || message.CommandOrEvent == EventACK:
log.Printf("info: subscriberHandler: message.CommandOrEvent received was = %v, preparing to call handler\n", message.CommandOrEvent)
mf, ok := s.methodsAvailable.CheckIfExists(message.Method)
if !ok {
// TODO: Check how errors should be handled here!!!
log.Printf("error: subscriberHandler: method type not available: %v\n", message.CommandOrEvent)
2021-02-10 06:25:44 +00:00
}
fmt.Printf("*** DEBUG: BEFORE CALLING HANDLER: ACK\n")
out, err := mf.handler(s, message, thisNode)
2021-02-11 14:07:03 +00:00
if err != nil {
// TODO: Send to error kernel ?
log.Printf("error: subscriberHandler: failed to execute event: %v\n", err)
2021-02-10 06:25:44 +00:00
}
// Send a confirmation message back to the publisher
2021-02-11 14:07:03 +00:00
natsConn.Publish(msg.Reply, out)
2021-02-25 10:08:05 +00:00
// TESTING: Simulate that we also want to send some error that occured
// to the errorCentral
{
err := fmt.Errorf("error: some testing error we want to send out")
sendErrorLogMessage(s.newMessagesCh, node(thisNode), err)
}
2021-02-17 17:59:49 +00:00
case message.CommandOrEvent == CommandNACK || message.CommandOrEvent == EventNACK:
log.Printf("info: subscriberHandler: message.CommandOrEvent received was = %v, preparing to call handler\n", message.CommandOrEvent)
mf, ok := s.methodsAvailable.CheckIfExists(message.Method)
if !ok {
// TODO: Check how errors should be handled here!!!
log.Printf("error: subscriberHandler: method type not available: %v\n", message.CommandOrEvent)
}
// since we don't send a reply for a NACK message, we don't care about the
// out return when calling mf.handler
fmt.Printf("*** DEBUG: BEFORE CALLING HANDLER: NACK\n")
_, err := mf.handler(s, message, thisNode)
if err != nil {
// TODO: Send to error kernel ?
log.Printf("error: subscriberHandler: failed to execute event: %v\n", err)
}
2021-02-10 06:25:44 +00:00
default:
log.Printf("info: did not find that specific type of command: %#v\n", message.CommandOrEvent)
}
}
2021-02-25 10:08:05 +00:00
// sendErrorMessage will put the error message directly on the channel that is
// read by the nats publishing functions.
func sendErrorLogMessage(newMessagesCh chan<- []subjectAndMessage, FromNode node, theError error) {
// --- Testing
2021-02-25 10:08:05 +00:00
sam := createErrorMsgContent(FromNode, theError)
newMessagesCh <- []subjectAndMessage{sam}
}
2021-02-25 10:08:05 +00:00
// createErrorMsgContent will prepare a subject and message with the content
// of the error
func createErrorMsgContent(FromNode node, theError error) subjectAndMessage {
// TESTING: Creating an error message to send to errorCentral
fmt.Printf(" --- Sending error message to central !!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
sam := subjectAndMessage{
Subject: Subject{
ToNode: "errorCentral",
CommandOrEvent: EventNACK,
Method: ErrorLog,
},
Message: Message{
ToNode: "errorCentral",
FromNode: FromNode,
2021-02-25 10:08:05 +00:00
Data: []string{theError.Error()},
CommandOrEvent: EventNACK,
Method: ErrorLog,
},
}
2021-02-25 10:08:05 +00:00
return sam
}