Merge pull request #12 from tim-beatham/11-health-system

11 health system
This commit is contained in:
Tim Beatham 2023-11-06 13:40:04 +00:00 committed by GitHub
commit 4c19ebd81f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 265 additions and 100 deletions

View File

@ -2,6 +2,7 @@ certificatePath: "/wgmesh/cert/cert.pem"
privateKeyPath: "/wgmesh/cert/priv.pem"
caCertificatePath: "/wgmesh/cert/cacert.pem"
skipCertVerification: true
timeout: 5
gRPCPort: "21906"
advertiseRoutes: true
clusterSize: 32
@ -9,4 +10,5 @@ syncRate: 1
interClusterChance: 0.15
branchRate: 3
infectionCount: 3
keepAliveRate: 60
keepAliveTime: 10
pruneTime: 20

View File

@ -9,6 +9,7 @@ import (
ctrlserver "github.com/tim-beatham/wgmesh/pkg/ctrlserver"
"github.com/tim-beatham/wgmesh/pkg/ipc"
logging "github.com/tim-beatham/wgmesh/pkg/log"
"github.com/tim-beatham/wgmesh/pkg/mesh"
"github.com/tim-beatham/wgmesh/pkg/robin"
"github.com/tim-beatham/wgmesh/pkg/sync"
"github.com/tim-beatham/wgmesh/pkg/timestamp"
@ -44,12 +45,13 @@ func main() {
SyncProvider: &syncProvider,
Client: client,
}
ctrlServer, err := ctrlserver.NewCtrlServer(&ctrlServerParams)
syncProvider.Server = ctrlServer
syncRequester := sync.NewSyncRequester(ctrlServer)
syncScheduler := sync.NewSyncScheduler(ctrlServer, syncRequester)
timestampScheduler := timestamp.NewTimestampScheduler(ctrlServer)
pruneScheduler := mesh.NewPruner(ctrlServer.MeshManager, *conf)
robinIpcParams := robin.RobinIpcParams{
CtrlServer: ctrlServer,
@ -68,6 +70,7 @@ func main() {
go ipc.RunIpcHandler(&robinIpc)
go syncScheduler.Run()
go timestampScheduler.Run()
go pruneScheduler.Run()
closeResources := func() {
logging.Log.WriteInfof("Closing resources")

View File

@ -34,10 +34,10 @@ func (c *CrdtMeshManager) AddNode(node mesh.MeshNode) {
panic("node must be of type *MeshNodeCrdt")
}
crdt.Routes = make(map[string]interface{})
crdt.Timestamp = time.Now().Unix()
c.doc.Path("nodes").Map().Set(crdt.HostEndpoint, crdt)
nodeVal, _ := c.doc.Path("nodes").Map().Get(crdt.HostEndpoint)
nodeVal.Map().Set("routes", automerge.NewMap())
}
// GetMesh(): Converts the document into a struct
@ -204,7 +204,6 @@ func (m *CrdtMeshManager) AddRoutes(nodeId string, routes ...string) error {
return err
}
}
return nil
}
@ -212,6 +211,58 @@ func (m *CrdtMeshManager) GetSyncer() mesh.MeshSyncer {
return NewAutomergeSync(m)
}
func (m *CrdtMeshManager) Prune(pruneTime int) error {
nodes, err := m.doc.Path("nodes").Get()
if err != nil {
return err
}
if nodes.Kind() != automerge.KindMap {
return errors.New("node must be a map")
}
values, err := nodes.Map().Values()
if err != nil {
return err
}
deletionNodes := make([]string, 0)
for nodeId, node := range values {
if node.Kind() != automerge.KindMap {
return errors.New("node must be a map")
}
nodeMap := node.Map()
timeStamp, err := nodeMap.Get("timestamp")
if err != nil {
return err
}
if timeStamp.Kind() != automerge.KindInt64 {
return errors.New("timestamp is not int64")
}
timeValue := timeStamp.Int64()
nowValue := time.Now().Unix()
if nowValue-timeValue >= int64(pruneTime) {
deletionNodes = append(deletionNodes, nodeId)
}
}
for _, node := range deletionNodes {
logging.Log.WriteInfof("Pruning %s", node)
nodes.Map().Delete(node)
}
return nil
}
func (m1 *MeshNodeCrdt) Compare(m2 *MeshNodeCrdt) int {
return strings.Compare(m1.PublicKey, m2.PublicKey)
}

View File

@ -32,13 +32,23 @@ type WgMeshConfiguration struct {
AdvertiseRoutes bool `yaml:"advertiseRoutes"`
// Endpoint is the IP in which this computer is publicly reachable.
// usecase is when the node has multiple IP addresses
Endpoint string `yaml:"publicEndpoint"`
ClusterSize int `yaml:"clusterSize"`
SyncRate float64 `yaml:"syncRate"`
Endpoint string `yaml:"publicEndpoint"`
// ClusterSize size of the cluster to split on
ClusterSize int `yaml:"clusterSize"`
// SyncRate number of times per second to perform a sync
SyncRate float64 `yaml:"syncRate"`
// InterClusterChance proability of inter-cluster communication in a sync round
InterClusterChance float64 `yaml:"interClusterChance"`
BranchRate int `yaml:"branchRate"`
InfectionCount int `yaml:"infectionCount"`
KeepAliveRate int `yaml:"keepAliveRate"`
// BranchRate number of nodes to randomly communicate with
BranchRate int `yaml:"branchRate"`
// InfectionCount number of times we sync before we can no longer catch the udpate
InfectionCount int `yaml:"infectionCount"`
// KeepAliveTime number of seconds before we update node indicating that we are still alive
KeepAliveTime int `yaml:"keepAliveTime"`
// Timeout number of seconds before we consider the node as dead
Timeout int `yaml:"timeout"`
// PruneTime number of seconds before we consider the 'node' as dead
PruneTime int `yaml:"pruneTime"`
}
func ValidateConfiguration(c *WgMeshConfiguration) error {
@ -90,7 +100,7 @@ func ValidateConfiguration(c *WgMeshConfiguration) error {
}
}
if c.KeepAliveRate <= 0 {
if c.KeepAliveTime <= 0 {
return &WgMeshConfigurationError{
msg: "KeepAliveRate cannot be less than negative",
}
@ -102,6 +112,24 @@ func ValidateConfiguration(c *WgMeshConfiguration) error {
}
}
if c.Timeout < 1 {
return &WgMeshConfigurationError{
msg: "Timeout should be greater than or equal to 1",
}
}
if c.PruneTime <= 1 {
return &WgMeshConfigurationError{
msg: "Prune time cannot be <= 1",
}
}
if c.KeepAliveTime <= 1 {
return &WgMeshConfigurationError{
msg: "Prune time cannot be less than keep alive time",
}
}
return nil
}

View File

@ -15,8 +15,10 @@ func getExampleConfiguration() *WgMeshConfiguration {
SyncRate: 1,
InterClusterChance: 0.1,
BranchRate: 2,
KeepAliveRate: 1,
KeepAliveTime: 4,
InfectionCount: 1,
Timeout: 2,
PruneTime: 20,
}
}
@ -110,7 +112,7 @@ func InfectionCountZero(t *testing.T) {
func KeepAliveRateZero(t *testing.T) {
conf := getExampleConfiguration()
conf.KeepAliveRate = 0
conf.KeepAliveTime = 0
err := ValidateConfiguration(conf)
@ -128,3 +130,36 @@ func TestValidCOnfiguration(t *testing.T) {
t.Error(err)
}
}
func TestTimeout(t *testing.T) {
conf := getExampleConfiguration()
conf.Timeout = 0
err := ValidateConfiguration(conf)
if err == nil {
t.Fatal(`error should be thrown`)
}
}
func TestPruneTimeZero(t *testing.T) {
conf := getExampleConfiguration()
conf.PruneTime = 0
err := ValidateConfiguration(conf)
if err == nil {
t.Fatalf(`Error should be thrown`)
}
}
func TestPruneTimeLessThanKeepAliveTime(t *testing.T) {
conf := getExampleConfiguration()
conf.PruneTime = 1
err := ValidateConfiguration(conf)
if err == nil {
t.Fatalf(`Error should be thrown`)
}
}

View File

@ -35,6 +35,9 @@ func NewCtrlServer(params *NewCtrlServerParams) (*MeshCtrlServer, error) {
ipAllocator := &ip.ULABuilder{}
interfaceManipulator := wg.NewWgInterfaceManipulator(params.Client)
var meshManager mesh.MeshManagerImpl
configApplyer := mesh.NewWgMeshConfigApplyer()
meshManagerParams := &mesh.NewMeshManagerParams{
Conf: *params.Conf,
Client: params.Client,
@ -43,8 +46,10 @@ func NewCtrlServer(params *NewCtrlServerParams) (*MeshCtrlServer, error) {
IdGenerator: idGenerator,
IPAllocator: ipAllocator,
InterfaceManipulator: interfaceManipulator,
ConfigApplyer: mesh.NewWgMeshConfigApplyer(ctrlServer.MeshManager),
ConfigApplyer: configApplyer,
}
configApplyer.SetMeshManager(&meshManager)
ctrlServer.MeshManager = mesh.NewMeshManager(meshManagerParams)
ctrlServer.Conf = params.Conf

42
pkg/lib/timer.go Normal file
View File

@ -0,0 +1,42 @@
package lib
import "time"
type TimerFunc = func() error
type Timer struct {
f TimerFunc
quit chan struct{}
updateRate int
}
func (t *Timer) Run() error {
ticker := time.NewTicker(time.Duration(t.updateRate) * time.Second)
t.quit = make(chan struct{})
for {
select {
case <-ticker.C:
err := t.f()
if err != nil {
return err
}
case <-t.quit:
break
}
}
}
func (t *Timer) Stop() error {
close(t.quit)
return nil
}
func NewTimer(f TimerFunc, updateRate int) *Timer {
return &Timer{
f: f,
updateRate: updateRate,
}
}

View File

@ -1,7 +1,6 @@
package mesh
import (
"errors"
"fmt"
"net"
@ -12,6 +11,7 @@ import (
type MeshConfigApplyer interface {
ApplyConfig() error
RemovePeers(meshId string) error
SetMeshManager(manager MeshManager)
}
// WgMeshConfigApplyer applies WireGuard configuration
@ -101,7 +101,7 @@ func (m *WgMeshConfigApplyer) RemovePeers(meshId string) error {
mesh := m.meshManager.GetMesh(meshId)
if mesh == nil {
return errors.New(fmt.Sprintf("mesh %s does not exist", meshId))
return fmt.Errorf("mesh %s does not exist", meshId)
}
dev, err := mesh.GetDevice()
@ -118,6 +118,10 @@ func (m *WgMeshConfigApplyer) RemovePeers(meshId string) error {
return nil
}
func NewWgMeshConfigApplyer(manager MeshManager) MeshConfigApplyer {
return &WgMeshConfigApplyer{meshManager: manager}
func (m *WgMeshConfigApplyer) SetMeshManager(manager MeshManager) {
m.meshManager = manager
}
func NewWgMeshConfigApplyer() MeshConfigApplyer {
return &WgMeshConfigApplyer{}
}

View File

@ -28,6 +28,7 @@ type MeshManager interface {
UpdateTimeStamp() error
GetClient() *wgctrl.Client
GetMeshes() map[string]MeshProvider
Prune() error
}
type MeshManagerImpl struct {
@ -46,6 +47,19 @@ type MeshManagerImpl struct {
interfaceManipulator wg.WgInterfaceManipulator
}
// Prune implements MeshManager.
func (m *MeshManagerImpl) Prune() error {
for _, mesh := range m.Meshes {
err := mesh.Prune(m.conf.PruneTime)
if err != nil {
return err
}
}
return nil
}
// CreateMesh: Creates a new mesh, stores it and returns the mesh id
func (m *MeshManagerImpl) CreateMesh(devName string, port int) (string, error) {
meshId, err := m.idGenerator.GetId()
@ -76,7 +90,6 @@ func (m *MeshManagerImpl) CreateMesh(devName string, port int) (string, error) {
}
m.Meshes[meshId] = nodeManager
err = m.configApplyer.RemovePeers(meshId)
if err != nil {
logging.Log.WriteErrorf(err.Error())
@ -217,7 +230,7 @@ func (s *MeshManagerImpl) LeaveMesh(meshId string) error {
_, exists := s.Meshes[meshId]
if !exists {
return errors.New(fmt.Sprintf("mesh %s does not exist", meshId))
return fmt.Errorf("mesh %s does not exist", meshId)
}
// For now just delete the mesh with the ID.
@ -229,7 +242,7 @@ func (s *MeshManagerImpl) GetSelf(meshId string) (MeshNode, error) {
meshInstance, ok := s.Meshes[meshId]
if !ok {
return nil, errors.New(fmt.Sprintf("mesh %s does not exist", meshId))
return nil, fmt.Errorf("mesh %s does not exist", meshId)
}
snapshot, err := meshInstance.GetMesh()
@ -327,6 +340,7 @@ func NewMeshManager(params *NewMeshManagerParams) *MeshManagerImpl {
Client: params.Client,
conf: &params.Conf,
}
m.configApplyer = params.ConfigApplyer
m.RouteManager = NewRouteManager(m)
m.idGenerator = params.IdGenerator

View File

@ -18,7 +18,7 @@ func getMeshConfiguration() *conf.WgMeshConfiguration {
BranchRate: 3,
InterClusterChance: 0.15,
InfectionCount: 2,
KeepAliveRate: 60,
KeepAliveTime: 60,
}
}

16
pkg/mesh/pruner.go Normal file
View File

@ -0,0 +1,16 @@
package mesh
import (
"github.com/tim-beatham/wgmesh/pkg/conf"
"github.com/tim-beatham/wgmesh/pkg/lib"
)
func pruneFunction(m MeshManager) lib.TimerFunc {
return func() error {
return m.Prune()
}
}
func NewPruner(m MeshManager, conf conf.WgMeshConfiguration) *lib.Timer {
return lib.NewTimer(pruneFunction(m), conf.PruneTime/2)
}

View File

@ -66,6 +66,11 @@ type MeshProviderStub struct {
snapshot *MeshSnapshotStub
}
// Prune implements MeshProvider.
func (*MeshProviderStub) Prune(pruneAmount int) error {
return nil
}
// UpdateTimeStamp implements MeshProvider.
func (*MeshProviderStub) UpdateTimeStamp(nodeId string) error {
return nil
@ -154,10 +159,18 @@ func (a *MeshConfigApplyerStub) RemovePeers(meshId string) error {
return nil
}
func (a *MeshConfigApplyerStub) SetMeshManager(manager MeshManager) {
}
type MeshManagerStub struct {
meshes map[string]MeshProvider
}
// Prune implements MeshManager.
func (*MeshManagerStub) Prune() error {
return nil
}
func NewMeshManagerStub() MeshManager {
return &MeshManagerStub{meshes: make(map[string]MeshProvider)}
}

View File

@ -58,20 +58,24 @@ type MeshProvider interface {
GetDevice() (*wgtypes.Device, error)
// HasChanges returns true if we have changes since last time we synced
HasChanges() bool
// Record that we have changges and save the corresponding changes
// Record that we have changes and save the corresponding changes
SaveChanges()
// UpdateTimeStamp: update the timestamp of the given node
UpdateTimeStamp(nodeId string) error
// AddRoutes: adds routes to the given node
AddRoutes(nodeId string, route ...string) error
// GetSyncer: returns the automerge syncer for sync
GetSyncer() MeshSyncer
// SetDescription: sets the description of this automerge data type
SetDescription(nodeId string, description string) error
// Prune: prunes all nodes that have not updated their timestamp in
// pruneAmount seconds
Prune(pruneAmount int) error
}
// HostParameters contains the IDs of a node
type HostParameters struct {
HostEndpoint string
// TODO: Contain the WireGungracefullyuard identifier in this
}
// MeshProviderFactoryParams parameters required to build a mesh provider

View File

@ -107,7 +107,6 @@ func (s *SyncerImpl) Sync(meshId string) error {
logging.Log.WriteInfof("SYNC COUNT: %d", s.syncCount)
s.infectionCount = ((s.conf.InfectionCount + s.infectionCount - 1) % s.conf.InfectionCount)
return nil
}

View File

@ -24,6 +24,13 @@ func (s *SyncErrorHandlerImpl) incrementFailedCount(meshId string, endpoint stri
return false
}
// self, err := s.meshManager.GetSelf(meshId)
// if err != nil {
// return false
// }
// mesh.DecrementHealth(endpoint, self.GetHostEndpoint())
return true
}

View File

@ -89,10 +89,12 @@ func (s *SyncRequesterImpl) SyncMesh(meshId, endpoint string) error {
c := rpc.NewSyncServiceClient(client)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
syncTimeOut := s.server.Conf.SyncRate * float64(time.Second)
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(syncTimeOut))
defer cancel()
err = syncMesh(mesh, ctx, c)
err = s.syncMesh(mesh, ctx, c)
if err != nil {
return s.handleErr(meshId, endpoint, err)
@ -102,7 +104,7 @@ func (s *SyncRequesterImpl) SyncMesh(meshId, endpoint string) error {
return nil
}
func syncMesh(mesh mesh.MeshProvider, ctx context.Context, client rpc.SyncServiceClient) error {
func (s *SyncRequesterImpl) syncMesh(mesh mesh.MeshProvider, ctx context.Context, client rpc.SyncServiceClient) error {
stream, err := client.SyncMesh(ctx)
syncer := mesh.GetSyncer()

View File

@ -1,10 +1,8 @@
package sync
import (
"time"
"github.com/tim-beatham/wgmesh/pkg/ctrlserver"
logging "github.com/tim-beatham/wgmesh/pkg/log"
"github.com/tim-beatham/wgmesh/pkg/lib"
)
// SyncScheduler: Loops through all nodes in the mesh and runs a schedule to
@ -22,34 +20,13 @@ type SyncSchedulerImpl struct {
}
// Run implements SyncScheduler.
func (s *SyncSchedulerImpl) Run() error {
ticker := time.NewTicker(time.Duration(s.server.Conf.SyncRate) * time.Second)
quit := make(chan struct{})
s.quit = quit
for {
select {
case <-ticker.C:
err := s.syncer.SyncMeshes()
if err != nil {
logging.Log.WriteErrorf(err.Error())
}
break
case <-quit:
break
}
func syncFunction(syncer Syncer) lib.TimerFunc {
return func() error {
return syncer.SyncMeshes()
}
}
// Stop implements SyncScheduler.
func (s *SyncSchedulerImpl) Stop() error {
close(s.quit)
return nil
}
func NewSyncScheduler(s *ctrlserver.MeshCtrlServer, syncRequester SyncRequester) SyncScheduler {
func NewSyncScheduler(s *ctrlserver.MeshCtrlServer, syncRequester SyncRequester) *lib.Timer {
syncer := NewSyncer(s.MeshManager, s.Conf, syncRequester)
return &SyncSchedulerImpl{server: s, syncer: syncer}
return lib.NewTimer(syncFunction(syncer), int(s.Conf.SyncRate))
}

View File

@ -1,51 +1,14 @@
package timestamp
import (
"time"
"github.com/tim-beatham/wgmesh/pkg/ctrlserver"
logging "github.com/tim-beatham/wgmesh/pkg/log"
"github.com/tim-beatham/wgmesh/pkg/mesh"
"github.com/tim-beatham/wgmesh/pkg/lib"
)
type TimestampScheduler interface {
Run() error
Stop() error
}
type TimeStampSchedulerImpl struct {
meshManager mesh.MeshManager
updateRate int
quit chan struct{}
}
func (s *TimeStampSchedulerImpl) Run() error {
ticker := time.NewTicker(time.Duration(s.updateRate) * time.Second)
s.quit = make(chan struct{})
for {
select {
case <-ticker.C:
err := s.meshManager.UpdateTimeStamp()
if err != nil {
logging.Log.WriteErrorf("Update Timestamp Error: %s", err.Error())
}
case <-s.quit:
break
}
func NewTimestampScheduler(ctrlServer *ctrlserver.MeshCtrlServer) lib.Timer {
timerFunc := func() error {
return ctrlServer.MeshManager.UpdateTimeStamp()
}
}
func NewTimestampScheduler(ctrlServer *ctrlserver.MeshCtrlServer) TimestampScheduler {
return &TimeStampSchedulerImpl{
meshManager: ctrlServer.MeshManager,
updateRate: ctrlServer.Conf.KeepAliveRate,
}
}
func (s *TimeStampSchedulerImpl) Stop() error {
close(s.quit)
return nil
return *lib.NewTimer(timerFunc, ctrlServer.Conf.KeepAliveTime)
}