diff --git a/client/internal/routemanager/systemops_darwin.go b/client/internal/routemanager/systemops_darwin.go index 33b8287b6..f7ce72a4e 100644 --- a/client/internal/routemanager/systemops_darwin.go +++ b/client/internal/routemanager/systemops_darwin.go @@ -8,7 +8,9 @@ import ( "net/netip" "os/exec" "strings" + "time" + "github.com/cenkalti/backoff/v4" log "github.com/sirupsen/logrus" "github.com/netbirdio/netbird/client/internal/peer" @@ -55,11 +57,33 @@ func routeCmd(action string, prefix netip.Prefix, nexthop netip.Addr, intf strin args = append(args, "-interface", intf) } - out, err := exec.Command("route", args...).CombinedOutput() - log.Tracef("route %s: %s", strings.Join(args, " "), out) - - if err != nil { + if err := retryRouteCmd(args); err != nil { return fmt.Errorf("failed to %s route for %s: %w", action, prefix, err) } return nil } + +func retryRouteCmd(args []string) error { + operation := func() error { + out, err := exec.Command("route", args...).CombinedOutput() + log.Tracef("route %s: %s", strings.Join(args, " "), out) + // https://github.com/golang/go/issues/45736 + if err != nil && strings.Contains(string(out), "sysctl: cannot allocate memory") { + return err + } else if err != nil { + return backoff.Permanent(err) + } + return nil + } + + expBackOff := backoff.NewExponentialBackOff() + expBackOff.InitialInterval = 50 * time.Millisecond + expBackOff.MaxInterval = 500 * time.Millisecond + expBackOff.MaxElapsedTime = 1 * time.Second + + err := backoff.Retry(operation, expBackOff) + if err != nil { + return fmt.Errorf("route cmd retry failed: %w", err) + } + return nil +} diff --git a/client/internal/routemanager/systemops_darwin_test.go b/client/internal/routemanager/systemops_darwin_test.go index 5c5aaa24f..cc9bb9db5 100644 --- a/client/internal/routemanager/systemops_darwin_test.go +++ b/client/internal/routemanager/systemops_darwin_test.go @@ -5,8 +5,10 @@ package routemanager import ( "fmt" "net" + "net/netip" "os/exec" "regexp" + "sync" "testing" "github.com/stretchr/testify/assert" @@ -29,6 +31,42 @@ func init() { }...) } +func TestConcurrentRoutes(t *testing.T) { + baseIP := netip.MustParseAddr("192.0.2.0") + intf := "lo0" + + var wg sync.WaitGroup + for i := 0; i < 1024; i++ { + wg.Add(1) + go func(ip netip.Addr) { + defer wg.Done() + prefix := netip.PrefixFrom(ip, 32) + if err := addToRouteTable(prefix, netip.Addr{}, intf); err != nil { + t.Errorf("Failed to add route for %s: %v", prefix, err) + } + }(baseIP) + baseIP = baseIP.Next() + } + + wg.Wait() + + baseIP = netip.MustParseAddr("192.0.2.0") + + for i := 0; i < 1024; i++ { + wg.Add(1) + go func(ip netip.Addr) { + defer wg.Done() + prefix := netip.PrefixFrom(ip, 32) + if err := removeFromRouteTable(prefix, netip.Addr{}, intf); err != nil { + t.Errorf("Failed to remove route for %s: %v", prefix, err) + } + }(baseIP) + baseIP = baseIP.Next() + } + + wg.Wait() +} + func createAndSetupDummyInterface(t *testing.T, intf string, ipAddressCIDR string) string { t.Helper()