mirror of
https://github.com/netbirdio/netbird.git
synced 2024-10-04 17:22:36 +02:00
Add basic signal metrics (#2107)
This commit is contained in:
parent
94e505480b
commit
85b8f36ec1
@ -7,6 +7,9 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
|
||||||
"github.com/netbirdio/netbird/management/server/activity"
|
"github.com/netbirdio/netbird/management/server/activity"
|
||||||
|
|
||||||
"github.com/netbirdio/netbird/util"
|
"github.com/netbirdio/netbird/util"
|
||||||
@ -53,7 +56,10 @@ func startSignal(t *testing.T) (*grpc.Server, net.Listener) {
|
|||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
s := grpc.NewServer()
|
s := grpc.NewServer()
|
||||||
sigProto.RegisterSignalExchangeServer(s, sig.NewServer())
|
srv, err := sig.NewServer(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
sigProto.RegisterSignalExchangeServer(s, srv)
|
||||||
go func() {
|
go func() {
|
||||||
if err := s.Serve(lis); err != nil {
|
if err := s.Serve(lis); err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
|
@ -17,6 +17,7 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
"google.golang.org/grpc/keepalive"
|
"google.golang.org/grpc/keepalive"
|
||||||
@ -810,7 +811,7 @@ func TestEngine_MultiplePeers(t *testing.T) {
|
|||||||
ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
|
ctx, cancel := context.WithCancel(CtxInitState(context.Background()))
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
sigServer, signalAddr, err := startSignal()
|
sigServer, signalAddr, err := startSignal(t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
return
|
return
|
||||||
@ -1013,7 +1014,9 @@ func createEngine(ctx context.Context, cancel context.CancelFunc, setupKey strin
|
|||||||
return e, err
|
return e, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func startSignal() (*grpc.Server, string, error) {
|
func startSignal(t *testing.T) (*grpc.Server, string, error) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
||||||
|
|
||||||
lis, err := net.Listen("tcp", "localhost:0")
|
lis, err := net.Listen("tcp", "localhost:0")
|
||||||
@ -1021,7 +1024,9 @@ func startSignal() (*grpc.Server, string, error) {
|
|||||||
log.Fatalf("failed to listen: %v", err)
|
log.Fatalf("failed to listen: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
proto.RegisterSignalExchangeServer(s, signalServer.NewServer())
|
srv, err := signalServer.NewServer(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
proto.RegisterSignalExchangeServer(s, srv)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
if err = s.Serve(lis); err != nil {
|
if err = s.Serve(lis); err != nil {
|
||||||
|
@ -7,6 +7,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/netbirdio/management-integrations/integrations"
|
"github.com/netbirdio/management-integrations/integrations"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
@ -39,7 +41,7 @@ var (
|
|||||||
// we will use a management server started via to simulate the server and capture the number of retries
|
// we will use a management server started via to simulate the server and capture the number of retries
|
||||||
func TestConnectWithRetryRuns(t *testing.T) {
|
func TestConnectWithRetryRuns(t *testing.T) {
|
||||||
// start the signal server
|
// start the signal server
|
||||||
_, signalAddr, err := startSignal()
|
_, signalAddr, err := startSignal(t)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to start signal server: %v", err)
|
t.Fatalf("failed to start signal server: %v", err)
|
||||||
}
|
}
|
||||||
@ -141,7 +143,9 @@ func startManagement(t *testing.T, signalAddr string, counter *int) (*grpc.Serve
|
|||||||
return s, lis.Addr().String(), nil
|
return s, lis.Addr().String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func startSignal() (*grpc.Server, string, error) {
|
func startSignal(t *testing.T) (*grpc.Server, string, error) {
|
||||||
|
t.Helper()
|
||||||
|
|
||||||
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
s := grpc.NewServer(grpc.KeepaliveEnforcementPolicy(kaep), grpc.KeepaliveParams(kasp))
|
||||||
|
|
||||||
lis, err := net.Listen("tcp", "localhost:0")
|
lis, err := net.Listen("tcp", "localhost:0")
|
||||||
@ -149,7 +153,9 @@ func startSignal() (*grpc.Server, string, error) {
|
|||||||
log.Fatalf("failed to listen: %v", err)
|
log.Fatalf("failed to listen: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
proto.RegisterSignalExchangeServer(s, signalServer.NewServer())
|
srv, err := signalServer.NewServer(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
proto.RegisterSignalExchangeServer(s, srv)
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
if err = s.Serve(lis); err != nil {
|
if err = s.Serve(lis); err != nil {
|
||||||
|
1
go.mod
1
go.mod
@ -75,6 +75,7 @@ require (
|
|||||||
github.com/things-go/go-socks5 v0.0.4
|
github.com/things-go/go-socks5 v0.0.4
|
||||||
github.com/yusufpapurcu/wmi v1.2.4
|
github.com/yusufpapurcu/wmi v1.2.4
|
||||||
github.com/zcalusic/sysinfo v1.0.2
|
github.com/zcalusic/sysinfo v1.0.2
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0
|
||||||
go.opentelemetry.io/otel v1.26.0
|
go.opentelemetry.io/otel v1.26.0
|
||||||
go.opentelemetry.io/otel/exporters/prometheus v0.48.0
|
go.opentelemetry.io/otel/exporters/prometheus v0.48.0
|
||||||
go.opentelemetry.io/otel/metric v1.26.0
|
go.opentelemetry.io/otel/metric v1.26.0
|
||||||
|
2
go.sum
2
go.sum
@ -502,6 +502,8 @@ github.com/zcalusic/sysinfo v1.0.2 h1:nwTTo2a+WQ0NXwo0BGRojOJvJ/5XKvQih+2RrtWqfx
|
|||||||
github.com/zcalusic/sysinfo v1.0.2/go.mod h1:kluzTYflRWo6/tXVMJPdEjShsbPpsFRyy+p1mBQPC30=
|
github.com/zcalusic/sysinfo v1.0.2/go.mod h1:kluzTYflRWo6/tXVMJPdEjShsbPpsFRyy+p1mBQPC30=
|
||||||
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
|
||||||
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 h1:4Pp6oUg3+e/6M4C0A/3kJ2VYa++dsWVTtGgLVj5xtHg=
|
||||||
|
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0/go.mod h1:Mjt1i1INqiaoZOMGR1RIUJN+i3ChKoFRqzrRQhlkbs0=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 h1:Xs2Ncz0gNihqu9iosIZ5SkBbWo5T8JhhLJFMQL1qmLI=
|
||||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
|
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0/go.mod h1:vy+2G/6NvVMpwGX/NyLqcC41fxepnuKHk16E6IZUcJc=
|
||||||
go.opentelemetry.io/otel v1.26.0 h1:LQwgL5s/1W7YiiRwxf03QGnWLb2HW4pLiAhaA5cZXBs=
|
go.opentelemetry.io/otel v1.26.0 h1:LQwgL5s/1W7YiiRwxf03QGnWLb2HW4pLiAhaA5cZXBs=
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
# netbird Signal Server
|
# netbird Signal Server
|
||||||
|
|
||||||
This is a netbird signal-exchange server and client library to exchange connection information between netbird peers
|
This is a netbird signal-exchange server and client library to exchange
|
||||||
|
connection information between netbird peers
|
||||||
|
|
||||||
## Command Options
|
## Command Options
|
||||||
The CLI accepts the command **management** with the following options:
|
|
||||||
|
The CLI accepts the the following options:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
start Netbird Signal Server daemon
|
start Netbird Signal Server daemon
|
||||||
|
|
||||||
@ -20,24 +23,38 @@ Global Flags:
|
|||||||
--log-file string sets Netbird log path. If console is specified the the log will be output to stdout (default "/var/log/netbird/signal.log")
|
--log-file string sets Netbird log path. If console is specified the the log will be output to stdout (default "/var/log/netbird/signal.log")
|
||||||
--log-level string (default "info")
|
--log-level string (default "info")
|
||||||
```
|
```
|
||||||
|
|
||||||
## Running the Signal service (Docker)
|
## Running the Signal service (Docker)
|
||||||
|
|
||||||
We have packed the Signal server into docker image. You can pull the image from Docker Hub and execute it with the following commands:
|
We have packed the Signal server into docker image. You can pull the image from
|
||||||
|
Docker Hub and execute it with the
|
||||||
|
following commands:
|
||||||
|
|
||||||
````shell
|
````shell
|
||||||
docker pull netbirdio/signal:latest
|
docker pull netbirdio/signal:latest
|
||||||
docker run -d --name netbird-signal -p 10000:10000 netbirdio/signal:latest
|
docker run -d --name netbird-signal -p 10000:10000 netbirdio/signal:latest
|
||||||
````
|
````
|
||||||
The default log-level is set to INFO, if you need you can change it using by updating the docker cmd as followed:
|
|
||||||
|
The default log-level is set to INFO, if you need you can change it using by
|
||||||
|
updating the docker cmd as followed:
|
||||||
|
|
||||||
````shell
|
````shell
|
||||||
docker run -d --name netbird-signal -p 10000:10000 netbirdio/signal:latest --log-level DEBUG
|
docker run -d --name netbird-signal -p 10000:10000 netbirdio/signal:latest --log-level DEBUG
|
||||||
````
|
````
|
||||||
|
|
||||||
### Run with TLS (Let's Encrypt).
|
### Run with TLS (Let's Encrypt).
|
||||||
By specifying the **--letsencrypt-domain** the daemon will handle SSL certificate request and configuration.
|
|
||||||
|
|
||||||
In the following example ```10000``` is the signal service **default** port, and ```443``` will be used as port for Let's Encrypt challenge and HTTP API.
|
By specifying the **--letsencrypt-domain** the daemon will handle SSL
|
||||||
> The server where you are running a container has to have a public IP (for Let's Encrypt certificate challenge).
|
certificate request and configuration.
|
||||||
|
|
||||||
Replace <YOUR-DOMAIN> with your server's public domain (e.g. mydomain.com or subdomain sub.mydomain.com).
|
In the following example ```10000``` is the signal service **default** port,
|
||||||
|
and ```443``` will be used as port for
|
||||||
|
Let's Encrypt challenge and HTTP API.
|
||||||
|
> The server where you are running a container has to have a public IP (for
|
||||||
|
> Let's Encrypt certificate challenge).
|
||||||
|
|
||||||
|
Replace `<YOUR-DOMAIN>` with your server's public domain (e.g. mydomain.com or
|
||||||
|
subdomain sub.mydomain.com).
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# create a volume
|
# create a volume
|
||||||
@ -50,14 +67,57 @@ docker run -d --name netbird-signal \
|
|||||||
netbirdio/signal:latest \
|
netbirdio/signal:latest \
|
||||||
--letsencrypt-domain <YOUR-DOMAIN>
|
--letsencrypt-domain <YOUR-DOMAIN>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
The Signal Server exposes the following metrics in Prometheus format:
|
||||||
|
|
||||||
|
### Application Metrics
|
||||||
|
|
||||||
|
- **active_peers**: A Gauge metric that tracks the number of active peers
|
||||||
|
connected to the server.
|
||||||
|
- **peer_connection_duration_seconds**: A Histogram metric that measures the
|
||||||
|
duration a peer was connected in seconds.
|
||||||
|
- **registrations_total**: A Counter metric that counts the total number of peer
|
||||||
|
registrations.
|
||||||
|
- **deregistrations_total**: A Counter metric that counts the total number of
|
||||||
|
peer deregistrations.
|
||||||
|
- **registration_failures_total**: A Counter metric that counts the total number
|
||||||
|
of failed peer registrations. Possible
|
||||||
|
labels:
|
||||||
|
- `error`: The type of error that caused the registration failure (
|
||||||
|
e.g., `missing_id`, `missing_meta`, `failed_header`).
|
||||||
|
- **registration_delay_milliseconds**: A Histogram metric that measures the time
|
||||||
|
it took to register a peer in
|
||||||
|
milliseconds.
|
||||||
|
- **messages_forwarded_total**: A Counter metric that counts the total number of
|
||||||
|
messages forwarded between peers.
|
||||||
|
- **message_forward_failures_total**: A Counter metric that counts the total
|
||||||
|
number of failed message forwards between
|
||||||
|
peers. Possible labels:
|
||||||
|
- `type`: The type of failure (
|
||||||
|
e.g., `error`, `not_connected`, `not_registered`).
|
||||||
|
- **message_forward_latency_milliseconds**: A Histogram metric that measures the
|
||||||
|
latency of message forwarding between
|
||||||
|
peers in milliseconds.
|
||||||
|
|
||||||
|
### Endpoint
|
||||||
|
|
||||||
|
The metrics are exposed in Prometheus format on the `/metrics` endpoint. By
|
||||||
|
default, the server listens on port `9090`,
|
||||||
|
so the full endpoint would be:
|
||||||
|
|
||||||
|
> http://<server_ip>:9090/metrics
|
||||||
|
|
||||||
## For development purposes:
|
## For development purposes:
|
||||||
|
|
||||||
The project uses gRpc library and defines service in protobuf file located in:
|
The project uses gRpc library and defines service in protobuf file located in:
|
||||||
```proto/signalexchange.proto```
|
```proto/signalexchange.proto```
|
||||||
|
|
||||||
To build the project you have to do the following things.
|
To build the project you have to do the following things.
|
||||||
|
|
||||||
Install golang gRpc tools:
|
Install golang gRpc tools:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26
|
go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26
|
||||||
|
@ -9,6 +9,7 @@ import (
|
|||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||||
"google.golang.org/grpc"
|
"google.golang.org/grpc"
|
||||||
"google.golang.org/grpc/credentials/insecure"
|
"google.golang.org/grpc/credentials/insecure"
|
||||||
@ -198,7 +199,11 @@ func startSignal() (*grpc.Server, net.Listener) {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
s := grpc.NewServer()
|
s := grpc.NewServer()
|
||||||
sigProto.RegisterSignalExchangeServer(s, server.NewServer())
|
srv, err := server.NewServer(otel.Meter(""))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
sigProto.RegisterSignalExchangeServer(s, srv)
|
||||||
go func() {
|
go func() {
|
||||||
if err := s.Serve(lis); err != nil {
|
if err := s.Serve(lis); err != nil {
|
||||||
log.Fatalf("failed to serve: %v", err)
|
log.Fatalf("failed to serve: %v", err)
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
@ -13,8 +14,11 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
|
||||||
"golang.org/x/crypto/acme/autocert"
|
"golang.org/x/crypto/acme/autocert"
|
||||||
|
|
||||||
|
"github.com/netbirdio/netbird/signal/metrics"
|
||||||
|
|
||||||
"github.com/netbirdio/netbird/encryption"
|
"github.com/netbirdio/netbird/encryption"
|
||||||
"github.com/netbirdio/netbird/signal/proto"
|
"github.com/netbirdio/netbird/signal/proto"
|
||||||
"github.com/netbirdio/netbird/signal/server"
|
"github.com/netbirdio/netbird/signal/server"
|
||||||
@ -28,6 +32,10 @@ import (
|
|||||||
"google.golang.org/grpc/keepalive"
|
"google.golang.org/grpc/keepalive"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
metricsPort = 9090
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
signalPort int
|
signalPort int
|
||||||
signalLetsencryptDomain string
|
signalLetsencryptDomain string
|
||||||
@ -95,9 +103,26 @@ var (
|
|||||||
opts = append(opts, grpc.Creds(transportCredentials))
|
opts = append(opts, grpc.Creds(transportCredentials))
|
||||||
}
|
}
|
||||||
|
|
||||||
opts = append(opts, signalKaep, signalKasp)
|
metricsServer := metrics.NewServer(metricsPort, "")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("setup metrics: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opts = append(opts, signalKaep, signalKasp, grpc.StatsHandler(otelgrpc.NewServerHandler()))
|
||||||
grpcServer := grpc.NewServer(opts...)
|
grpcServer := grpc.NewServer(opts...)
|
||||||
proto.RegisterSignalExchangeServer(grpcServer, server.NewServer())
|
|
||||||
|
go func() {
|
||||||
|
log.Infof("running metrics server: %s%s", metricsServer.Addr, metricsServer.Endpoint)
|
||||||
|
if err := metricsServer.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) {
|
||||||
|
log.Fatalf("Failed to start metrics server: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
srv, err := server.NewServer(metricsServer.Meter)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("creating signal server: %v", err)
|
||||||
|
}
|
||||||
|
proto.RegisterSignalExchangeServer(grpcServer, srv)
|
||||||
|
|
||||||
var compatListener net.Listener
|
var compatListener net.Listener
|
||||||
if signalPort != 10000 {
|
if signalPort != 10000 {
|
||||||
@ -150,6 +175,14 @@ var (
|
|||||||
_ = compatListener.Close()
|
_ = compatListener.Close()
|
||||||
log.Infof("stopped gRPC backward compatibility server")
|
log.Infof("stopped gRPC backward compatibility server")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(cmd.Context(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if err := metricsServer.Shutdown(ctx); err != nil {
|
||||||
|
log.Errorf("Failed to stop metrics server: %v", err)
|
||||||
|
}
|
||||||
|
log.Infof("stopped metrics server")
|
||||||
|
|
||||||
log.Infof("stopped Signal Service")
|
log.Infof("stopped Signal Service")
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
124
signal/metrics/app.go
Normal file
124
signal/metrics/app.go
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"go.opentelemetry.io/otel/metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AppMetrics holds all the application metrics
|
||||||
|
type AppMetrics struct {
|
||||||
|
metric.Meter
|
||||||
|
|
||||||
|
ActivePeers metric.Int64UpDownCounter
|
||||||
|
PeerConnectionDuration metric.Int64Histogram
|
||||||
|
|
||||||
|
Registrations metric.Int64Counter
|
||||||
|
Deregistrations metric.Int64Counter
|
||||||
|
RegistrationFailures metric.Int64Counter
|
||||||
|
RegistrationDelay metric.Float64Histogram
|
||||||
|
|
||||||
|
MessagesForwarded metric.Int64Counter
|
||||||
|
MessageForwardFailures metric.Int64Counter
|
||||||
|
MessageForwardLatency metric.Float64Histogram
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewAppMetrics(meter metric.Meter) (*AppMetrics, error) {
|
||||||
|
activePeers, err := meter.Int64UpDownCounter("active_peers")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
peerConnectionDuration, err := meter.Int64Histogram("peer_connection_duration_seconds",
|
||||||
|
metric.WithExplicitBucketBoundaries(getPeerConnectionDurationBucketBoundaries()...))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
registrations, err := meter.Int64Counter("registrations_total")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
deregistrations, err := meter.Int64Counter("deregistrations_total")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
registrationFailures, err := meter.Int64Counter("registration_failures_total")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
registrationDelay, err := meter.Float64Histogram("registration_delay_milliseconds",
|
||||||
|
metric.WithExplicitBucketBoundaries(getStandardBucketBoundaries()...))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
messagesForwarded, err := meter.Int64Counter("messages_forwarded_total")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
messageForwardFailures, err := meter.Int64Counter("message_forward_failures_total")
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
messageForwardLatency, err := meter.Float64Histogram("message_forward_latency_milliseconds",
|
||||||
|
metric.WithExplicitBucketBoundaries(getStandardBucketBoundaries()...))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &AppMetrics{
|
||||||
|
Meter: meter,
|
||||||
|
|
||||||
|
ActivePeers: activePeers,
|
||||||
|
PeerConnectionDuration: peerConnectionDuration,
|
||||||
|
|
||||||
|
Registrations: registrations,
|
||||||
|
Deregistrations: deregistrations,
|
||||||
|
RegistrationFailures: registrationFailures,
|
||||||
|
RegistrationDelay: registrationDelay,
|
||||||
|
|
||||||
|
MessagesForwarded: messagesForwarded,
|
||||||
|
MessageForwardFailures: messageForwardFailures,
|
||||||
|
MessageForwardLatency: messageForwardLatency,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getStandardBucketBoundaries() []float64 {
|
||||||
|
return []float64{
|
||||||
|
0.1,
|
||||||
|
0.5,
|
||||||
|
1,
|
||||||
|
5,
|
||||||
|
10,
|
||||||
|
50,
|
||||||
|
100,
|
||||||
|
500,
|
||||||
|
1000,
|
||||||
|
5000,
|
||||||
|
10000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func getPeerConnectionDurationBucketBoundaries() []float64 {
|
||||||
|
return []float64{
|
||||||
|
1,
|
||||||
|
60,
|
||||||
|
// 10m
|
||||||
|
600,
|
||||||
|
// 1h
|
||||||
|
3600,
|
||||||
|
// 2h,
|
||||||
|
7200,
|
||||||
|
// 6h,
|
||||||
|
21600,
|
||||||
|
// 12h,
|
||||||
|
43200,
|
||||||
|
// 24h,
|
||||||
|
86400,
|
||||||
|
// 48h,
|
||||||
|
172800,
|
||||||
|
}
|
||||||
|
}
|
74
signal/metrics/metrics.go
Normal file
74
signal/metrics/metrics.go
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"reflect"
|
||||||
|
|
||||||
|
prometheus2 "github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||||
|
api "go.opentelemetry.io/otel/metric"
|
||||||
|
"go.opentelemetry.io/otel/sdk/metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
const defaultEndpoint = "/metrics"
|
||||||
|
|
||||||
|
// Metrics holds the metrics information and exposes it
|
||||||
|
type Metrics struct {
|
||||||
|
Meter api.Meter
|
||||||
|
provider *metric.MeterProvider
|
||||||
|
Endpoint string
|
||||||
|
|
||||||
|
*http.Server
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewServer initializes and returns a new Metrics instance
|
||||||
|
func NewServer(port int, endpoint string) *Metrics {
|
||||||
|
exporter, err := prometheus.New()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
provider := metric.NewMeterProvider(metric.WithReader(exporter))
|
||||||
|
otel.SetMeterProvider(provider)
|
||||||
|
|
||||||
|
pkg := reflect.TypeOf(defaultEndpoint).PkgPath()
|
||||||
|
meter := provider.Meter(pkg)
|
||||||
|
|
||||||
|
if endpoint == "" {
|
||||||
|
endpoint = defaultEndpoint
|
||||||
|
}
|
||||||
|
|
||||||
|
router := http.NewServeMux()
|
||||||
|
router.Handle(endpoint, promhttp.HandlerFor(
|
||||||
|
prometheus2.DefaultGatherer,
|
||||||
|
promhttp.HandlerOpts{EnableOpenMetrics: true}))
|
||||||
|
|
||||||
|
server := &http.Server{
|
||||||
|
Addr: fmt.Sprintf(":%d", port),
|
||||||
|
Handler: router,
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Metrics{
|
||||||
|
Meter: meter,
|
||||||
|
provider: provider,
|
||||||
|
Endpoint: endpoint,
|
||||||
|
Server: server,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown stops the metrics server
|
||||||
|
func (m *Metrics) Shutdown(ctx context.Context) error {
|
||||||
|
if err := m.Server.Shutdown(ctx); err != nil {
|
||||||
|
return fmt.Errorf("http server: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := m.provider.Shutdown(ctx); err != nil {
|
||||||
|
return fmt.Errorf("meter provider: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
@ -1,10 +1,14 @@
|
|||||||
package peer
|
package peer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/netbirdio/netbird/signal/proto"
|
"context"
|
||||||
log "github.com/sirupsen/logrus"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
|
"github.com/netbirdio/netbird/signal/metrics"
|
||||||
|
"github.com/netbirdio/netbird/signal/proto"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Peer representation of a connected Peer
|
// Peer representation of a connected Peer
|
||||||
@ -33,12 +37,14 @@ type Registry struct {
|
|||||||
Peers sync.Map
|
Peers sync.Map
|
||||||
// regMutex ensures that registration and de-registrations are safe
|
// regMutex ensures that registration and de-registrations are safe
|
||||||
regMutex sync.Mutex
|
regMutex sync.Mutex
|
||||||
|
metrics *metrics.AppMetrics
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewRegistry creates a new connected Peer registry
|
// NewRegistry creates a new connected Peer registry
|
||||||
func NewRegistry() *Registry {
|
func NewRegistry(metrics *metrics.AppMetrics) *Registry {
|
||||||
return &Registry{
|
return &Registry{
|
||||||
regMutex: sync.Mutex{},
|
regMutex: sync.Mutex{},
|
||||||
|
metrics: metrics,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,6 +66,8 @@ func (registry *Registry) IsPeerRegistered(peerId string) bool {
|
|||||||
|
|
||||||
// Register registers peer in the registry
|
// Register registers peer in the registry
|
||||||
func (registry *Registry) Register(peer *Peer) {
|
func (registry *Registry) Register(peer *Peer) {
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
registry.regMutex.Lock()
|
registry.regMutex.Lock()
|
||||||
defer registry.regMutex.Unlock()
|
defer registry.regMutex.Unlock()
|
||||||
|
|
||||||
@ -72,6 +80,11 @@ func (registry *Registry) Register(peer *Peer) {
|
|||||||
registry.Peers.Store(peer.Id, peer)
|
registry.Peers.Store(peer.Id, peer)
|
||||||
}
|
}
|
||||||
log.Debugf("peer registered [%s]", peer.Id)
|
log.Debugf("peer registered [%s]", peer.Id)
|
||||||
|
|
||||||
|
// record time as milliseconds
|
||||||
|
registry.metrics.RegistrationDelay.Record(context.Background(), float64(time.Since(start).Nanoseconds())/1e6)
|
||||||
|
|
||||||
|
registry.metrics.Registrations.Add(context.Background(), 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Deregister Peer from the Registry (usually once it disconnects)
|
// Deregister Peer from the Registry (usually once it disconnects)
|
||||||
@ -90,4 +103,6 @@ func (registry *Registry) Deregister(peer *Peer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
log.Debugf("peer deregistered [%s]", peer.Id)
|
log.Debugf("peer deregistered [%s]", peer.Id)
|
||||||
|
|
||||||
|
registry.metrics.Deregistrations.Add(context.Background(), 1)
|
||||||
}
|
}
|
||||||
|
@ -1,13 +1,21 @@
|
|||||||
package peer
|
package peer
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"go.opentelemetry.io/otel"
|
||||||
|
|
||||||
|
"github.com/netbirdio/netbird/signal/metrics"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestRegistry_ShouldNotDeregisterWhenHasNewerStreamRegistered(t *testing.T) {
|
func TestRegistry_ShouldNotDeregisterWhenHasNewerStreamRegistered(t *testing.T) {
|
||||||
r := NewRegistry()
|
metrics, err := metrics.NewAppMetrics(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r := NewRegistry(metrics)
|
||||||
|
|
||||||
peerID := "peer"
|
peerID := "peer"
|
||||||
|
|
||||||
@ -30,7 +38,10 @@ func TestRegistry_ShouldNotDeregisterWhenHasNewerStreamRegistered(t *testing.T)
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRegistry_GetNonExistentPeer(t *testing.T) {
|
func TestRegistry_GetNonExistentPeer(t *testing.T) {
|
||||||
r := NewRegistry()
|
metrics, err := metrics.NewAppMetrics(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r := NewRegistry(metrics)
|
||||||
|
|
||||||
peer, ok := r.Get("non_existent_peer")
|
peer, ok := r.Get("non_existent_peer")
|
||||||
|
|
||||||
@ -44,7 +55,10 @@ func TestRegistry_GetNonExistentPeer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRegistry_Register(t *testing.T) {
|
func TestRegistry_Register(t *testing.T) {
|
||||||
r := NewRegistry()
|
metrics, err := metrics.NewAppMetrics(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r := NewRegistry(metrics)
|
||||||
peer1 := NewPeer("test_peer_1", nil)
|
peer1 := NewPeer("test_peer_1", nil)
|
||||||
peer2 := NewPeer("test_peer_2", nil)
|
peer2 := NewPeer("test_peer_2", nil)
|
||||||
r.Register(peer1)
|
r.Register(peer1)
|
||||||
@ -60,7 +74,10 @@ func TestRegistry_Register(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestRegistry_Deregister(t *testing.T) {
|
func TestRegistry_Deregister(t *testing.T) {
|
||||||
r := NewRegistry()
|
metrics, err := metrics.NewAppMetrics(otel.Meter(""))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
r := NewRegistry(metrics)
|
||||||
peer1 := NewPeer("test_peer_1", nil)
|
peer1 := NewPeer("test_peer_1", nil)
|
||||||
peer2 := NewPeer("test_peer_2", nil)
|
peer2 := NewPeer("test_peer_2", nil)
|
||||||
r.Register(peer1)
|
r.Register(peer1)
|
||||||
|
@ -3,72 +3,114 @@ package server
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/netbirdio/netbird/signal/peer"
|
"io"
|
||||||
"github.com/netbirdio/netbird/signal/proto"
|
"time"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
"go.opentelemetry.io/otel/metric"
|
||||||
"google.golang.org/grpc/codes"
|
"google.golang.org/grpc/codes"
|
||||||
"google.golang.org/grpc/metadata"
|
"google.golang.org/grpc/metadata"
|
||||||
"google.golang.org/grpc/status"
|
"google.golang.org/grpc/status"
|
||||||
"io"
|
|
||||||
|
"github.com/netbirdio/netbird/signal/metrics"
|
||||||
|
"github.com/netbirdio/netbird/signal/peer"
|
||||||
|
"github.com/netbirdio/netbird/signal/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
labelType = "type"
|
||||||
|
labelTypeError = "error"
|
||||||
|
labelTypeNotConnected = "not_connected"
|
||||||
|
labelTypeNotRegistered = "not_registered"
|
||||||
|
|
||||||
|
labelError = "error"
|
||||||
|
labelErrorMissingId = "missing_id"
|
||||||
|
labelErrorMissingMeta = "missing_meta"
|
||||||
|
labelErrorFailedHeader = "failed_header"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Server an instance of a Signal server
|
// Server an instance of a Signal server
|
||||||
type Server struct {
|
type Server struct {
|
||||||
registry *peer.Registry
|
registry *peer.Registry
|
||||||
proto.UnimplementedSignalExchangeServer
|
proto.UnimplementedSignalExchangeServer
|
||||||
|
|
||||||
|
metrics *metrics.AppMetrics
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServer creates a new Signal server
|
// NewServer creates a new Signal server
|
||||||
func NewServer() *Server {
|
func NewServer(meter metric.Meter) (*Server, error) {
|
||||||
return &Server{
|
appMetrics, err := metrics.NewAppMetrics(meter)
|
||||||
registry: peer.NewRegistry(),
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("creating app metrics: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s := &Server{
|
||||||
|
registry: peer.NewRegistry(appMetrics),
|
||||||
|
metrics: appMetrics,
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send forwards a message to the signal peer
|
// Send forwards a message to the signal peer
|
||||||
func (s *Server) Send(ctx context.Context, msg *proto.EncryptedMessage) (*proto.EncryptedMessage, error) {
|
func (s *Server) Send(ctx context.Context, msg *proto.EncryptedMessage) (*proto.EncryptedMessage, error) {
|
||||||
|
|
||||||
if !s.registry.IsPeerRegistered(msg.Key) {
|
if !s.registry.IsPeerRegistered(msg.Key) {
|
||||||
|
s.metrics.MessageForwardFailures.Add(ctx, 1, metric.WithAttributes(attribute.String(labelType, labelTypeNotRegistered)))
|
||||||
|
|
||||||
return nil, fmt.Errorf("peer %s is not registered", msg.Key)
|
return nil, fmt.Errorf("peer %s is not registered", msg.Key)
|
||||||
}
|
}
|
||||||
|
|
||||||
if dstPeer, found := s.registry.Get(msg.RemoteKey); found {
|
if dstPeer, found := s.registry.Get(msg.RemoteKey); found {
|
||||||
//forward the message to the target peer
|
//forward the message to the target peer
|
||||||
err := dstPeer.Stream.Send(msg)
|
if err := dstPeer.Stream.Send(msg); err != nil {
|
||||||
if err != nil {
|
|
||||||
log.Errorf("error while forwarding message from peer [%s] to peer [%s] %v", msg.Key, msg.RemoteKey, err)
|
log.Errorf("error while forwarding message from peer [%s] to peer [%s] %v", msg.Key, msg.RemoteKey, err)
|
||||||
//todo respond to the sender?
|
//todo respond to the sender?
|
||||||
|
|
||||||
|
s.metrics.MessageForwardFailures.Add(ctx, 1, metric.WithAttributes(attribute.String(labelType, labelTypeError)))
|
||||||
|
} else {
|
||||||
|
s.metrics.MessagesForwarded.Add(context.Background(), 1)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("message from peer [%s] can't be forwarded to peer [%s] because destination peer is not connected", msg.Key, msg.RemoteKey)
|
log.Debugf("message from peer [%s] can't be forwarded to peer [%s] because destination peer is not connected", msg.Key, msg.RemoteKey)
|
||||||
//todo respond to the sender?
|
//todo respond to the sender?
|
||||||
|
|
||||||
|
s.metrics.MessageForwardFailures.Add(ctx, 1, metric.WithAttributes(attribute.String(labelType, labelTypeNotConnected)))
|
||||||
}
|
}
|
||||||
return &proto.EncryptedMessage{}, nil
|
return &proto.EncryptedMessage{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConnectStream connects to the exchange stream
|
// ConnectStream connects to the exchange stream
|
||||||
func (s *Server) ConnectStream(stream proto.SignalExchange_ConnectStreamServer) error {
|
func (s *Server) ConnectStream(stream proto.SignalExchange_ConnectStreamServer) error {
|
||||||
|
|
||||||
p, err := s.connectPeer(stream)
|
p, err := s.connectPeer(stream)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
startRegister := time.Now()
|
||||||
|
|
||||||
|
s.metrics.ActivePeers.Add(stream.Context(), 1)
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
log.Infof("peer disconnected [%s] [streamID %d] ", p.Id, p.StreamID)
|
log.Infof("peer disconnected [%s] [streamID %d] ", p.Id, p.StreamID)
|
||||||
s.registry.Deregister(p)
|
s.registry.Deregister(p)
|
||||||
|
|
||||||
|
s.metrics.PeerConnectionDuration.Record(stream.Context(), int64(time.Since(startRegister).Seconds()))
|
||||||
|
s.metrics.ActivePeers.Add(context.Background(), -1)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
//needed to confirm that the peer has been registered so that the client can proceed
|
//needed to confirm that the peer has been registered so that the client can proceed
|
||||||
header := metadata.Pairs(proto.HeaderRegistered, "1")
|
header := metadata.Pairs(proto.HeaderRegistered, "1")
|
||||||
err = stream.SendHeader(header)
|
err = stream.SendHeader(header)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
s.metrics.RegistrationFailures.Add(stream.Context(), 1, metric.WithAttributes(attribute.String(labelError, labelErrorFailedHeader)))
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Infof("peer connected [%s] [streamID %d] ", p.Id, p.StreamID)
|
log.Infof("peer connected [%s] [streamID %d] ", p.Id, p.StreamID)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
|
|
||||||
//read incoming messages
|
//read incoming messages
|
||||||
msg, err := stream.Recv()
|
msg, err := stream.Recv()
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
@ -76,18 +118,28 @@ func (s *Server) ConnectStream(stream proto.SignalExchange_ConnectStreamServer)
|
|||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
log.Debugf("received a new message from peer [%s] to peer [%s]", p.Id, msg.RemoteKey)
|
log.Debugf("received a new message from peer [%s] to peer [%s]", p.Id, msg.RemoteKey)
|
||||||
|
|
||||||
// lookup the target peer where the message is going to
|
// lookup the target peer where the message is going to
|
||||||
if dstPeer, found := s.registry.Get(msg.RemoteKey); found {
|
if dstPeer, found := s.registry.Get(msg.RemoteKey); found {
|
||||||
//forward the message to the target peer
|
//forward the message to the target peer
|
||||||
err := dstPeer.Stream.Send(msg)
|
if err := dstPeer.Stream.Send(msg); err != nil {
|
||||||
if err != nil {
|
|
||||||
log.Errorf("error while forwarding message from peer [%s] to peer [%s] %v", p.Id, msg.RemoteKey, err)
|
log.Errorf("error while forwarding message from peer [%s] to peer [%s] %v", p.Id, msg.RemoteKey, err)
|
||||||
//todo respond to the sender?
|
//todo respond to the sender?
|
||||||
|
|
||||||
|
// in milliseconds
|
||||||
|
s.metrics.MessageForwardLatency.Record(stream.Context(), float64(time.Since(start).Nanoseconds())/1e6)
|
||||||
|
s.metrics.MessagesForwarded.Add(stream.Context(), 1)
|
||||||
|
} else {
|
||||||
|
s.metrics.MessageForwardFailures.Add(stream.Context(), 1, metric.WithAttributes(attribute.String(labelType, labelTypeError)))
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
log.Debugf("message from peer [%s] can't be forwarded to peer [%s] because destination peer is not connected", p.Id, msg.RemoteKey)
|
log.Debugf("message from peer [%s] can't be forwarded to peer [%s] because destination peer is not connected", p.Id, msg.RemoteKey)
|
||||||
//todo respond to the sender?
|
//todo respond to the sender?
|
||||||
|
|
||||||
|
s.metrics.MessageForwardFailures.Add(stream.Context(), 1, metric.WithAttributes(attribute.String(labelType, labelTypeNotConnected)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
<-stream.Context().Done()
|
<-stream.Context().Done()
|
||||||
@ -101,12 +153,16 @@ func (s Server) connectPeer(stream proto.SignalExchange_ConnectStreamServer) (*p
|
|||||||
if meta, hasMeta := metadata.FromIncomingContext(stream.Context()); hasMeta {
|
if meta, hasMeta := metadata.FromIncomingContext(stream.Context()); hasMeta {
|
||||||
if id, found := meta[proto.HeaderId]; found {
|
if id, found := meta[proto.HeaderId]; found {
|
||||||
p := peer.NewPeer(id[0], stream)
|
p := peer.NewPeer(id[0], stream)
|
||||||
|
|
||||||
s.registry.Register(p)
|
s.registry.Register(p)
|
||||||
|
|
||||||
return p, nil
|
return p, nil
|
||||||
} else {
|
} else {
|
||||||
|
s.metrics.RegistrationFailures.Add(stream.Context(), 1, metric.WithAttributes(attribute.String(labelError, labelErrorMissingId)))
|
||||||
return nil, status.Errorf(codes.FailedPrecondition, "missing connection header: "+proto.HeaderId)
|
return nil, status.Errorf(codes.FailedPrecondition, "missing connection header: "+proto.HeaderId)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
s.metrics.RegistrationFailures.Add(stream.Context(), 1, metric.WithAttributes(attribute.String(labelError, labelErrorMissingMeta)))
|
||||||
return nil, status.Errorf(codes.FailedPrecondition, "missing connection stream meta")
|
return nil, status.Errorf(codes.FailedPrecondition, "missing connection stream meta")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user