mirror of
https://github.com/zrepl/zrepl.git
synced 2025-02-01 19:20:03 +01:00
rpc: use grpchelper package, add grpc.KeepaliveEnforcementPolicy, fix 'transport is closing' error
Symptom: zrepl log message:
rpc error: code = Unavailable desc = transport is closing
Underlying Problem:
* rpc.NewServer was not using grpchelper.NewServer and not setting Server KeepaliveParams by itself
* and even grpchelper.NewServer didn't set a KeepaliveEnforcementPolicy
* However, KeepaliveEnforcementPolicy is necessary if the client keepalive is configured with non-default values
* .. which grpchelper.ClientConn does, and that method is used by rpc.NewClient
* rpc.Client was sending pings
* lacking server-side KeepaliveEnforcementPolicy caused grpc-hard-coded `pingStrikes` counter to go past limit of 2:
021bd5734e/internal/transport/http2_server.go (L726)
How was this debugged:
* GRPC_GO_LOG_VERBOSITY_LEVEL=99 GRPC_GO_LOG_SEVERITY_LEVEL=info PATH=/root/mockpath:$PATH zrepl daemon
* with a patch on grpc package to get more log messages on pingStrikes increases:
diff --git a/internal/transport/http2_server.go b/internal/transport/http2_server.go
index 8b04b039..f68f55ea 100644
--- a/internal/transport/http2_server.go
+++ b/internal/transport/http2_server.go
@@ -214,6 +214,7 @@ func newHTTP2Server(conn net.Conn, config *ServerConfig) (_ ServerTransport, err
if kep.MinTime == 0 {
kep.MinTime = defaultKeepalivePolicyMinTime
}
+ errorf("effective keepalive enforcement policy: %#v", kep)
done := make(chan struct{})
t := &http2Server{
ctx: context.Background(),
@@ -696,6 +697,7 @@ func (t *http2Server) handlePing(f *http2.PingFrame) {
t.controlBuf.put(pingAck)
now := time.Now()
+ errorf("transport:ping handlePing, last ping %s ago", now.Sub(t.lastPingAt))
defer func() {
t.lastPingAt = now
}()
@@ -713,11 +715,13 @@ func (t *http2Server) handlePing(f *http2.PingFrame) {
// Keepalive shouldn't be active thus, this new ping should
// have come after at least defaultPingTimeout.
if t.lastPingAt.Add(defaultPingTimeout).After(now) {
+ errorf("transport:ping strike ns < 1 && !t.kep.PermitWithoutStream")
t.pingStrikes++
}
} else {
// Check if keepalive policy is respected.
if t.lastPingAt.Add(t.kep.MinTime).After(now) {
+ errorf("transport:ping strike !(ns < 1 && !t.kep.PermitWithoutStream) kep.MinTime=%s ns=%d", t.kep.MinTime, ns)
t.pingStrikes++
}
}
fixes #181
This commit is contained in:
parent
d35e2400b2
commit
2927d0ca15
@ -87,10 +87,13 @@ func server() {
|
||||
|
||||
log := logger.NewStderrDebugLogger()
|
||||
|
||||
srv, serve, err := grpchelper.NewServer(authListenerFactory, clientIdentityKey, log)
|
||||
authListener, err := authListenerFactory()
|
||||
if err != nil {
|
||||
onErr(err, "new server")
|
||||
onErr(err, "cannot listen")
|
||||
}
|
||||
|
||||
srv, serve := grpchelper.NewServer(authListener, clientIdentityKey, log)
|
||||
|
||||
svc := &greeter{"hello "}
|
||||
pdu.RegisterGreeterServer(srv, svc)
|
||||
|
||||
|
@ -50,25 +50,25 @@ func ClientConn(cn transport.Connecter, log Logger) *grpc.ClientConn {
|
||||
}
|
||||
|
||||
// NewServer is a convenience interface around the TransportCredentials and Interceptors interface.
|
||||
func NewServer(authListenerFactory transport.AuthenticatedListenerFactory, clientIdentityKey interface{}, logger grpcclientidentity.Logger) (srv *grpc.Server, serve func() error, err error) {
|
||||
func NewServer(authListener transport.AuthenticatedListener, clientIdentityKey interface{}, logger grpcclientidentity.Logger) (srv *grpc.Server, serve func() error) {
|
||||
ka := grpc.KeepaliveParams(keepalive.ServerParameters{
|
||||
Time: StartKeepalivesAfterInactivityDuration,
|
||||
Timeout: KeepalivePeerTimeout,
|
||||
})
|
||||
ep := grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
|
||||
MinTime: StartKeepalivesAfterInactivityDuration / 2, // avoid skew
|
||||
PermitWithoutStream: true,
|
||||
})
|
||||
tcs := grpcclientidentity.NewTransportCredentials(logger)
|
||||
unary, stream := grpcclientidentity.NewInterceptors(logger, clientIdentityKey)
|
||||
srv = grpc.NewServer(grpc.Creds(tcs), grpc.UnaryInterceptor(unary), grpc.StreamInterceptor(stream), ka)
|
||||
srv = grpc.NewServer(grpc.Creds(tcs), grpc.UnaryInterceptor(unary), grpc.StreamInterceptor(stream), ka, ep)
|
||||
|
||||
serve = func() error {
|
||||
l, err := authListenerFactory()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := srv.Serve(netadaptor.New(l, logger)); err != nil {
|
||||
if err := srv.Serve(netadaptor.New(authListener, logger)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return srv, serve, nil
|
||||
return srv, serve
|
||||
}
|
||||
|
@ -4,13 +4,10 @@ import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
|
||||
"github.com/zrepl/zrepl/endpoint"
|
||||
"github.com/zrepl/zrepl/replication/logic/pdu"
|
||||
"github.com/zrepl/zrepl/rpc/dataconn"
|
||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity"
|
||||
"github.com/zrepl/zrepl/rpc/netadaptor"
|
||||
"github.com/zrepl/zrepl/rpc/grpcclientidentity/grpchelper"
|
||||
"github.com/zrepl/zrepl/rpc/versionhandshake"
|
||||
"github.com/zrepl/zrepl/transport"
|
||||
"github.com/zrepl/zrepl/util/envconst"
|
||||
@ -28,7 +25,6 @@ type serveFunc func(ctx context.Context, demuxedListener transport.Authenticated
|
||||
type Server struct {
|
||||
logger Logger
|
||||
handler Handler
|
||||
controlServer *grpc.Server
|
||||
controlServerServe serveFunc
|
||||
dataServer *dataconn.Server
|
||||
dataServerServe serveFunc
|
||||
@ -39,12 +35,11 @@ type HandlerContextInterceptor func(ctx context.Context) context.Context
|
||||
// config must be valid (use its Validate function).
|
||||
func NewServer(handler Handler, loggers Loggers, ctxInterceptor HandlerContextInterceptor) *Server {
|
||||
|
||||
// setup control server
|
||||
tcs := grpcclientidentity.NewTransportCredentials(loggers.Control) // TODO different subsystem for log
|
||||
unary, stream := grpcclientidentity.NewInterceptors(loggers.Control, endpoint.ClientIdentityKey)
|
||||
controlServer := grpc.NewServer(grpc.Creds(tcs), grpc.UnaryInterceptor(unary), grpc.StreamInterceptor(stream))
|
||||
pdu.RegisterReplicationServer(controlServer, handler)
|
||||
controlServerServe := func(ctx context.Context, controlListener transport.AuthenticatedListener, errOut chan<- error) {
|
||||
|
||||
controlServer, serve := grpchelper.NewServer(controlListener, endpoint.ClientIdentityKey, loggers.Control)
|
||||
pdu.RegisterReplicationServer(controlServer, handler)
|
||||
|
||||
// give time for graceful stop until deadline expires, then hard stop
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
@ -55,7 +50,7 @@ func NewServer(handler Handler, loggers Loggers, ctxInterceptor HandlerContextIn
|
||||
controlServer.GracefulStop()
|
||||
}()
|
||||
|
||||
errOut <- controlServer.Serve(netadaptor.New(controlListener, loggers.Control))
|
||||
errOut <- serve()
|
||||
}
|
||||
|
||||
// setup data server
|
||||
@ -76,7 +71,6 @@ func NewServer(handler Handler, loggers Loggers, ctxInterceptor HandlerContextIn
|
||||
server := &Server{
|
||||
logger: loggers.General,
|
||||
handler: handler,
|
||||
controlServer: controlServer,
|
||||
controlServerServe: controlServerServe,
|
||||
dataServer: dataServer,
|
||||
dataServerServe: dataServerServe,
|
||||
|
Loading…
Reference in New Issue
Block a user