replication/driver: automatic retries on connectivity-related errors

2025-08-19 03:06:02 +02:00 · 2019-03-11 13:46:36 +01:00
parent 07b43bffa4
commit c87759affe
20 changed files with 933 additions and 90 deletions
--- a/rpc/dataconn/dataconn_server.go
+++ b/rpc/dataconn/dataconn_server.go
@@ -25,6 +25,8 @@ type Handler interface {
 	// It is guaranteed that Server calls Receive with a stream that holds the IdleConnTimeout
 	// configured in ServerConfig.Shared.IdleConnTimeout.
 	Receive(ctx context.Context, r *pdu.ReceiveReq, receive zfs.StreamCopier) (*pdu.ReceiveRes, error)
+	// PingDataconn handles a PingReq received on the EndpointPing endpoint.
+	PingDataconn(ctx context.Context, r *pdu.PingReq) (*pdu.PingRes, error)
 }

 type Logger = logger.Logger
@@ -125,6 +127,13 @@ func (s *Server) serveConn(nc *transport.AuthConn) {
 			return
 		}
 		res, handlerErr = s.h.Receive(ctx, &req, &streamCopier{streamConn: c, closeStreamOnClose: false}) // SHADOWING
+	case EndpointPing:
+		var req pdu.PingReq
+		if err := proto.Unmarshal(reqStructured, &req); err != nil {
+			s.log.WithError(err).Error("cannot unmarshal ping request")
+			return
+		}
+		res, handlerErr = s.h.PingDataconn(ctx, &req) // SHADOWING
 	default:
 		s.log.WithField("endpoint", endpoint).Error("unknown endpoint")
 		handlerErr = fmt.Errorf("requested endpoint does not exist")