diff --git a/replication/driver/replication_driver.go b/replication/driver/replication_driver.go index d6c352a..0d85501 100644 --- a/replication/driver/replication_driver.go +++ b/replication/driver/replication_driver.go @@ -797,7 +797,7 @@ func (a *attempt) errorReport() *errorReport { r.byClass[class] = errs } for _, err := range r.flattened { - if neterr, ok := err.Err.(net.Error); ok && neterr.Temporary() { + if neterr, ok := err.Err.(net.Error); ok && neterr.Timeout() { putClass(err, errorClassTemporaryConnectivityRelated) continue } diff --git a/rpc/dataconn/heartbeatconn/heartbeatconn.go b/rpc/dataconn/heartbeatconn/heartbeatconn.go index b4815a2..1769685 100644 --- a/rpc/dataconn/heartbeatconn/heartbeatconn.go +++ b/rpc/dataconn/heartbeatconn/heartbeatconn.go @@ -24,6 +24,9 @@ func (e HeartbeatTimeout) Error() string { return "heartbeat timeout" } +// This function is deprecated in net.Error and since this +// function is not involved in .Accept() code path, nothing +// really needs this method to be here. func (e HeartbeatTimeout) Temporary() bool { return true } func (e HeartbeatTimeout) Timeout() bool { return true } diff --git a/rpc/dataconn/stream/stream.go b/rpc/dataconn/stream/stream.go index 4a35a1b..ec2283c 100644 --- a/rpc/dataconn/stream/stream.go +++ b/rpc/dataconn/stream/stream.go @@ -181,23 +181,19 @@ func (e *ReadStreamError) Error() string { var _ net.Error = &ReadStreamError{} -func (e ReadStreamError) netErr() net.Error { - if netErr, ok := e.Err.(net.Error); ok { - return netErr - } - return nil -} - func (e ReadStreamError) Timeout() bool { - if netErr := e.netErr(); netErr != nil { + if netErr, ok := e.Err.(net.Error); ok { return netErr.Timeout() } return false } +// This function is deprecated in net.Error and since this +// function is not involved in .Accept() code path, nothing +// really needs this method to be here. func (e ReadStreamError) Temporary() bool { - if netErr := e.netErr(); netErr != nil { - return netErr.Temporary() + if te, ok := e.Err.(interface{ Temporary() bool }); ok { + return te.Temporary() } return false } diff --git a/rpc/dataconn/stream/stream_conn.go b/rpc/dataconn/stream/stream_conn.go index bcbed6e..efed8c9 100644 --- a/rpc/dataconn/stream/stream_conn.go +++ b/rpc/dataconn/stream/stream_conn.go @@ -232,7 +232,12 @@ var _ net.Error = (*closeStateErrConnectionClosed)(nil) func (e *closeStateErrConnectionClosed) Error() string { return "connection closed" } -func (e *closeStateErrConnectionClosed) Timeout() bool { return false } + +func (e *closeStateErrConnectionClosed) Timeout() bool { return false } + +// This function is deprecated in net.Error and since this +// function is not involved in .Accept() code path, nothing +// really needs this method to be here. func (e *closeStateErrConnectionClosed) Temporary() bool { return false } func (s *closeState) CloseEntry() error { diff --git a/rpc/netadaptor/authlistener_netlistener_adaptor.go b/rpc/netadaptor/authlistener_netlistener_adaptor.go index b15259d..26f35e4 100644 --- a/rpc/netadaptor/authlistener_netlistener_adaptor.go +++ b/rpc/netadaptor/authlistener_netlistener_adaptor.go @@ -1,25 +1,5 @@ // Package netadaptor implements an adaptor from // transport.AuthenticatedListener to net.Listener. -// -// In contrast to transport.AuthenticatedListener, -// net.Listener is commonly expected (e.g. by net/http.Server.Serve), -// to return errors that fulfill the Temporary interface: -// interface Temporary { Temporary() bool } -// Common behavior of packages consuming net.Listener is to return -// from the serve-loop if an error returned by Accept is not Temporary, -// i.e., does not implement the interface or is !net.Error.Temporary(). -// -// The zrepl transport infrastructure was written with the -// idea that Accept() may return any kind of error, and the consumer -// would just log the error and continue calling Accept(). -// We have to adapt these listeners' behavior to the expectations -// of net/http.Server. -// -// Hence, Listener does not return an error at all but blocks the -// caller of Accept() until we get a (successfully authenticated) -// connection without errors from the transport. -// Accept errors returned from the transport are logged as errors -// to the logger passed on initialization. package netadaptor import ( diff --git a/rpc/rpc_client.go b/rpc/rpc_client.go index 4c8f81e..e60213c 100644 --- a/rpc/rpc_client.go +++ b/rpc/rpc_client.go @@ -188,8 +188,8 @@ func (c *Client) WaitForConnectivity(ctx context.Context) error { data, dataErr := c.dataClient.ReqPing(ctx, &req) // dataClient uses transport.Connecter, which doesn't expose WaitForReady(true) // => we need to mask dial timeouts - if err, ok := dataErr.(interface{ Temporary() bool }); ok && err.Temporary() { - // Rate-limit pings here in case Temporary() is a mis-classification + if err, ok := dataErr.(interface{ Timeout() bool }); ok && err.Timeout() { + // Rate-limit pings here in case Timeout() == true is a mis-classification // or returns immediately (this is a tight loop in that case) // TODO keep this in lockstep with controlClient // => don't use FailFast for control, but check that both control and data worked diff --git a/rpc/versionhandshake/versionhandshake.go b/rpc/versionhandshake/versionhandshake.go index 8dcb467..0fb2160 100644 --- a/rpc/versionhandshake/versionhandshake.go +++ b/rpc/versionhandshake/versionhandshake.go @@ -33,8 +33,47 @@ var _ net.Error = &HandshakeError{} func (e HandshakeError) Error() string { return e.msg } -// Like with net.OpErr (Go issue 6163), a client failing to handshake -// should be a temporary Accept error toward the Listener . +// When a net.Listener.Accept() returns an error, the server must +// decide whether to retry calling Accept() or not. +// On some platforms (e.g., Linux), Accept() can return errors +// related to the specific protocol connection that was supposed +// to be returned as asocket FD. Obviously, we want to ignore, +// maybe log, those errors and retry Accept() immediately to +// serve other connections. +// But there are also conditions where we get Accept() errors because +// the process has run out of file descriptors. In that case, retrying +// won't help. We need to close some file descriptor to make progress. +// Note that there could be lots of open file descriptors because we +// have accepted, and not yet closed, lots of connections in the past. +// And then, of course there can be errors where we just want +// to return, e.g., if there's a programming error and we're getting +// an EBADFD or whatever. +// +// So, the serve loops in net/http.Server.Serve() or gRPC's server.Serve() +// must inspect the error and decide what to do. +// The vehicle for this is the +// +// interface { Temporary() bool } +// +// Behavior in both of the aforementioned Serve() loops: +// +// - if the error doesn't implement the interface, stop serving and return +// - `Temporary() == true`: retry with back-off +// - `Temporary() == false`: stop serving and return +// +// So, to make this package's HandshakeListener work with these +// Serve() loops, we return Temporary() == true if the handshake fails. +// In the aforementioned categories, that's the case of a per-connection +// protocol error. +// +// Note: the net.Error interface has deprecated the Temporary() method +// in go.dev/issue/45729, but there is no replacement for users of .Accept(). +// Existing users of .Accept() continue to check for the interface. +// So, we need to continue supporting Temporary() until there's a different +// mechanism for serve loops to decide whether to retry or not. +// The following mailing list post proposes to eliminate the retries +// completely, but it seems like the effort has stalled. +// https://groups.google.com/g/golang-nuts/c/-JcZzOkyqYI/m/xwaZzjCgAwAJ func (e HandshakeError) Temporary() bool { if e.isAcceptError { return true