mirror of
https://github.com/zrepl/zrepl.git
synced 2025-01-08 23:39:04 +01:00
254a292362
refs #184
317 lines
9.7 KiB
Go
317 lines
9.7 KiB
Go
// package timeoutconn wraps a Wire to provide idle timeouts
|
|
// based on Set{Read,Write}Deadline.
|
|
// Additionally, it exports abstractions for vectored I/O.
|
|
package timeoutconn
|
|
|
|
// NOTE
|
|
// Readv and Writev are not split-off into a separate package
|
|
// because we use raw syscalls, bypassing Conn's Read / Write methods.
|
|
|
|
import (
|
|
"errors"
|
|
"io"
|
|
"net"
|
|
"sync/atomic"
|
|
"syscall"
|
|
"time"
|
|
"unsafe"
|
|
)
|
|
|
|
// Wire is the bidirectional connection primitive wrapped by Conn.
// It extends net.Conn with half-close support in the write direction.
type Wire interface {
	net.Conn
	// A call to CloseWrite indicates that no further Write calls will be made to Wire.
	// The implementation must return an error in case of Write calls after CloseWrite.
	// On the peer's side, after it read all data written to Wire prior to the call to
	// CloseWrite on our side, the peer's Read calls must return io.EOF.
	// CloseWrite must not affect the read-direction of Wire: specifically, the
	// peer must continue to be able to send, and our side must continue be
	// able to receive data over Wire.
	//
	// Note that CloseWrite may (and most likely will) return sooner than the
	// peer having received all data written to Wire prior to CloseWrite.
	// Note further that buffering happening in the network stacks on either side
	// mandates an explicit acknowledgement from the peer that the connection may
	// be fully shut down: If we call Close without such acknowledgement, any data
	// from peer to us that was already in flight may cause connection resets to
	// be sent from us to the peer via the specific transport protocol. Those
	// resets (e.g. RST frames) may erase all connection context on the peer,
	// including data in its receive buffers. Thus, those resets are in race with
	// a) transmission of data written prior to CloseWrite and
	// b) the peer application reading from those buffers.
	//
	// The WaitForPeerClose method can be used to wait for connection termination,
	// iff the implementation supports it. If it does not, the only reliable way
	// to wait for a peer to have read all data from Wire (until io.EOF), is to
	// expect it to close the wire at that point as well, and to drain Wire until
	// we also read io.EOF.
	CloseWrite() error

	// Wait for the peer to close the connection.
	// No data that could otherwise be Read is lost as a consequence of this call.
	// The use case for this API is abortive connection shutdown.
	// To provide any value over draining Wire using io.Read, an implementation
	// will likely use out-of-bounds messaging mechanisms.
	// TODO WaitForPeerClose() (supported bool, err error)
}
|
|
|
|
// Conn wraps a Wire and renews read/write deadlines on every I/O operation,
// thereby implementing an idle timeout: the connection errors out only if no
// single Read/Write makes progress within idleTimeout.
type Conn struct {
	Wire
	// renewDeadlinesDisabled is accessed atomically; it transitions 0 -> 1
	// exactly once, in DisableTimeouts, after which deadlines are never renewed.
	renewDeadlinesDisabled int32
	// idleTimeout is the per-operation deadline added to time.Now() on each
	// deadline renewal; immutable after Wrap.
	idleTimeout time.Duration
}
|
|
|
|
// Wrap returns a Conn that enforces idleTimeout on every Read and Write of conn
// by renewing the respective deadline before each operation.
func Wrap(conn Wire, idleTimeout time.Duration) Conn {
	return Conn{Wire: conn, idleTimeout: idleTimeout}
}
|
|
|
|
// DisableTimeouts disables the idle timeout behavior provided by this package.
|
|
// Existing deadlines are cleared iff the call is the first call to this method.
|
|
func (c *Conn) DisableTimeouts() error {
|
|
if atomic.CompareAndSwapInt32(&c.renewDeadlinesDisabled, 0, 1) {
|
|
return c.SetDeadline(time.Time{})
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *Conn) renewReadDeadline() error {
|
|
if atomic.LoadInt32(&c.renewDeadlinesDisabled) != 0 {
|
|
return nil
|
|
}
|
|
return c.SetReadDeadline(time.Now().Add(c.idleTimeout))
|
|
}
|
|
|
|
func (c *Conn) RenewWriteDeadline() error {
|
|
if atomic.LoadInt32(&c.renewDeadlinesDisabled) != 0 {
|
|
return nil
|
|
}
|
|
return c.SetWriteDeadline(time.Now().Add(c.idleTimeout))
|
|
}
|
|
|
|
func (c Conn) Read(p []byte) (n int, err error) {
|
|
n = 0
|
|
err = nil
|
|
restart:
|
|
if err := c.renewReadDeadline(); err != nil {
|
|
return n, err
|
|
}
|
|
var nCurRead int
|
|
nCurRead, err = c.Wire.Read(p[n:])
|
|
n += nCurRead
|
|
if netErr, ok := err.(net.Error); ok && netErr.Timeout() && nCurRead > 0 {
|
|
err = nil
|
|
goto restart
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
func (c Conn) Write(p []byte) (n int, err error) {
|
|
n = 0
|
|
restart:
|
|
if err := c.RenewWriteDeadline(); err != nil {
|
|
return n, err
|
|
}
|
|
var nCurWrite int
|
|
nCurWrite, err = c.Wire.Write(p[n:])
|
|
n += nCurWrite
|
|
if netErr, ok := err.(net.Error); ok && netErr.Timeout() && nCurWrite > 0 {
|
|
err = nil
|
|
goto restart
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
// Writes the given buffers to Conn, following the sematincs of io.Copy,
|
|
// but is guaranteed to use the writev system call if the wrapped Wire
|
|
// support it.
|
|
// Note the Conn does not support writev through io.Copy(aConn, aNetBuffers).
|
|
func (c Conn) WritevFull(bufs net.Buffers) (n int64, err error) {
|
|
n = 0
|
|
restart:
|
|
if err := c.RenewWriteDeadline(); err != nil {
|
|
return n, err
|
|
}
|
|
var nCurWrite int64
|
|
nCurWrite, err = io.Copy(c.Wire, &bufs)
|
|
n += nCurWrite
|
|
if netErr, ok := err.(net.Error); ok && netErr.Timeout() && nCurWrite > 0 {
|
|
err = nil
|
|
goto restart
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
var SyscallConnNotSupported = errors.New("SyscallConn not supported")
|
|
|
|
// SyscallConner is the interface that must be implemented for vectored I/O
// support. If the wrapped Wire does not implement it, a less efficient
// fallback implementation is used.
// Rest assured that Go's *net.TCPConn implements this interface.
type SyscallConner interface {
	// SyscallConn returns a raw connection for direct syscall access.
	// The sentinel error value SyscallConnNotSupported can be returned
	// if the support for SyscallConn depends on runtime conditions and
	// that runtime condition is not met.
	SyscallConn() (syscall.RawConn, error)
}
|
|
|
|
var _ SyscallConner = (*net.TCPConn)(nil)
|
|
|
|
func buildIovecs(buffers net.Buffers) (totalLen int64, vecs []syscall.Iovec) {
|
|
vecs = make([]syscall.Iovec, 0, len(buffers))
|
|
for i := range buffers {
|
|
totalLen += int64(len(buffers[i]))
|
|
if len(buffers[i]) == 0 {
|
|
continue
|
|
}
|
|
|
|
v := syscall.Iovec{
|
|
Base: &buffers[i][0],
|
|
}
|
|
// syscall.Iovec.Len has platform-dependent size, thus use SetLen
|
|
v.SetLen(len(buffers[i]))
|
|
|
|
vecs = append(vecs, v)
|
|
}
|
|
return totalLen, vecs
|
|
}
|
|
|
|
// Reads the given buffers full:
|
|
// Think of io.ReadvFull, but for net.Buffers + using the readv syscall.
|
|
//
|
|
// If the underlying Wire is not a SyscallConner, a fallback
|
|
// ipmlementation based on repeated Conn.Read invocations is used.
|
|
//
|
|
// If the connection returned io.EOF, the number of bytes up ritten until
|
|
// then + io.EOF is returned. This behavior is different to io.ReadFull
|
|
// which returns io.ErrUnexpectedEOF.
|
|
func (c Conn) ReadvFull(buffers net.Buffers) (n int64, err error) {
|
|
totalLen, iovecs := buildIovecs(buffers)
|
|
if debugReadvNoShortReadsAssertEnable {
|
|
defer debugReadvNoShortReadsAssert(totalLen, n, err)
|
|
}
|
|
scc, ok := c.Wire.(SyscallConner)
|
|
if !ok {
|
|
return c.readvFallback(buffers)
|
|
}
|
|
raw, err := scc.SyscallConn()
|
|
if err == SyscallConnNotSupported {
|
|
return c.readvFallback(buffers)
|
|
}
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
n, err = c.readv(raw, iovecs)
|
|
return
|
|
}
|
|
|
|
func (c Conn) readvFallback(nbuffers net.Buffers) (n int64, err error) {
|
|
buffers := [][]byte(nbuffers)
|
|
for i := range buffers {
|
|
curBuf := buffers[i]
|
|
inner:
|
|
for len(curBuf) > 0 {
|
|
if err := c.renewReadDeadline(); err != nil {
|
|
return n, err
|
|
}
|
|
var oneN int
|
|
oneN, err = c.Read(curBuf[:]) // WE WANT NO SHADOWING
|
|
curBuf = curBuf[oneN:]
|
|
n += int64(oneN)
|
|
if err != nil {
|
|
if netErr, ok := err.(net.Error); ok && netErr.Timeout() && oneN > 0 {
|
|
continue inner
|
|
}
|
|
return n, err
|
|
}
|
|
}
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
func (c Conn) readv(rawConn syscall.RawConn, iovecs []syscall.Iovec) (n int64, err error) {
|
|
for len(iovecs) > 0 {
|
|
if err := c.renewReadDeadline(); err != nil {
|
|
return n, err
|
|
}
|
|
oneN, oneErr := c.doOneReadv(rawConn, &iovecs)
|
|
n += oneN
|
|
if netErr, ok := oneErr.(net.Error); ok && netErr.Timeout() && oneN > 0 { // TODO likely not working
|
|
continue
|
|
} else if oneErr == nil && oneN > 0 {
|
|
continue
|
|
} else {
|
|
return n, oneErr
|
|
}
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// doOneReadv performs a single readv(2) system call on rawConn and advances
// *iovecs past the bytes that were read, trimming a partially-consumed iovec
// in place. It returns the number of bytes this call read; err is io.EOF if
// the syscall returned 0 bytes, a syscall.Errno on syscall failure, or the
// error returned by rawConn.Read itself.
func (c Conn) doOneReadv(rawConn syscall.RawConn, iovecs *[]syscall.Iovec) (n int64, err error) {
	rawReadErr := rawConn.Read(func(fd uintptr) (done bool) {
		// iovecs, n and err must not be shadowed!
		// (the closure communicates its results through the enclosing
		// function's named return values)

		// NOTE: unsafe.Pointer safety rules
		// https://tip.golang.org/pkg/unsafe/#Pointer
		//
		// (4) Conversion of a Pointer to a uintptr when calling syscall.Syscall.
		// ...
		// uintptr() conversions must appear within the syscall.Syscall argument list
		// so the referenced object is kept alive and unmoved for the call.
		thisReadN, _, errno := syscall.Syscall(
			syscall.SYS_READV,
			fd,
			uintptr(unsafe.Pointer(&(*iovecs)[0])),
			uintptr(len(*iovecs)),
		)
		// ^uintptr(0) is the uintptr representation of -1, readv's error return.
		if thisReadN == ^uintptr(0) {
			if errno == syscall.EAGAIN {
				// Not ready: return false so rawConn.Read waits for
				// readability and invokes this closure again.
				return false
			}
			err = syscall.Errno(errno)
			return true
		}
		if int(thisReadN) < 0 {
			panic("unexpected return value")
		}
		n += int64(thisReadN) // TODO check overflow

		// shift iovecs forward
		for left := int(thisReadN); left > 0; {
			// conversion to uint does not change value, see TestIovecLenFieldIsMachineUint, and left > 0
			thisIovecConsumedCompletely := uint((*iovecs)[0].Len) <= uint(left)
			if thisIovecConsumedCompletely {
				// Update left, cannot go below 0 due to
				// a) definition of thisIovecConsumedCompletely
				// b) left > 0 due to loop invariant
				// Conversion of .Len to int is thus also safe now, because it is <= left < INT_MAX
				left -= int((*iovecs)[0].Len)
				*iovecs = (*iovecs)[1:]
			} else {
				// trim this iovec to remaining length

				// NOTE: unsafe.Pointer safety rules
				// https://tip.golang.org/pkg/unsafe/#Pointer
				// (3) Conversion of a Pointer to a uintptr and back, with arithmetic.
				// ...
				// Note that both conversions must appear in the same expression,
				// with only the intervening arithmetic between them:
				(*iovecs)[0].Base = (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer((*iovecs)[0].Base)) + uintptr(left)))
				curVecNewLength := uint((*iovecs)[0].Len) - uint(left) // casts to uint do not change value
				(*iovecs)[0].SetLen(int(curVecNewLength))              // int and uint have the same size, no change of value

				break // inner
			}
		}
		// readv returning 0 on a socket means the peer closed its write side.
		if thisReadN == 0 {
			err = io.EOF
			return true
		}
		return true
	})

	// An error from rawConn.Read itself (e.g. deadline expiry while waiting
	// for readability) takes precedence over whatever the closure recorded.
	if rawReadErr != nil {
		err = rawReadErr
	}

	return n, err
}
|