zrepl/util/iocommand.go

143 lines
3.1 KiB
Go
Raw Normal View History

package util
import (
"bytes"
"fmt"
"io"
util.IOCommand: add stderr logging for unexpected crashes in calls to ProcessState.Sys() Crashes observed on a FreeBSD 11.2 system 2018-09-27T05:08:39+02:00 [INFO][csnas]: start replication invocation="62" 2018-09-27T05:08:39+02:00 [INFO][csnas][repl]: start planning invocation="62" 2018-09-27T05:08:58+02:00 [INFO][csnas][repl]: start working invocation="62" 2018-09-27T05:09:57+02:00 [INFO][csnas]: start pruning sender invocation="62" 2018-09-27T05:10:11+02:00 [INFO][csnas]: start pruning receiver invocation="62" 2018-09-27T05:10:32+02:00 [INFO][csnas]: wait for wakeups 2018-09-27T06:08:39+02:00 [INFO][csnas]: start replication invocation="63" 2018-09-27T06:08:39+02:00 [INFO][csnas][repl]: start planning invocation="63" 2018-09-27T06:08:44+02:00 [INFO][csnas][repl]: start working invocation="63" 2018-09-27T06:08:49+02:00 [ERRO][csnas][repl]: receive request failed (might also be error on sender) invocation="63" filesystem="<REDACTED>" err="concurrent use of RPC connection" step="<REDACTED>(@zrepl_20180927_030838_000 => @zrepl_20180927_040835_000)" errType="*errors.errorString" panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x7d484b] goroutine 3938545 [running]: os.(*ProcessState).os.sys(...) /usr/lib/golang/src/os/exec_posix.go:78 os.(*ProcessState).Sys(...) /usr/lib/golang/src/os/exec.go:157 github.com/zrepl/zrepl/util.(*IOCommand).doWait(0xc4201b2d80, 0xc420070060, 0xc420070060) /go/github.com/zrepl/zrepl/util/iocommand.go:91 +0x4b github.com/zrepl/zrepl/util.(*IOCommand).Read(0xc4201b2d80, 0xc420790000, 0x8000, 0x8000, 0x800c76d90, 0x0, 0xc420067c10) /go/github.com/zrepl/zrepl/util/iocommand.go:82 +0xe4 github.com/zrepl/zrepl/util.(*ByteCounterReader).Read(0xc4202dc580, 0xc420790000, 0x8000, 0x8000, 0x8c6900, 0x7cb201, 0xc420790000) /go/github.com/zrepl/zrepl/util/io.go:118 +0x51 github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*chunkBuffer).readChunk(0xc42057e3c0, 0x800d1bbf0, 0xc4202dc580, 0xc420790000, 0x8000, 0x8000) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:58 +0x5e github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.writeStream(0xa04620, 0xc4204a9c20, 0x9fe340, 0xc4200d6380, 0x800d1bbf0, 0xc4202dc580, 0x8000, 0xc42000e000, 0x900420) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:101 +0x1ce github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Conn).send(0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42057e2c0, 0xc42013d570, 0x800d1bbf0, 0xc4202dc580, 0x0, 0x0) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/main.go:374 +0x557 github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply.func1(0x999741, 0x7, 0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42013d570, 0xa00aa0, 0xc4202dc580, 0xc420516480) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:169 +0x148 created by github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:167 +0x227
2018-09-27 12:06:59 +02:00
"os"
"os/exec"
"syscall"
)
// An IOCommand exposes a forked process's std(in|out|err) through the io.ReadWriteCloser interface.
type IOCommand struct {
Cmd *exec.Cmd
Stdin io.WriteCloser
Stdout io.ReadCloser
StderrBuf *bytes.Buffer
ExitResult *IOCommandExitResult
}
const IOCommandStderrBufSize = 1024
type IOCommandError struct {
WaitErr error
Stderr []byte
}
type IOCommandExitResult struct {
Error error
WaitStatus syscall.WaitStatus
}
func (e IOCommandError) Error() string {
return fmt.Sprintf("underlying process exited with error: %s\nstderr: %s\n", e.WaitErr, e.Stderr)
}
func RunIOCommand(command string, args ...string) (c *IOCommand, err error) {
c, err = NewIOCommand(command, args, IOCommandStderrBufSize)
if err != nil {
return
}
err = c.Start()
return
}
func NewIOCommand(command string, args []string, stderrBufSize int) (c *IOCommand, err error) {
if stderrBufSize == 0 {
stderrBufSize = IOCommandStderrBufSize
}
c = &IOCommand{}
c.Cmd = exec.Command(command, args...)
if c.Stdout, err = c.Cmd.StdoutPipe(); err != nil {
return
}
if c.Stdin, err = c.Cmd.StdinPipe(); err != nil {
return
}
c.StderrBuf = bytes.NewBuffer(make([]byte, 0, stderrBufSize))
c.Cmd.Stderr = c.StderrBuf
return
}
func (c *IOCommand) Start() (err error) {
if err = c.Cmd.Start(); err != nil {
return
}
return
}
// Read from process's stdout.
// The behavior after Close()ing is undefined
func (c *IOCommand) Read(buf []byte) (n int, err error) {
n, err = c.Stdout.Read(buf)
if err == io.EOF {
if waitErr := c.doWait(); waitErr != nil {
err = waitErr
}
}
return
}
func (c *IOCommand) doWait() (err error) {
waitErr := c.Cmd.Wait()
util.IOCommand: add stderr logging for unexpected crashes in calls to ProcessState.Sys() Crashes observed on a FreeBSD 11.2 system 2018-09-27T05:08:39+02:00 [INFO][csnas]: start replication invocation="62" 2018-09-27T05:08:39+02:00 [INFO][csnas][repl]: start planning invocation="62" 2018-09-27T05:08:58+02:00 [INFO][csnas][repl]: start working invocation="62" 2018-09-27T05:09:57+02:00 [INFO][csnas]: start pruning sender invocation="62" 2018-09-27T05:10:11+02:00 [INFO][csnas]: start pruning receiver invocation="62" 2018-09-27T05:10:32+02:00 [INFO][csnas]: wait for wakeups 2018-09-27T06:08:39+02:00 [INFO][csnas]: start replication invocation="63" 2018-09-27T06:08:39+02:00 [INFO][csnas][repl]: start planning invocation="63" 2018-09-27T06:08:44+02:00 [INFO][csnas][repl]: start working invocation="63" 2018-09-27T06:08:49+02:00 [ERRO][csnas][repl]: receive request failed (might also be error on sender) invocation="63" filesystem="<REDACTED>" err="concurrent use of RPC connection" step="<REDACTED>(@zrepl_20180927_030838_000 => @zrepl_20180927_040835_000)" errType="*errors.errorString" panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x7d484b] goroutine 3938545 [running]: os.(*ProcessState).os.sys(...) /usr/lib/golang/src/os/exec_posix.go:78 os.(*ProcessState).Sys(...) /usr/lib/golang/src/os/exec.go:157 github.com/zrepl/zrepl/util.(*IOCommand).doWait(0xc4201b2d80, 0xc420070060, 0xc420070060) /go/github.com/zrepl/zrepl/util/iocommand.go:91 +0x4b github.com/zrepl/zrepl/util.(*IOCommand).Read(0xc4201b2d80, 0xc420790000, 0x8000, 0x8000, 0x800c76d90, 0x0, 0xc420067c10) /go/github.com/zrepl/zrepl/util/iocommand.go:82 +0xe4 github.com/zrepl/zrepl/util.(*ByteCounterReader).Read(0xc4202dc580, 0xc420790000, 0x8000, 0x8000, 0x8c6900, 0x7cb201, 0xc420790000) /go/github.com/zrepl/zrepl/util/io.go:118 +0x51 github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*chunkBuffer).readChunk(0xc42057e3c0, 0x800d1bbf0, 0xc4202dc580, 0xc420790000, 0x8000, 0x8000) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:58 +0x5e github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.writeStream(0xa04620, 0xc4204a9c20, 0x9fe340, 0xc4200d6380, 0x800d1bbf0, 0xc4202dc580, 0x8000, 0xc42000e000, 0x900420) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:101 +0x1ce github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Conn).send(0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42057e2c0, 0xc42013d570, 0x800d1bbf0, 0xc4202dc580, 0x0, 0x0) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/main.go:374 +0x557 github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply.func1(0x999741, 0x7, 0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42013d570, 0xa00aa0, 0xc4202dc580, 0xc420516480) /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:169 +0x148 created by github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:167 +0x227
2018-09-27 12:06:59 +02:00
var wasUs bool = false
var waitStatus syscall.WaitStatus
if c.Cmd.ProcessState == nil {
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState is nil after c.Cmd.Wait()\n")
}
if c.Cmd.ProcessState != nil {
sysSpecific := c.Cmd.ProcessState.Sys()
var ok bool
waitStatus, ok = sysSpecific.(syscall.WaitStatus)
if !ok {
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState.Sys() could not be converted to syscall.WaitStatus: %T\n", sysSpecific)
os.Stderr.Sync()
panic(sysSpecific) // this can only be true if we are not on UNIX, and we don't support that
}
wasUs = waitStatus.Signaled() && waitStatus.Signal() == syscall.SIGTERM // in Close()
}
if waitErr != nil && !wasUs {
err = IOCommandError{
WaitErr: waitErr,
Stderr: c.StderrBuf.Bytes(),
}
}
c.ExitResult = &IOCommandExitResult{
Error: err, // is still empty if waitErr was due to signalling
WaitStatus: waitStatus,
}
return
}
// Write to process's stdin.
// The behavior after Close()ing is undefined
func (c *IOCommand) Write(buf []byte) (n int, err error) {
return c.Stdin.Write(buf)
}
// Terminate the child process and collect its exit status
// It is safe to call Close() multiple times.
func (c *IOCommand) Close() (err error) {
if c.Cmd.ProcessState == nil {
// racy...
err = syscall.Kill(c.Cmd.Process.Pid, syscall.SIGTERM)
if err != nil {
return
}
return c.doWait()
} else {
return c.ExitResult.Error
}
}