util.IOCommand: add stderr logging for unexpected crashes in calls to ProcessState.Sys()

Crashes observed on a FreeBSD 11.2 system

2018-09-27T05:08:39+02:00 [INFO][csnas]: start replication invocation="62"
2018-09-27T05:08:39+02:00 [INFO][csnas][repl]: start planning invocation="62"
2018-09-27T05:08:58+02:00 [INFO][csnas][repl]: start working invocation="62"
2018-09-27T05:09:57+02:00 [INFO][csnas]: start pruning sender invocation="62"
2018-09-27T05:10:11+02:00 [INFO][csnas]: start pruning receiver invocation="62"
2018-09-27T05:10:32+02:00 [INFO][csnas]: wait for wakeups
2018-09-27T06:08:39+02:00 [INFO][csnas]: start replication invocation="63"
2018-09-27T06:08:39+02:00 [INFO][csnas][repl]: start planning invocation="63"
2018-09-27T06:08:44+02:00 [INFO][csnas][repl]: start working invocation="63"
2018-09-27T06:08:49+02:00 [ERRO][csnas][repl]: receive request failed (might also be error on sender) invocation="63" filesystem="<REDACTED>" err="concurrent use of RPC connection" step="<REDACTED>(@zrepl_20180927_030838_000 => @zrepl_20180927_040835_000)" errType="*errors.errorString"
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x7d484b]

goroutine 3938545 [running]:
os.(*ProcessState).os.sys(...)
        /usr/lib/golang/src/os/exec_posix.go:78
os.(*ProcessState).Sys(...)
        /usr/lib/golang/src/os/exec.go:157
github.com/zrepl/zrepl/util.(*IOCommand).doWait(0xc4201b2d80, 0xc420070060, 0xc420070060)
        /go/github.com/zrepl/zrepl/util/iocommand.go:91 +0x4b
github.com/zrepl/zrepl/util.(*IOCommand).Read(0xc4201b2d80, 0xc420790000, 0x8000, 0x8000, 0x800c76d90, 0x0, 0xc420067c10)
        /go/github.com/zrepl/zrepl/util/iocommand.go:82 +0xe4
github.com/zrepl/zrepl/util.(*ByteCounterReader).Read(0xc4202dc580, 0xc420790000, 0x8000, 0x8000, 0x8c6900, 0x7cb201, 0xc420790000)
        /go/github.com/zrepl/zrepl/util/io.go:118 +0x51
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*chunkBuffer).readChunk(0xc42057e3c0, 0x800d1bbf0, 0xc4202dc580, 0xc420790000, 0x8000, 0x8000)
        /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:58 +0x5e
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.writeStream(0xa04620, 0xc4204a9c20, 0x9fe340, 0xc4200d6380, 0x800d1bbf0, 0xc4202dc580, 0x8000, 0xc42000e000, 0x900420)
        /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:101 +0x1ce
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Conn).send(0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42057e2c0, 0xc42013d570, 0x800d1bbf0, 0xc4202dc580, 0x0, 0x0)
        /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/main.go:374 +0x557
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply.func1(0x999741, 0x7, 0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42013d570, 0xa00aa0, 0xc4202dc580, 0xc420516480)
        /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:169 +0x148
created by github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply
        /go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:167 +0x227
This commit is contained in:
Christian Schwarz 2018-09-27 12:06:59 +02:00
parent 75e42fd860
commit 976c1f3929

View File

@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"io"
"os"
"os/exec"
"syscall"
)
@ -88,9 +89,23 @@ func (c *IOCommand) Read(buf []byte) (n int, err error) {
func (c *IOCommand) doWait() (err error) {
waitErr := c.Cmd.Wait()
waitStatus := c.Cmd.ProcessState.Sys().(syscall.WaitStatus) // Fail hard if we're not on UNIX
var wasUs bool = false
var waitStatus syscall.WaitStatus
if c.Cmd.ProcessState == nil {
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState is nil after c.Cmd.Wait()\n")
}
if c.Cmd.ProcessState != nil {
sysSpecific := c.Cmd.ProcessState.Sys()
var ok bool
waitStatus, ok = sysSpecific.(syscall.WaitStatus)
if !ok {
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState.Sys() could not be converted to syscall.WaitStatus: %T\n", sysSpecific)
os.Stderr.Sync()
panic(sysSpecific) // this can only be true if we are not on UNIX, and we don't support that
}
wasUs = waitStatus.Signaled() && waitStatus.Signal() == syscall.SIGTERM // in Close()
}
wasUs := waitStatus.Signaled() && waitStatus.Signal() == syscall.SIGTERM // in Close()
if waitErr != nil && !wasUs {
err = IOCommandError{
WaitErr: waitErr,