2017-05-14 12:27:15 +02:00
|
|
|
package util
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2018-10-19 16:12:21 +02:00
|
|
|
"context"
|
2017-05-14 12:27:15 +02:00
|
|
|
"fmt"
|
2018-11-06 23:37:25 +01:00
|
|
|
"github.com/zrepl/zrepl/util/envconst"
|
2017-05-14 12:27:15 +02:00
|
|
|
"io"
|
util.IOCommand: add stderr logging for unexpected crashes in calls to ProcessState.Sys()
Crashes observed on a FreeBSD 11.2 system
2018-09-27T05:08:39+02:00 [INFO][csnas]: start replication invocation="62"
2018-09-27T05:08:39+02:00 [INFO][csnas][repl]: start planning invocation="62"
2018-09-27T05:08:58+02:00 [INFO][csnas][repl]: start working invocation="62"
2018-09-27T05:09:57+02:00 [INFO][csnas]: start pruning sender invocation="62"
2018-09-27T05:10:11+02:00 [INFO][csnas]: start pruning receiver invocation="62"
2018-09-27T05:10:32+02:00 [INFO][csnas]: wait for wakeups
2018-09-27T06:08:39+02:00 [INFO][csnas]: start replication invocation="63"
2018-09-27T06:08:39+02:00 [INFO][csnas][repl]: start planning invocation="63"
2018-09-27T06:08:44+02:00 [INFO][csnas][repl]: start working invocation="63"
2018-09-27T06:08:49+02:00 [ERRO][csnas][repl]: receive request failed (might also be error on sender) invocation="63" filesystem="<REDACTED>" err="concurrent use of RPC connection" step="<REDACTED>(@zrepl_20180927_030838_000 => @zrepl_20180927_040835_000)" errType="*errors.errorString"
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x7d484b]
goroutine 3938545 [running]:
os.(*ProcessState).os.sys(...)
/usr/lib/golang/src/os/exec_posix.go:78
os.(*ProcessState).Sys(...)
/usr/lib/golang/src/os/exec.go:157
github.com/zrepl/zrepl/util.(*IOCommand).doWait(0xc4201b2d80, 0xc420070060, 0xc420070060)
/go/github.com/zrepl/zrepl/util/iocommand.go:91 +0x4b
github.com/zrepl/zrepl/util.(*IOCommand).Read(0xc4201b2d80, 0xc420790000, 0x8000, 0x8000, 0x800c76d90, 0x0, 0xc420067c10)
/go/github.com/zrepl/zrepl/util/iocommand.go:82 +0xe4
github.com/zrepl/zrepl/util.(*ByteCounterReader).Read(0xc4202dc580, 0xc420790000, 0x8000, 0x8000, 0x8c6900, 0x7cb201, 0xc420790000)
/go/github.com/zrepl/zrepl/util/io.go:118 +0x51
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*chunkBuffer).readChunk(0xc42057e3c0, 0x800d1bbf0, 0xc4202dc580, 0xc420790000, 0x8000, 0x8000)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:58 +0x5e
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.writeStream(0xa04620, 0xc4204a9c20, 0x9fe340, 0xc4200d6380, 0x800d1bbf0, 0xc4202dc580, 0x8000, 0xc42000e000, 0x900420)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:101 +0x1ce
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Conn).send(0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42057e2c0, 0xc42013d570, 0x800d1bbf0, 0xc4202dc580, 0x0, 0x0)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/main.go:374 +0x557
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply.func1(0x999741, 0x7, 0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42013d570, 0xa00aa0, 0xc4202dc580, 0xc420516480)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:169 +0x148
created by github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:167 +0x227
2018-09-27 12:06:59 +02:00
|
|
|
"os"
|
2017-05-14 12:27:15 +02:00
|
|
|
"os/exec"
|
2017-05-14 13:59:12 +02:00
|
|
|
"syscall"
|
2018-11-06 23:37:25 +01:00
|
|
|
"time"
|
2017-05-14 12:27:15 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// An IOCommand exposes a forked process's std(in|out|err) through the io.ReadWriteCloser interface.
|
|
|
|
type IOCommand struct {
|
|
|
|
Cmd *exec.Cmd
|
2018-11-06 23:37:25 +01:00
|
|
|
kill context.CancelFunc
|
2018-06-20 20:20:37 +02:00
|
|
|
Stdin io.WriteCloser
|
|
|
|
Stdout io.ReadCloser
|
2017-05-14 12:27:15 +02:00
|
|
|
StderrBuf *bytes.Buffer
|
2017-08-09 21:01:06 +02:00
|
|
|
ExitResult *IOCommandExitResult
|
2017-05-14 12:27:15 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
const IOCommandStderrBufSize = 1024
|
|
|
|
|
|
|
|
type IOCommandError struct {
|
|
|
|
WaitErr error
|
|
|
|
Stderr []byte
|
|
|
|
}
|
|
|
|
|
2017-05-14 13:59:12 +02:00
|
|
|
type IOCommandExitResult struct {
|
|
|
|
Error error
|
|
|
|
WaitStatus syscall.WaitStatus
|
|
|
|
}
|
|
|
|
|
2017-05-14 12:27:15 +02:00
|
|
|
func (e IOCommandError) Error() string {
|
|
|
|
return fmt.Sprintf("underlying process exited with error: %s\nstderr: %s\n", e.WaitErr, e.Stderr)
|
|
|
|
}
|
|
|
|
|
2018-10-19 16:12:21 +02:00
|
|
|
func RunIOCommand(ctx context.Context, command string, args ...string) (c *IOCommand, err error) {
|
|
|
|
c, err = NewIOCommand(ctx, command, args, IOCommandStderrBufSize)
|
2017-05-14 12:27:15 +02:00
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
err = c.Start()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-10-19 16:12:21 +02:00
|
|
|
func NewIOCommand(ctx context.Context, command string, args []string, stderrBufSize int) (c *IOCommand, err error) {
|
2017-05-14 12:27:15 +02:00
|
|
|
|
|
|
|
if stderrBufSize == 0 {
|
|
|
|
stderrBufSize = IOCommandStderrBufSize
|
|
|
|
}
|
|
|
|
|
|
|
|
c = &IOCommand{}
|
|
|
|
|
2018-11-06 23:37:25 +01:00
|
|
|
ctx, c.kill = context.WithCancel(ctx)
|
2018-10-19 16:12:21 +02:00
|
|
|
c.Cmd = exec.CommandContext(ctx, command, args...)
|
2017-05-14 12:27:15 +02:00
|
|
|
|
|
|
|
if c.Stdout, err = c.Cmd.StdoutPipe(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if c.Stdin, err = c.Cmd.StdinPipe(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
c.StderrBuf = bytes.NewBuffer(make([]byte, 0, stderrBufSize))
|
|
|
|
c.Cmd.Stderr = c.StderrBuf
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *IOCommand) Start() (err error) {
|
|
|
|
if err = c.Cmd.Start(); err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read from process's stdout.
|
|
|
|
// The behavior after Close()ing is undefined
|
|
|
|
func (c *IOCommand) Read(buf []byte) (n int, err error) {
|
|
|
|
n, err = c.Stdout.Read(buf)
|
|
|
|
if err == io.EOF {
|
2018-11-06 23:37:25 +01:00
|
|
|
if waitErr := c.doWait(context.Background()); waitErr != nil {
|
2017-05-14 12:27:15 +02:00
|
|
|
err = waitErr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-11-06 23:37:25 +01:00
|
|
|
func (c *IOCommand) doWait(ctx context.Context) (err error) {
|
|
|
|
go func() {
|
|
|
|
dl, ok := ctx.Deadline()
|
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
time.Sleep(dl.Sub(time.Now()))
|
|
|
|
c.kill()
|
|
|
|
c.Stdout.Close()
|
|
|
|
c.Stdin.Close()
|
|
|
|
}()
|
2017-05-14 12:27:15 +02:00
|
|
|
waitErr := c.Cmd.Wait()
|
util.IOCommand: add stderr logging for unexpected crashes in calls to ProcessState.Sys()
Crashes observed on a FreeBSD 11.2 system
2018-09-27T05:08:39+02:00 [INFO][csnas]: start replication invocation="62"
2018-09-27T05:08:39+02:00 [INFO][csnas][repl]: start planning invocation="62"
2018-09-27T05:08:58+02:00 [INFO][csnas][repl]: start working invocation="62"
2018-09-27T05:09:57+02:00 [INFO][csnas]: start pruning sender invocation="62"
2018-09-27T05:10:11+02:00 [INFO][csnas]: start pruning receiver invocation="62"
2018-09-27T05:10:32+02:00 [INFO][csnas]: wait for wakeups
2018-09-27T06:08:39+02:00 [INFO][csnas]: start replication invocation="63"
2018-09-27T06:08:39+02:00 [INFO][csnas][repl]: start planning invocation="63"
2018-09-27T06:08:44+02:00 [INFO][csnas][repl]: start working invocation="63"
2018-09-27T06:08:49+02:00 [ERRO][csnas][repl]: receive request failed (might also be error on sender) invocation="63" filesystem="<REDACTED>" err="concurrent use of RPC connection" step="<REDACTED>(@zrepl_20180927_030838_000 => @zrepl_20180927_040835_000)" errType="*errors.errorString"
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x7d484b]
goroutine 3938545 [running]:
os.(*ProcessState).os.sys(...)
/usr/lib/golang/src/os/exec_posix.go:78
os.(*ProcessState).Sys(...)
/usr/lib/golang/src/os/exec.go:157
github.com/zrepl/zrepl/util.(*IOCommand).doWait(0xc4201b2d80, 0xc420070060, 0xc420070060)
/go/github.com/zrepl/zrepl/util/iocommand.go:91 +0x4b
github.com/zrepl/zrepl/util.(*IOCommand).Read(0xc4201b2d80, 0xc420790000, 0x8000, 0x8000, 0x800c76d90, 0x0, 0xc420067c10)
/go/github.com/zrepl/zrepl/util/iocommand.go:82 +0xe4
github.com/zrepl/zrepl/util.(*ByteCounterReader).Read(0xc4202dc580, 0xc420790000, 0x8000, 0x8000, 0x8c6900, 0x7cb201, 0xc420790000)
/go/github.com/zrepl/zrepl/util/io.go:118 +0x51
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*chunkBuffer).readChunk(0xc42057e3c0, 0x800d1bbf0, 0xc4202dc580, 0xc420790000, 0x8000, 0x8000)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:58 +0x5e
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.writeStream(0xa04620, 0xc4204a9c20, 0x9fe340, 0xc4200d6380, 0x800d1bbf0, 0xc4202dc580, 0x8000, 0xc42000e000, 0x900420)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/stream.go:101 +0x1ce
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Conn).send(0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42057e2c0, 0xc42013d570, 0x800d1bbf0, 0xc4202dc580, 0x0, 0x0)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/main.go:374 +0x557
github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply.func1(0x999741, 0x7, 0xc4200d6380, 0xa04620, 0xc4204a9c20, 0xc42013d570, 0xa00aa0, 0xc4202dc580, 0xc420516480)
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:169 +0x148
created by github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc.(*Client).RequestReply
/go/github.com/zrepl/zrepl/vendor/github.com/problame/go-streamrpc/client.go:167 +0x227
2018-09-27 12:06:59 +02:00
|
|
|
var wasUs bool = false
|
|
|
|
var waitStatus syscall.WaitStatus
|
|
|
|
if c.Cmd.ProcessState == nil {
|
|
|
|
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState is nil after c.Cmd.Wait()\n")
|
|
|
|
}
|
|
|
|
if c.Cmd.ProcessState != nil {
|
|
|
|
sysSpecific := c.Cmd.ProcessState.Sys()
|
|
|
|
var ok bool
|
|
|
|
waitStatus, ok = sysSpecific.(syscall.WaitStatus)
|
|
|
|
if !ok {
|
|
|
|
fmt.Fprintf(os.Stderr, "util.IOCommand: c.Cmd.ProcessState.Sys() could not be converted to syscall.WaitStatus: %T\n", sysSpecific)
|
|
|
|
os.Stderr.Sync()
|
|
|
|
panic(sysSpecific) // this can only be true if we are not on UNIX, and we don't support that
|
|
|
|
}
|
|
|
|
wasUs = waitStatus.Signaled() && waitStatus.Signal() == syscall.SIGTERM // in Close()
|
|
|
|
}
|
2017-05-14 13:59:12 +02:00
|
|
|
|
2017-08-09 21:01:06 +02:00
|
|
|
if waitErr != nil && !wasUs {
|
2017-05-14 12:27:15 +02:00
|
|
|
err = IOCommandError{
|
|
|
|
WaitErr: waitErr,
|
|
|
|
Stderr: c.StderrBuf.Bytes(),
|
|
|
|
}
|
|
|
|
}
|
2017-05-14 13:59:12 +02:00
|
|
|
|
2017-08-09 21:01:06 +02:00
|
|
|
c.ExitResult = &IOCommandExitResult{
|
|
|
|
Error: err, // is still empty if waitErr was due to signalling
|
2017-05-14 13:59:12 +02:00
|
|
|
WaitStatus: waitStatus,
|
|
|
|
}
|
2017-05-14 12:27:15 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write to process's stdin.
|
|
|
|
// The behavior after Close()ing is undefined
|
|
|
|
func (c *IOCommand) Write(buf []byte) (n int, err error) {
|
|
|
|
return c.Stdin.Write(buf)
|
|
|
|
}
|
|
|
|
|
2017-05-14 13:59:12 +02:00
|
|
|
// Terminate the child process and collect its exit status
|
|
|
|
// It is safe to call Close() multiple times.
|
|
|
|
func (c *IOCommand) Close() (err error) {
|
|
|
|
if c.Cmd.ProcessState == nil {
|
|
|
|
// racy...
|
2017-08-09 21:01:06 +02:00
|
|
|
err = syscall.Kill(c.Cmd.Process.Pid, syscall.SIGTERM)
|
2018-11-06 23:37:25 +01:00
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), envconst.Duration("IOCOMMAND_TIMEOUT", 10*time.Second))
|
|
|
|
defer cancel()
|
|
|
|
return c.doWait(ctx)
|
2017-05-14 13:59:12 +02:00
|
|
|
} else {
|
|
|
|
return c.ExitResult.Error
|
|
|
|
}
|
2017-05-14 12:27:15 +02:00
|
|
|
}
|