zfscmd: fix crash in zfscmd_prometheus.go due to incorrectly extracted ProcessState

fixup of 96e188d7c4
refs #196
refs #301

panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x10 pc=0x9a472a]

goroutine 15826 [running]:
os.(*ProcessState).systemTime(...)
        /home/cs/go1.13/src/os/exec_unix.go:98
os.(*ProcessState).SystemTime(...)
        /home/cs/go1.13/src/os/exec.go:141
github.com/zrepl/zrepl/zfs/zfscmd.waitPostPrometheus(0xc000c04800, 0xe21ce0, 0xc000068270, 0xbf9f80d88107e861, 0x19bae710e6, 0x13a8b60)
        /home/cs/zrepl/zrepl/zfs/zfscmd/zfscmd_prometheus.go:69 +0x22a
github.com/zrepl/zrepl/zfs/zfscmd.(*Cmd).waitPost(0xc000c04800, 0xe21ce0, 0xc000068270)
        /home/cs/zrepl/zrepl/zfs/zfscmd/zfscmd.go:155 +0x18a
github.com/zrepl/zrepl/zfs/zfscmd.(*Cmd).CombinedOutput(0xc000c04800, 0xc0004b8270, 0xd02eea, 0x3, 0xc0001f6c40, 0x3)
        /home/cs/zrepl/zrepl/zfs/zfscmd/zfscmd.go:40 +0xb3
github.com/zrepl/zrepl/zfs.ZFSRelease(0xe36aa0, 0xc0004b8270, 0xc0009a3a40, 0x13, 0xc0004a5d00, 0x1, 0x1, 0xed62eb221, 0x13a8b60)
        /home/cs/zrepl/zrepl/zfs/holds.go:102 +0x2a7
github.com/zrepl/zrepl/endpoint.ReleaseStep(0xe36aa0, 0xc0004b8270, 0xc0004befc0, 0xe, 0xd08482, 0x8, 0xc0001cb02f, 0x2, 0x1eeea3bff89dc90b, 0x134d6, ...)
        /home/cs/zrepl/zrepl/endpoint/endpoint_zfs_abstraction_step.go:130 +0x367
github.com/zrepl/zrepl/endpoint.(*Sender).SendCompleted.func2(0xc000459190, 0xc000390e30, 0xc00041fd80, 0xc0004befc0, 0xe, 0xd08482, 0x8, 0xc0001cb02f, 0x2, 0x1eeea3bff89dc90b, ...)
        /home/cs/zrepl/zrepl/endpoint/endpoint.go:419 +0x1c3
created by github.com/zrepl/zrepl/endpoint.(*Sender).SendCompleted
        /home/cs/zrepl/zrepl/endpoint/endpoint.go:413 +0x776
This commit is contained in:
Christian Schwarz 2020-04-20 13:57:19 +02:00
parent 0834a184b8
commit aed6149c8c
4 changed files with 41 additions and 24 deletions

View File

@ -134,6 +134,10 @@ func (c *Cmd) waitPre() {
waitPreLogging(c, now)
}
type usage struct {
total_secs, system_secs, user_secs float64
}
func (c *Cmd) waitPost(err error) {
now := time.Now()
@ -146,9 +150,34 @@ func (c *Cmd) waitPost(err error) {
c.waitReturnedAt = now
c.mtx.Unlock()
waitPostReport(c, now)
waitPostLogging(c, err, now)
waitPostPrometheus(c, err, now)
// build usage
var u usage
{
var s *os.ProcessState
if err == nil {
s = c.cmd.ProcessState
} else if ee, ok := err.(*exec.ExitError); ok {
s = ee.ProcessState
}
if s == nil {
u = usage{
total_secs: c.Runtime().Seconds(),
system_secs: -1,
user_secs: -1,
}
} else {
u = usage{
total_secs: c.Runtime().Seconds(),
system_secs: s.SystemTime().Seconds(),
user_secs: s.UserTime().Seconds(),
}
}
}
waitPostReport(c, u, now)
waitPostLogging(c, u, err, now)
waitPostPrometheus(c, u, err, now)
}
// returns 0 if the command did not yet finish

View File

@ -1,7 +1,6 @@
package zfscmd
import (
"os/exec"
"time"
)
@ -28,23 +27,12 @@ func waitPreLogging(c *Cmd, now time.Time) {
c.log().Debug("start waiting")
}
func waitPostLogging(c *Cmd, err error, now time.Time) {
var total, system, user float64
total = c.Runtime().Seconds()
if ee, ok := err.(*exec.ExitError); ok {
system = ee.ProcessState.SystemTime().Seconds()
user = ee.ProcessState.UserTime().Seconds()
} else {
system = -1
user = -1
}
func waitPostLogging(c *Cmd, u usage, err error, now time.Time) {
log := c.log().
WithField("total_time_s", total).
WithField("systemtime_s", system).
WithField("usertime_s", user)
WithField("total_time_s", u.total_secs).
WithField("systemtime_s", u.system_secs).
WithField("usertime_s", u.user_secs)
if err == nil {
log.Info("command exited without error")

View File

@ -46,7 +46,7 @@ func RegisterMetrics(r prometheus.Registerer) {
r.MustRegister(metrics.usertime)
}
func waitPostPrometheus(c *Cmd, err error, now time.Time) {
func waitPostPrometheus(c *Cmd, u usage, err error, now time.Time) {
if len(c.cmd.Args) < 2 {
getLogger(c.ctx).WithField("args", c.cmd.Args).
@ -64,10 +64,10 @@ func waitPostPrometheus(c *Cmd, err error, now time.Time) {
metrics.totaltime.
WithLabelValues(labelValues...).
Observe(c.Runtime().Seconds())
Observe(u.total_secs)
metrics.systemtime.WithLabelValues(labelValues...).
Observe(c.cmd.ProcessState.SystemTime().Seconds())
Observe(u.system_secs)
metrics.usertime.WithLabelValues(labelValues...).
Observe(c.cmd.ProcessState.UserTime().Seconds())
Observe(u.user_secs)
}

View File

@ -57,7 +57,7 @@ func startPostReport(c *Cmd, err error, now time.Time) {
active.mtx.Unlock()
}
func waitPostReport(c *Cmd, now time.Time) {
func waitPostReport(c *Cmd, _ usage, now time.Time) {
active.mtx.Lock()
defer active.mtx.Unlock()
prev := active.cmds[c]