zrepl/zfs/zfscmd/zfscmd.go

// Package zfscmd provides a wrapper around packate os/exec.
// Functionality provided by the wrapper:
// - logging start and end of command execution
// - status report of active commands
// - prometheus metrics of runtimes
package zfscmd

import (
	"context"
	"io"
	"os"
	"os/exec"
	"strings"
	"sync"
	"time"

	"github.com/zrepl/zrepl/daemon/logging/trace"
	"github.com/zrepl/zrepl/util/circlog"
)

type Cmd struct {
	cmd                                      *exec.Cmd
	ctx                                      context.Context
	mtx                                      sync.RWMutex
	startedAt, waitStartedAt, waitReturnedAt time.Time
	waitReturnEndSpanCb                      trace.DoneFunc
}

func CommandContext(ctx context.Context, name string, arg ...string) *Cmd {
	cmd := exec.CommandContext(ctx, name, arg...)
	return &Cmd{cmd: cmd, ctx: ctx}
}

// err.(*exec.ExitError).Stderr will NOT be set
func (c *Cmd) CombinedOutput() (o []byte, err error) {
	c.startPre(false)
	c.startPost(nil)
	c.waitPre()
	o, err = c.cmd.CombinedOutput()
	c.waitPost(err)
	return
}

// err.(*exec.ExitError).Stderr will be set
func (c *Cmd) Output() (o []byte, err error) {
	c.startPre(false)
	c.startPost(nil)
	c.waitPre()
	o, err = c.cmd.Output()
	c.waitPost(err)
	return
}

// Careful: err.(*exec.ExitError).Stderr will not be set, even if you don't open an StderrPipe
func (c *Cmd) StdoutPipeWithErrorBuf() (p io.ReadCloser, errBuf *circlog.CircularLog, err error) {
	p, err = c.cmd.StdoutPipe()
	errBuf = circlog.MustNewCircularLog(1 << 15)
	c.cmd.Stderr = errBuf
	return p, errBuf, err
}

type Stdio struct {
	Stdin  io.ReadCloser
	Stdout io.Writer
	Stderr io.Writer
}

func (c *Cmd) SetStdio(stdio Stdio) {
	c.cmd.Stdin = stdio.Stdin
	c.cmd.Stderr = stdio.Stderr
	c.cmd.Stdout = stdio.Stdout
}

func (c *Cmd) String() string {
	return strings.Join(c.cmd.Args, " ") // includes argv[0] if initialized with CommandContext, that's the only way we o it
}

func (c *Cmd) log() Logger {
	return getLogger(c.ctx).WithField("cmd", c.String())
}

// Start the command.
//
// This creates a new trace.WithTask as a child task of the ctx passed to CommandContext.
// If the process is successfully started (err == nil), it is the CALLER'S RESPONSIBILITY to ensure that
// the spawned process does not outlive the ctx's trace.Task.
//
// If this method returns an error, the Cmd instance is invalid. Start must not be called repeatedly.
func (c *Cmd) Start() (err error) {
	c.startPre(true)
	err = c.cmd.Start()
	c.startPost(err)
	return err
}

// Get the underlying os.Process.
//
// Only call this method after a successful call to .Start().
func (c *Cmd) Process() *os.Process {
	if c.startedAt.IsZero() {
		panic("calling Process() only allowed after successful call to Start()")
	}
	return c.cmd.Process
}

// Blocking wait for the process to exit.
// May be called concurrently and repeatly (exec.Cmd.Wait() semantics apply).
//
// Only call this method after a successful call to .Start().
func (c *Cmd) Wait() (err error) {
	c.waitPre()
	err = c.cmd.Wait()
	c.waitPost(err)
	return err
}

func (c *Cmd) startPre(newTask bool) {
	if newTask {
		// avoid explosion of tasks with name c.String()
		c.ctx, c.waitReturnEndSpanCb = trace.WithTaskAndSpan(c.ctx, "zfscmd", c.String())
	} else {
		c.ctx, c.waitReturnEndSpanCb = trace.WithSpan(c.ctx, c.String())
	}
	startPreLogging(c, time.Now())
}

func (c *Cmd) startPost(err error) {
	now := time.Now()

	c.mtx.Lock()
	c.startedAt = now
	c.mtx.Unlock()

	startPostReport(c, err, now)
	startPostLogging(c, err, now)
}

func (c *Cmd) waitPre() {
	now := time.Now()

	// ignore duplicate waits
	c.mtx.Lock()
	// ignore duplicate waits
	if !c.waitStartedAt.IsZero() {
		c.mtx.Unlock()
		return
	}
	c.waitStartedAt = now
	c.mtx.Unlock()

	waitPreLogging(c, now)
}

type usage struct {
	total_secs, system_secs, user_secs float64
}

func (c *Cmd) waitPost(err error) {
	now := time.Now()

	c.mtx.Lock()
	// ignore duplicate waits
	if !c.waitReturnedAt.IsZero() {
		c.mtx.Unlock()
		return
	}
	c.waitReturnedAt = now
	c.mtx.Unlock()

	// build usage
	var u usage
	{
		var s *os.ProcessState
		if err == nil {
			s = c.cmd.ProcessState
		} else if ee, ok := err.(*exec.ExitError); ok {
			s = ee.ProcessState
		}

		if s == nil {
			u = usage{
				total_secs:  c.Runtime().Seconds(),
				system_secs: -1,
				user_secs:   -1,
			}
		} else {
			u = usage{
				total_secs:  c.Runtime().Seconds(),
				system_secs: s.SystemTime().Seconds(),
				user_secs:   s.UserTime().Seconds(),
			}
		}
	}

	waitPostReport(c, u, now)
	waitPostLogging(c, u, err, now)
	waitPostPrometheus(c, u, err, now)

	// must be last because c.ctx might be used by other waitPost calls
	c.waitReturnEndSpanCb()
}

// returns 0 if the command did not yet finish
func (c *Cmd) Runtime() time.Duration {
	if c.waitReturnedAt.IsZero() {
		return 0
	}
	return c.waitReturnedAt.Sub(c.startedAt)
}