2017-09-17 23:54:23 +02:00
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
2017-11-18 20:34:28 +01:00
|
|
|
"bytes"
|
2017-09-17 23:54:23 +02:00
|
|
|
"context"
|
2017-11-18 20:34:28 +01:00
|
|
|
"encoding/json"
|
2017-09-17 23:54:23 +02:00
|
|
|
"fmt"
|
2017-12-24 15:35:12 +01:00
|
|
|
"github.com/dustin/go-humanize"
|
2017-09-17 23:54:23 +02:00
|
|
|
"github.com/spf13/cobra"
|
2017-12-24 15:35:12 +01:00
|
|
|
"github.com/zrepl/zrepl/logger"
|
2017-09-17 23:54:23 +02:00
|
|
|
"io"
|
|
|
|
golog "log"
|
|
|
|
"net"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"os"
|
2017-12-24 15:35:12 +01:00
|
|
|
"sort"
|
|
|
|
"strings"
|
2017-12-27 12:58:32 +01:00
|
|
|
"time"
|
2017-09-17 23:54:23 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
var controlCmd = &cobra.Command{
|
|
|
|
Use: "control",
|
|
|
|
Short: "control zrepl daemon",
|
|
|
|
}
|
|
|
|
|
|
|
|
var pprofCmd = &cobra.Command{
|
|
|
|
Use: "pprof cpu OUTFILE",
|
|
|
|
Short: "pprof CPU of daemon to OUTFILE (- for stdout)",
|
|
|
|
Run: doControlPProf,
|
|
|
|
}
|
|
|
|
// pprofCmdArgs holds the flag values of pprofCmd.
var pprofCmdArgs struct {
	// seconds is bound to the --seconds flag in init and is sent to the
	// daemon as the "seconds" query parameter by doControlPProf.
	seconds int64
}
|
|
|
|
|
2017-11-18 20:34:28 +01:00
|
|
|
var controlVersionCmd = &cobra.Command{
|
|
|
|
Use: "version",
|
|
|
|
Short: "print version of running zrepl daemon",
|
|
|
|
Run: doControLVersionCmd,
|
|
|
|
}
|
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
var controlStatusCmdArgs struct {
|
2017-12-30 13:53:19 +01:00
|
|
|
format string
|
|
|
|
level logger.Level
|
|
|
|
onlyShowJob string
|
2017-12-24 15:35:12 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
var controlStatusCmd = &cobra.Command{
|
2017-12-30 13:44:55 +01:00
|
|
|
Use: "status [JOB_NAME]",
|
2017-12-24 15:35:12 +01:00
|
|
|
Short: "get current status",
|
|
|
|
Run: doControlStatusCmd,
|
|
|
|
}
|
|
|
|
|
2017-09-17 23:54:23 +02:00
|
|
|
func init() {
|
|
|
|
RootCmd.AddCommand(controlCmd)
|
|
|
|
controlCmd.AddCommand(pprofCmd)
|
|
|
|
pprofCmd.Flags().Int64Var(&pprofCmdArgs.seconds, "seconds", 30, "seconds to profile")
|
2017-11-18 20:34:28 +01:00
|
|
|
controlCmd.AddCommand(controlVersionCmd)
|
2017-12-24 15:35:12 +01:00
|
|
|
controlCmd.AddCommand(controlStatusCmd)
|
2017-12-27 12:58:32 +01:00
|
|
|
controlStatusCmd.Flags().StringVar(&controlStatusCmdArgs.format, "format", "human", "output format (human|raw)")
|
2017-12-30 13:53:19 +01:00
|
|
|
controlStatusCmdArgs.level = logger.Warn
|
|
|
|
controlStatusCmd.Flags().Var(&controlStatusCmdArgs.level, "level", "minimum log level to show")
|
2017-11-18 20:34:28 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func controlHttpClient() (client http.Client, err error) {
|
|
|
|
|
|
|
|
conf, err := ParseConfig(rootArgs.configFile)
|
|
|
|
if err != nil {
|
|
|
|
return http.Client{}, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return http.Client{
|
|
|
|
Transport: &http.Transport{
|
|
|
|
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
|
|
|
return net.Dial("unix", conf.Global.Control.Sockpath)
|
|
|
|
},
|
|
|
|
},
|
|
|
|
}, nil
|
2017-09-17 23:54:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func doControlPProf(cmd *cobra.Command, args []string) {
|
|
|
|
|
|
|
|
log := golog.New(os.Stderr, "", 0)
|
|
|
|
|
|
|
|
die := func() {
|
|
|
|
log.Printf("exiting after error")
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
if cmd.Flags().Arg(0) != "cpu" {
|
|
|
|
log.Printf("only CPU profiles are supported")
|
|
|
|
log.Printf("%s", cmd.UsageString())
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
outfn := cmd.Flags().Arg(1)
|
|
|
|
if outfn == "" {
|
|
|
|
log.Printf("must specify output filename")
|
|
|
|
log.Printf("%s", cmd.UsageString())
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
var out io.Writer
|
2017-11-18 20:34:28 +01:00
|
|
|
var err error
|
2017-09-17 23:54:23 +02:00
|
|
|
if outfn == "-" {
|
|
|
|
out = os.Stdout
|
|
|
|
} else {
|
|
|
|
out, err = os.Create(outfn)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error creating output file: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Printf("connecting to daemon")
|
2017-11-18 20:34:28 +01:00
|
|
|
httpc, err := controlHttpClient()
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error parsing config: %s", err)
|
|
|
|
die()
|
2017-09-17 23:54:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
log.Printf("profiling...")
|
|
|
|
v := url.Values{}
|
|
|
|
v.Set("seconds", fmt.Sprintf("%d", pprofCmdArgs.seconds))
|
|
|
|
v.Encode()
|
|
|
|
resp, err := httpc.Get("http://unix" + ControlJobEndpointProfile + "?" + v.Encode())
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = io.Copy(out, resp.Body)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error writing profile: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Printf("finished")
|
|
|
|
|
|
|
|
}
|
2017-11-18 20:34:28 +01:00
|
|
|
|
|
|
|
func doControLVersionCmd(cmd *cobra.Command, args []string) {
|
|
|
|
|
|
|
|
log := golog.New(os.Stderr, "", 0)
|
|
|
|
|
|
|
|
die := func() {
|
|
|
|
log.Printf("exiting after error")
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
|
|
|
httpc, err := controlHttpClient()
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("could not connect to daemon: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := httpc.Get("http://unix" + ControlJobEndpointVersion)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error: %s", err)
|
|
|
|
die()
|
|
|
|
} else if resp.StatusCode != http.StatusOK {
|
|
|
|
var msg bytes.Buffer
|
|
|
|
io.CopyN(&msg, resp.Body, 4096)
|
|
|
|
log.Printf("error: %s", msg.String())
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
var info ZreplVersionInformation
|
|
|
|
err = json.NewDecoder(resp.Body).Decode(&info)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error unmarshaling response: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Println(info.String())
|
|
|
|
|
|
|
|
}
|
2017-12-24 15:35:12 +01:00
|
|
|
|
|
|
|
func doControlStatusCmd(cmd *cobra.Command, args []string) {
|
|
|
|
|
|
|
|
log := golog.New(os.Stderr, "", 0)
|
|
|
|
|
|
|
|
die := func() {
|
|
|
|
log.Print("exiting after error")
|
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
|
2017-12-27 12:58:32 +01:00
|
|
|
if len(args) == 1 {
|
|
|
|
controlStatusCmdArgs.onlyShowJob = args[0]
|
|
|
|
} else if len(args) > 1 {
|
|
|
|
log.Print("can only specify one job as positional argument")
|
|
|
|
cmd.Usage()
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
httpc, err := controlHttpClient()
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("could not connect to daemon: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := httpc.Get("http://unix" + ControlJobEndpointStatus)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error: %s", err)
|
|
|
|
die()
|
|
|
|
} else if resp.StatusCode != http.StatusOK {
|
|
|
|
var msg bytes.Buffer
|
|
|
|
io.CopyN(&msg, resp.Body, 4096)
|
|
|
|
log.Printf("error: %s", msg.String())
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
var status DaemonStatus
|
|
|
|
err = json.NewDecoder(resp.Body).Decode(&status)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("error unmarshaling response: %s", err)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
switch controlStatusCmdArgs.format {
|
2017-12-27 12:58:32 +01:00
|
|
|
case "raw":
|
2017-12-24 15:35:12 +01:00
|
|
|
enc := json.NewEncoder(os.Stdout)
|
|
|
|
enc.SetIndent("", " ")
|
|
|
|
if err := enc.Encode(status); err != nil {
|
|
|
|
log.Panic(err)
|
|
|
|
}
|
|
|
|
case "human":
|
2017-12-27 12:58:32 +01:00
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
formatter := HumanFormatter{}
|
|
|
|
formatter.SetMetadataFlags(MetadataAll)
|
|
|
|
formatter.SetIgnoreFields([]string{
|
2017-12-27 12:58:32 +01:00
|
|
|
logJobField,
|
2017-12-24 15:35:12 +01:00
|
|
|
})
|
|
|
|
jobNames := make([]string, 0, len(status.Jobs))
|
|
|
|
for name, _ := range status.Jobs {
|
|
|
|
jobNames = append(jobNames, name)
|
|
|
|
}
|
|
|
|
sort.Slice(jobNames, func(i, j int) bool {
|
|
|
|
return strings.Compare(jobNames[i], jobNames[j]) == -1
|
|
|
|
})
|
2017-12-27 12:58:32 +01:00
|
|
|
now := time.Now()
|
2017-12-24 15:35:12 +01:00
|
|
|
for _, name := range jobNames {
|
2017-12-27 12:58:32 +01:00
|
|
|
|
|
|
|
if controlStatusCmdArgs.onlyShowJob != "" && name != controlStatusCmdArgs.onlyShowJob {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
job := status.Jobs[name]
|
2017-12-27 12:58:32 +01:00
|
|
|
jobLogEntries := make([]logger.Entry, 0)
|
|
|
|
informAboutError := false
|
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
fmt.Printf("Job '%s':\n", name)
|
|
|
|
for _, task := range job.Tasks {
|
|
|
|
|
|
|
|
var header bytes.Buffer
|
|
|
|
fmt.Fprintf(&header, " Task '%s': ", task.Name)
|
|
|
|
if !task.Idle {
|
|
|
|
fmt.Fprint(&header, strings.Join(task.ActivityStack, "."))
|
|
|
|
} else {
|
|
|
|
fmt.Fprint(&header, "<idle>")
|
|
|
|
}
|
|
|
|
fmt.Fprint(&header, " ")
|
2017-12-27 12:56:46 +01:00
|
|
|
const TASK_STALLED_HOLDOFF_DURATION = 10 * time.Second
|
|
|
|
sinceLastUpdate := now.Sub(task.LastUpdate)
|
2017-12-24 15:35:12 +01:00
|
|
|
if !task.Idle || task.ProgressRx != 0 || task.ProgressTx != 0 {
|
|
|
|
fmt.Fprintf(&header, "(%s / %s , Rx/Tx",
|
|
|
|
humanize.Bytes(uint64(task.ProgressRx)),
|
|
|
|
humanize.Bytes(uint64(task.ProgressTx)))
|
|
|
|
if task.Idle {
|
|
|
|
fmt.Fprint(&header, ", values from last run")
|
|
|
|
}
|
|
|
|
fmt.Fprint(&header, ")")
|
|
|
|
}
|
|
|
|
fmt.Fprint(&header, "\n")
|
2017-12-27 12:56:46 +01:00
|
|
|
if !task.Idle && !task.LastUpdate.IsZero() && sinceLastUpdate >= TASK_STALLED_HOLDOFF_DURATION {
|
2017-12-27 12:58:32 +01:00
|
|
|
informAboutError = true
|
2017-12-27 12:56:46 +01:00
|
|
|
fmt.Fprintf(&header, " WARNING: last update %s ago at %s)",
|
|
|
|
sinceLastUpdate.String(),
|
|
|
|
task.LastUpdate.Format(HumanFormatterDateFormat))
|
|
|
|
fmt.Fprint(&header, "\n")
|
|
|
|
}
|
2017-12-24 15:35:12 +01:00
|
|
|
io.Copy(os.Stdout, &header)
|
|
|
|
|
2017-12-27 12:58:32 +01:00
|
|
|
jobLogEntries = append(jobLogEntries, task.LogEntries...)
|
|
|
|
informAboutError = informAboutError || task.MaxLogLevel >= logger.Warn
|
|
|
|
}
|
|
|
|
|
2017-12-30 13:53:19 +01:00
|
|
|
sort.Slice(jobLogEntries, func(i, j int) bool {
|
|
|
|
return jobLogEntries[i].Time.Before(jobLogEntries[j].Time)
|
|
|
|
})
|
|
|
|
if informAboutError {
|
|
|
|
fmt.Println(" WARNING: Some tasks encountered problems since the last time they left idle state:")
|
|
|
|
fmt.Println(" check the logs below or your log file for more information.")
|
|
|
|
fmt.Println(" Use the --level flag if you need debug information.")
|
|
|
|
fmt.Println()
|
|
|
|
}
|
|
|
|
for _, e := range jobLogEntries {
|
|
|
|
if e.Level < controlStatusCmdArgs.level {
|
|
|
|
continue
|
2017-12-27 12:58:32 +01:00
|
|
|
}
|
2017-12-30 13:53:19 +01:00
|
|
|
formatted, err := formatter.Format(&e)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
2017-12-24 15:35:12 +01:00
|
|
|
}
|
2017-12-30 13:53:19 +01:00
|
|
|
fmt.Printf(" %s\n", string(formatted))
|
2017-12-24 15:35:12 +01:00
|
|
|
}
|
2017-12-30 13:53:19 +01:00
|
|
|
fmt.Println()
|
2017-12-27 12:58:32 +01:00
|
|
|
|
2017-12-24 15:35:12 +01:00
|
|
|
}
|
|
|
|
default:
|
|
|
|
log.Printf("invalid output format '%s'", controlStatusCmdArgs.format)
|
|
|
|
die()
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|