zrepl/client/pprof_activity_trace.go
Christian Schwarz 10a14a8c50 [#307] add package trace, integrate it with logging, and adopt it throughout zrepl
package trace:

- introduce the concept of tasks and spans, tracked as a linked list within ctx
    - see package-level docs for an overview of the concepts
    - **main feature 1**: unique stack of task and span IDs
        - makes it easy to follow a series of log entries in concurrent code
    - **main feature 2**: ability to produce a chrome://tracing-compatible trace file
        - either via an env variable or a `zrepl pprof` subcommand
        - this is not a CPU profile, we already have go pprof for that
        - but it is very useful to visually inspect where the
          replication / snapshotter / pruner spends its time
          ( fixes #307 )

usage in package daemon/logging:

- goal: every log entry should have a trace field with the ID stack from package trace

- make `logging.GetLogger(ctx, Subsys)` the authoritative `logger.Logger` factory function
    - the context carries a linked list of injected fields which
      `logging.GetLogger` adds to the logger it returns
    - `logging.GetLogger` also uses package `trace` to get the
      task-and-span-stack and injects it into the returned logger's fields
2020-05-19 11:30:02 +02:00

45 lines
949 B
Go

package client
import (
"context"
"io"
"log"
"os"
"golang.org/x/net/websocket"
"github.com/zrepl/zrepl/cli"
)
// pprofActivityTraceCmd describes the "activity-trace" subcommand. It takes a
// single HOST:PORT argument naming a daemon's pprof listener and is executed
// by runPProfActivityTrace.
var pprofActivityTraceCmd = &cli.Subcommand{
	Run:   runPProfActivityTrace,
	Use:   "activity-trace ZREPL_PPROF_HOST:ZREPL_PPROF_PORT",
	Short: "attach to zrepl daemon with activated pprof listener and dump an activity-trace to stdout",
}
// runPProfActivityTrace dials the activity-trace websocket endpoint of the
// daemon addressed by args[0] (expected in HOST:PORT form) and copies
// everything received on that connection to stdout.
//
// Diagnostics are written to stderr. If the argument count is wrong or the
// dial fails, the process exits with status 1 instead of returning. The
// returned error is the result of copying the stream to stdout.
//
// ctx and subcommand are currently unused.
func runPProfActivityTrace(ctx context.Context, subcommand *cli.Subcommand, args []string) error {
	// Deliberately not named "log" so the log package is not shadowed.
	logger := log.New(os.Stderr, "", 0)

	// die terminates the process; it is only called before the websocket
	// connection exists, so no deferred cleanup is skipped by os.Exit.
	die := func() {
		logger.Printf("exiting after error")
		os.Exit(1)
	}

	if len(args) != 1 {
		logger.Printf("exactly one positional argument is required")
		die()
	}

	url := "ws://" + args[0] + "/debug/zrepl/activity-trace" // FIXME: don't repeat the endpoint path here

	logger.Printf("attaching to activity trace stream %s", url)
	// The URL is also passed as the origin argument of the handshake.
	ws, err := websocket.Dial(url, "", url)
	if err != nil {
		logger.Printf("error: %s", err)
		die()
	}
	// Release the connection once streaming stops, whether the copy ended
	// cleanly or with an error.
	defer ws.Close()

	_, err = io.Copy(os.Stdout, ws)
	return err
}