mirror of
https://github.com/zrepl/zrepl.git
synced 2025-06-19 17:27:46 +02:00
parent
aa3865d0a3
commit
a4da029105
100
cmd/config_job_prometheus.go
Normal file
100
cmd/config_job_prometheus.go
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"github.com/mitchellh/mapstructure"
|
||||||
|
"github.com/pkg/errors"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
"github.com/zrepl/zrepl/zfs"
|
||||||
|
"net"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PrometheusJob struct {
|
||||||
|
Listen string
|
||||||
|
}
|
||||||
|
|
||||||
|
var prom struct {
|
||||||
|
taskLastActiveStart *prometheus.GaugeVec
|
||||||
|
taskLastActiveDuration *prometheus.GaugeVec
|
||||||
|
taskLogEntries *prometheus.CounterVec
|
||||||
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
prom.taskLastActiveStart = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: "zrepl",
|
||||||
|
Subsystem: "daemon",
|
||||||
|
Name: "task_last_active_start",
|
||||||
|
Help: "point in time at which the job task last left idle state",
|
||||||
|
}, []string{"zrepl_job", "job_type", "task"})
|
||||||
|
prom.taskLastActiveDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: "zrepl",
|
||||||
|
Subsystem: "daemon",
|
||||||
|
Name: "task_last_active_duration",
|
||||||
|
Help: "seconds that the last run ob a job task spent between leaving and re-entering idle state",
|
||||||
|
}, []string{"zrepl_job", "job_type", "task"})
|
||||||
|
prom.taskLogEntries = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||||
|
Namespace: "zrepl",
|
||||||
|
Subsystem: "daemon",
|
||||||
|
Name: "task_log_entries",
|
||||||
|
Help: "number of log entries per job task and level",
|
||||||
|
}, []string{"zrepl_job", "job_type", "task", "level"})
|
||||||
|
prometheus.MustRegister(prom.taskLastActiveStart)
|
||||||
|
prometheus.MustRegister(prom.taskLastActiveDuration)
|
||||||
|
prometheus.MustRegister(prom.taskLogEntries)
|
||||||
|
}
|
||||||
|
|
||||||
|
func parsePrometheusJob(c JobParsingContext, name string, i map[string]interface{}) (j *PrometheusJob, err error) {
|
||||||
|
var s struct {
|
||||||
|
Listen string
|
||||||
|
}
|
||||||
|
if err := mapstructure.Decode(i, &s); err != nil {
|
||||||
|
return nil, errors.Wrap(err, "mapstructure error")
|
||||||
|
}
|
||||||
|
if s.Listen == "" {
|
||||||
|
return nil, errors.New("must specify 'listen' attribute")
|
||||||
|
}
|
||||||
|
return &PrometheusJob{s.Listen}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*PrometheusJob) JobName() string {
|
||||||
|
return "prometheus"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (j *PrometheusJob) JobType() JobType { return JobTypePrometheus }
|
||||||
|
|
||||||
|
func (j *PrometheusJob) JobStart(ctx context.Context) {
|
||||||
|
|
||||||
|
if err := zfs.PrometheusRegister(prometheus.DefaultRegisterer); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
log := ctx.Value(contextKeyLog).(Logger)
|
||||||
|
task := NewTask("main", j, log)
|
||||||
|
log = task.Log()
|
||||||
|
|
||||||
|
l, err := net.Listen("tcp", j.Listen)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("cannot listen")
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
l.Close()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
mux.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
|
err = http.Serve(l, mux)
|
||||||
|
if err != nil {
|
||||||
|
log.WithError(err).Error("error while serving")
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (*PrometheusJob) JobStatus(ctxt context.Context) (*JobStatus, error) {
|
||||||
|
return &JobStatus{}, nil
|
||||||
|
}
|
@ -179,6 +179,8 @@ func parseJob(c JobParsingContext, i map[string]interface{}) (j Job, err error)
|
|||||||
return parseSourceJob(c, name, i)
|
return parseSourceJob(c, name, i)
|
||||||
case JobTypeLocal:
|
case JobTypeLocal:
|
||||||
return parseLocalJob(c, name, i)
|
return parseLocalJob(c, name, i)
|
||||||
|
case JobTypePrometheus:
|
||||||
|
return parsePrometheusJob(c, name, i)
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("implementation error: unknown job type %s", jobtype))
|
panic(fmt.Sprintf("implementation error: unknown job type %s", jobtype))
|
||||||
}
|
}
|
||||||
|
@ -39,6 +39,7 @@ const (
|
|||||||
JobTypePull JobType = "pull"
|
JobTypePull JobType = "pull"
|
||||||
JobTypeSource JobType = "source"
|
JobTypeSource JobType = "source"
|
||||||
JobTypeLocal JobType = "local"
|
JobTypeLocal JobType = "local"
|
||||||
|
JobTypePrometheus JobType = "prometheus"
|
||||||
JobTypeControl JobType = "control"
|
JobTypeControl JobType = "control"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -50,6 +51,8 @@ func ParseUserJobType(s string) (JobType, error) {
|
|||||||
return JobTypeSource, nil
|
return JobTypeSource, nil
|
||||||
case "local":
|
case "local":
|
||||||
return JobTypeLocal, nil
|
return JobTypeLocal, nil
|
||||||
|
case "prometheus":
|
||||||
|
return JobTypePrometheus, nil
|
||||||
}
|
}
|
||||||
return "", fmt.Errorf("unknown job type '%s'", s)
|
return "", fmt.Errorf("unknown job type '%s'", s)
|
||||||
}
|
}
|
||||||
@ -204,6 +207,7 @@ type TaskStatus struct {
|
|||||||
|
|
||||||
// An instance of Task tracks a single thread of activity that is part of a Job.
|
// An instance of Task tracks a single thread of activity that is part of a Job.
|
||||||
type Task struct {
|
type Task struct {
|
||||||
|
name string // immutable
|
||||||
parent Job // immutable
|
parent Job // immutable
|
||||||
|
|
||||||
// Stack of activities the task is currently in
|
// Stack of activities the task is currently in
|
||||||
@ -219,6 +223,7 @@ type Task struct {
|
|||||||
type taskProgress struct {
|
type taskProgress struct {
|
||||||
rx int64
|
rx int64
|
||||||
tx int64
|
tx int64
|
||||||
|
creation time.Time
|
||||||
lastUpdate time.Time
|
lastUpdate time.Time
|
||||||
logEntries []logger.Entry
|
logEntries []logger.Entry
|
||||||
mtx sync.RWMutex
|
mtx sync.RWMutex
|
||||||
@ -226,6 +231,7 @@ type taskProgress struct {
|
|||||||
|
|
||||||
func newTaskProgress() (p *taskProgress) {
|
func newTaskProgress() (p *taskProgress) {
|
||||||
return &taskProgress{
|
return &taskProgress{
|
||||||
|
creation: time.Now(),
|
||||||
logEntries: make([]logger.Entry, 0),
|
logEntries: make([]logger.Entry, 0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,6 +256,7 @@ func (p *taskProgress) DeepCopy() (out taskProgress) {
|
|||||||
p.mtx.RLock()
|
p.mtx.RLock()
|
||||||
defer p.mtx.RUnlock()
|
defer p.mtx.RUnlock()
|
||||||
out.rx, out.tx = p.rx, p.tx
|
out.rx, out.tx = p.rx, p.tx
|
||||||
|
out.creation = p.creation
|
||||||
out.lastUpdate = p.lastUpdate
|
out.lastUpdate = p.lastUpdate
|
||||||
out.logEntries = make([]logger.Entry, len(p.logEntries))
|
out.logEntries = make([]logger.Entry, len(p.logEntries))
|
||||||
for i := range p.logEntries {
|
for i := range p.logEntries {
|
||||||
@ -280,6 +287,7 @@ type taskActivity struct {
|
|||||||
|
|
||||||
func NewTask(name string, parent Job, lg *logger.Logger) *Task {
|
func NewTask(name string, parent Job, lg *logger.Logger) *Task {
|
||||||
t := &Task{
|
t := &Task{
|
||||||
|
name: name,
|
||||||
parent: parent,
|
parent: parent,
|
||||||
activities: list.New(),
|
activities: list.New(),
|
||||||
}
|
}
|
||||||
@ -318,6 +326,13 @@ func (t *Task) Enter(activity string) {
|
|||||||
// reset progress when leaving idle task
|
// reset progress when leaving idle task
|
||||||
// we leave the old progress dangling to have the user not worry about
|
// we leave the old progress dangling to have the user not worry about
|
||||||
prev.progress = newTaskProgress()
|
prev.progress = newTaskProgress()
|
||||||
|
|
||||||
|
prom.taskLastActiveStart.WithLabelValues(
|
||||||
|
t.parent.JobName(),
|
||||||
|
t.parent.JobType().String(),
|
||||||
|
t.name).
|
||||||
|
Set(float64(prev.progress.creation.UnixNano()) / 1e9)
|
||||||
|
|
||||||
}
|
}
|
||||||
act := &taskActivity{activity, false, nil, prev.progress}
|
act := &taskActivity{activity, false, nil, prev.progress}
|
||||||
t.activities.PushFront(act)
|
t.activities.PushFront(act)
|
||||||
@ -391,6 +406,21 @@ func (t *Task) Finish() {
|
|||||||
t.activities.Remove(top)
|
t.activities.Remove(top)
|
||||||
t.activitiesLastUpdate = time.Now()
|
t.activitiesLastUpdate = time.Now()
|
||||||
|
|
||||||
|
// prometheus
|
||||||
|
front := t.activities.Front()
|
||||||
|
if front != nil && front == t.activities.Back() {
|
||||||
|
idleAct := front.Value.(*taskActivity)
|
||||||
|
if !idleAct.idle {
|
||||||
|
panic("inconsistent implementation")
|
||||||
|
}
|
||||||
|
progress := idleAct.progress.Read()
|
||||||
|
non_idle_time := t.activitiesLastUpdate.Sub(progress.creation) // use same time
|
||||||
|
prom.taskLastActiveDuration.WithLabelValues(
|
||||||
|
t.parent.JobName(),
|
||||||
|
t.parent.JobType().String(),
|
||||||
|
t.name).Set(non_idle_time.Seconds())
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns a logger derived from the logger passed to the constructor function.
|
// Returns a logger derived from the logger passed to the constructor function.
|
||||||
@ -407,6 +437,14 @@ func (t *Task) WriteEntry(entry logger.Entry) error {
|
|||||||
t.rwl.RLock()
|
t.rwl.RLock()
|
||||||
defer t.rwl.RUnlock()
|
defer t.rwl.RUnlock()
|
||||||
t.cur().progress.UpdateLogEntry(entry)
|
t.cur().progress.UpdateLogEntry(entry)
|
||||||
|
|
||||||
|
prom.taskLogEntries.WithLabelValues(
|
||||||
|
t.parent.JobName(),
|
||||||
|
t.parent.JobType().String(),
|
||||||
|
t.name,
|
||||||
|
entry.Level.String()).
|
||||||
|
Inc()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user