go-pulse/metrics/librato/librato.go
turboboost55 7dc100714d
metrics: add cpu counters (#26796)
This PR adds counter metrics for the CPU system and the Geth process.
Currently the only metrics available for these items are gauges. Gauges are
fine when the consumer scrapes metrics data at the same interval as Geth
produces new values (every 3 seconds), but it is likely that most consumers
will not scrape that often. Intervals of 10, 15, or maybe even 30 seconds
are probably more common.

So the problem is, how does the consumer estimate what the CPU was doing in
between scrapes. With a counter, it's easy ... you just subtract two
successive values and divide by the time to get a nice, accurate average.
But with a gauge, you can't do that. A gauge reading is an instantaneous
picture of what was happening at that moment, but it gives you no idea
about what was going on between scrapes. Taking an average of values is
meaningless.
2023-03-23 14:13:50 +01:00

248 lines
7.9 KiB
Go

package librato
import (
"fmt"
"log"
"math"
"regexp"
"time"
"github.com/ethereum/go-ethereum/metrics"
)
// a regexp for extracting the unit from time.Duration.String
var unitRegexp = regexp.MustCompile(`[^\\d]+$`)
// a helper that turns a time.Duration into librato display attributes for timer metrics
func translateTimerAttributes(d time.Duration) (attrs map[string]interface{}) {
attrs = make(map[string]interface{})
attrs[DisplayTransform] = fmt.Sprintf("x/%d", int64(d))
attrs[DisplayUnitsShort] = string(unitRegexp.Find([]byte(d.String())))
return
}
type Reporter struct {
Email, Token string
Namespace string
Source string
Interval time.Duration
Registry metrics.Registry
Percentiles []float64 // percentiles to report on histogram metrics
TimerAttributes map[string]interface{} // units in which timers will be displayed
intervalSec int64
}
func NewReporter(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) *Reporter {
return &Reporter{e, t, "", s, d, r, p, translateTimerAttributes(u), int64(d / time.Second)}
}
func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) {
NewReporter(r, d, e, t, s, p, u).Run()
}
func (rep *Reporter) Run() {
log.Printf("WARNING: This client has been DEPRECATED! It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015")
ticker := time.NewTicker(rep.Interval)
defer ticker.Stop()
metricsApi := &LibratoClient{rep.Email, rep.Token}
for now := range ticker.C {
var metrics Batch
var err error
if metrics, err = rep.BuildRequest(now, rep.Registry); err != nil {
log.Printf("ERROR constructing librato request body %s", err)
continue
}
if err := metricsApi.PostMetrics(metrics); err != nil {
log.Printf("ERROR sending metrics to librato %s", err)
continue
}
}
}
// calculate sum of squares from data provided by metrics.Histogram
// see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
func sumSquares(s metrics.Sample) float64 {
count := float64(s.Count())
sumSquared := math.Pow(count*s.Mean(), 2)
sumSquares := math.Pow(count*s.StdDev(), 2) + sumSquared/count
if math.IsNaN(sumSquares) {
return 0.0
}
return sumSquares
}
func sumSquaresTimer(t metrics.Timer) float64 {
count := float64(t.Count())
sumSquared := math.Pow(count*t.Mean(), 2)
sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count
if math.IsNaN(sumSquares) {
return 0.0
}
return sumSquares
}
func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) {
snapshot = Batch{
// coerce timestamps to a stepping fn so that they line up in Librato graphs
MeasureTime: (now.Unix() / rep.intervalSec) * rep.intervalSec,
Source: rep.Source,
}
snapshot.Gauges = make([]Measurement, 0)
snapshot.Counters = make([]Measurement, 0)
histogramGaugeCount := 1 + len(rep.Percentiles)
r.Each(func(name string, metric interface{}) {
if rep.Namespace != "" {
name = fmt.Sprintf("%s.%s", rep.Namespace, name)
}
measurement := Measurement{}
measurement[Period] = rep.Interval.Seconds()
switch m := metric.(type) {
case metrics.Counter:
if m.Count() > 0 {
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
measurement[Value] = float64(m.Count())
measurement[Attributes] = map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
}
snapshot.Counters = append(snapshot.Counters, measurement)
}
case metrics.CounterFloat64:
if m.Count() > 0 {
measurement[Name] = fmt.Sprintf("%s.%s", name, "count")
measurement[Value] = m.Count()
measurement[Attributes] = map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
}
snapshot.Counters = append(snapshot.Counters, measurement)
}
case metrics.Gauge:
measurement[Name] = name
measurement[Value] = float64(m.Value())
snapshot.Gauges = append(snapshot.Gauges, measurement)
case metrics.GaugeFloat64:
measurement[Name] = name
measurement[Value] = m.Value()
snapshot.Gauges = append(snapshot.Gauges, measurement)
case metrics.Histogram:
if m.Count() > 0 {
gauges := make([]Measurement, histogramGaugeCount)
s := m.Sample()
measurement[Name] = fmt.Sprintf("%s.%s", name, "hist")
measurement[Count] = uint64(s.Count())
measurement[Max] = float64(s.Max())
measurement[Min] = float64(s.Min())
measurement[Sum] = float64(s.Sum())
measurement[SumSquares] = sumSquares(s)
gauges[0] = measurement
for i, p := range rep.Percentiles {
gauges[i+1] = Measurement{
Name: fmt.Sprintf("%s.%.2f", measurement[Name], p),
Value: s.Percentile(p),
Period: measurement[Period],
}
}
snapshot.Gauges = append(snapshot.Gauges, gauges...)
}
case metrics.Meter:
measurement[Name] = name
measurement[Value] = float64(m.Count())
snapshot.Counters = append(snapshot.Counters, measurement)
snapshot.Gauges = append(snapshot.Gauges,
Measurement{
Name: fmt.Sprintf("%s.%s", name, "1min"),
Value: m.Rate1(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
Measurement{
Name: fmt.Sprintf("%s.%s", name, "5min"),
Value: m.Rate5(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
Measurement{
Name: fmt.Sprintf("%s.%s", name, "15min"),
Value: m.Rate15(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
)
case metrics.Timer:
measurement[Name] = name
measurement[Value] = float64(m.Count())
snapshot.Counters = append(snapshot.Counters, measurement)
if m.Count() > 0 {
libratoName := fmt.Sprintf("%s.%s", name, "timer.mean")
gauges := make([]Measurement, histogramGaugeCount)
gauges[0] = Measurement{
Name: libratoName,
Count: uint64(m.Count()),
Sum: m.Mean() * float64(m.Count()),
Max: float64(m.Max()),
Min: float64(m.Min()),
SumSquares: sumSquaresTimer(m),
Period: int64(rep.Interval.Seconds()),
Attributes: rep.TimerAttributes,
}
for i, p := range rep.Percentiles {
gauges[i+1] = Measurement{
Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100),
Value: m.Percentile(p),
Period: int64(rep.Interval.Seconds()),
Attributes: rep.TimerAttributes,
}
}
snapshot.Gauges = append(snapshot.Gauges, gauges...)
snapshot.Gauges = append(snapshot.Gauges,
Measurement{
Name: fmt.Sprintf("%s.%s", name, "rate.1min"),
Value: m.Rate1(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
Measurement{
Name: fmt.Sprintf("%s.%s", name, "rate.5min"),
Value: m.Rate5(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
Measurement{
Name: fmt.Sprintf("%s.%s", name, "rate.15min"),
Value: m.Rate15(),
Period: int64(rep.Interval.Seconds()),
Attributes: map[string]interface{}{
DisplayUnitsLong: Operations,
DisplayUnitsShort: OperationsShort,
DisplayMin: "0",
},
},
)
}
}
})
return
}