From 34c0fe29adc68dfb7c519941cd420fa6389ea5ea Mon Sep 17 00:00:00 2001 From: milen <94537774+taratorio@users.noreply.github.com> Date: Mon, 20 Nov 2023 12:23:23 +0000 Subject: [PATCH] metrics: swap remaining VictoriaMetrics usages with erigon-lib/metrics (#8762) # Background Erigon currently uses a combination of Victoria Metrics and Prometheus client for providing metrics. We want to rationalize this and use only the Prometheus client library, but we want to maintain the simplified Victoria Metrics methods for constructing metrics. This task is currently partly complete and needs to be finished to a stage where we can remove the Victoria Metrics module from the Erigon code base. # Summary of changes - Adds missing `NewCounter`, `NewSummary`, `NewHistogram`, `GetOrCreateHistogram` functions to `erigon-lib/metrics` similar to the interface VictoriaMetrics lib provides - Minor tidy up for consistency inside `erigon-lib/metrics/set.go` around return types (panic vs err consistency for funcs inside the file), error messages, comments - Replace all remaining usages of `github.com/VictoriaMetrics/metrics` with `github.com/ledgerwatch/erigon-lib/metrics` - seamless (only import changes) since interfaces match --- core/state/rw_v3.go | 16 +-- erigon-lib/kv/kv_interface.go | 2 +- erigon-lib/kv/kvcache/cache.go | 20 +-- erigon-lib/metrics/register.go | 218 +++++++++++++++++++++++++++++++-- erigon-lib/metrics/set.go | 158 +++++++++++------------- erigon-lib/metrics/timer.go | 17 ++- erigon-lib/state/aggregator.go | 2 +- erigon-lib/txpool/pool.go | 2 +- eth/stagedsync/exec3.go | 32 ++--- go.mod | 2 +- p2p/transport.go | 4 +- 11 files changed, 330 insertions(+), 143 deletions(-) diff --git a/core/state/rw_v3.go b/core/state/rw_v3.go index ef1c61e2e..f44762bd0 100644 --- a/core/state/rw_v3.go +++ b/core/state/rw_v3.go @@ -6,23 +6,23 @@ import ( "encoding/binary" "encoding/hex" "fmt" - "github.com/ledgerwatch/erigon-lib/kv/dbutils" "sync" "time" "unsafe" - 
"github.com/VictoriaMetrics/metrics" "github.com/holiman/uint256" + "github.com/ledgerwatch/log/v3" + btree2 "github.com/tidwall/btree" + "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/dbg" "github.com/ledgerwatch/erigon-lib/common/length" "github.com/ledgerwatch/erigon-lib/etl" "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/erigon-lib/kv/dbutils" "github.com/ledgerwatch/erigon-lib/kv/order" + "github.com/ledgerwatch/erigon-lib/metrics" libstate "github.com/ledgerwatch/erigon-lib/state" - "github.com/ledgerwatch/log/v3" - btree2 "github.com/tidwall/btree" - "github.com/ledgerwatch/erigon/cmd/state/exec22" "github.com/ledgerwatch/erigon/core/state/temporal" "github.com/ledgerwatch/erigon/core/types/accounts" @@ -32,7 +32,7 @@ import ( const CodeSizeTable = "CodeSize" const StorageTable = "Storage" -var ExecTxsDone = metrics.NewCounter(`exec_txs_done`) +var execTxsDone = metrics.NewCounter(`exec_txs_done`) type StateV3 struct { lock sync.RWMutex @@ -275,7 +275,7 @@ func (rs *StateV3) RegisterSender(txTask *exec22.TxTask) bool { } func (rs *StateV3) CommitTxNum(sender *common.Address, txNum uint64, in *exec22.QueueWithRetry) (count int) { - ExecTxsDone.Inc() + execTxsDone.Inc() rs.triggerLock.Lock() defer rs.triggerLock.Unlock() @@ -652,7 +652,7 @@ func (rs *StateV3) Unwind(ctx context.Context, tx kv.RwTx, txUnwindTo uint64, ag return nil } -func (rs *StateV3) DoneCount() uint64 { return ExecTxsDone.Get() } +func (rs *StateV3) DoneCount() uint64 { return execTxsDone.Get() } func (rs *StateV3) SizeEstimate() (r uint64) { rs.lock.RLock() diff --git a/erigon-lib/kv/kv_interface.go b/erigon-lib/kv/kv_interface.go index 86255720a..634b1d04b 100644 --- a/erigon-lib/kv/kv_interface.go +++ b/erigon-lib/kv/kv_interface.go @@ -22,9 +22,9 @@ import ( "fmt" "unsafe" - "github.com/VictoriaMetrics/metrics" "github.com/ledgerwatch/erigon-lib/kv/iter" "github.com/ledgerwatch/erigon-lib/kv/order" + 
"github.com/ledgerwatch/erigon-lib/metrics" ) //Variables Naming: diff --git a/erigon-lib/kv/kvcache/cache.go b/erigon-lib/kv/kvcache/cache.go index 9ffefba70..4aa8b6637 100644 --- a/erigon-lib/kv/kvcache/cache.go +++ b/erigon-lib/kv/kvcache/cache.go @@ -26,7 +26,6 @@ import ( "sync/atomic" "time" - "github.com/VictoriaMetrics/metrics" "github.com/c2h5oh/datasize" btree2 "github.com/tidwall/btree" "golang.org/x/crypto/sha3" @@ -35,6 +34,7 @@ import ( "github.com/ledgerwatch/erigon-lib/gointerfaces" "github.com/ledgerwatch/erigon-lib/gointerfaces/remote" "github.com/ledgerwatch/erigon-lib/kv" + "github.com/ledgerwatch/erigon-lib/metrics" ) type CacheValidationResult struct { @@ -100,19 +100,19 @@ type CacheView interface { // - changes in Non-Canonical View SHOULD NOT reflect in stateEvict type Coherent struct { hasher hash.Hash - codeEvictLen *metrics.Counter - codeKeys *metrics.Counter - keys *metrics.Counter - evict *metrics.Counter + codeEvictLen metrics.Counter + codeKeys metrics.Counter + keys metrics.Counter + evict metrics.Counter latestStateView *CoherentRoot - codeMiss *metrics.Counter - timeout *metrics.Counter - hits *metrics.Counter - codeHits *metrics.Counter + codeMiss metrics.Counter + timeout metrics.Counter + hits metrics.Counter + codeHits metrics.Counter roots map[uint64]*CoherentRoot stateEvict *ThreadSafeEvictionList codeEvict *ThreadSafeEvictionList - miss *metrics.Counter + miss metrics.Counter cfg CoherentConfig latestStateVersionID uint64 lock sync.Mutex diff --git a/erigon-lib/metrics/register.go b/erigon-lib/metrics/register.go index 01df0c4cf..03de4c79a 100644 --- a/erigon-lib/metrics/register.go +++ b/erigon-lib/metrics/register.go @@ -1,6 +1,7 @@ package metrics import ( + "fmt" "time" vm "github.com/VictoriaMetrics/metrics" @@ -10,8 +11,18 @@ import ( const UsePrometheusClient = true -type Summary interface { +type Histogram interface { + // UpdateDuration updates request duration based on the given startTime. 
UpdateDuration(time.Time) + + // Update updates h with v. + // + // Negative values and NaNs are ignored. + Update(float64) +} + +type Summary interface { + Histogram } type Counter interface { @@ -36,13 +47,56 @@ func (c intCounter) Set(n uint64) { func (c intCounter) Get() uint64 { var m dto.Metric - c.Gauge.Write(&m) + if err := c.Gauge.Write(&m); err != nil { + panic(fmt.Errorf("calling intCounter.Get on invalid metric: %w", err)) + } return uint64(m.GetGauge().GetValue()) } +// NewCounter registers and returns new counter with the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned counter is safe to use from concurrent goroutines. +func NewCounter(s string) Counter { + if UsePrometheusClient { + counter, err := defaultSet.NewGauge(s) + if err != nil { + panic(fmt.Errorf("could not create new counter: %w", err)) + } + + return intCounter{counter} + } else { + return vm.GetDefaultSet().NewCounter(s) + } +} + +// GetOrCreateCounter returns registered counter with the given name +// or creates new counter if the registry doesn't contain counter with +// the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned counter is safe to use from concurrent goroutines. +// +// Performance tip: prefer NewCounter instead of GetOrCreateCounter. 
func GetOrCreateCounter(s string, isGauge ...bool) Counter { if UsePrometheusClient { - counter := defaultSet.GetOrCreateGauge(s) + counter, err := defaultSet.GetOrCreateGauge(s) + if err != nil { + panic(fmt.Errorf("could not get or create new counter: %w", err)) + } + return intCounter{counter} } else { if counter := DefaultRegistry.Get(s); counter != nil { @@ -58,8 +112,49 @@ func GetOrCreateCounter(s string, isGauge ...bool) Counter { } } +// NewGaugeFunc registers and returns gauge with the given name, which calls f +// to obtain gauge value. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// f must be safe for concurrent calls. +// +// The returned gauge is safe to use from concurrent goroutines. +func NewGaugeFunc(s string, f func() float64) prometheus.GaugeFunc { + gf, err := defaultSet.NewGaugeFunc(s, f) + if err != nil { + panic(fmt.Errorf("could not create new gauge func: %w", err)) + } + + return gf +} + +// GetOrCreateGaugeFunc returns registered gauge with the given name +// or creates new gauge if the registry doesn't contain gauge with +// the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned gauge is safe to use from concurrent goroutines. +// +// Performance tip: prefer NewGaugeFunc instead of GetOrCreateGaugeFunc. 
func GetOrCreateGaugeFunc(s string, f func() float64) prometheus.GaugeFunc { - return defaultSet.GetOrCreateGaugeFunc(s, f) + gf, err := defaultSet.GetOrCreateGaugeFunc(s, f) + if err != nil { + panic(fmt.Errorf("could not get or create new gauge func: %w", err)) + } + + return gf } type summary struct { @@ -70,9 +165,57 @@ func (sm summary) UpdateDuration(startTime time.Time) { sm.Observe(time.Since(startTime).Seconds()) } +func (sm summary) Update(v float64) { + sm.Observe(v) +} + +// NewSummary creates and returns new summary with the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned summary is safe to use from concurrent goroutines. +func NewSummary(s string) Summary { + if UsePrometheusClient { + s, err := defaultSet.NewSummary(s) + if err != nil { + panic(fmt.Errorf("could not create new summary: %w", err)) + } + + return summary{s} + } else { + summary := vm.NewSummary(s) + DefaultRegistry.Register(s, summary) + vm.GetDefaultSet().UnregisterMetric(s) + return summary + } +} + +// GetOrCreateSummary returns registered summary with the given name +// or creates new summary if the registry doesn't contain summary with +// the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned summary is safe to use from concurrent goroutines. +// +// Performance tip: prefer NewSummary instead of GetOrCreateSummary. 
func GetOrCreateSummary(s string) Summary { if UsePrometheusClient { - s := defaultSet.GetOrCreateSummary(s) + s, err := defaultSet.GetOrCreateSummary(s) + if err != nil { + panic(fmt.Errorf("could not get or create new summary: %w", err)) + } + return summary{s} } else { summary := vm.GetOrCreateSummary(s) @@ -82,6 +225,67 @@ func GetOrCreateSummary(s string) Summary { } } -func GetOrCreateHistogram(s string) prometheus.Histogram { - return defaultSet.GetOrCreateHistogram(s) +type histogram struct { + prometheus.Histogram +} + +func (h histogram) UpdateDuration(startTime time.Time) { + h.Observe(time.Since(startTime).Seconds()) +} + +func (h histogram) Update(v float64) { + h.Observe(v) +} + +// NewHistogram creates and returns new histogram with the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned histogram is safe to use from concurrent goroutines. +func NewHistogram(s string) Histogram { + if UsePrometheusClient { + h, err := defaultSet.NewHistogram(s) + if err != nil { + panic(fmt.Errorf("could not create new histogram: %w", err)) + } + + return histogram{h} + } else { + return vm.NewHistogram(s) + } +} + +// GetOrCreateHistogram returns registered histogram with the given name +// or creates new histogram if the registry doesn't contain histogram with +// the given name. +// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned histogram is safe to use from concurrent goroutines. +// +// Performance tip: prefer NewHistogram instead of GetOrCreateHistogram. 
+func GetOrCreateHistogram(s string) Histogram { + if UsePrometheusClient { + h, err := defaultSet.GetOrCreateHistogram(s) + if err != nil { + panic(fmt.Errorf("could not get or create new histogram: %w", err)) + } + + return histogram{h} + } else { + summary := vm.GetOrCreateHistogram(s) + DefaultRegistry.Register(s, summary) + vm.GetDefaultSet().UnregisterMetric(s) + return summary + } } diff --git a/erigon-lib/metrics/set.go b/erigon-lib/metrics/set.go index ee455b50c..8acd618c1 100644 --- a/erigon-lib/metrics/set.go +++ b/erigon-lib/metrics/set.go @@ -79,8 +79,7 @@ func (s *Set) Collect(ch chan<- prometheus.Metric) { // // The returned histogram is safe to use from concurrent goroutines. func (s *Set) NewHistogram(name string, help ...string) (prometheus.Histogram, error) { - h, err := NewHistogram(name, help...) - + h, err := newHistogram(name, help...) if err != nil { return nil, err } @@ -89,9 +88,8 @@ func (s *Set) NewHistogram(name string, help ...string) (prometheus.Histogram, e return h, nil } -func NewHistogram(name string, help ...string) (prometheus.Histogram, error) { +func newHistogram(name string, help ...string) (prometheus.Histogram, error) { name, labels, err := parseMetric(name) - if err != nil { return nil, err } @@ -116,15 +114,14 @@ func NewHistogram(name string, help ...string) (prometheus.Histogram, error) { // The returned histogram is safe to use from concurrent goroutines. // // Performance tip: prefer NewHistogram instead of GetOrCreateHistogram. -func (s *Set) GetOrCreateHistogram(name string, help ...string) prometheus.Histogram { +func (s *Set) GetOrCreateHistogram(name string, help ...string) (prometheus.Histogram, error) { s.mu.Lock() nm := s.m[name] s.mu.Unlock() if nm == nil { - metric, err := NewHistogram(name, help...) - + metric, err := newHistogram(name, help...) 
if err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) } nmNew := &namedMetric{ @@ -141,11 +138,13 @@ func (s *Set) GetOrCreateHistogram(name string, help ...string) prometheus.Histo } s.mu.Unlock() } + h, ok := nm.metric.(prometheus.Histogram) if !ok { - panic(fmt.Errorf("BUG: metric %q isn't a Histogram. It is %T", name, nm.metric)) + return nil, fmt.Errorf("metric %q isn't a Histogram. It is %T", name, nm.metric) } - return h + + return h, nil } // NewCounter registers and returns new counter with the given name in the s. @@ -159,8 +158,7 @@ func (s *Set) GetOrCreateHistogram(name string, help ...string) prometheus.Histo // // The returned counter is safe to use from concurrent goroutines. func (s *Set) NewCounter(name string, help ...string) (prometheus.Counter, error) { - c, err := NewCounter(name, help...) - + c, err := newCounter(name, help...) if err != nil { return nil, err } @@ -169,9 +167,8 @@ func (s *Set) NewCounter(name string, help ...string) (prometheus.Counter, error return c, nil } -func NewCounter(name string, help ...string) (prometheus.Counter, error) { +func newCounter(name string, help ...string) (prometheus.Counter, error) { name, labels, err := parseMetric(name) - if err != nil { return nil, err } @@ -196,16 +193,15 @@ func NewCounter(name string, help ...string) (prometheus.Counter, error) { // The returned counter is safe to use from concurrent goroutines. // // Performance tip: prefer NewCounter instead of GetOrCreateCounter. -func (s *Set) GetOrCreateCounter(name string, help ...string) prometheus.Counter { +func (s *Set) GetOrCreateCounter(name string, help ...string) (prometheus.Counter, error) { s.mu.Lock() nm := s.m[name] s.mu.Unlock() if nm == nil { // Slow path - create and register missing counter. - metric, err := NewCounter(name, help...) - + metric, err := newCounter(name, help...) 
if err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) } nmNew := &namedMetric{ @@ -221,11 +217,13 @@ func (s *Set) GetOrCreateCounter(name string, help ...string) prometheus.Counter } s.mu.Unlock() } + c, ok := nm.metric.(prometheus.Counter) if !ok { - panic(fmt.Errorf("BUG: metric %q isn't a Counter. It is %T", name, nm.metric)) + return nil, fmt.Errorf("metric %q isn't a Counter. It is %T", name, nm.metric) } - return c + + return c, nil } // NewGauge registers and returns gauge with the given name in s, which calls f @@ -242,8 +240,7 @@ func (s *Set) GetOrCreateCounter(name string, help ...string) prometheus.Counter // // The returned gauge is safe to use from concurrent goroutines. func (s *Set) NewGauge(name string, help ...string) (prometheus.Gauge, error) { - g, err := NewGauge(name, help...) - + g, err := newGauge(name, help...) if err != nil { return nil, err } @@ -252,10 +249,8 @@ func (s *Set) NewGauge(name string, help ...string) (prometheus.Gauge, error) { return g, nil } -func NewGauge(name string, help ...string) (prometheus.Gauge, error) { - +func newGauge(name string, help ...string) (prometheus.Gauge, error) { name, labels, err := parseMetric(name) - if err != nil { return nil, err } @@ -267,7 +262,7 @@ func NewGauge(name string, help ...string) (prometheus.Gauge, error) { }), nil } -// GetOrCreateGaugeFunc returns registered gauge with the given name in s +// GetOrCreateGauge returns registered gauge with the given name in s // or creates new gauge if s doesn't contain gauge with the given name. // // name must be valid Prometheus-compatible metric with possible labels. @@ -280,16 +275,15 @@ func NewGauge(name string, help ...string) (prometheus.Gauge, error) { // The returned gauge is safe to use from concurrent goroutines. // // Performance tip: prefer NewGauge instead of GetOrCreateGauge. 
-func (s *Set) GetOrCreateGauge(name string, help ...string) prometheus.Gauge { +func (s *Set) GetOrCreateGauge(name string, help ...string) (prometheus.Gauge, error) { s.mu.Lock() nm := s.m[name] s.mu.Unlock() if nm == nil { // Slow path - create and register missing gauge. - metric, err := NewGauge(name, help...) - + metric, err := newGauge(name, help...) if err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) } nmNew := &namedMetric{ @@ -305,11 +299,13 @@ func (s *Set) GetOrCreateGauge(name string, help ...string) prometheus.Gauge { } s.mu.Unlock() } + g, ok := nm.metric.(prometheus.Gauge) if !ok { - panic(fmt.Errorf("BUG: metric %q isn't a Gauge. It is %T", name, nm.metric)) + return nil, fmt.Errorf("metric %q isn't a Gauge. It is %T", name, nm.metric) } - return g + + return g, nil } // NewGaugeFunc registers and returns gauge with the given name in s, which calls f @@ -326,8 +322,7 @@ func (s *Set) GetOrCreateGauge(name string, help ...string) prometheus.Gauge { // // The returned gauge is safe to use from concurrent goroutines. func (s *Set) NewGaugeFunc(name string, f func() float64, help ...string) (prometheus.GaugeFunc, error) { - g, err := NewGaugeFunc(name, f, help...) - + g, err := newGaugeFunc(name, f, help...) 
if err != nil { return nil, err } @@ -336,13 +331,12 @@ func (s *Set) NewGaugeFunc(name string, f func() float64, help ...string) (prome return g, nil } -func NewGaugeFunc(name string, f func() float64, help ...string) (prometheus.GaugeFunc, error) { +func newGaugeFunc(name string, f func() float64, help ...string) (prometheus.GaugeFunc, error) { if f == nil { - return nil, fmt.Errorf("BUG: f cannot be nil") + return nil, fmt.Errorf("f cannot be nil") } name, labels, err := parseMetric(name) - if err != nil { return nil, err } @@ -367,20 +361,14 @@ func NewGaugeFunc(name string, f func() float64, help ...string) (prometheus.Gau // The returned gauge is safe to use from concurrent goroutines. // // Performance tip: prefer NewGauge instead of GetOrCreateGauge. -func (s *Set) GetOrCreateGaugeFunc(name string, f func() float64, help ...string) prometheus.GaugeFunc { +func (s *Set) GetOrCreateGaugeFunc(name string, f func() float64, help ...string) (prometheus.GaugeFunc, error) { s.mu.Lock() nm := s.m[name] s.mu.Unlock() if nm == nil { - // Slow path - create and register missing gauge. - if f == nil { - panic(fmt.Errorf("BUG: f cannot be nil")) - } - - metric, err := NewGaugeFunc(name, f, help...) - + metric, err := newGaugeFunc(name, f, help...) if err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) } nmNew := &namedMetric{ @@ -396,11 +384,13 @@ func (s *Set) GetOrCreateGaugeFunc(name string, f func() float64, help ...string } s.mu.Unlock() } + g, ok := nm.metric.(prometheus.GaugeFunc) if !ok { - panic(fmt.Errorf("BUG: metric %q isn't a Gauge. It is %T", name, nm.metric)) + return nil, fmt.Errorf("metric %q isn't a Gauge. It is %T", name, nm.metric) } - return g + + return g, nil } const defaultSummaryWindow = 5 * time.Minute @@ -418,34 +408,11 @@ var defaultSummaryQuantiles = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.97: 0. 
// // The returned summary is safe to use from concurrent goroutines. func (s *Set) NewSummary(name string, help ...string) (prometheus.Summary, error) { - sm, err := NewSummary(name, defaultSummaryWindow, defaultSummaryQuantiles, help...) - - if err != nil { - return nil, err - } - s.mu.Lock() - // defer will unlock in case of panic - // checks in tests - defer s.mu.Unlock() - - s.registerMetric(name, sm) - return sm, nil + return s.NewSummaryExt(name, defaultSummaryWindow, defaultSummaryQuantiles, help...) } -// NewSummary creates and returns new summary in s with the given name, -// window and quantiles. -// -// name must be valid Prometheus-compatible metric with possible labels. -// For instance, -// -// - foo -// - foo{bar="baz"} -// - foo{bar="baz",aaa="b"} -// -// The returned summary is safe to use from concurrent goroutines. -func NewSummary(name string, window time.Duration, quantiles map[float64]float64, help ...string) (prometheus.Summary, error) { +func newSummary(name string, window time.Duration, quantiles map[float64]float64, help ...string) (prometheus.Summary, error) { name, labels, err := parseMetric(name) - if err != nil { return nil, err } @@ -455,6 +422,7 @@ func NewSummary(name string, window time.Duration, quantiles map[float64]float64 ConstLabels: labels, Objectives: quantiles, MaxAge: window, + Help: strings.Join(help, " "), }), nil } @@ -471,10 +439,31 @@ func NewSummary(name string, window time.Duration, quantiles map[float64]float64 // The returned summary is safe to use from concurrent goroutines. // // Performance tip: prefer NewSummary instead of GetOrCreateSummary. -func (s *Set) GetOrCreateSummary(name string, help ...string) prometheus.Summary { +func (s *Set) GetOrCreateSummary(name string, help ...string) (prometheus.Summary, error) { return s.GetOrCreateSummaryExt(name, defaultSummaryWindow, defaultSummaryQuantiles, help...) } +// NewSummaryExt creates and returns new summary in s with the given name, +// window and quantiles. 
+// +// name must be valid Prometheus-compatible metric with possible labels. +// For instance, +// +// - foo +// - foo{bar="baz"} +// - foo{bar="baz",aaa="b"} +// +// The returned summary is safe to use from concurrent goroutines. +func (s *Set) NewSummaryExt(name string, window time.Duration, quantiles map[float64]float64, help ...string) (prometheus.Summary, error) { + metric, err := newSummary(name, window, quantiles, help...) + if err != nil { + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) + } + + s.registerMetric(name, metric) + return metric, nil +} + // GetOrCreateSummaryExt returns registered summary with the given name, // window and quantiles in s or creates new summary if s doesn't // contain summary with the given name. @@ -489,16 +478,15 @@ func (s *Set) GetOrCreateSummary(name string, help ...string) prometheus.Summary // The returned summary is safe to use from concurrent goroutines. // // Performance tip: prefer NewSummaryExt instead of GetOrCreateSummaryExt. -func (s *Set) GetOrCreateSummaryExt(name string, window time.Duration, quantiles map[float64]float64, help ...string) prometheus.Summary { +func (s *Set) GetOrCreateSummaryExt(name string, window time.Duration, quantiles map[float64]float64, help ...string) (prometheus.Summary, error) { s.mu.Lock() nm := s.m[name] s.mu.Unlock() if nm == nil { // Slow path - create and register missing summary. - metric, err := NewSummary(name, window, quantiles, help...) - + metric, err := newSummary(name, window, quantiles, help...) if err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) + return nil, fmt.Errorf("invalid metric name %q: %w", name, err) } nmNew := &namedMetric{ @@ -514,21 +502,17 @@ func (s *Set) GetOrCreateSummaryExt(name string, window time.Duration, quantiles } s.mu.Unlock() } + sm, ok := nm.metric.(prometheus.Summary) if !ok { - panic(fmt.Errorf("BUG: metric %q isn't a Summary. 
It is %T", name, nm.metric)) + return nil, fmt.Errorf("metric %q isn't a Summary. It is %T", name, nm.metric) } - return sm + return sm, nil } func (s *Set) registerMetric(name string, m prometheus.Metric) { - if _, _, err := parseMetric(name); err != nil { - panic(fmt.Errorf("BUG: invalid metric name %q: %w", name, err)) - } s.mu.Lock() - // defer will unlock in case of panic - // checks in test defer s.mu.Unlock() s.mustRegisterLocked(name, m) } @@ -547,7 +531,7 @@ func (s *Set) mustRegisterLocked(name string, m prometheus.Metric) { s.a = append(s.a, nm) } if ok { - panic(fmt.Errorf("BUG: metric %q is already registered", name)) + panic(fmt.Errorf("metric %q is already registered", name)) } } @@ -577,7 +561,7 @@ func (s *Set) unregisterMetricLocked(nm *namedMetric) bool { return } } - panic(fmt.Errorf("BUG: cannot find metric %q in the list of registered metrics", name)) + panic(fmt.Errorf("cannot find metric %q in the list of registered metrics", name)) } // remove metric from s.a diff --git a/erigon-lib/metrics/timer.go b/erigon-lib/metrics/timer.go index c13429c11..0cadd8ed3 100644 --- a/erigon-lib/metrics/timer.go +++ b/erigon-lib/metrics/timer.go @@ -4,22 +4,18 @@ import ( "fmt" "strings" "time" - - "github.com/VictoriaMetrics/metrics" ) type HistTimer struct { - *metrics.Histogram - + Histogram start time.Time - - name string + name string } func NewHistTimer(name string) *HistTimer { rawName := strings.Split(name, "{") return &HistTimer{ - Histogram: metrics.GetOrCreateCompatibleHistogram(name), + Histogram: GetOrCreateHistogram(name), start: time.Now(), name: rawName[0], } @@ -33,16 +29,19 @@ func (h *HistTimer) Tag(pairs ...string) *HistTimer { if len(pairs)%2 != 0 { pairs = append(pairs, "UNEQUAL_KEY_VALUE_TAGS") } - toJoin := []string{} + + var toJoin []string for i := 0; i < len(pairs); i = i + 2 { toJoin = append(toJoin, fmt.Sprintf(`%s="%s"`, pairs[i], pairs[i+1])) } + tags := "" if len(toJoin) > 0 { tags = "{" + strings.Join(toJoin, ",") + "}" } + 
return &HistTimer{ - Histogram: metrics.GetOrCreateCompatibleHistogram(h.name + tags), + Histogram: GetOrCreateHistogram(h.name + tags), start: time.Now(), name: h.name, } diff --git a/erigon-lib/state/aggregator.go b/erigon-lib/state/aggregator.go index 0f7199246..366d7d545 100644 --- a/erigon-lib/state/aggregator.go +++ b/erigon-lib/state/aggregator.go @@ -27,7 +27,6 @@ import ( "sync/atomic" "time" - "github.com/VictoriaMetrics/metrics" "github.com/holiman/uint256" "github.com/ledgerwatch/log/v3" "golang.org/x/sync/errgroup" @@ -39,6 +38,7 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/iter" "github.com/ledgerwatch/erigon-lib/kv/order" + "github.com/ledgerwatch/erigon-lib/metrics" ) // StepsInBiggestFile - files of this size are completely frozen/immutable. diff --git a/erigon-lib/txpool/pool.go b/erigon-lib/txpool/pool.go index 0f835136b..85098ac58 100644 --- a/erigon-lib/txpool/pool.go +++ b/erigon-lib/txpool/pool.go @@ -33,7 +33,6 @@ import ( "sync/atomic" "time" - "github.com/VictoriaMetrics/metrics" gokzg4844 "github.com/crate-crypto/go-kzg-4844" mapset "github.com/deckarep/golang-set/v2" "github.com/go-stack/stack" @@ -57,6 +56,7 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" "github.com/ledgerwatch/erigon-lib/kv/kvcache" "github.com/ledgerwatch/erigon-lib/kv/mdbx" + "github.com/ledgerwatch/erigon-lib/metrics" "github.com/ledgerwatch/erigon-lib/txpool/txpoolcfg" "github.com/ledgerwatch/erigon-lib/types" ) diff --git a/eth/stagedsync/exec3.go b/eth/stagedsync/exec3.go index fb610e924..8ff3d694a 100644 --- a/eth/stagedsync/exec3.go +++ b/eth/stagedsync/exec3.go @@ -13,9 +13,11 @@ import ( "sync/atomic" "time" - "github.com/VictoriaMetrics/metrics" "github.com/c2h5oh/datasize" "github.com/erigontech/mdbx-go/mdbx" + "github.com/ledgerwatch/log/v3" + "golang.org/x/sync/errgroup" + "github.com/ledgerwatch/erigon-lib/chain" "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon-lib/common/datadir" 
@@ -25,14 +27,12 @@ import ( "github.com/ledgerwatch/erigon-lib/kv" kv2 "github.com/ledgerwatch/erigon-lib/kv/mdbx" "github.com/ledgerwatch/erigon-lib/kv/rawdbv3" + "github.com/ledgerwatch/erigon-lib/metrics" libstate "github.com/ledgerwatch/erigon-lib/state" state2 "github.com/ledgerwatch/erigon-lib/state" - "github.com/ledgerwatch/erigon/common/math" - "github.com/ledgerwatch/log/v3" - "golang.org/x/sync/errgroup" - "github.com/ledgerwatch/erigon/cmd/state/exec22" "github.com/ledgerwatch/erigon/cmd/state/exec3" + "github.com/ledgerwatch/erigon/common/math" "github.com/ledgerwatch/erigon/consensus" "github.com/ledgerwatch/erigon/core" "github.com/ledgerwatch/erigon/core/rawdb/rawdbhelpers" @@ -44,9 +44,9 @@ import ( "github.com/ledgerwatch/erigon/turbo/services" ) -var ExecStepsInDB = metrics.NewCounter(`exec_steps_in_db`) //nolint -var ExecRepeats = metrics.NewCounter(`exec_repeats`) //nolint -var ExecTriggers = metrics.NewCounter(`exec_triggers`) //nolint +var execStepsInDB = metrics.NewCounter(`exec_steps_in_db`) //nolint +var execRepeats = metrics.NewCounter(`exec_repeats`) //nolint +var execTriggers = metrics.NewCounter(`exec_triggers`) //nolint func NewProgress(prevOutputBlockNum, commitThreshold uint64, workersCount int, logPrefix string, logger log.Logger) *Progress { return &Progress{prevTime: time.Now(), prevOutputBlockNum: prevOutputBlockNum, commitThreshold: commitThreshold, workersCount: workersCount, logPrefix: logPrefix, logger: logger} @@ -65,7 +65,7 @@ type Progress struct { } func (p *Progress) Log(rs *state.StateV3, in *exec22.QueueWithRetry, rws *exec22.ResultsQueue, doneCount, inputBlockNum, outputBlockNum, outTxNum, repeatCount uint64, idxStepsAmountInDB float64) { - ExecStepsInDB.Set(uint64(idxStepsAmountInDB * 100)) + execStepsInDB.Set(uint64(idxStepsAmountInDB * 100)) var m runtime.MemStats dbg.ReadMemStats(&m) sizeEstimate := rs.SizeEstimate() @@ -280,8 +280,8 @@ func ExecV3(ctx context.Context, return err } - ExecRepeats.Add(conflicts) - 
ExecTriggers.Add(triggers) + execRepeats.Add(conflicts) + execTriggers.Add(triggers) if processedBlockNum > lastBlockNum { outputBlockNum.Set(processedBlockNum) lastBlockNum = processedBlockNum @@ -334,7 +334,7 @@ func ExecV3(ctx context.Context, case <-logEvery.C: stepsInDB := rawdbhelpers.IdxStepsCountV3(tx) - progress.Log(rs, in, rws, rs.DoneCount(), inputBlockNum.Load(), outputBlockNum.Get(), outputTxNum.Load(), ExecRepeats.Get(), stepsInDB) + progress.Log(rs, in, rws, rs.DoneCount(), inputBlockNum.Load(), outputBlockNum.Get(), outputTxNum.Load(), execRepeats.Get(), stepsInDB) if agg.HasBackgroundFilesBuild() { logger.Info(fmt.Sprintf("[%s] Background files build", logPrefix), "progress", agg.BackgroundProgress()) } @@ -369,8 +369,8 @@ func ExecV3(ctx context.Context, return err } - ExecRepeats.Add(conflicts) - ExecTriggers.Add(triggers) + execRepeats.Add(conflicts) + execTriggers.Add(triggers) if processedBlockNum > 0 { outputBlockNum.Set(processedBlockNum) } @@ -657,7 +657,7 @@ Loop: if err := rs.ApplyState(applyTx, txTask, agg); err != nil { return fmt.Errorf("StateV3.Apply: %w", err) } - ExecTriggers.Add(rs.CommitTxNum(txTask.Sender, txTask.TxNum, in)) + execTriggers.Add(rs.CommitTxNum(txTask.Sender, txTask.TxNum, in)) outputTxNum.Add(1) if err := rs.ApplyHistory(txTask, agg); err != nil { @@ -674,7 +674,7 @@ Loop: select { case <-logEvery.C: stepsInDB := rawdbhelpers.IdxStepsCountV3(applyTx) - progress.Log(rs, in, rws, count, inputBlockNum.Load(), outputBlockNum.Get(), outputTxNum.Load(), ExecRepeats.Get(), stepsInDB) + progress.Log(rs, in, rws, count, inputBlockNum.Load(), outputBlockNum.Get(), outputTxNum.Load(), execRepeats.Get(), stepsInDB) if rs.SizeEstimate() < commitThreshold { break } diff --git a/go.mod b/go.mod index b1e5c232a..521149d05 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,6 @@ require ( github.com/Giulio2002/bls v0.0.0-20230906201036-c2330c97dc7d github.com/RoaringBitmap/roaring v1.2.3 github.com/VictoriaMetrics/fastcache v1.12.1 - 
github.com/VictoriaMetrics/metrics v1.23.1 github.com/alecthomas/kong v0.8.0 github.com/anacrolix/log v0.14.3-0.20230823030427-4b296d71a6b4 github.com/anacrolix/sync v0.4.0 @@ -104,6 +103,7 @@ require ( ) require ( + github.com/VictoriaMetrics/metrics v1.23.1 // indirect github.com/agnivade/levenshtein v1.1.1 // indirect github.com/ajwerner/btree v0.0.0-20211221152037-f427b3e689c0 // indirect github.com/alecthomas/atomic v0.1.0-alpha2 // indirect diff --git a/p2p/transport.go b/p2p/transport.go index b2223cb71..405d28b3a 100644 --- a/p2p/transport.go +++ b/p2p/transport.go @@ -20,13 +20,13 @@ import ( "bytes" "crypto/ecdsa" "fmt" - "github.com/ledgerwatch/erigon-lib/common" "io" "net" "strings" "sync" "time" + "github.com/ledgerwatch/erigon-lib/common" "github.com/ledgerwatch/erigon/common/bitutil" "github.com/ledgerwatch/erigon/p2p/rlpx" "github.com/ledgerwatch/erigon/rlp" @@ -102,7 +102,7 @@ func (t *rlpxTransport) WriteMsg(msg Msg) error { // Set metrics. msg.meterSize = size - // TODO: use "github.com/VictoriaMetrics/metrics" + // TODO: use "github.com/ledgerwatch/erigon-lib/metrics" //if metrics.Enabled && msg.meterCap.Name != "" { // don't meter non-subprotocol messages // m := fmt.Sprintf("%s/%s/%d/%#02x", egressMeterName, msg.meterCap.Name, msg.meterCap.Version, msg.meterCode) // metrics.GetOrRegisterMeter(m, nil).Mark(int64(msg.meterSize))