Add in P2P Metrics for Mainnet (#11386)

* connected peers gauge vec

* build

* add in gossip metric

* clean
This commit is contained in:
Raul Jordan 2022-09-01 14:00:54 -04:00 committed by GitHub
parent e1ab034d25
commit a5da9aedd4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 73 additions and 9 deletions

View File

@ -1,20 +1,39 @@
package p2p
import (
"strings"
"github.com/libp2p/go-libp2p-core/peer"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var (
knownAgentVersions = []string{
"lighthouse",
"nimbus",
"prysm",
"teku",
"js-libp2p",
"rust-libp2p",
}
p2pPeerCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "p2p_peer_count",
Help: "The number of peers in a given state.",
},
[]string{"state"})
totalPeerCount = promauto.NewGauge(prometheus.GaugeOpts{
Name: "libp2p_peers",
Help: "Tracks the total number of libp2p peers",
})
connectedPeersCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "connected_libp2p_peers",
Help: "Tracks the total number of connected libp2p peers by agent string",
},
[]string{"agent"},
)
avgScoreConnectedClients = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "connected_libp2p_peers_average_scores",
Help: "Tracks the overall p2p scores of connected libp2p peers by agent string",
},
[]string{"agent"},
)
repeatPeerConnections = promauto.NewCounter(prometheus.CounterOpts{
Name: "p2p_repeat_attempts",
Help: "The number of repeat attempts the connection handler is triggered for a peer.",
@ -46,10 +65,57 @@ var (
)
func (s *Service) updateMetrics() {
totalPeerCount.Set(float64(len(s.peers.Connected())))
p2pPeerCount.WithLabelValues("Connected").Set(float64(len(s.peers.Connected())))
connectedPeers := s.peers.Connected()
p2pPeerCount.WithLabelValues("Connected").Set(float64(len(connectedPeers)))
p2pPeerCount.WithLabelValues("Disconnected").Set(float64(len(s.peers.Disconnected())))
p2pPeerCount.WithLabelValues("Connecting").Set(float64(len(s.peers.Connecting())))
p2pPeerCount.WithLabelValues("Disconnecting").Set(float64(len(s.peers.Disconnecting())))
p2pPeerCount.WithLabelValues("Bad").Set(float64(len(s.peers.Bad())))
store := s.Host().Peerstore()
numConnectedPeersByClient := make(map[string]float64)
peerScoresByClient := make(map[string][]float64)
for i := 0; i < len(connectedPeers); i++ {
p := connectedPeers[i]
pid, err := peer.Decode(p.String())
if err != nil {
log.WithError(err).Debug("Could not decode peer string")
continue
}
// Get the agent data.
rawAgent, err := store.Get(pid, "AgentVersion")
agent, ok := rawAgent.(string)
if err != nil || !ok {
agent = "unknown"
}
foundName := "unknown"
for _, knownAgent := range knownAgentVersions {
// If the agent string matches one of our known agents, we set
// the value to our own, sanitized string.
if strings.Contains(strings.ToLower(agent), knownAgent) {
foundName = knownAgent
}
}
numConnectedPeersByClient[foundName] += 1
// Get peer scoring data.
overallScore := s.peers.Scorers().Score(pid)
peerScoresByClient[foundName] = append(peerScoresByClient[foundName], overallScore)
}
for agent, total := range numConnectedPeersByClient {
connectedPeersCount.WithLabelValues(agent).Set(total)
}
for agent, scoringData := range peerScoresByClient {
avgScore := average(scoringData)
avgScoreConnectedClients.WithLabelValues(agent).Set(avgScore)
}
}
func average(xs []float64) float64 {
total := 0.0
for _, v := range xs {
total += v
}
return total / float64(len(xs))
}

View File

@ -245,9 +245,7 @@ func (s *Service) Start() {
})
async.RunEvery(s.ctx, 30*time.Minute, s.Peers().Prune)
async.RunEvery(s.ctx, params.BeaconNetworkConfig().RespTimeout, s.updateMetrics)
async.RunEvery(s.ctx, refreshRate, func() {
s.RefreshENR()
})
async.RunEvery(s.ctx, refreshRate, s.RefreshENR)
async.RunEvery(s.ctx, 1*time.Minute, func() {
log.WithFields(logrus.Fields{
"inbound": len(s.peers.InboundConnected()),