diff --git a/beacon-chain/p2p/monitoring.go b/beacon-chain/p2p/monitoring.go
index aa565ec55..59624d230 100644
--- a/beacon-chain/p2p/monitoring.go
+++ b/beacon-chain/p2p/monitoring.go
@@ -1,20 +1,39 @@
 package p2p
 
 import (
+	"strings"
+
+	"github.com/libp2p/go-libp2p-core/peer"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promauto"
 )
 
 var (
+	knownAgentVersions = []string{
+		"lighthouse",
+		"nimbus",
+		"prysm",
+		"teku",
+		"js-libp2p",
+		"rust-libp2p",
+	}
 	p2pPeerCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
 		Name: "p2p_peer_count",
 		Help: "The number of peers in a given state.",
 	},
 		[]string{"state"})
-	totalPeerCount = promauto.NewGauge(prometheus.GaugeOpts{
-		Name: "libp2p_peers",
-		Help: "Tracks the total number of libp2p peers",
-	})
+	connectedPeersCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "connected_libp2p_peers",
+		Help: "Tracks the total number of connected libp2p peers by agent string",
+	},
+		[]string{"agent"},
+	)
+	avgScoreConnectedClients = promauto.NewGaugeVec(prometheus.GaugeOpts{
+		Name: "connected_libp2p_peers_average_scores",
+		Help: "Tracks the overall p2p scores of connected libp2p peers by agent string",
+	},
+		[]string{"agent"},
+	)
 	repeatPeerConnections = promauto.NewCounter(prometheus.CounterOpts{
 		Name: "p2p_repeat_attempts",
 		Help: "The number of repeat attempts the connection handler is triggered for a peer.",
@@ -46,10 +65,70 @@ var (
 )
 
+// updateMetrics refreshes the peer-state gauges and the per-agent
+// connected-peer count and average-score gauges.
 func (s *Service) updateMetrics() {
-	totalPeerCount.Set(float64(len(s.peers.Connected())))
-	p2pPeerCount.WithLabelValues("Connected").Set(float64(len(s.peers.Connected())))
+	connectedPeers := s.peers.Connected()
+	p2pPeerCount.WithLabelValues("Connected").Set(float64(len(connectedPeers)))
 	p2pPeerCount.WithLabelValues("Disconnected").Set(float64(len(s.peers.Disconnected())))
 	p2pPeerCount.WithLabelValues("Connecting").Set(float64(len(s.peers.Connecting())))
 	p2pPeerCount.WithLabelValues("Disconnecting").Set(float64(len(s.peers.Disconnecting())))
 	p2pPeerCount.WithLabelValues("Bad").Set(float64(len(s.peers.Bad())))
+
+	store := s.Host().Peerstore()
+	numConnectedPeersByClient := make(map[string]float64)
+	peerScoresByClient := make(map[string][]float64)
+	for i := 0; i < len(connectedPeers); i++ {
+		p := connectedPeers[i]
+		pid, err := peer.Decode(p.String())
+		if err != nil {
+			log.WithError(err).Debug("Could not decode peer string")
+			continue
+		}
+
+		// Get the agent data.
+		rawAgent, err := store.Get(pid, "AgentVersion")
+		agent, ok := rawAgent.(string)
+		if err != nil || !ok {
+			agent = "unknown"
+		}
+		foundName := "unknown"
+		lowerAgent := strings.ToLower(agent)
+		for _, knownAgent := range knownAgentVersions {
+			// If the agent string matches one of our known agents, we set
+			// the value to our own, sanitized string. The first match wins so
+			// that client names take precedence over generic libp2p names.
+			if strings.Contains(lowerAgent, knownAgent) {
+				foundName = knownAgent
+				break
+			}
+		}
+		numConnectedPeersByClient[foundName]++
+
+		// Get peer scoring data.
+		overallScore := s.peers.Scorers().Score(pid)
+		peerScoresByClient[foundName] = append(peerScoresByClient[foundName], overallScore)
+	}
+	// Clear out previous results so that agents which no longer have any
+	// connected peers do not keep reporting stale values.
+	connectedPeersCount.Reset()
+	for agent, total := range numConnectedPeersByClient {
+		connectedPeersCount.WithLabelValues(agent).Set(total)
+	}
+	avgScoreConnectedClients.Reset()
+	for agent, scoringData := range peerScoresByClient {
+		avgScore := average(scoringData)
+		avgScoreConnectedClients.WithLabelValues(agent).Set(avgScore)
+	}
+}
+
+// average returns the arithmetic mean of xs, or 0 when xs is empty.
+func average(xs []float64) float64 {
+	if len(xs) == 0 {
+		return 0
+	}
+	total := 0.0
+	for _, v := range xs {
+		total += v
+	}
+	return total / float64(len(xs))
 }
diff --git a/beacon-chain/p2p/service.go b/beacon-chain/p2p/service.go
index a62e30242..8162e6d2c 100644
--- a/beacon-chain/p2p/service.go
+++ b/beacon-chain/p2p/service.go
@@ -245,9 +245,7 @@ func (s *Service) Start() {
 	})
 	async.RunEvery(s.ctx, 30*time.Minute, s.Peers().Prune)
 	async.RunEvery(s.ctx, params.BeaconNetworkConfig().RespTimeout, s.updateMetrics)
-	async.RunEvery(s.ctx, refreshRate, func() {
-		s.RefreshENR()
-	})
+	async.RunEvery(s.ctx, refreshRate, s.RefreshENR)
 	async.RunEvery(s.ctx, 1*time.Minute, func() {
 		log.WithFields(logrus.Fields{
 			"inbound": len(s.peers.InboundConnected()),