added snapshot sync diagnostic information, updated diagnostic channel (#8645)

This commit is contained in:
Dmytro 2023-11-07 15:50:36 +03:00 committed by GitHub
parent d1271268c7
commit 9c7c758bda
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 138 additions and 5 deletions

View File

@ -2,8 +2,6 @@ package main
import (
"fmt"
"github.com/ledgerwatch/erigon/cmd/devnet/services"
"github.com/ledgerwatch/erigon/cmd/devnet/services/polygon"
"os"
"os/signal"
"path/filepath"
@ -12,6 +10,9 @@ import (
"syscall"
"time"
"github.com/ledgerwatch/erigon/cmd/devnet/services"
"github.com/ledgerwatch/erigon/cmd/devnet/services/polygon"
"github.com/ledgerwatch/erigon-lib/chain/networkname"
"github.com/ledgerwatch/erigon-lib/common/metrics"
"github.com/ledgerwatch/erigon/cmd/devnet/accounts"

54
diagnostics/diagnostic.go Normal file
View File

@ -0,0 +1,54 @@
package diagnostics
import (
	"context"
	"net/http"
	"sync"

	"github.com/ledgerwatch/erigon-lib/common"
	diaglib "github.com/ledgerwatch/erigon-lib/diagnostics"
	"github.com/ledgerwatch/erigon/turbo/node"
	"github.com/ledgerwatch/log/v3"
	"github.com/urfave/cli/v2"
)
// DiagnosticClient receives diagnostic messages published through the diaglib
// channel and keeps the most recent snapshot-download statistics per stage
// prefix so that HTTP handlers can serve them.
type DiagnosticClient struct {
	ctx        *cli.Context
	metricsMux *http.ServeMux
	node       *node.ErigonNode

	// mu guards snapshotDownload: the listener goroutine started by Setup
	// writes to the map while HTTP handlers read it concurrently.
	mu               sync.Mutex
	snapshotDownload map[string]diaglib.DownloadStatistics
}

// NewDiagnosticClient returns a DiagnosticClient with an empty statistics map.
// Call Setup to start collecting data.
func NewDiagnosticClient(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) *DiagnosticClient {
	return &DiagnosticClient{
		ctx:              ctx,
		metricsMux:       metricsMux,
		node:             node,
		snapshotDownload: map[string]diaglib.DownloadStatistics{},
	}
}

// Setup starts the background listeners that feed the client.
func (d *DiagnosticClient) Setup() {
	d.runSnapshotListener()
}

// runSnapshotListener starts a goroutine that stores every received
// DownloadStatistics message under its StagePrefix. The goroutine exits when
// the root context is done or after the first message flagged as
// DownloadFinished has been stored.
func (d *DiagnosticClient) runSnapshotListener() {
	go func() {
		// NOTE(review): both cancel functions are discarded here, as in the
		// original code; confirm whether they should be called on exit.
		ctx, ch, _ /*cancel*/ := diaglib.Context[diaglib.DownloadStatistics](context.Background(), 1)
		rootCtx, _ := common.RootContext()

		diaglib.StartProviders(ctx, diaglib.TypeOf(diaglib.DownloadStatistics{}), log.Root())

		for {
			select {
			case <-rootCtx.Done():
				return
			case info := <-ch:
				// Hold the lock only for the map write so readers are never
				// blocked on the channel receive.
				d.mu.Lock()
				d.snapshotDownload[info.StagePrefix] = info
				d.mu.Unlock()

				if info.DownloadFinished {
					return
				}
			}
		}
	}()
}

// SnapshotDownload returns a copy of the latest download statistics keyed by
// stage prefix. A copy is returned so that callers can iterate or encode the
// result without racing against the listener goroutine.
func (d *DiagnosticClient) SnapshotDownload() map[string]diaglib.DownloadStatistics {
	d.mu.Lock()
	defer d.mu.Unlock()

	stats := make(map[string]diaglib.DownloadStatistics, len(d.snapshotDownload))
	for prefix, s := range d.snapshotDownload {
		stats[prefix] = s
	}
	return stats
}

View File

@ -36,11 +36,11 @@ type PeerResponse struct {
Protocols map[string]interface{} `json:"protocols"` // Sub-protocol specific metadata fields
}
func SetupPeersAccess(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) {
func SetupPeersAccess(ctxclient *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) {
metricsMux.HandleFunc("/peers", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Content-Type", "application/json")
writePeers(w, ctx, node)
writePeers(w, ctxclient, node)
})
}

View File

@ -11,6 +11,9 @@ import (
func Setup(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) {
debugMux := http.NewServeMux()
diagnostic := NewDiagnosticClient(ctx, debugMux, node)
diagnostic.Setup()
metricsMux.HandleFunc("/debug/", func(w http.ResponseWriter, r *http.Request) {
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/debug")
r.URL.RawPath = strings.TrimPrefix(r.URL.RawPath, "/debug")
@ -27,5 +30,6 @@ func Setup(ctx *cli.Context, metricsMux *http.ServeMux, node *node.ErigonNode) {
SetupNodeInfoAccess(debugMux, node)
SetupPeersAccess(ctx, debugMux, node)
SetupBootnodesAccess(debugMux, node)
SetupStagesAccess(debugMux, diagnostic)
}

View File

@ -0,0 +1,18 @@
package diagnostics
import (
"encoding/json"
"net/http"
)
// SetupStagesAccess registers the "/snapshot-sync" endpoint on metricsMux.
// The endpoint answers with the JSON produced by writeStages for diag and
// allows cross-origin requests from any origin.
func SetupStagesAccess(metricsMux *http.ServeMux, diag *DiagnosticClient) {
	serveSnapshotSync := func(rw http.ResponseWriter, _ *http.Request) {
		headers := rw.Header()
		headers.Set("Access-Control-Allow-Origin", "*")
		headers.Set("Content-Type", "application/json")

		writeStages(rw, diag)
	}

	metricsMux.HandleFunc("/snapshot-sync", serveSnapshotSync)
}
// writeStages encodes the snapshot-download statistics held by diag as JSON
// and writes them to w.
//
// An encoding error is reported to the client with http.Error instead of
// being silently dropped. The 500 status only takes effect if nothing has
// been written to w yet.
func writeStages(w http.ResponseWriter, diag *DiagnosticClient) {
	if err := json.NewEncoder(w).Encode(diag.SnapshotDownload()); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}

View File

@ -28,3 +28,22 @@ type PeerStatistics struct {
TypeBytesIn map[string]uint64
TypeBytesOut map[string]uint64
}
// DownloadStatistics is a diagnostic message describing the progress of a
// snapshot download. The sender in the snapshot-sync code fills it from the
// downloader's stats reply and from runtime memory statistics.
type DownloadStatistics struct {
// Downloaded is set from the downloader stats' BytesCompleted.
Downloaded uint64 `json:"downloaded"`
// Total is set from the downloader stats' BytesTotal.
Total uint64 `json:"total"`
// TotalTime is the time since the download started, rounded to whole
// seconds and expressed in seconds.
TotalTime float64 `json:"totalTime"`
// DownloadRate is set from the downloader stats' DownloadRate.
DownloadRate uint64 `json:"downloadRate"`
// UploadRate is set from the downloader stats' UploadRate.
UploadRate uint64 `json:"uploadRate"`
// Peers is set from the downloader stats' PeersUnique.
Peers int32 `json:"peers"`
// Files is set from the downloader stats' FilesTotal.
Files int32 `json:"files"`
// Connections is set from the downloader stats' ConnectionsTotal.
Connections uint64 `json:"connections"`
// Alloc is set from runtime.MemStats.Alloc.
Alloc uint64 `json:"alloc"`
// Sys is set from runtime.MemStats.Sys.
Sys uint64 `json:"sys"`
// DownloadFinished is set from the downloader stats' Completed flag.
DownloadFinished bool `json:"downloadFinished"`
// StagePrefix is the log prefix of the sender; listeners use it as the
// key under which the statistics are stored.
StagePrefix string `json:"stagePrefix"`
}
// Type returns the diagnostics Type that TypeOf computes for a
// DownloadStatistics value.
func (stats DownloadStatistics) Type() Type {
	infoType := TypeOf(stats)
	return infoType
}

View File

@ -103,7 +103,6 @@ func StartProviders(ctx context.Context, infoType Type, logger log.Logger) {
providerMutex.Lock()
reg := providers[infoType]
if reg == nil {
reg = &registry{}
providers[infoType] = reg

View File

@ -12,6 +12,7 @@ import (
"github.com/ledgerwatch/erigon-lib/chain/snapcfg"
"github.com/ledgerwatch/erigon-lib/common"
"github.com/ledgerwatch/erigon-lib/common/dbg"
"github.com/ledgerwatch/erigon-lib/diagnostics"
"github.com/ledgerwatch/erigon-lib/downloader/downloadergrpc"
"github.com/ledgerwatch/erigon-lib/downloader/snaptype"
proto_downloader "github.com/ledgerwatch/erigon-lib/gointerfaces/downloader"
@ -130,6 +131,7 @@ func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, capli
// send all hashes to the Downloader service
preverifiedBlockSnapshots := snapcfg.KnownCfg(cc.ChainName, []string{} /* whitelist */, snHistInDB).Preverified
downloadRequest := make([]services.DownloadRequest, 0, len(preverifiedBlockSnapshots)+len(missingSnapshots))
// build all download requests
// builds preverified snapshots request
for _, p := range preverifiedBlockSnapshots {
@ -182,6 +184,10 @@ func WaitForDownloader(logPrefix string, ctx context.Context, histV3 bool, capli
defer logEvery.Stop()
var m runtime.MemStats
/*diagnostics.RegisterProvider(diagnostics.ProviderFunc(func(ctx context.Context) error {
return nil
}), diagnostics.TypeOf(diagnostics.DownloadStatistics{}), log.Root())*/
// Check once without delay, for faster erigon re-start
stats, err := snapshotDownloader.Stats(ctx, &proto_downloader.StatsRequest{})
if err == nil && stats.Completed {
@ -205,6 +211,22 @@ Loop:
}
}
*/
diagnostics.Send(diagnostics.DownloadStatistics{
Downloaded: stats.BytesCompleted,
Total: stats.BytesTotal,
TotalTime: time.Since(downloadStartTime).Round(time.Second).Seconds(),
DownloadRate: stats.DownloadRate,
UploadRate: stats.UploadRate,
Peers: stats.PeersUnique,
Files: stats.FilesTotal,
Connections: stats.ConnectionsTotal,
Alloc: m.Alloc,
Sys: m.Sys,
DownloadFinished: stats.Completed,
StagePrefix: logPrefix,
})
log.Info(fmt.Sprintf("[%s] download finished", logPrefix), "time", time.Since(downloadStartTime).String())
break Loop
} else {
@ -218,6 +240,22 @@ Loop:
if stats.Progress > 0 && stats.DownloadRate == 0 {
suffix += " (or verifying)"
}
diagnostics.Send(diagnostics.DownloadStatistics{
Downloaded: stats.BytesCompleted,
Total: stats.BytesTotal,
TotalTime: time.Since(downloadStartTime).Round(time.Second).Seconds(),
DownloadRate: stats.DownloadRate,
UploadRate: stats.UploadRate,
Peers: stats.PeersUnique,
Files: stats.FilesTotal,
Connections: stats.ConnectionsTotal,
Alloc: m.Alloc,
Sys: m.Sys,
DownloadFinished: stats.Completed,
StagePrefix: logPrefix,
})
log.Info(fmt.Sprintf("[%s] %s", logPrefix, suffix),
"progress", fmt.Sprintf("%.2f%% %s/%s", stats.Progress, common.ByteCount(stats.BytesCompleted), common.ByteCount(stats.BytesTotal)),
"time-left", downloadTimeLeft,