From 9f7f7d6cff182007519f92417c98fb1230de8458 Mon Sep 17 00:00:00 2001 From: Preston Van Loon Date: Sat, 11 May 2019 17:43:55 -0400 Subject: [PATCH] Tracing improvements (#2570) * some improvements * fix * gazelle * disable lostcancel --- BUILD.bazel | 3 ++- beacon-chain/main.go | 1 + beacon-chain/node/node.go | 1 + beacon-chain/usage.go | 1 + k8s/beacon-chain/beacon-chain.deploy.yaml | 8 ++++++++ k8s/beacon-chain/beacon-chain.service.yaml | 16 ++++++++++++++++ k8s/beacon-chain/validator.deploy.yaml | 7 +++++++ shared/cmd/flags.go | 5 +++++ shared/p2p/service.go | 1 + shared/tracing/BUILD.bazel | 1 + shared/tracing/tracer.go | 22 +++++++++++++++++----- validator/client/validator_attest.go | 13 ++++++++----- validator/main.go | 1 + validator/node/node.go | 1 + validator/usage.go | 1 + 15 files changed, 71 insertions(+), 11 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 60908f04c..551b682bb 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -68,7 +68,8 @@ nogo( "@org_golang_x_tools//go/analysis/passes/pkgfact:go_tool_library", "@org_golang_x_tools//go/analysis/passes/nilness:go_tool_library", "@org_golang_x_tools//go/analysis/passes/nilfunc:go_tool_library", - # "@org_golang_x_tools//go/analysis/passes/lostcancel:go_tool_library", + # lost cancel ignore doesn't seem to work when running with coverage + #"@org_golang_x_tools//go/analysis/passes/lostcancel:go_tool_library", "@org_golang_x_tools//go/analysis/passes/loopclosure:go_tool_library", "@org_golang_x_tools//go/analysis/passes/httpresponse:go_tool_library", "@org_golang_x_tools//go/analysis/passes/findcall:go_tool_library", diff --git a/beacon-chain/main.go b/beacon-chain/main.go index 4d1413ccd..501eb8b1b 100644 --- a/beacon-chain/main.go +++ b/beacon-chain/main.go @@ -61,6 +61,7 @@ func main() { cmd.DataDirFlag, cmd.VerbosityFlag, cmd.EnableTracingFlag, + cmd.TracingProcessNameFlag, cmd.TracingEndpointFlag, cmd.TraceSampleFractionFlag, cmd.MonitoringPortFlag, diff --git a/beacon-chain/node/node.go b/beacon-chain/node/node.go index 83ee314bb..6d02d52e6 100644 --- a/beacon-chain/node/node.go +++ b/beacon-chain/node/node.go @@ -55,6 +55,7 @@ type BeaconNode struct { func NewBeaconNode(ctx *cli.Context) (*BeaconNode, error) { if err := tracing.Setup( "beacon-chain", // service name + ctx.GlobalString(cmd.TracingProcessNameFlag.Name), ctx.GlobalString(cmd.TracingEndpointFlag.Name), ctx.GlobalFloat64(cmd.TraceSampleFractionFlag.Name), ctx.GlobalBool(cmd.EnableTracingFlag.Name), diff --git a/beacon-chain/usage.go b/beacon-chain/usage.go index cc68a9cf9..f76279db2 100644 --- a/beacon-chain/usage.go +++ b/beacon-chain/usage.go @@ -51,6 +51,7 @@ var appHelpFlagGroups = []flagGroup{ cmd.DataDirFlag, cmd.VerbosityFlag, cmd.EnableTracingFlag, + cmd.TracingProcessNameFlag, cmd.TracingEndpointFlag, cmd.TraceSampleFractionFlag, cmd.MonitoringPortFlag, diff --git a/k8s/beacon-chain/beacon-chain.deploy.yaml b/k8s/beacon-chain/beacon-chain.deploy.yaml index e39b901dc..773f16999 100644 --- a/k8s/beacon-chain/beacon-chain.deploy.yaml +++ b/k8s/beacon-chain/beacon-chain.deploy.yaml @@ -3,6 +3,7 @@ apiVersion: apps/v1 metadata: name: beacon-chain namespace: beacon-chain + app: beacon-chain spec: replicas: 3 serviceName: beacon-chain @@ -11,11 +12,13 @@ spec: matchLabels: component: beacon-chain universe: beacon-chain + app: beacon-chain template: metadata: labels: component: beacon-chain universe: beacon-chain + app: beacon-chain annotations: prometheus.io/scrape: 'true' prometheus.io/port: '9090' @@ -53,6 +56,7 @@ spec: - --relay-node=/ip4/35.224.249.2/tcp/30000/p2p/QmfAgkmjiZNZhr2wFN9TwaRgHouMTBT6HELyzE5A3BT2wK - --p2p-port=5000 - --enable-tracing + - --tracing-process-name=$(POD_NAME) - --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268 - --trace-sample-fraction=1.0 - --datadir=/data @@ -79,6 +83,10 @@ spec: configMapKeyRef: name: beacon-config key: DEPOSIT_CONTRACT_ADDRESS + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name readinessProbe: initialDelaySeconds: 60 httpGet: diff --git a/k8s/beacon-chain/beacon-chain.service.yaml b/k8s/beacon-chain/beacon-chain.service.yaml index dd12ebf01..d016e7b94 100644 --- a/k8s/beacon-chain/beacon-chain.service.yaml +++ b/k8s/beacon-chain/beacon-chain.service.yaml @@ -19,6 +19,8 @@ apiVersion: v1 metadata: name: beacon-chain namespace: beacon-chain + labels: + app: beacon-chain spec: selector: component: beacon-chain @@ -62,6 +64,18 @@ spec: loadBalancer: consistentHash: useSourceIp: true +--- + apiVersion: networking.istio.io/v1alpha3 + kind: DestinationRule + metadata: + name: beacon-chain-grpc-web + namespace: beacon-chain + spec: + host: beacon-chain-grpc-web.beacon-chain.svc.cluster.local + trafficPolicy: + loadBalancer: + consistentHash: + useSourceIp: true --- # Public grpc-web service kind: Service @@ -69,6 +83,8 @@ apiVersion: v1 metadata: name: beacon-chain-grpc-web namespace: beacon-chain + labels: + app: beacon-chain spec: selector: component: beacon-chain diff --git a/k8s/beacon-chain/validator.deploy.yaml b/k8s/beacon-chain/validator.deploy.yaml index 75d283b11..98378e3aa 100644 --- a/k8s/beacon-chain/validator.deploy.yaml +++ b/k8s/beacon-chain/validator.deploy.yaml @@ -29,6 +29,7 @@ spec: - --datadir=/data - --beacon-rpc-provider=beacon-chain:4000 - --enable-tracing + - --tracing-process-name=$(POD_NAME) - --tracing-endpoint=http://jaeger-collector.istio-system.svc.cluster.local:14268 - --trace-sample-fraction=1.0 volumeMounts: @@ -41,6 +42,12 @@ spec: requests: cpu: "50m" memory: "100Mi" + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + initContainers: - name: init-pk image: gcr.io/prysmaticlabs/prysm/cluster-pk-manager/client:latest diff --git a/shared/cmd/flags.go b/shared/cmd/flags.go index 1597d1433..56568d548 100644 --- a/shared/cmd/flags.go +++ b/shared/cmd/flags.go @@ -23,6 +23,11 @@ var ( Name: "enable-tracing", Usage: "Enable request tracing.", } + // TracingProcessNameFlag defines a flag to specify a process name. + TracingProcessNameFlag = cli.StringFlag{ + Name: "tracing-process-name", + Usage: "The name to apply to tracing tag \"process_name\"", + } // TracingEndpointFlag flag defines the http endpoint for serving traces to Jaeger. TracingEndpointFlag = cli.StringFlag{ Name: "tracing-endpoint", diff --git a/shared/p2p/service.go b/shared/p2p/service.go index 6d26b3679..8708a78a0 100644 --- a/shared/p2p/service.go +++ b/shared/p2p/service.go @@ -138,6 +138,7 @@ func NewServer(cfg *ServerConfig) (*Server, error) { } info, err := peerInfoFromAddr(addr) if err != nil { + cancel() return nil, err } exclusions = append(exclusions, info.ID) diff --git a/shared/tracing/BUILD.bazel b/shared/tracing/BUILD.bazel index 567f0b9a3..a85a2b40d 100644 --- a/shared/tracing/BUILD.bazel +++ b/shared/tracing/BUILD.bazel @@ -6,6 +6,7 @@ go_library( importpath = "github.com/prysmaticlabs/prysm/shared/tracing", visibility = ["//visibility:public"], deps = [ + "//shared/version:go_default_library", "@com_github_sirupsen_logrus//:go_default_library", "@io_opencensus_go//trace:go_default_library", "@io_opencensus_go_contrib_exporter_jaeger//:go_default_library", diff --git a/shared/tracing/tracer.go b/shared/tracing/tracer.go index 524bea819..9cdba3de8 100644 --- a/shared/tracing/tracer.go +++ b/shared/tracing/tracer.go @@ -4,6 +4,7 @@ import ( "errors" "contrib.go.opencensus.io/exporter/jaeger" + "github.com/prysmaticlabs/prysm/shared/version" "github.com/sirupsen/logrus" "go.opencensus.io/trace" ) @@ -11,23 +12,34 @@ import ( var log = logrus.WithField("prefix", "tracing") // Setup creates and initializes a new tracing configuration.. -func Setup(name, endpoint string, sampleFraction float64, enable bool) error { +func Setup(serviceName, processName, endpoint string, sampleFraction float64, enable bool) error { if !enable { trace.ApplyConfig(trace.Config{DefaultSampler: trace.NeverSample()}) return nil } - if name == "" { + if serviceName == "" { return errors.New("tracing service name cannot be empty") } - trace.ApplyConfig(trace.Config{DefaultSampler: trace.ProbabilitySampler(sampleFraction)}) + trace.ApplyConfig(trace.Config{ + DefaultSampler: trace.ProbabilitySampler(sampleFraction), + MaxMessageEventsPerSpan: 500, + }) log.Infof("Starting Jaeger exporter endpoint at address = %s", endpoint) exporter, err := jaeger.NewExporter(jaeger.Options{ - Endpoint: endpoint, + CollectorEndpoint: endpoint, Process: jaeger.Process{ - ServiceName: name, + ServiceName: serviceName, + Tags: []jaeger.Tag{ + jaeger.StringTag("process_name", processName), + jaeger.StringTag("version", version.GetVersion()), + }, + }, + BufferMaxCount: 256 * 1e6, // 256Mb + OnError: func(err error) { + log.WithError(err).Error("Failed to process span") }, }) if err != nil { diff --git a/validator/client/validator_attest.go b/validator/client/validator_attest.go index b00cb6c3d..1aff46461 100644 --- a/validator/client/validator_attest.go +++ b/validator/client/validator_attest.go @@ -137,6 +137,14 @@ func (v *validator) AttestToBlockHead(ctx context.Context, slot uint64, idx stri "validator": truncatedPk, }).Info("Attesting to beacon chain head...") + span.AddAttributes( + trace.Int64Attribute("slot", int64(slot-params.BeaconConfig().GenesisSlot)), + trace.Int64Attribute("shard", int64(attData.Shard)), + trace.StringAttribute("blockRoot", fmt.Sprintf("%#x", attestation.Data.BeaconBlockRootHash32)), + trace.Int64Attribute("justifiedEpoch", int64(attData.JustifiedEpoch-params.BeaconConfig().GenesisEpoch)), + trace.StringAttribute("bitfield", fmt.Sprintf("%#x", aggregationBitfield)), + ) + attResp, err := v.attesterClient.AttestHead(ctx, attestation) if err != nil { log.Errorf("Could not submit attestation to beacon node: %v", err) @@ -149,12 +157,7 @@ func (v *validator) AttestToBlockHead(ctx context.Context, slot uint64, idx stri "validator": truncatedPk, }).Info("Attested latest head") span.AddAttributes( - trace.Int64Attribute("slot", int64(slot-params.BeaconConfig().GenesisSlot)), trace.StringAttribute("attestationHash", fmt.Sprintf("%#x", attResp.AttestationHash)), - trace.Int64Attribute("shard", int64(attData.Shard)), - trace.StringAttribute("blockRoot", fmt.Sprintf("%#x", attestation.Data.BeaconBlockRootHash32)), - trace.Int64Attribute("justifiedEpoch", int64(attData.JustifiedEpoch-params.BeaconConfig().GenesisEpoch)), - trace.StringAttribute("bitfield", fmt.Sprintf("%#x", aggregationBitfield)), ) } diff --git a/validator/main.go b/validator/main.go index fb3a8389c..73c371e60 100644 --- a/validator/main.go +++ b/validator/main.go @@ -142,6 +142,7 @@ contract in order to activate the validator client`, cmd.VerbosityFlag, cmd.DataDirFlag, cmd.EnableTracingFlag, + cmd.TracingProcessNameFlag, cmd.TracingEndpointFlag, cmd.TraceSampleFractionFlag, cmd.BootstrapNode, diff --git a/validator/node/node.go b/validator/node/node.go index 233416dc0..e8542415b 100644 --- a/validator/node/node.go +++ b/validator/node/node.go @@ -40,6 +40,7 @@ type ValidatorClient struct { func NewValidatorClient(ctx *cli.Context, password string) (*ValidatorClient, error) { if err := tracing.Setup( "validator", // service name + ctx.GlobalString(cmd.TracingProcessNameFlag.Name), ctx.GlobalString(cmd.TracingEndpointFlag.Name), ctx.GlobalFloat64(cmd.TraceSampleFractionFlag.Name), ctx.GlobalBool(cmd.EnableTracingFlag.Name), diff --git a/validator/usage.go b/validator/usage.go index 7e86a1d8f..5e452ddf9 100644 --- a/validator/usage.go +++ b/validator/usage.go @@ -47,6 +47,7 @@ var appHelpFlagGroups = []flagGroup{ cmd.VerbosityFlag, cmd.DataDirFlag, cmd.EnableTracingFlag, + cmd.TracingProcessNameFlag, cmd.TracingEndpointFlag, cmd.TraceSampleFractionFlag, cmd.BootstrapNode,