2020-04-29 21:32:39 +00:00
|
|
|
// Package prometheus defines a service that exposes metrics and health
// information for a node in Prysm.
|
2018-11-15 12:54:45 +00:00
|
|
|
package prometheus
|
|
|
|
|
|
|
|
import (
|
2018-12-30 21:20:43 +00:00
|
|
|
"bytes"
|
2018-11-15 12:54:45 +00:00
|
|
|
"context"
|
2018-12-30 21:20:43 +00:00
|
|
|
"fmt"
|
2020-04-05 19:36:18 +00:00
|
|
|
"net"
|
2018-11-15 12:54:45 +00:00
|
|
|
"net/http"
|
2019-04-06 00:19:27 +00:00
|
|
|
"runtime/debug"
|
|
|
|
"runtime/pprof"
|
2018-11-15 12:54:45 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
2018-12-30 21:20:43 +00:00
|
|
|
"github.com/prysmaticlabs/prysm/shared"
|
2020-09-16 14:03:52 +00:00
|
|
|
"github.com/prysmaticlabs/prysm/shared/logutil"
|
2018-11-15 12:54:45 +00:00
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
)
|
|
|
|
|
|
|
|
// log is the package-level logger; every entry created from it carries the
// field "prefix" set to "prometheus".
var log = logrus.WithField("prefix", "prometheus")
|
|
|
|
|
|
|
|
// Service provides Prometheus metrics via the /metrics route. This route will
|
|
|
|
// show all the metrics registered with the Prometheus DefaultRegisterer.
|
|
|
|
type Service struct {
|
2018-12-30 21:20:43 +00:00
|
|
|
server *http.Server
|
|
|
|
svcRegistry *shared.ServiceRegistry
|
2019-01-24 01:26:25 +00:00
|
|
|
failStatus error
|
2018-11-15 12:54:45 +00:00
|
|
|
}
|
|
|
|
|
2019-09-03 16:07:40 +00:00
|
|
|
// Handler pairs a URL path with the function that serves it. Handlers are
// served on the same port as /metrics, /healthz, /goroutinez, etc.
type Handler struct {
	// Path is the pattern the handler function is registered under.
	Path string
	// Handler is the function invoked for requests matching Path.
	Handler func(http.ResponseWriter, *http.Request)
}
|
|
|
|
|
2020-09-23 08:59:49 +00:00
|
|
|
// NewService sets up a new instance for a given address host:port.
|
2018-11-15 12:54:45 +00:00
|
|
|
// An empty host will match with any IP so an address like ":2121" is perfectly acceptable.
|
2020-09-23 08:59:49 +00:00
|
|
|
func NewService(addr string, svcRegistry *shared.ServiceRegistry, additionalHandlers ...Handler) *Service {
|
2018-12-30 21:20:43 +00:00
|
|
|
s := &Service{svcRegistry: svcRegistry}
|
|
|
|
|
2018-11-15 12:54:45 +00:00
|
|
|
mux := http.NewServeMux()
|
|
|
|
mux.Handle("/metrics", promhttp.Handler())
|
2018-12-30 21:20:43 +00:00
|
|
|
mux.HandleFunc("/healthz", s.healthzHandler)
|
2019-04-06 00:19:27 +00:00
|
|
|
mux.HandleFunc("/goroutinez", s.goroutinezHandler)
|
2020-09-16 14:03:52 +00:00
|
|
|
mux.HandleFunc("/logs", logutil.NewLogStreamServer().Handler)
|
2018-12-30 21:20:43 +00:00
|
|
|
|
2019-09-03 16:07:40 +00:00
|
|
|
// Register additional handlers.
|
|
|
|
for _, h := range additionalHandlers {
|
|
|
|
mux.HandleFunc(h.Path, h.Handler)
|
|
|
|
}
|
|
|
|
|
2018-12-30 21:20:43 +00:00
|
|
|
s.server = &http.Server{Addr: addr, Handler: mux}
|
|
|
|
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2020-04-21 20:58:53 +00:00
|
|
|
func (s *Service) healthzHandler(w http.ResponseWriter, r *http.Request) {
|
|
|
|
response := generatedResponse{}
|
2019-01-06 03:12:59 +00:00
|
|
|
|
2020-04-21 20:58:53 +00:00
|
|
|
type serviceStatus struct {
|
|
|
|
Name string `json:"service"`
|
|
|
|
Status bool `json:"status"`
|
|
|
|
Err string `json:"error"`
|
|
|
|
}
|
2020-07-29 00:31:21 +00:00
|
|
|
var hasError bool
|
2020-04-21 20:58:53 +00:00
|
|
|
var statuses []serviceStatus
|
|
|
|
for k, v := range s.svcRegistry.Statuses() {
|
|
|
|
s := serviceStatus{
|
2020-08-06 16:29:00 +00:00
|
|
|
Name: k.String(),
|
2020-04-21 20:58:53 +00:00
|
|
|
Status: true,
|
|
|
|
}
|
|
|
|
if v != nil {
|
|
|
|
s.Status = false
|
|
|
|
s.Err = v.Error()
|
2020-07-29 00:31:21 +00:00
|
|
|
if s.Err != "" {
|
|
|
|
hasError = true
|
|
|
|
}
|
2019-01-05 03:58:19 +00:00
|
|
|
}
|
2020-04-21 20:58:53 +00:00
|
|
|
statuses = append(statuses, s)
|
2018-11-15 12:54:45 +00:00
|
|
|
}
|
2020-04-21 20:58:53 +00:00
|
|
|
response.Data = statuses
|
|
|
|
|
2020-07-29 00:31:21 +00:00
|
|
|
if hasError {
|
|
|
|
w.WriteHeader(http.StatusServiceUnavailable)
|
2020-08-22 21:12:17 +00:00
|
|
|
} else {
|
|
|
|
w.WriteHeader(http.StatusOK)
|
2020-07-29 00:31:21 +00:00
|
|
|
}
|
|
|
|
|
2020-04-21 20:58:53 +00:00
|
|
|
// Handle plain text content.
|
|
|
|
if contentType := negotiateContentType(r); contentType == contentTypePlainText {
|
|
|
|
var buf bytes.Buffer
|
|
|
|
for _, s := range statuses {
|
|
|
|
var status string
|
|
|
|
if s.Status {
|
|
|
|
status = "OK"
|
|
|
|
} else {
|
|
|
|
status = "ERROR " + s.Err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err := buf.WriteString(fmt.Sprintf("%s: %s\n", s.Name, status)); err != nil {
|
|
|
|
response.Err = err.Error()
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
response.Data = buf
|
2018-12-30 21:20:43 +00:00
|
|
|
}
|
|
|
|
|
2020-04-21 20:58:53 +00:00
|
|
|
if err := writeResponse(w, r, response); err != nil {
|
|
|
|
log.Errorf("Error writing response: %v", err)
|
2019-01-06 03:12:59 +00:00
|
|
|
}
|
2018-11-15 12:54:45 +00:00
|
|
|
}
|
|
|
|
|
2019-04-09 01:57:09 +00:00
|
|
|
func (s *Service) goroutinezHandler(w http.ResponseWriter, _ *http.Request) {
|
2019-04-06 00:19:27 +00:00
|
|
|
stack := debug.Stack()
|
2020-04-13 04:11:09 +00:00
|
|
|
if _, err := w.Write(stack); err != nil {
|
|
|
|
log.WithError(err).Error("Failed to write goroutines stack")
|
|
|
|
}
|
|
|
|
if err := pprof.Lookup("goroutine").WriteTo(w, 2); err != nil {
|
|
|
|
log.WithError(err).Error("Failed to write pprof goroutines")
|
|
|
|
}
|
2019-04-06 00:19:27 +00:00
|
|
|
}
|
|
|
|
|
2018-11-15 12:54:45 +00:00
|
|
|
// Start the prometheus service.
|
|
|
|
func (s *Service) Start() {
|
|
|
|
go func() {
|
2020-04-05 19:36:18 +00:00
|
|
|
// See if the port is already used.
|
2020-06-10 16:04:32 +00:00
|
|
|
conn, err := net.DialTimeout("tcp", s.server.Addr, time.Second)
|
2020-04-05 19:36:18 +00:00
|
|
|
if err == nil {
|
2020-04-13 04:11:09 +00:00
|
|
|
if err := conn.Close(); err != nil {
|
|
|
|
log.WithError(err).Error("Failed to close connection")
|
|
|
|
}
|
2020-04-05 19:36:18 +00:00
|
|
|
// Something on the port; we cannot use it.
|
|
|
|
log.WithField("address", s.server.Addr).Warn("Port already in use; cannot start prometheus service")
|
|
|
|
} else {
|
|
|
|
// Nothing on that port; we can use it.
|
|
|
|
log.WithField("address", s.server.Addr).Debug("Starting prometheus service")
|
|
|
|
err := s.server.ListenAndServe()
|
|
|
|
if err != nil && err != http.ErrServerClosed {
|
|
|
|
log.Errorf("Could not listen to host:port :%s: %v", s.server.Addr, err)
|
|
|
|
s.failStatus = err
|
|
|
|
}
|
2018-11-15 12:54:45 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stop the service gracefully.
|
|
|
|
func (s *Service) Stop() error {
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
|
|
|
defer cancel()
|
|
|
|
return s.server.Shutdown(ctx)
|
|
|
|
}
|
2018-12-30 21:20:43 +00:00
|
|
|
|
2019-01-24 01:26:25 +00:00
|
|
|
// Status checks for any service failure conditions.
|
2018-12-30 21:20:43 +00:00
|
|
|
func (s *Service) Status() error {
|
2019-01-24 01:26:25 +00:00
|
|
|
if s.failStatus != nil {
|
|
|
|
return s.failStatus
|
|
|
|
}
|
2018-12-30 21:20:43 +00:00
|
|
|
return nil
|
|
|
|
}
|