heimdall: better error logging for clerk/event-record/list nil response (#9103)

Users reported this error
```
[bor.heimdall] an error while trying fetching path=clerk/event-record/list attempt=5 error="unexpected end of JSON input"
```

Which may happen if:

1. Heimdall is behind and not sync-ed - for more info check
https://github.com/maticnetwork/heimdall/pull/993
2. Or the header time erigon is sending is far into the future

The logs in this PR will help us see which of the 2 is the culprit but
most likely it is 1. We will investigate further 2. if it ever happens.

Changes:
1. Improves logging upon heimdall client retries - prints out the full
url that failed.
2. Fixes a bug where the body was incorrectly checked if it is empty -
`len(body) == 0` vs `body == nil`
3. Unit test for the bug regression
4. Adds a log to indicate to users to check their heimdall process if
they run into this scenario since that may be the culprit


Example output with new logs
<img width="1465" alt="Screenshot 2023-12-29 at 20 16 57"
src="https://github.com/ledgerwatch/erigon/assets/94537774/1ebfde68-aa93-41d6-889a-27bef5414f25">
This commit is contained in:
milen 2023-12-30 11:23:25 +00:00 committed by GitHub
parent fc9dae1783
commit b562eff482
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 6 deletions

View File

@ -121,6 +121,14 @@ func (c *Client) StateSyncEvents(ctx context.Context, fromID uint64, to int64) (
response, err := FetchWithRetry[StateSyncEventsResponse](ctx, c, url) response, err := FetchWithRetry[StateSyncEventsResponse](ctx, c, url)
if err != nil { if err != nil {
if errors.Is(err, ErrNoResponse) {
// for more info check https://github.com/maticnetwork/heimdall/pull/993
c.logger.Warn(
"[bor.heimdall] check heimdall logs to see if it is in sync - no response when querying state sync events",
"path", url.Path,
"queryParams", url.RawQuery,
)
}
return nil, err return nil, err
} }
@ -327,7 +335,7 @@ func FetchWithRetryEx[T any](ctx context.Context, client *Client, url *url.URL,
// yet in heimdall. E.g. when the hard fork hasn't hit yet but heimdall // yet in heimdall. E.g. when the hard fork hasn't hit yet but heimdall
// is upgraded. // is upgraded.
if errors.Is(err, ErrServiceUnavailable) { if errors.Is(err, ErrServiceUnavailable) {
client.logger.Debug("[bor.heimdall] service unavailable at the moment", "path", url.Path, "attempt", attempt, "err", err) client.logger.Debug("[bor.heimdall] service unavailable at the moment", "path", url.Path, "queryParams", url.RawQuery, "attempt", attempt, "err", err)
return nil, err return nil, err
} }
@ -335,14 +343,14 @@ func FetchWithRetryEx[T any](ctx context.Context, client *Client, url *url.URL,
return nil, err return nil, err
} }
client.logger.Warn("[bor.heimdall] an error while fetching", "path", url.Path, "attempt", attempt, "err", err) client.logger.Warn("[bor.heimdall] an error while fetching", "path", url.Path, "queryParams", url.RawQuery, "attempt", attempt, "err", err)
select { select {
case <-ctx.Done(): case <-ctx.Done():
client.logger.Debug("[bor.heimdall] request canceled", "reason", ctx.Err(), "path", url.Path, "attempt", attempt) client.logger.Debug("[bor.heimdall] request canceled", "reason", ctx.Err(), "path", url.Path, "queryParams", url.RawQuery, "attempt", attempt)
return nil, ctx.Err() return nil, ctx.Err()
case <-client.closeCh: case <-client.closeCh:
client.logger.Debug("[bor.heimdall] shutdown detected, terminating request", "path", url.Path) client.logger.Debug("[bor.heimdall] shutdown detected, terminating request", "path", url.Path, "queryParams", url.RawQuery)
return nil, ErrShutdownDetected return nil, ErrShutdownDetected
case <-ticker.C: case <-ticker.C:
// retry // retry
@ -369,7 +377,7 @@ func Fetch[T any](ctx context.Context, request *Request) (*T, error) {
return nil, err return nil, err
} }
if body == nil { if len(body) == 0 {
return nil, ErrNoResponse return nil, ErrNoResponse
} }

View File

@ -31,7 +31,10 @@ func TestHeimdallClientFetchesTerminateUponTooManyErrors(t *testing.T) {
httpClient := mock.NewMockHttpClient(ctrl) httpClient := mock.NewMockHttpClient(ctrl)
httpClient.EXPECT(). httpClient.EXPECT().
Do(gomock.Any()). Do(gomock.Any()).
Return(&http.Response{StatusCode: 404, Body: emptyBodyReadCloser{}}, nil). Return(&http.Response{
StatusCode: 404,
Body: emptyBodyReadCloser{},
}, nil).
Times(5) Times(5)
logger := testlog.Logger(t, log.LvlDebug) logger := testlog.Logger(t, log.LvlDebug)
heimdallClient := newHeimdallClient("https://dummyheimdal.com", httpClient, 100*time.Millisecond, 5, logger) heimdallClient := newHeimdallClient("https://dummyheimdal.com", httpClient, 100*time.Millisecond, 5, logger)
@ -40,3 +43,22 @@ func TestHeimdallClientFetchesTerminateUponTooManyErrors(t *testing.T) {
require.Nil(t, spanRes) require.Nil(t, spanRes)
require.Error(t, err) require.Error(t, err)
} }
func TestHeimdallClientStateSyncEventsReturnsErrNoResponseWhenHttp200WithEmptyBody(t *testing.T) {
ctx := context.Background()
ctrl := gomock.NewController(t)
httpClient := mock.NewMockHttpClient(ctrl)
httpClient.EXPECT().
Do(gomock.Any()).
Return(&http.Response{
StatusCode: 200,
Body: emptyBodyReadCloser{},
}, nil).
Times(2)
logger := testlog.Logger(t, log.LvlDebug)
heimdallClient := newHeimdallClient("https://dummyheimdal.com", httpClient, time.Millisecond, 2, logger)
spanRes, err := heimdallClient.StateSyncEvents(ctx, 100, time.Now().Unix())
require.Nil(t, spanRes)
require.ErrorIs(t, err, ErrNoResponse)
}