downloader: use manifest.txt for public bucket (#8863)

use manifest.txt instead of webseed.toml in public buckets
This commit is contained in:
Alex Sharov 2023-11-30 16:58:23 +07:00 committed by GitHub
parent 5ff9ce802b
commit 0fbcd5b5d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 84 additions and 35 deletions

View File

@ -34,6 +34,7 @@ import (
"github.com/ledgerwatch/erigon-lib/common/datadir"
"github.com/ledgerwatch/erigon-lib/common/dbg"
"github.com/ledgerwatch/erigon-lib/downloader/downloadercfg"
"github.com/ledgerwatch/erigon-lib/downloader/snaptype"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/kv/mdbx"
"github.com/ledgerwatch/log/v3"
@ -470,6 +471,17 @@ func (d *Downloader) VerifyData(ctx context.Context, onlyFiles []string) error {
// have .torrent no .seg => get .seg file from .torrent
// have .seg no .torrent => get .torrent from .seg
func (d *Downloader) AddNewSeedableFile(ctx context.Context, name string) error {
ff, ok := snaptype.ParseFileName("", name)
if ok {
if !ff.Seedable() {
return nil
}
} else {
if !e3seedable(name) {
return nil
}
}
// if we don't have the torrent file we build it if we have the .seg file
torrentFilePath, err := BuildTorrentIfNeed(ctx, name, d.SnapDir())
if err != nil {

View File

@ -155,18 +155,33 @@ func New(dirs datadir.Dirs, version string, verbosity lg.Level, downloadRate, up
webseedFileProviders := make([]string, 0, len(webseedUrlsOrFiles))
webseedS3Providers := make([]string, 0, len(webseedUrlsOrFiles))
for _, webseed := range webseedUrlsOrFiles {
if strings.HasPrefix(webseed, "v") { // has marker v1/v2/...
webseedS3Providers = append(webseedS3Providers, webseed)
continue
}
uri, err := url.ParseRequestURI(webseed)
if err != nil {
if strings.HasSuffix(webseed, ".toml") && dir.FileExist(webseed) {
webseedFileProviders = append(webseedFileProviders, webseed)
if !strings.HasPrefix(webseed, "v") { // has marker v1/v2/...
uri, err := url.ParseRequestURI(webseed)
if err != nil {
if strings.HasSuffix(webseed, ".toml") && dir.FileExist(webseed) {
webseedFileProviders = append(webseedFileProviders, webseed)
}
continue
}
webseedHttpProviders = append(webseedHttpProviders, uri)
continue
}
if strings.HasPrefix(webseed, "v1:") {
withoutVerisonPrefix := webseed[3:]
if !strings.HasPrefix(withoutVerisonPrefix, "https:") {
webseedS3Providers = append(webseedS3Providers, webseed)
continue
}
uri, err := url.ParseRequestURI(withoutVerisonPrefix)
if err != nil {
log.Warn("[webseed] can't parse url", "err", err, "url", withoutVerisonPrefix)
continue
}
webseedHttpProviders = append(webseedHttpProviders, uri)
} else {
continue
}
webseedHttpProviders = append(webseedHttpProviders, uri)
}
localCfgFile := filepath.Join(dirs.DataDir, "webseed.toml") // datadir/webseed.toml allowed
if dir.FileExist(localCfgFile) {

View File

@ -98,20 +98,7 @@ func seedableSnapshotsBySubDir(dir, subDir string) ([]string, error) {
res := make([]string, 0, len(files))
for _, fPath := range files {
_, name := filepath.Split(fPath)
subs := historyFileRegex.FindStringSubmatch(name)
if len(subs) != 5 {
continue
}
// Check that it's seedable
from, err := strconv.ParseUint(subs[2], 10, 64)
if err != nil {
return nil, fmt.Errorf("ParseFileName: %w", err)
}
to, err := strconv.ParseUint(subs[3], 10, 64)
if err != nil {
return nil, fmt.Errorf("ParseFileName: %w", err)
}
if (to-from)%snaptype.Erigon3SeedableSteps != 0 {
if !e3seedable(name) {
continue
}
res = append(res, filepath.Join(subDir, name))
@ -119,6 +106,25 @@ func seedableSnapshotsBySubDir(dir, subDir string) ([]string, error) {
return res, nil
}
// e3seedable reports whether name looks like an Erigon3 history file whose
// step range qualifies it for seeding.
//
// The name must match historyFileRegex with exactly 5 submatch entries;
// entries 2 and 3 are parsed as the unsigned decimal "from" and "to" steps.
// The result is true only when (to - from) is a multiple of
// snaptype.Erigon3SeedableSteps. A non-matching name or a step that fails to
// parse yields false rather than an error.
func e3seedable(name string) bool {
	match := historyFileRegex.FindStringSubmatch(name)
	if len(match) != 5 {
		return false
	}

	fromStep, fromErr := strconv.ParseUint(match[2], 10, 64)
	if fromErr != nil {
		return false
	}
	toStep, toErr := strconv.ParseUint(match[3], 10, 64)
	if toErr != nil {
		return false
	}

	// NOTE(review): both steps are uint64, so toStep < fromStep wraps around
	// instead of going negative — confirm callers never pass inverted ranges.
	return (toStep-fromStep)%snaptype.Erigon3SeedableSteps == 0
}
func ensureCantLeaveDir(fName, root string) (string, error) {
if filepath.IsAbs(fName) {
newFName, err := filepath.Rel(root, fName)

View File

@ -63,6 +63,7 @@ func (d *WebSeeds) downloadWebseedTomlFromProviders(ctx context.Context, s3Provi
}
list = append(list, response)
}
for _, webSeedProviderURL := range s3Providers {
select {
case <-ctx.Done():
@ -130,19 +131,34 @@ func (d *WebSeeds) ByFileName(name string) (metainfo.UrlList, bool) {
return v, ok
}
func (d *WebSeeds) callHttpProvider(ctx context.Context, webSeedProviderUrl *url.URL) (snaptype.WebSeedsFromProvider, error) {
request, err := http.NewRequest(http.MethodGet, webSeedProviderUrl.String(), nil)
baseUrl := webSeedProviderUrl.String()
ref, err := url.Parse("manifest.txt")
if err != nil {
return nil, err
}
u := webSeedProviderUrl.ResolveReference(ref)
request, err := http.NewRequest(http.MethodGet, u.String(), nil)
if err != nil {
return nil, err
}
request = request.WithContext(ctx)
resp, err := http.DefaultClient.Do(request)
if err != nil {
return nil, fmt.Errorf("webseed.http: host=%s, url=%s, %w", webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath(), err)
return nil, fmt.Errorf("webseed.http: %w, host=%s, url=%s", err, webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath())
}
defer resp.Body.Close()
b, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("webseed.http: %w, host=%s, url=%s, ", err, webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath())
}
response := snaptype.WebSeedsFromProvider{}
if err := toml.NewDecoder(resp.Body).Decode(&response); err != nil {
return nil, fmt.Errorf("webseed.http: host=%s, url=%s, %w", webSeedProviderUrl.Hostname(), webSeedProviderUrl.EscapedPath(), err)
fileNames := strings.Split(string(b), "\n")
for _, f := range fileNames {
response[f], err = url.JoinPath(baseUrl, f)
if err != nil {
return nil, err
}
}
d.logger.Debug("[snapshots.webseed] get from HTTP provider", "urls", len(response), "host", webSeedProviderUrl.Hostname(), "url", webSeedProviderUrl.EscapedPath())
return response, nil
@ -241,10 +257,10 @@ func (d *WebSeeds) downloadTorrentFilesFromProviders(ctx context.Context, rootDi
for _, url := range tUrls {
res, err := d.callTorrentHttpProvider(ctx, url, name)
if err != nil {
d.logger.Log(d.verbosity, "[snapshots] get .torrent file from webseed", "name", name, "err", err)
d.logger.Log(d.verbosity, "[snapshots] got from webseed", "name", name, "err", err)
continue
}
d.logger.Log(d.verbosity, "[snapshots] get .torrent file from webseed", "name", name)
d.logger.Log(d.verbosity, "[snapshots] got from webseed", "name", name)
if err := saveTorrent(tPath, res); err != nil {
d.logger.Debug("[snapshots] saveTorrent", "err", err)
continue

View File

@ -4,7 +4,7 @@ go 1.20
require (
github.com/erigontech/mdbx-go v0.27.21
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27
github.com/ledgerwatch/interfaces v0.0.0-20231031050643-c86352e41520
github.com/ledgerwatch/log/v3 v3.9.0
github.com/ledgerwatch/secp256k1 v1.0.0

View File

@ -291,8 +291,8 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a h1:Y52iBc5LRxQriPnV+jKwdet2lRF96PRwRmpGLMK7OCc=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a/go.mod h1:3AuPxZc85jkehh/HA9h8gabv5MSi3kb/ddtzBsTVJFo=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27 h1:1iLvIq2oxLgPaz0BSVGa9Dmu750+G2puOeLxTWROoAg=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27/go.mod h1:3AuPxZc85jkehh/HA9h8gabv5MSi3kb/ddtzBsTVJFo=
github.com/ledgerwatch/interfaces v0.0.0-20231031050643-c86352e41520 h1:j/PRJWbPrbk8wpVjU77SWS8xJ/N+dcxPs1relNSolUs=
github.com/ledgerwatch/interfaces v0.0.0-20231031050643-c86352e41520/go.mod h1:ugQv1QllJzBny3cKZKxUrSnykkjkBgm27eQM6dnGAcc=
github.com/ledgerwatch/log/v3 v3.9.0 h1:iDwrXe0PVwBC68Dd94YSsHbMgQ3ufsgjzXtFNFVZFRk=

2
go.mod
View File

@ -185,7 +185,7 @@ require (
github.com/koron/go-ssdp v0.0.4 // indirect
github.com/kr/pretty v0.3.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a // indirect
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27 // indirect
github.com/libp2p/go-buffer-pool v0.1.0 // indirect
github.com/libp2p/go-cidranger v1.1.0 // indirect
github.com/libp2p/go-flow-metrics v0.1.0 // indirect

4
go.sum
View File

@ -539,8 +539,8 @@ github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758 h1:0D5M2HQSGD3PYPwICLl+/9oulQauOuETfgFvhBDffs0=
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a h1:Y52iBc5LRxQriPnV+jKwdet2lRF96PRwRmpGLMK7OCc=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231120030929-7bcfed8fdd1a/go.mod h1:3AuPxZc85jkehh/HA9h8gabv5MSi3kb/ddtzBsTVJFo=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27 h1:1iLvIq2oxLgPaz0BSVGa9Dmu750+G2puOeLxTWROoAg=
github.com/ledgerwatch/erigon-snapshot v1.3.1-0.20231130092351-fc8d6b8c7b27/go.mod h1:3AuPxZc85jkehh/HA9h8gabv5MSi3kb/ddtzBsTVJFo=
github.com/ledgerwatch/log/v3 v3.9.0 h1:iDwrXe0PVwBC68Dd94YSsHbMgQ3ufsgjzXtFNFVZFRk=
github.com/ledgerwatch/log/v3 v3.9.0/go.mod h1:EiAY6upmI/6LkNhOVxb4eVsmsP11HZCnZ3PlJMjYiqE=
github.com/ledgerwatch/secp256k1 v1.0.0 h1:Usvz87YoTG0uePIV8woOof5cQnLXGYa162rFf3YnwaQ=