2022-11-20 03:41:20 +00:00
/*
   Copyright 2021 Erigon contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
package downloader
import (
"context"
2023-09-06 03:19:00 +00:00
"errors"
2022-11-20 03:41:20 +00:00
"fmt"
2023-12-01 13:39:43 +00:00
"net/url"
2023-05-17 09:52:34 +00:00
"runtime"
2023-10-04 02:57:37 +00:00
"strings"
2022-11-20 03:41:20 +00:00
"sync"
2023-03-23 05:11:20 +00:00
"sync/atomic"
2022-11-20 03:41:20 +00:00
"time"
"github.com/anacrolix/torrent"
"github.com/anacrolix/torrent/metainfo"
"github.com/anacrolix/torrent/storage"
2023-10-05 07:25:00 +00:00
"github.com/c2h5oh/datasize"
2024-01-08 09:01:02 +00:00
"github.com/ledgerwatch/log/v3"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
2023-10-04 05:34:45 +00:00
"github.com/ledgerwatch/erigon-lib/common"
2023-10-04 04:01:02 +00:00
"github.com/ledgerwatch/erigon-lib/common/datadir"
2023-10-04 05:34:45 +00:00
"github.com/ledgerwatch/erigon-lib/common/dbg"
2023-12-08 11:08:38 +00:00
"github.com/ledgerwatch/erigon-lib/diagnostics"
2022-11-20 03:41:20 +00:00
"github.com/ledgerwatch/erigon-lib/downloader/downloadercfg"
2023-11-30 09:58:23 +00:00
"github.com/ledgerwatch/erigon-lib/downloader/snaptype"
2022-11-20 03:41:20 +00:00
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/kv/mdbx"
)
// Downloader - component which downloading historical files. Can use BitTorrent, or other protocols
type Downloader struct {
db kv . RwDB
pieceCompletionDB storage . PieceCompletion
torrentClient * torrent . Client
cfg * downloadercfg . Cfg
statsLock * sync . RWMutex
stats AggStats
2023-09-15 06:46:50 +00:00
folder storage . ClientImplCloser
ctx context . Context
2023-03-13 02:12:26 +00:00
stopMainLoop context . CancelFunc
wg sync . WaitGroup
2023-09-12 05:18:39 +00:00
2023-10-04 02:57:37 +00:00
webseeds * WebSeeds
logger log . Logger
verbosity log . Lvl
2023-12-21 05:15:32 +00:00
torrentFiles * TorrentFiles
2022-11-20 03:41:20 +00:00
}
// AggStats is a snapshot of statistics aggregated over all torrents of the
// client. It is recomputed by Downloader.ReCalcStats and read via
// Downloader.Stats.
type AggStats struct {
	MetadataReady    int32  // torrents whose info (metadata) is available
	FilesTotal       int32  // number of torrents known to the client
	PeersUnique      int32  // distinct peer IDs over all torrents
	ConnectionsTotal uint64 // peer connections summed over all torrents

	Completed bool    // true when every torrent reports complete
	Progress  float32 // percent, BytesCompleted relative to BytesTotal

	BytesCompleted uint64
	BytesTotal     uint64

	// Totals carried over for torrents that are no longer tracked by the
	// client; they seed BytesCompleted/BytesTotal on every recalculation.
	DroppedCompleted uint64
	DroppedTotal     uint64

	BytesDownload uint64 // cumulative counter taken from the client's connection stats
	BytesUpload   uint64 // cumulative counter taken from the client's connection stats

	UploadRate   uint64 // bytes per second over the last recalculation interval
	DownloadRate uint64 // bytes per second over the last recalculation interval
}
2023-11-13 13:10:57 +00:00
func New ( ctx context . Context , cfg * downloadercfg . Cfg , dirs datadir . Dirs , logger log . Logger , verbosity log . Lvl , discover bool ) ( * Downloader , error ) {
2023-10-12 07:11:46 +00:00
db , c , m , torrentClient , err := openClient ( ctx , cfg . Dirs . Downloader , cfg . Dirs . Snap , cfg . ClientConfig )
2022-11-20 03:41:20 +00:00
if err != nil {
return nil , fmt . Errorf ( "openClient: %w" , err )
}
peerID , err := readPeerID ( db )
if err != nil {
return nil , fmt . Errorf ( "get peer id: %w" , err )
}
2023-09-12 05:18:39 +00:00
cfg . ClientConfig . PeerID = string ( peerID )
2022-11-20 03:41:20 +00:00
if len ( peerID ) == 0 {
if err = savePeerID ( db , torrentClient . PeerID ( ) ) ; err != nil {
return nil , fmt . Errorf ( "save peer id: %w" , err )
}
}
d := & Downloader {
cfg : cfg ,
db : db ,
pieceCompletionDB : c ,
folder : m ,
torrentClient : torrentClient ,
2023-09-15 06:46:50 +00:00
statsLock : & sync . RWMutex { } ,
2023-11-24 11:46:17 +00:00
webseeds : & WebSeeds { logger : logger , verbosity : verbosity , downloadTorrentFile : cfg . DownloadTorrentFilesFromWebseed , torrentsWhitelist : cfg . ExpectedTorrentFilesHashes } ,
2023-10-03 04:21:41 +00:00
logger : logger ,
2023-10-04 02:57:37 +00:00
verbosity : verbosity ,
2023-12-21 05:15:32 +00:00
torrentFiles : & TorrentFiles { dir : cfg . Dirs . Snap } ,
2022-11-20 03:41:20 +00:00
}
2023-12-21 05:15:32 +00:00
d . webseeds . torrentFiles = d . torrentFiles
2023-09-15 06:46:50 +00:00
d . ctx , d . stopMainLoop = context . WithCancel ( ctx )
2023-12-27 22:05:09 +00:00
if cfg . AddTorrentsFromDisk {
if err := d . BuildTorrentFilesIfNeed ( d . ctx ) ; err != nil {
return nil , err
}
if err := d . addTorrentFilesFromDisk ( false ) ; err != nil {
return nil , err
}
2022-11-20 03:41:20 +00:00
}
2023-12-08 11:08:38 +00:00
2023-09-12 05:18:39 +00:00
// CornerCase: no peers -> no anoncments to trackers -> no magnetlink resolution (but magnetlink has filename)
// means we can start adding weebseeds without waiting for `<-t.GotInfo()`
d . wg . Add ( 1 )
2023-11-13 13:10:57 +00:00
2023-09-12 05:18:39 +00:00
go func ( ) {
defer d . wg . Done ( )
2023-11-13 13:10:57 +00:00
if ! discover {
return
}
2023-10-18 07:24:09 +00:00
d . webseeds . Discover ( d . ctx , d . cfg . WebSeedS3Tokens , d . cfg . WebSeedUrls , d . cfg . WebSeedFiles , d . cfg . Dirs . Snap )
2023-10-03 04:21:41 +00:00
// webseeds.Discover may create new .torrent files on disk
2023-10-04 05:34:45 +00:00
if err := d . addTorrentFilesFromDisk ( true ) ; err != nil && ! errors . Is ( err , context . Canceled ) {
2023-10-04 02:57:37 +00:00
d . logger . Warn ( "[snapshots] addTorrentFilesFromDisk" , "err" , err )
2023-10-03 04:21:41 +00:00
}
2023-09-12 05:18:39 +00:00
} ( )
2022-11-20 03:41:20 +00:00
return d , nil
}
2023-09-15 06:46:50 +00:00
func ( d * Downloader ) MainLoopInBackground ( silent bool ) {
2023-03-13 02:12:26 +00:00
d . wg . Add ( 1 )
go func ( ) {
defer d . wg . Done ( )
2023-09-15 06:46:50 +00:00
if err := d . mainLoop ( silent ) ; err != nil {
2023-09-06 03:19:00 +00:00
if ! errors . Is ( err , context . Canceled ) {
2023-10-03 04:21:41 +00:00
d . logger . Warn ( "[snapshots]" , "err" , err )
2023-09-06 03:19:00 +00:00
}
}
2023-03-13 02:12:26 +00:00
} ( )
}
2023-09-15 06:46:50 +00:00
func ( d * Downloader ) mainLoop ( silent bool ) error {
2023-03-13 02:12:26 +00:00
var sem = semaphore . NewWeighted ( int64 ( d . cfg . DownloadSlots ) )
2023-09-06 03:19:00 +00:00
d . wg . Add ( 1 )
2023-03-13 02:12:26 +00:00
go func ( ) {
2023-09-06 03:19:00 +00:00
defer d . wg . Done ( )
2023-08-18 15:33:09 +00:00
// Torrents that are already taken care of
2023-10-13 11:03:52 +00:00
//// First loop drops torrents that were downloaded or are already complete
//// This improves efficiency of download by reducing number of active torrent (empirical observation)
2023-10-04 05:34:45 +00:00
//for torrents := d.torrentClient.Torrents(); len(torrents) > 0; torrents = d.torrentClient.Torrents() {
// select {
// case <-d.ctx.Done():
// return
// default:
// }
// for _, t := range torrents {
// if _, already := torrentMap[t.InfoHash()]; already {
// continue
// }
// select {
// case <-d.ctx.Done():
// return
// case <-t.GotInfo():
// }
// if t.Complete.Bool() {
// atomic.AddUint64(&d.stats.DroppedCompleted, uint64(t.BytesCompleted()))
// atomic.AddUint64(&d.stats.DroppedTotal, uint64(t.Length()))
// t.Drop()
// torrentMap[t.InfoHash()] = struct{}{}
// continue
// }
// if err := sem.Acquire(d.ctx, 1); err != nil {
// return
// }
// t.AllowDataDownload()
// t.DownloadAll()
// torrentMap[t.InfoHash()] = struct{}{}
// d.wg.Add(1)
// go func(t *torrent.Torrent) {
// defer d.wg.Done()
// defer sem.Release(1)
// select {
// case <-d.ctx.Done():
// return
// case <-t.Complete.On():
// }
// atomic.AddUint64(&d.stats.DroppedCompleted, uint64(t.BytesCompleted()))
// atomic.AddUint64(&d.stats.DroppedTotal, uint64(t.Length()))
// t.Drop()
// }(t)
// }
//}
//atomic.StoreUint64(&d.stats.DroppedCompleted, 0)
//atomic.StoreUint64(&d.stats.DroppedTotal, 0)
//d.addTorrentFilesFromDisk(false)
2023-09-15 06:46:50 +00:00
for {
torrents := d . torrentClient . Torrents ( )
2023-10-04 02:57:37 +00:00
select {
case <- d . ctx . Done ( ) :
return
default :
}
2023-09-15 06:46:50 +00:00
for _ , t := range torrents {
2023-10-13 11:03:52 +00:00
if t . Complete . Bool ( ) {
2023-09-15 06:46:50 +00:00
continue
}
2023-10-13 11:03:52 +00:00
if err := sem . Acquire ( d . ctx , 1 ) ; err != nil {
return
}
t . AllowDataDownload ( )
2023-09-06 03:19:00 +00:00
select {
2023-09-15 06:46:50 +00:00
case <- d . ctx . Done ( ) :
2023-09-06 03:19:00 +00:00
return
2023-09-15 06:46:50 +00:00
case <- t . GotInfo ( ) :
2023-09-06 03:19:00 +00:00
}
2023-09-15 06:46:50 +00:00
t . DownloadAll ( )
d . wg . Add ( 1 )
go func ( t * torrent . Torrent ) {
defer d . wg . Done ( )
defer sem . Release ( 1 )
select {
case <- d . ctx . Done ( ) :
return
case <- t . Complete . On ( ) :
}
} ( t )
}
2023-09-28 03:08:29 +00:00
2023-09-28 08:41:39 +00:00
select {
case <- d . ctx . Done ( ) :
return
case <- time . After ( 10 * time . Second ) :
}
2023-03-13 02:12:26 +00:00
}
} ( )
logEvery := time . NewTicker ( 20 * time . Second )
defer logEvery . Stop ( )
statInterval := 20 * time . Second
statEvery := time . NewTicker ( statInterval )
defer statEvery . Stop ( )
2023-10-04 05:34:45 +00:00
var m runtime . MemStats
2023-03-13 02:12:26 +00:00
justCompleted := true
for {
select {
2023-09-15 06:46:50 +00:00
case <- d . ctx . Done ( ) :
return d . ctx . Err ( )
2023-03-13 02:12:26 +00:00
case <- statEvery . C :
d . ReCalcStats ( statInterval )
case <- logEvery . C :
if silent {
continue
}
stats := d . Stats ( )
2023-10-04 05:34:45 +00:00
dbg . ReadMemStats ( & m )
2023-03-13 02:12:26 +00:00
if stats . Completed {
if justCompleted {
justCompleted = false
// force fsync of db. to not loose results of downloading on power-off
2023-09-15 06:46:50 +00:00
_ = d . db . Update ( d . ctx , func ( tx kv . RwTx ) error { return nil } )
2023-03-13 02:12:26 +00:00
}
2023-10-03 04:21:41 +00:00
d . logger . Info ( "[snapshots] Seeding" ,
2023-10-04 05:34:45 +00:00
"up" , common . ByteCount ( stats . UploadRate ) + "/s" ,
2023-03-13 02:12:26 +00:00
"peers" , stats . PeersUnique ,
"conns" , stats . ConnectionsTotal ,
2023-10-04 05:34:45 +00:00
"files" , stats . FilesTotal ,
"alloc" , common . ByteCount ( m . Alloc ) , "sys" , common . ByteCount ( m . Sys ) ,
)
2023-03-13 02:12:26 +00:00
continue
}
2023-10-03 04:21:41 +00:00
d . logger . Info ( "[snapshots] Downloading" ,
2023-10-04 05:34:45 +00:00
"progress" , fmt . Sprintf ( "%.2f%% %s/%s" , stats . Progress , common . ByteCount ( stats . BytesCompleted ) , common . ByteCount ( stats . BytesTotal ) ) ,
"download" , common . ByteCount ( stats . DownloadRate ) + "/s" ,
"upload" , common . ByteCount ( stats . UploadRate ) + "/s" ,
2023-03-13 02:12:26 +00:00
"peers" , stats . PeersUnique ,
"conns" , stats . ConnectionsTotal ,
2023-10-04 05:34:45 +00:00
"files" , stats . FilesTotal ,
"alloc" , common . ByteCount ( m . Alloc ) , "sys" , common . ByteCount ( m . Sys ) ,
)
2023-03-13 02:12:26 +00:00
if stats . PeersUnique == 0 {
2023-09-15 06:46:50 +00:00
ips := d . TorrentClient ( ) . BadPeerIPs ( )
2023-03-13 02:12:26 +00:00
if len ( ips ) > 0 {
2023-10-03 04:21:41 +00:00
d . logger . Info ( "[snapshots] Stats" , "banned" , ips )
2023-03-13 02:12:26 +00:00
}
}
}
}
}
2023-10-04 04:01:02 +00:00
// SnapDir returns the snapshots directory taken from the downloader configuration.
func (d *Downloader) SnapDir() string {
	return d.cfg.Dirs.Snap
}
2022-11-20 03:41:20 +00:00
func ( d * Downloader ) ReCalcStats ( interval time . Duration ) {
2023-12-21 04:00:03 +00:00
d . statsLock . Lock ( )
defer d . statsLock . Unlock ( )
2022-11-20 03:41:20 +00:00
//Call this methods outside of `statsLock` critical section, because they have own locks with contention
torrents := d . torrentClient . Torrents ( )
connStats := d . torrentClient . ConnStats ( )
peers := make ( map [ torrent . PeerID ] struct { } , 16 )
2023-09-10 17:09:33 +00:00
prevStats , stats := d . stats , d . stats
2022-11-20 03:41:20 +00:00
stats . Completed = true
stats . BytesDownload = uint64 ( connStats . BytesReadUsefulIntendedData . Int64 ( ) )
stats . BytesUpload = uint64 ( connStats . BytesWrittenData . Int64 ( ) )
2023-09-10 08:46:27 +00:00
stats . BytesTotal , stats . BytesCompleted , stats . ConnectionsTotal , stats . MetadataReady = atomic . LoadUint64 ( & stats . DroppedTotal ) , atomic . LoadUint64 ( & stats . DroppedCompleted ) , 0 , 0
2023-10-04 02:57:37 +00:00
var zeroProgress [ ] string
var noMetadata [ ] string
2023-12-14 06:53:54 +00:00
2022-11-20 03:41:20 +00:00
for _ , t := range torrents {
select {
case <- t . GotInfo ( ) :
stats . MetadataReady ++
2023-12-21 04:00:03 +00:00
// call methods once - to reduce internal mutex contention
2023-12-01 13:39:43 +00:00
peersOfThisFile := t . PeerConns ( )
weebseedPeersOfThisFile := t . WebseedPeerConns ( )
2023-12-21 04:00:03 +00:00
bytesCompleted := t . BytesCompleted ( )
tLen := t . Length ( )
torrentName := t . Name ( )
2023-12-01 13:39:43 +00:00
for _ , peer := range peersOfThisFile {
2022-11-20 03:41:20 +00:00
stats . ConnectionsTotal ++
peers [ peer . PeerID ] = struct { } { }
}
2023-12-21 04:00:03 +00:00
stats . BytesCompleted += uint64 ( bytesCompleted )
stats . BytesTotal += uint64 ( tLen )
2023-12-01 13:39:43 +00:00
2023-12-21 04:00:03 +00:00
progress := float32 ( float64 ( 100 ) * ( float64 ( bytesCompleted ) / float64 ( tLen ) ) )
2023-12-01 13:39:43 +00:00
if progress == 0 {
2023-12-21 04:00:03 +00:00
zeroProgress = append ( zeroProgress , torrentName )
2023-12-01 13:39:43 +00:00
}
2023-12-21 04:00:03 +00:00
webseedRates , websRates := getWebseedsRatesForlogs ( weebseedPeersOfThisFile , torrentName )
rates , peersRates := getPeersRatesForlogs ( peersOfThisFile , torrentName )
2023-12-15 07:05:54 +00:00
// more detailed statistic: download rate of each peer (for each file)
if ! t . Complete . Bool ( ) && progress != 0 {
2023-12-21 04:00:03 +00:00
d . logger . Log ( d . verbosity , "[snapshots] progress" , "file" , torrentName , "progress" , fmt . Sprintf ( "%.2f%%" , progress ) , "peers" , len ( peersOfThisFile ) , "webseeds" , len ( weebseedPeersOfThisFile ) )
2023-12-15 07:05:54 +00:00
d . logger . Log ( d . verbosity , "[snapshots] webseed peers" , webseedRates ... )
d . logger . Log ( d . verbosity , "[snapshots] bittorrent peers" , rates ... )
}
2023-12-08 11:08:38 +00:00
2023-12-18 08:28:20 +00:00
isDiagEnabled := diagnostics . TypeOf ( diagnostics . SegmentDownloadStatistics { } ) . Enabled ( )
2023-12-15 07:05:54 +00:00
if isDiagEnabled {
diagnostics . Send ( diagnostics . SegmentDownloadStatistics {
2023-12-21 04:00:03 +00:00
Name : torrentName ,
TotalBytes : uint64 ( tLen ) ,
DownloadedBytes : uint64 ( bytesCompleted ) ,
2023-12-15 07:05:54 +00:00
WebseedsCount : len ( weebseedPeersOfThisFile ) ,
PeersCount : len ( peersOfThisFile ) ,
WebseedsRate : websRates ,
PeersRate : peersRates ,
} )
2022-11-20 03:41:20 +00:00
}
2023-12-08 11:08:38 +00:00
2022-11-20 03:41:20 +00:00
default :
2023-10-04 02:57:37 +00:00
noMetadata = append ( noMetadata , t . Name ( ) )
2022-11-20 03:41:20 +00:00
}
stats . Completed = stats . Completed && t . Complete . Bool ( )
}
2023-12-08 11:08:38 +00:00
2023-10-04 02:57:37 +00:00
if len ( noMetadata ) > 0 {
2023-10-04 05:34:45 +00:00
amount := len ( noMetadata )
2023-10-04 02:57:37 +00:00
if len ( noMetadata ) > 5 {
noMetadata = append ( noMetadata [ : 5 ] , "..." )
}
2023-10-04 05:34:45 +00:00
d . logger . Log ( d . verbosity , "[snapshots] no metadata yet" , "files" , amount , "list" , strings . Join ( noMetadata , "," ) )
2023-10-04 02:57:37 +00:00
}
if len ( zeroProgress ) > 0 {
2023-10-04 05:34:45 +00:00
amount := len ( zeroProgress )
2023-10-04 02:57:37 +00:00
if len ( zeroProgress ) > 5 {
zeroProgress = append ( zeroProgress [ : 5 ] , "..." )
}
2023-10-04 05:34:45 +00:00
d . logger . Log ( d . verbosity , "[snapshots] no progress yet" , "files" , amount , "list" , strings . Join ( zeroProgress , "," ) )
2023-10-04 02:57:37 +00:00
}
2022-11-20 03:41:20 +00:00
stats . DownloadRate = ( stats . BytesDownload - prevStats . BytesDownload ) / uint64 ( interval . Seconds ( ) )
stats . UploadRate = ( stats . BytesUpload - prevStats . BytesUpload ) / uint64 ( interval . Seconds ( ) )
if stats . BytesTotal == 0 {
stats . Progress = 0
} else {
stats . Progress = float32 ( float64 ( 100 ) * ( float64 ( stats . BytesCompleted ) / float64 ( stats . BytesTotal ) ) )
2023-11-27 01:49:19 +00:00
if int ( stats . Progress ) == 100 && ! stats . Completed {
2022-11-20 03:41:20 +00:00
stats . Progress = 99.99
}
}
stats . PeersUnique = int32 ( len ( peers ) )
stats . FilesTotal = int32 ( len ( torrents ) )
2023-09-10 17:09:33 +00:00
d . stats = stats
2022-11-20 03:41:20 +00:00
}
2023-12-15 07:05:54 +00:00
func getWebseedsRatesForlogs ( weebseedPeersOfThisFile [ ] * torrent . Peer , fName string ) ( [ ] interface { } , uint64 ) {
2023-12-14 06:53:54 +00:00
totalRate := uint64 ( 0 )
averageRate := uint64 ( 0 )
webseedRates := make ( [ ] interface { } , 0 , len ( weebseedPeersOfThisFile ) * 2 )
2023-12-15 07:05:54 +00:00
webseedRates = append ( webseedRates , "file" , fName )
2023-12-14 06:53:54 +00:00
for _ , peer := range weebseedPeersOfThisFile {
urlS := strings . Trim ( strings . TrimPrefix ( peer . String ( ) , "webseed peer for " ) , "\"" )
if urlObj , err := url . Parse ( urlS ) ; err == nil {
if shortUrl , err := url . JoinPath ( urlObj . Host , urlObj . Path ) ; err == nil {
rate := uint64 ( peer . DownloadRate ( ) )
totalRate += rate
webseedRates = append ( webseedRates , shortUrl , fmt . Sprintf ( "%s/s" , common . ByteCount ( rate ) ) )
}
}
}
lenght := uint64 ( len ( weebseedPeersOfThisFile ) )
if lenght > 0 {
averageRate = totalRate / lenght
}
return webseedRates , averageRate
}
2023-12-15 07:05:54 +00:00
func getPeersRatesForlogs ( peersOfThisFile [ ] * torrent . PeerConn , fName string ) ( [ ] interface { } , uint64 ) {
2023-12-14 06:53:54 +00:00
totalRate := uint64 ( 0 )
averageRate := uint64 ( 0 )
rates := make ( [ ] interface { } , 0 , len ( peersOfThisFile ) * 2 )
2023-12-15 07:05:54 +00:00
rates = append ( rates , "file" , fName )
2023-12-14 06:53:54 +00:00
for _ , peer := range peersOfThisFile {
dr := uint64 ( peer . DownloadRate ( ) )
rates = append ( rates , peer . PeerClientName . Load ( ) , fmt . Sprintf ( "%s/s" , common . ByteCount ( dr ) ) )
totalRate += dr
}
lenght := uint64 ( len ( peersOfThisFile ) )
if lenght > 0 {
averageRate = totalRate / uint64 ( len ( peersOfThisFile ) )
}
return rates , averageRate
}
2024-01-08 09:01:02 +00:00
func ( d * Downloader ) VerifyData ( ctx context . Context , whiteList [ ] string , failFast bool ) error {
2022-11-20 03:41:20 +00:00
total := 0
2024-01-08 09:01:02 +00:00
allTorrents := d . torrentClient . Torrents ( )
toVerify := make ( [ ] * torrent . Torrent , 0 , len ( allTorrents ) )
for _ , t := range allTorrents {
2022-11-20 03:41:20 +00:00
select {
case <- t . GotInfo ( ) :
2023-11-27 01:49:19 +00:00
case <- ctx . Done ( ) :
return ctx . Err ( )
2022-11-20 03:41:20 +00:00
}
2024-01-08 09:01:02 +00:00
if len ( whiteList ) > 0 {
name := t . Name ( )
exactOrPartialMatch := slices . ContainsFunc ( whiteList , func ( s string ) bool {
return name == s || strings . HasSuffix ( name , s ) || strings . HasPrefix ( name , s )
} )
if ! exactOrPartialMatch {
continue
}
}
toVerify = append ( toVerify , t )
total += t . NumPieces ( )
2022-11-20 03:41:20 +00:00
}
2024-01-08 09:01:02 +00:00
d . logger . Info ( "[snapshots] Verify start" )
defer d . logger . Info ( "[snapshots] Verify done" , "files" , len ( toVerify ) , "whiteList" , whiteList )
2022-11-20 03:41:20 +00:00
2023-05-17 09:52:34 +00:00
completedPieces := & atomic . Uint64 { }
{
2023-10-04 05:34:45 +00:00
logEvery := time . NewTicker ( 20 * time . Second )
2023-05-17 09:52:34 +00:00
defer logEvery . Stop ( )
2023-09-06 03:19:00 +00:00
d . wg . Add ( 1 )
2023-05-17 09:52:34 +00:00
go func ( ) {
2023-09-06 03:19:00 +00:00
defer d . wg . Done ( )
2023-05-17 09:52:34 +00:00
for {
2022-11-20 03:41:20 +00:00
select {
2023-05-17 09:52:34 +00:00
case <- ctx . Done ( ) :
return
2022-11-20 03:41:20 +00:00
case <- logEvery . C :
2023-10-03 04:21:41 +00:00
d . logger . Info ( "[snapshots] Verify" , "progress" , fmt . Sprintf ( "%.2f%%" , 100 * float64 ( completedPieces . Load ( ) ) / float64 ( total ) ) )
2022-11-20 03:41:20 +00:00
}
}
2023-05-17 09:52:34 +00:00
} ( )
}
g , ctx := errgroup . WithContext ( ctx )
// torrent lib internally limiting amount of hashers per file
// set limit here just to make load predictable, not to control Disk/CPU consumption
2023-09-15 06:46:50 +00:00
g . SetLimit ( runtime . GOMAXPROCS ( - 1 ) * 4 )
2024-01-08 09:01:02 +00:00
for _ , t := range toVerify {
2023-05-17 09:52:34 +00:00
t := t
g . Go ( func ( ) error {
2024-01-08 09:01:02 +00:00
if failFast {
return VerifyFileFailFast ( ctx , t , d . SnapDir ( ) , completedPieces )
}
return ScheduleVerifyFile ( ctx , t , completedPieces )
2023-05-17 09:52:34 +00:00
} )
2022-11-20 03:41:20 +00:00
}
2023-10-04 04:01:02 +00:00
if err := g . Wait ( ) ; err != nil {
return err
}
2023-02-10 06:20:13 +00:00
// force fsync of db. to not loose results of validation on power-off
return d . db . Update ( context . Background ( ) , func ( tx kv . RwTx ) error { return nil } )
2022-11-20 03:41:20 +00:00
}
2023-09-15 06:46:50 +00:00
// AddNewSeedableFile decides what we do depending on wether we have the .seg file or the .torrent file
// have .torrent no .seg => get .seg file from .torrent
// have .seg no .torrent => get .torrent from .seg
func ( d * Downloader ) AddNewSeedableFile ( ctx context . Context , name string ) error {
2023-11-30 09:58:23 +00:00
ff , ok := snaptype . ParseFileName ( "" , name )
if ok {
if ! ff . Seedable ( ) {
return nil
}
} else {
if ! e3seedable ( name ) {
return nil
}
}
2023-09-15 06:46:50 +00:00
// if we don't have the torrent file we build it if we have the .seg file
2023-12-21 05:15:32 +00:00
err := BuildTorrentIfNeed ( ctx , name , d . SnapDir ( ) , d . torrentFiles )
2023-09-15 06:46:50 +00:00
if err != nil {
2023-12-01 11:50:06 +00:00
return fmt . Errorf ( "AddNewSeedableFile: %w" , err )
2023-09-15 06:46:50 +00:00
}
2023-12-21 05:15:32 +00:00
ts , err := d . torrentFiles . LoadByName ( name )
2023-09-15 06:46:50 +00:00
if err != nil {
2023-12-01 11:50:06 +00:00
return fmt . Errorf ( "AddNewSeedableFile: %w" , err )
2023-09-15 06:46:50 +00:00
}
2023-12-21 03:59:26 +00:00
_ , _ , err = addTorrentFile ( ctx , ts , d . torrentClient , d . webseeds )
2023-09-15 06:46:50 +00:00
if err != nil {
return fmt . Errorf ( "addTorrentFile: %w" , err )
}
return nil
}
2023-09-06 03:19:00 +00:00
2023-12-12 09:05:56 +00:00
func ( d * Downloader ) alreadyHaveThisName ( name string ) bool {
2023-09-15 06:46:50 +00:00
for _ , t := range d . torrentClient . Torrents ( ) {
2023-12-12 09:05:56 +00:00
select {
case <- t . GotInfo ( ) :
if t . Name ( ) == name {
return true
}
default :
2023-09-15 06:46:50 +00:00
}
}
return false
}
2023-12-12 09:05:56 +00:00
func ( d * Downloader ) AddMagnetLink ( ctx context . Context , infoHash metainfo . Hash , name string ) error {
// Paranoic Mode on: if same file changed infoHash - skip it
// Example:
// - Erigon generated file X with hash H1. User upgraded Erigon. New version has preverified file X with hash H2. Must ignore H2 (don't send to Downloader)
if d . alreadyHaveThisName ( name ) {
2023-09-13 08:21:38 +00:00
return nil
2023-09-06 03:19:00 +00:00
}
2024-01-24 11:41:31 +00:00
if d . torrentFiles . newDownloadsAreProhibited ( ) {
2023-12-12 09:05:56 +00:00
return nil
}
2023-09-15 06:46:50 +00:00
mi := & metainfo . MetaInfo { AnnounceList : Trackers }
2023-09-12 05:18:39 +00:00
magnet := mi . Magnet ( & infoHash , & metainfo . Info { Name : name } )
2023-10-04 05:34:45 +00:00
spec , err := torrent . TorrentSpecFromMagnetUri ( magnet . String ( ) )
if err != nil {
return err
}
2023-12-21 03:59:26 +00:00
t , ok , err := addTorrentFile ( ctx , spec , d . torrentClient , d . webseeds )
2023-09-06 03:19:00 +00:00
if err != nil {
2023-09-13 08:21:38 +00:00
return err
2023-09-06 03:19:00 +00:00
}
2023-12-21 03:59:26 +00:00
if ! ok {
return nil
}
2023-09-06 03:19:00 +00:00
d . wg . Add ( 1 )
go func ( t * torrent . Torrent ) {
defer d . wg . Done ( )
select {
case <- ctx . Done ( ) :
return
case <- t . GotInfo ( ) :
}
mi := t . Metainfo ( )
2023-12-21 05:15:32 +00:00
if err := CreateTorrentFileIfNotExists ( d . SnapDir ( ) , t . Info ( ) , & mi , d . torrentFiles ) ; err != nil {
2023-10-04 02:57:37 +00:00
d . logger . Warn ( "[snapshots] create torrent file" , "err" , err )
2023-09-06 03:19:00 +00:00
return
}
2023-10-04 02:57:37 +00:00
urls , ok := d . webseeds . ByFileName ( t . Name ( ) )
if ok {
t . AddWebSeeds ( urls )
}
2023-09-06 03:19:00 +00:00
} ( t )
//log.Debug("[downloader] downloaded both seg and torrent files", "hash", infoHash)
2023-09-13 08:21:38 +00:00
return nil
2023-09-06 03:19:00 +00:00
}
2023-10-04 04:01:02 +00:00
func seedableFiles ( dirs datadir . Dirs ) ( [ ] string , error ) {
files , err := seedableSegmentFiles ( dirs . Snap )
2023-09-12 05:18:39 +00:00
if err != nil {
return nil , fmt . Errorf ( "seedableSegmentFiles: %w" , err )
}
2024-01-08 09:01:02 +00:00
l1 , err := seedableSnapshotsBySubDir ( dirs . Snap , "idx" )
if err != nil {
return nil , err
}
l2 , err := seedableSnapshotsBySubDir ( dirs . Snap , "history" )
2023-09-12 05:18:39 +00:00
if err != nil {
2023-10-04 04:01:02 +00:00
return nil , err
2023-09-12 05:18:39 +00:00
}
2024-01-08 09:01:02 +00:00
l3 , err := seedableSnapshotsBySubDir ( dirs . Snap , "domain" )
2023-10-04 04:01:02 +00:00
if err != nil {
return nil , err
}
2024-01-08 09:01:02 +00:00
files = append ( append ( append ( files , l1 ... ) , l2 ... ) , l3 ... )
2023-09-12 05:18:39 +00:00
return files , nil
}
2023-10-04 05:34:45 +00:00
func ( d * Downloader ) addTorrentFilesFromDisk ( quiet bool ) error {
2023-10-04 02:57:37 +00:00
logEvery := time . NewTicker ( 20 * time . Second )
defer logEvery . Stop ( )
2023-10-04 05:34:45 +00:00
2023-12-21 05:15:32 +00:00
files , err := AllTorrentSpecs ( d . cfg . Dirs , d . torrentFiles )
2022-11-20 03:41:20 +00:00
if err != nil {
return err
}
2023-10-04 02:57:37 +00:00
for i , ts := range files {
2023-12-21 03:59:26 +00:00
_ , _ , err := addTorrentFile ( d . ctx , ts , d . torrentClient , d . webseeds )
2023-10-03 04:21:41 +00:00
if err != nil {
return err
}
2023-10-04 02:57:37 +00:00
select {
case <- logEvery . C :
2023-10-04 05:34:45 +00:00
if ! quiet {
log . Info ( "[snapshots] Adding .torrent files" , "progress" , fmt . Sprintf ( "%d/%d" , i , len ( files ) ) )
}
2023-10-04 02:57:37 +00:00
default :
}
2023-10-03 04:21:41 +00:00
}
return nil
}
func ( d * Downloader ) BuildTorrentFilesIfNeed ( ctx context . Context ) error {
2023-12-21 05:15:32 +00:00
return BuildTorrentFilesIfNeed ( ctx , d . cfg . Dirs , d . torrentFiles )
2022-11-20 03:41:20 +00:00
}
2023-09-10 17:09:33 +00:00
func ( d * Downloader ) Stats ( ) AggStats {
2022-11-20 03:41:20 +00:00
d . statsLock . RLock ( )
defer d . statsLock . RUnlock ( )
2023-09-10 17:09:33 +00:00
return d . stats
2022-11-20 03:41:20 +00:00
}
func ( d * Downloader ) Close ( ) {
2023-03-13 02:12:26 +00:00
d . stopMainLoop ( )
d . wg . Wait ( )
2022-11-20 03:41:20 +00:00
d . torrentClient . Close ( )
if err := d . folder . Close ( ) ; err != nil {
2023-10-03 04:21:41 +00:00
d . logger . Warn ( "[snapshots] folder.close" , "err" , err )
2022-11-20 03:41:20 +00:00
}
if err := d . pieceCompletionDB . Close ( ) ; err != nil {
2023-10-03 04:21:41 +00:00
d . logger . Warn ( "[snapshots] pieceCompletionDB.close" , "err" , err )
2022-11-20 03:41:20 +00:00
}
d . db . Close ( )
}
// PeerID returns the torrent client's peer ID as a byte slice. The slice is
// backed by a local copy of the ID.
func (d *Downloader) PeerID() []byte {
	id := d.torrentClient.PeerID()
	return id[:]
}
// StopSeeding drops the torrent with the given hash from the client and waits
// until the torrent is closed. An unknown hash is not an error. The returned
// error is always nil.
func (d *Downloader) StopSeeding(hash metainfo.Hash) error {
	t, found := d.torrentClient.Torrent(hash)
	if found {
		// take the channel before Drop, then block until the torrent is closed
		closed := t.Closed()
		t.Drop()
		<-closed
	}
	return nil
}
2023-09-15 06:46:50 +00:00
// TorrentClient exposes the underlying torrent client.
func (d *Downloader) TorrentClient() *torrent.Client {
	return d.torrentClient
}
2022-11-20 03:41:20 +00:00
2023-10-12 07:11:46 +00:00
func openClient ( ctx context . Context , dbDir , snapDir string , cfg * torrent . ClientConfig ) ( db kv . RwDB , c storage . PieceCompletion , m storage . ClientImplCloser , torrentClient * torrent . Client , err error ) {
2022-11-20 03:41:20 +00:00
db , err = mdbx . NewMDBX ( log . New ( ) ) .
Label ( kv . DownloaderDB ) .
WithTableCfg ( func ( defaultBuckets kv . TableCfg ) kv . TableCfg { return kv . DownloaderTablesCfg } ) .
2023-10-05 07:25:00 +00:00
GrowthStep ( 16 * datasize . MB ) .
2023-10-11 05:53:34 +00:00
MapSize ( 16 * datasize . GB ) .
2023-10-20 06:54:38 +00:00
PageSize ( uint64 ( 8 * datasize . KB ) ) .
2023-10-04 04:01:02 +00:00
Path ( dbDir ) .
2023-10-12 07:11:46 +00:00
Open ( ctx )
2022-11-20 03:41:20 +00:00
if err != nil {
2023-09-15 06:46:50 +00:00
return nil , nil , nil , nil , fmt . Errorf ( "torrentcfg.openClient: %w" , err )
2022-11-20 03:41:20 +00:00
}
2023-03-14 08:41:43 +00:00
c , err = NewMdbxPieceCompletion ( db )
2022-11-20 03:41:20 +00:00
if err != nil {
return nil , nil , nil , nil , fmt . Errorf ( "torrentcfg.NewMdbxPieceCompletion: %w" , err )
}
m = storage . NewMMapWithCompletion ( snapDir , c )
cfg . DefaultStorage = m
2023-10-04 04:01:02 +00:00
torrentClient , err = torrent . NewClient ( cfg )
2022-11-20 03:41:20 +00:00
if err != nil {
return nil , nil , nil , nil , fmt . Errorf ( "torrent.NewClient: %w" , err )
}
return db , c , m , torrentClient , nil
}