// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see .
package bloombits
import (
"bytes"
"context"
"errors"
"math"
"sort"
"sync"
"sync/atomic"
"time"
"github.com/ledgerwatch/erigon/common/bitutil"
"github.com/ledgerwatch/erigon/crypto"
)
// bloomIndexes represents the bit indexes inside the bloom filter that belong
// to some key.
type bloomIndexes [3]uint
// calcBloomIndexes returns the bloom filter bit indexes belonging to the given key.
func calcBloomIndexes(b []byte) bloomIndexes {
b = crypto.Keccak256(b)
var idxs bloomIndexes
for i := 0; i < len(idxs); i++ {
idxs[i] = (uint(b[2*i])<<8)&2047 + uint(b[2*i+1])
}
return idxs
}
// partialMatches with a non-nil vector represents a section in which some sub-
// matchers have already found potential matches. Subsequent sub-matchers will
// binary AND their matches with this vector. If vector is nil, it represents a
// section to be processed by the first sub-matcher.
type partialMatches struct {
section uint64
bitset []byte
}
// Retrieval represents a request for retrieval task assignments for a given
// bit with the given number of fetch elements, or a response for such a request.
// It can also have the actual results set to be used as a delivery data struct.
//
// The contest and error fields are used by the light client to terminate matching
// early if an error is encountered on some path of the pipeline.
type Retrieval struct {
Bit uint
Sections []uint64
Bitsets [][]byte
Context context.Context
Error error
}
// Matcher is a pipelined system of schedulers and logic matchers which perform
// binary AND/OR operations on the bit-streams, creating a stream of potential
// blocks to inspect for data content.
type Matcher struct {
sectionSize uint64 // Size of the data batches to filter on
filters [][]bloomIndexes // Filter the system is matching for
schedulers map[uint]*scheduler // Retrieval schedulers for loading bloom bits
retrievers chan chan uint // Retriever processes waiting for bit allocations
counters chan chan uint // Retriever processes waiting for task count reports
retrievals chan chan *Retrieval // Retriever processes waiting for task allocations
deliveries chan *Retrieval // Retriever processes waiting for task response deliveries
running uint32 // Atomic flag whether a session is live or not
}
// NewMatcher creates a new pipeline for retrieving bloom bit streams and doing
// address and topic filtering on them. Setting a filter component to `nil` is
// allowed and will result in that filter rule being skipped (OR 0x11...1).
func NewMatcher(sectionSize uint64, filters [][][]byte) *Matcher {
// Create the matcher instance
m := &Matcher{
sectionSize: sectionSize,
schedulers: make(map[uint]*scheduler),
retrievers: make(chan chan uint),
counters: make(chan chan uint),
retrievals: make(chan chan *Retrieval),
deliveries: make(chan *Retrieval),
}
// Calculate the bloom bit indexes for the groups we're interested in
m.filters = nil
for _, filter := range filters {
// Gather the bit indexes of the filter rule, special casing the nil filter
if len(filter) == 0 {
continue
}
bloomBits := make([]bloomIndexes, len(filter))
for i, clause := range filter {
if clause == nil {
bloomBits = nil
break
}
bloomBits[i] = calcBloomIndexes(clause)
}
// Accumulate the filter rules if no nil rule was within
if bloomBits != nil {
m.filters = append(m.filters, bloomBits)
}
}
// For every bit, create a scheduler to load/download the bit vectors
for _, bloomIndexLists := range m.filters {
for _, bloomIndexList := range bloomIndexLists {
for _, bloomIndex := range bloomIndexList {
m.addScheduler(bloomIndex)
}
}
}
return m
}
// addScheduler adds a bit stream retrieval scheduler for the given bit index if
// it has not existed before. If the bit is already selected for filtering, the
// existing scheduler can be used.
func (m *Matcher) addScheduler(idx uint) {
if _, ok := m.schedulers[idx]; ok {
return
}
m.schedulers[idx] = newScheduler(idx)
}
// Start starts the matching process and returns a stream of bloom matches in
// a given range of blocks. If there are no more matches in the range, the result
// channel is closed.
func (m *Matcher) Start(ctx context.Context, begin, end uint64, results chan uint64) (*MatcherSession, error) {
// Make sure we're not creating concurrent sessions
if atomic.SwapUint32(&m.running, 1) == 1 {
return nil, errors.New("matcher already running")
}
defer atomic.StoreUint32(&m.running, 0)
// Initiate a new matching round
session := &MatcherSession{
matcher: m,
quit: make(chan struct{}),
ctx: ctx,
}
for _, scheduler := range m.schedulers {
scheduler.reset()
}
sink := m.run(begin, end, cap(results), session)
// Read the output from the result sink and deliver to the user
session.pend.Add(1)
go func() {
defer session.pend.Done()
defer close(results)
for {
select {
case <-session.quit:
return
case res, ok := <-sink:
// New match result found
if !ok {
return
}
// Calculate the first and last blocks of the section
sectionStart := res.section * m.sectionSize
first := sectionStart
if begin > first {
first = begin
}
last := sectionStart + m.sectionSize - 1
if end < last {
last = end
}
// Iterate over all the blocks in the section and return the matching ones
for i := first; i <= last; i++ {
// Skip the entire byte if no matches are found inside (and we're processing an entire byte!)
next := res.bitset[(i-sectionStart)/8]
if next == 0 {
if i%8 == 0 {
i += 7
}
continue
}
// Some bit it set, do the actual submatching
if bit := 7 - i%8; next&(1<= req.section })
requests[req.bit] = append(queue[:index], append([]uint64{req.section}, queue[index:]...)...)
// If it's a new bit and we have waiting fetchers, allocate to them
if len(queue) == 0 {
assign(req.bit)
}
case fetcher := <-retrievers:
// New retriever arrived, find the lowest section-ed bit to assign
bit, best := uint(0), uint64(math.MaxUint64)
for idx := range unallocs {
if requests[idx][0] < best {
bit, best = idx, requests[idx][0]
}
}
// Stop tracking this bit (and alloc notifications if no more work is available)
delete(unallocs, bit)
if len(unallocs) == 0 {
retrievers = nil
}
allocs++
fetcher <- bit
case fetcher := <-m.counters:
// New task count request arrives, return number of items
fetcher <- uint(len(requests[<-fetcher]))
case fetcher := <-m.retrievals:
// New fetcher waiting for tasks to retrieve, assign
task := <-fetcher
if want := len(task.Sections); want >= len(requests[task.Bit]) {
task.Sections = requests[task.Bit]
delete(requests, task.Bit)
} else {
task.Sections = append(task.Sections[:0], requests[task.Bit][:want]...)
requests[task.Bit] = append(requests[task.Bit][:0], requests[task.Bit][want:]...)
}
fetcher <- task
// If anything was left unallocated, try to assign to someone else
if len(requests[task.Bit]) > 0 {
assign(task.Bit)
}
case result := <-m.deliveries:
// New retrieval task response from fetcher, split out missing sections and
// deliver complete ones
var (
sections = make([]uint64, 0, len(result.Sections))
bitsets = make([][]byte, 0, len(result.Bitsets))
missing = make([]uint64, 0, len(result.Sections))
)
for i, bitset := range result.Bitsets {
if len(bitset) == 0 {
missing = append(missing, result.Sections[i])
continue
}
sections = append(sections, result.Sections[i])
bitsets = append(bitsets, bitset)
}
m.schedulers[result.Bit].deliver(sections, bitsets)
allocs--
// Reschedule missing sections and allocate bit if newly available
if len(missing) > 0 {
queue := requests[result.Bit]
for _, section := range missing {
index := sort.Search(len(queue), func(i int) bool { return queue[i] >= section })
queue = append(queue[:index], append([]uint64{section}, queue[index:]...)...)
}
requests[result.Bit] = queue
if len(queue) == len(missing) {
assign(result.Bit)
}
}
// End the session when all pending deliveries have arrived.
if shutdown == nil && allocs == 0 {
return
}
}
}
}
// MatcherSession is returned by a started matcher to be used as a terminator
// for the actively running matching operation.
type MatcherSession struct {
matcher *Matcher
closer sync.Once // Sync object to ensure we only ever close once
quit chan struct{} // Quit channel to request pipeline termination
ctx context.Context // Context used by the light client to abort filtering
err atomic.Value // Global error to track retrieval failures deep in the chain
pend sync.WaitGroup
}
// Close stops the matching process and waits for all subprocesses to terminate
// before returning. The timeout may be used for graceful shutdown, allowing the
// currently running retrievals to complete before this time.
func (s *MatcherSession) Close() {
s.closer.Do(func() {
// Signal termination and wait for all goroutines to tear down
close(s.quit)
s.pend.Wait()
})
}
// Error returns any failure encountered during the matching session.
func (s *MatcherSession) Error() error {
if err := s.err.Load(); err != nil {
return err.(error)
}
return nil
}
// allocateRetrieval assigns a bloom bit index to a client process that can either
// immediately request and fetch the section contents assigned to this bit or wait
// a little while for more sections to be requested.
func (s *MatcherSession) allocateRetrieval() (uint, bool) {
fetcher := make(chan uint)
select {
case <-s.quit:
return 0, false
case s.matcher.retrievers <- fetcher:
bit, ok := <-fetcher
return bit, ok
}
}
// pendingSections returns the number of pending section retrievals belonging to
// the given bloom bit index.
func (s *MatcherSession) pendingSections(bit uint) int {
fetcher := make(chan uint)
select {
case <-s.quit:
return 0
case s.matcher.counters <- fetcher:
fetcher <- bit
return int(<-fetcher)
}
}
// allocateSections assigns all or part of an already allocated bit-task queue
// to the requesting process.
func (s *MatcherSession) allocateSections(bit uint, count int) []uint64 {
fetcher := make(chan *Retrieval)
select {
case <-s.quit:
return nil
case s.matcher.retrievals <- fetcher:
task := &Retrieval{
Bit: bit,
Sections: make([]uint64, count),
}
fetcher <- task
return (<-fetcher).Sections
}
}
// deliverSections delivers a batch of section bit-vectors for a specific bloom
// bit index to be injected into the processing pipeline.
func (s *MatcherSession) deliverSections(bit uint, sections []uint64, bitsets [][]byte) {
s.matcher.deliveries <- &Retrieval{Bit: bit, Sections: sections, Bitsets: bitsets}
}
// Multiplex polls the matcher session for retrieval tasks and multiplexes it into
// the requested retrieval queue to be serviced together with other sessions.
//
// This method will block for the lifetime of the session. Even after termination
// of the session, any request in-flight need to be responded to! Empty responses
// are fine though in that case.
func (s *MatcherSession) Multiplex(batch int, wait time.Duration, mux chan chan *Retrieval) {
for {
// Allocate a new bloom bit index to retrieve data for, stopping when done
bit, ok := s.allocateRetrieval()
if !ok {
return
}
// Bit allocated, throttle a bit if we're below our batch limit
if s.pendingSections(bit) < batch {
select {
case <-s.quit:
// Session terminating, we can't meaningfully service, abort
s.allocateSections(bit, 0)
s.deliverSections(bit, []uint64{}, [][]byte{})
return
case <-time.After(wait):
// Throttling up, fetch whatever's available
}
}
// Allocate as much as we can handle and request servicing
sections := s.allocateSections(bit, batch)
request := make(chan *Retrieval)
select {
case <-s.quit:
// Session terminating, we can't meaningfully service, abort
s.deliverSections(bit, sections, make([][]byte, len(sections)))
return
case mux <- request:
// Retrieval accepted, something must arrive before we're aborting
request <- &Retrieval{Bit: bit, Sections: sections, Context: s.ctx}
result := <-request
if result.Error != nil {
s.err.Store(result.Error)
s.Close()
}
s.deliverSections(result.Bit, result.Sections, result.Bitsets)
}
}
}