prysm-pulse/shared/mputil/scatter.go
Jim McDonald 485fc538c3 Utility to parallelise functions over arrays (#3813)
* Initial version of scatter

* Add mutex for scatter batch

* Provide mutex to worker processes

* Embed mputil

* Add notes on scatter

* Tidy-up

* Add mutex test

* Simplify scatter for users

* Tidy-ups/code coverage

* Gazelle update

* Add benchmark

* Mutex to RWMutex

* Add test against internal functions

* Bazel fixes

* Fix benchmark

* Benchmark values to constants

* Update shared/mputil/scatter.go

Co-Authored-By: Raul Jordan <raul@prysmaticlabs.com>

* Update shared/mputil/scatter.go

Co-Authored-By: Raul Jordan <raul@prysmaticlabs.com>

* Moved struct to top of file

* Add featureconfig for scatter

* Gate scatter behind feature flag

* Lint fixes

* fmt
2019-11-03 16:25:52 -05:00

78 lines
2.0 KiB
Go

package mputil
import (
"errors"
"runtime"
"sync"
)
// WorkerResults are the results of a scatter worker.
type WorkerResults struct {
Offset int
Extent interface{}
}
// Scatter scatters a computation across multiple goroutines.
// This breaks the task in to a number of chunks and executes those chunks in parallel with the function provided.
// Results returned are collected and presented a a set of WorkerResults, which can be reassembled by the calling function.
// Any error that occurs in the workers will be passed back to the calling function.
func Scatter(inputLen int, sFunc func(int, int, *sync.RWMutex) (interface{}, error)) ([]*WorkerResults, error) {
if inputLen <= 0 {
return nil, errors.New("input length must be greater than 0")
}
chunkSize := calculateChunkSize(inputLen)
workers := inputLen / chunkSize
if inputLen%chunkSize != 0 {
workers++
}
resultCh := make(chan *WorkerResults, workers)
defer close(resultCh)
errorCh := make(chan error, workers)
defer close(errorCh)
mutex := new(sync.RWMutex)
for worker := 0; worker < workers; worker++ {
offset := worker * chunkSize
entries := chunkSize
if offset+entries > inputLen {
entries = inputLen - offset
}
go func(offset int, entries int) {
extent, err := sFunc(offset, entries, mutex)
if err != nil {
errorCh <- err
} else {
resultCh <- &WorkerResults{
Offset: offset,
Extent: extent,
}
}
}(offset, entries)
}
// Collect results from workers
results := make([]*WorkerResults, workers)
for i := 0; i < workers; i++ {
select {
case result := <-resultCh:
results[i] = result
case err := <-errorCh:
return nil, err
}
}
return results, nil
}
// calculateChunkSize calculates a suitable chunk size for the purposes of parallelisation.
func calculateChunkSize(items int) int {
// Start with a simple even split
chunkSize := items / runtime.GOMAXPROCS(0)
// Add 1 if we have leftovers (or if we have fewer items than processors).
if chunkSize == 0 || items%chunkSize != 0 {
chunkSize++
}
return chunkSize
}