mirror of
https://gitlab.com/pulsechaincom/prysm-pulse.git
synced 2025-01-20 16:41:11 +00:00
a664a07303
* multi value slice * extract helper function * comments * setup godoc fix * value benchmarks * use guid * fix bug when deleting items * remove callback and rename MultiValue * godoc * tiny change * Nishant's review * typos --------- Co-authored-by: Nishant Das <nishdas93@gmail.com>
489 lines
14 KiB
Go
489 lines
14 KiB
Go
// Package mvslice defines a multi value slice container. The purpose of the container is to be a replacement for a slice
|
||
// in scenarios where many objects of the same type share a copy of an identical or nearly identical slice.
|
||
// In such case using the multi value slice should result in less memory allocation because many values of the slice can be shared between objects.
|
||
//
|
||
// The multi value slice should be initialized by calling the Init function and passing the initial values of the slice.
|
||
// After initializing the slice, it can be shared between objects by using the Copy function.
|
||
// Note that simply assigning the same multi value slice to several objects is not enough for it to work properly.
|
||
// Calling Copy is required in most circumstances (an exception is when the source object has only shared values).
|
||
//
|
||
// s := &Slice[int, *testObject]{}
|
||
// s.Init([]int{1, 2, 3})
|
||
// src := &testObject{id: id1, slice: s} // id1 is some UUID
|
||
// dst := &testObject{id: id2, slice: s} // id2 is some UUID
|
||
// s.Copy(src, dst)
|
||
//
|
||
// Each Value stores a value of type V along with identifiers to objects that have this value.
|
||
// A MultiValueItem is a slice of Value elements. A Slice contains shared items, individual items and appended items.
|
||
//
|
||
// You can think of a shared value as the original value (i.e. the value at the point in time when the multi value slice was constructed),
|
||
// and of an individual value as a changed value.
|
||
// There is no notion of a shared appended value because appended values never have an original value (appended values are empty when the slice is created).
|
||
//
|
||
// Whenever any of the slice’s functions (apart from Init) is called, the function needs to know which object it is dealing with.
|
||
// This is because if an object has an individual/appended value, the function must get/set/change this particular value instead of the shared value
|
||
// or another individual/appended value.
|
||
//
|
||
// The way appended items are stored is as follows. Let’s say appended items were a regular slice that is initially empty,
|
||
// and we append an item for object0 and then append another item for object1.
|
||
// Now we have two items in the slice, but object1 only has an item in index 1. This makes things very confusing and hard to deal with.
|
||
// If we make appended items a []*Value, things don’t become much better.
|
||
// It is therefore easiest to make appended items a []*MultiValueItem, which allows each object to have its own values starting at index 0
|
||
// and not having any “gaps”.
|
||
//
|
||
// The Detach function should be called when an object gets garbage collected.
|
||
// Its purpose is to clean up the slice from individual/appended values of the collected object.
|
||
// Otherwise the slice will get polluted with values for non-existing objects.
|
||
//
|
||
// Example diagram illustrating what happens after copying, updating and detaching:
|
||
//
|
||
// Create object o1 with value 10. At this point we only have a shared value.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 |
|
||
//
|
||
// Copy object o1 to object o2. o2 shares the value with o1, no individual value is created.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 |
|
||
//
|
||
// Update value of object o2 to 20. An individual value is created.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 | 20: [o2]
|
||
//
|
||
// Copy object o2 to object o3. The individual value's object list is updated.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 | 20: [o2,o3]
|
||
//
|
||
// Update value of object o3 to 30. There are two individual values now, one for o2 and one for o3.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 | 20: [o2]
|
||
// | 30: [o3]
|
||
//
|
||
// Update value of object o2 to 10. o2 no longer has an individual value
|
||
// because it got "reverted" to the original, shared value.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 | 30: [o3]
|
||
//
|
||
// Detach object o3. Individual value for o3 is removed.
|
||
//
|
||
// ===================
|
||
// shared | individual
|
||
// ===================
|
||
// 10 |
|
||
package mvslice
|
||
|
||
import (
|
||
"fmt"
|
||
"sync"
|
||
|
||
"github.com/google/uuid"
|
||
"github.com/prysmaticlabs/prysm/v4/container/multi-value-slice/interfaces"
|
||
)
|
||
|
||
// MultiValueSlice defines an abstraction over all concrete implementations of the generic Slice.
|
||
type MultiValueSlice[O interfaces.Identifiable] interface {
|
||
Len(obj O) uuid.UUID
|
||
}
|
||
|
||
// Value defines a single value along with one or more IDs that share this value.
|
||
type Value[V any] struct {
|
||
val V
|
||
ids []uuid.UUID
|
||
}
|
||
|
||
// MultiValueItem defines a collection of Value items.
|
||
type MultiValueItem[V any] struct {
|
||
Values []*Value[V]
|
||
}
|
||
|
||
// Slice is the main component of the multi-value slice data structure. It has two type parameters:
|
||
// - V comparable - the type of values stored the slice. The constraint is required
|
||
// because certain operations (e.g. updating, appending) have to compare values against each other.
|
||
// - O interfaces.Identifiable - the type of objects sharing the slice. The constraint is required
|
||
// because we need a way to compare objects against each other in order to know which objects
|
||
// values should be accessed.
|
||
type Slice[V comparable, O interfaces.Identifiable] struct {
|
||
sharedItems []V
|
||
individualItems map[uint64]*MultiValueItem[V]
|
||
appendedItems []*MultiValueItem[V]
|
||
cachedLengths map[uuid.UUID]int
|
||
lock sync.RWMutex
|
||
}
|
||
|
||
// Init initializes the slice with sensible defaults. Input values are assigned to shared items.
|
||
func (s *Slice[V, O]) Init(items []V) {
|
||
s.sharedItems = items
|
||
s.individualItems = map[uint64]*MultiValueItem[V]{}
|
||
s.appendedItems = []*MultiValueItem[V]{}
|
||
s.cachedLengths = map[uuid.UUID]int{}
|
||
}
|
||
|
||
// Len returns the number of items for the input object.
|
||
func (s *Slice[V, O]) Len(obj O) int {
|
||
s.lock.RLock()
|
||
defer s.lock.RUnlock()
|
||
|
||
l, ok := s.cachedLengths[obj.Id()]
|
||
if !ok {
|
||
return len(s.sharedItems)
|
||
}
|
||
return l
|
||
}
|
||
|
||
// Copy copies items between the source and destination.
|
||
func (s *Slice[V, O]) Copy(src O, dst O) {
|
||
s.lock.Lock()
|
||
defer s.lock.Unlock()
|
||
|
||
for _, item := range s.individualItems {
|
||
for _, v := range item.Values {
|
||
_, found := containsId(v.ids, src.Id())
|
||
if found {
|
||
v.ids = append(v.ids, dst.Id())
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
for _, item := range s.appendedItems {
|
||
found := false
|
||
for _, v := range item.Values {
|
||
_, found = containsId(v.ids, src.Id())
|
||
if found {
|
||
v.ids = append(v.ids, dst.Id())
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
// This is an optimization. If we didn't find an appended item at index i,
|
||
// then all larger indices don't have an appended item for the object either.
|
||
break
|
||
}
|
||
}
|
||
|
||
srcLen, ok := s.cachedLengths[src.Id()]
|
||
if ok {
|
||
s.cachedLengths[dst.Id()] = srcLen
|
||
}
|
||
}
|
||
|
||
// Value returns all items for the input object.
|
||
func (s *Slice[V, O]) Value(obj O) []V {
|
||
s.lock.RLock()
|
||
defer s.lock.RUnlock()
|
||
|
||
l, ok := s.cachedLengths[obj.Id()]
|
||
if ok {
|
||
result := make([]V, l)
|
||
s.fillOriginalItems(obj, &result)
|
||
|
||
sharedLen := len(s.sharedItems)
|
||
for i, item := range s.appendedItems {
|
||
found := false
|
||
for _, v := range item.Values {
|
||
_, found = containsId(v.ids, obj.Id())
|
||
if found {
|
||
result[sharedLen+i] = v.val
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
// This is an optimization. If we didn't find an appended item at index i,
|
||
// then all larger indices don't have an appended item for the object either.
|
||
return result
|
||
}
|
||
}
|
||
return result
|
||
} else {
|
||
result := make([]V, len(s.sharedItems))
|
||
s.fillOriginalItems(obj, &result)
|
||
return result
|
||
}
|
||
}
|
||
|
||
// At returns the item at the requested index for the input object.
|
||
// Appended items' indices are always larger than shared/individual items' indices.
|
||
// We first check if the index is within the length of shared items.
|
||
// If it is, then we return an individual value at that index - if it exists - or a shared value otherwise.
|
||
// If the index is beyond the length of shared values, it is an appended item and that's what gets returned.
|
||
func (s *Slice[V, O]) At(obj O, index uint64) (V, error) {
|
||
s.lock.RLock()
|
||
defer s.lock.RUnlock()
|
||
|
||
if index >= uint64(len(s.sharedItems)+len(s.appendedItems)) {
|
||
var def V
|
||
return def, fmt.Errorf("index %d out of bounds", index)
|
||
}
|
||
|
||
isOriginal := index < uint64(len(s.sharedItems))
|
||
if isOriginal {
|
||
ind, ok := s.individualItems[index]
|
||
if !ok {
|
||
return s.sharedItems[index], nil
|
||
}
|
||
for _, v := range ind.Values {
|
||
for _, id := range v.ids {
|
||
if id == obj.Id() {
|
||
return v.val, nil
|
||
}
|
||
}
|
||
}
|
||
return s.sharedItems[index], nil
|
||
} else {
|
||
item := s.appendedItems[index-uint64(len(s.sharedItems))]
|
||
for _, v := range item.Values {
|
||
for _, id := range v.ids {
|
||
if id == obj.Id() {
|
||
return v.val, nil
|
||
}
|
||
}
|
||
}
|
||
var def V
|
||
return def, fmt.Errorf("index %d out of bounds", index)
|
||
}
|
||
}
|
||
|
||
// UpdateAt updates the item at the required index for the input object to the passed in value.
|
||
func (s *Slice[V, O]) UpdateAt(obj O, index uint64, val V) error {
|
||
s.lock.Lock()
|
||
defer s.lock.Unlock()
|
||
|
||
if index >= uint64(len(s.sharedItems)+len(s.appendedItems)) {
|
||
return fmt.Errorf("index %d out of bounds", index)
|
||
}
|
||
|
||
isOriginal := index < uint64(len(s.sharedItems))
|
||
if isOriginal {
|
||
s.updateOriginalItem(obj, index, val)
|
||
return nil
|
||
}
|
||
return s.updateAppendedItem(obj, index, val)
|
||
}
|
||
|
||
// Append adds a new item to the input object.
|
||
func (s *Slice[V, O]) Append(obj O, val V) {
|
||
s.lock.Lock()
|
||
defer s.lock.Unlock()
|
||
|
||
if len(s.appendedItems) == 0 {
|
||
s.appendedItems = append(s.appendedItems, &MultiValueItem[V]{Values: []*Value[V]{{val: val, ids: []uuid.UUID{obj.Id()}}}})
|
||
s.cachedLengths[obj.Id()] = len(s.sharedItems) + 1
|
||
return
|
||
}
|
||
|
||
for _, item := range s.appendedItems {
|
||
found := false
|
||
for _, v := range item.Values {
|
||
_, found = containsId(v.ids, obj.Id())
|
||
if found {
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
newValue := true
|
||
for _, v := range item.Values {
|
||
if v.val == val {
|
||
v.ids = append(v.ids, obj.Id())
|
||
newValue = false
|
||
break
|
||
}
|
||
}
|
||
if newValue {
|
||
item.Values = append(item.Values, &Value[V]{val: val, ids: []uuid.UUID{obj.Id()}})
|
||
}
|
||
|
||
l, ok := s.cachedLengths[obj.Id()]
|
||
if ok {
|
||
s.cachedLengths[obj.Id()] = l + 1
|
||
} else {
|
||
s.cachedLengths[obj.Id()] = len(s.sharedItems) + 1
|
||
}
|
||
|
||
return
|
||
}
|
||
}
|
||
|
||
s.appendedItems = append(s.appendedItems, &MultiValueItem[V]{Values: []*Value[V]{{val: val, ids: []uuid.UUID{obj.Id()}}}})
|
||
|
||
s.cachedLengths[obj.Id()] = s.cachedLengths[obj.Id()] + 1
|
||
}
|
||
|
||
// Detach removes the input object from the multi-value slice.
|
||
// What this means in practice is that we remove all individual and appended values for that object and clear the cached length.
|
||
func (s *Slice[V, O]) Detach(obj O) {
|
||
s.lock.Lock()
|
||
defer s.lock.Unlock()
|
||
|
||
for i, ind := range s.individualItems {
|
||
for vi, v := range ind.Values {
|
||
foundIndex, found := containsId(v.ids, obj.Id())
|
||
if found {
|
||
if len(v.ids) == 1 {
|
||
if len(ind.Values) == 1 {
|
||
delete(s.individualItems, i)
|
||
} else {
|
||
ind.Values = deleteElemFromSlice(ind.Values, vi)
|
||
}
|
||
} else {
|
||
v.ids = deleteElemFromSlice(v.ids, foundIndex)
|
||
}
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
for _, item := range s.appendedItems {
|
||
found := false
|
||
for vi, v := range item.Values {
|
||
var foundIndex int
|
||
foundIndex, found = containsId(v.ids, obj.Id())
|
||
if found {
|
||
if len(v.ids) == 1 {
|
||
item.Values = deleteElemFromSlice(item.Values, vi)
|
||
} else {
|
||
v.ids = deleteElemFromSlice(v.ids, foundIndex)
|
||
}
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
// This is an optimization. If we didn't find an appended item at index i,
|
||
// then all larger indices don't have an appended item for the object either.
|
||
break
|
||
}
|
||
}
|
||
|
||
delete(s.cachedLengths, obj.Id())
|
||
}
|
||
|
||
func (s *Slice[V, O]) fillOriginalItems(obj O, items *[]V) {
|
||
for i, item := range s.sharedItems {
|
||
ind, ok := s.individualItems[uint64(i)]
|
||
if !ok {
|
||
(*items)[i] = item
|
||
} else {
|
||
found := false
|
||
for _, v := range ind.Values {
|
||
_, found = containsId(v.ids, obj.Id())
|
||
if found {
|
||
(*items)[i] = v.val
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
(*items)[i] = item
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
func (s *Slice[V, O]) updateOriginalItem(obj O, index uint64, val V) {
|
||
ind, ok := s.individualItems[index]
|
||
if ok {
|
||
for mvi, v := range ind.Values {
|
||
// if we find an existing value, we remove it
|
||
foundIndex, found := containsId(v.ids, obj.Id())
|
||
if found {
|
||
if len(v.ids) == 1 {
|
||
// There is an improvement to be made here. If len(ind.Values) == 1,
|
||
// then after removing the item from the slice s.individualItems[i]
|
||
// will be a useless map entry whose value is an empty slice.
|
||
ind.Values = deleteElemFromSlice(ind.Values, mvi)
|
||
} else {
|
||
v.ids = deleteElemFromSlice(v.ids, foundIndex)
|
||
}
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
if val == s.sharedItems[index] {
|
||
return
|
||
}
|
||
|
||
if !ok {
|
||
s.individualItems[index] = &MultiValueItem[V]{Values: []*Value[V]{{val: val, ids: []uuid.UUID{obj.Id()}}}}
|
||
} else {
|
||
newValue := true
|
||
for _, v := range ind.Values {
|
||
if v.val == val {
|
||
v.ids = append(v.ids, obj.Id())
|
||
newValue = false
|
||
break
|
||
}
|
||
}
|
||
if newValue {
|
||
ind.Values = append(ind.Values, &Value[V]{val: val, ids: []uuid.UUID{obj.Id()}})
|
||
}
|
||
}
|
||
}
|
||
|
||
func (s *Slice[V, O]) updateAppendedItem(obj O, index uint64, val V) error {
|
||
item := s.appendedItems[index-uint64(len(s.sharedItems))]
|
||
found := false
|
||
for vi, v := range item.Values {
|
||
var foundIndex int
|
||
// if we find an existing value, we remove it
|
||
foundIndex, found = containsId(v.ids, obj.Id())
|
||
if found {
|
||
if len(v.ids) == 1 {
|
||
item.Values = deleteElemFromSlice(item.Values, vi)
|
||
} else {
|
||
v.ids = deleteElemFromSlice(v.ids, foundIndex)
|
||
}
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
return fmt.Errorf("index %d out of bounds", index)
|
||
}
|
||
|
||
newValue := true
|
||
for _, v := range item.Values {
|
||
if v.val == val {
|
||
v.ids = append(v.ids, obj.Id())
|
||
newValue = false
|
||
break
|
||
}
|
||
}
|
||
if newValue {
|
||
item.Values = append(item.Values, &Value[V]{val: val, ids: []uuid.UUID{obj.Id()}})
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func containsId(ids []uuid.UUID, wanted uuid.UUID) (int, bool) {
|
||
for i, id := range ids {
|
||
if id == wanted {
|
||
return i, true
|
||
}
|
||
}
|
||
return 0, false
|
||
}
|
||
|
||
// deleteElemFromSlice removes the element at index i from s and returns the shortened slice.
// It does not reallocate the slice, but it also does not preserve the order of items:
// the last element is moved into position i.
// This is not a problem here because the order of values in a MultiValueItem and object IDs doesn't matter.
//
// The vacated tail slot of the backing array is reset to the zero value so that a
// removed pointer element is no longer referenced and can be garbage collected.
// The function panics if i is out of range (including when s is empty).
func deleteElemFromSlice[T any](s []T, i int) []T {
	last := len(s) - 1
	s[i] = s[last] // Copy last element to index i.

	var zero T
	s[last] = zero // Drop the stale reference left in the backing array.

	return s[:last] // Truncate slice.
}
|