freelist.go (353 lines of code) (raw):
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package txfile
import (
"math"
"sort"
"github.com/elastic/go-txfile/internal/invariant"
"github.com/elastic/go-txfile/internal/iter"
)
// freelist manages freed pages within an area. The freelist uses
// run-length-encoding, compining multiple pages into one region, so to reduce
// memory usage in memory, as well as when serializing the freelist. The
// freelist guarantees pages are sorted by PageID. Depending on allocOrder,
// pages with smallest/biggest PageID will be allocated first.
type freelist struct {
avail uint
regions regionList
}
// freelistEncPagePrediction is used to predict the number of meta-pages required
// to serialize the freelist.
// The prediction might over-estimate the number of pages required, which is
// perfectly fine, as long as we don't under-estimate (which would break
// serialization -> transaction fail).
type freelistEncPagePrediction struct {
count uint
payloadSize, avail uint
}
// allocOrder provides freelist access strategies.
type allocOrder struct {
// freelist iteration order
iter iter.Fn
// reportRange provides iteration limits for reporting allocated regions in
// order (by PageID).
reportRange func(last, len int) (int, int)
// allocFromRegion split the region into allocated and leftover region.
allocFromRegion func(reg region, N uint32) (region, region)
// keepRange determines which pages to keep/remove from the freelist, after
// allocation.
keepRange func(last, len int, partial bool) (int, int)
}
var (
allocFromBeginning = &allocOrder{
iter: iter.Forward,
reportRange: func(last, len int) (int, int) {
return 0, last + 1
},
allocFromRegion: func(reg region, N uint32) (region, region) {
return region{id: reg.id, count: N}, region{id: reg.id + PageID(N), count: reg.count - N}
},
keepRange: func(last, len int, partial bool) (int, int) {
if partial {
return last, len
}
return last + 1, len
},
}
allocFromEnd = &allocOrder{
iter: iter.Reversed,
reportRange: func(last, len int) (int, int) {
return last, len
},
allocFromRegion: func(reg region, N uint32) (region, region) {
return region{id: reg.id + PageID(reg.count-N), count: N}, region{id: reg.id, count: reg.count - N}
},
keepRange: func(last, len int, partial bool) (int, int) {
if partial {
return 0, last + 1
}
return 0, last
},
}
)
// Avail returns number of pages available in the current freelist.
func (f *freelist) Avail() uint {
return f.avail
}
// AllocAllRegionsWith allocates all regions in the freelist.
// The list will be empty afterwards.
func (f *freelist) AllocAllRegionsWith(fn func(region)) {
for _, r := range f.AllocAllRegions() {
fn(r)
}
}
// AllocAllRegions allocates all regions in the freelist.
// The list will be empty afterwards.
func (f *freelist) AllocAllRegions() regionList {
regions := f.regions
f.avail = 0
f.regions = nil
return regions
}
// AllocContinuousRegion tries to find a contiuous set of pages in the freelist.
// The best-fitting (or smallest) region having at least n pages will be used
// for allocation.
// Returns an empty region, if no continuous space could be found.
func (f *freelist) AllocContinuousRegion(order *allocOrder, n uint) region {
if f.avail < n || (f.avail == n && len(f.regions) > 1) {
return region{}
}
if n > math.MaxUint32 { // continuous regions max out at 4GB
return region{}
}
bestFit := -1
bestSz := uint(math.MaxUint32)
for i, end, next := order.iter(len(f.regions)); i != end; i = next(i) {
count := uint(f.regions[i].count)
if n <= count && count < bestSz {
bestFit = i
bestSz = count
if bestSz == n {
break
}
}
}
if bestFit < 0 {
// no continuous region found
return region{}
}
// allocate best fitting region from list
i := bestFit
selected := f.regions[i]
allocated, rest := order.allocFromRegion(selected, uint32(n))
invariant.Check(allocated.count == uint32(n), "allocation mismatch")
invariant.Check(allocated.count+rest.count == selected.count, "region split page count mismatch")
if rest.count == 0 {
// remove entry
copy(f.regions[i:], f.regions[i+1:])
f.regions = f.regions[:len(f.regions)-1]
} else {
f.regions[i] = rest
}
f.avail -= uint(allocated.count)
return allocated
}
// AllocRegionsWith allocates up n potentially non-continuous pages from the
// freelist. No page will be allocated, if n succeeds the number of available
// pages.
func (f *freelist) AllocRegionsWith(order *allocOrder, n uint, fn func(region)) {
if n == 0 {
return
}
var (
last int // last region to allocate from
L = len(f.regions)
N = n // number of pages to be allocated from 'last' region
)
if N > f.avail {
// not enough space -> return early
return
}
// Collect indices of regions to be allocated from.
for i, end, next := order.iter(L); i != end; i = next(i) {
count := uint(f.regions[i].count)
if count >= N {
last = i
break
}
N -= count
}
// Compute region split on last region to be allocated from.
selected := f.regions[last]
allocated, leftover := order.allocFromRegion(selected, uint32(N))
invariant.Check(allocated.count == uint32(N), "allocation mismatch")
invariant.Check(allocated.count+leftover.count == selected.count, "region split page count mismatch")
// Implicitely update last allocated region to match the allocation size
// and report all regions allocated.
f.regions[last] = allocated
for i, end := order.reportRange(last, L); i != end; i++ {
fn(f.regions[i])
}
// update free regions
f.regions[last] = leftover
start, end := order.keepRange(last, L, leftover.count != 0)
f.regions = f.regions[start:end]
f.avail -= n
}
// AddRegions merges a new list of regions with the freelist. The regions
// in the list must be sorted.
func (f *freelist) AddRegions(list regionList) {
count := list.CountPages()
if count > 0 {
f.regions = mergeRegionLists(f.regions, list)
f.avail += count
}
}
// AddRegion inserts a new region into the freelist. AddRegion ensures the new
// region is sorted within the freelist, potentially merging the new region
// with existing regions.
// Note: The region to be added MUST NOT overlap with existing regions.
func (f *freelist) AddRegion(reg region) {
if len(f.regions) == 0 {
f.regions = regionList{reg}
f.avail += uint(reg.count)
return
}
i := sort.Search(len(f.regions), func(i int) bool {
_, end := f.regions[i].Range()
return reg.id < end
})
total := uint(reg.count)
switch {
case len(f.regions) <= i: // add to end of region list?
last := len(f.regions) - 1
if regionsMergable(f.regions[last], reg) {
f.regions[last] = mergeRegions(f.regions[last], reg)
} else {
f.regions.Add(reg)
}
case i == 0: // add to start of region list?
if regionsMergable(reg, f.regions[0]) {
f.regions[0] = mergeRegions(reg, f.regions[0])
} else {
f.regions = append(f.regions, region{})
copy(f.regions[1:], f.regions)
f.regions[0] = reg
}
default: // insert in middle of region list
// try to merge region with already existing regions
mergeBefore := regionsMergable(f.regions[i-1], reg)
if mergeBefore {
reg = mergeRegions(f.regions[i-1], reg)
}
mergeAfter := regionsMergable(reg, f.regions[i])
if mergeAfter {
reg = mergeRegions(reg, f.regions[i])
}
// update region list
switch {
case mergeBefore && mergeAfter: // combine adjacent regions -> shrink list
f.regions[i-1] = reg
copy(f.regions[i:], f.regions[i+1:])
f.regions = f.regions[:len(f.regions)-1]
case mergeBefore:
f.regions[i-1] = reg
case mergeAfter:
f.regions[i] = reg
default: // no adjacent entries -> grow list
f.regions = append(f.regions, region{})
copy(f.regions[i+1:], f.regions[i:])
f.regions[i] = reg
}
}
f.avail += total
}
// RemoveRegion removes all pages from the freelist, that are found within
// the input region.
func (f *freelist) RemoveRegion(removed region) {
i := sort.Search(len(f.regions), func(i int) bool {
_, end := f.regions[i].Range()
return removed.id <= end
})
if i < 0 || i >= len(f.regions) {
return
}
current := &f.regions[i]
if current.id == removed.id && current.count == removed.count {
// fast path: entry can be completely removed
f.regions = append(f.regions[:i], f.regions[i+1:]...)
f.avail -= uint(removed.count)
return
}
var total uint
removedStart, removedEnd := removed.Range()
for removedStart < removedEnd && i < len(f.regions) {
current := &f.regions[i]
if removedStart < current.id {
// Gap: advance removedStart, so to deal with holes when removing the all regions
// matching the input region
removedStart = current.id
continue
}
count := uint32(removedEnd - removedStart)
if removedStart == current.id {
if current.count < count {
count = current.count
}
// remove entry:
current.id = current.id + PageID(count)
current.count -= count
if current.count == 0 {
// remove region from freelist -> i will point to next region if
// `removed` overlaps 2 non-merged regions
f.regions = append(f.regions[:i], f.regions[i+1:]...)
} else {
// overlapping region, but old region must be preserved:
i++
}
removedStart += PageID(count)
total += uint(count)
} else {
// split current region in removedStart
keep := uint32(removedStart - current.id)
leftover := region{
id: removedStart,
count: current.count - keep,
}
current.count = keep
// remove sub-region from leftover
if leftover.count < count {
count = leftover.count
}
leftover.id += PageID(count)
leftover.count -= count
total += uint(count)
removedStart += PageID(count)
i++ // advance to next region
// insert new entry into regionList if removed did remove region in
// middle of old region
if leftover.count > 0 {
f.regions = append(f.regions, region{})
copy(f.regions[i+1:], f.regions[i:])
f.regions[i] = leftover
break // no more region to split from
}
}
}
f.avail -= total
}
func (f *freelist) LastRegion() region {
L := len(f.regions)
if L == 0 {
return region{}
}
return f.regions[L-1]
}
// (de-)serialization
func readFreeList(
access func(PageID) []byte,
root PageID,
fn func(bool, region),
) (idList, reason) {
const op = "txfile/read-freelist"
if root == 0 {
return nil, nil
}
rootPage := access(root)
if rootPage == nil {
return nil, &Error{
op: op,
kind: InvalidMetaPage,
cause: raiseOutOfBounds(root),
msg: "root page not in bounds",
}
}
var metaPages idList
for pageID := root; pageID != 0; {
metaPages.Add(pageID)
node, payload := castFreePage(access(pageID))
if node == nil {
return nil, &Error{
op: op,
kind: InvalidMetaPage,
cause: raiseOutOfBounds(pageID),
msg: "invalid freelist node page",
}
}
pageID = node.next.Get()
entries := node.count.Get()
tracef("free list node: (next: %v, entries: %v)", pageID, entries)
for ; entries > 0; entries-- {
isMeta, reg, n := decodeRegion(payload)
payload = payload[n:]
fn(isMeta, reg)
}
}
return metaPages, nil
}
func writeFreeLists(
to regionList,
pageSize uint,
metaList, dataList regionList,
onPage func(id PageID, buf []byte) reason,
) reason {
allocPages := to.PageIDs()
writer := newPagingWriter(allocPages, pageSize, 0, onPage)
var writeErr reason
writeList := func(isMeta bool, lst regionList) {
if writeErr != nil {
return
}
for _, reg := range lst {
var buf [maxRegionEncSz]byte
n := encodeRegion(buf[:], isMeta, reg)
if err := writer.Write(buf[:n]); err != nil {
writeErr = err
return
}
}
}
writeList(true, metaList)
writeList(false, dataList)
if writeErr != nil {
return writeErr
}
return writer.Flush()
}
func prepareFreelistEncPagePrediction(header int, pageSize uint) freelistEncPagePrediction {
return freelistEncPagePrediction{payloadSize: pageSize - uint(header)}
}
func (f *freelistEncPagePrediction) Estimate() uint {
return f.count
}
func (f *freelistEncPagePrediction) AddRegion(reg region) {
sz := uint(regionEncodingSize(reg))
if f.avail < sz {
f.count++
f.avail = f.payloadSize
}
f.avail -= sz
}
func (f *freelistEncPagePrediction) AddRegions(lst regionList) {
for _, reg := range lst {
f.AddRegion(reg)
}
}