Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions builder/sizes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) {
// This is a small number of very diverse targets that we want to test.
tests := []sizeTest{
// microcontrollers
{"hifive1b", "examples/echo", 3817, 299, 0, 2252},
{"microbit", "examples/serial", 2820, 356, 8, 2248},
{"wioterminal", "examples/pininterrupt", 8020, 1652, 132, 7480},
{"hifive1b", "examples/echo", 3705, 299, 0, 2252},
{"microbit", "examples/serial", 2736, 356, 8, 2248},
{"wioterminal", "examples/pininterrupt", 7960, 1652, 132, 7480},

// TODO: also check wasm. Right now this is difficult, because
// wasm binaries are run through wasm-opt and therefore the
Expand Down
225 changes: 108 additions & 117 deletions src/runtime/gc_blocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ package runtime
// The memory manager internally uses blocks that are 4 pointers big (see
// bytesPerBlock). Every allocation first rounds up to this size to align every
// block. It will first try to find a chain of blocks that is big enough to
// satisfy the allocation. If it finds one, it marks the first one as the "head"
// and the following ones (if any) as the "tail" (see below). If it cannot find
// satisfy the allocation. If it finds one, it marks the last one as the "head"
// and the preceding ones (if any) as the "tail" (see below). If it cannot find
// any free space, it will perform a garbage collection cycle and try again. If
// it still cannot find any free space, it gives up.
//
// Every block has some metadata, which is stored at the end of the heap.
// The four states are "free", "head", "tail", and "mark". During normal
// operation, there are no marked blocks. Every allocated object starts with a
// "head" and is followed by "tail" blocks. The reason for this distinction is
// operation, there are no marked blocks. Every allocated object ends with a
// "head" and is preceded by "tail" blocks. The reason for this distinction is
// that this way, the start and end of every object can be found easily.
//
// Metadata is stored in a special area at the end of the heap, in the area
Expand Down Expand Up @@ -129,7 +129,7 @@ func (b gcBlock) address() uintptr {
return addr
}

// findHead returns the head (first block) of an object, assuming the block
// findHead returns the head (last block) of an object, assuming the block
// points to an allocated object. It returns the same block if this block
// already points to the head.
func (b gcBlock) findHead() gcBlock {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like we should rename this findHeader

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This finds the head block. It returns the index of the block with the header, not the header itself.

Expand All @@ -142,7 +142,7 @@ func (b gcBlock) findHead() gcBlock {
// large allocation.
stateByte := b.stateByte()
if stateByte == blockStateByteAllTails {
b -= (b % blocksPerStateByte) + 1
b += blocksPerStateByte - (b % blocksPerStateByte)
continue
}

Expand All @@ -152,7 +152,7 @@ func (b gcBlock) findHead() gcBlock {
if state != blockStateTail {
break
}
b--
b++
}
if gcAsserts {
if b.state() != blockStateHead && b.state() != blockStateMark {
Expand All @@ -162,18 +162,6 @@ func (b gcBlock) findHead() gcBlock {
return b
}

// findNext walks forward from b and returns the first block that lies beyond
// the run of tail blocks. If b itself is a head (marked or not), the walk
// starts at the block after it. The returned block is not necessarily the
// head of another object.
func (b gcBlock) findNext() gcBlock {
	next := b

	// Step over the head block, if that is where we are starting.
	switch next.state() {
	case blockStateHead, blockStateMark:
		next++
	}

	// Step over tail blocks, but never walk into the metadata area.
	limit := uintptr(metadataStart)
	for next.address() < limit {
		if next.state() != blockStateTail {
			break
		}
		next++
	}
	return next
}

// stateByte returns the raw metadata byte that holds the state of b. The same
// byte is shared by every block with the same b/blocksPerStateByte quotient.
func (b gcBlock) stateByte() byte {
	slot := unsafe.Add(metadataStart, b/blocksPerStateByte)
	return *(*uint8)(slot)
}
Expand All @@ -200,7 +188,22 @@ func (b gcBlock) setState(newState blockState) {
}
}

// objHeader is a structure prepended to every heap object to hold metadata.
// unmark turns a marked head back into a plain head by flipping exactly the
// state bits in which blockStateMark and blockStateHead differ. When gcAsserts
// is enabled it panics if b is not currently marked.
func (b gcBlock) unmark() {
	if gcAsserts {
		if b.state() != blockStateMark {
			runtimePanic("gc: block not marked")
		}
	}

	// Bits to toggle, moved to the position of b within its state byte.
	flip := uint8(blockStateMark^blockStateHead) << (b % blocksPerStateByte)
	meta := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
	*meta = *meta ^ flip
}

// free resets the state of b to blockStateFree by clearing all of its state
// bits in the metadata byte. The previous state of b is not checked.
func (b gcBlock) free() {
	// State mask moved to the position of b within its state byte.
	clearBits := uint8(blockStateMask) << (b % blocksPerStateByte)
	meta := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
	*meta = *meta &^ clearBits
}

// objHeader is a structure appended to every heap object to hold metadata.
type objHeader struct {
// next is the next object to scan after this.
next *objHeader
Expand Down Expand Up @@ -317,8 +320,12 @@ func initHeap() {
metadataSize := heapEnd - uintptr(metadataStart)
memzero(unsafe.Pointer(metadataStart), metadataSize)

// Rebuild the free ranges list.
buildFreeRanges()
// Create the initial free range.
if endBlock > 0 {
r := (*freeRange)(unsafe.Pointer(heapStart))
*r = freeRange{len: uintptr(endBlock)}
freeRanges = r
}
}

// setHeapEnd is called to expand the heap. The heap can only grow, not shrink.
Expand All @@ -340,6 +347,7 @@ func setHeapEnd(newHeapEnd uintptr) {
// memcpy is fine as it only copies the old metadata and the new memory will
// have been zero initialized.
heapEnd = newHeapEnd
oldEndBlock := endBlock
calculateHeapAddresses()
memcpy(metadataStart, oldMetadataStart, oldMetadataSize)

Expand All @@ -351,8 +359,14 @@ func setHeapEnd(newHeapEnd uintptr) {
runtimePanic("gc: heap did not grow enough at once")
}

// Rebuild the free ranges list.
buildFreeRanges()
// Insert the new free range. This range will be separate from any previous
// free space at the end of the heap. This may result in more heap growth
// than strictly necessary when an allocation requests more memory than the
// previous heap size. Otherwise this will only result in slightly more
// memory fragmentation than necessary. We cannot easily remove the old
// range, and adding a special free-list rebuild function for this edge case
// would not be worthwhile in terms of binary size or code maintenance.
insertFreeRange(oldEndBlock.pointer(), uintptr(endBlock-oldEndBlock))
}

// calculateHeapAddresses initializes variables such as metadataStart and
Expand Down Expand Up @@ -400,7 +414,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {

// Round the size up to a multiple of blocks, adding space for the header.
rawSize := size
size += align(unsafe.Sizeof(objHeader{}))
size += unsafe.Sizeof(objHeader{})
size += bytesPerBlock - 1
if size < rawSize {
// The size overflowed.
Expand Down Expand Up @@ -456,25 +470,27 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer {
runtimePanicAt(returnAddress(0), "out of memory")
}

// Set the backing blocks as being allocated.
// Set the block states.
block := blockFromAddr(uintptr(pointer))
block.setState(blockStateHead)
for i := block + 1; i != block+gcBlock(neededBlocks); i++ {
i := block + gcBlock(neededBlocks) - 1
i.setState(blockStateHead)
for i != block {
i--
i.setState(blockStateTail)
}

// Create the object header.
header := (*objHeader)(pointer)
size -= unsafe.Sizeof(objHeader{})
header := (*objHeader)(unsafe.Add(pointer, size))
header.layout = parseGCLayout(layout)

// We've claimed this allocation, now we can unlock the heap.
gcLock.Unlock()

// Return a pointer to this allocation.
add := align(unsafe.Sizeof(objHeader{}))
pointer = unsafe.Add(pointer, add)
size -= add
// Clear the allocation body.
memzero(pointer, size)

// Return a pointer to this allocation.
return pointer
}

Expand All @@ -483,16 +499,28 @@ func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer {
return alloc(size, nil)
}

ptrAddress := uintptr(ptr)
endOfTailAddress := blockFromAddr(ptrAddress).findNext().address()
// Find the first block of the original allocation.
firstBlock := blockFromAddr(uintptr(ptr))

// Find the last block of the original allocation.
Comment thread
niaow marked this conversation as resolved.
lastBlock := firstBlock.findHead()

// Calculate the size of the original allocation body. Every block from
// firstBlock up to (but not including) lastBlock is entirely body and
// contributes bytesPerBlock bytes; the last block contributes whatever
// remains once the objHeader stored in it is subtracted. The multiplier must
// be bytesPerBlock (a size in bytes), not blocksPerStateByte (a count of
// blocks per metadata byte), otherwise oldSize is too small and the copy
// into the new allocation drops data.
oldSize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{}))

// this might be a few bytes longer than the original size of
// ptr, because we align to full blocks of size bytesPerBlock
oldSize := endOfTailAddress - ptrAddress
if size <= oldSize {
// The requested size is no larger than the old size.
// There are two likely scenarios for this:
// - The caller intended to grow the allocation, but the original size
// was rounded up by alloc to a multiple of the block size.
// The rounded size is already sufficient.
// - The caller intended to shrink the allocation.
// We currently ignore this case.
// Either way, the current allocation can be left alone.
return ptr
}

// Create a new allocation and copy the old data.
newAlloc := alloc(size, nil)
memcpy(newAlloc, ptr, oldSize)
free(ptr)
Expand Down Expand Up @@ -559,11 +587,8 @@ func runGC() (freeBytes uintptr) {
gcResumeWorld()

// Sweep phase: free all non-marked objects and unmark marked objects for
// the next collection cycle.
sweep()

// Rebuild the free ranges list.
freeBytes = buildFreeRanges()
// the next collection cycle. This also rebuilds the free ranges list.
freeBytes = sweep()

// Show how much has been swept, for debugging.
if gcDebug {
Expand Down Expand Up @@ -629,13 +654,21 @@ func finishMark() {
continue
}

// Compute the scan bounds.
objAddr := uintptr(unsafe.Pointer(obj))
start := objAddr + align(unsafe.Sizeof(objHeader{}))
end := blockFromAddr(objAddr).findNext().address()
// Find the last block in the object.
// This block contains the header.
lastBlock := blockFromAddr(uintptr(unsafe.Pointer(obj)))

// Find the first block in the allocation.
firstBlock := lastBlock
for firstBlock > 0 && (firstBlock-1).state() == blockStateTail {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question as above: can we scan multiple blocks at once by checking the entire meta byte?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't think that was in the scope of this PR since we didn't do that before. It is also less important because this only happens once per allocation, as opposed to findHead which runs for every interior pointer to an allocation.

firstBlock--
}

// Compute the size of the allocation.
bodySize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{}))

// Scan the object.
obj.layout.scan(start, end-start)
obj.layout.scan(firstBlock.address(), bodySize)
}
}

Expand Down Expand Up @@ -668,97 +701,55 @@ func markRoot(addr, root uintptr) {
head.setState(blockStateMark)

// Add the object to the scan list.
header := (*objHeader)(head.pointer())
header := (*objHeader)(unsafe.Add(head.pointer(), bytesPerBlock-unsafe.Sizeof(objHeader{})))
header.next = scanList
scanList = header
}

// Sweep goes through all memory and frees unmarked memory.
func sweep() {
metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte)
var carry byte
for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) {
// Fetch the state byte.
stateBytePtr := (*byte)(unsafe.Pointer(meta))
stateByte := *stateBytePtr

// Separate blocks by type.
// Split the nibbles.
// Each nibble is a mask of blocks.
high := stateByte >> blocksPerStateByte
low := stateByte & blockStateEach
// Marked heads are in both nibbles.
markedHeads := low & high
// Unmarked heads are in the low nibble but not the high nibble.
unmarkedHeads := low &^ high
// Tails are in the high nibble but not the low nibble.
tails := high &^ low

// Clear all tail runs after unmarked (freed) heads.
//
// Adding 1 to the start of a bit run will clear the run and set the next bit:
// (2^k - 1) + 1 = 2^k
// e.g. 0b0011 + 1 = 0b0100
// Bitwise-and with the original mask to clear the newly set bit.
// e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000
// This will not clear bits after the run because the gap stops the carry:
// e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000
// This can clear multiple runs in a single addition:
// e.g. (0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000
//
// In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1).
// It is possible omit the bitwise-and because the clear still works if the next block is not a tail.
// A head is not a tail, so corresponding missing tail bit will stop the carry from a previous tail run.
// As such it will set the next bit which will be cleared back away later.
// e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000
//
// Treat the whole heap as a single pair of integer masks.
// This is accomplished for addition by carrying the overflow to the next state byte.
// The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum.
// This does not require any special work for the bitwise-and because it operates bitwise.
tailClear := tails + (unmarkedHeads << 1) + carry
carry = tailClear >> blocksPerStateByte
tails &= tailClear

// Construct the new state byte.
*stateBytePtr = markedHeads | (tails << blocksPerStateByte)
}
}

// buildFreeRanges rebuilds the freeRanges list.
// This must be called after a GC sweep or heap grow.
// It returns how many bytes are free in the heap.
func buildFreeRanges() uintptr {
func sweep() uintptr {
// Discard the old free ranges list.
freeRanges = nil

// Scan backwards through the block metadata.
block := endBlock
var totalBlocks uintptr
var freeBlocks uintptr
for {
// Skip backwards over occupied blocks.
for block > 0 && (block-1).state() != blockStateFree {
// Scan backwards until we find a marked head.
// Free the blocks as we go.
freeEnd := block
for block > 0 && (block-1).state() != blockStateMark {
block--
block.free()
}

if freeLen := uintptr(freeEnd - block); freeLen > 0 {
// Insert the freed blocks.
freeBlocks += freeLen
insertFreeRange(block.pointer(), freeLen)
}

if block == 0 {
// There are no more blocks to sweep.
break
}

// Find the start of the free range.
end := block
for block > 0 && (block-1).state() == blockStateFree {
// Unmark the next head.
block--
block.unmark()

// Skip the tail.
for block > 0 && (block-1).state() == blockStateTail {
block--
}

// Insert the free range.
len := uintptr(end - block)
totalBlocks += len
insertFreeRange(block.pointer(), len)
}

if gcDebug {
println("free ranges after rebuild:")
println("free ranges after sweep:")
dumpFreeRangeCounts()
}

return totalBlocks * bytesPerBlock
return freeBlocks * bytesPerBlock
}

func dumpFreeRangeCounts() {
Expand Down
Loading