diff --git a/builder/sizes_test.go b/builder/sizes_test.go index f7ee7e1b27..32b80e31f9 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3817, 299, 0, 2252}, - {"microbit", "examples/serial", 2820, 356, 8, 2248}, - {"wioterminal", "examples/pininterrupt", 8020, 1652, 132, 7480}, + {"hifive1b", "examples/echo", 3705, 299, 0, 2252}, + {"microbit", "examples/serial", 2736, 356, 8, 2248}, + {"wioterminal", "examples/pininterrupt", 7960, 1652, 132, 7480}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index a10b594375..583c05169d 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -8,15 +8,15 @@ package runtime // The memory manager internally uses blocks of 4 pointers big (see // bytesPerBlock). Every allocation first rounds up to this size to align every // block. It will first try to find a chain of blocks that is big enough to -// satisfy the allocation. If it finds one, it marks the first one as the "head" -// and the following ones (if any) as the "tail" (see below). If it cannot find +// satisfy the allocation. If it finds one, it marks the last one as the "head" +// and the preceding ones (if any) as the "tail" (see below). If it cannot find // any free space, it will perform a garbage collection cycle and try again. If // it still cannot find any free space, it gives up. // // Every block has some metadata, which is stored at the end of the heap. // The four states are "free", "head", "tail", and "mark". During normal -// operation, there are no marked blocks. Every allocated object starts with a -// "head" and is followed by "tail" blocks. 
The reason for this distinction is +// operation, there are no marked blocks. Every allocated object ends with a +// "head" and is preceded by "tail" blocks. The reason for this distinction is // that this way, the start and end of every object can be found easily. // // Metadata is stored in a special area at the end of the heap, in the area @@ -129,7 +129,7 @@ func (b gcBlock) address() uintptr { return addr } -// findHead returns the head (first block) of an object, assuming the block +// findHead returns the head (last block) of an object, assuming the block // points to an allocated object. It returns the same block if this block // already points to the head. func (b gcBlock) findHead() gcBlock { @@ -142,7 +142,7 @@ func (b gcBlock) findHead() gcBlock { // large allocation. stateByte := b.stateByte() if stateByte == blockStateByteAllTails { - b -= (b % blocksPerStateByte) + 1 + b += blocksPerStateByte - (b % blocksPerStateByte) continue } @@ -152,7 +152,7 @@ func (b gcBlock) findHead() gcBlock { if state != blockStateTail { break } - b-- + b++ } if gcAsserts { if b.state() != blockStateHead && b.state() != blockStateMark { @@ -162,18 +162,6 @@ func (b gcBlock) findHead() gcBlock { return b } -// findNext returns the first block just past the end of the tail. This may or -// may not be the head of an object. -func (b gcBlock) findNext() gcBlock { - if b.state() == blockStateHead || b.state() == blockStateMark { - b++ - } - for b.address() < uintptr(metadataStart) && b.state() == blockStateTail { - b++ - } - return b -} - func (b gcBlock) stateByte() byte { return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) } @@ -200,7 +188,22 @@ func (b gcBlock) setState(newState blockState) { } } -// objHeader is a structure prepended to every heap object to hold metadata. +// unmark changes the state of b from blockStateMark to blockStateHead. 
+func (b gcBlock) unmark() { + if gcAsserts && b.state() != blockStateMark { + runtimePanic("gc: block not marked") + } + stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) + *stateBytePtr ^= uint8(blockStateMark^blockStateHead) << (b % blocksPerStateByte) +} + +// free changes the state of b to blockStateFree. +func (b gcBlock) free() { + stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) + *stateBytePtr &^= uint8(blockStateMask) << (b % blocksPerStateByte) +} + +// objHeader is a structure appended to every heap object to hold metadata. type objHeader struct { // next is the next object to scan after this. next *objHeader @@ -317,8 +320,12 @@ func initHeap() { metadataSize := heapEnd - uintptr(metadataStart) memzero(unsafe.Pointer(metadataStart), metadataSize) - // Rebuild the free ranges list. - buildFreeRanges() + // Create the initial free range. + if endBlock > 0 { + r := (*freeRange)(unsafe.Pointer(heapStart)) + *r = freeRange{len: uintptr(endBlock)} + freeRanges = r + } } // setHeapEnd is called to expand the heap. The heap can only grow, not shrink. @@ -340,6 +347,7 @@ func setHeapEnd(newHeapEnd uintptr) { // memcpy is fine as it only copies the old metadata and the new memory will // have been zero initialized. heapEnd = newHeapEnd + oldEndBlock := endBlock calculateHeapAddresses() memcpy(metadataStart, oldMetadataStart, oldMetadataSize) @@ -351,8 +359,14 @@ func setHeapEnd(newHeapEnd uintptr) { runtimePanic("gc: heap did not grow enough at once") } - // Rebuild the free ranges list. - buildFreeRanges() + // Insert the new free range. This range will be separate from any previous + // free space at the end of the heap. This may result in more heap growth + // than strictly necessary when an allocation requests more memory than the + // previous heap size. Otherwise this will only result in slightly more + // memory fragmentation than necessary. 
We cannot easily remove the old + // range and adding a special free-list rebuild function for this edge case + // would not be worthwhile in terms of binary size or code maintenance. + insertFreeRange(oldEndBlock.pointer(), uintptr(endBlock-oldEndBlock)) } // calculateHeapAddresses initializes variables such as metadataStart and @@ -400,7 +414,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { // Round the size up to a multiple of blocks, adding space for the header. rawSize := size - size += align(unsafe.Sizeof(objHeader{})) + size += unsafe.Sizeof(objHeader{}) size += bytesPerBlock - 1 if size < rawSize { // The size overflowed. @@ -456,25 +470,27 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { runtimePanicAt(returnAddress(0), "out of memory") } - // Set the backing blocks as being allocated. + // Set the block states. block := blockFromAddr(uintptr(pointer)) - block.setState(blockStateHead) - for i := block + 1; i != block+gcBlock(neededBlocks); i++ { + i := block + gcBlock(neededBlocks) - 1 + i.setState(blockStateHead) + for i != block { + i-- i.setState(blockStateTail) } // Create the object header. - header := (*objHeader)(pointer) + size -= unsafe.Sizeof(objHeader{}) + header := (*objHeader)(unsafe.Add(pointer, size)) header.layout = parseGCLayout(layout) // We've claimed this allocation, now we can unlock the heap. gcLock.Unlock() - // Return a pointer to this allocation. - add := align(unsafe.Sizeof(objHeader{})) - pointer = unsafe.Add(pointer, add) - size -= add + // Clear the allocation body. memzero(pointer, size) + + // Return a pointer to this allocation. return pointer } @@ -483,16 +499,28 @@ func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer { return alloc(size, nil) } - ptrAddress := uintptr(ptr) - endOfTailAddress := blockFromAddr(ptrAddress).findNext().address() + // Find the first block of the original allocation. 
+ firstBlock := blockFromAddr(uintptr(ptr)) + + // Find the last block of the original allocation. + lastBlock := firstBlock.findHead() + + // Calculate the size in bytes of the original allocation body (all blocks up to the trailing header). + oldSize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{})) - // this might be a few bytes longer than the original size of - // ptr, because we align to full blocks of size bytesPerBlock - oldSize := endOfTailAddress - ptrAddress if size <= oldSize { + // The requested size is no larger than the old size. + // There are two likely scenarios for this: + // - The caller intended to grow the allocation, but the original size + // was rounded up by alloc to a multiple of the block size. + // The rounded size is already sufficient. + // - The caller intended to shrink the allocation. + // We currently ignore this case. + // Either way, the current allocation can be left alone. return ptr } + // Create a new allocation and copy the old data. newAlloc := alloc(size, nil) memcpy(newAlloc, ptr, oldSize) free(ptr) @@ -559,11 +587,8 @@ func runGC() (freeBytes uintptr) { gcResumeWorld() // Sweep phase: free all non-marked objects and unmark marked objects for - // the next collection cycle. - sweep() - - // Rebuild the free ranges list. - freeBytes = buildFreeRanges() + // the next collection cycle. This also rebuilds the free ranges list. + freeBytes = sweep() // Show how much has been sweeped, for debugging. if gcDebug { @@ -629,13 +654,21 @@ func finishMark() { continue } - // Compute the scan bounds. - objAddr := uintptr(unsafe.Pointer(obj)) - start := objAddr + align(unsafe.Sizeof(objHeader{})) - end := blockFromAddr(objAddr).findNext().address() + // Find the last block in the object. + // This block contains the header. + lastBlock := blockFromAddr(uintptr(unsafe.Pointer(obj))) + + // Find the first block in the allocation.
+ firstBlock := lastBlock + for firstBlock > 0 && (firstBlock-1).state() == blockStateTail { + firstBlock-- + } + + // Compute the size of the allocation. + bodySize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{})) // Scan the object. - obj.layout.scan(start, end-start) + obj.layout.scan(firstBlock.address(), bodySize) } } @@ -668,97 +701,55 @@ func markRoot(addr, root uintptr) { head.setState(blockStateMark) // Add the object to the scan list. - header := (*objHeader)(head.pointer()) + header := (*objHeader)(unsafe.Add(head.pointer(), bytesPerBlock-unsafe.Sizeof(objHeader{}))) header.next = scanList scanList = header } // Sweep goes through all memory and frees unmarked memory. -func sweep() { - metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) - var carry byte - for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { - // Fetch the state byte. - stateBytePtr := (*byte)(unsafe.Pointer(meta)) - stateByte := *stateBytePtr - - // Separate blocks by type. - // Split the nibbles. - // Each nibble is a mask of blocks. - high := stateByte >> blocksPerStateByte - low := stateByte & blockStateEach - // Marked heads are in both nibbles. - markedHeads := low & high - // Unmarked heads are in the low nibble but not the high nibble. - unmarkedHeads := low &^ high - // Tails are in the high nibble but not the low nibble. - tails := high &^ low - - // Clear all tail runs after unmarked (freed) heads. - // - // Adding 1 to the start of a bit run will clear the run and set the next bit: - // (2^k - 1) + 1 = 2^k - // e.g. 0b0011 + 1 = 0b0100 - // Bitwise-and with the original mask to clear the newly set bit. - // e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000 - // This will not clear bits after the run because the gap stops the carry: - // e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000 - // This can clear multiple runs in a single addition: - // e.g. 
(0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000 - // - // In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1). - // It is possible omit the bitwise-and because the clear still works if the next block is not a tail. - // A head is not a tail, so corresponding missing tail bit will stop the carry from a previous tail run. - // As such it will set the next bit which will be cleared back away later. - // e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000 - // - // Treat the whole heap as a single pair of integer masks. - // This is accomplished for addition by carrying the overflow to the next state byte. - // The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum. - // This does not require any special work for the bitwise-and because it operates bitwise. - tailClear := tails + (unmarkedHeads << 1) + carry - carry = tailClear >> blocksPerStateByte - tails &= tailClear - - // Construct the new state byte. - *stateBytePtr = markedHeads | (tails << blocksPerStateByte) - } -} - -// buildFreeRanges rebuilds the freeRanges list. -// This must be called after a GC sweep or heap grow. -// It returns how many bytes are free in the heap. -func buildFreeRanges() uintptr { +func sweep() uintptr { + // Discard the old free ranges list. freeRanges = nil + + // Scan backwards through the block metadata. block := endBlock - var totalBlocks uintptr + var freeBlocks uintptr for { - // Skip backwards over occupied blocks. - for block > 0 && (block-1).state() != blockStateFree { + // Scan backwards until we find a marked head. + // Free the blocks as we go. + freeEnd := block + for block > 0 && (block-1).state() != blockStateMark { block-- + block.free() + } + + if freeLen := uintptr(freeEnd - block); freeLen > 0 { + // Insert the freed blocks. 
+ freeBlocks += freeLen + insertFreeRange(block.pointer(), freeLen) } + if block == 0 { + // There are no more blocks to sweep. break } - // Find the start of the free range. - end := block - for block > 0 && (block-1).state() == blockStateFree { + // Unmark the next head. + block-- + block.unmark() + + // Skip the tail. + for block > 0 && (block-1).state() == blockStateTail { block-- } - - // Insert the free range. - len := uintptr(end - block) - totalBlocks += len - insertFreeRange(block.pointer(), len) } if gcDebug { - println("free ranges after rebuild:") + println("free ranges after sweep:") dumpFreeRangeCounts() } - return totalBlocks * bytesPerBlock + return freeBlocks * bytesPerBlock } func dumpFreeRangeCounts() {