diff --git a/builder/sizes_test.go b/builder/sizes_test.go index f7ee7e1b27..82f3de0995 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3817, 299, 0, 2252}, - {"microbit", "examples/serial", 2820, 356, 8, 2248}, - {"wioterminal", "examples/pininterrupt", 8020, 1652, 132, 7480}, + {"hifive1b", "examples/echo", 3925, 299, 0, 2260}, + {"microbit", "examples/serial", 2876, 356, 8, 2256}, + {"wioterminal", "examples/pininterrupt", 8108, 1652, 132, 7488}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/compiler/asserts.go b/compiler/asserts.go index 7e3a8b1504..7e5df98356 100644 --- a/compiler/asserts.go +++ b/compiler/asserts.go @@ -162,6 +162,11 @@ func (b *builder) createChanBoundsCheck(elementSize uint64, bufSize llvm.Value, // It has no effect in well-behaved programs, but makes sure no uncaught nil // pointer dereferences exist in valid Go code. func (b *builder) createNilCheck(inst ssa.Value, ptr llvm.Value, blockPrefix string) { + if b.info.nobounds { + // Function disabled bounds checking - skip nil check. + return + } + // Check whether we need to emit this check at all. if !ptr.IsAGlobalValue().IsNil() { return diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index a10b594375..1b63ad5439 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -1,35 +1,7 @@ -//go:build gc.conservative || gc.precise +//go:build (gc.conservative || gc.precise) && !oldblocks package runtime -// This memory manager is a textbook mark/sweep implementation, heavily inspired -// by the MicroPython garbage collector. -// -// The memory manager internally uses blocks of 4 pointers big (see -// bytesPerBlock). 
Every allocation first rounds up to this size to align every -// block. It will first try to find a chain of blocks that is big enough to -// satisfy the allocation. If it finds one, it marks the first one as the "head" -// and the following ones (if any) as the "tail" (see below). If it cannot find -// any free space, it will perform a garbage collection cycle and try again. If -// it still cannot find any free space, it gives up. -// -// Every block has some metadata, which is stored at the end of the heap. -// The four states are "free", "head", "tail", and "mark". During normal -// operation, there are no marked blocks. Every allocated object starts with a -// "head" and is followed by "tail" blocks. The reason for this distinction is -// that this way, the start and end of every object can be found easily. -// -// Metadata is stored in a special area at the end of the heap, in the area -// metadataStart..heapEnd. The actual blocks are stored in -// heapStart..metadataStart. -// -// More information: -// https://aykevl.nl/2020/09/gc-tinygo -// https://github.com/micropython/micropython/wiki/Memory-Manager -// https://github.com/micropython/micropython/blob/master/py/gc.c -// "The Garbage Collection Handbook" by Richard Jones, Antony Hosking, Eliot -// Moss. - import ( "internal/task" "runtime/interrupt" @@ -37,168 +9,54 @@ import ( ) const gcDebug = false +const gcTiming = false +const sweepMetrics = false const needsStaticHeap = true // Some globals + constants for the entire GC. const ( - wordsPerBlock = 4 // number of pointers in an allocated block - bytesPerBlock = wordsPerBlock * unsafe.Sizeof(heapStart) - stateBits = 2 // how many bits a block state takes (see blockState type) - blocksPerStateByte = 8 / stateBits + // wordsPerBlock is the number of pointers that can fit into a block without overlapping. + wordsPerBlock = 4 + + // bytesPerBlock is the size of a heap block in bytes. 
+ bytesPerBlock = wordsPerBlock * unsafe.Sizeof(uintptr(0)) ) var ( - metadataStart unsafe.Pointer // pointer to the start of the heap metadata - scanList *objHeader // scanList is a singly linked list of heap objects that have been marked but not scanned - freeRanges *freeRange // freeRanges is a linked list of free block ranges - endBlock gcBlock // the block just past the end of the available space - gcTotalAlloc uint64 // total number of bytes allocated - gcMallocs uint64 // total number of allocations - gcLock task.PMutex // lock to avoid race conditions on multicore systems -) + // endBlocksBitmap is the base address of the end blocks bitmap. + // The last block in a heap object or free range is considered an end block. + endBlocksBitmap uintptr -// zeroSizedAlloc is just a sentinel that gets returned when allocating 0 bytes. -var zeroSizedAlloc uint8 + // visitedBlocksBitmap is the base address of the visited blocks bitmap. + // markRoot "visits" blocks from the marked address to the next end block. + // It may stop early if it finds an already-visited block. + visitedBlocksBitmap uintptr -// Provide some abstraction over heap blocks. + // blocks is the heap size in blocks. + blocks uintptr -// blockState stores the four states in which a block can be. -// It holds 1 bit in each nibble. -// When stored into a state byte, each bit in a nibble corresponds to a different block. -// For blocks A-D, a state byte would be laid out as 0bDCBA_DCBA. -type blockState uint8 + // scanList is a singly linked list of heap objects that have been marked but not scanned. 
+ scanList *objHeader -const ( - blockStateLow blockState = 1 - blockStateHigh blockState = 1 << blocksPerStateByte - - blockStateFree blockState = 0 - blockStateHead blockState = blockStateLow - blockStateTail blockState = blockStateHigh - blockStateMark blockState = blockStateLow | blockStateHigh - blockStateMask blockState = blockStateLow | blockStateHigh -) + // freeRanges is a linked list of free block ranges. + freeRanges *freeRange -// blockStateEach is a mask that can be used to extract a nibble from the block state. -const blockStateEach = 1<= uintptr(metadataStart)) { - runtimePanic("gc: trying to get block from invalid address") - } - return gcBlock((addr - heapStart) / bytesPerBlock) -} - -// Return a pointer to the start of the allocated object. -func (b gcBlock) pointer() unsafe.Pointer { - return unsafe.Pointer(b.address()) -} - -// Return the address of the start of the allocated object. -func (b gcBlock) address() uintptr { - addr := heapStart + uintptr(b)*bytesPerBlock - if gcAsserts && addr > uintptr(metadataStart) { - runtimePanic("gc: block pointing inside metadata") - } - return addr -} + // gcLock is used to control access to the GC on multicore systems. + // The GC is not otherwise thread-safe. + gcLock task.PMutex -// findHead returns the head (first block) of an object, assuming the block -// points to an allocated object. It returns the same block if this block -// already points to the head. -func (b gcBlock) findHead() gcBlock { - for { - // Optimization: check whether the current block state byte (which - // contains the state of multiple blocks) is composed entirely of tail - // blocks. If so, we can skip back to the last block in the previous - // state byte. - // This optimization speeds up findHead for pointers that point into a - // large allocation. 
- stateByte := b.stateByte() - if stateByte == blockStateByteAllTails { - b -= (b % blocksPerStateByte) + 1 - continue - } - - // Check whether we've found a non-tail block, which means we found the - // head. - state := b.stateFromByte(stateByte) - if state != blockStateTail { - break - } - b-- - } - if gcAsserts { - if b.state() != blockStateHead && b.state() != blockStateMark { - runtimePanic("gc: found tail without head") - } - } - return b -} - -// findNext returns the first block just past the end of the tail. This may or -// may not be the head of an object. -func (b gcBlock) findNext() gcBlock { - if b.state() == blockStateHead || b.state() == blockStateMark { - b++ - } - for b.address() < uintptr(metadataStart) && b.state() == blockStateTail { - b++ - } - return b -} - -func (b gcBlock) stateByte() byte { - return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) -} - -// Return the block state given a state byte. The state byte must have been -// obtained using b.stateByte(), otherwise the result is incorrect. -func (b gcBlock) stateFromByte(stateByte byte) blockState { - return blockState(stateByte>>(b%blocksPerStateByte)) & blockStateMask -} - -// State returns the current block state. -func (b gcBlock) state() blockState { - return b.stateFromByte(b.stateByte()) -} - -// setState sets the current block to the given state, which must contain more -// bits than the current state. Allowed transitions: from free to any state and -// from head to mark. -func (b gcBlock) setState(newState blockState) { - stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) - *stateBytePtr |= uint8(newState << (b % blocksPerStateByte)) - if gcAsserts && b.state() != newState { - runtimePanic("gc: setState() was not successful") - } -} + // zeroSizedAlloc is just a sentinel that gets returned when allocating 0 bytes. + zeroSizedAlloc uint8 +) // objHeader is a structure prepended to every heap object to hold metadata. 
type objHeader struct { @@ -230,8 +88,8 @@ type freeRangeMore struct { next *freeRangeMore } -// insertFreeRange inserts a range of len blocks starting at ptr into the free list. -func insertFreeRange(ptr unsafe.Pointer, len uintptr) { +// insertFreeRange inserts a range of len blocks ending at endAddr into the free list. +func insertFreeRange(endAddr uintptr, len uintptr) { if gcAsserts && len == 0 { runtimePanic("gc: insert 0-length free range") } @@ -247,12 +105,12 @@ func insertFreeRange(ptr unsafe.Pointer, len uintptr) { next := *insDst if next != nil && next.len == len { // Insert into the list with this length. - newRange := (*freeRangeMore)(ptr) + newRange := (*freeRangeMore)(unsafe.Pointer(endAddr - unsafe.Sizeof(freeRangeMore{}))) newRange.next = next.nextWithLen next.nextWithLen = newRange } else { // Insert into the list of lengths. - newRange := (*freeRange)(ptr) + newRange := (*freeRange)(unsafe.Pointer(endAddr - unsafe.Sizeof(freeRange{}))) *newRange = freeRange{ len: len, nextLen: next, @@ -262,9 +120,11 @@ func insertFreeRange(ptr unsafe.Pointer, len uintptr) { } } -// popFreeRange removes a range of len blocks from the freeRanges list. -// It returns nil if there are no sufficiently long ranges. -func popFreeRange(len uintptr) unsafe.Pointer { +// popFreeRange removes a range of len blocks from the freeRanges list and returns the ending address. +// It returns 0 if there are no sufficiently long ranges. +// +//go:nobounds +func popFreeRange(len uintptr) uintptr { if gcAsserts && len == 0 { runtimePanic("gc: pop 0-length free range") } @@ -279,46 +139,65 @@ func popFreeRange(len uintptr) unsafe.Pointer { rangeWithLength := *remDst if rangeWithLength == nil { // No ranges are long enough. - return nil + return 0 } removedLen := rangeWithLength.len // Remove the range. - var ptr unsafe.Pointer + var endAddr uintptr if nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil { // Remove from the list with this length. 
rangeWithLength.nextWithLen = nextWithLen.next - ptr = unsafe.Pointer(nextWithLen) + endAddr = uintptr(unsafe.Pointer(nextWithLen)) + unsafe.Sizeof(freeRangeMore{}) } else { // Remove from the list of lengths. *remDst = rangeWithLength.nextLen - ptr = unsafe.Pointer(rangeWithLength) + endAddr = uintptr(unsafe.Pointer(rangeWithLength)) + unsafe.Sizeof(freeRange{}) } if removedLen > len { // Insert the leftover range. - insertFreeRange(unsafe.Add(ptr, len*bytesPerBlock), removedLen-len) + leftover := removedLen - len + insertFreeRange(endAddr, leftover) + endAddr -= leftover * bytesPerBlock + + // Add the new range to the ends bitmap. + endBlock := ((endAddr - heapStart) / bytesPerBlock) - 1 + *(*byte)(unsafe.Pointer(endBlocksBitmap + (endBlock / 8))) |= 1 << (endBlock % 8) } - return ptr -} -func isOnHeap(ptr uintptr) bool { - return ptr >= heapStart && ptr < uintptr(metadataStart) + return endAddr } // Initialize the memory allocator. // No memory may be allocated before this is called. That means the runtime and // any packages the runtime depends upon may not allocate memory during package // initialization. +// +//go:nobounds func initHeap() { calculateHeapAddresses() - // Set all block states to 'free'. - metadataSize := heapEnd - uintptr(metadataStart) - memzero(unsafe.Pointer(metadataStart), metadataSize) + // Initialize the ends bitmap. + endBlocksBitmap := endBlocksBitmap + visitedBlocksBitmap := visitedBlocksBitmap + bitmapSize := visitedBlocksBitmap - endBlocksBitmap + if bitmapSize == 0 { + // Empty heap. + return + } + memzero(unsafe.Pointer(endBlocksBitmap), bitmapSize-1) + blocks := blocks + *(*byte)(unsafe.Pointer(visitedBlocksBitmap - 1)) = 1 << ((blocks - 1) % 8) - // Rebuild the free ranges list. - buildFreeRanges() + // Insert the initial free range. 
+ r := (*freeRange)(unsafe.Pointer(endBlocksBitmap - unsafe.Sizeof(freeRange{}))) + freeRanges = r + *r = freeRange{ + len: blocks, + nextLen: nil, + nextWithLen: nil, + } } // setHeapEnd is called to expand the heap. The heap can only grow, not shrink. @@ -330,58 +209,68 @@ func setHeapEnd(newHeapEnd uintptr) { } // Save some old variables we need later. - oldMetadataStart := metadataStart - oldMetadataSize := heapEnd - uintptr(metadataStart) - - // Increase the heap. After setting the new heapEnd, calculateHeapAddresses - // will update metadataStart and the memcpy will copy the metadata to the - // new location. - // The new metadata will be bigger than the old metadata, but a simple - // memcpy is fine as it only copies the old metadata and the new memory will - // have been zero initialized. + oldEndBlocksBitmap := endBlocksBitmap + oldBitmapSize := visitedBlocksBitmap - endBlocksBitmap + + // Update the heap layout. heapEnd = newHeapEnd calculateHeapAddresses() - memcpy(metadataStart, oldMetadataStart, oldMetadataSize) - // Note: the memcpy above assumes the heap grows enough so that the new - // metadata does not overlap the old metadata. If that isn't true, memmove - // should be used to avoid corruption. - // This assert checks whether that's true. - if gcAsserts && uintptr(metadataStart) < uintptr(oldMetadataStart)+oldMetadataSize { - runtimePanic("gc: heap did not grow enough at once") - } + // Move the old end blocks bitmap. + endBlocksBitmap := endBlocksBitmap + memmove(unsafe.Pointer(endBlocksBitmap), unsafe.Pointer(oldEndBlocksBitmap), oldBitmapSize) + + // Widen the bitmap. + visitedBlocksBitmap := visitedBlocksBitmap + newBitmapSize := visitedBlocksBitmap - endBlocksBitmap + memzero(unsafe.Pointer(endBlocksBitmap+oldBitmapSize), newBitmapSize-oldBitmapSize) + + // Remove the free ranges from the endBlocksBitmap. + toggleFree(endBlocksBitmap) + + // Populate the visitedBlocksBitmap with free range ends (including the new free range). 
+ memzero(unsafe.Pointer(visitedBlocksBitmap), newBitmapSize) + *(*byte)(unsafe.Pointer(visitedBlocksBitmap + newBitmapSize - 1)) |= 1 << ((blocks - 1) % 8) + toggleFree(visitedBlocksBitmap) - // Rebuild the free ranges list. + // Rebuild the free ranges. buildFreeRanges() } -// calculateHeapAddresses initializes variables such as metadataStart and -// numBlock based on heapStart and heapEnd. +// calculateHeapAddresses initializes the heap layout variables based on +// heapStart and heapEnd. // // This function can be called again when the heap size increases. The caller is -// responsible for copying the metadata to the new location. +// responsible for copying the endBlocksBitmap to the new location. func calculateHeapAddresses() { totalSize := heapEnd - heapStart // Allocate some memory to keep 2 bits of information about every block. - metadataSize := (totalSize + blocksPerStateByte*bytesPerBlock) / (1 + blocksPerStateByte*bytesPerBlock) - metadataStart = unsafe.Pointer(heapEnd - metadataSize) - // Use the rest of the available memory as heap. 
- numBlocks := (uintptr(metadataStart) - heapStart) / bytesPerBlock - endBlock = gcBlock(numBlocks) + const batchSize = 8*bytesPerBlock + 2 + bitmapSize := (totalSize + batchSize - bytesPerBlock) / batchSize + blocks = (totalSize - 2*bitmapSize) / bytesPerBlock + endBlocksBitmap = heapStart + blocks*bytesPerBlock + visitedBlocksBitmap = endBlocksBitmap + bitmapSize + if gcDebug { - println("heapStart: ", heapStart) - println("heapEnd: ", heapEnd) - println("total size: ", totalSize) - println("metadata size: ", metadataSize) - println("metadataStart: ", metadataStart) - println("# of blocks: ", numBlocks) - println("# of block states:", metadataSize*blocksPerStateByte) - } - if gcAsserts && metadataSize*blocksPerStateByte < numBlocks { + println("heapStart: ", heapStart) + println("heapEnd: ", heapEnd) + println("total size: ", totalSize) + println("bitmap size: ", bitmapSize) + println("endBlocksBitmap: ", endBlocksBitmap) + println("visitedBlocksBitmap:", visitedBlocksBitmap) + println("# of blocks: ", blocks) + } + + if gcAsserts { // sanity check - runtimePanic("gc: metadata array is too small") + if 8*bitmapSize < blocks { + runtimePanic("gc: metadata array is too small") + } + if visitedBlocksBitmap+bitmapSize > heapEnd { + runtimePanic("gc: heap bounds overrun") + } } } @@ -389,6 +278,7 @@ func calculateHeapAddresses() { // collection cycle if needed. If no space is free, it panics. // //go:noinline +//go:nobounds func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { if size == 0 { return unsafe.Pointer(&zeroSizedAlloc) @@ -400,7 +290,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { // Round the size up to a multiple of blocks, adding space for the header. rawSize := size - size += align(unsafe.Sizeof(objHeader{})) + size += unsafe.Sizeof(objHeader{}) size += bytesPerBlock - 1 if size < rawSize { // The size overflowed. 
@@ -420,10 +310,10 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { // Acquire a range of free blocks. var ranGC bool var grewHeap bool - var pointer unsafe.Pointer + var endAddr uintptr for { - pointer = popFreeRange(neededBlocks) - if pointer != nil { + endAddr = popFreeRange(neededBlocks) + if endAddr != 0 { break } @@ -431,7 +321,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { // Run the collector and try again. freeBytes := runGC() ranGC = true - heapSize := uintptr(metadataStart) - heapStart + heapSize := endBlocksBitmap - heapStart if freeBytes < heapSize/3 { // Ensure there is at least 33% headroom. // This percentage was arbitrarily chosen, and may need to @@ -456,24 +346,19 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { runtimePanicAt(returnAddress(0), "out of memory") } - // Set the backing blocks as being allocated. - block := blockFromAddr(uintptr(pointer)) - block.setState(blockStateHead) - for i := block + 1; i != block+gcBlock(neededBlocks); i++ { - i.setState(blockStateTail) - } - // Create the object header. - header := (*objHeader)(pointer) + header := (*objHeader)(unsafe.Pointer(endAddr - unsafe.Sizeof(objHeader{}))) header.layout = parseGCLayout(layout) // We've claimed this allocation, now we can unlock the heap. gcLock.Unlock() // Return a pointer to this allocation. 
- add := align(unsafe.Sizeof(objHeader{})) - pointer = unsafe.Add(pointer, add) - size -= add + pointer := unsafe.Pointer(endAddr - size) + size -= unsafe.Sizeof(objHeader{}) + if gcDebug { + println("alloc", pointer, "-", endAddr, "size:", size) + } memzero(pointer, size) return pointer } @@ -483,12 +368,23 @@ func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer { return alloc(size, nil) } - ptrAddress := uintptr(ptr) - endOfTailAddress := blockFromAddr(ptrAddress).findNext().address() + startBlock := (uintptr(ptr) - heapStart) / bytesPerBlock + blocks := blocks + endBlocksBitmap := endBlocksBitmap + endBlock := startBlock + for { + endBlock++ + if endBlock >= blocks { + break + } + if *(*byte)(unsafe.Pointer(endBlocksBitmap + (endBlock / 8)))&(1<<(endBlock%8)) != 0 { + break + } + } // this might be a few bytes longer than the original size of // ptr, because we align to full blocks of size bytesPerBlock - oldSize := endOfTailAddress - ptrAddress + oldSize := (endBlock-startBlock)*bytesPerBlock - unsafe.Sizeof(objHeader{}) if size <= oldSize { return ptr } @@ -519,9 +415,31 @@ func runGC() (freeBytes uintptr) { println("running collection cycle...") } + var gcStart timeUnit + if gcTiming { + gcStart = ticks() + } + + // Clear the visited bitmap. + memzero(unsafe.Pointer(visitedBlocksBitmap), visitedBlocksBitmap-endBlocksBitmap) + + // Add all free range ends to the visited bitmap. + // This will prevent them from being added to the scan list. + toggleFree(visitedBlocksBitmap) + + var gcPrepEnd timeUnit + if gcTiming { + gcPrepEnd = ticks() + } + // Mark phase: mark all reachable objects, recursively. gcMarkReachable() + var gcPreMarkEnd timeUnit + if gcTiming { + gcPreMarkEnd = ticks() + } + if baremetal && hasScheduler { // Channel operations in interrupts may move task pointers around while we are marking. // Therefore we need to scan the runqueue separately. 
@@ -554,25 +472,64 @@ func runGC() (freeBytes uintptr) { finishMark() } + var gcPostMarkEnd timeUnit + if gcTiming { + gcPostMarkEnd = ticks() + } + // If we're using threads, resume all other threads before starting the // sweep. gcResumeWorld() + // Remove all free range ends from the visited bitmap so that they can be swept. + toggleFree(visitedBlocksBitmap) + + var gcCleanupEnd timeUnit + if gcTiming { + gcCleanupEnd = ticks() + } + // Sweep phase: free all non-marked objects and unmark marked objects for // the next collection cycle. - sweep() + freeBytes = sweep() - // Rebuild the free ranges list. - freeBytes = buildFreeRanges() + var gcSweepEnd timeUnit + if gcTiming { + gcSweepEnd = ticks() + } + + if gcTiming { + println("gc timing:", ticksToNanoseconds(gcSweepEnd-gcStart), "ns") + println("\tprep: ", ticksToNanoseconds(gcPrepEnd-gcStart), "ns") + println("\tpre-mark: ", ticksToNanoseconds(gcPreMarkEnd-gcPrepEnd), "ns") + println("\tpost-mark:", ticksToNanoseconds(gcPostMarkEnd-gcPreMarkEnd), "ns") + println("\tcleanup: ", ticksToNanoseconds(gcCleanupEnd-gcPostMarkEnd), "ns") + println("\tsweep: ", ticksToNanoseconds(gcSweepEnd-gcCleanupEnd), "ns") + } - // Show how much has been sweeped, for debugging. if gcDebug { - dumpHeap() + println("free ranges after gc:") + dumpFreeRangeCounts() } return } +// toggleFree toggles the ends of free ranges in the provided bitmap. 
+// +//go:nobounds +func toggleFree(base uintptr) { + heapStart := heapStart + for rangeWithLength := freeRanges; rangeWithLength != nil; rangeWithLength = rangeWithLength.nextLen { + rangeWithLengthBlock := (uintptr(unsafe.Pointer(rangeWithLength)) - heapStart) / bytesPerBlock + *(*byte)(unsafe.Pointer(base + (rangeWithLengthBlock / 8))) ^= 1 << (rangeWithLengthBlock % 8) + for nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil; nextWithLen = nextWithLen.next { + nextWithLenBlock := (uintptr(unsafe.Pointer(nextWithLen)) - heapStart) / bytesPerBlock + *(*byte)(unsafe.Pointer(base + (nextWithLenBlock / 8))) ^= 1 << (nextWithLenBlock % 8) + } + } +} + // markRoots reads all pointers from start to end (exclusive) and if they look // like a heap pointer and are unmarked, marks them and scans that object as // well (recursively). The starting address must be valid and aligned. @@ -595,6 +552,8 @@ func markRoots(start, end uintptr) { // scanConservative scans all possible pointer locations in a range and marks referenced heap allocations. // The starting address must be valid and pointer-aligned. +// +//go:nobounds func scanConservative(addr, len uintptr) { for len >= unsafe.Sizeof(addr) { root := *(*uintptr)(unsafe.Pointer(addr)) @@ -611,7 +570,85 @@ func markCurrentGoroutineStack(sp uintptr) { markRoot(0, sp) } +// mark a GC root at the address addr. +// +//go:nobounds +func markRoot(addr, root uintptr) { + // Find the corresponding heap block index. + heapStart := heapStart + block := (root - heapStart) / bytesPerBlock + if block >= blocks { + // This is not on the heap. + return + } + + // Visit blocks until we reach an end. + endBlocksBitmap := endBlocksBitmap + visitedBlocksBitmap := visitedBlocksBitmap + for { + // Split the bitmap position into a byte and a bit. + byteIdx := block / 8 + bit := byte(1) << (block % 8) + + // Subtracting a bit from the ends bitmap will clear the next bit and set the preceding bits. 
+ // If there is no following end, the subtraction will underflow and set all following bits. + ends := *(*byte)(unsafe.Pointer(endBlocksBitmap + byteIdx)) + endSub := ends - bit + // If we xor this with the ends bitmap: + // - Unrelated ends will be excluded. + // - The set bits up to the end will be preserved. + // - The cleared end will be set. + // This produces a bitmap of blocks up to and including the end. + newVisit := endSub ^ ends + + // Add these blocks to the visited bitmap. + visitedPtr := (*byte)(unsafe.Pointer(visitedBlocksBitmap + byteIdx)) + oldVisit := *visitedPtr + *visitedPtr = oldVisit | newVisit + if oldVisit&newVisit != 0 { + // We reached a block that has already been visited. + // This markRoot is redundant. + if gcDebug { + println("root already visited", root, "from", addr) + } + return + } + + if endSub <= ends { + // We reached an unvisited end. + // Compute the final block index. + tmp := newVisit + for { + tmp >>= 1 + if tmp < bit { + break + } + block++ + } + break + } + + // Skip to the next bitmap byte. + block = (block | 7) + 1 + } + + if gcAsserts && *(*byte)(unsafe.Pointer(endBlocksBitmap + (block / 8)))&(1<<(block%8)) == 0 { + runtimePanic("wrong end") + } + + if gcDebug { + println("mark root", root, "from", addr, "end", heapStart+block*bytesPerBlock+bytesPerBlock) + } + + // Add the object to the scan list. + hdr := (*objHeader)(unsafe.Pointer(heapStart + block*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{})))) + hdr.next = scanList + scanList = hdr +} + // finishMark finishes the marking process by scanning all heap objects on scanList. +// +//go:nobounds func finishMark() { for { // Remove an object from the scan list. @@ -630,135 +667,120 @@ func finishMark() { } // Compute the scan bounds. 
- objAddr := uintptr(unsafe.Pointer(obj)) - start := objAddr + align(unsafe.Sizeof(objHeader{})) - end := blockFromAddr(objAddr).findNext().address() + end := uintptr(unsafe.Pointer(obj)) + heapStart := heapStart + endBlock := (end - heapStart) / bytesPerBlock + startBlock := gcBitmapScanBackwards(endBlocksBitmap, endBlock) + 1 + start := heapStart + startBlock*bytesPerBlock // Scan the object. obj.layout.scan(start, end-start) } } -// mark a GC root at the address addr. -func markRoot(addr, root uintptr) { - // Find the heap block corresponding to the root. - if !isOnHeap(root) { - // This is not a heap pointer. - return +//go:nobounds +func sweep() uintptr { + // Split the ends into two bitmaps: one with visited ends and one with unvisited ends. + endBlocksBitmap := endBlocksBitmap + visitedBlocksBitmap := visitedBlocksBitmap + for i := visitedBlocksBitmap - endBlocksBitmap; i > 0; { + i-- + endsPtr := (*byte)(unsafe.Pointer(endBlocksBitmap + i)) + visitedPtr := (*byte)(unsafe.Pointer(visitedBlocksBitmap + i)) + ends := *endsPtr + visited := *visitedPtr + *endsPtr = ends & visited + *visitedPtr = ends &^ visited } - block := blockFromAddr(root) - // Find the head of the corresponding object. - if block.state() == blockStateFree { - // The to-be-marked object doesn't actually exist. - // This could either be a dangling pointer (oops!) but most likely - // just a false positive. - return - } - head := block.findHead() - - // Mark the object. - if head.state() == blockStateMark { - // This object is already marked. - return - } - if gcDebug { - println("found unmarked pointer", root, "at address", addr) - } - head.setState(blockStateMark) - - // Add the object to the scan list. - header := (*objHeader)(head.pointer()) - header.next = scanList - scanList = header + return buildFreeRanges() } -// Sweep goes through all memory and frees unmarked memory. 
-func sweep() { - metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) - var carry byte - for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { - // Fetch the state byte. - stateBytePtr := (*byte)(unsafe.Pointer(meta)) - stateByte := *stateBytePtr - - // Separate blocks by type. - // Split the nibbles. - // Each nibble is a mask of blocks. - high := stateByte >> blocksPerStateByte - low := stateByte & blockStateEach - // Marked heads are in both nibbles. - markedHeads := low & high - // Unmarked heads are in the low nibble but not the high nibble. - unmarkedHeads := low &^ high - // Tails are in the high nibble but not the low nibble. - tails := high &^ low - - // Clear all tail runs after unmarked (freed) heads. - // - // Adding 1 to the start of a bit run will clear the run and set the next bit: - // (2^k - 1) + 1 = 2^k - // e.g. 0b0011 + 1 = 0b0100 - // Bitwise-and with the original mask to clear the newly set bit. - // e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000 - // This will not clear bits after the run because the gap stops the carry: - // e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000 - // This can clear multiple runs in a single addition: - // e.g. (0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000 - // - // In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1). - // It is possible omit the bitwise-and because the clear still works if the next block is not a tail. - // A head is not a tail, so corresponding missing tail bit will stop the carry from a previous tail run. - // As such it will set the next bit which will be cleared back away later. - // e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000 - // - // Treat the whole heap as a single pair of integer masks. - // This is accomplished for addition by carrying the overflow to the next state byte. 
- // The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum. - // This does not require any special work for the bitwise-and because it operates bitwise. - tailClear := tails + (unmarkedHeads << 1) + carry - carry = tailClear >> blocksPerStateByte - tails &= tailClear - - // Construct the new state byte. - *stateBytePtr = markedHeads | (tails << blocksPerStateByte) - } -} - -// buildFreeRanges rebuilds the freeRanges list. -// This must be called after a GC sweep or heap grow. -// It returns how many bytes are free in the heap. +//go:nobounds func buildFreeRanges() uintptr { + // Clear the free ranges list. freeRanges = nil - block := endBlock - var totalBlocks uintptr - for { - // Skip backwards over occupied blocks. - for block > 0 && (block-1).state() != blockStateFree { - block-- - } - if block == 0 { + + heapStart := heapStart + var totalFreeBlocks uintptr + var totalFreeRanges uintptr + for block := blocks; ; { + // Find the next unvisited end. + groupEnd := gcBitmapScanBackwards(visitedBlocksBitmap, block) + if groupEnd == ^uintptr(0) { break } - // Find the start of the free range. - end := block - for block > 0 && (block-1).state() == blockStateFree { - block-- + // Add it to the new ends bitmap. + *(*byte)(unsafe.Pointer(endBlocksBitmap + (groupEnd / 8))) |= 1 << (groupEnd % 8) + + // Find the next visited end. + block = gcBitmapScanBackwards(endBlocksBitmap, groupEnd) + + // Add the range between these ends to the free list. + groupBlocks := groupEnd - block + totalFreeBlocks += groupBlocks + if gcDebug { + println("insert free range", heapStart+block*bytesPerBlock+bytesPerBlock, "-", heapStart+groupEnd*bytesPerBlock+bytesPerBlock, "blocks:", groupBlocks) } + insertFreeRange(heapStart+groupEnd*bytesPerBlock+bytesPerBlock, groupBlocks) + totalFreeRanges++ - // Insert the free range. 
- len := uintptr(end - block) - totalBlocks += len - insertFreeRange(block.pointer(), len) + if block == ^uintptr(0) { + // There was no visited end. + // This is the end of the heap. + break + } } - if gcDebug { - println("free ranges after rebuild:") - dumpFreeRangeCounts() + if sweepMetrics { + var sourceFrees uintptr + for i := visitedBlocksBitmap - endBlocksBitmap; i > 0; { + i-- + mask := *(*byte)(unsafe.Pointer(visitedBlocksBitmap + i)) + for mask != 0 { + sourceFrees++ + mask &= mask - 1 + } + } + println("sweep metrics:") + println("\tsource free ranges: ", uint(sourceFrees)) + println("\tfree blocks: ", uint(totalFreeBlocks)) + println("\tfree ranges: ", uint(totalFreeRanges)) + println("\tavg blocks per range:", uint(totalFreeBlocks/totalFreeRanges)) + println("\tavg merged: ", uint(sourceFrees/totalFreeRanges)) + } + + return totalFreeBlocks * bytesPerBlock +} + +//go:nobounds +func gcBitmapScanBackwards(base uintptr, idx uintptr) uintptr { + idx-- + if idx == ^uintptr(0) { + return idx + } + maskAddr := base + (idx / 8) + mask := *(*byte)(unsafe.Pointer(maskAddr)) << (7 - (idx % 8)) + if mask == 0 { + idx |= 7 + for { + idx -= 8 + if idx == ^uintptr(0) { + return idx + } + maskAddr-- + mask = *(*byte)(unsafe.Pointer(maskAddr)) + if mask != 0 { + break + } + } } - - return totalBlocks * bytesPerBlock + for mask < 1<<7 { + mask <<= 1 + idx-- + } + return idx } func dumpFreeRangeCounts() { @@ -771,27 +793,6 @@ func dumpFreeRangeCounts() { } } -// dumpHeap can be used for debugging purposes. It dumps the state of each heap -// block to standard output. -func dumpHeap() { - println("heap:") - for block := gcBlock(0); block < endBlock; block++ { - switch block.state() { - case blockStateHead: - print("*") - case blockStateTail: - print("-") - case blockStateMark: - print("#") - default: // free - print("·") - } - if block%64 == 63 || block+1 == endBlock { - println() - } - } -} - // ReadMemStats populates m with memory statistics. 
// // The returned memory statistics are up to date as of the @@ -803,43 +804,57 @@ func ReadMemStats(m *MemStats) { heapEnd := heapEnd heapStart := heapStart m.Sys = uint64(heapEnd - heapStart) - m.HeapSys = uint64(uintptr(metadataStart) - heapStart) - metadataStart := metadataStart + endBlocksBitmap := endBlocksBitmap + m.HeapSys = uint64(endBlocksBitmap - heapStart) // TODO: should GCSys include objHeaders? - m.GCSys = uint64(heapEnd - uintptr(metadataStart)) + m.GCSys = uint64(heapEnd - endBlocksBitmap) m.HeapReleased = 0 // always 0, we don't currently release memory back to the OS. - // Count live heads and tails. - var liveHeads, liveTails uintptr - endBlock := endBlock - metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) - for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { - // Since we are outside of a GC, nothing is marked. - // A bit in the low nibble implies a head. - // A bit in the high nibble implies a tail. - stateByte := *(*byte)(unsafe.Pointer(meta)) - liveHeads += uintptr(count4LUT[stateByte&blockStateEach]) - liveTails += uintptr(count4LUT[stateByte>>blocksPerStateByte]) - } - - // Add heads and tails to count live blocks. - liveBlocks := liveHeads + liveTails + // Count ends. + blocks := blocks + var ends uintptr + for i := visitedBlocksBitmap - endBlocksBitmap; i > 0; { + i-- + mask := *(*byte)(unsafe.Pointer(endBlocksBitmap + i)) + for mask != 0 { + ends++ + mask &= mask - 1 + } + } + + // Count free ranges and their contained space. + var freeRangeCount uintptr + var freeBlocks uintptr + for rangeWithLength := freeRanges; rangeWithLength != nil; rangeWithLength = rangeWithLength.nextLen { + freeRangeCount++ + len := rangeWithLength.len + freeBlocks += len + for nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil; nextWithLen = nextWithLen.next { + freeRangeCount++ + freeBlocks += len + } + } + + // Record the free space. 
+ m.HeapIdle = uint64(freeBlocks * bytesPerBlock) + + // Subtract free blocks from total blocks to count live blocks. + liveBlocks := blocks - freeBlocks liveBytes := uint64(liveBlocks * bytesPerBlock) m.HeapInuse = liveBytes m.HeapAlloc = liveBytes - m.HeapObjects = uint64(liveHeads) - m.Alloc = liveBytes - // Subtract live blocks from total blocks to count free blocks. - freeBlocks := uintptr(endBlock) - liveBlocks - m.HeapIdle = uint64(freeBlocks * bytesPerBlock) + // Subtract free ranges from total ends to count live objects. + liveObjects := blocks - freeRangeCount + m.HeapObjects = uint64(blocks - freeRangeCount) + m.Alloc = liveBytes - // Record the number of allocated objects. + // Record the lifetime allocation count of the GC. gcMallocs := gcMallocs m.Mallocs = gcMallocs // Subtract live objects from allocated objects to count freed objects. - m.Frees = gcMallocs - uint64(liveHeads) + m.Frees = gcMallocs - uint64(liveObjects) // Record the total allocated bytes. m.TotalAlloc = gcTotalAlloc @@ -847,27 +862,6 @@ func ReadMemStats(m *MemStats) { gcLock.Unlock() } -// count4LUT is a lookup table used to count set bits in a 4-bit mask. -// TODO: replace with popcnt when available -var count4LUT = [16]uint8{ - 0b0000: 0, - 0b0001: 1, - 0b0010: 1, - 0b0011: 2, - 0b0100: 1, - 0b0101: 2, - 0b0110: 2, - 0b0111: 3, - 0b1000: 1, - 0b1001: 2, - 0b1010: 2, - 0b1011: 3, - 0b1100: 2, - 0b1101: 3, - 0b1110: 3, - 0b1111: 4, -} - func SetFinalizer(obj interface{}, finalizer interface{}) { // Unimplemented. } diff --git a/src/runtime/gc_blocks_old.go b/src/runtime/gc_blocks_old.go new file mode 100644 index 0000000000..0e2e7ead48 --- /dev/null +++ b/src/runtime/gc_blocks_old.go @@ -0,0 +1,907 @@ +//go:build (gc.conservative || gc.precise) && oldblocks + +package runtime + +// This memory manager is a textbook mark/sweep implementation, heavily inspired +// by the MicroPython garbage collector. 
+// +// The memory manager internally uses blocks of 4 pointers big (see +// bytesPerBlock). Every allocation first rounds up to this size to align every +// block. It will first try to find a chain of blocks that is big enough to +// satisfy the allocation. If it finds one, it marks the first one as the "head" +// and the following ones (if any) as the "tail" (see below). If it cannot find +// any free space, it will perform a garbage collection cycle and try again. If +// it still cannot find any free space, it gives up. +// +// Every block has some metadata, which is stored at the end of the heap. +// The four states are "free", "head", "tail", and "mark". During normal +// operation, there are no marked blocks. Every allocated object starts with a +// "head" and is followed by "tail" blocks. The reason for this distinction is +// that this way, the start and end of every object can be found easily. +// +// Metadata is stored in a special area at the end of the heap, in the area +// metadataStart..heapEnd. The actual blocks are stored in +// heapStart..metadataStart. +// +// More information: +// https://aykevl.nl/2020/09/gc-tinygo +// https://github.com/micropython/micropython/wiki/Memory-Manager +// https://github.com/micropython/micropython/blob/master/py/gc.c +// "The Garbage Collection Handbook" by Richard Jones, Antony Hosking, Eliot +// Moss. + +import ( + "internal/task" + "runtime/interrupt" + "unsafe" +) + +const gcDebug = false +const gcTiming = false +const needsStaticHeap = true + +// Some globals + constants for the entire GC. 
+ +const ( + wordsPerBlock = 4 // number of pointers in an allocated block + bytesPerBlock = wordsPerBlock * unsafe.Sizeof(heapStart) + stateBits = 2 // how many bits a block state takes (see blockState type) + blocksPerStateByte = 8 / stateBits +) + +var ( + metadataStart unsafe.Pointer // pointer to the start of the heap metadata + scanList *objHeader // scanList is a singly linked list of heap objects that have been marked but not scanned + freeRanges *freeRange // freeRanges is a linked list of free block ranges + endBlock gcBlock // the block just past the end of the available space + gcTotalAlloc uint64 // total number of bytes allocated + gcMallocs uint64 // total number of allocations + gcLock task.PMutex // lock to avoid race conditions on multicore systems +) + +// zeroSizedAlloc is just a sentinel that gets returned when allocating 0 bytes. +var zeroSizedAlloc uint8 + +// Provide some abstraction over heap blocks. + +// blockState stores the four states in which a block can be. +// It holds 1 bit in each nibble. +// When stored into a state byte, each bit in a nibble corresponds to a different block. +// For blocks A-D, a state byte would be laid out as 0bDCBA_DCBA. +type blockState uint8 + +const ( + blockStateLow blockState = 1 + blockStateHigh blockState = 1 << blocksPerStateByte + + blockStateFree blockState = 0 + blockStateHead blockState = blockStateLow + blockStateTail blockState = blockStateHigh + blockStateMark blockState = blockStateLow | blockStateHigh + blockStateMask blockState = blockStateLow | blockStateHigh +) + +// blockStateEach is a mask that can be used to extract a nibble from the block state. +const blockStateEach = 1<= uintptr(metadataStart)) { + runtimePanic("gc: trying to get block from invalid address") + } + return gcBlock((addr - heapStart) / bytesPerBlock) +} + +// Return a pointer to the start of the allocated object. 
+func (b gcBlock) pointer() unsafe.Pointer { + return unsafe.Pointer(b.address()) +} + +// Return the address of the start of the allocated object. +func (b gcBlock) address() uintptr { + addr := heapStart + uintptr(b)*bytesPerBlock + if gcAsserts && addr > uintptr(metadataStart) { + runtimePanic("gc: block pointing inside metadata") + } + return addr +} + +// findHead returns the head (first block) of an object, assuming the block +// points to an allocated object. It returns the same block if this block +// already points to the head. +func (b gcBlock) findHead() gcBlock { + for { + // Optimization: check whether the current block state byte (which + // contains the state of multiple blocks) is composed entirely of tail + // blocks. If so, we can skip back to the last block in the previous + // state byte. + // This optimization speeds up findHead for pointers that point into a + // large allocation. + stateByte := b.stateByte() + if stateByte == blockStateByteAllTails { + b -= (b % blocksPerStateByte) + 1 + continue + } + + // Check whether we've found a non-tail block, which means we found the + // head. + state := b.stateFromByte(stateByte) + if state != blockStateTail { + break + } + b-- + } + if gcAsserts { + if b.state() != blockStateHead && b.state() != blockStateMark { + runtimePanic("gc: found tail without head") + } + } + return b +} + +// findNext returns the first block just past the end of the tail. This may or +// may not be the head of an object. +func (b gcBlock) findNext() gcBlock { + if b.state() == blockStateHead || b.state() == blockStateMark { + b++ + } + for b.address() < uintptr(metadataStart) && b.state() == blockStateTail { + b++ + } + return b +} + +func (b gcBlock) stateByte() byte { + return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) +} + +// Return the block state given a state byte. The state byte must have been +// obtained using b.stateByte(), otherwise the result is incorrect. 
+func (b gcBlock) stateFromByte(stateByte byte) blockState { + return blockState(stateByte>>(b%blocksPerStateByte)) & blockStateMask +} + +// State returns the current block state. +func (b gcBlock) state() blockState { + return b.stateFromByte(b.stateByte()) +} + +// setState sets the current block to the given state, which must contain more +// bits than the current state. Allowed transitions: from free to any state and +// from head to mark. +func (b gcBlock) setState(newState blockState) { + stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) + *stateBytePtr |= uint8(newState << (b % blocksPerStateByte)) + if gcAsserts && b.state() != newState { + runtimePanic("gc: setState() was not successful") + } +} + +// objHeader is a structure prepended to every heap object to hold metadata. +type objHeader struct { + // next is the next object to scan after this. + next *objHeader + + // layout holds the layout bitmap used to find pointers in the object. + layout gcLayout +} + +// freeRange is a node on the outer list of range lengths. +// The free ranges are structured as two nested singly-linked lists: +// - The outer level (freeRange) has one entry for each unique range length. +// - The inner level (freeRangeMore) has one entry for each additional range of the same length. +// This two-level structure ensures that insertion/removal times are proportional to the requested length. +type freeRange struct { + // len is the length of this free range. + len uintptr + + // nextLen is the next longer free range. + nextLen *freeRange + + // nextWithLen is the next free range with this length. + nextWithLen *freeRangeMore +} + +// freeRangeMore is a node on the inner list of equal-length ranges. +type freeRangeMore struct { + next *freeRangeMore +} + +// insertFreeRange inserts a range of len blocks starting at ptr into the free list. 
+func insertFreeRange(ptr unsafe.Pointer, len uintptr) { + if gcAsserts && len == 0 { + runtimePanic("gc: insert 0-length free range") + } + + // Find the insertion point by length. + // Skip until the next range is at least the target length. + insDst := &freeRanges + for *insDst != nil && (*insDst).len < len { + insDst = &(*insDst).nextLen + } + + // Create the new free range. + next := *insDst + if next != nil && next.len == len { + // Insert into the list with this length. + newRange := (*freeRangeMore)(ptr) + newRange.next = next.nextWithLen + next.nextWithLen = newRange + } else { + // Insert into the list of lengths. + newRange := (*freeRange)(ptr) + *newRange = freeRange{ + len: len, + nextLen: next, + nextWithLen: nil, + } + *insDst = newRange + } +} + +// popFreeRange removes a range of len blocks from the freeRanges list. +// It returns nil if there are no sufficiently long ranges. +func popFreeRange(len uintptr) unsafe.Pointer { + if gcAsserts && len == 0 { + runtimePanic("gc: pop 0-length free range") + } + + // Find the removal point by length. + // Skip until the next range is at least the target length. + remDst := &freeRanges + for *remDst != nil && (*remDst).len < len { + remDst = &(*remDst).nextLen + } + + rangeWithLength := *remDst + if rangeWithLength == nil { + // No ranges are long enough. + return nil + } + removedLen := rangeWithLength.len + + // Remove the range. + var ptr unsafe.Pointer + if nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil { + // Remove from the list with this length. + rangeWithLength.nextWithLen = nextWithLen.next + ptr = unsafe.Pointer(nextWithLen) + } else { + // Remove from the list of lengths. + *remDst = rangeWithLength.nextLen + ptr = unsafe.Pointer(rangeWithLength) + } + + if removedLen > len { + // Insert the leftover range. 
+ insertFreeRange(unsafe.Add(ptr, len*bytesPerBlock), removedLen-len) + } + return ptr +} + +func isOnHeap(ptr uintptr) bool { + return ptr >= heapStart && ptr < uintptr(metadataStart) +} + +// Initialize the memory allocator. +// No memory may be allocated before this is called. That means the runtime and +// any packages the runtime depends upon may not allocate memory during package +// initialization. +func initHeap() { + calculateHeapAddresses() + + // Set all block states to 'free'. + metadataSize := heapEnd - uintptr(metadataStart) + memzero(unsafe.Pointer(metadataStart), metadataSize) + + // Rebuild the free ranges list. + buildFreeRanges() +} + +// setHeapEnd is called to expand the heap. The heap can only grow, not shrink. +// Also, the heap should grow substantially each time otherwise growing the heap +// will be expensive. +func setHeapEnd(newHeapEnd uintptr) { + if gcAsserts && newHeapEnd <= heapEnd { + runtimePanic("gc: setHeapEnd didn't grow the heap") + } + + // Save some old variables we need later. + oldMetadataStart := metadataStart + oldMetadataSize := heapEnd - uintptr(metadataStart) + + // Increase the heap. After setting the new heapEnd, calculateHeapAddresses + // will update metadataStart and the memcpy will copy the metadata to the + // new location. + // The new metadata will be bigger than the old metadata, but a simple + // memcpy is fine as it only copies the old metadata and the new memory will + // have been zero initialized. + heapEnd = newHeapEnd + calculateHeapAddresses() + memcpy(metadataStart, oldMetadataStart, oldMetadataSize) + + // Note: the memcpy above assumes the heap grows enough so that the new + // metadata does not overlap the old metadata. If that isn't true, memmove + // should be used to avoid corruption. + // This assert checks whether that's true. 
+ if gcAsserts && uintptr(metadataStart) < uintptr(oldMetadataStart)+oldMetadataSize { + runtimePanic("gc: heap did not grow enough at once") + } + + // Rebuild the free ranges list. + buildFreeRanges() +} + +// calculateHeapAddresses initializes variables such as metadataStart and +// numBlock based on heapStart and heapEnd. +// +// This function can be called again when the heap size increases. The caller is +// responsible for copying the metadata to the new location. +func calculateHeapAddresses() { + totalSize := heapEnd - heapStart + + // Allocate some memory to keep 2 bits of information about every block. + metadataSize := (totalSize + blocksPerStateByte*bytesPerBlock) / (1 + blocksPerStateByte*bytesPerBlock) + metadataStart = unsafe.Pointer(heapEnd - metadataSize) + + // Use the rest of the available memory as heap. + numBlocks := (uintptr(metadataStart) - heapStart) / bytesPerBlock + endBlock = gcBlock(numBlocks) + if gcDebug { + println("heapStart: ", heapStart) + println("heapEnd: ", heapEnd) + println("total size: ", totalSize) + println("metadata size: ", metadataSize) + println("metadataStart: ", metadataStart) + println("# of blocks: ", numBlocks) + println("# of block states:", metadataSize*blocksPerStateByte) + } + if gcAsserts && metadataSize*blocksPerStateByte < numBlocks { + // sanity check + runtimePanic("gc: metadata array is too small") + } +} + +// alloc tries to find some free space on the heap, possibly doing a garbage +// collection cycle if needed. If no space is free, it panics. +// +//go:noinline +func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { + if size == 0 { + return unsafe.Pointer(&zeroSizedAlloc) + } + + if interrupt.In() { + runtimePanicAt(returnAddress(0), "heap alloc in interrupt") + } + + // Round the size up to a multiple of blocks, adding space for the header. + rawSize := size + size += align(unsafe.Sizeof(objHeader{})) + size += bytesPerBlock - 1 + if size < rawSize { + // The size overflowed. 
+ runtimePanicAt(returnAddress(0), "out of memory") + } + neededBlocks := size / bytesPerBlock + size = neededBlocks * bytesPerBlock + + // Make sure there are no concurrent allocations. The heap is not currently + // designed for concurrent alloc/GC. + gcLock.Lock() + + // Update the total allocation counters. + gcTotalAlloc += uint64(rawSize) + gcMallocs++ + + // Acquire a range of free blocks. + var ranGC bool + var grewHeap bool + var pointer unsafe.Pointer + for { + pointer = popFreeRange(neededBlocks) + if pointer != nil { + break + } + + if !ranGC { + // Run the collector and try again. + freeBytes := runGC() + ranGC = true + heapSize := uintptr(metadataStart) - heapStart + if freeBytes < heapSize/3 { + // Ensure there is at least 33% headroom. + // This percentage was arbitrarily chosen, and may need to + // be tuned in the future. + growHeap() + } + continue + } + + if gcDebug && !grewHeap { + println("grow heap for request:", uint(neededBlocks)) + dumpFreeRangeCounts() + } + if growHeap() { + grewHeap = true + continue + } + + // Unfortunately the heap could not be increased. This + // happens on baremetal systems for example (where all + // available RAM has already been dedicated to the heap). + runtimePanicAt(returnAddress(0), "out of memory") + } + + // Set the backing blocks as being allocated. + block := blockFromAddr(uintptr(pointer)) + block.setState(blockStateHead) + for i := block + 1; i != block+gcBlock(neededBlocks); i++ { + i.setState(blockStateTail) + } + + // Create the object header. + header := (*objHeader)(pointer) + header.layout = parseGCLayout(layout) + + // We've claimed this allocation, now we can unlock the heap. + gcLock.Unlock() + + // Return a pointer to this allocation. 
+ add := align(unsafe.Sizeof(objHeader{})) + pointer = unsafe.Add(pointer, add) + size -= add + memzero(pointer, size) + return pointer +} + +func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer { + if ptr == nil { + return alloc(size, nil) + } + + ptrAddress := uintptr(ptr) + endOfTailAddress := blockFromAddr(ptrAddress).findNext().address() + + // this might be a few bytes longer than the original size of + // ptr, because we align to full blocks of size bytesPerBlock + oldSize := endOfTailAddress - ptrAddress + if size <= oldSize { + return ptr + } + + newAlloc := alloc(size, nil) + memcpy(newAlloc, ptr, oldSize) + free(ptr) + + return newAlloc +} + +func free(ptr unsafe.Pointer) { + // TODO: free blocks on request, when the compiler knows they're unused. +} + +// GC performs a garbage collection cycle. +func GC() { + gcLock.Lock() + runGC() + gcLock.Unlock() +} + +// runGC performs a garbage collection cycle. It is the internal implementation +// of the runtime.GC() function. The difference is that it returns the number of +// free bytes in the heap after the GC is finished. +func runGC() (freeBytes uintptr) { + if gcDebug { + println("running collection cycle...") + } + + var gcStart timeUnit + if gcTiming { + gcStart = ticks() + } + + // Mark phase: mark all reachable objects, recursively. + gcMarkReachable() + + var gcPreMarkEnd timeUnit + if gcTiming { + gcPreMarkEnd = ticks() + } + + if baremetal && hasScheduler { + // Channel operations in interrupts may move task pointers around while we are marking. + // Therefore we need to scan the runqueue separately. + var markedTaskQueue task.Queue + runqueueScan: + runqueue := schedulerRunQueue() + for !runqueue.Empty() { + // Pop the next task off of the runqueue. + t := runqueue.Pop() + + // Mark the task if it has not already been marked. + markRoot(uintptr(unsafe.Pointer(runqueue)), uintptr(unsafe.Pointer(t))) + + // Push the task onto our temporary queue. 
+ markedTaskQueue.Push(t) + } + + finishMark() + + // Restore the runqueue. + i := interrupt.Disable() + if !runqueue.Empty() { + // Something new came in while finishing the mark. + interrupt.Restore(i) + goto runqueueScan + } + *runqueue = markedTaskQueue + interrupt.Restore(i) + } else { + finishMark() + } + + var gcPostMarkEnd timeUnit + if gcTiming { + gcPostMarkEnd = ticks() + } + + // If we're using threads, resume all other threads before starting the + // sweep. + gcResumeWorld() + + var gcCleanupEnd timeUnit + if gcTiming { + gcCleanupEnd = ticks() + } + + // Sweep phase: free all non-marked objects and unmark marked objects for + // the next collection cycle. + sweep() + + // Rebuild the free ranges list. + freeBytes = buildFreeRanges() + + var gcSweepEnd timeUnit + if gcTiming { + gcSweepEnd = ticks() + } + + if gcTiming { + println("gc timing:", ticksToNanoseconds(gcSweepEnd-gcStart), "ns") + println("\tpre-mark: ", ticksToNanoseconds(gcPreMarkEnd-gcStart), "ns") + println("\tpost-mark:", ticksToNanoseconds(gcPostMarkEnd-gcPreMarkEnd), "ns") + println("\tcleanup: ", ticksToNanoseconds(gcCleanupEnd-gcPostMarkEnd), "ns") + println("\tsweep: ", ticksToNanoseconds(gcSweepEnd-gcCleanupEnd), "ns") + } + + // Show how much has been sweeped, for debugging. + if gcDebug { + dumpHeap() + } + + if gcDebug { + println("free ranges after rebuild:") + dumpFreeRangeCounts() + } + + return +} + +// markRoots reads all pointers from start to end (exclusive) and if they look +// like a heap pointer and are unmarked, marks them and scans that object as +// well (recursively). The starting address must be valid and aligned. +func markRoots(start, end uintptr) { + if gcDebug { + println("mark from", start, "to", end, int(end-start)) + } + if gcAsserts { + if start >= end { + runtimePanic("gc: unexpected range to mark") + } + if start%unsafe.Alignof(start) != 0 { + runtimePanic("gc: unaligned start pointer") + } + } + + // Scan the range conservatively. 
+ scanConservative(start, end-start) +} + +// scanConservative scans all possible pointer locations in a range and marks referenced heap allocations. +// The starting address must be valid and pointer-aligned. +func scanConservative(addr, len uintptr) { + for len >= unsafe.Sizeof(addr) { + root := *(*uintptr)(unsafe.Pointer(addr)) + markRoot(addr, root) + + addr += unsafe.Alignof(addr) + len -= unsafe.Alignof(addr) + } +} + +func markCurrentGoroutineStack(sp uintptr) { + // This could be optimized by only marking the stack area that's currently + // in use. + markRoot(0, sp) +} + +// finishMark finishes the marking process by scanning all heap objects on scanList. +func finishMark() { + for { + // Remove an object from the scan list. + obj := scanList + if obj == nil { + return + } + scanList = obj.next + + // Check if the object may contain pointers. + if obj.layout.pointerFree() { + // This object doesn't contain any pointers. + // This is a fast path for objects like make([]int, 4096). + // It skips the length calculation. + continue + } + + // Compute the scan bounds. + objAddr := uintptr(unsafe.Pointer(obj)) + start := objAddr + align(unsafe.Sizeof(objHeader{})) + end := blockFromAddr(objAddr).findNext().address() + + // Scan the object. + obj.layout.scan(start, end-start) + } +} + +// mark a GC root at the address addr. +func markRoot(addr, root uintptr) { + // Find the heap block corresponding to the root. + if !isOnHeap(root) { + // This is not a heap pointer. + return + } + block := blockFromAddr(root) + + // Find the head of the corresponding object. + if block.state() == blockStateFree { + // The to-be-marked object doesn't actually exist. + // This could either be a dangling pointer (oops!) but most likely + // just a false positive. + return + } + head := block.findHead() + + // Mark the object. + if head.state() == blockStateMark { + // This object is already marked. 
+ return + } + if gcDebug { + println("found unmarked pointer", root, "at address", addr) + } + head.setState(blockStateMark) + + // Add the object to the scan list. + header := (*objHeader)(head.pointer()) + header.next = scanList + scanList = header +} + +// Sweep goes through all memory and frees unmarked memory. +func sweep() { + metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) + var carry byte + for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { + // Fetch the state byte. + stateBytePtr := (*byte)(unsafe.Pointer(meta)) + stateByte := *stateBytePtr + + // Separate blocks by type. + // Split the nibbles. + // Each nibble is a mask of blocks. + high := stateByte >> blocksPerStateByte + low := stateByte & blockStateEach + // Marked heads are in both nibbles. + markedHeads := low & high + // Unmarked heads are in the low nibble but not the high nibble. + unmarkedHeads := low &^ high + // Tails are in the high nibble but not the low nibble. + tails := high &^ low + + // Clear all tail runs after unmarked (freed) heads. + // + // Adding 1 to the start of a bit run will clear the run and set the next bit: + // (2^k - 1) + 1 = 2^k + // e.g. 0b0011 + 1 = 0b0100 + // Bitwise-and with the original mask to clear the newly set bit. + // e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000 + // This will not clear bits after the run because the gap stops the carry: + // e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000 + // This can clear multiple runs in a single addition: + // e.g. (0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000 + // + // In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1). + // It is possible omit the bitwise-and because the clear still works if the next block is not a tail. + // A head is not a tail, so corresponding missing tail bit will stop the carry from a previous tail run. 
+ // As such it will set the next bit which will be cleared back away later. + // e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000 + // + // Treat the whole heap as a single pair of integer masks. + // This is accomplished for addition by carrying the overflow to the next state byte. + // The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum. + // This does not require any special work for the bitwise-and because it operates bitwise. + tailClear := tails + (unmarkedHeads << 1) + carry + carry = tailClear >> blocksPerStateByte + tails &= tailClear + + // Construct the new state byte. + *stateBytePtr = markedHeads | (tails << blocksPerStateByte) + } +} + +// buildFreeRanges rebuilds the freeRanges list. +// This must be called after a GC sweep or heap grow. +// It returns how many bytes are free in the heap. +func buildFreeRanges() uintptr { + freeRanges = nil + block := endBlock + var totalBlocks uintptr + for { + // Skip backwards over occupied blocks. + for block > 0 && (block-1).state() != blockStateFree { + block-- + } + if block == 0 { + break + } + + // Find the start of the free range. + end := block + for block > 0 && (block-1).state() == blockStateFree { + block-- + } + + // Insert the free range. + len := uintptr(end - block) + totalBlocks += len + insertFreeRange(block.pointer(), len) + } + + return totalBlocks * bytesPerBlock +} + +func dumpFreeRangeCounts() { + for rangeWithLength := freeRanges; rangeWithLength != nil; rangeWithLength = rangeWithLength.nextLen { + totalRanges := uintptr(1) + for nextWithLen := rangeWithLength.nextWithLen; nextWithLen != nil; nextWithLen = nextWithLen.next { + totalRanges++ + } + println("-", uint(rangeWithLength.len), "x", uint(totalRanges)) + } +} + +// dumpHeap can be used for debugging purposes. It dumps the state of each heap +// block to standard output. 
+func dumpHeap() { + println("heap:") + for block := gcBlock(0); block < endBlock; block++ { + switch block.state() { + case blockStateHead: + print("*") + case blockStateTail: + print("-") + case blockStateMark: + print("#") + default: // free + print("·") + } + if block%64 == 63 || block+1 == endBlock { + println() + } + } +} + +// ReadMemStats populates m with memory statistics. +// +// The returned memory statistics are up to date as of the +// call to ReadMemStats. This would not do GC implicitly for you. +func ReadMemStats(m *MemStats) { + gcLock.Lock() + + // Calculate the raw size of the heap. + heapEnd := heapEnd + heapStart := heapStart + m.Sys = uint64(heapEnd - heapStart) + m.HeapSys = uint64(uintptr(metadataStart) - heapStart) + metadataStart := metadataStart + // TODO: should GCSys include objHeaders? + m.GCSys = uint64(heapEnd - uintptr(metadataStart)) + m.HeapReleased = 0 // always 0, we don't currently release memory back to the OS. + + // Count live heads and tails. + var liveHeads, liveTails uintptr + endBlock := endBlock + metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) + for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { + // Since we are outside of a GC, nothing is marked. + // A bit in the low nibble implies a head. + // A bit in the high nibble implies a tail. + stateByte := *(*byte)(unsafe.Pointer(meta)) + liveHeads += uintptr(count4LUT[stateByte&blockStateEach]) + liveTails += uintptr(count4LUT[stateByte>>blocksPerStateByte]) + } + + // Add heads and tails to count live blocks. + liveBlocks := liveHeads + liveTails + liveBytes := uint64(liveBlocks * bytesPerBlock) + m.HeapInuse = liveBytes + m.HeapAlloc = liveBytes + m.HeapObjects = uint64(liveHeads) + m.Alloc = liveBytes + + // Subtract live blocks from total blocks to count free blocks. 
+ freeBlocks := uintptr(endBlock) - liveBlocks + m.HeapIdle = uint64(freeBlocks * bytesPerBlock) + + // Record the number of allocated objects. + gcMallocs := gcMallocs + m.Mallocs = gcMallocs + + // Subtract live objects from allocated objects to count freed objects. + m.Frees = gcMallocs - uint64(liveHeads) + + // Record the total allocated bytes. + m.TotalAlloc = gcTotalAlloc + + gcLock.Unlock() +} + +// count4LUT is a lookup table used to count set bits in a 4-bit mask. +// TODO: replace with popcnt when available +var count4LUT = [16]uint8{ + 0b0000: 0, + 0b0001: 1, + 0b0010: 1, + 0b0011: 2, + 0b0100: 1, + 0b0101: 2, + 0b0110: 2, + 0b0111: 3, + 0b1000: 1, + 0b1001: 2, + 0b1010: 2, + 0b1011: 3, + 0b1100: 2, + 0b1101: 3, + 0b1110: 3, + 0b1111: 4, +} + +func SetFinalizer(obj interface{}, finalizer interface{}) { + // Unimplemented. +} diff --git a/testdata/gc.go b/testdata/gc.go index 456d763b4c..99804fb220 100644 --- a/testdata/gc.go +++ b/testdata/gc.go @@ -1,6 +1,8 @@ package main -import "runtime" +import ( + "runtime" +) var xorshift32State uint32 = 1