-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
runtime (gc.blocks): move objHeader to the end #5455
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,15 +8,15 @@ package runtime | |
| // The memory manager internally uses blocks of 4 pointers big (see | ||
| // bytesPerBlock). Every allocation first rounds up to this size to align every | ||
| // block. It will first try to find a chain of blocks that is big enough to | ||
| // satisfy the allocation. If it finds one, it marks the first one as the "head" | ||
| // and the following ones (if any) as the "tail" (see below). If it cannot find | ||
| // satisfy the allocation. If it finds one, it marks the last one as the "head" | ||
| // and the preceding ones (if any) as the "tail" (see below). If it cannot find | ||
| // any free space, it will perform a garbage collection cycle and try again. If | ||
| // it still cannot find any free space, it gives up. | ||
| // | ||
| // Every block has some metadata, which is stored at the end of the heap. | ||
| // The four states are "free", "head", "tail", and "mark". During normal | ||
| // operation, there are no marked blocks. Every allocated object starts with a | ||
| // "head" and is followed by "tail" blocks. The reason for this distinction is | ||
| // operation, there are no marked blocks. Every allocated object ends with a | ||
| // "head" and is preceded by "tail" blocks. The reason for this distinction is | ||
| // that this way, the start and end of every object can be found easily. | ||
| // | ||
| // Metadata is stored in a special area at the end of the heap, in the area | ||
|
|
@@ -129,7 +129,7 @@ func (b gcBlock) address() uintptr { | |
| return addr | ||
| } | ||
|
|
||
| // findHead returns the head (first block) of an object, assuming the block | ||
| // findHead returns the head (last block) of an object, assuming the block | ||
| // points to an allocated object. It returns the same block if this block | ||
| // already points to the head. | ||
| func (b gcBlock) findHead() gcBlock { | ||
|
|
@@ -142,7 +142,7 @@ func (b gcBlock) findHead() gcBlock { | |
| // large allocation. | ||
| stateByte := b.stateByte() | ||
| if stateByte == blockStateByteAllTails { | ||
| b -= (b % blocksPerStateByte) + 1 | ||
| b += blocksPerStateByte - (b % blocksPerStateByte) | ||
| continue | ||
| } | ||
|
|
||
|
|
@@ -152,7 +152,7 @@ func (b gcBlock) findHead() gcBlock { | |
| if state != blockStateTail { | ||
| break | ||
| } | ||
| b-- | ||
| b++ | ||
| } | ||
| if gcAsserts { | ||
| if b.state() != blockStateHead && b.state() != blockStateMark { | ||
|
|
@@ -162,18 +162,6 @@ func (b gcBlock) findHead() gcBlock { | |
| return b | ||
| } | ||
|
|
||
| // findNext returns the first block just past the end of the tail. This may or | ||
| // may not be the head of an object. | ||
| func (b gcBlock) findNext() gcBlock { | ||
| if b.state() == blockStateHead || b.state() == blockStateMark { | ||
| b++ | ||
| } | ||
| for b.address() < uintptr(metadataStart) && b.state() == blockStateTail { | ||
| b++ | ||
| } | ||
| return b | ||
| } | ||
|
|
||
| func (b gcBlock) stateByte() byte { | ||
| return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) | ||
| } | ||
|
|
@@ -200,7 +188,22 @@ func (b gcBlock) setState(newState blockState) { | |
| } | ||
| } | ||
|
|
||
| // objHeader is a structure prepended to every heap object to hold metadata. | ||
| // unmark changes the state of b from blockStateMark to blockStateHead. | ||
| func (b gcBlock) unmark() { | ||
| if gcAsserts && b.state() != blockStateMark { | ||
| runtimePanic("gc: block not marked") | ||
| } | ||
| stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) | ||
| *stateBytePtr ^= uint8(blockStateMark^blockStateHead) << (b % blocksPerStateByte) | ||
| } | ||
|
|
||
| // free changes the state of b to blockStateFree. | ||
| func (b gcBlock) free() { | ||
| stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte)) | ||
| *stateBytePtr &^= uint8(blockStateMask) << (b % blocksPerStateByte) | ||
| } | ||
|
|
||
| // objHeader is a structure appended to every heap object to hold metadata. | ||
| type objHeader struct { | ||
| // next is the next object to scan after this. | ||
| next *objHeader | ||
|
|
@@ -317,8 +320,12 @@ func initHeap() { | |
| metadataSize := heapEnd - uintptr(metadataStart) | ||
| memzero(unsafe.Pointer(metadataStart), metadataSize) | ||
|
|
||
| // Rebuild the free ranges list. | ||
| buildFreeRanges() | ||
| // Create the initial free range. | ||
| if endBlock > 0 { | ||
| r := (*freeRange)(unsafe.Pointer(heapStart)) | ||
| *r = freeRange{len: uintptr(endBlock)} | ||
| freeRanges = r | ||
| } | ||
| } | ||
|
|
||
| // setHeapEnd is called to expand the heap. The heap can only grow, not shrink. | ||
|
|
@@ -340,6 +347,7 @@ func setHeapEnd(newHeapEnd uintptr) { | |
| // memcpy is fine as it only copies the old metadata and the new memory will | ||
| // have been zero initialized. | ||
| heapEnd = newHeapEnd | ||
| oldEndBlock := endBlock | ||
| calculateHeapAddresses() | ||
| memcpy(metadataStart, oldMetadataStart, oldMetadataSize) | ||
|
|
||
|
|
@@ -351,8 +359,14 @@ func setHeapEnd(newHeapEnd uintptr) { | |
| runtimePanic("gc: heap did not grow enough at once") | ||
| } | ||
|
|
||
| // Rebuild the free ranges list. | ||
| buildFreeRanges() | ||
| // Insert the new free range. This range will be separate from any previous | ||
| // free space at the end of the heap. This may result in more heap growth | ||
| // than strictly necessary when an allocation requests more memory than the | ||
| // previous heap size. Otherwise this will only result in slightly more | ||
| // memory fragmentation than necessary. We cannot easily remove the old | ||
| // range and adding a special free-list rebuild function for this edge case | ||
| // would not be worthwhile in terms of binary size or code maintenance. | ||
| insertFreeRange(oldEndBlock.pointer(), uintptr(endBlock-oldEndBlock)) | ||
| } | ||
|
|
||
| // calculateHeapAddresses initializes variables such as metadataStart and | ||
|
|
@@ -400,7 +414,7 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { | |
|
|
||
| // Round the size up to a multiple of blocks, adding space for the header. | ||
| rawSize := size | ||
| size += align(unsafe.Sizeof(objHeader{})) | ||
| size += unsafe.Sizeof(objHeader{}) | ||
| size += bytesPerBlock - 1 | ||
| if size < rawSize { | ||
| // The size overflowed. | ||
|
|
@@ -456,25 +470,27 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { | |
| runtimePanicAt(returnAddress(0), "out of memory") | ||
| } | ||
|
|
||
| // Set the backing blocks as being allocated. | ||
| // Set the block states. | ||
| block := blockFromAddr(uintptr(pointer)) | ||
| block.setState(blockStateHead) | ||
| for i := block + 1; i != block+gcBlock(neededBlocks); i++ { | ||
| i := block + gcBlock(neededBlocks) - 1 | ||
| i.setState(blockStateHead) | ||
| for i != block { | ||
| i-- | ||
| i.setState(blockStateTail) | ||
| } | ||
|
|
||
| // Create the object header. | ||
| header := (*objHeader)(pointer) | ||
| size -= unsafe.Sizeof(objHeader{}) | ||
| header := (*objHeader)(unsafe.Add(pointer, size)) | ||
| header.layout = parseGCLayout(layout) | ||
|
|
||
| // We've claimed this allocation, now we can unlock the heap. | ||
| gcLock.Unlock() | ||
|
|
||
| // Return a pointer to this allocation. | ||
| add := align(unsafe.Sizeof(objHeader{})) | ||
| pointer = unsafe.Add(pointer, add) | ||
| size -= add | ||
| // Clear the allocation body. | ||
| memzero(pointer, size) | ||
|
|
||
| // Return a pointer to this allocation. | ||
| return pointer | ||
| } | ||
|
|
||
|
|
@@ -483,16 +499,28 @@ func realloc(ptr unsafe.Pointer, size uintptr) unsafe.Pointer { | |
| return alloc(size, nil) | ||
| } | ||
|
|
||
| ptrAddress := uintptr(ptr) | ||
| endOfTailAddress := blockFromAddr(ptrAddress).findNext().address() | ||
| // Find the first block of the original allocation. | ||
| firstBlock := blockFromAddr(uintptr(ptr)) | ||
|
|
||
| // Find the last block of the original allocation. | ||
|
niaow marked this conversation as resolved.
|
||
| lastBlock := firstBlock.findHead() | ||
|
|
||
| // Calculate the size of the original allocation body. | ||
| oldSize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{})) | ||
|
|
||
| // this might be a few bytes longer than the original size of | ||
| // ptr, because we align to full blocks of size bytesPerBlock | ||
| oldSize := endOfTailAddress - ptrAddress | ||
| if size <= oldSize { | ||
| // The requested size is less than or equal to the old size. | ||
| // There are two likely scenarios for this: | ||
| // - The caller intended to grow the allocation, but the original size | ||
| // was rounded up by alloc to a multiple of the block size. | ||
| // The rounded size is already sufficient. | ||
| // - The caller intended to shrink the allocation. | ||
| // We currently ignore this case. | ||
| // Either way, the current allocation can be left alone. | ||
| return ptr | ||
| } | ||
|
|
||
| // Create a new allocation and copy the old data. | ||
| newAlloc := alloc(size, nil) | ||
| memcpy(newAlloc, ptr, oldSize) | ||
| free(ptr) | ||
|
|
@@ -559,11 +587,8 @@ func runGC() (freeBytes uintptr) { | |
| gcResumeWorld() | ||
|
|
||
| // Sweep phase: free all non-marked objects and unmark marked objects for | ||
| // the next collection cycle. | ||
| sweep() | ||
|
|
||
| // Rebuild the free ranges list. | ||
| freeBytes = buildFreeRanges() | ||
| // the next collection cycle. This also rebuilds the free ranges list. | ||
| freeBytes = sweep() | ||
|
|
||
| // Show how much has been swept, for debugging. | ||
| if gcDebug { | ||
|
|
@@ -629,13 +654,21 @@ func finishMark() { | |
| continue | ||
| } | ||
|
|
||
| // Compute the scan bounds. | ||
| objAddr := uintptr(unsafe.Pointer(obj)) | ||
| start := objAddr + align(unsafe.Sizeof(objHeader{})) | ||
| end := blockFromAddr(objAddr).findNext().address() | ||
| // Find the last block in the object. | ||
| // This block contains the header. | ||
| lastBlock := blockFromAddr(uintptr(unsafe.Pointer(obj))) | ||
|
|
||
| // Find the first block in the allocation. | ||
| firstBlock := lastBlock | ||
| for firstBlock > 0 && (firstBlock-1).state() == blockStateTail { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same question as above: can we scan multiple blocks at once by checking the entire meta byte?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't think that was in the scope of this PR since we didn't do that before. It is also less important because this only happens once per allocation, as opposed to |
||
| firstBlock-- | ||
| } | ||
|
|
||
| // Compute the size of the allocation. | ||
| bodySize := uintptr(lastBlock-firstBlock)*bytesPerBlock + (bytesPerBlock - unsafe.Sizeof(objHeader{})) | ||
|
|
||
| // Scan the object. | ||
| obj.layout.scan(start, end-start) | ||
| obj.layout.scan(firstBlock.address(), bodySize) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -668,97 +701,55 @@ func markRoot(addr, root uintptr) { | |
| head.setState(blockStateMark) | ||
|
|
||
| // Add the object to the scan list. | ||
| header := (*objHeader)(head.pointer()) | ||
| header := (*objHeader)(unsafe.Add(head.pointer(), bytesPerBlock-unsafe.Sizeof(objHeader{}))) | ||
| header.next = scanList | ||
| scanList = header | ||
| } | ||
|
|
||
| // Sweep goes through all memory and frees unmarked memory. | ||
| func sweep() { | ||
| metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte) | ||
| var carry byte | ||
| for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) { | ||
| // Fetch the state byte. | ||
| stateBytePtr := (*byte)(unsafe.Pointer(meta)) | ||
| stateByte := *stateBytePtr | ||
|
|
||
| // Separate blocks by type. | ||
| // Split the nibbles. | ||
| // Each nibble is a mask of blocks. | ||
| high := stateByte >> blocksPerStateByte | ||
| low := stateByte & blockStateEach | ||
| // Marked heads are in both nibbles. | ||
| markedHeads := low & high | ||
| // Unmarked heads are in the low nibble but not the high nibble. | ||
| unmarkedHeads := low &^ high | ||
| // Tails are in the high nibble but not the low nibble. | ||
| tails := high &^ low | ||
|
|
||
| // Clear all tail runs after unmarked (freed) heads. | ||
| // | ||
| // Adding 1 to the start of a bit run will clear the run and set the next bit: | ||
| // (2^k - 1) + 1 = 2^k | ||
| // e.g. 0b0011 + 1 = 0b0100 | ||
| // Bitwise-and with the original mask to clear the newly set bit. | ||
| // e.g. (0b0011 + 1) & 0b0011 = 0b0100 & 0b0011 = 0b0000 | ||
| // This will not clear bits after the run because the gap stops the carry: | ||
| // e.g. (0b1011 + 1) & 0b1011 = 0b1100 & 0b1011 = 0b1000 | ||
| // This can clear multiple runs in a single addition: | ||
| // e.g. (0b1101 + 0b0101) & 0b1101 = 0b10010 & 0b1101 = 0b0000 | ||
| // | ||
| // In order to find tail run starts after unmarked heads we could use tails & (unmarkedHeads << 1). | ||
| // It is possible to omit the bitwise-and because the clear still works if the next block is not a tail. | ||
| // A head is not a tail, so the corresponding missing tail bit will stop the carry from a previous tail run. | ||
| // As such it will set the next bit which will be cleared back away later. | ||
| // e.g. HHTH: (0b0010 + (0b1101 << 1)) & 0b0010 = 0b11100 & 0b0010 = 0b0000 | ||
| // | ||
| // Treat the whole heap as a single pair of integer masks. | ||
| // This is accomplished for addition by carrying the overflow to the next state byte. | ||
| // The unmarkedHeads << 1 is equivalent to unmarkedHeads + unmarkedHeads, so it can be merged with the sum. | ||
| // This does not require any special work for the bitwise-and because it operates bitwise. | ||
| tailClear := tails + (unmarkedHeads << 1) + carry | ||
| carry = tailClear >> blocksPerStateByte | ||
| tails &= tailClear | ||
|
|
||
| // Construct the new state byte. | ||
| *stateBytePtr = markedHeads | (tails << blocksPerStateByte) | ||
| } | ||
| } | ||
|
|
||
| // buildFreeRanges rebuilds the freeRanges list. | ||
| // This must be called after a GC sweep or heap grow. | ||
| // It returns how many bytes are free in the heap. | ||
| func buildFreeRanges() uintptr { | ||
| func sweep() uintptr { | ||
| // Discard the old free ranges list. | ||
| freeRanges = nil | ||
|
|
||
| // Scan backwards through the block metadata. | ||
| block := endBlock | ||
| var totalBlocks uintptr | ||
| var freeBlocks uintptr | ||
| for { | ||
| // Skip backwards over occupied blocks. | ||
| for block > 0 && (block-1).state() != blockStateFree { | ||
| // Scan backwards until we find a marked head. | ||
| // Free the blocks as we go. | ||
| freeEnd := block | ||
| for block > 0 && (block-1).state() != blockStateMark { | ||
| block-- | ||
| block.free() | ||
| } | ||
|
|
||
| if freeLen := uintptr(freeEnd - block); freeLen > 0 { | ||
| // Insert the freed blocks. | ||
| freeBlocks += freeLen | ||
| insertFreeRange(block.pointer(), freeLen) | ||
| } | ||
|
|
||
| if block == 0 { | ||
| // There are no more blocks to sweep. | ||
| break | ||
| } | ||
|
|
||
| // Find the start of the free range. | ||
| end := block | ||
| for block > 0 && (block-1).state() == blockStateFree { | ||
| // Unmark the next head. | ||
| block-- | ||
| block.unmark() | ||
|
|
||
| // Skip the tail. | ||
| for block > 0 && (block-1).state() == blockStateTail { | ||
| block-- | ||
| } | ||
|
|
||
| // Insert the free range. | ||
| len := uintptr(end - block) | ||
| totalBlocks += len | ||
| insertFreeRange(block.pointer(), len) | ||
| } | ||
|
|
||
| if gcDebug { | ||
| println("free ranges after rebuild:") | ||
| println("free ranges after sweep:") | ||
| dumpFreeRangeCounts() | ||
| } | ||
|
|
||
| return totalBlocks * bytesPerBlock | ||
| return freeBlocks * bytesPerBlock | ||
| } | ||
|
|
||
| func dumpFreeRangeCounts() { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems like we should rename this findHeader.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This finds the head block. It returns the index of the block with the header, not the header itself.