diff --git a/lib/system/alloc.nim b/lib/system/alloc.nim
index 3de6d871370a7..bec89c0bdd15d 100644
--- a/lib/system/alloc.nim
+++ b/lib/system/alloc.nim
@@ -25,7 +25,7 @@ template track(op, address, size) =
 
 # An allocation of a small pointer looks approximately like this
 #[
-  alloc -> rawAlloc -> No free chunk available > Request a new page from tslf -> result = chunk.data -------------+
+  alloc -> rawAlloc -> No free chunk available > Request a new page from tlsf -> result = chunk.data -------------+
               |                                                                                                   |
               v                                                                                                   |
       Free chunk available                                                                                        |
@@ -47,7 +47,7 @@ template track(op, address, size) =
               |                                      |                                |                            |
               v                                      v                                v                            v
   A different thread owns this chunk.    The current chunk is not active.    chunk.free was < size    Chunk references foreign cells, noop
-  Add the cell to a.sharedFreeLists      Add the cell into the active chunk  Activate the chunk       (end)
+  Add the cell to owner.sharedFreeLists  Add the cell into the active chunk  Activate the chunk       (end)
   (end)                                  (end)                               (end)
 ]#
 # So "true" deallocation is delayed for as long as possible in favor of reusing cells.
@@ -147,6 +147,8 @@ type
     next: ptr HeapLinks
 
   MemRegion = object
+    bigChunksInUse: int
+    smallCellsInUse: int # cells must be tracked individually as small chunks stay alive in most dealloc scenarios
     when not defined(gcDestructors):
       minLargeObj, maxLargeObj: int
     freeSmallChunks: array[0..max(1, SmallChunkSize div MemAlign-1), PSmallChunk]
@@ -837,9 +839,11 @@ when defined(gcDestructors):
       inc total, size
       let chunk = cast[PSmallChunk](pageAddr(it))
       if c != chunk:
-        # The cell is foreign, potentially even from a foreign thread.
-        # It must block the current chunk from being freed, as doing so would leak memory.
+        # The cell is foreign; a different thread tried to free it and gave it back.
+        # It must block the current chunk from being freed, as doing so would cause undefined behavior.
+        sysAssert(chunk.owner == addr a, "compensateCounters: tried to take ownership of a foreign thread's memory")
         inc c.foreignCells
+        dec a.smallCellsInUse
       it = it.next
     # By not adjusting the foreign chunk we reserve space in it to prevent deallocation
     inc(c.free, total)
@@ -908,7 +912,7 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
       if c.free >= size:
         # Because removals from `a.freeSmallChunks[s]` only happen in the other alloc branch and during dealloc,
         # we must not add it to the list if it cannot be used the next time a pointer of `size` bytes is needed.
-        listAdd(a.freeSmallChunks[s], c)
+        a.freeSmallChunks[s] = c
         result = addr(c.data)
         sysAssert((cast[int](result) and (MemAlign-1)) == 0, "rawAlloc 4")
       else:
@@ -949,14 +953,16 @@ proc rawAlloc(a: var MemRegion, requestedSize: int): pointer =
       if c.free < size:
         # Even after fetching shared cells the chunk has no usable memory left. It is no longer the active chunk
         sysAssert(allocInv(a), "rawAlloc: before listRemove test")
-        listRemove(a.freeSmallChunks[s], c)
+        a.freeSmallChunks[s] = nil
         sysAssert(allocInv(a), "rawAlloc: end listRemove test")
     sysAssert(((cast[int](result) and PageMask) - smallChunkOverhead()) %%
         size == 0, "rawAlloc 21")
     sysAssert(allocInv(a), "rawAlloc: end small size")
     inc a.occ, size
     trackSize(c.size)
+    inc a.smallCellsInUse
   else:
+    inc a.bigChunksInUse
     when defined(gcDestructors):
       when hasThreadSupport:
         let deferredFrees = atomicExchangeN(addr a.sharedFreeListBigChunks, nil, ATOMIC_RELAXED)
@@ -988,6 +994,24 @@ proc rawAlloc0(a: var MemRegion, requestedSize: int): pointer =
   result = rawAlloc(a, requestedSize)
   zeroMem(result, requestedSize)
+
+proc deallocOsPages(a: var MemRegion) =
+  # we free every 'ordinarily' allocated page by iterating over the page bits:
+  var it = addr(a.heapLinks)
+  while true:
+    let next = it.next
+    for i in 0..it.len-1:
+      let (p, size) = it.chunks[i]
+      when defined(debugHeapLinks):
+        cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
+          it, size, next)
+      sysAssert size >= PageSize, "origSize too small"
+      osDeallocPages(p, size)
+    it = next
+    if it == nil: break
+  # And then we free the pages that are in use for the page bits:
+  llDeallocAll(a)
+
 
 proc rawDealloc(a: var MemRegion, p: pointer) =
   when defined(nimTypeNames):
     inc(a.deallocCounter)
@@ -1005,6 +1029,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
     if c.owner == addr(a):
       # We own the block, there is no foreign thread involved.
       dec a.occ, s
+      dec a.smallCellsInUse
       untrackSize(s)
       sysAssert a.occ >= 0, "rawDealloc: negative occupied memory (case A)"
       sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
@@ -1029,11 +1054,12 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
         inc(activeChunk.free, s) # By not adjusting the current chunk's capacity it is prevented from being freed
         inc(activeChunk.foreignCells) # The cell is now considered foreign from the perspective of the active chunk
       else:
+        # The current chunk is also the active chunk, or there is no active chunk
        f.next = c.freeList
         c.freeList = f
         if c.free < s:
           # The chunk could not have been active as it didn't have enough space to give
-          listAdd(a.freeSmallChunks[s div MemAlign], c)
+          a.freeSmallChunks[s div MemAlign] = c
           inc(c.free, s)
         else:
           inc(c.free, s)
@@ -1041,7 +1067,7 @@ proc rawDealloc(a: var MemRegion, p: pointer) =
           # If the chunk were to be freed while it references foreign cells,
           # the foreign chunks will leak memory and can never be freed.
           if c.free == SmallChunkSize-smallChunkOverhead() and c.foreignCells == 0:
-            listRemove(a.freeSmallChunks[s div MemAlign], c)
+            a.freeSmallChunks[s div MemAlign] = nil
             c.size = SmallChunkSize
             freeBigChunk(a, cast[PBigChunk](c))
           else:
@@ -1050,18 +1076,20 @@
       when defined(gcDestructors):
         addToSharedFreeList(c, f, s div MemAlign)
     sysAssert(((cast[int](p) and PageMask) - smallChunkOverhead()) %%
-       s == 0, "rawDealloc 2")
+        s == 0, "rawDealloc 2")
   else:
+    dec a.bigChunksInUse
+    let c = cast[PBigChunk](c)
     # set to 0xff to check for usage after free bugs:
     when overwriteFree: nimSetMem(p, -1'i32, c.size -% bigChunkOverhead())
     when logAlloc: cprintf("dealloc(pointer_%p) # BIG %p\n", p, c.owner)
     when defined(gcDestructors):
       if c.owner == addr(a):
-        deallocBigChunk(a, cast[PBigChunk](c))
+        deallocBigChunk(a, c)
       else:
-        addToSharedFreeListBigChunks(c.owner[], cast[PBigChunk](c))
+        addToSharedFreeListBigChunks(c.owner[], c)
     else:
-      deallocBigChunk(a, cast[PBigChunk](c))
+      deallocBigChunk(a, c)
   sysAssert(allocInv(a), "rawDealloc: end")
   #when logAlloc: cprintf("dealloc(pointer_%p)\n", p)
@@ -1177,22 +1205,27 @@ proc realloc0(allocator: var MemRegion, p: pointer, oldsize, newsize: Natural):
   if newsize > oldsize:
     zeroMem(cast[pointer](cast[uint](result) + uint(oldsize)), newsize - oldsize)
 
-proc deallocOsPages(a: var MemRegion) =
-  # we free every 'ordinarily' allocated page by iterating over the page bits:
-  var it = addr(a.heapLinks)
-  while true:
-    let next = it.next
-    for i in 0..it.len-1:
-      let (p, size) = it.chunks[i]
-      when defined(debugHeapLinks):
-        cprintf("owner %p; dealloc A: %p size: %ld; next: %p\n", addr(a),
-          it, size, next)
-      sysAssert size >= PageSize, "origSize too small"
-      osDeallocPages(p, size)
-    it = next
-    if it == nil: break
-  # And then we free the pages that are in use for the page bits:
-  llDeallocAll(a)
+proc abandonAllocator(a: var MemRegion) =
+  # In the current implementation, this is allowed to leak memory
+  # when any of it has been shared and not reclaimed yet
+  when defined(gcDestructors):
+    for s in 0 .. max(1, SmallChunkSize div MemAlign-1):
+      # Edge case: A shared cell was deallocated and this thread didn't allocate again before being abandoned
+      # -> must adjust the small cell counter
+      when hasThreadSupport:
+        var it = atomicExchangeN(addr a.sharedFreeLists[s], nil, ATOMIC_RELAXED)
+      else:
+        var it = a.sharedFreeLists[s]
+        a.sharedFreeLists[s] = nil
+      while it != nil:
+        dec a.smallCellsInUse
+        it = it.next
+
+  if a.smallCellsInUse == 0 and a.bigChunksInUse == 0:
+    # Deallocating is only safe if all pages are unused
+    deallocOsPages(a)
+  else:
+    discard #c_printf("Not freeing allocator: smallCellsInUse %ld | bigChunksInUse %ld\n", a.smallCellsInUse, a.bigChunksInUse)
 
 proc getFreeMem(a: MemRegion): int {.inline.} = result = a.freeMem
 proc getTotalMem(a: MemRegion): int {.inline.} = result = a.currMem
@@ -1219,6 +1252,8 @@ template instantiateForRegion(allocator: untyped) {.dirty.} =
 
   proc deallocOsPages =
     deallocOsPages(allocator)
 
+  proc abandonAllocator = abandonAllocator(allocator)
+
   proc allocImpl(size: Natural): pointer =
     result = alloc(allocator, size)
diff --git a/lib/system/threadimpl.nim b/lib/system/threadimpl.nim
index 285b8f5e7fac3..e88d724ea070b 100644
--- a/lib/system/threadimpl.nim
+++ b/lib/system/threadimpl.nim
@@ -2,6 +2,8 @@
 var nimThreadDestructionHandlers* {.rtlThreadVar.}: seq[proc () {.closure, gcsafe, raises: [].}]
 when not defined(boehmgc) and not hasSharedHeap and not defined(gogc) and not defined(gcRegions):
   proc deallocOsPages() {.rtl, raises: [].}
+when defined(gcDestructors) and not defined(useMalloc):
+  proc abandonAllocator() {.rtl, raises: [].}
 proc threadTrouble() {.raises: [], gcsafe.}
 # create for the main thread. Note: do not insert this data into the list
 # of all threads; it's not to be stopped etc.
@@ -93,6 +95,8 @@ proc threadProcWrapStackFrame[TArg](thrd: ptr Thread[TArg]) {.raises: [].} =
     when declared(deallocOsPages): deallocOsPages()
   else:
     threadProcWrapDispatch(thrd)
+  when defined(gcDestructors) and not defined(useMalloc):
+    abandonAllocator()
 
 template nimThreadProcWrapperBody*(closure: untyped): untyped =
   var thrd = cast[ptr Thread[TArg]](closure)
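
To illustrate the intended effect of `abandonAllocator`: an exiting thread's `MemRegion` can now be returned to the OS when nothing in it is still live, instead of staying mapped until process exit. The snippet below is a minimal sketch, not part of this patch; it is ordinary user code assuming a build with `gcDestructors` (e.g. `--mm:orc`) and without `-d:useMalloc`, so that `threadProcWrapStackFrame` calls the new `abandonAllocator()` when the worker finishes.

```nim
# Sketch only: exercises the abandonment path added by this patch.
# Assumes --mm:orc (gcDestructors) and no -d:useMalloc.

proc worker() {.thread.} =
  # Purely thread-local allocations: every cell should be freed before the
  # thread exits, so the region's smallCellsInUse and bigChunksInUse counters
  # drop back to zero and the whole region is eligible for deallocOsPages().
  var data: seq[string]
  for i in 0 ..< 100_000:
    data.add $i
  # `data` is destroyed here; nothing escapes to another thread.

when isMainModule:
  var t: Thread[void]
  createThread(t, worker)
  joinThread(t)
  # With this patch the worker's pages should be handed back to the OS at
  # thread exit (observable as lower process RSS); previously the region of a
  # gcDestructors build was never released before process termination.
  echo "worker joined"
```

If any cell of the region was handed to another thread and has not come back through the shared free lists, the counters stay nonzero and the region is deliberately kept (leaked) rather than freed; that is the conservative choice encoded in the `if a.smallCellsInUse == 0 and a.bigChunksInUse == 0` check.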