in src/heap/hh_shared.c [1333:1458]
CAMLprim value hh_compact(value unit) {
CAMLparam1(unit);
assert_master();
assert(info->gc_phase == Phase_idle);
intnat hashtbl_slots = info->hashtbl_slots;
// Step 1: Scan the root set, threading any pointers to the heap. The
// threading performed during this step will be unthreaded in the next step,
// updating the hash table to point to the updated locations.
for (intnat i = 0; i < hashtbl_slots; i++) {
addr_t hashtbl_addr = Addr_of_ptr(&hashtbl[i].addr);
gc_thread(hashtbl_addr);
}
// Step 2: Scan the heap object-by-object from bottom to top. The dst pointer
// keeps track of where objects will move to, but we do not move anything
// during this step.
//
// If we encounter an unmarked header, the object is unreachable, so do not
// update the dst pointer.
//
// If we encounter an address, then this object was reachable via "forward"
// reference, i.e., a pointer stored at a lower address. Because we know where
// the object will move to (dst), we eagerly update the forward references and
// copy the original header back.
//
// If we encounter a marked header, then the object is reachable only via
// "backwards" reference. These backwards references will be handled in the
// next step.
//
// NB: Instead of scanning the entire heap, it may be worthwhile to track the
// min/max live addresses during the marking phase, and only scan that part.
// Possible that the extra marking work would be more expensive than a linear
// memory scan, but worth experimenting.
//
// NB: Also worth experimenting with explicit prefetching.
addr_t src = info->heap_init;
addr_t dst = info->heap_init;
addr_t heap_ptr = info->heap;
while (src < heap_ptr) {
hh_header_t hd = Deref(src);
intnat size;
if (Is_blue(hd)) {
size = Obj_bhsize(hd);
} else {
gc_update(src, dst);
hd = Deref(src);
size = Obj_bhsize(hd);
gc_scan(src);
dst += size;
}
src += size;
}
// Step 3: Scan the heap object-by-object again, actually moving objects this
// time around.
//
// Unmarked headers still indicate unreachable data and is not moved.
//
// If we encounter an address, then the object was reachable via a "backwards"
// reference from the previous step, and we fix up those references to point
// to the new location and copy the original header back.
//
// Finally we can move the object. We unset the mark bit on the header so that
// future collections can free the space if the object becomes unreachable.
src = info->heap_init;
dst = info->heap_init;
intnat next_version = info->next_version;
while (src < heap_ptr) {
hh_header_t hd = Deref(src);
intnat size;
if (Is_blue(hd)) {
size = Obj_bhsize(hd);
} else {
gc_update(src, dst);
hd = Deref(src);
size = Obj_bhsize(hd);
if (Obj_tag(hd) == Entity_tag) {
// Move entities manually, resetting the entity version to 0 and writing
// the previous entity data to the correct offset. If the entity version
// is >= the next version, that means we're compacting after a canceled
// recheck, so we must preserve the committed and latest data.
intnat v = Deref(Obj_field(src, 2));
addr_t data0 = Deref(Obj_field(src, v & 1));
addr_t data1 = NULL_ADDR;
if (v >= next_version) {
data1 = Deref(Obj_field(src, ~v & 1));
v = 2;
} else {
v = 0;
}
Deref(dst) = hd;
Deref(Obj_field(dst, 0)) = data0;
Deref(Obj_field(dst, 1)) = data1;
Deref(Obj_field(dst, 2)) = v;
} else {
memmove(Ptr_of_addr(dst), Ptr_of_addr(src), size);
}
dst += size;
}
src += size;
}
// TODO: Space between dst and info->heap is unused, but will almost certainly
// become used again soon. Currently we will never decommit, which may cause
// issues when there is memory pressure.
//
// If the kernel supports it, we might consider using madvise(MADV_FREE),
// which allows the kernel to reclaim the memory lazily under pressure, but
// would not force page faults under healthy operation.
info->heap = dst;
// Invariant: info->heap_init <= gc_end <= info->heap
// See declaration of gc_end
gc_end = dst;
info->free_bsize = 0;
// All live entities have been reset to version 0, so we can also reset the
// global version counter.
info->next_version = 2;
CAMLreturn(Val_unit);
}