CAMLprim value hh_compact()

in src/heap/hh_shared.c [1333:1458]


CAMLprim value hh_compact(value unit) {
  CAMLparam1(unit);
  assert_master();
  assert(info->gc_phase == Phase_idle);

  intnat hashtbl_slots = info->hashtbl_slots;

  // Step 1: Scan the root set, threading any pointers to the heap. The
  // threading performed during this step will be unthreaded in the next step,
  // updating the hash table to point to the objects' new locations.
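  //
  // "Threading" a reference means splicing the referring slot onto a chain
  // rooted at the referenced object's header word, with the original header
  // value pushed to the end of the chain. Once the object's final address is
  // known, the chain is walked, every slot on it is rewritten, and the header
  // is put back.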
  for (intnat i = 0; i < hashtbl_slots; i++) {
    addr_t hashtbl_addr = Addr_of_ptr(&hashtbl[i].addr);
    gc_thread(hashtbl_addr);
  }

  // Step 2: Scan the heap object-by-object from bottom to top. The dst pointer
  // keeps track of where objects will move to, but we do not move anything
  // during this step.
  //
  // If we encounter an unmarked header, the object is unreachable, so do not
  // update the dst pointer.
  //
  // If we encounter an address, then this object was reachable via "forward"
  // reference, i.e., a pointer stored at a lower address. Because we know where
  // the object will move to (dst), we eagerly update the forward references and
  // copy the original header back.
  //
  // If we encounter a marked header, then the object is reachable only via
  // "backwards" reference. These backwards references will be handled in the
  // next step.
  //
  // NB: Instead of scanning the entire heap, it may be worthwhile to track the
  // min/max live addresses during the marking phase, and only scan that part.
  // It is possible that the extra marking work would be more expensive than a
  // linear memory scan, but it is worth experimenting.
  //
  // NB: Also worth experimenting with explicit prefetching.
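  //
  // For example, a hash table slot threaded in step 1, or a field of an
  // already-scanned (lower-address) object, shows up here as an address in
  // this object's header position and is rewritten to dst right away. A field
  // of a not-yet-scanned (higher-address) object is only threaded once that
  // object is reached below, so it resolves in step 3 instead. (A toy,
  // self-contained version of this thread/unthread pattern appears after the
  // function body.)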
  addr_t src = info->heap_init;
  addr_t dst = info->heap_init;
  addr_t heap_ptr = info->heap;
  while (src < heap_ptr) {
    hh_header_t hd = Deref(src);
    intnat size;
    if (Is_blue(hd)) {
      size = Obj_bhsize(hd);
    } else {
      gc_update(src, dst);
      hd = Deref(src);
      size = Obj_bhsize(hd);
      gc_scan(src);
      dst += size;
    }
    src += size;
  }

  // Step 3: Scan the heap object-by-object again, actually moving objects this
  // time around.
  //
  // Unmarked headers still indicate unreachable data, which is not moved.
  //
  // If we encounter an address, then the object was reachable via a "backwards"
  // reference from the previous step, and we fix up those references to point
  // to the new location and copy the original header back.
  //
  // Finally we can move the object. We unset the mark bit on the header so that
  // future collections can free the space if the object becomes unreachable.
  src = info->heap_init;
  dst = info->heap_init;
  intnat next_version = info->next_version;
  while (src < heap_ptr) {
    hh_header_t hd = Deref(src);
    intnat size;
    if (Is_blue(hd)) {
      size = Obj_bhsize(hd);
    } else {
      gc_update(src, dst);
      hd = Deref(src);
      size = Obj_bhsize(hd);
      if (Obj_tag(hd) == Entity_tag) {
        // Move entities manually, resetting the entity version to 0 and writing
        // the previous entity data to the correct offset. If the entity version
        // is >= the next version, that means we're compacting after a canceled
        // recheck, so we must preserve the committed and latest data.
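        //
        // For example, an entity whose version word is 5 keeps its most recent
        // data in field 5 & 1 = 1. If next_version <= 5, that write happened
        // during the canceled recheck, so the committed value in field 0 is
        // preserved as well and the entity is rewritten at dst as {latest,
        // committed, version 2}; otherwise only the field 1 data survives,
        // stored in field 0 with version 0 and NULL_ADDR in field 1.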
        intnat v = Deref(Obj_field(src, 2));
        addr_t data0 = Deref(Obj_field(src, v & 1));
        addr_t data1 = NULL_ADDR;
        if (v >= next_version) {
          data1 = Deref(Obj_field(src, ~v & 1));
          v = 2;
        } else {
          v = 0;
        }
        Deref(dst) = hd;
        Deref(Obj_field(dst, 0)) = data0;
        Deref(Obj_field(dst, 1)) = data1;
        Deref(Obj_field(dst, 2)) = v;
      } else {
        memmove(Ptr_of_addr(dst), Ptr_of_addr(src), size);
      }
      dst += size;
    }
    src += size;
  }

  // TODO: Space between dst and info->heap is unused, but will almost certainly
  // become used again soon. Currently we will never decommit, which may cause
  // issues when there is memory pressure.
  //
  // If the kernel supports it, we might consider using madvise(MADV_FREE),
  // which allows the kernel to reclaim the memory lazily under pressure, but
  // would not force page faults under healthy operation.

  info->heap = dst;

  // Invariant: info->heap_init <= gc_end <= info->heap
  // See declaration of gc_end
  gc_end = dst;

  info->free_bsize = 0;

  // All live entities have been reset to version 0, so we can also reset the
  // global version counter.
  info->next_version = 2;

  CAMLreturn(Val_unit);
}
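
The gc_thread/gc_update calls above implement the classic "threading" (pointer
reversal) compaction trick. The following toy is only a sketch of that idea on
a flat array standing in for the heap; the names toy_thread/toy_unthread, the
odd-header/even-link tagging, and the cell layout are inventions for
illustration, not the real helpers from hh_shared.c.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Toy "heap": each cell holds either a header word (low bit set) or a chain
// link (a cell index shifted left by one bit, so its low bit is clear).
// References held in ordinary slots are plain cell indices.
#define TOY_CELLS 16
static intptr_t cells[TOY_CELLS];

// Thread the reference stored in cell `ref`: splice `ref` onto the chain
// rooted at the referenced object's header word. The original header value
// ends up at the tail of the chain.
static void toy_thread(intptr_t ref) {
  intptr_t obj = cells[ref];   // cell index of the referenced object
  cells[ref] = cells[obj];     // old header (or previous link) moves into ref
  cells[obj] = ref << 1;       // header slot now points back at ref
}

// The object at `obj` is about to move to `dst`: rewrite every slot on the
// chain to `dst`, then restore the original header.
static void toy_unthread(intptr_t obj, intptr_t dst) {
  intptr_t cur = cells[obj];
  while ((cur & 1) == 0) {     // even values are chain links
    intptr_t ref = cur >> 1;
    cur = cells[ref];          // next link, or the original header at the tail
    cells[ref] = dst;          // the slot points at the new location again
  }
  cells[obj] = cur;            // put the header back
}

int main(void) {
  cells[8] = 0x21;             // object at cell 8, header 0x21 (low bit set)
  cells[0] = 8;                // two root slots referencing the object
  cells[1] = 8;

  toy_thread(0);               // chain: cells[8] -> slot 0 -> header
  toy_thread(1);               // chain: cells[8] -> slot 1 -> slot 0 -> header

  toy_unthread(8, 4);          // pretend the object will move to cell 4
  assert(cells[0] == 4 && cells[1] == 4 && cells[8] == 0x21);
  printf("roots rewritten to %ld, header restored to %#lx\n",
         (long)cells[0], (unsigned long)cells[8]);
  return 0;
}

The assert checks the same property hh_compact relies on: after unthreading,
every slot that referenced the object points at its new location and the
original header value survives the round trip.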
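
The TODO about the unused tail between dst and info->heap can be prototyped
with madvise. The sketch below is an assumption-laden illustration:
release_tail is a hypothetical helper name, it reuses the file's addr_t and
Ptr_of_addr, and it relies on Linux's MADV_FREE, which may not be permitted on
this particular mapping depending on how the heap was mmapped, so treat it as
an experiment rather than a drop-in fix.

#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

// Hypothetical helper for the TODO above: advise the kernel that the pages
// between the new frontier (dst) and the old one (old_heap) may be reclaimed
// lazily. MADV_FREE keeps the mapping valid, so later allocation from this
// range simply faults pages back in.
static void release_tail(addr_t dst, addr_t old_heap) {
  size_t page = (size_t)sysconf(_SC_PAGESIZE);
  uintptr_t begin = (uintptr_t)Ptr_of_addr(dst);
  uintptr_t end = (uintptr_t)Ptr_of_addr(old_heap);
  begin = (begin + page - 1) & ~(uintptr_t)(page - 1);  // round up to a page
  end &= ~(uintptr_t)(page - 1);                        // round down to a page
#ifdef MADV_FREE
  if (begin < end) {
    madvise((void *)begin, end - begin, MADV_FREE);
  }
#endif
}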