private void doFold()

in Extremem/src/main/java/com/amazon/corretto/benchmark/extremem/RelativeTimeMetrics.java [335:706]


  private void doFold(long requiredSpan) {
    /* We enforce the following constraints on folds:
     *  1. There must be n available buckets to hold the fold.
     *  2. The bucket that becomes the right neighbor of the fold must
     *     be of size 2^(n-1) or larger.
     * While either constraint is not satisfied and there is at least
     * one larger neighbor, we expand the fold to subsume an additional
     * larger neighbor.  Subsuming a larger neighbor that spans
     * twice the range of the previously largest fold bucket does not
     *  improve compliance with constraint 2.  In the case that
     *  subsuming the next larger neighbor makes it possible to
     *  satisfy constraint 2, we know that the newly subsumed neighbor
     *  was either smaller or the same size as the previously largest
     *  bucket in the fold. Thus, we are assured that processing of
     *  overlap between existing data and the new fold will always
     *  coalesce at least the two largest nodes in the overlapping
     *  region.  */
    int span_buckets = 1;
    long span_of_buckets = DefaultIntervalMicroseconds;
    long span_of_largest_bucket = span_of_buckets;
    long required_fblb = fblb - requiredSpan;

    Trace.msg(4, id, ": doFold(", Long.toString(requiredSpan),
              ") represents new fblb: ", Long.toString(required_fblb));
    if (Trace.enabled(3))
      dump();
    while (span_of_buckets < requiredSpan) {
      span_buckets++;
      span_of_largest_bucket *= 2;
      span_of_buckets += span_of_largest_bucket;
    }

    Trace.msg(4, id, ": span_buckets: ", Integer.toString(span_buckets),
              ", span_of_buckets: ", Long.toString(span_of_buckets),
              ", largest span: ", Long.toString(span_of_largest_bucket));

    // Find the desired overlap with existing contents.
    //
    //  Suppose the existing tail looks like:
    //    1, 1, 1, 2, 4, 8, 16, 16, 16, 16, 16, 16, 32, ...
    //                                          ^^
    //                                          A  
    //  and span_of_largest_bucket is 32.  In other words, suppose my
    //  proposed folded span consists of:
    //    1, 2, 4, 8, 16, 32
    //  My naive first preference for the overlap point would be at
    //  point A.  But this fold doesn't properly align with the
    //  existing data.  In other words, we can't figure out how to
    //  combine the values of the existing buckets into the folded
    //  buckets.  See below:
    //
    //    1, 1, 1, 2, 4, 8, 16, 16, 16, 16, 16, 16, 32, ...
    //       \--/  \-----------------/  \/  \----/
    //    1    2    does not fit in 8   16    32
    //
    //  The canSquash() method answers the question of whether a
    //  particular fold is capable of representing the data contained
    //  within existing buckets.
    
    int neighbor_index = fbi;
    int adjusted_span_buckets = span_buckets;
    long adjusted_largest_span = span_of_largest_bucket;
    long adjusted_requiredSpan = requiredSpan;
    long adjusted_span_of_buckets = span_of_buckets;

    boolean found_overlap = false;
    // Number of existing buckets to overlap with new logarithmic span.
    int overlap_candidate = 0;

    // The search for an overlap candidate proceeds in two dimensions:
    //
    //  1. Consider each possible point at which the existing fold
    //     might align.  
    //
    //  2. Consider expanding the fold by adding a new entry that is
    //     larger than the previous largest entry.  Once the fold is
    //     expanded, we should consider aligning at the same
    //     overlap_candidate position that did not work previously
    //     before expanding it again.

    while (overlap_candidate < biu) {
      // Since overlap_candidate < biu, neighbor_index known to be valid.
      if ((adjusted_span_of_buckets >= adjusted_requiredSpan) &&
          (spanAt(neighbor_index) >= adjusted_largest_span) &&
          canSquash(overlap_candidate, adjusted_largest_span)) {
        found_overlap = true;
        break;
      } else if (adjusted_span_of_buckets >=
                 adjusted_requiredSpan + spanAt(neighbor_index)) {
        // Try the same fold at a higher overlap_candidate index
        adjusted_requiredSpan += spanAt(neighbor_index);
        overlap_candidate++;
        neighbor_index = incrIndex(neighbor_index);
      } else {
        // Try an expanded fold at the same overlap_candidate position
        adjusted_largest_span *= 2;
        adjusted_span_of_buckets += adjusted_largest_span;
        adjusted_span_buckets++;
      }
    }

    Trace.msg(4, id, ": found_overlap: ", found_overlap? "true": "false",
              ",: overlap_candidate: ", Integer.toString(overlap_candidate),
              ", adjusted_span_of_buckets: ",
              Long.toString(adjusted_span_of_buckets));
    Trace.msg(4, id, ": adjusted_largest_span: ",
              Long.toString(adjusted_largest_span),
              ", adjusted_requiredSpan: ",
              Long.toString(adjusted_requiredSpan));

    if (!found_overlap) {
      // Subsume everything.  For example, suppose span_of_largest_bucket
      // is 1024 but the current state buckets hold only 1 2 4 8.  The
      // search for a suitable overlap_candidate will fail and control
      // flows to here.  The solution is to consolidate the entirety
      // of the existing span into a single consolidation bucket.
      int index = fbi;
      int tally = 0;
      for (int i = 0; i < biu; i++) {
        tally += buckets[index];
        index = incrIndex(index);
      }

      adjusted_largest_span = span_of_largest_bucket;
      adjusted_requiredSpan = requiredSpan;
      adjusted_span_of_buckets = span_of_buckets;
      adjusted_span_buckets = span_buckets;

      long total_span = lbhb - fblb;

      biu = 1;
      buckets[fbi] = tally;

      if ((total_span <= adjusted_largest_span) &&
        (span_of_buckets - total_span >= requiredSpan)) {
        // Overlap the largest span with consolidation bucket.
        // Exercise discretion to preserve log resolution in the small
        // time ranges.
        long excess = (span_of_buckets - total_span) - requiredSpan;
        // Put the excess at the high end of the consolidation
        // bucket's span.
        lbhb += excess;
        bucket_bounds[incrIndex(fbi)] = lbhb;
        fblb = lbhb - adjusted_largest_span;

        adjusted_span_of_buckets -= adjusted_largest_span;
        adjusted_span_buckets--;
        adjusted_largest_span /= 2;

        overlap_candidate = 1;
      } else {
        // Otherwise, logarithmic expansion precedes consolidation
        // bucket.  The consolidation bucket is the larger of twice
        // the span_of_largest_bucket and the first power of two size
        // as large as total_span.
        long consolidation_span = span_of_largest_bucket * 2;
        while (consolidation_span < total_span)
          consolidation_span *= 2;
        lbhb = fblb + consolidation_span;
        bucket_bounds[incrIndex(fbi)] = lbhb;
        overlap_candidate = 0;
      }
    } else {
      // Turn overlap_candidate tail buckets into a power-of-two sequence,
      // but don't expand the tail yet.  We'll prepend additional
      // buckets in the code that follows.
      int fill_index = incrIndexBy(fbi, overlap_candidate - 1);
      int index = fill_index;
      int compress_count = overlap_candidate;
      int fill_count = 0;
      int fill_follow_index = incrIndex(fill_index);
      long end_of_fill_span = bucket_bounds[fill_follow_index];
      while (compress_count > 0) {
        long bucket_span = adjusted_largest_span;
        int tally = 0;
        while ((bucket_span > 0) && (compress_count > 0)) {
          bucket_span -= spanAt(index);
          tally += buckets[index];
          index = decrIndex(index);
          compress_count--;
        }

        Trace.msg(4, id, " Filling @", Integer.toString(fill_index), " with ",
                  Integer.toString(tally), ", spanning ",
                  debug_us2s(adjusted_largest_span));

        buckets[fill_index] = tally;
        fill_count++;
        bucket_bounds[fill_follow_index] = end_of_fill_span;

        fblb = end_of_fill_span - adjusted_largest_span;
        adjusted_span_of_buckets -= adjusted_largest_span;
        adjusted_span_buckets--;

        Trace.msg(4, id, " Start bound adjusted to: ", debug_us2s(fblb));

        end_of_fill_span = fblb;
        fill_follow_index = fill_index;
        fill_index = decrIndex(fill_index);
        adjusted_largest_span /= 2;
      }
      int bucket_change = overlap_candidate - fill_count;
      fbi = incrIndexBy(fbi, bucket_change);
      biu -= bucket_change;
    }

    Trace.msg(4, id, ": After squashing tail, dump is: ");
    if (Trace.enabled(3))
      dump();

    Trace.msg(4, id, " After processing overlap effects");
    Trace.msg(4, id, " adjusted_span_of_buckets: ",
              Long.toString(adjusted_span_of_buckets),
              ", adjusted_largest_span: ",
              Long.toString(adjusted_largest_span),
              ", adjusted_span_buckets: ",
              Long.toString(adjusted_span_buckets));

    long overshoot = adjusted_span_of_buckets - (fblb - required_fblb);
    if (overshoot < 0)
      overshoot = 0;

    Trace.msg(4, id, ": overshoot is: ", Long.toString(overshoot));
    
    // if (overshoot > 0) prepending the logarithmic tree reaches
    // further than necessary.  Remove from the logarithmic tree any
    // buckets that are not required for the required span.
    //
    // If we exhuast our bucket budget before we have spanned the
    // full desired range, simply double the span of the bucket that
    // precedes the exhaustion point.  For example, if our intention
    // is to emit the tail sequence:
    //
    //   1, 2, 4, 8, 16, 32, 64, 128
    //
    // and we find ourselves limited to only 4 available slots, then
    // we replace with:
    //
    //   _, _, _, _, 32, 32, 64, 128  */

    int available_slots = BucketCount - biu;
    if (available_slots > 0) {
      while ((available_slots > 0) && (adjusted_span_of_buckets > 0)) {
        if (overshoot > adjusted_largest_span) {

          Trace.msg(4, id, ": skipping overshot bucket with span: ",
                    Long.toString(adjusted_largest_span));
          
          // Skip this span, as it pushes us beyond target required_fblb.
          overshoot -= adjusted_largest_span;
        } else {
          bucket_bounds[fbi] = fblb;
          biu++;
          fbi = decrIndex(fbi);
          buckets[fbi] = 0;
          fblb -= adjusted_largest_span;
          available_slots--;
        }
        adjusted_span_of_buckets -= adjusted_largest_span;
        adjusted_largest_span /= 2;
      }
      
      // We have run out of slots before we spanned the desired range.
      // Doubling the span of the last emitted bucket is guaranteed to
      // cover the entire span.  But we don't want to cause fblb to have
      // a negative value.
      //
      // Suppose, for example, that available_slots = 3, span_of_buckets
      // is 31, and overshoot is 7.  We will emit: 
      //
      //                8 16
      //
      // Will skip 4, 2, 1 which sum to overshoot value 7.  Suppose,
      // available_slots = 3, span_of_buckets is 63, overshoot is 13.
      // We emit:
      //
      //                  2 16 32
      //
      // We skip 8, 4, 1.  The real problem occurs if the overshoot is
      // part of the tail that has to be omitted.  For example, suppose
      // available_slots = 3, span_of_buckets is 63, overshoot is 5.
      // We emit:
      //
      //                  8 16 32
      //
      // and we're out of slots.  If I replace 8 with 16, I will cover
      // the intended span, including the overshoot, possibly resulting
      // in a negative value for fblb.  If I leave the span for this
      // slot at 8, I am 2 units too short, and I do not cover the
      // required cumulative range of spans.  I would like to put 10
      // into the position that holds 8, but my invariant says each span
      // is a power of 2.
      //
      // It is rare that this will come up.  If it does, we'll accept a
      // negative value of fblb.
      
    }

    if (adjusted_span_of_buckets > 0) {
      // We've run out of buckets before spanning the full desired
      // range.  We know that we are within spanAt(fbi) of fulfilling
      // the desired range, given that each neighbor's span is
      // typically twice the span of the node at its next lower index
      // (within the logarithmic spanning tree).
      //
      // But we cannot double the span of fbi unless its neighbor is
      // twice its own size.
      //
      // At this point, we must compress the data without adding new
      // buckets. We have two options:
      //
      //  1. if spanAt(incrIndex(fbi)) > spanAt(fbi), just double the
      //     span at fbi.
      //  2. else, spanAt(incrIndex(fbi)) == spanAt(fbi):
      //      Find N where spanAt(fbi) = spanAt(incrIndexBy(fbi, i))
      //      for all values of i <= N and (N == biu) or
      //      spanAt(incrIndexBy(fbi, N+1) > spanAt(increIndexBy(fbi, N).
      //      In this case, coalesce buckets at N and N-1.

      if (spanAt(incrIndex(fbi)) > spanAt(fbi)) {
        Trace.msg(4, id, ": adjusted_span_of_buckets > 0, " +
                  "spanAt(fbi) < spanAt(fbi+1)");
        fblb -= spanAt(fbi);
      } else {
        long each_span = spanAt(fbi);
        // By test above, there are at least two buckets with same span
        int count = 2;
        int index = incrIndex(fbi);
        while (count <= biu) {
          int last_index = index;
          index = incrIndex(index);
          if (spanAt(index) != each_span) {
            index = last_index;
            break;
          }
          count++;
        }
        // count is number of consecutive buckets with same span (each_span).
        // index identifies position of last of these buckets.
        
        Trace.msg(4, id, ": adjusted_span_of_buckets > 0, ",
                  Integer.toString(count), " buckets have same span");

        assert (count > 1): "Expect multiple buckets to span same range";
        int left_index = decrIndex(index);
        // Double the span of the last of these buckets.
        buckets[index] += buckets[left_index];
        bucket_bounds[index] -= each_span;
        // Slide (count - 2) buckets forward.
        for (int i = (count -2); i > 0; i--) {
          index = left_index;
          left_index = decrIndex(left_index);
          buckets[index] = buckets[left_index];
          bucket_bounds[index] = bucket_bounds[left_index];
        }
        // Upon exiting loop, index points to bucket most recently
        // initialized.  left_index points to bucket from which this
        // most recently initialized bucket's tally was copied.
        // left_index is same as fbi.

        // Initialize first bucket's tally to zero and shift its start
        // range forward by each_span.
        bucket_bounds[index] = fblb;
        buckets[fbi] = 0;
        fblb -= each_span;
      }
    }

    Trace.msg(4, id, ": Done with doFold()");
    if (Trace.enabled(3))
      dump();
  }