List fuseMatchedRanges()

in lib/src/license_detection/token_matcher.dart [283:386]


List<MatchRange> fuseMatchedRanges(
  List<MatchRange> matches,
  double confidence,
  int size,
  List<Range> runs,
  int targetSize,
) {
  var claimed = <MatchRange>[];
  final errorMargin = (size * (1 - confidence)).round();

  var filter = List.filled(targetSize, false);

  for (var match in runs) {
    for (var i = match.start; i < match.end; i++) {
      filter[i] = true;
    }
  }

  for (var match in matches) {
    var offset = match.input.start - match.source.start;

    if (offset < 0) {
      if (-offset <= errorMargin) {
        offset = 0;
      } else {
        continue;
      }
    }

    // If filter is false this implies that there were not enough instances of
    // match in this range, so this is a spurious hit and is discarded.
    if (!filter[offset]) {
      continue;
    }

    var unclaimed = true;

    final matchOffset = match.input.start - match.source.start;
    for (var i = 0; i < claimed.length; i++) {
      var claim = claimed[i];
      var claimOffset = claim.input.start - claim.source.start;

      var sampleError = (matchOffset - claimOffset).abs();
      final withinError = sampleError < errorMargin;

      if (withinError && (match.tokensClaimed > sampleError)) {
        // Check if this match lies within the claim, if does just update the number
        // of token count.
        if (match.input.start >= claim.input.start &&
            match.input.end <= claim.input.end) {
          claim =
              claim.update(newClaim: claim.tokensClaimed + match.tokensClaimed);
          unclaimed = false;
        }
        // Check if the claim and match can be merged.
        else {
          // Match is within error margin and claim is likely to
          // be an extension of match. So we update the input and
          // source start offsets of claim.
          if (match.input.start < claim.input.start &&
              match.source.start < claim.source.start) {
            claim = claim.update(
              inputStart: match.input.start,
              srcStart: match.source.start,
              newClaim: claim.tokensClaimed + match.tokensClaimed,
            );

            unclaimed = false;
          }
          // Match is within error margin and match is likely to
          // to extend claim. So we update the input and source
          // end offsets of claim.
          else if (match.input.end > claim.input.end &&
              match.source.end > claim.source.end) {
            claim = claim.update(
              inputEnd: match.input.end,
              srcEnd: match.source.end,
              newClaim: claim.tokensClaimed + match.tokensClaimed,
            );
            unclaimed = false;
          }
        }

        // The match does not extend any existing claims, and
        // can be added as a new claim.
      }

      claimed[i] = claim;
      if (!unclaimed) {
        break;
      }
    }

    // Add as a new claim if it is relevant and has higher quality of
    // hits.
    if (unclaimed && match.tokensClaimed * 10 > matches[0].tokensClaimed) {
      claimed.add(match);
    }
  }

  claimed.sort(_compareTokenCount);

  return claimed;
}