List detectRuns()

in lib/src/license_detection/token_matcher.dart [190:273]


/// Locates stretches of the input whose density of matched tokens is high
/// enough to be a potential match.
///
/// Every input token index covered by the `input` range of one of [matches]
/// is flagged as a hit. A window is then slid across the input one token at a
/// time; its width is the smaller of [inputTokensCount] and
/// [licenseTokenCount]. A window start qualifies when the window holds at
/// least `(confidenceThreshold * width).toInt()` hits. Windows near the end of
/// the input are not padded: positions past the last token contribute no hits.
///
/// Qualifying starts that are consecutive are merged into one [Range] running
/// from the first start of the streak to the last start of the streak plus
/// [n]. NOTE(review): [n] is presumably the n-gram size used by the caller;
/// confirm against the call site.
///
/// Returns an empty growable list when no window qualifies, otherwise an
/// unmodifiable list of the merged ranges in ascending order.
List<Range> detectRuns(
  List<MatchRange> matches,
  double confidenceThreshold,
  final int inputTokensCount,
  final int licenseTokenCount,
  int n,
) {
  // The window cannot be wider than the input itself: when the input has
  // fewer tokens than the license, shrink the window to the input length and
  // analyse whatever is available.
  final windowSize = licenseTokenCount > inputTokensCount
      ? inputTokensCount
      : licenseTokenCount;

  // Fewest hits a window needs to be treated as a candidate (truncated
  // towards zero).
  final requiredHits = (confidenceThreshold * windowSize).toInt();

  // matched[i] is true when input token i lies inside at least one match.
  final matched = List<bool>.filled(inputTokensCount, false);
  for (final match in matches) {
    final span = match.input;
    for (var index = span.start; index < span.end; index++) {
      matched[index] = true;
    }
  }

  // Hit count of the very first window, i.e. [0, windowSize).
  var windowHits = 0;
  for (final isHit in matched.take(windowSize)) {
    if (isHit) {
      windowHits += 1;
    }
  }

  final runs = <Range>[];

  // Start of the most recently recorded window. Only meaningful once `runs`
  // is non-empty.
  var lastStart = 0;

  // Records a qualifying window start: extends the current run when the
  // start directly follows the previously recorded one, otherwise opens a
  // new run.
  void recordWindow(int start) {
    if (runs.isNotEmpty && start == lastStart + 1) {
      runs.last = Range(runs.last.start, start + n);
    } else {
      runs.add(Range(start, start + n));
    }
    lastStart = start;
  }

  if (windowHits >= requiredHits) {
    recordWindow(0);
  }

  // Advance the window one token at a time, updating the hit count
  // incrementally instead of recounting the whole window.
  for (var start = 1; start < inputTokensCount; start++) {
    final incoming = start + windowSize - 1;

    // The token that just dropped off the left edge no longer counts.
    if (matched[start - 1]) {
      windowHits -= 1;
    }

    // The token entering on the right edge counts only if it exists.
    if (incoming < inputTokensCount && matched[incoming]) {
      windowHits += 1;
    }

    if (windowHits >= requiredHits) {
      recordWindow(start);
    }
  }

  if (runs.isEmpty) {
    return <Range>[];
  }

  return List.unmodifiable(runs);
}