export function textToSnippet()

in pathology/viewer/src/utils/common.ts [68:165]


/**
 * Cuts a snippet of at most `snippetSize` characters out of `text`, placed
 * over the region that contains the most keyword matches.
 *
 * Keywords are trimmed and matched case-insensitively as literal text (regexp
 * metacharacters are escaped). Keywords that are blank after trimming, or
 * longer than `snippetSize`, are ignored. Among windows with the same number
 * of matches, the earliest one in `text` wins.
 *
 * @param keywords Search terms to look for in `text`.
 * @param text The text to extract the snippet from.
 * @param snippetSize Maximum length of the returned snippet.
 * @returns The selected snippet, or the first `snippetSize` characters of
 *     `text` when there are no usable keywords or none of them match.
 */
export function textToSnippet(
    keywords: Set<string>, text: string, snippetSize: number): string {
  const defaultSnippet = text.slice(0, snippetSize);
  if (!keywords.size || !text || !snippetSize) {
    return defaultSnippet;
  }

  // Trim, drop blank and oversized words, escape regexp metacharacters, then
  // join into one alternation. Blank words must be dropped: an empty
  // alternative matches the empty string at every position and would shadow
  // every keyword listed after it.
  const regexString =
      Array.from(keywords, (word) => word.trim())
          .filter((word) => word.length > 0 && word.length <= snippetSize)
          .map((word) => word.replace(/([.?*+^$[\]\\(){}|-])/g, '\\$1'))
          .join('|');
  if (!regexString) {
    return defaultSnippet;
  }
  const regex = new RegExp(regexString, 'gi');

  // Every keyword occurrence as a half-open [start, end) span, in text order.
  // A match at index 0 is a valid match and is kept.
  const spans = Array.from(text.matchAll(regex), (match) => {
    const start = match.index ?? 0;
    return {start, end: start + match[0].length};
  });

  let maxWordMatchesInSnippet = 0;
  // Position of the first character of the best snippet candidate.
  let bestSnippetStart: number|undefined;
  // Position just past the last character of the best snippet candidate.
  let bestSnippetEnd: number|undefined;
  // Index (into `spans`) of the first match of the current window.
  let firstInWindow = 0;

  for (let i = 0; i < spans.length; i++) {
    // Length of the window spanning matches firstInWindow..i.
    const winLen = spans[i].end - spans[firstInWindow].start;

    if (winLen > snippetSize) {
      // The current match breached the max snippet length, so the candidate
      // is the window without it: matches firstInWindow..i-1.
      const wordMatchesInSnippet = i - firstInWindow;
      if (wordMatchesInSnippet > maxWordMatchesInSnippet) {
        maxWordMatchesInSnippet = wordMatchesInSnippet;
        bestSnippetStart = spans[firstInWindow].start;
        bestSnippetEnd = spans[i - 1].end;
      }
      // Slide the window forward by one match. The window never shrinks in
      // match count, so only a strictly better window can be recorded later.
      firstInWindow++;
    } else if (i === spans.length - 1) {
      // Last match reached without breaching the limit: do one final
      // best-candidate check that includes the current match.
      const wordMatchesInSnippet = i - firstInWindow + 1;
      if (wordMatchesInSnippet > maxWordMatchesInSnippet) {
        maxWordMatchesInSnippet = wordMatchesInSnippet;
        bestSnippetStart = spans[firstInWindow].start;
        bestSnippetEnd = spans[i].end;
      }
    }
  }
  // Compare against undefined explicitly: 0 is a legitimate start position.
  if (bestSnippetStart === undefined || bestSnippetEnd === undefined) {
    return defaultSnippet;
  }

  // Prefer adding more (70%) of the unused length to the front for better
  // context.
  const spareLenPrefixPercent = 0.7;
  const snippetSpareLength = snippetSize - (bestSnippetEnd - bestSnippetStart);
  let finalSnippetStart = Math.max(
      0,
      bestSnippetStart -
          Math.round(spareLenPrefixPercent * snippetSpareLength));

  // If we are too close to the end of the overall string, then try to max
  // out chars to the beginning.
  if (finalSnippetStart + snippetSize > text.length) {
    finalSnippetStart = Math.max(0, text.length - snippetSize);
  }

  return text.substring(finalSnippetStart, finalSnippetStart + snippetSize);
}