static addMarksToUnmarkedSentences()

in src/core/awspack/TextToSpeechUtils.js [118:176]


  static addMarksToUnmarkedSentences(text, marks) {
    if (!marks || marks.length === 0) return text;

    const ssmlMarkRegex = /<mark name=(?:"|')(.*?)(?:"|')\/>/g;
    const ssmlTagRegex = /<[^>]*>/g;

    // Find the indices of any marks in the text
    const markIndices = [];
    let markResult = ssmlMarkRegex.exec(text);

    while (markResult !== null) {
      markIndices.push(markResult.index);
      markResult = ssmlMarkRegex.exec(text);
    }

    // Find all SSML tags in the text
    const existingTags = [];
    let ssmlResult = ssmlTagRegex.exec(text);
    while (ssmlResult !== null) {
      existingTags.push({
        start: ssmlResult.index,
        end: ssmlResult.index + ssmlResult[0].length,
        text: ssmlResult[0],
      });
      ssmlResult = ssmlTagRegex.exec(text);
    }

    // Create a copy of the text with all SSML marks replaces with whitespace
    let cleanedText = text.slice();
    existingTags.forEach(existingSsml => {
      const whitespace = new Array(existingSsml.text.length + 1).join(' ');
      cleanedText = [
        cleanedText.slice(0, existingSsml.start),
        whitespace,
        cleanedText.slice(existingSsml.end),
      ].join('');
    });

    const sentenceEndIndices = this._getSentenceEnds(cleanedText);

    // Only insert random marks into sentences that don't already have any
    let prevIndex = 0;
    const targetIndices = sentenceEndIndices.filter(index => {
      const containsMark =
        markIndices.findIndex(markIndex => {
          return prevIndex <= markIndex && index > markIndex;
        }) !== -1;
      prevIndex = index;
      return !containsMark;
    });

    const randomMarkedText = this._insertRandomMarksAt(
      text,
      targetIndices,
      marks
    );

    return randomMarkedText;
  }