LineBreakResult _unsafeNextLineBreak()

in lib/web_ui/lib/src/engine/text/line_breaker.dart [187:680]
294 lines of code
132 McCabe index (conditional complexity)

/// Scans [text] forward from [index] and returns the first line break found.
///
/// The scan walks one code point at a time and evaluates the line breaking
/// rules referenced in the inline comments (LB3 through LB31) in order. A rule
/// that forbids a break `continue`s to the next character; a rule that allows
/// or requires a break returns immediately. Because the rules are evaluated
/// top to bottom, their order in this function is significant.
///
/// The returned [LineBreakResult] is built from four positional arguments:
///
///  1. the index at which the break occurs;
///  2. `lastNonNewlineIndex`, the index just past the last scanned character
///     that was not part of a newline sequence;
///  3. `lastNonSpaceIndex`, the index just past the last scanned character
///     that was not a space (nor part of a newline sequence);
///  4. the [LineBreakType]:
///     * [LineBreakType.mandatory] after a hard break or a CR that is not
///       followed by LF,
///     * [LineBreakType.opportunity] where the rules allow a break,
///     * [LineBreakType.endOfText] when the end of [text] is reached,
///     * [LineBreakType.prohibited] when the scan moved past [maxEnd].
///
/// When [maxEnd] is non-null and the scan index exceeds it (checked at the top
/// of each iteration), the function returns a prohibited break at [maxEnd]
/// with the two trailing indices clamped to [maxEnd].
///
/// If [index] is already at or past `text.length`, the loop body never runs
/// and an end-of-text result is returned with both trailing indices equal to
/// [index].
///
/// NOTE(review): the reason for the `_unsafe` prefix is not visible in this
/// function; check the callers for the guarantees they add on top.
LineBreakResult _unsafeNextLineBreak(String text, int index, {int? maxEnd}) {
  // `codePoint`/`curr` always describe the character at `index`. `codePoint`
  // is nullable; the surrogate pair handling below guards against null.
  int? codePoint = getCodePoint(text, index);
  LineCharProperty curr = lineLookup.findForChar(codePoint);

  // Keeps track of the character one position behind. Note that this holds the
  // *effective* property, i.e. after the LB9/LB10 rewrites applied to `curr`.
  LineCharProperty? prev1;

  // Keeps track of the character two positions behind.
  LineCharProperty? prev2;

  // When there's a sequence of spaces, this variable contains the base
  // property i.e. the property of the character before the sequence. It is
  // reset to null as soon as the previous character is not a space.
  LineCharProperty? baseOfSpaceSequence;

  // The index just past the last character that wasn't a space (an exclusive
  // end index). Starts at the initial `index`.
  int lastNonSpaceIndex = index;

  // The index just past the last character that wasn't a new line (an
  // exclusive end index). Starts at the initial `index`.
  int lastNonNewlineIndex = index;

  // When the text/line starts with SP, we should treat the beginning of text/line
  // as if it were a WJ (word joiner).
  if (curr == LineCharProperty.SP) {
    baseOfSpaceSequence = LineCharProperty.WJ;
  }

  // Tracked separately from `curr` because `curr` may be rewritten below (to
  // AL or to the previous property) while LB8a still needs to know that the
  // actual character was a ZWJ.
  bool isCurrZWJ = curr == LineCharProperty.ZWJ;

  // LB10: Treat any remaining combining mark or ZWJ as AL.
  // This catches the case where a CM is the first character on the line.
  if (curr == LineCharProperty.CM || curr == LineCharProperty.ZWJ) {
    curr = LineCharProperty.AL;
  }

  // Length of the run of consecutive RI (regional indicator) characters ending
  // at the previous character; used by LB30a.
  int regionalIndicatorCount = 0;

  // Always break at the end of text.
  // LB3: ! eot
  while (index < text.length) {
    // The scan went past the caller-provided limit without finding a break:
    // report a prohibited break at the limit, clamping the trailing indices so
    // they never exceed it.
    if (maxEnd != null && index > maxEnd) {
      return LineBreakResult(
        maxEnd,
        math.min(maxEnd, lastNonNewlineIndex),
        math.min(maxEnd, lastNonSpaceIndex),
        LineBreakType.prohibited,
      );
    }

    // Keep count of the RI (regional indicator) sequence.
    if (curr == LineCharProperty.RI) {
      regionalIndicatorCount++;
    } else {
      regionalIndicatorCount = 0;
    }

    if (codePoint != null && codePoint > 0xFFFF) {
      // Advance `index` one extra step when handling a surrogate pair in the
      // string.
      index++;
    }
    index++;
    // Shift the window: what was current becomes previous.
    prev2 = prev1;
    prev1 = curr;

    // Capture the ZWJ flag of the character we just stepped over before it is
    // overwritten with the flag of the new current character.
    final bool isPrevZWJ = isCurrZWJ;

    // Reset the base when we are past the space sequence.
    if (prev1 != LineCharProperty.SP) {
      baseOfSpaceSequence = null;
    }

    // Load the character at the new `index`; from here on, the candidate break
    // position is between `prev1` and `curr`, i.e. at `index`.
    codePoint = getCodePoint(text, index);
    curr = lineLookup.findForChar(codePoint);

    isCurrZWJ = curr == LineCharProperty.ZWJ;

    // Always break after hard line breaks.
    // LB4: BK !
    //
    // Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
    // LB5: LF !
    //      NL !
    if (_isHardBreak(prev1)) {
      return LineBreakResult(
        index,
        lastNonNewlineIndex,
        lastNonSpaceIndex,
        LineBreakType.mandatory,
      );
    }

    if (prev1 == LineCharProperty.CR) {
      if (curr == LineCharProperty.LF) {
        // LB5: CR × LF
        continue;
      } else {
        // LB5: CR !
        return LineBreakResult(
          index,
          lastNonNewlineIndex,
          lastNonSpaceIndex,
          LineBreakType.mandatory,
        );
      }
    }

    // At this point, we know for sure the prev character wasn't a new line.
    // `index` now points just past that character, so both trackers store an
    // exclusive end index.
    lastNonNewlineIndex = index;
    if (prev1 != LineCharProperty.SP) {
      lastNonSpaceIndex = index;
    }

    // Do not break before hard line breaks.
    // LB6: × ( BK | CR | LF | NL )
    if (_isHardBreak(curr) || curr == LineCharProperty.CR) {
      continue;
    }

    // Always break at the end of text.
    // LB3: ! eot
    //
    // This check also guarantees `index < text.length` for every rule below,
    // which the `text.codeUnitAt(index)` lookup in LB30 relies on.
    if (index >= text.length) {
      return LineBreakResult(
        text.length,
        lastNonNewlineIndex,
        lastNonSpaceIndex,
        LineBreakType.endOfText,
      );
    }

    // Do not break before spaces or zero width space.
    // LB7: × SP
    if (curr == LineCharProperty.SP) {
      // When we encounter SP, we preserve the property of the previous
      // character so we can later apply the indirect breaking rules.
      if (prev1 == LineCharProperty.SP) {
        // If we are in the middle of a space sequence, a base should've
        // already been set.
        assert(baseOfSpaceSequence != null);
      } else {
        // We are at the beginning of a space sequence, establish the base.
        baseOfSpaceSequence = prev1;
      }
      continue;
    }
    // LB7: × ZW
    if (curr == LineCharProperty.ZW) {
      continue;
    }

    // Break before any character following a zero-width space, even if one or
    // more spaces intervene.
    // LB8: ZW SP* ÷
    if (prev1 == LineCharProperty.ZW ||
        baseOfSpaceSequence == LineCharProperty.ZW) {
      return LineBreakResult(
        index,
        lastNonNewlineIndex,
        lastNonSpaceIndex,
        LineBreakType.opportunity,
      );
    }

    // Do not break a combining character sequence; treat it as if it has the
    // line breaking class of the base character in all of the following rules.
    // Treat ZWJ as if it were CM.
    // LB9: Treat X (CM | ZWJ)* as if it were X
    //      where X is any line break class except BK, NL, LF, CR, SP, or ZW.
    if (curr == LineCharProperty.CM || curr == LineCharProperty.ZWJ) {
      // Other properties: BK, NL, LF, CR, ZW would've already generated a line
      // break, so we won't find them in `prev1`.
      if (prev1 == LineCharProperty.SP) {
        // LB10: Treat any remaining combining mark or ZWJ as AL.
        // Execution falls through so the remaining rules see `curr` as AL.
        curr = LineCharProperty.AL;
      } else {
        if (prev1 == LineCharProperty.RI) {
          // Prevent the previous RI from being double-counted: `curr` is about
          // to become RI again and will be counted at the top of the next
          // iteration.
          regionalIndicatorCount--;
        }
        // Preserve the property of the previous character to treat the sequence
        // as if it were X.
        curr = prev1;
        continue;
      }
    }

    // Do not break after a zero width joiner.
    // LB8a: ZWJ ×
    if (isPrevZWJ) {
      continue;
    }

    // Do not break before or after Word joiner and related characters.
    // LB11: × WJ
    //       WJ ×
    if (curr == LineCharProperty.WJ || prev1 == LineCharProperty.WJ) {
      continue;
    }

    // Do not break after NBSP and related characters.
    // LB12: GL ×
    if (prev1 == LineCharProperty.GL) {
      continue;
    }

    // Do not break before NBSP and related characters, except after spaces and
    // hyphens.
    // LB12a: [^SP BA HY] × GL
    if (!(prev1 == LineCharProperty.SP ||
            prev1 == LineCharProperty.BA ||
            prev1 == LineCharProperty.HY) &&
        curr == LineCharProperty.GL) {
      continue;
    }

    // Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
    // LB13: × CL
    //       × CP
    //       × EX
    //       × IS
    //       × SY
    //
    // The above is a quote from unicode.org. In our implementation, we did the
    // following modification: When there are spaces present, we consider it a
    // line break opportunity.
    if (prev1 != LineCharProperty.SP &&
        (curr == LineCharProperty.CL ||
            curr == LineCharProperty.CP ||
            curr == LineCharProperty.EX ||
            curr == LineCharProperty.IS ||
            curr == LineCharProperty.SY)) {
      continue;
    }

    // Do not break after ‘[’, even after spaces.
    // LB14: OP SP* ×
    //
    // The above is a quote from unicode.org. In our implementation, we did the
    // following modification: Allow breaks when there are spaces. That is why
    // only `prev1` is checked here and not `baseOfSpaceSequence`.
    if (prev1 == LineCharProperty.OP) {
      continue;
    }

    // Do not break within ‘”[’, even with intervening spaces.
    // LB15: QU SP* × OP
    //
    // The above is a quote from unicode.org. In our implementation, we did the
    // following modification: Allow breaks when there are spaces. That is why
    // only `prev1` is checked here and not `baseOfSpaceSequence`.
    if (prev1 == LineCharProperty.QU && curr == LineCharProperty.OP) {
      continue;
    }

    // Do not break between closing punctuation and a nonstarter, even with
    // intervening spaces.
    // LB16: (CL | CP) SP* × NS
    if ((prev1 == LineCharProperty.CL ||
            baseOfSpaceSequence == LineCharProperty.CL ||
            prev1 == LineCharProperty.CP ||
            baseOfSpaceSequence == LineCharProperty.CP) &&
        curr == LineCharProperty.NS) {
      continue;
    }

    // Do not break within ‘——’, even with intervening spaces.
    // LB17: B2 SP* × B2
    if ((prev1 == LineCharProperty.B2 ||
            baseOfSpaceSequence == LineCharProperty.B2) &&
        curr == LineCharProperty.B2) {
      continue;
    }

    // Break after spaces.
    // LB18: SP ÷
    if (prev1 == LineCharProperty.SP) {
      return LineBreakResult(
        index,
        lastNonNewlineIndex,
        lastNonSpaceIndex,
        LineBreakType.opportunity,
      );
    }

    // Do not break before or after quotation marks, such as ‘”’.
    // LB19: × QU
    //       QU ×
    if (prev1 == LineCharProperty.QU || curr == LineCharProperty.QU) {
      continue;
    }

    // Break before and after unresolved CB.
    // LB20: ÷ CB
    //       CB ÷
    if (prev1 == LineCharProperty.CB || curr == LineCharProperty.CB) {
      return LineBreakResult(
        index,
        lastNonNewlineIndex,
        lastNonSpaceIndex,
        LineBreakType.opportunity,
      );
    }

    // Do not break before hyphen-minus, other hyphens, fixed-width spaces,
    // small kana, and other non-starters, or after acute accents.
    // LB21: × BA
    //       × HY
    //       × NS
    //       BB ×
    if (curr == LineCharProperty.BA ||
        curr == LineCharProperty.HY ||
        curr == LineCharProperty.NS ||
        prev1 == LineCharProperty.BB) {
      continue;
    }

    // Don't break after Hebrew + Hyphen.
    // LB21a: HL (HY | BA) ×
    //
    // NOTE(review): after an LB9 `continue`, `prev2` and `prev1` both hold the
    // base character's property, so a combining mark following the hyphen
    // appears to hide the HL from this check — confirm whether that matters.
    if (prev2 == LineCharProperty.HL &&
        (prev1 == LineCharProperty.HY || prev1 == LineCharProperty.BA)) {
      continue;
    }

    // Don’t break between Solidus and Hebrew letters.
    // LB21b: SY × HL
    if (prev1 == LineCharProperty.SY && curr == LineCharProperty.HL) {
      continue;
    }

    // Do not break before ellipses.
    // LB22: × IN
    if (curr == LineCharProperty.IN) {
      continue;
    }

    // Do not break between digits and letters.
    // LB23: (AL | HL) × NU
    //       NU × (AL | HL)
    if ((_isALorHL(prev1) && curr == LineCharProperty.NU) ||
        (prev1 == LineCharProperty.NU && _isALorHL(curr))) {
      continue;
    }

    // Do not break between numeric prefixes and ideographs, or between
    // ideographs and numeric postfixes.
    // LB23a: PR × (ID | EB | EM)
    if (prev1 == LineCharProperty.PR &&
        (curr == LineCharProperty.ID ||
            curr == LineCharProperty.EB ||
            curr == LineCharProperty.EM)) {
      continue;
    }
    // LB23a: (ID | EB | EM) × PO
    if ((prev1 == LineCharProperty.ID ||
            prev1 == LineCharProperty.EB ||
            prev1 == LineCharProperty.EM) &&
        curr == LineCharProperty.PO) {
      continue;
    }

    // Do not break between numeric prefix/postfix and letters, or between
    // letters and prefix/postfix.
    // LB24: (PR | PO) × (AL | HL)
    if ((prev1 == LineCharProperty.PR || prev1 == LineCharProperty.PO) &&
        _isALorHL(curr)) {
      continue;
    }
    // LB24: (AL | HL) × (PR | PO)
    if (_isALorHL(prev1) &&
        (curr == LineCharProperty.PR || curr == LineCharProperty.PO)) {
      continue;
    }

    // Do not break between the following pairs of classes relevant to numbers.
    // LB25: (CL | CP | NU) × (PO | PR)
    if ((prev1 == LineCharProperty.CL ||
            prev1 == LineCharProperty.CP ||
            prev1 == LineCharProperty.NU) &&
        (curr == LineCharProperty.PO || curr == LineCharProperty.PR)) {
      continue;
    }
    // LB25: (PO | PR) × OP
    if ((prev1 == LineCharProperty.PO || prev1 == LineCharProperty.PR) &&
        curr == LineCharProperty.OP) {
      continue;
    }
    // LB25: (PO | PR | HY | IS | NU | SY) × NU
    if ((prev1 == LineCharProperty.PO ||
            prev1 == LineCharProperty.PR ||
            prev1 == LineCharProperty.HY ||
            prev1 == LineCharProperty.IS ||
            prev1 == LineCharProperty.NU ||
            prev1 == LineCharProperty.SY) &&
        curr == LineCharProperty.NU) {
      continue;
    }

    // Do not break a Korean syllable.
    // LB26: JL × (JL | JV | H2 | H3)
    if (prev1 == LineCharProperty.JL &&
        (curr == LineCharProperty.JL ||
            curr == LineCharProperty.JV ||
            curr == LineCharProperty.H2 ||
            curr == LineCharProperty.H3)) {
      continue;
    }
    // LB26: (JV | H2) × (JV | JT)
    if ((prev1 == LineCharProperty.JV || prev1 == LineCharProperty.H2) &&
        (curr == LineCharProperty.JV || curr == LineCharProperty.JT)) {
      continue;
    }
    // LB26: (JT | H3) × JT
    if ((prev1 == LineCharProperty.JT || prev1 == LineCharProperty.H3) &&
        curr == LineCharProperty.JT) {
      continue;
    }

    // Treat a Korean Syllable Block the same as ID.
    // LB27: (JL | JV | JT | H2 | H3) × PO
    if (_isKoreanSyllable(prev1) && curr == LineCharProperty.PO) {
      continue;
    }
    // LB27: PR × (JL | JV | JT | H2 | H3)
    if (prev1 == LineCharProperty.PR && _isKoreanSyllable(curr)) {
      continue;
    }

    // Do not break between alphabetics.
    // LB28: (AL | HL) × (AL | HL)
    if (_isALorHL(prev1) && _isALorHL(curr)) {
      continue;
    }

    // Do not break between numeric punctuation and alphabetics (“e.g.”).
    // LB29: IS × (AL | HL)
    if (prev1 == LineCharProperty.IS && _isALorHL(curr)) {
      continue;
    }

    // Do not break between letters, numbers, or ordinary symbols and opening or
    // closing parentheses.
    // LB30: (AL | HL | NU) × OP
    //
    // LB30 only applies to OP and CP characters whose East Asian Width
    // property is not F, W or H, hence the `_hasEastAsianWidthFWH` exclusions.
    //
    // Here the width check is made on the code unit of the current (OP)
    // character, located at `index`.
    if ((_isALorHL(prev1) || prev1 == LineCharProperty.NU) &&
        curr == LineCharProperty.OP &&
        !_hasEastAsianWidthFWH(text.codeUnitAt(index))) {
      continue;
    }
    // LB30: CP × (AL | HL | NU)
    //
    // NOTE(review): `index - 1` is the code unit immediately before `index`.
    // When the CP character is followed by combining marks (absorbed by LB9),
    // that code unit belongs to the last mark rather than to the CP character
    // itself — confirm this is acceptable.
    if (prev1 == LineCharProperty.CP &&
        !_hasEastAsianWidthFWH(text.codeUnitAt(index - 1)) &&
        (_isALorHL(curr) || curr == LineCharProperty.NU)) {
      continue;
    }

    // Break between two regional indicator symbols if and only if there are an
    // even number of regional indicators preceding the position of the break.
    // LB30a: sot (RI RI)* RI × RI
    //        [^RI] (RI RI)* RI × RI
    //
    // At this point `regionalIndicatorCount` is the length of the RI run that
    // ends at `prev1`; it does not yet include `curr`.
    if (curr == LineCharProperty.RI) {
      if (regionalIndicatorCount.isOdd) {
        continue;
      } else {
        return LineBreakResult(
          index,
          lastNonNewlineIndex,
          lastNonSpaceIndex,
          LineBreakType.opportunity,
        );
      }
    }

    // Do not break between an emoji base and an emoji modifier.
    // LB30b: EB × EM
    if (prev1 == LineCharProperty.EB && curr == LineCharProperty.EM) {
      continue;
    }

    // Break everywhere else.
    // LB31: ALL ÷
    //       ÷ ALL
    return LineBreakResult(
      index,
      lastNonNewlineIndex,
      lastNonSpaceIndex,
      LineBreakType.opportunity,
    );
  }
  // Reached when the loop condition fails: either the initial `index` was
  // already at/after the end of the text, or a `continue` (e.g. LB6 or
  // LB5's CR × LF) carried the scan to the end.
  return LineBreakResult(
    text.length,
    lastNonNewlineIndex,
    lastNonSpaceIndex,
    LineBreakType.endOfText,
  );
}