CodeCompletionCore::RuleEndStatus CodeCompletionCore::processRule()

in mysqlshdk/libs/parser/code-completion/CodeCompletionCore.cpp [297:569]


CodeCompletionCore::RuleEndStatus CodeCompletionCore::processRule(
    ATNState *startState, size_t tokenIndex, std::vector<size_t> &callStack,
    std::string indentation) {
  // Start with rule specific handling before going into the ATN walk.

  // Check first if we've taken this path with the same input before.
  auto &positionMap = _shortcutMap[startState->ruleIndex];
  if (positionMap.find(tokenIndex) != positionMap.end()) {
    if (showDebugOutput) {
      log_debug3("=====> shortcut");
    }
    return positionMap[tokenIndex];
  }

  RuleEndStatus result;

  // For rule start states we determine and cache the follow set, which gives us
  // 3 advantages: 1) We can quickly check if a symbol would be matched when we
  // follow that rule. We can so check in advance
  //    and can save us all the intermediate steps if there is no match.
  // 2) We'll have all symbols that are collectable already together when we are
  // at the caret when entering a rule. 3) We get this lookup for free with any
  // 2nd or further visit of the same rule, which often happens
  //    in non trivial grammars, especially with (recursive) expressions and of
  //    course when invoking code completion multiple times.
  FollowSetsPerState &setsPerState = _followSetsByATN[typeid(_parser)];
  if (setsPerState.count(startState->stateNumber) == 0) {
    ATNState *stop = _atn.ruleToStopState[startState->ruleIndex];
    setsPerState[startState->stateNumber].sets =
        determineFollowSets(startState, stop);

    // Sets are split by path to allow translating them to preferred rules. But
    // for quick hit tests it is also useful to have a set with all symbols
    // combined.
    IntervalSet combined;
    for (auto &set : setsPerState[startState->stateNumber].sets)
      combined.addAll(set.intervals);
    setsPerState[startState->stateNumber].combined = combined;
  }

  const auto &followSets = setsPerState[startState->stateNumber];
  callStack.push_back(startState->ruleIndex);

  if (tokenIndex >= _tokens.size() - 1) {  // At caret?
    if (preferredRules.count(startState->ruleIndex) > 0) {
      // No need to go deeper when collecting entries and we reach a rule that
      // we want to collect anyway.
      translateToRuleIndex(callStack);
    } else {
      // Convert all follow sets to either single symbols or their associated
      // preferred rule and add the result to our candidates list.
      auto fullPath = callStack;
      const auto size = callStack.size();

      for (const auto &set : followSets.sets) {
        fullPath.resize(size);
        fullPath.insert(fullPath.end(), set.path.begin(), set.path.end());

        if (!translateToRuleIndex(fullPath)) {
          for (ssize_t symbol : set.intervals.toList()) {
            if (ignoredTokens.count(symbol) == 0) {
              if (showDebugOutput) {
                std::string debug = _vocabulary.getDisplayName(symbol);

                if (!set.following.empty()) {
                  debug += " ->";

                  for (const auto follows : set.following) {
                    debug += " " + _vocabulary.getDisplayName(follows);
                  }
                }

                log_debug3("=====> collected: %s", debug.c_str());
              }

              // Following is empty if there is more than one entry in the set.
              auto token =
                  _candidates.tokens.try_emplace(symbol, set.following);

              if (!token.second) {
                // Insertion didn't happen, more than one following list for the
                // same symbol.
                if (token.first->second != set.following) {
                  token.first->second.clear();
                }
              }
            }
          }
        }
      }
    }

    callStack.pop_back();
    return {};

  } else {
    // Process the rule if we either could pass it without consuming anything
    // (epsilon transition) or if the current input symbol will be matched
    // somewhere after this entry point.
    size_t currentSymbol = _tokens[tokenIndex];
    if (!followSets.combined.contains(Token::EPSILON) &&
        !followSets.combined.contains(currentSymbol)) {
      callStack.pop_back();
      return {};
    }
  }

  // The current state execution pipeline contains all yet-to-be-processed ATN
  // states in this rule. For each such state we store the token index + a list
  // of rules that lead to it.
  std::vector<PipelineEntry> statePipeline;
  PipelineEntry currentEntry;

  // Bootstrap the pipeline.
  statePipeline.push_back({startState, tokenIndex});

  while (!statePipeline.empty()) {
    currentEntry = statePipeline.back();
    statePipeline.pop_back();
    ++_statesProcessed;

    bool atCaret = currentEntry.tokenIndex >= _tokens.size() - 1;
    if (showDebugOutput) {
      printDescription(indentation, currentEntry.state,
                       generateBaseDescription(currentEntry.state),
                       currentEntry.tokenIndex);
      if (showRuleStack) printRuleState(indentation, callStack);
    }

    switch (currentEntry.state->getStateType()) {
      case ATNStateType::RULE_START:  // Happens only for the first state in
                                      // this rule, not subrules.
        indentation += "  ";
        break;

      // Found the end of this rule. Determine the following states and return
      // to the caller.
      case ATNStateType::RULE_STOP: {
        // Record the token index we are at, to report it to the caller.
        result.insert(currentEntry.tokenIndex);
        continue;
      }

      default:
        break;
    }

    for (const auto &transition : currentEntry.state->transitions) {
      switch (transition->getTransitionType()) {
        case TransitionType::RULE: {
          auto endStatus =
              processRule(transition->target, currentEntry.tokenIndex,
                          callStack, indentation);
          for (auto &status : endStatus)
            statePipeline.push_back(
                {static_cast<const RuleTransition *>(transition.get())
                     ->followState,
                 status});
          break;
        }

        case TransitionType::PREDICATE: {
          if (checkPredicate(
                  static_cast<const PredicateTransition *>(transition.get())))
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex});
          break;
        }

        case TransitionType::WILDCARD: {
          if (atCaret) {
            if (!translateToRuleIndex(callStack)) {
              for (ssize_t token :
                   misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE,
                                         (ssize_t)_atn.maxTokenType)
                       .toList())
                if (ignoredTokens.count(token) == 0)
                  _candidates.tokens[token].clear();
            }
          } else {
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex + 1});
          }
          break;
        }

        default: {
          if (transition->isEpsilon()) {
            if (atCaret) {
              const auto next_state = transition->target->getStateType();

              // if this is an epsilon transition into a rule stop or a block
              // end, we've already consumed the whole rule, so this should not
              // be a candidate
              if (ATNStateType::RULE_STOP != next_state &&
                  ATNStateType::BLOCK_END != next_state) {
                translateToRuleIndex(callStack);
              }
            }

            // Jump over simple states with a single outgoing epsilon
            // transition.
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex});
            continue;
          }

          misc::IntervalSet set = transition->label();
          if (!set.isEmpty()) {
            if (transition->getTransitionType() == TransitionType::NOT_SET) {
              set = set.complement(misc::IntervalSet::of(
                  Token::MIN_USER_TOKEN_TYPE, (ssize_t)_atn.maxTokenType));
            }
            if (atCaret) {
              if (!translateToRuleIndex(callStack)) {
                std::vector<ssize_t> list = set.toList();
                bool addFollowing = list.size() == 1;
                for (ssize_t symbol : list)
                  if (ignoredTokens.count(symbol) == 0) {
                    TokenList following;

                    if (addFollowing) {
                      following = getFollowingTokens(transition.get());
                    }

                    if (showDebugOutput) {
                      std::string debug = _vocabulary.getDisplayName(symbol);

                      if (!following.empty()) {
                        debug += " ->";

                        for (const auto follows : following) {
                          debug += " " + _vocabulary.getDisplayName(follows);
                        }
                      }

                      log_debug3("=====> collected: %s", debug.c_str());
                    }

                    auto token =
                        _candidates.tokens.try_emplace(symbol, following);

                    if (!token.second) {
                      // Insertion didn't happen, more than one following list
                      // for the same symbol.
                      if (token.first->second != following) {
                        token.first->second.clear();
                      }
                    }
                  }
              }
            } else {
              size_t currentSymbol = _tokens[currentEntry.tokenIndex];
              if (set.contains(currentSymbol)) {
                if (showDebugOutput) {
                  log_debug3("=====> consumed: %s",
                             _vocabulary.getDisplayName(currentSymbol).c_str());
                }

                statePipeline.push_back(
                    {transition->target, currentEntry.tokenIndex + 1});
              }
            }
          }
          break;
        }
      }
    }
  }

  callStack.pop_back();
  return result;
}