in mysqlshdk/libs/parser/code-completion/CodeCompletionCore.cpp [297:569]
/// Walks the ATN starting at the given rule-start state, simulating the parse
/// of the token stream from @p tokenIndex onward. Whenever the walk reaches
/// the caret position, the possible next tokens (or their owning preferred
/// rules) are collected into _candidates. Recurses into sub-rules via
/// RULE transitions.
///
/// @param startState  ATN rule-start state of the rule to process.
/// @param tokenIndex  Index into _tokens where matching begins for this rule.
/// @param callStack   Stack of rule indexes leading to this invocation; this
///                    rule's index is pushed on entry and popped before every
///                    return (the caller's stack is restored).
/// @param indentation Prefix used only for debug-output formatting.
/// @return The set of token indexes at which this rule can end (i.e. where a
///         RULE_STOP state was reached), so the caller can continue matching
///         after the corresponding follow states.
CodeCompletionCore::RuleEndStatus CodeCompletionCore::processRule(
    ATNState *startState, size_t tokenIndex, std::vector<size_t> &callStack,
    std::string indentation) {
  // Start with rule specific handling before going into the ATN walk.
  // Check first if we've taken this path with the same input before.
  // NOTE(review): positionMap is only ever read here — no code in this
  // function stores `result` back into _shortcutMap, so this lookup appears
  // to always miss. The upstream antlr4-c3 port writes the cache before
  // returning; confirm whether the omission is intentional (the cache can
  // skip candidate collection on revisits) or a lost line.
  auto &positionMap = _shortcutMap[startState->ruleIndex];
  if (positionMap.find(tokenIndex) != positionMap.end()) {
    if (showDebugOutput) {
      log_debug3("=====> shortcut");
    }
    return positionMap[tokenIndex];
  }

  RuleEndStatus result;

  // For rule start states we determine and cache the follow set, which gives
  // us 3 advantages:
  // 1) We can quickly check if a symbol would be matched when we follow that
  //    rule. We can so check in advance and can save us all the intermediate
  //    steps if there is no match.
  // 2) We'll have all symbols that are collectable already together when we
  //    are at the caret when entering a rule.
  // 3) We get this lookup for free with any 2nd or further visit of the same
  //    rule, which often happens in non trivial grammars, especially with
  //    (recursive) expressions and of course when invoking code completion
  //    multiple times.
  // The cache is keyed by parser type so different grammars don't collide.
  FollowSetsPerState &setsPerState = _followSetsByATN[typeid(_parser)];
  if (setsPerState.count(startState->stateNumber) == 0) {
    ATNState *stop = _atn.ruleToStopState[startState->ruleIndex];
    setsPerState[startState->stateNumber].sets =
        determineFollowSets(startState, stop);

    // Sets are split by path to allow translating them to preferred rules.
    // But for quick hit tests it is also useful to have a set with all
    // symbols combined.
    IntervalSet combined;
    for (auto &set : setsPerState[startState->stateNumber].sets)
      combined.addAll(set.intervals);
    setsPerState[startState->stateNumber].combined = combined;
  }

  const auto &followSets = setsPerState[startState->stateNumber];

  // Record this rule on the stack for the duration of the walk; popped on
  // every exit path below.
  callStack.push_back(startState->ruleIndex);

  if (tokenIndex >= _tokens.size() - 1) {  // At caret?
    if (preferredRules.count(startState->ruleIndex) > 0) {
      // No need to go deeper when collecting entries and we reach a rule that
      // we want to collect anyway.
      translateToRuleIndex(callStack);
    } else {
      // Convert all follow sets to either single symbols or their associated
      // preferred rule and add the result to our candidates list.
      auto fullPath = callStack;
      const auto size = callStack.size();

      for (const auto &set : followSets.sets) {
        // Reuse fullPath: truncate back to the shared call-stack prefix, then
        // append this follow set's rule path.
        fullPath.resize(size);
        fullPath.insert(fullPath.end(), set.path.begin(), set.path.end());

        if (!translateToRuleIndex(fullPath)) {
          for (ssize_t symbol : set.intervals.toList()) {
            if (ignoredTokens.count(symbol) == 0) {
              if (showDebugOutput) {
                std::string debug = _vocabulary.getDisplayName(symbol);

                if (!set.following.empty()) {
                  debug += " ->";

                  for (const auto follows : set.following) {
                    debug += " " + _vocabulary.getDisplayName(follows);
                  }
                }

                log_debug3("=====> collected: %s", debug.c_str());
              }

              // Following is empty if there is more than one entry in the
              // set.
              auto token =
                  _candidates.tokens.try_emplace(symbol, set.following);

              if (!token.second) {
                // Insertion didn't happen, more than one following list for
                // the same symbol: if the lists disagree, keep no following
                // info at all for this symbol.
                if (token.first->second != set.following) {
                  token.first->second.clear();
                }
              }
            }
          }
        }
      }
    }

    // At the caret nothing is consumed, so there are no end-token indexes to
    // report; restore the caller's stack and return an empty status.
    callStack.pop_back();
    return {};
  } else {
    // Process the rule if we either could pass it without consuming anything
    // (epsilon transition) or if the current input symbol will be matched
    // somewhere after this entry point.
    size_t currentSymbol = _tokens[tokenIndex];

    if (!followSets.combined.contains(Token::EPSILON) &&
        !followSets.combined.contains(currentSymbol)) {
      // Early out: this rule can neither be skipped nor match the current
      // symbol, so the whole walk below would be fruitless.
      callStack.pop_back();
      return {};
    }
  }

  // The current state execution pipeline contains all yet-to-be-processed ATN
  // states in this rule. For each such state we store the token index + a
  // list of rules that lead to it. Using an explicit LIFO pipeline instead of
  // recursion within the rule keeps stack depth bounded by sub-rule nesting.
  std::vector<PipelineEntry> statePipeline;
  PipelineEntry currentEntry;

  // Bootstrap the pipeline.
  statePipeline.push_back({startState, tokenIndex});

  while (!statePipeline.empty()) {
    currentEntry = statePipeline.back();
    statePipeline.pop_back();
    ++_statesProcessed;  // Statistics counter only.

    // "At caret" is re-evaluated per entry because different pipeline entries
    // may have consumed different numbers of tokens.
    bool atCaret = currentEntry.tokenIndex >= _tokens.size() - 1;

    if (showDebugOutput) {
      printDescription(indentation, currentEntry.state,
                       generateBaseDescription(currentEntry.state),
                       currentEntry.tokenIndex);

      if (showRuleStack) printRuleState(indentation, callStack);
    }

    switch (currentEntry.state->getStateType()) {
      case ATNStateType::RULE_START:  // Happens only for the first state in
                                      // this rule, not subrules.
        indentation += "  ";
        break;

      // Found the end of this rule. Determine the following states and return
      // to the caller.
      case ATNStateType::RULE_STOP: {
        // Record the token index we are at, to report it to the caller.
        result.insert(currentEntry.tokenIndex);
        continue;
      }

      default:
        break;
    }

    for (const auto &transition : currentEntry.state->transitions) {
      switch (transition->getTransitionType()) {
        case TransitionType::RULE: {
          // Recurse into the sub-rule; each token index it can end at spawns
          // a pipeline entry continuing at the rule's follow state.
          auto endStatus =
              processRule(transition->target, currentEntry.tokenIndex,
                          callStack, indentation);

          for (auto &status : endStatus)
            statePipeline.push_back(
                {static_cast<const RuleTransition *>(transition.get())
                     ->followState,
                 status});

          break;
        }

        case TransitionType::PREDICATE: {
          // Only follow the transition if its semantic predicate holds.
          if (checkPredicate(
                  static_cast<const PredicateTransition *>(transition.get())))
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex});

          break;
        }

        case TransitionType::WILDCARD: {
          if (atCaret) {
            // A wildcard at the caret matches anything: collect the full
            // user-token range (unless translated to a preferred rule).
            if (!translateToRuleIndex(callStack)) {
              for (ssize_t token :
                   misc::IntervalSet::of(Token::MIN_USER_TOKEN_TYPE,
                                         (ssize_t)_atn.maxTokenType)
                       .toList())
                if (ignoredTokens.count(token) == 0)
                  _candidates.tokens[token].clear();
            }
          } else {
            // Wildcard consumes exactly one token.
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex + 1});
          }

          break;
        }

        default: {
          if (transition->isEpsilon()) {
            if (atCaret) {
              const auto next_state = transition->target->getStateType();

              // if this is an epsilon transition into a rule stop or a block
              // end, we've already consumed the whole rule, so this should
              // not be a candidate
              if (ATNStateType::RULE_STOP != next_state &&
                  ATNStateType::BLOCK_END != next_state) {
                translateToRuleIndex(callStack);
              }
            }

            // Jump over simple states with a single outgoing epsilon
            // transition.
            statePipeline.push_back(
                {transition->target, currentEntry.tokenIndex});
            continue;
          }

          // Terminal (set/atom/range) transition.
          misc::IntervalSet set = transition->label();

          if (!set.isEmpty()) {
            if (transition->getTransitionType() == TransitionType::NOT_SET) {
              // NOT_SET stores the excluded symbols; invert against the full
              // user-token range to get the matchable ones.
              set = set.complement(misc::IntervalSet::of(
                  Token::MIN_USER_TOKEN_TYPE, (ssize_t)_atn.maxTokenType));
            }

            if (atCaret) {
              if (!translateToRuleIndex(callStack)) {
                std::vector<ssize_t> list = set.toList();
                // Only compute "what may follow" when the set is a single
                // symbol; for multi-symbol sets the following info would be
                // ambiguous.
                bool addFollowing = list.size() == 1;

                for (ssize_t symbol : list)
                  if (ignoredTokens.count(symbol) == 0) {
                    TokenList following;

                    if (addFollowing) {
                      following = getFollowingTokens(transition.get());
                    }

                    if (showDebugOutput) {
                      std::string debug = _vocabulary.getDisplayName(symbol);

                      if (!following.empty()) {
                        debug += " ->";

                        for (const auto follows : following) {
                          debug += " " + _vocabulary.getDisplayName(follows);
                        }
                      }

                      log_debug3("=====> collected: %s", debug.c_str());
                    }

                    auto token =
                        _candidates.tokens.try_emplace(symbol, following);

                    if (!token.second) {
                      // Insertion didn't happen, more than one following list
                      // for the same symbol: conflicting lists are discarded.
                      if (token.first->second != following) {
                        token.first->second.clear();
                      }
                    }
                  }
              }
            } else {
              // Not at the caret: consume the current token if it matches
              // this transition's set and continue one token further on.
              size_t currentSymbol = _tokens[currentEntry.tokenIndex];

              if (set.contains(currentSymbol)) {
                if (showDebugOutput) {
                  log_debug3("=====> consumed: %s",
                             _vocabulary.getDisplayName(currentSymbol).c_str());
                }

                statePipeline.push_back(
                    {transition->target, currentEntry.tokenIndex + 1});
              }
            }
          }

          break;
        }
      }
    }
  }

  // Restore the caller's rule stack and report every token index at which
  // this rule was able to end.
  callStack.pop_back();

  return result;
}