in backend/query/search/search_evaluator.cc [77:225]
absl::StatusOr<MatchResult> SearchEvaluator::MatchQueryNode(
const SimpleNode* root, const TokenMap& tokenmap, bool in_phrase) {
int node_id = root->getId();
switch (node_id) {
case JJTNUMBER:
// Number nodes are used only as children of AROUND node so there's
// no need to match number nodes.
break;
case JJTTERM:
return MatchQueryTerm(root, tokenmap, in_phrase);
case JJTOR: {
ZETASQL_ASSIGN_OR_RETURN(std::vector<MatchResult> child_results,
MatchChildren(root, tokenmap, in_phrase));
bool did_match = false;
std::vector<Hit> hits;
for (const auto& result : child_results) {
if (!result.is_match()) continue;
if (in_phrase) {
hits.insert(hits.end(), result.hits().begin(), result.hits().end());
}
did_match = true;
}
if (!did_match) return MatchResult::NoMatch();
return MatchResult::Match(std::move(hits));
}
case JJTNOT: {
// NOT cannot appear in a phrase, so we never need to report hit position.
ZETASQL_RET_CHECK(!in_phrase) << "NOT nodes cannot appear in phrases";
ZETASQL_RET_CHECK_EQ(1, root->jjtGetNumChildren());
SimpleNode* child = dynamic_cast<SimpleNode*>(root->jjtGetChild(0));
ZETASQL_RET_CHECK(child != nullptr);
ZETASQL_ASSIGN_OR_RETURN(auto child_matches, MatchQueryNode(child, tokenmap));
if (child_matches.is_match()) return MatchResult::NoMatch();
return MatchResult::Match();
}
case JJTAND: {
// AND cannot appear in a phrase.
ZETASQL_RET_CHECK(!in_phrase) << "AND nodes cannot appear in phrases";
ZETASQL_ASSIGN_OR_RETURN(std::vector<MatchResult> child_results,
MatchChildren(root, tokenmap, in_phrase));
for (const auto& result : child_results) {
if (!result.is_match()) return result;
}
// AND cannot appear in a phrases, so there is no need to
// report hit positions.
return MatchResult::Match();
}
case JJTAROUND: {
ZETASQL_ASSIGN_OR_RETURN(std::vector<MatchResult> child_results,
MatchChildren(root, tokenmap, /*in_phrase=*/true));
int max_allowed_gap = -1;
std::vector<std::vector<Hit>> hits;
hits.reserve(child_results.size());
int cur_child_result = 0;
for (int i = 0; i < root->jjtGetNumChildren(); ++i) {
SimpleNode* child = dynamic_cast<SimpleNode*>(root->jjtGetChild(i));
ZETASQL_RET_CHECK(child != nullptr);
if (child->getId() == JJTNUMBER) {
int gap;
if (absl::SimpleAtoi(child->image(), &gap)) {
if (max_allowed_gap == -1) {
// First distance child
max_allowed_gap = gap;
} else if (gap != max_allowed_gap) {
// Multiple distance specified, drop them all but using the
// default distance 5
max_allowed_gap = kDefaultMaxAllowedGap;
}
}
} else if (child->image() == "*") {
// No need to record wildcard position.
cur_child_result++;
} else {
// All non-wildcard children must match.
if (!child_results[cur_child_result].is_match()) {
return MatchResult::NoMatch();
}
hits.emplace_back(std::move(child_results[cur_child_result].hits()));
cur_child_result++;
}
}
if (max_allowed_gap == -1) {
return absl::InvalidArgumentError(
"Invalid distance specified in AROUND expression.");
}
PhraseMatcher matcher(hits, max_allowed_gap);
std::vector<Hit> result = matcher.FindMatchingHits();
return result.empty() ? MatchResult::NoMatch()
: MatchResult::Match(std::move(result));
}
case JJTPHRASE: {
ZETASQL_ASSIGN_OR_RETURN(std::vector<MatchResult> child_results,
MatchChildren(root, tokenmap, /*in_phrase=*/true));
int current_offset = 0;
std::vector<int> offsets;
std::vector<std::vector<Hit>> hits;
hits.reserve(child_results.size());
// exterior_gaps indicates the gaps (denoted by wildcards) before
// and after literal terms. e.g. "* cloud spanner *" has exterior_gaps
// of (1, 1).
std::pair<int, int> exterior_gaps = {0, 0};
for (int i = 0; i < root->jjtGetNumChildren(); ++i) {
SimpleNode* child = dynamic_cast<SimpleNode*>(root->jjtGetChild(i));
ZETASQL_RET_CHECK(child != nullptr);
// In phrases, wildcards increments the next offset, rather than
// being matched directly. Offsets before the first non-wildcard
// term have no effect.
if (child->image() != "*") {
// All non-wildcard children must match.
if (!child_results[i].is_match()) return MatchResult::NoMatch();
hits.emplace_back(std::move(child_results[i].hits()));
if (offsets.empty()) {
exterior_gaps.first = current_offset;
}
offsets.push_back(current_offset);
}
++current_offset;
}
// current_offset is incremented after each term, so we need to subtract
// 1 from the difference to find the number of star terms after the last
// non-star term.
exterior_gaps.second =
offsets.empty() ? 0 : current_offset - offsets.back() - 1;
PhraseMatcher matcher(hits, offsets, exterior_gaps);
std::vector<Hit> result = matcher.FindMatchingHits();
return result.empty() ? MatchResult::NoMatch()
: MatchResult::Match(std::move(result));
}
default:
ZETASQL_RET_CHECK_FAIL() << "Bad query: unsupported search query node type "
<< node_id;
}
ZETASQL_RET_CHECK_FAIL() << "Bad query: unable to get match result from the query ";
}