in csrc/suffix_decoding/suffix_tree.cc [282:340]
std::string SuffixTree::check_integrity() {
// 1. Check structural integrity of all nodes.
std::queue<Node*> queue;
queue.push(_root.get());
while (!queue.empty()) {
Node* node = queue.front();
queue.pop();
std::string ret = _check_node_integrity(node);
if (!ret.empty()) {
return ret;
}
for (const auto& [token, child] : node->children) {
queue.push(child.get());
}
}
// 2. Check all sequences are represented in the tree.
std::unordered_map<Node*, int64_t> visit_count;
for (int seq_id = 0; seq_id < _seqs.size(); seq_id++) {
const std::vector<int>& seq = _seqs[seq_id];
// Loop through all suffix starting indices.
for (int start = 0; start < seq.size(); start++) {
int idx = start;
// Traverse the tree along this suffix.
Node* node = _root.get();
visit_count[node]++;
while (idx < seq.size() && idx - start < _max_depth) {
CHECK_OR_RETURN(node->children.contains(seq[idx]),
"missing child node for sequence");
node = node->children[seq[idx]].get();
visit_count[node]++;
CHECK_OR_RETURN(idx + node->length <= seq.size(),
"path exceeds sequence length");
for (int i = 0; i < node->length; ++i) {
int ref_seq = node->ref_seq;
int ref_idx = node->ref_idx + i;
CHECK_OR_RETURN(seq[idx + i] == _seqs[ref_seq][ref_idx],
"path does not match sequence tokens");
}
idx += node->length;
}
// The last node on this path should have an endpoint.
CHECK_OR_RETURN(node->endpoints.contains(seq_id),
"missing endpoint for sequence");
}
}
// 3. Check all nodes were visited the correct number of times.
assert(queue.empty());
queue.push(_root.get());
while (!queue.empty()) {
Node* node = queue.front();
queue.pop();
CHECK_OR_RETURN(node->count == visit_count[node],
"node count does not match visit count");
for (const auto& [token, child] : node->children) {
queue.push(child.get());
}
}
return "";
}