std::string SuffixTree::check_integrity()

in csrc/suffix_decoding/suffix_tree.cc [282:340]


std::string SuffixTree::check_integrity() {
    // 1. Check structural integrity of all nodes.
    std::queue<Node*> queue;
    queue.push(_root.get());
    while (!queue.empty()) {
        Node* node = queue.front();
        queue.pop();
        std::string ret = _check_node_integrity(node);
        if (!ret.empty()) {
            return ret;
        }
        for (const auto& [token, child] : node->children) {
            queue.push(child.get());
        }
    }
    // 2. Check all sequences are represented in the tree.
    std::unordered_map<Node*, int64_t> visit_count;
    for (int seq_id = 0; seq_id < _seqs.size(); seq_id++) {
        const std::vector<int>& seq = _seqs[seq_id];
        // Loop through all suffix starting indices.
        for (int start = 0; start < seq.size(); start++) {
            int idx = start;
            // Traverse the tree along this suffix.
            Node* node = _root.get();
            visit_count[node]++;
            while (idx < seq.size() && idx - start < _max_depth) {
                CHECK_OR_RETURN(node->children.contains(seq[idx]),
                                "missing child node for sequence");
                node = node->children[seq[idx]].get();
                visit_count[node]++;
                CHECK_OR_RETURN(idx + node->length <= seq.size(),
                                "path exceeds sequence length");
                for (int i = 0; i < node->length; ++i) {
                    int ref_seq = node->ref_seq;
                    int ref_idx = node->ref_idx + i;
                    CHECK_OR_RETURN(seq[idx + i] == _seqs[ref_seq][ref_idx],
                                    "path does not match sequence tokens");
                }
                idx += node->length;
            }
            // The last node on this path should have an endpoint.
            CHECK_OR_RETURN(node->endpoints.contains(seq_id),
                            "missing endpoint for sequence");
        }
    }
    // 3. Check all nodes were visited the correct number of times.
    assert(queue.empty());
    queue.push(_root.get());
    while (!queue.empty()) {
        Node* node = queue.front();
        queue.pop();
        CHECK_OR_RETURN(node->count == visit_count[node],
                        "node count does not match visit count");
        for (const auto& [token, child] : node->children) {
            queue.push(child.get());
        }
    }
    return "";
}