in tantivy/src/indexer/merger.rs [1232:1536]
/// Exercises term deletes across commits and segment merges on an in-RAM index.
///
/// Each document carries a `text` field, a `score` u64 fast field and a
/// `score_bytes` bytes fast field that holds the same score encoded as a
/// big-endian `u32`. The scenario runs in six phases and, after each one,
/// checks document counts, per-term search results and (from the second phase
/// on) the min/max of the `score` fast field:
///
/// 1. a commit mixing additions and a delete,
/// 2. a second commit, yielding two segments,
/// 3. a merge of both segments,
/// 4. a commit that contains only a delete,
/// 5. a merge of the single remaining segment,
/// 6. a delete that removes the last documents.
///
/// # Errors
///
/// Propagates any error returned by the index writer, the reader, a search or
/// a fast field lookup. Failed expectations panic through `assert!`/`assert_eq!`.
fn test_index_merger_with_deletes() -> crate::Result<()> {
    let mut schema_builder = schema::Schema::builder();
    let text_fieldtype = schema::TextOptions::default()
        .set_indexing_options(
            TextFieldIndexing::default().set_index_option(IndexRecordOption::WithFreqs),
        )
        .set_stored();
    let text_field = schema_builder.add_text_field("text", text_fieldtype);
    let score_fieldtype = schema::NumericOptions::default().set_fast(Cardinality::SingleValue);
    let score_field = schema_builder.add_u64_field("score", score_fieldtype);
    let bytes_score_field = schema_builder.add_bytes_field("score_bytes", FAST);
    let index = Index::create_in_ram(schema_builder.build());
    let mut index_writer = index.writer_for_tests()?;
    let reader = index.reader()?;

    // Runs a term query and returns the collected `score` values. While doing so it
    // also verifies that the collected `score_bytes` payload decodes, in order, to
    // the very same scores (each read as a big-endian u32).
    let search_term = |searcher: &Searcher, term: Term| {
        let collector = FastFieldTestCollector::for_field(score_field);
        let bytes_collector = BytesFastFieldTestCollector::for_field(bytes_score_field);
        let term_query = TermQuery::new(term, IndexRecordOption::Basic);
        searcher
            .search(&term_query, &(collector, bytes_collector))
            .map(|(scores, bytes)| {
                let mut score_bytes = &bytes[..];
                for &score in &scores {
                    assert_eq!(score as u32, score_bytes.read_u32::<BigEndian>().unwrap());
                }
                scores
            })
    };

    // Searches every listed token of the `text` field and compares the returned
    // scores with the expected ones.
    let assert_hits = |searcher: &Searcher, expected: &[(&str, Vec<u64>)]| -> crate::Result<()> {
        for &(token, ref expected_scores) in expected {
            let hits = search_term(searcher, Term::from_field_text(text_field, token))?;
            assert_eq!(
                &hits, expected_scores,
                "unexpected scores for term {:?}",
                token
            );
        }
        Ok(())
    };

    // Checks the min/max values reported by the `score` fast field of one segment.
    let assert_score_range = |searcher: &Searcher,
                              segment_ord: u32,
                              min_score: u64,
                              max_score: u64|
     -> crate::Result<()> {
        let score_field_reader = searcher
            .segment_reader(segment_ord)
            .fast_fields()
            .u64(score_field)?;
        assert_eq!(score_field_reader.min_value(), min_score);
        assert_eq!(score_field_reader.max_value(), max_score);
        Ok(())
    };

    {
        // a first commit
        index_writer.add_document(doc!(
            text_field => "a b d",
            score_field => 1u64,
            bytes_score_field => vec![0u8, 0, 0, 1],
        ))?;
        index_writer.add_document(doc!(
            text_field => "b c",
            score_field => 2u64,
            bytes_score_field => vec![0u8, 0, 0, 2],
        ))?;
        // The assertions below expect this delete to remove "b c" (added before it)
        // but not "c d" (added after it).
        index_writer.delete_term(Term::from_field_text(text_field, "c"));
        index_writer.add_document(doc!(
            text_field => "c d",
            score_field => 3u64,
            bytes_score_field => vec![0u8, 0, 0, 3],
        ))?;
        index_writer.commit()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert_eq!(searcher.num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
        assert_hits(
            &searcher,
            &[
                ("a", vec![1]),
                ("b", vec![1]),
                ("c", vec![3]),
                ("d", vec![1, 3]),
            ],
        )?;
    }
    {
        // a second commit
        // The byte payloads mirror the scores as big-endian u32 values
        // (4_000 = 15 * 256 + 160, 5_000 = 19 * 256 + 136, ...), which is the
        // invariant `search_term` verifies for every document it returns.
        index_writer.add_document(doc!(
            text_field => "a d e",
            score_field => 4_000u64,
            bytes_score_field => vec![0u8, 0, 15, 160],
        ))?;
        index_writer.add_document(doc!(
            text_field => "e f",
            score_field => 5_000u64,
            bytes_score_field => vec![0u8, 0, 19, 136],
        ))?;
        index_writer.delete_term(Term::from_field_text(text_field, "a"));
        index_writer.delete_term(Term::from_field_text(text_field, "f"));
        index_writer.add_document(doc!(
            text_field => "f g",
            score_field => 6_000u64,
            bytes_score_field => vec![0u8, 0, 23, 112],
        ))?;
        index_writer.add_document(doc!(
            text_field => "g h",
            score_field => 7_000u64,
            bytes_score_field => vec![0u8, 0, 27, 88],
        ))?;
        index_writer.commit()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert_eq!(searcher.segment_readers().len(), 2);
        assert_eq!(searcher.num_docs(), 3);
        // Segment 0 is expected to be the new one (four docs, two alive), segment 1
        // the one from the first commit (three docs, one alive).
        assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].max_doc(), 4);
        assert_eq!(searcher.segment_readers()[1].num_docs(), 1);
        assert_eq!(searcher.segment_readers()[1].max_doc(), 3);
        assert_hits(
            &searcher,
            &[
                ("a", vec![]),
                ("b", vec![]),
                ("c", vec![3]),
                ("d", vec![3]),
                ("e", vec![]),
                ("f", vec![6_000]),
                ("g", vec![6_000, 7_000]),
            ],
        )?;
        // Before the merge the expected ranges still span the deleted documents.
        assert_score_range(&searcher, 0, 4000, 7000)?;
        assert_score_range(&searcher, 1, 1, 3)?;
    }
    {
        // merging the segments
        let segment_ids = index.searchable_segment_ids()?;
        index_writer.merge(&segment_ids).wait()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert_eq!(searcher.segment_readers().len(), 1);
        assert_eq!(searcher.num_docs(), 3);
        assert_eq!(searcher.segment_readers()[0].num_docs(), 3);
        assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
        assert_hits(
            &searcher,
            &[
                ("a", vec![]),
                ("b", vec![]),
                ("c", vec![3]),
                ("d", vec![3]),
                ("e", vec![]),
                ("f", vec![6_000]),
                ("g", vec![6_000, 7_000]),
            ],
        )?;
        assert_score_range(&searcher, 0, 3, 7000)?;
    }
    {
        // test a commit with only deletes
        index_writer.delete_term(Term::from_field_text(text_field, "c"));
        index_writer.commit()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert_eq!(searcher.segment_readers().len(), 1);
        assert_eq!(searcher.num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].max_doc(), 3);
        assert_hits(
            &searcher,
            &[
                ("a", vec![]),
                ("b", vec![]),
                ("c", vec![]),
                ("d", vec![]),
                ("e", vec![]),
                ("f", vec![6_000]),
                ("g", vec![6_000, 7_000]),
            ],
        )?;
        // The expected range still includes the just-deleted score 3 until the
        // segment is merged again.
        assert_score_range(&searcher, 0, 3, 7000)?;
    }
    {
        // Test merging a single segment in order to remove deletes.
        let segment_ids = index.searchable_segment_ids()?;
        index_writer.merge(&segment_ids).wait()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert_eq!(searcher.segment_readers().len(), 1);
        assert_eq!(searcher.num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].num_docs(), 2);
        assert_eq!(searcher.segment_readers()[0].max_doc(), 2);
        assert_hits(
            &searcher,
            &[
                ("a", vec![]),
                ("b", vec![]),
                ("c", vec![]),
                ("d", vec![]),
                ("e", vec![]),
                ("f", vec![6_000]),
                ("g", vec![6_000, 7_000]),
            ],
        )?;
        assert_score_range(&searcher, 0, 6000, 7000)?;
    }
    {
        // Test removing all docs
        index_writer.delete_term(Term::from_field_text(text_field, "g"));
        index_writer.commit()?;
        let segment_ids = index.searchable_segment_ids()?;
        reader.reload()?;
        let searcher = reader.searcher();
        assert!(segment_ids.is_empty());
        assert!(searcher.segment_readers().is_empty());
        assert_eq!(searcher.num_docs(), 0);
    }
    Ok(())
}