fn add_term_frequencies()

in tantivy/src/query/more_like_this/more_like_this.rs [165:281]


    /// Accumulates, into `term_frequencies`, how often each term derived from
    /// `values` occurs for the given `field`.
    ///
    /// The field's schema entry decides how the values are interpreted:
    ///
    /// * facet fields are tokenized with `FacetTokenizer`;
    /// * text fields are tokenized with the tokenizer configured in the field's
    ///   indexing options (pre-tokenized values are consumed as-is);
    /// * `u64`, `i64`, `f64` and date fields contribute one term per value.
    ///
    /// Terms for which `is_noise_word` returns `true` are never counted.
    /// Fields that are not indexed, and field types not listed above, leave
    /// `term_frequencies` untouched.
    ///
    /// # Errors
    ///
    /// Returns `TantivyError::InvalidArgument` when a value does not match the
    /// type of a facet, `u64`, `i64`, `f64` or date field. For the numeric and
    /// date types, values preceding the offending one have already been counted
    /// when the error is returned.
    fn add_term_frequencies(
        &self,
        searcher: &Searcher,
        field: Field,
        values: &[Value],
        term_frequencies: &mut HashMap<Term, usize>,
    ) -> Result<()> {
        let schema = searcher.schema();
        let tokenizer_manager = searcher.index().tokenizers();

        let field_entry = schema.get_field_entry(field);
        if !field_entry.is_indexed() {
            return Ok(());
        }

        // extract the raw value, possibly tokenizing & filtering to update the term frequency map
        match field_entry.field_type() {
            FieldType::Facet(_) => {
                // Validate every value up front so that a type mismatch is reported
                // before any facet token has been counted.
                let facets: Vec<&str> = values
                    .iter()
                    .map(|value| match value {
                        Value::Facet(ref facet) => Ok(facet.encoded_str()),
                        _ => Err(TantivyError::InvalidArgument(
                            "invalid field value".to_string(),
                        )),
                    })
                    .collect::<Result<Vec<_>>>()?;
                for fake_str in facets {
                    FacetTokenizer.token_stream(fake_str).process(&mut |token| {
                        // Count only tokens that are NOT noise words, exactly like
                        // every other field type below.
                        if !self.is_noise_word(token.text.clone()) {
                            let term = Term::from_field_text(field, &token.text);
                            *term_frequencies.entry(term).or_default() += 1;
                        }
                    });
                }
            }
            FieldType::Str(text_options) => {
                // The tokenizer depends only on the field, so resolve it once
                // instead of once per value.
                let tokenizer = text_options
                    .get_indexing_options()
                    .and_then(|text_indexing_options| {
                        tokenizer_manager.get(text_indexing_options.tokenizer())
                    });

                let mut token_streams: Vec<BoxTokenStream> = vec![];

                for value in values {
                    match value {
                        Value::PreTokStr(tok_str) => {
                            token_streams.push(PreTokenizedStream::from(tok_str.clone()).into());
                        }
                        Value::Str(ref text) => {
                            // Plain text is skipped when no tokenizer could be resolved
                            // for this field.
                            if let Some(tokenizer) = &tokenizer {
                                token_streams.push(tokenizer.token_stream(text));
                            }
                        }
                        // Values of any other type are ignored for text fields.
                        _ => (),
                    }
                }

                for mut token_stream in token_streams {
                    token_stream.process(&mut |token| {
                        if !self.is_noise_word(token.text.clone()) {
                            let term = Term::from_field_text(field, &token.text);
                            *term_frequencies.entry(term).or_default() += 1;
                        }
                    });
                }
            }
            FieldType::U64(_) => {
                for value in values {
                    let val = value.as_u64().ok_or_else(|| {
                        TantivyError::InvalidArgument("invalid value".to_string())
                    })?;
                    // The noise-word filter works on text, so it is applied to the
                    // decimal rendering of the number.
                    if !self.is_noise_word(val.to_string()) {
                        let term = Term::from_field_u64(field, val);
                        *term_frequencies.entry(term).or_default() += 1;
                    }
                }
            }
            FieldType::Date(_) => {
                for value in values {
                    let timestamp_micros = value
                        .as_date()
                        .ok_or_else(|| TantivyError::InvalidArgument("invalid value".to_string()))?
                        .into_timestamp_micros();
                    if !self.is_noise_word(timestamp_micros.to_string()) {
                        // The date term is built from its timestamp in microseconds.
                        let term = Term::from_field_i64(field, timestamp_micros);
                        *term_frequencies.entry(term).or_default() += 1;
                    }
                }
            }
            FieldType::I64(_) => {
                for value in values {
                    let val = value.as_i64().ok_or_else(|| {
                        TantivyError::InvalidArgument("invalid value".to_string())
                    })?;
                    if !self.is_noise_word(val.to_string()) {
                        let term = Term::from_field_i64(field, val);
                        *term_frequencies.entry(term).or_default() += 1;
                    }
                }
            }
            FieldType::F64(_) => {
                for value in values {
                    let val = value.as_f64().ok_or_else(|| {
                        TantivyError::InvalidArgument("invalid value".to_string())
                    })?;
                    if !self.is_noise_word(val.to_string()) {
                        let term = Term::from_field_f64(field, val);
                        *term_frequencies.entry(term).or_default() += 1;
                    }
                }
            }
            // Remaining field types do not contribute any term.
            _ => {}
        }
        Ok(())
    }