fn apply_template()

in tokenizers/src/processors/template.rs [544:643]


    /// Applies `template` to the input `encodings`, producing the final list of
    /// encodings in template order.
    ///
    /// * `Piece::Sequence { id, type_id }` — selects the matching input encoding
    ///   (`Sequence::A` → `encodings[0]`, otherwise `encodings[1]`), overwrites its
    ///   `type_ids` with `type_id`, sets its sequence id, and emits a clone of it.
    /// * `Piece::SpecialToken { id, type_id }` — when `add_special_tokens` is true,
    ///   builds a standalone `Encoding` for the stored special token; otherwise the
    ///   piece is skipped entirely.
    ///
    /// NOTE(review): the previous overflowing-propagation logic (recursively
    /// re-applying the template to overflowing encodings) was removed here, so
    /// overflowing encodings from the inputs are currently not recombined through
    /// the template — confirm this is intentional.
    fn apply_template(
        &self,
        template: &[Piece],
        mut encodings: Vec<Encoding>,
        add_special_tokens: bool,
    ) -> Result<Vec<Encoding>> {
        let final_encodings: Vec<Encoding> = template
            .iter()
            .flat_map(|piece| {
                match piece {
                    Piece::Sequence { id, type_id } => {
                        // Sequence::A maps to index 0, any other id to index 1.
                        let i = usize::from(*id != Sequence::A);
                        let encoding = &mut encodings[i];
                        encoding.set_type_ids(vec![*type_id; encoding.len()]);
                        encoding.set_sequence_id(i);
                        Some(encoding.clone())
                    }
                    Piece::SpecialToken { id, type_id } => {
                        if add_special_tokens {
                            let tok = &self.special_tokens.0[id]; // We already checked existence above
                            let len = tok.ids.len();

                            // A special token gets a synthetic encoding: no word ids,
                            // zeroed offsets, and both special_tokens_mask and
                            // attention_mask set to 1 for every sub-token.
                            let encoding = Encoding::new(
                                tok.ids.clone(),
                                std::iter::repeat_n(*type_id, len).collect(),
                                tok.tokens.clone(),
                                // words
                                std::iter::repeat_n(None, len).collect(),
                                // offsets
                                std::iter::repeat_n((0, 0), len).collect(),
                                // special_tokens_mask
                                std::iter::repeat_n(1, len).collect(),
                                // attention_mask
                                std::iter::repeat_n(1, len).collect(),
                                // overflowing
                                vec![],
                                // sequence_range
                                AHashMap::new(),
                            );
                            Some(encoding)
                        } else {
                            // Special tokens are dropped entirely when disabled.
                            None
                        }
                    }
                }
            })
            .collect();

        Ok(final_encodings)
    }