in tokenizers/src/processors/template.rs [468:524]
/// Checks that the configured templates are internally consistent.
///
/// # Errors
///
/// Returns a human-readable message when:
/// - a `pair` template is set but does not contain both a `Sequence::A` and a
///   `Sequence::B` piece (an unset `pair` passes this check);
/// - a `SpecialToken` piece of `single` or `pair` has an id that is not a key
///   of `special_tokens` (every id counts as missing when `special_tokens`
///   is unset).
fn validate(&self) -> std::result::Result<(), String> {
    if let Some(pair) = self.pair.as_ref() {
        let uses_a = pair.0.iter().any(|piece| {
            matches!(
                piece,
                Piece::Sequence {
                    id: Sequence::A,
                    ..
                }
            )
        });
        let uses_b = pair.0.iter().any(|piece| {
            matches!(
                piece,
                Piece::Sequence {
                    id: Sequence::B,
                    ..
                }
            )
        });
        if !(uses_a && uses_b) {
            return Err("Template for `pair` must use both sequences".into());
        }
    }

    // A special token is "defined" only if a token map exists and has its id.
    let is_defined = |id: &str| {
        self.special_tokens
            .as_ref()
            .is_some_and(|tokens| tokens.0.contains_key(id))
    };

    // Walk the pieces of whichever templates are set; the set removes ids
    // that appear several times (e.g. in both `single` and `pair`).
    let missing: AHashSet<&str> = self
        .single
        .as_ref()
        .into_iter()
        .chain(self.pair.as_ref())
        .flat_map(|template| template.0.iter())
        .filter_map(|piece| match piece {
            Piece::Sequence { .. } => None,
            Piece::SpecialToken { id, .. } => {
                let id: &str = id.as_ref();
                (!is_defined(id)).then_some(id)
            }
        })
        .collect();

    if missing.is_empty() {
        return Ok(());
    }

    // Hash-set iteration order is arbitrary; sort so the error message is
    // the same on every run for the same input.
    let mut ids: Vec<&str> = missing.into_iter().collect();
    ids.sort_unstable();
    Err(format!(
        "Missing SpecialToken(s) with id(s) `{}`",
        ids.join(", ")
    ))
}