in tokenizers/src/models/wordpiece/serialization.rs [57:106]
fn visit_map<V>(self, mut map: V) -> std::result::Result<Self::Value, V::Error>
where
V: MapAccess<'de>,
{
let mut builder = WordPieceBuilder::new();
let mut missing_fields = vec![
// for retrocompatibility the "type" field is not mandatory
"unk_token",
"continuing_subword_prefix",
"max_input_chars_per_word",
"vocab",
]
.into_iter()
.collect::<AHashSet<_>>();
while let Some(key) = map.next_key::<String>()? {
match key.as_ref() {
"unk_token" => builder = builder.unk_token(map.next_value()?),
"continuing_subword_prefix" => {
builder = builder.continuing_subword_prefix(map.next_value()?)
}
"max_input_chars_per_word" => {
builder = builder.max_input_chars_per_word(map.next_value()?)
}
"vocab" => {
let vocab: AHashMap<String, u32> = map.next_value()?;
builder = builder.vocab(vocab)
}
"type" => match map.next_value()? {
"WordPiece" => {}
u => {
return Err(serde::de::Error::invalid_value(
serde::de::Unexpected::Str(u),
&"WordPiece",
))
}
},
_ => {}
}
missing_fields.remove::<str>(&key);
}
if !missing_fields.is_empty() {
Err(serde::de::Error::missing_field(
missing_fields.iter().next().unwrap(),
))
} else {
Ok(builder.build().map_err(serde::de::Error::custom)?)
}
}