in bindings/python/src/models.rs [443:480]
// Builds a BPE model for the Python bindings.
//
// `vocab` and `merges` must either both be omitted (the builder is used with
// no vocabulary/merges configured here) or both be supplied. When supplied,
// they must be of the same flavour: both in-memory values, or both filenames.
// The filename flavour still works but first emits a deprecation warning
// pointing at `BPE.from_file`. `kwargs` is forwarded untouched to
// `PyBPE::with_builder`.
//
// Returns a `PyValueError` when only one of the two arguments is given, or
// when one is in-memory and the other is a filename. An error from
// `deprecation_warning` is propagated as-is.
fn new(
    py: Python<'_>,
    vocab: Option<PyVocab>,
    merges: Option<PyMerges>,
    kwargs: Option<&Bound<'_, PyDict>>,
) -> PyResult<(Self, PyModel)> {
    let configured = match (vocab, merges) {
        // Nothing supplied: hand the untouched builder on.
        (None, None) => BPE::builder(),
        // Both supplied as in-memory data.
        (Some(PyVocab::Vocab(entries)), Some(PyMerges::Merges(merge_rules))) => {
            let token_map: AHashMap<_, _> = entries.into_iter().collect();
            BPE::builder().vocab_and_merges(token_map, merge_rules)
        }
        // Both supplied as file paths: still supported, but deprecated.
        (Some(PyVocab::Filename(vocab_path)), Some(PyMerges::Filename(merges_path))) => {
            deprecation_warning(
                py,
                "0.9.0",
                "BPE.__init__ will not create from files anymore, try `BPE.from_file` instead",
            )?;
            BPE::builder().files(vocab_path.to_string(), merges_path.to_string())
        }
        // Both supplied, but one is in-memory and the other is a filename.
        // NOTE(review): the message reads "must be both be"; kept verbatim because
        // Python callers may match on the text.
        (Some(_), Some(_)) => {
            return Err(exceptions::PyValueError::new_err(
                "`vocab` and `merges` must be both be from memory or both filenames",
            ));
        }
        // Exactly one of the two was supplied.
        (Some(_), None) | (None, Some(_)) => {
            return Err(exceptions::PyValueError::new_err(
                "`vocab` and `merges` must be both specified",
            ));
        }
    };
    PyBPE::with_builder(configured, kwargs)
}