in src/main.rs [140:169]
fn parse_tokenizer_options(s: &str) -> Result<TokenizeOptions, Error> {
let mut tokenizer_options = TokenizeOptions::new();
let items = s.split(",").collect::<Vec<&str>>();
for item in items.iter() {
let key_value = item.split("=").collect::<Vec<&str>>();
if key_value.len() != 2 {
return Err(Error::new(InvalidValue));
}
match key_value[0] {
"num_tokens" => {
tokenizer_options.num_tokens = Some(key_value[1].parse::<u64>().unwrap())
}
"min_tokens" => tokenizer_options.min_tokens = key_value[1].parse::<u64>().unwrap(),
"max_tokens" => tokenizer_options.max_tokens = key_value[1].parse::<u64>().unwrap(),
"variance" => tokenizer_options.variance = key_value[1].parse::<u64>().unwrap(),
_ => return Err(Error::new(InvalidValue)),
}
}
if tokenizer_options.num_tokens.is_some()
&& (tokenizer_options.num_tokens.unwrap() == 0
|| tokenizer_options.min_tokens == 0
|| tokenizer_options.max_tokens == 0)
{
return Err(Error::new(InvalidValue));
}
if tokenizer_options.min_tokens > tokenizer_options.max_tokens {
return Err(Error::new(InvalidValue));
}
Ok(tokenizer_options)
}