in tokenizers/src/processors/template.rs [889:951]
fn template_processing() {
let processor = tests::get_bert_template();
assert_eq!(processor.added_tokens(false), 2);
assert_eq!(processor.added_tokens(true), 3);
use crate::Token;
let encoding = Encoding::from_tokens(
vec![
Token::new(12, "Hello".into(), (0, 5)),
Token::new(14, "there".into(), (6, 11)),
],
0,
);
let pair = Encoding::from_tokens(vec![Token::new(15, "pair".into(), (0, 4))], 0);
let single_encoding = processor.process(encoding.clone(), None, true).unwrap();
assert_eq!(
single_encoding,
Encoding::new(
vec![1, 12, 14, 0],
vec![0, 0, 0, 0],
vec![
"[CLS]".into(),
"Hello".into(),
"there".into(),
"[SEP]".into()
],
vec![None, None, None, None],
vec![(0, 0), (0, 5), (6, 11), (0, 0)],
vec![1, 0, 0, 1],
vec![1, 1, 1, 1],
vec![],
AHashMap::from_iter(vec![(0, 1..3)]),
)
);
assert_eq!(single_encoding.token_to_sequence(2), Some(0));
assert_eq!(single_encoding.token_to_sequence(3), None);
let pair_encoding = processor.process(encoding, Some(pair), true).unwrap();
assert_eq!(
pair_encoding,
Encoding::new(
vec![1, 12, 14, 0, 15, 0],
vec![0, 0, 0, 0, 1, 1],
vec![
"[CLS]".into(),
"Hello".into(),
"there".into(),
"[SEP]".into(),
"pair".into(),
"[SEP]".into()
],
vec![None, None, None, None, None, None],
vec![(0, 0), (0, 5), (6, 11), (0, 0), (0, 4), (0, 0)],
vec![1, 0, 0, 1, 0, 1],
vec![1, 1, 1, 1, 1, 1],
vec![],
AHashMap::from_iter(vec![(0, 1..3), (1, 4..5)]),
)
);
assert_eq!(pair_encoding.token_to_sequence(2), Some(0));
assert_eq!(pair_encoding.token_to_sequence(3), None);
assert_eq!(pair_encoding.token_to_sequence(4), Some(1));
assert_eq!(pair_encoding.token_to_sequence(5), None);
}