in tensorrtllm/run_eval.py [0:0]
def __init__(self,
             engine_dir,
             assets_dir="assets",
             batch_size=64):
    """Set up a Whisper encoder-decoder evaluator backed by TensorRT-LLM engines.

    Args:
        engine_dir: Directory containing the built ``encoder`` and
            ``decoder`` engine subdirectories (each with a ``config.json``).
        assets_dir: Directory holding the tokenizer vocabulary file
            (``multilingual.tiktoken`` or ``gpt2.tiktoken``).
        batch_size: Maximum batch size the C++ model runner is built for.

    Raises:
        FileNotFoundError: If the required ``.tiktoken`` vocabulary file
            is missing from ``assets_dir``.
        RuntimeError: If the decoder engine was not built with in-flight
            batching support.
    """
    encoder_config = read_config('encoder', engine_dir)
    decoder_config = read_config('decoder', engine_dir)
    self.n_mels = encoder_config['n_mels']
    self.num_languages = encoder_config['num_languages']

    # Multilingual Whisper checkpoints have a vocab of >= 51865 tokens;
    # English-only checkpoints use the smaller GPT-2 vocab.
    is_multilingual = (decoder_config['vocab_size'] >= 51865)
    tokenizer_name = "multilingual" if is_multilingual else "gpt2"

    # Validate with a real exception rather than `assert`, which is
    # silently stripped when Python runs with -O.
    vocab_path = Path(assets_dir) / f"{tokenizer_name}.tiktoken"
    if not vocab_path.exists():
        raise FileNotFoundError(
            f"{tokenizer_name}.tiktoken file does not exist in {assets_dir}")

    self.text_prefix = (
        "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>"
        if is_multilingual else
        "<|startoftranscript|><|notimestamps|>")
    self.tokenizer = get_tokenizer(name=tokenizer_name,
                                   num_languages=self.num_languages,
                                   tokenizer_dir=assets_dir)
    # First token id of "<|endoftext|>" is the end-of-transcript sentinel
    # used to stop decoding.
    self.eot_id = self.tokenizer.encode(
        "<|endoftext|>",
        allowed_special=self.tokenizer.special_tokens_set)[0]

    json_config = GptJsonConfig.parse_file(
        Path(engine_dir) / 'decoder' / 'config.json')
    if not json_config.model_config.supports_inflight_batching:
        raise RuntimeError(
            "decoder engine must be built with in-flight batching support")

    # max_input_len=3000 presumably matches Whisper's 30 s mel-frame
    # window; max_output_len=96 caps the transcript length — TODO confirm
    # against the engine build settings.
    runner_kwargs = dict(engine_dir=engine_dir,
                         is_enc_dec=True,
                         max_batch_size=batch_size,
                         max_input_len=3000,
                         max_output_len=96,
                         max_beam_width=1,
                         debug_mode=False,
                         kv_cache_free_gpu_memory_fraction=0.9)
    self.model_runner_cpp = ModelRunnerCpp.from_dir(**runner_kwargs)