in src/pixparse/tokenizers/config.py [0:0]
def _scan_tokenizer_configs():
    """Populate the module-level tokenizer config registry from disk.

    Every entry of ``_TOKENIZER_CONFIG_PATHS`` is inspected: an entry that is
    a file is taken if its suffix is ``.json``; an entry that is a directory
    contributes every ``*.json`` file directly inside it (non-recursive glob).
    Each collected file is loaded with ``TokenizerCfg.load`` and stored under
    its file stem, so a later file with the same stem replaces an earlier one.
    Entries already present in ``_TOKENIZER_CONFIGS`` are kept unless they are
    overwritten.

    Finally ``_TOKENIZER_CONFIGS`` is rebound to a new dict whose keys are
    ordered by ``_natural_key``.
    """
    global _TOKENIZER_CONFIGS

    suffixes = ('.json',)

    # Gather candidate config files from explicit file paths and directories.
    discovered = []
    for location in _TOKENIZER_CONFIG_PATHS:
        if location.is_file() and location.suffix in suffixes:
            discovered.append(location)
        elif location.is_dir():
            discovered.extend(
                match
                for suffix in suffixes
                for match in location.glob(f'*{suffix}')
            )

    # Register each config under its file name without the extension.
    for cfg_file in discovered:
        _TOKENIZER_CONFIGS[cfg_file.stem] = TokenizerCfg.load(cfg_file)

    # Rebuild the registry so that iteration follows the _natural_key order.
    ordered_names = sorted(_TOKENIZER_CONFIGS, key=_natural_key)
    _TOKENIZER_CONFIGS = {name: _TOKENIZER_CONFIGS[name] for name in ordered_names}