def _scan_tokenizer_configs()

in src/pixparse/tokenizers/config.py


def _scan_tokenizer_configs():
    """Scan the registered config paths and (re)build the tokenizer config registry.

    Each entry in _TOKENIZER_CONFIG_PATHS may be a single .json file or a
    directory searched (non-recursively) for .json files. Each file is loaded
    as a TokenizerCfg and registered under its filename stem; the registry is
    then re-ordered by natural sort of the config names.
    """
    global _TOKENIZER_CONFIGS

    # Collect candidate config files: each registered path may be a single
    # .json file or a directory containing .json files.
    config_ext = ('.json',)
    config_files = []
    for config_path in _TOKENIZER_CONFIG_PATHS:
        if config_path.is_file() and config_path.suffix in config_ext:
            config_files.append(config_path)
        elif config_path.is_dir():
            for ext in config_ext:
                config_files.extend(config_path.glob(f'*{ext}'))

    # Load each config and register it under its filename stem.
    for cf in config_files:
        tokenizer_cfg = TokenizerCfg.load(cf)
        _TOKENIZER_CONFIGS[cf.stem] = tokenizer_cfg

    # Rebuild the registry in natural sort order of config names.
    _TOKENIZER_CONFIGS = {k: v for k, v in sorted(_TOKENIZER_CONFIGS.items(), key=lambda x: _natural_key(x[0]))}
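The function relies on several module-level names that are not shown in this snippet: _TOKENIZER_CONFIG_PATHS, _TOKENIZER_CONFIGS, _natural_key, and TokenizerCfg.load. A minimal sketch of what that surrounding context might look like, assuming _natural_key follows the common digit-splitting natural-sort idiom and TokenizerCfg is a simple dataclass with a JSON loader (the bodies below are assumptions for illustration, not the actual pixparse definitions):

# Hypothetical module-level context; the names mirror those referenced above,
# but the field names and bodies here are assumptions, not pixparse's own code.
import json
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List


@dataclass
class TokenizerCfg:
    name: str = ''
    special_tokens: List[str] = field(default_factory=list)

    @classmethod
    def load(cls, path: Path) -> 'TokenizerCfg':
        # Assumed loader: read a JSON file and build a config from its fields.
        with open(path, 'r') as f:
            return cls(**json.load(f))


# Assumed default search location: a configs/ directory bundled with the module.
_TOKENIZER_CONFIG_PATHS = [Path(__file__).parent / 'configs/']
_TOKENIZER_CONFIGS: Dict[str, TokenizerCfg] = {}


def _natural_key(string_: str):
    # Natural sort key: split digit runs into ints so 'cfg10' sorts after 'cfg2'.
    return [int(s) if s.isdigit() else s for s in re.split(r'(\d+)', string_.lower())]

Presumably _scan_tokenizer_configs() is invoked once at module import so that later lookups by config stem (e.g. _TOKENIZER_CONFIGS['some_tokenizer']) are plain dictionary reads; calling it again after appending a new path to _TOKENIZER_CONFIG_PATHS would refresh the registry.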