in lmms_eval/__main__.py

# Imports reconstructed for this excerpt; parse_eval_args, set_loggers,
# WandbLogger, cli_evaluate_single, and print_results are defined elsewhere
# in the lmms_eval package.
import argparse
import datetime
import logging
import os
import sys
import traceback
from typing import Union

import yaml
from accelerate import Accelerator
from accelerate.utils import InitProcessGroupKwargs

def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
    if not args:
        args = parse_eval_args()

        # Check if no arguments were passed after parsing
        if len(sys.argv) == 1:
            print("┌───────────────────────────────────────────────────────────────────────────────┐")
            print("│ Please provide arguments to evaluate the model. e.g.                          │")
            print("│ `lmms-eval --model llava --model_path liuhaotian/llava-v1.6-7b --tasks okvqa` │")
            print("│ Use `lmms-eval --help` for more information.                                  │")
            print("└───────────────────────────────────────────────────────────────────────────────┘")
            sys.exit(1)
    set_loggers(args)
    eval_logger = logging.getLogger("lmms-eval")
    eval_logger.setLevel(getattr(logging, args.verbosity))
    eval_logger.info(f"Verbosity set to {args.verbosity}")
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
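    # (Setting TOKENIZERS_PARALLELISM=false silences the HuggingFace tokenizers
    # fork-parallelism warning and avoids tokenizer deadlocks once the process
    # forks worker processes.)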

    args_list = []
    results_list = []
    if args.config:
        if not os.path.exists(args.config):
            raise ValueError(f"Config file does not exist: {args.config}")

        with open(args.config, "r") as file:
            config_args = yaml.safe_load(file)
        config_args = config_args if isinstance(config_args, list) else [config_args]
        # multiple configs: build the full args list before running anything
        for config in config_args:
            args_copy = argparse.Namespace(**vars(args))
            for key, value in config.items():
                setattr(args_copy, key, value)
            args_list.append(args_copy)
    else:
        args_list.append(args)
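
    # Shape of a hypothetical --config file accepted by the block above:
    # either a single YAML mapping or a list of mappings, each entry
    # overriding the CLI defaults for one run (keys/values are illustrative,
    # reusing the flags from the help box):
    #
    #   - model: llava
    #     model_path: liuhaotian/llava-v1.6-7b
    #     tasks: okvqa
    #   - model: llava
    #     model_path: liuhaotian/llava-v1.6-7b
    #     tasks: mme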

    # initialize Accelerator
    kwargs_handler = InitProcessGroupKwargs(timeout=datetime.timedelta(seconds=60000))
    accelerator = Accelerator(kwargs_handlers=[kwargs_handler])
    is_main_process = accelerator.is_main_process
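    # The very generous 60000 s (~16.7 h) process-group timeout is presumably
    # there so that slow one-time work on rank 0 (e.g. dataset or model
    # downloads) does not time out the waiting ranks.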

    for args in args_list:
        try:
            # init wandb on the main process only, to avoid duplicate network
            # traffic and unwanted side effects from multiple ranks
            if is_main_process and args.wandb_args:
                wandb_logger = WandbLogger(args)

            results, samples = cli_evaluate_single(args)
            results_list.append(results)

            accelerator.wait_for_everyone()
            if is_main_process and args.wandb_args:
                wandb_logger.post_init(results)
                wandb_logger.log_eval_result()
                if args.wandb_log_samples and samples is not None:
                    wandb_logger.log_eval_samples(samples)
                wandb_logger.finish()

        except Exception as e:
            traceback.print_exc()
            eval_logger.error(f"Error during evaluation: {e}")
            results_list.append(None)

    for args, results in zip(args_list, results_list):
        # cli_evaluate_single returns None on non-main processes (rank != 0),
        # so only the main process prints results here
        if results is not None:
            print_results(args, results)
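
# Example invocations (the first mirrors the help box above; the config file
# name `batch_runs.yaml` is an illustrative placeholder):
#
#   lmms-eval --model llava --model_path liuhaotian/llava-v1.6-7b --tasks okvqa
#   lmms-eval --config batch_runs.yaml   # one evaluation run per config entry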