src/sal/search/diverse_verifier_tree_search.py (4 lines): - line 87: # TODO: set the augmented template from a file - line 108: # rarely ~1/1000 the model will generate fewer beams than expected. #TODO: investigate why - line 115: # scoring and choose best generation per beam TODO: add option for selection across beams within the same prompt - line 189: # TODO: construct and store the tree src/sal/models/reward_models.py (3 lines): - line 118: # TODO: tokenize each batch independently so there is less padding and faster inference - line 132: # stripped_output_scores = [] TODO: strip out the reward for previous steps - line 187: # TODO: add the system prompt like we did for math shepherd? src/sal/utils/qwen_math_parser.py (2 lines): - line 380: # TODO: SFT models - line 488: # TODO check multiple choice src/sal/search/best_of_n.py (1 line): - line 35: # TODO: set the augmented template from a file