in yourbench/pipeline/summarization.py [0:0]
def _build_combine_calls(summaries_by_doc: list[list[str]]) -> tuple[list[InferenceCall], list[int]]:
    """Prepare second-stage calls to merge multiple chunk summaries into a single summary.

    Args:
        summaries_by_doc: One list of chunk summaries per document, in document order.

    Returns:
        A pair ``(calls, doc_indices_for_combine)``. The two lists are parallel:
        ``doc_indices_for_combine[i]`` is the index into ``summaries_by_doc`` of the
        document that ``calls[i]`` was built for.

    Documents with at most one chunk summary, or whose chunk summaries are all
    empty, produce no call and are only counted in the log message.
    """
    calls: list[InferenceCall] = []
    doc_indices_for_combine: list[int] = []
    skipped_doc_count = 0

    for doc_idx, chunk_summaries in enumerate(summaries_by_doc):
        # Already a single summary (or empty), skip combine
        if len(chunk_summaries) <= 1:
            skipped_doc_count += 1
            continue

        # Drop falsy entries so they do not turn into blank bullets in the prompt.
        valid_summaries = [s for s in chunk_summaries if s]
        if not valid_summaries:
            skipped_doc_count += 1
            continue

        # Join with a real newline so each summary sits on its own bullet line;
        # an escaped "\\n" would embed a literal backslash-n in the prompt text.
        bullet_list = "\n".join(f"- {s}" for s in valid_summaries)
        prompt = COMBINE_SUMMARIES_USER_PROMPT.format(chunk_summaries=bullet_list)
        calls.append(InferenceCall(messages=[{"role": "user", "content": prompt}], tags=["merge_summary"]))
        doc_indices_for_combine.append(doc_idx)

    logger.info(
        f"Prepared {len(calls)} combine-stage inference calls ({skipped_doc_count} docs skipped – single/empty chunk list)."
    )
    return calls, doc_indices_for_combine