yourbench/pipeline/chunking.py
# NOTE: import locations are assumed from the repo layout and may differ.
from yourbench.utils.chunking_utils import SingleHopChunk, split_into_token_chunks


def chunk_document(text: str, doc_id: str, max_tokens: int) -> list[SingleHopChunk]:
    """
    Split a document into non-overlapping chunks of at most max_tokens tokens.

    Args:
        text: Document text to chunk.
        doc_id: Unique document identifier, used to derive chunk IDs.
        max_tokens: Maximum number of tokens per chunk.

    Returns:
        A list of SingleHopChunk objects with IDs of the form "{doc_id}_{i}".
    """
    # Empty or whitespace-only documents produce no chunks.
    if not text or not text.strip():
        return []
    chunk_texts = split_into_token_chunks(text, chunk_tokens=max_tokens, overlap=0)
    return [
        SingleHopChunk(chunk_id=f"{doc_id}_{i}", chunk_text=chunk)
        for i, chunk in enumerate(chunk_texts)
    ]
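
For context, here is a minimal self-contained sketch of what the two helpers might look like and how the pieces compose. The dataclass shape, the tiktoken backend, and the cl100k_base encoding are assumptions for illustration, not the actual yourbench implementations.

# Illustrative stand-ins only; the real helpers live elsewhere in yourbench.
from dataclasses import dataclass

import tiktoken  # assumed tokenizer backend for this sketch


@dataclass
class SingleHopChunk:
    chunk_id: str
    chunk_text: str


def split_into_token_chunks(text: str, chunk_tokens: int, overlap: int = 0) -> list[str]:
    """Window text into pieces of at most chunk_tokens tokens each."""
    enc = tiktoken.get_encoding("cl100k_base")  # assumed encoding
    tokens = enc.encode(text)
    step = max(chunk_tokens - overlap, 1)  # guard against a non-positive stride
    return [
        enc.decode(tokens[start : start + chunk_tokens])
        for start in range(0, len(tokens), step)
    ]


# Composing the pieces the same way chunk_document does:
pieces = split_into_token_chunks("A long document to window by tokens...", chunk_tokens=8)
chunks = [SingleHopChunk(chunk_id=f"doc-42_{i}", chunk_text=p) for i, p in enumerate(pieces)]
print([c.chunk_id for c in chunks])  # e.g. ['doc-42_0', 'doc-42_1']

Note that chunk_document passes overlap=0, so the chunks partition the token stream; a positive overlap would make adjacent chunks share boundary context at the cost of duplicated tokens.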