def chunk_document()

in yourbench/pipeline/chunking.py


def chunk_document(text: str, doc_id: str, max_tokens: int) -> list[SingleHopChunk]:
    """
    Chunk a document into non-overlapping segments based on token count.

    Args:
        text: Document text to chunk
        doc_id: Unique document identifier
        max_tokens: Maximum tokens per chunk

    Returns:
        List of single-hop chunks
    """
    if not text or not text.strip():
        return []

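    # Split the text into non-overlapping windows of at most max_tokens tokens,
    # then wrap each window in a SingleHopChunk with ID "{doc_id}_{index}".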
    chunk_texts = split_into_token_chunks(text, chunk_tokens=max_tokens, overlap=0)
    return [SingleHopChunk(chunk_id=f"{doc_id}_{i}", chunk_text=chunk) for i, chunk in enumerate(chunk_texts)]
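
A minimal usage sketch follows. SingleHopChunk and split_into_token_chunks are defined elsewhere in yourbench; the stand-ins below (a bare dataclass and a whitespace-token splitter) are simplified assumptions for illustration, not the package's real implementations.

from dataclasses import dataclass


@dataclass
class SingleHopChunk:
    # Assumed shape: mirrors the two fields chunk_document actually sets.
    chunk_id: str
    chunk_text: str


def split_into_token_chunks(text: str, chunk_tokens: int, overlap: int = 0) -> list[str]:
    # Assumed stand-in: splits on whitespace tokens; the real helper counts
    # tokens with a tokenizer rather than str.split().
    tokens = text.split()
    step = max(chunk_tokens - overlap, 1)
    return [" ".join(tokens[i : i + chunk_tokens]) for i in range(0, len(tokens), step)]


chunks = chunk_document("one two three four five six", doc_id="doc42", max_tokens=4)
for c in chunks:
    print(c.chunk_id, "->", c.chunk_text)
# doc42_0 -> one two three four
# doc42_1 -> five six

Because chunk_document fixes overlap at 0, consecutive chunks share no tokens: every token of the input appears in exactly one chunk.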