def _convert_document_to_markdown()

in yourbench/pipeline/ingestion.py [0:0]


def _convert_document_to_markdown(file_path: str, output_dir: str, markdown_processor: MarkItDown) -> None:
    """
    Convert a single source file into Markdown and save the result.

    The Markdown is written to ``<output_dir>/<name>.md``, where ``<name>`` is
    the source file's base name with its final extension removed. Two sources
    that share a base name (e.g. ``a.pdf`` and ``a.docx``) therefore map to the
    same output file, and the later write overwrites the earlier one.

    This function is best-effort: any exception raised while converting or
    writing is logged and swallowed, so it never propagates to the caller.

    Args:
        file_path (str): The path to the source document.
        output_dir (str): Directory where the converted .md file will be written.
            It is created (including parents) if it does not exist yet.
        markdown_processor (MarkItDown): Configured MarkItDown instance for conversions.

    Returns:
        None

    Logs:
        - Debug: the file being processed.
        - Warning: `_get_markdown_content` returned None; no file is written.
        - Info: the file was converted and saved.
        - Error (with traceback): conversion or writing raised an exception.
    """
    logger.debug("Converting file: {}", file_path)
    try:
        content = _get_markdown_content(file_path, markdown_processor)

        # Only None means "nothing to write"; an empty string is still written as-is.
        if content is None:
            logger.warning(f"No content could be generated for file '{file_path}' after processing. Skipping output.")
            return

        # Construct an output filename with .md extension
        base_name = os.path.basename(file_path)
        file_name_no_ext = os.path.splitext(base_name)[0]
        output_file = os.path.join(output_dir, f"{file_name_no_ext}.md")

        # Make sure the destination exists; otherwise open() below fails with
        # FileNotFoundError for every document. An empty output_dir means
        # "current directory", which os.makedirs would reject, so skip it.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Write the converted Markdown to disk
        with open(output_file, "w", encoding="utf-8") as out_f:
            out_f.write(content)

        logger.info(f"Successfully processed '{file_path}' and saved as '{output_file}'.")
    except Exception as exc:
        # Per-file boundary: one bad document must not abort the caller, but
        # keep the traceback so the failure can actually be diagnosed.
        logger.exception(f"Failed to convert '{file_path}'. Error details: {exc}")