in yourbench/pipeline/ingestion.py [0:0]
def _convert_document_to_markdown(file_path: str, output_dir: str, markdown_processor: MarkItDown) -> None:
    """
    Convert a single source file into Markdown and save the result.

    The output file is named after the source file's base name, with its
    extension replaced by ``.md``, and is written as UTF-8 into ``output_dir``.
    Exceptions raised while converting or writing are caught and logged
    rather than propagated, so one bad file does not abort the caller.

    Args:
        file_path (str): The path to the source document.
        output_dir (str): Directory where the converted .md file will be written.
            It is created (including parents) if it does not exist yet.
        markdown_processor (MarkItDown): Configured MarkItDown instance for conversions.

    Returns:
        None

    Logs:
        - Debug info about the file being processed.
        - Warning if no content was produced (nothing is written in that case).
        - Info once the Markdown file has been written.
        - Error if conversion or writing raises an exception.
    """
    logger.debug("Converting file: {}", file_path)
    try:
        content = _get_markdown_content(file_path, markdown_processor)
        if content is None:
            logger.warning(f"No content could be generated for file '{file_path}' after processing. Skipping output.")
            return

        # Construct an output filename with .md extension.
        # NOTE: only the base name is used, so two sources that share a stem
        # (e.g. "report.pdf" and "report.docx") map to the same output file.
        source_stem = os.path.splitext(os.path.basename(file_path))[0]
        output_file = os.path.join(output_dir, f"{source_stem}.md")

        # Ensure the destination directory exists; otherwise open() below fails
        # with FileNotFoundError and the file is reported as a failed conversion.
        # An empty output_dir means "current directory" for os.path.join, and
        # os.makedirs("") would raise, so it is skipped in that case.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Write the converted Markdown to disk
        with open(output_file, "w", encoding="utf-8") as out_f:
            out_f.write(content)
        logger.info(f"Successfully processed '{file_path}' and saved as '{output_file}'.")
    except Exception as exc:
        # Deliberate best-effort boundary: log and move on instead of raising.
        logger.error(f"Failed to convert '{file_path}'. Error details: {exc}")