tablestore-java-mcp-server-rag/knowledge-data-generator/pdf_to_markdown.py (12 lines of code) (raw):

import pdfplumber
from markdownify import markdownify


def pdf_to_markdown(pdf_path):
    """Extract the text of every page in a PDF and pass it through markdownify.

    Args:
        pdf_path: Path of the PDF file, handed directly to ``pdfplumber.open``.

    Returns:
        The value returned by ``markdownify`` for the combined page text.

    Raises:
        Whatever ``pdfplumber.open`` raises when the file cannot be opened
        or parsed; no exception is caught here.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (older pdfplumber releases are believed to do so); fall back to an
        # empty string so one blank/scanned page does not abort the run.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join pages with a newline so the last line of one page is not fused
    # with the first line of the next.
    text = "\n".join(page_texts)

    # Convert the text to Markdown format.
    # NOTE(review): markdownify is presumably designed for HTML input; for
    # plain text it may mostly escape Markdown special characters - confirm.
    markdown_content = markdownify(text)
    return markdown_content


# NOTE(review): these statements run at import time as well as when the file
# is executed as a script; consider an `if __name__ == "__main__":` guard if
# this module is ever imported elsewhere.
pdf_path = "example.pdf"
markdown_result = pdf_to_markdown(pdf_path)
print(markdown_result)