in tablestore-java-mcp-server-rag/knowledge-data-generator/pdf_to_markdown.py [0:0]
def pdf_to_markdown(pdf_path):
    """Extract the text of every page of a PDF and return it as Markdown.

    Args:
        pdf_path: Location of the PDF; passed unchanged to
            ``pdfplumber.open``.

    Returns:
        The string produced by ``markdownify`` from the text of all pages,
        with one newline between consecutive non-empty pages.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can yield None (older pdfplumber releases) or an
        # empty string for pages without a text layer, e.g. scanned images.
        # Normalise both to "" so such pages are skipped instead of raising
        # TypeError during concatenation.
        page_texts = (page.extract_text() or "" for page in pdf.pages)
        # Join with a newline so the last line of one page does not fuse
        # with the first line of the next; pages with no text are dropped.
        text = "\n".join(chunk for chunk in page_texts if chunk)
    # Convert the text to Markdown format.
    # NOTE(review): markdownify is documented as an HTML-to-Markdown
    # converter and the input here is plain extracted text -- confirm this
    # pass-through is the intended behaviour.
    return markdownify(text)