def pdf_to_markdown()

in tablestore-java-mcp-server-rag/knowledge-data-generator/pdf_to_markdown.py [0:0]


def pdf_to_markdown(pdf_path):
    """Extract the text of every page of a PDF and run it through ``markdownify``.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``pdfplumber.open``.

    Returns:
        The value returned by ``markdownify`` for the combined page text,
        where consecutive pages are separated by a single newline.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # A page without extractable text may yield None from extract_text()
        # (presumably depends on the pdfplumber version - verify); fall back
        # to an empty string so such pages do not raise a TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a newline so the last line of one page is not fused with the
    # first line of the next page.
    text = "\n".join(page_texts)

    # Convert the text to Markdown format.
    markdown_content = markdownify(text)
    return markdown_content