in src/doc_builder/build_embeddings.py [0:0]
def get_autodoc_sections_helper(markdown_text: str) -> List[dict[str, any]]:
    """
    Split markdown text by headings and include parent headings for each section.

    A heading is any line of the form ``#... <text>`` (one or more ``#``
    followed by whitespace and text). Lines inside fenced code blocks
    (delimited by ``` or ~~~) are never treated as headings, so a Python
    comment such as ``# setup`` inside an example stays part of the
    surrounding section's content.

    Text that appears before the first heading is returned as a section with
    an empty heading and level 0; it is omitted when it is only whitespace.
    A section that has a heading is always returned, even if its content is
    empty.

    NOTE(review): the return annotation uses the builtin ``any``;
    ``typing.Any`` was presumably intended. Left unchanged because the
    file's import block is not visible from here.

    Args:
        markdown_text (str): The input markdown text

    Returns:
        List of dictionaries, in document order, each containing:
        - 'heading': the current section heading (including its ``#`` prefix)
        - 'parent_headings': list of parent headings (from highest to lowest level)
        - 'full_heading_path': complete heading path, joined with " > "
        - 'content': the text content of the section, stripped of
          leading/trailing whitespace
        - 'level': heading level (1 for #, 2 for ##, etc.; 0 for text
          before any heading)
    """
    # Compiled once up front instead of being looked up for every line.
    heading_pattern = re.compile(r"^(#+)\s+(.+)$")
    # A fence is 3+ backticks or tildes, indented by at most three spaces.
    fence_pattern = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")

    sections = []
    heading_stack = []  # Open headings, outermost first; last one is current
    current_content = []
    open_fence = None  # (fence character, fence length) while inside a fence

    for line in markdown_text.split("\n"):
        fence_match = fence_pattern.match(line)
        if fence_match:
            marker, info = fence_match.groups()
            if open_fence is None:
                # The info string of a backtick fence may not contain
                # backticks; otherwise the line is inline code, not a fence.
                if marker[0] != "`" or "`" not in info:
                    open_fence = (marker[0], len(marker))
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= open_fence[1]
                and not info.strip()
            ):
                # A closing fence uses the same character, is at least as
                # long as the opener, and carries no info string.
                open_fence = None
            current_content.append(line)
            continue

        heading_match = heading_pattern.match(line) if open_fence is None else None
        if heading_match is None:
            # Ordinary text, or anything inside a code fence.
            current_content.append(line)
            continue

        # A new heading closes the section collected so far.
        _append_section(sections, heading_stack, current_content)

        hashes = heading_match.group(1)
        level = len(hashes)
        full_heading = hashes + " " + heading_match.group(2).strip()

        # Drop headings at the same or a deeper level: they cannot be
        # parents of the heading that is about to be pushed.
        while heading_stack and heading_stack[-1]["level"] >= level:
            heading_stack.pop()
        heading_stack.append({"text": full_heading, "level": level})
        current_content = []

    # Don't forget the last section
    _append_section(sections, heading_stack, current_content)
    return sections


def _append_section(sections, heading_stack, content_lines):
    """Append the section for ``heading_stack``/``content_lines`` to ``sections``.

    Without any open heading the section is the headingless preamble; it is
    skipped when its content is blank so that no empty placeholder is emitted.
    """
    content = "\n".join(content_lines).strip()
    if not heading_stack:
        if content:
            sections.append(
                {
                    "heading": "",
                    "parent_headings": [],
                    "full_heading_path": "",
                    "content": content,
                    "level": 0,
                }
            )
        return

    titles = [h["text"] for h in heading_stack]
    sections.append(
        {
            "heading": titles[-1],
            "parent_headings": titles[:-1],
            "full_heading_path": " > ".join(titles),
            "content": content,
            "level": heading_stack[-1]["level"],
        }
    )