def get_chunks()

in src/doc_builder/build_embeddings.py [0:0]


    def get_chunks(self, page_info, chunk_len_chars, headings=None):
        """Split this node's content into chunks, then append its children's chunks.

        The pieces returned by ``self.split_markdown(self.content)`` are
        accumulated, separated by single spaces. A chunk is closed only once
        the accumulated text is *already* longer than ``chunk_len_chars``, so
        the value acts as a soft threshold: a chunk may exceed it by up to one
        piece.

        Args:
            page_info: Mapping providing the ``"package_name"`` and ``"page"``
                keys used to build each chunk's source URL, page title and
                package name.
            chunk_len_chars: Soft size threshold, in characters, after which
                the current chunk is closed and a new one is started.
            headings: Headings of the ancestor nodes. ``None`` (the default)
                means there are no ancestors. The passed list is never
                mutated; this node's heading is appended to a new list.

        Returns:
            A list of ``Chunk`` objects: this node's own chunks followed by the
            chunks of each child, in ``self.children`` order. Returns an empty
            list when this node's content splits into no pieces.
        """
        # Build a new list instead of mutating the caller's (or a shared default).
        headings = ([] if headings is None else headings) + [self.heading]

        split_content = self.split_markdown(self.content)
        if not split_content:
            # NOTE(review): this early return also skips ``self.children``, so a
            # node with no content of its own contributes none of its
            # descendants' chunks. Kept as-is; confirm this is intended.
            return []

        # These values are identical for every chunk of this node, so compute
        # them once. At least one chunk is always produced past this point, so
        # the lookups would have happened anyway.
        # Assumes get_page_title has no side effects — TODO confirm.
        package_name = page_info["package_name"]
        source_page_url = f"https://huggingface.co/docs/{page_info['package_name']}/{page_info['page']}#{self.anchor}"
        source_page_title = get_page_title(page_info["page"])

        def make_chunk(pieces):
            # Joining with " " and stripping yields the same text as appending
            # "piece + ' '" repeatedly and stripping the result.
            return Chunk(
                text=" ".join(pieces).strip(),
                source_page_url=source_page_url,
                source_page_title=source_page_title,
                package_name=package_name,
                headings=headings,
            )

        chunks = []
        pieces = []
        # Length the accumulated text would have with one trailing space per
        # piece; this is the quantity compared against the threshold.
        pieces_len = 0
        for content in split_content:
            # The check happens before adding the next piece, so a chunk is
            # flushed only after it has already crossed the threshold.
            if pieces_len > chunk_len_chars:
                chunks.append(make_chunk(pieces))
                pieces = []
                pieces_len = 0
            pieces.append(content)
            pieces_len += len(content) + 1

        # Flush whatever is left over after the last piece.
        if pieces:
            chunks.append(make_chunk(pieces))

        # Children receive the heading trail that now includes this node.
        for child in self.children:
            chunks.extend(child.get_chunks(page_info, chunk_len_chars, headings=headings))

        return chunks