def _parse_from_results()

in gemini/sample-apps/llamaindex-rag/backend/indexing/docai_parser.py


    def _parse_from_results(self, results: list["DocAIParsingResults"]):  # noqa: F821
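        """Convert Document AI batch-parsing results into llama-index Documents.

        For each result, list the JSON blobs under its parsed_path in GCS, read
        the chunkedDocument payload, and create one Document per chunk. Results
        with an empty or malformed parsed_path are skipped.
        """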
        documents = []
        storage_client = storage.Client()

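        # Each result points at a GCS prefix where Document AI wrote its parsed output.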
        for result in results:
            print(
                f"Processing result: source_path={result.source_path}, "
                f"parsed_path={result.parsed_path}"
            )
            if not result.parsed_path:
                print(
                    "Warning: Empty parsed_path for source "
                    f"{result.source_path}. Skipping."
                )
                continue

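            # parsed_path should be a gs://bucket/prefix URI; split it into bucket and prefix.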
            try:
                bucket_name, prefix = result.parsed_path.replace("gs://", "").split(
                    "/", 1
                )
            except ValueError:
                print(
                    f"Error: Invalid parsed_path format for {result.source_path}. Skipping."
                )
                continue

            bucket = storage_client.bucket(bucket_name)
            blobs = list(bucket.list_blobs(prefix=prefix))
            print(f"Found {len(blobs)} blobs in {result.parsed_path}")

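            # Document AI writes its output as one or more JSON files under the prefix.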
            for blob in blobs:
                if blob.name.endswith(".json"):
                    print(f"Processing JSON blob: {blob.name}")
                    try:
                        content = blob.download_as_text()
                        doc_data = json.loads(content)

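                        # A chunked output carries chunkedDocument.chunks; each chunk
                        # becomes one Document tagged with its chunkId and source path.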
                        if (
                            "chunkedDocument" in doc_data
                            and "chunks" in doc_data["chunkedDocument"]
                        ):
                            for chunk in doc_data["chunkedDocument"]["chunks"]:
                                doc = Document(
                                    text=chunk["content"],
                                    metadata={
                                        "chunk_id": chunk["chunkId"],
                                        "source": result.source_path,
                                    },
                                )
                                documents.append(doc)
                        else:
                            print(
                                "Warning: Expected 'chunkedDocument' "
                                f"structure not found in {blob.name}"
                            )
                    except Exception as e:
                        print(f"Error processing blob {blob.name}: {str(e)}")

        print(f"Total documents created: {len(documents)}")
        return documents
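
For reference, a minimal sketch of the parsed JSON shape this method consumes, reconstructed only from the fields accessed above (real Document AI output carries many more fields; the chunk IDs and text here are placeholders):

    doc_data = {
        "chunkedDocument": {
            "chunks": [
                {"chunkId": "c1", "content": "First chunk of extracted text ..."},
                {"chunkId": "c2", "content": "Second chunk of extracted text ..."},
            ]
        }
    }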