def _parse_document_result()

in search/cloud-function/python/vertex_ai_search_client.py [0:0]


    def _parse_document_result(self, document: dict[str, Any]) -> dict[str, Any]:
        """
        Parse a single document result from the search response.

        This supports both structured and unstructured data, and also supports
        extractive segments and answers and snippets.

        Args:
            document (Dict[str, Any]): The document data from the search result.

        Returns:
            Dict[str, Any]: The parsed document page_content and metadata.
        """
        metadata = {
            **document.get("derived_struct_data", {}),
            **document.get("struct_data", {}),
        }

        json_data = document.get("json_data", {})
        if isinstance(json_data, str):
            try:
                json_data = json.loads(json_data)
            except json.JSONDecodeError:
                print(f"Warning: Failed to parse json_data: {json_data}")
                json_data = {}

        metadata.update(json_data)
        result: dict[str, Any] = {"metadata": metadata}

        if self.config.engine_data_type == "STRUCTURED":
            structured_data = (
                json_data if json_data else document.get("struct_data", {})
            )
            result["page_content"] = json.dumps(structured_data, indent=2)
            for key in structured_data.keys():
                result["metadata"].pop(key, None)
        elif "extractive_answers" in metadata:
            result["page_content"] = self._parse_segments(
                metadata.get("extractive_answers", [])
            )
        elif "snippets" in metadata:
            result["page_content"] = self._parse_snippets(metadata.get("snippets", []))
        else:
            result["page_content"] = metadata.get("content", "")

        return result