def _subgraph

def _subgraph_query()

in hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py [0:0]
67 lines of code
14 McCabe index (conditional complexity)

    def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Query the graph around the matched entities and store the result in ``context``.

        Reads from ``context``:
            - ``match_vids``: vertex ids used as starting points when
              ``self._prop_to_match`` is None.
            - ``max_deep`` / ``max_items`` (int) and ``prop_to_match`` (str): when
              present with that type they overwrite the matching ``self._*``
              attribute, so the override persists on the instance after the call.
            - ``keywords``: property values to match; required only when
              ``self._prop_to_match`` is set.

        Writes to ``context``:
            - ``graph_result``: always set once a query has run.
            - ``graph_result_flag`` (0), ``vertex_degree_list``,
              ``knowledge_with_degree`` and ``graph_context_head``: only when
              ``graph_result`` is non-empty.

        Returns:
            The same ``context`` dict. No key is added when matching by vertex id
            and ``match_vids`` is missing or empty.

        Raises:
            AssertionError: when matching by property and ``context`` carries no
                ``keywords``.
        """
        # 1. Extract params from context
        matched_vids = context.get("match_vids")
        if isinstance(context.get("max_deep"), int):
            self._max_deep = context["max_deep"]
        if isinstance(context.get("max_items"), int):
            self._max_items = context["max_items"]
        if isinstance(context.get("prop_to_match"), str):
            self._prop_to_match = context["prop_to_match"]

        # 2. Extract edge_labels from graph schema
        _, edge_labels = self._extract_labels_from_schema()
        edge_labels_str = ",".join(f"'{label}'" for label in edge_labels)
        # TODO: enhance the limit logic later
        edge_limit_amount = len(edge_labels) * huge_settings.edge_limit_pre_label

        use_id_to_match = self._prop_to_match is None
        if use_id_to_match:
            if not matched_vids:
                # Nothing to start the traversal from.
                return context

            gremlin_query = VERTEX_QUERY_TPL.format(keywords=matched_vids)
            # Log before executing so that a failing query is still visible in the logs.
            log.debug("Vids gremlin query: %s", gremlin_query)
            vertexes = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
            vertex_knowledge = self._format_graph_from_vertex(query_result=vertexes)

            paths: List[Any] = []
            # TODO: use generator or asyncio to speed up the query logic
            for matched_vid in matched_vids:
                gremlin_query = VID_QUERY_NEIGHBOR_TPL.format(
                    keywords=f"'{matched_vid}'",
                    max_deep=self._max_deep,
                    edge_labels=edge_labels_str,
                    edge_limit=edge_limit_amount,
                    max_items=self._max_items,
                )
                log.debug("Kneighbor gremlin query: %s", gremlin_query)
                paths.extend(self._client.gremlin().exec(gremlin=gremlin_query)["data"])

            graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
                query_paths=paths
            )

            # TODO: we may need to optimize the logic here with global deduplication (may lack some single vertex)
            # Fall back to the bare vertex knowledge when no path was found.
            if not graph_chain_knowledge:
                graph_chain_knowledge.update(vertex_knowledge)
            # The vertex knowledge is always merged into the first degree bucket.
            if vertex_degree_list:
                vertex_degree_list[0].update(vertex_knowledge)
            else:
                vertex_degree_list.append(vertex_knowledge)
        else:
            # WARN: When will the query enter here?
            keywords = context.get("keywords")
            # Raised explicitly rather than via `assert` so the check survives `python -O`;
            # the exception type and message are kept for existing callers.
            if not keywords:
                raise AssertionError("No related property(keywords) for graph query.")
            keywords_str = ",".join(f"'{kw}'" for kw in keywords)
            gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
                prop=self._prop_to_match,
                keywords=keywords_str,
                edge_labels=edge_labels_str,
                edge_limit=edge_limit_amount,
                max_deep=self._max_deep,
                max_items=self._max_items,
            )
            log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
            log.debug("Property gremlin query: %s", gremlin_query)

            paths = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
            graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
                query_paths=paths
            )

        # 3. Publish the result; the extra keys are only set when something was found.
        context["graph_result"] = list(graph_chain_knowledge)
        if context["graph_result"]:
            context["graph_result_flag"] = 0
            context["vertex_degree_list"] = [list(vertex_degree) for vertex_degree in vertex_degree_list]
            context["knowledge_with_degree"] = knowledge_with_degree
            # NOTE(review): the adjacent literals below are joined without a separator, so the
            # closing backtick is immediately followed by "extracted" - confirm whether a
            # space or newline was intended before changing this prompt text.
            context["graph_context_head"] = (
                f"The following are graph knowledge in {self._max_deep} depth, e.g:\n"
                "`vertexA--[links]-->vertexB<--[links]--vertexC ...`"
                "extracted based on key entities as subject:\n"
            )
        return context