in hugegraph-llm/src/hugegraph_llm/operators/hugegraph_op/graph_rag_query.py [0:0]
def _subgraph_query(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """Query the neighborhood of the matched vertices and store the result in ``context``.

    The lookup mode is selected by ``self._prop_to_match``:

    * ``None`` -- vertices are addressed by id. ``context["match_vids"]`` is
      used for one vertex query plus one neighbor query per vid. If it is
      missing or empty, ``context`` is returned without any result keys.
    * otherwise -- vertices are addressed by property value, using
      ``context["keywords"]`` in a single neighbor query.

    Args:
        context: Pipeline state. Keys read here: ``match_vids``, ``keywords``
            and the optional overrides ``max_deep`` (int), ``max_items``
            (int) and ``prop_to_match`` (str). An override of the expected
            type is written to the matching ``self._*`` attribute, so it
            stays in effect on this instance after the call.

    Returns:
        The same ``context`` object. ``graph_result`` is always set once a
        query has run; ``graph_result_flag``, ``vertex_degree_list``,
        ``knowledge_with_degree`` and ``graph_context_head`` are set only
        when ``graph_result`` is non-empty.

    Raises:
        AssertionError: In property mode, when ``context["keywords"]`` is
            missing or empty.
    """

    def _quote(value: Any) -> str:
        # Render ``value`` as a single-quoted string literal for the query text.
        # Backslash and quote are escaped so a value cannot terminate the
        # literal early (Gremlin scripts are presumably evaluated as Groovy
        # on the server -- confirm). Values without those characters render
        # exactly as before.
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    # 1. Extract params from context
    matched_vids = context.get("match_vids")
    if isinstance(context.get("max_deep"), int):
        self._max_deep = context["max_deep"]
    if isinstance(context.get("max_items"), int):
        self._max_items = context["max_items"]
    if isinstance(context.get("prop_to_match"), str):
        self._prop_to_match = context["prop_to_match"]

    # 2. Extract edge_labels from graph schema
    _, edge_labels = self._extract_labels_from_schema()
    edge_labels_str = ",".join(_quote(label) for label in edge_labels)
    # TODO: enhance the limit logic later
    edge_limit_amount = len(edge_labels) * huge_settings.edge_limit_pre_label

    use_id_to_match = self._prop_to_match is None
    if use_id_to_match:
        if not matched_vids:
            return context

        # NOTE(review): matched_vids is interpolated through its Python repr and
        # the template is not visible here, so this call is left unchanged --
        # confirm vids cannot carry quote characters.
        gremlin_query = VERTEX_QUERY_TPL.format(keywords=matched_vids)
        # Log before executing so a failing query still shows up in the debug log.
        log.debug("Vids gremlin query: %s", gremlin_query)
        vertexes = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
        vertex_knowledge = self._format_graph_from_vertex(query_result=vertexes)

        paths: List[Any] = []
        # TODO: use generator or asyncio to speed up the query logic
        for matched_vid in matched_vids:
            gremlin_query = VID_QUERY_NEIGHBOR_TPL.format(
                keywords=_quote(matched_vid),
                max_deep=self._max_deep,
                edge_labels=edge_labels_str,
                edge_limit=edge_limit_amount,
                max_items=self._max_items,
            )
            log.debug("Kneighbor gremlin query: %s", gremlin_query)
            paths.extend(self._client.gremlin().exec(gremlin=gremlin_query)["data"])

        graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
            query_paths=paths
        )

        # TODO: we may need to optimize the logic here with global deduplication (may lack some single vertex)
        # When no path knowledge was produced, fall back to the knowledge of the
        # matched vertices themselves.
        if not graph_chain_knowledge:
            graph_chain_knowledge.update(vertex_knowledge)
            if vertex_degree_list:
                vertex_degree_list[0].update(vertex_knowledge)
            else:
                vertex_degree_list.append(vertex_knowledge)
    else:
        # WARN: When will the query enter here?
        keywords = context.get("keywords")
        # Raised explicitly instead of via ``assert`` so the check survives
        # ``python -O``; the exception type and message are unchanged.
        if not keywords:
            raise AssertionError("No related property(keywords) for graph query.")
        keywords_str = ",".join(_quote(kw) for kw in keywords)
        gremlin_query = PROPERTY_QUERY_NEIGHBOR_TPL.format(
            prop=self._prop_to_match,
            keywords=keywords_str,
            edge_labels=edge_labels_str,
            edge_limit=edge_limit_amount,
            max_deep=self._max_deep,
            max_items=self._max_items,
        )
        log.warning("Unable to find vid, downgraded to property query, please confirm if it meets expectation.")
        paths = self._client.gremlin().exec(gremlin=gremlin_query)["data"]
        graph_chain_knowledge, vertex_degree_list, knowledge_with_degree = self._format_graph_query_result(
            query_paths=paths
        )

    context["graph_result"] = list(graph_chain_knowledge)
    if context["graph_result"]:
        context["graph_result_flag"] = 0
        context["vertex_degree_list"] = [list(vertex_degree) for vertex_degree in vertex_degree_list]
        context["knowledge_with_degree"] = knowledge_with_degree
        # The leading space on the last fragment keeps the example pattern from
        # running straight into the following word after concatenation.
        context["graph_context_head"] = (
            f"The following are graph knowledge in {self._max_deep} depth, e.g:\n"
            "`vertexA--[links]-->vertexB<--[links]--vertexC ...`"
            " extracted based on key entities as subject:\n"
        )
    return context