in hugegraph-llm/src/hugegraph_llm/operators/llm_op/property_graph_extract.py [0:0]
def _extract_and_filter_label(self, schema, text) -> List[Dict[str, Any]]:
    """Parse a property graph out of ``text`` and keep only schema-valid items.

    The span from the first ``{`` to the last ``}`` in ``text`` is parsed as
    JSON and must be an object with ``"vertices"`` and ``"edges"`` keys. Each
    entry of those two collections is kept only if it is a dict, contains
    every key in ``self.NECESSARY_ITEM_KEYS``, and its ``"label"`` is one of
    the names declared in ``schema``. Rejected entries are logged and
    skipped; they never abort the whole extraction.

    Args:
        schema: Mapping with ``"vertexlabels"`` and ``"edgelabels"`` entries,
            each an iterable of dicts that carry a ``"name"`` key.
        text: Raw text expected to contain one JSON object.

    Returns:
        A flat list with the accepted vertex items followed by the accepted
        edge items. Empty when no JSON object is found, the JSON is invalid,
        or the top-level structure is not the expected one.

    Raises:
        KeyError: If ``schema`` lacks ``"vertexlabels"`` / ``"edgelabels"``
            or one of their entries lacks ``"name"``.
    """
    # Use regex to extract a JSON object with curly braces. The match is
    # greedy: it runs from the first '{' to the last '}' in the text.
    json_match = re.search(r'({.*})', text, re.DOTALL)
    if not json_match:
        log.critical("Invalid property graph! No JSON object found, "
                     "please check the output format example in prompt.")
        return []
    json_str = json_match.group(1).strip()

    items: List[Dict[str, Any]] = []

    # Only the parse itself can raise JSONDecodeError, so keep the try narrow.
    try:
        property_graph = json.loads(json_str)
    except json.JSONDecodeError:
        log.critical("Invalid property graph JSON! Please check the extracted JSON data carefully")
        return items

    # Expect property_graph to be a dict with keys "vertices" and "edges"
    if not (isinstance(property_graph, dict) and "vertices" in property_graph and "edges" in property_graph):
        log.critical("Invalid property graph format; expecting 'vertices' and 'edges'.")
        return items

    # Create sets for valid vertex and edge labels based on the schema
    vertex_label_set = {vertex["name"] for vertex in schema["vertexlabels"]}
    edge_label_set = {edge["name"] for edge in schema["edgelabels"]}

    def process_items(item_list, valid_labels, item_type):
        # The model may emit null, a scalar, a string or an object here.
        # Iterating those would either raise TypeError or walk over
        # characters / keys, so reject anything that is not a JSON array.
        if not isinstance(item_list, list):
            log.warning("Invalid %s list type '%s' has been ignored.", item_type, type(item_list))
            return
        for item in item_list:
            if not isinstance(item, dict):
                log.warning("Invalid property graph item type '%s'.", type(item))
                continue
            if not self.NECESSARY_ITEM_KEYS.issubset(item.keys()):
                log.warning("Invalid item keys '%s'.", item.keys())
                continue
            label = item["label"]
            # JSON arrays/objects are unhashable; testing them against a set
            # would raise TypeError, so treat them as invalid labels up front.
            if isinstance(label, (list, dict)) or label not in valid_labels:
                log.warning("Invalid %s label '%s' has been ignored.", item_type, label)
                continue
            items.append(item)

    process_items(property_graph["vertices"], vertex_label_set, "vertex")
    process_items(property_graph["edges"], edge_label_set, "edge")
    return items