in hugegraph-llm/src/hugegraph_llm/operators/llm_op/info_extract.py [0:0]
def extract_triples_by_regex_with_schema(schema, text, graph):
    """Parse schema-labelled triples out of ``text`` and merge them into ``graph``.

    The text is scanned for items shaped like ``(s, p, o) - label``. Each item
    is interpreted against ``schema``:

    * If ``label`` equals the ``vertex_label`` of a schema vertex that lists
      ``p`` (compared case-insensitively) among its ``properties``, the vertex
      ``"{label}-{s}"`` is created if missing and its property ``p`` is set to
      ``o``.
    * If ``label`` equals the ``edge_label`` of a schema edge, the source and
      target vertices (``"{source_vertex_label}-{s}"`` and
      ``"{target_vertex_label}-{o}"``) are created with empty properties if
      missing, and an edge of type ``label`` is appended to ``graph["edges"]``.

    Items whose label matches nothing in the schema are ignored.

    Args:
        schema: Mapping with a ``"vertices"`` list (entries carrying
            ``"vertex_label"`` and ``"properties"``) and an ``"edges"`` list
            (entries carrying ``"edge_label"``, ``"source_vertex_label"`` and
            ``"target_vertex_label"``).
        text: Raw text containing the triples.
        graph: Mapping with ``"vertices"`` (iterable of dicts that each have
            an ``"id"``) and ``"edges"`` (list). Mutated in place.

    Returns:
        None. ``graph["vertices"]`` is replaced by a list of the merged
        vertices and ``graph["edges"]`` is extended in place.
    """
    # Flatten escaped newlines, stray backslashes and real newlines so every
    # triple sits on a single line for the regex below.
    text = text.replace("\\n", " ").replace("\\", " ").replace("\n", " ")
    pattern = r"\((.*?), (.*?), (.*?)\) - ([^ ]*)"
    matches = re.findall(pattern, text)

    # Index the vertices already in the graph by id so repeated mentions are
    # merged instead of duplicated.
    vertices_dict = {v["id"]: v for v in graph["vertices"]}
    for match in matches:
        # re.findall always yields strings for every group, so no None check
        # is needed here.
        s, p, o, label = [item.strip() for item in match]

        # TODO: use a more efficient way to compare the extract & input property
        p_lower = p.lower()
        for vertex in schema["vertices"]:
            if vertex["vertex_label"] == label and any(
                pp.lower() == p_lower for pp in vertex["properties"]
            ):
                vertex_id = f"{label}-{s}"
                entry = vertices_dict.setdefault(
                    vertex_id,
                    {"id": vertex_id, "name": s, "label": label, "properties": {}},
                )
                entry["properties"].update({p: o})
                break

        for edge in schema["edges"]:
            if edge["edge_label"] == label:
                source_label = edge["source_vertex_label"]
                source_id = f"{source_label}-{s}"
                vertices_dict.setdefault(
                    source_id,
                    {"id": source_id, "name": s, "label": source_label, "properties": {}},
                )
                target_label = edge["target_vertex_label"]
                target_id = f"{target_label}-{o}"
                vertices_dict.setdefault(
                    target_id,
                    {"id": target_id, "name": o, "label": target_label, "properties": {}},
                )
                graph["edges"].append(
                    {"start": source_id, "end": target_id, "type": label, "properties": {}}
                )
                break

    # Materialize as a list: a dict_values view is neither indexable nor
    # JSON-serializable, and callers hand in a list to begin with.
    graph["vertices"] = list(vertices_dict.values())