in packages/python-packages/apiview-copilot/src/_search_manager.py [0:0]
def build_context(self, guideline_results: List[SearchResult], example_results: List[SearchResult]) -> Context:
    """
    Expand search hits into a Context by following cross-references stored in Cosmos DB.

    Starting from the ids of the given search results, the reference graph is walked
    breadth-first: guideline documents contribute their ``related_guidelines`` and
    ``related_examples``, and example documents contribute their ``guideline_ids``.
    The walk ends when there are no unvisited guideline ids and no unfetched example ids.

    Args:
        guideline_results: Search hits whose ``id`` values seed the guideline traversal.
        example_results: Search hits whose ``id`` values seed the set of examples to fetch.

    Returns:
        A Context built from every guideline and example document that was fetched.
        Ids that match no document in their container are omitted.

    NOTE(review): any exception raised by ``self._ensure_env_vars`` or by the Cosmos
    client propagates to the caller unchanged.
    """
    self._ensure_env_vars(["AZURE_COSMOS_ACC_NAME", "AZURE_COSMOS_DB_NAME"])
    client = CosmosClient(COSMOS_ENDPOINT, credential=CREDENTIAL)
    database = client.get_database_client(COSMOS_DB_NAME)
    guidelines_container = database.get_container_client("guidelines")
    examples_container = database.get_container_client("examples")

    batch_size = 50

    def batch_query(container, id_list: List[str]) -> List[object]:
        """
        Fetch the documents whose ``id`` is in ``id_list`` from ``container``.

        ``container`` is a container client as returned by ``get_container_client``.
        Ids are sent in chunks of at most ``batch_size`` so each parameterized ``IN``
        clause stays small. An empty ``id_list`` issues no query and returns ``[]``.
        """
        results = []
        for start in range(0, len(id_list), batch_size):
            chunk = id_list[start : start + batch_size]
            parameters = [{"name": f"@id{pos}", "value": value} for pos, value in enumerate(chunk)]
            placeholders = ",".join(param["name"] for param in parameters)
            query = f"SELECT * FROM c WHERE c.id IN ({placeholders})"
            results.extend(
                container.query_items(
                    query=query,
                    parameters=parameters,
                    enable_cross_partition_query=True,
                )
            )
        return results

    # guideline ids that have already been sent to Cosmos (found or not); each id is
    # queried at most once, which also protects against reference cycles
    seen_guideline_ids = set()

    # id -> document; an example maps to None until (and unless) its document is fetched
    final_guidelines = {}
    # dict.fromkeys de-duplicates while keeping the order of the search results
    final_examples = dict.fromkeys(x.id for x in example_results)

    # example ids discovered but not yet requested from Cosmos
    pending_example_ids = list(final_examples)

    # queue for BFS traversal over guideline ids
    queue = deque(dict.fromkeys(x.id for x in guideline_results))

    # The body must run at least once even with an empty queue, otherwise examples that
    # came straight from the search results would never be fetched.
    while True:
        # take up to one batch of ids that have not been requested before
        batch_ids = []
        while queue and len(batch_ids) < batch_size:
            candidate = queue.popleft()
            if candidate in seen_guideline_ids:
                continue
            seen_guideline_ids.add(candidate)
            batch_ids.append(candidate)

        for guideline in batch_query(guidelines_container, batch_ids):
            gid = guideline["id"]
            if gid in final_guidelines:
                # a cross-partition query may yield the same id twice; keep the first
                continue
            final_guidelines[gid] = guideline

            # queue up related guidelines
            for rel in guideline.get("related_guidelines") or []:
                if rel not in seen_guideline_ids:
                    queue.append(rel)

            # remember related examples so they are fetched below
            for ex in guideline.get("related_examples") or []:
                try:
                    already_known = ex in final_examples
                except TypeError:
                    # unhashable entry (e.g. a dict or list instead of an id string)
                    # FIXME: This shouldn't happen once the data integrity is cleaned up
                    print(f"WARNING: Examples for guideline {gid} is not a string! Skipping.")
                    continue
                if not already_known:
                    final_examples[ex] = None
                    pending_example_ids.append(ex)

        # resolve every example discovered so far; each id is requested exactly once,
        # so ids that match no document are not re-queried on later passes
        examples = batch_query(examples_container, pending_example_ids)
        pending_example_ids = []
        for ex in examples:
            final_examples[ex["id"]] = ex
            # queue up more related guidelines from the example
            for gid in ex.get("guideline_ids") or []:
                if gid not in seen_guideline_ids:
                    queue.append(gid)

        # New example ids can only come from guidelines, so an empty queue at this
        # point means there is nothing left to discover.
        if not queue:
            break

    # flatten the results to just the values, dropping examples that were never found
    guidelines = [Guideline(**doc) for doc in final_guidelines.values()]
    examples = [Example(**doc) for doc in final_examples.values() if doc is not None]
    return Context(guidelines=guidelines, examples=examples)