def build_context()

in packages/python-packages/apiview-copilot/src/_search_manager.py [0:0]


    def build_context(self, guideline_results: List[SearchResult], example_results: List[SearchResult]) -> Context:
        """
        Expand search hits into a full Context by following links between guidelines and examples.

        Starting from the IDs of the supplied search results, documents are fetched from the
        "guidelines" and "examples" Cosmos containers. Every fetched guideline contributes its
        `related_guidelines` and `related_examples`; every fetched example contributes its
        `guideline_ids`. The traversal continues breadth-first until no unvisited ID remains.

        Args:
            guideline_results: Search results whose `.id` values seed the guideline traversal.
            example_results: Search results whose `.id` values seed the example set. These are
                resolved (and their linked guidelines followed) even when `guideline_results`
                is empty.

        Returns:
            A Context holding a Guideline for every guideline document found and an Example for
            every example document found. IDs for which no document was returned are omitted.
        """
        self._ensure_env_vars(["AZURE_COSMOS_ACC_NAME", "AZURE_COSMOS_DB_NAME"])
        client = CosmosClient(COSMOS_ENDPOINT, credential=CREDENTIAL)
        database = client.get_database_client(COSMOS_DB_NAME)
        guidelines_container = database.get_container_client("guidelines")
        examples_container = database.get_container_client("examples")

        batch_size = 50

        # NOTE(review): `container` receives the object returned by get_container_client(),
        # not a CosmosClient; the annotation is kept as-is to avoid requiring a new import.
        def batch_query(container: CosmosClient, id_list: List[str]) -> List[object]:
            """
            Fetch the documents whose `id` is in `id_list`, issuing one query per `batch_size` IDs.
            An empty `id_list` issues no query and returns an empty list.
            """
            results = []
            for start in range(0, len(id_list), batch_size):
                batch = id_list[start : start + batch_size]
                # IDs are passed as query parameters rather than interpolated into the SQL text
                placeholders = ",".join(f"@id{idx}" for idx in range(len(batch)))
                query = f"SELECT * FROM c WHERE c.id IN ({placeholders})"
                parameters = [{"name": f"@id{idx}", "value": value} for idx, value in enumerate(batch)]
                results.extend(
                    container.query_items(
                        query=query,
                        parameters=parameters,
                        enable_cross_partition_query=True,
                    )
                )
            return results

        # initial ids from the search queries; dict.fromkeys de-duplicates while keeping
        # the order in which the search returned them (set() ordering varies between runs)
        starting_guideline_ids = list(dict.fromkeys(x.id for x in guideline_results))
        starting_example_ids = list(dict.fromkeys(x.id for x in example_results))

        # guideline ids that have already been sent to the database; an id is recorded even
        # when no document comes back so it is never requested a second time
        requested_guideline_ids = set()

        # track the final results; a value of None means "known id, no document (yet)"
        final_guidelines = {}
        final_examples = dict.fromkeys(starting_example_ids)

        # example ids discovered but not yet looked up; each id is placed here exactly once
        pending_example_ids = list(starting_example_ids)

        # queue for BFS traversal
        queue = deque(starting_guideline_ids)

        # keep going while there is either a guideline to visit or an example to resolve, so
        # that seed examples are still fetched when there are no seed guidelines
        while queue or pending_example_ids:
            # take up to batch_size ids that have not been requested before
            batch_ids = []
            while queue and len(batch_ids) < batch_size:
                candidate = queue.popleft()
                if candidate not in requested_guideline_ids:
                    requested_guideline_ids.add(candidate)
                    batch_ids.append(candidate)

            for guideline in batch_query(guidelines_container, batch_ids):
                gid = guideline["id"]
                # a cross-partition query may return the same id more than once; keep the first
                if gid in final_guidelines:
                    continue
                final_guidelines[gid] = guideline

                # queue up related guidelines
                for rel in guideline.get("related_guidelines") or []:
                    if rel not in requested_guideline_ids:
                        queue.append(rel)

                # now do the same for examples
                for ex in guideline.get("related_examples") or []:
                    try:
                        if ex not in final_examples:
                            final_examples[ex] = None
                            pending_example_ids.append(ex)
                    except TypeError:
                        # raised by the membership test when the entry is unhashable
                        # FIXME: This shouldn't happen once the data integrity is cleaned up
                        print(f"WARNING: Examples for guideline {gid} is not a string! Skipping.")

            # now resolve the examples discovered since the last pass
            example_ids_to_lookup, pending_example_ids = pending_example_ids, []
            for ex in batch_query(examples_container, example_ids_to_lookup):
                final_examples[ex["id"]] = ex

                # queue up more related guidelines from the example
                for gid in ex.get("guideline_ids") or []:
                    if gid not in requested_guideline_ids:
                        queue.append(gid)

        # flatten the results to just the values, dropping ids that never resolved
        final_guidelines = [Guideline(**v) for v in final_guidelines.values() if v is not None]
        final_examples = [Example(**v) for v in final_examples.values() if v is not None]

        context = Context(guidelines=final_guidelines, examples=final_examples)
        return context