def embed_chunks()

in src/databao_context_engine/services/chunk_embedding_service.py [0:0]


    def embed_chunks(self, *, datasource_run_id: int, chunks: list[EmbeddableChunk], result: str) -> None:
        """
        Turn plugin chunks into persisted chunks and embeddings

        Flow:
        1) Embed each chunk into an embedded vector
        2) Get or create embedding table for the appropriate model and embedding dimensions
        3) Persist chunks and embeddings vectors in a single transaction
        """

        if not chunks:
            return

        logger.debug(
            f"Embedding {len(chunks)} chunks for datasource run {datasource_run_id}, with chunk_embedding_mode={self._chunk_embedding_mode}"
        )

        enriched_embeddings: list[ChunkEmbedding] = []
        for chunk in chunks:
            chunk_display_text = to_yaml_string(chunk.content)

            generated_description = ""
            match self._chunk_embedding_mode:
                case ChunkEmbeddingMode.EMBEDDABLE_TEXT_ONLY:
                    embedding_text = chunk.embeddable_text
                case ChunkEmbeddingMode.GENERATED_DESCRIPTION_ONLY:
                    generated_description = cast(DescriptionProvider, self._description_provider).describe(
                        text=chunk_display_text, context=result
                    )
                    embedding_text = generated_description
                case ChunkEmbeddingMode.EMBEDDABLE_TEXT_AND_GENERATED_DESCRIPTION:
                    generated_description = cast(DescriptionProvider, self._description_provider).describe(
                        text=chunk_display_text, context=result
                    )
                    embedding_text = generated_description + "\n" + chunk.embeddable_text

            vec = self._embedding_provider.embed(embedding_text)

            enriched_embeddings.append(
                ChunkEmbedding(
                    chunk=chunk,
                    vec=vec,
                    display_text=chunk_display_text,
                    generated_description=generated_description,
                )
            )

        table_name = self._shard_resolver.resolve_or_create(
            embedder=self._embedding_provider.embedder,
            model_id=self._embedding_provider.model_id,
            dim=self._embedding_provider.dim,
        )

        self._persistence_service.write_chunks_and_embeddings(
            datasource_run_id=datasource_run_id,
            chunk_embeddings=enriched_embeddings,
            table_name=table_name,
        )