in code/embedding-function/utilities/helpers/embedders/push_embedder.py [0:0]
def __convert_to_search_document(self, document: SourceDocument, sharepoint_file_id: str):
    """Build the search-document dictionary for a single source document.

    Logs the conversion, generates an embedding for ``document.content`` via
    ``self.llm_helper.generate_embeddings``, and returns a dictionary keyed
    by the field names configured on ``self.env_helper``.

    Args:
        document: The source document whose ``id``, ``content``, ``source``,
            ``title``, ``chunk``, ``offset``, ``page_number`` and
            ``chunk_id`` attributes are read.
        sharepoint_file_id: Identifier stored under the SharePoint file-id
            field, both at the top level and inside the metadata.

    Returns:
        A dict holding the id, content, content vector, JSON-encoded
        metadata, title, source, chunk, offset and SharePoint file id.
    """
    logger.info(f"Converting document ID {document.id} to search document format")
    content_vector = self.llm_helper.generate_embeddings(document.content)

    env = self.env_helper

    # Metadata is stored as one JSON string; key order here determines the
    # order of keys in the serialized output.
    metadata_fields = {
        env.AZURE_SEARCH_FIELDS_ID: document.id,
        env.AZURE_SEARCH_SOURCE_COLUMN: document.source,
        env.AZURE_SEARCH_TITLE_COLUMN: document.title,
        env.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
        env.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
        "page_number": document.page_number,
        "chunk_id": document.chunk_id,
        env.AZURE_SEARCH_SHAREPOINT_FILE_ID_COLUMN: sharepoint_file_id,
    }
    metadata_json = json.dumps(metadata_fields)

    # Several metadata values are repeated as top-level fields alongside the
    # raw content and its embedding.
    search_document = {
        env.AZURE_SEARCH_FIELDS_ID: document.id,
        env.AZURE_SEARCH_CONTENT_COLUMN: document.content,
        env.AZURE_SEARCH_CONTENT_VECTOR_COLUMN: content_vector,
        env.AZURE_SEARCH_FIELDS_METADATA: metadata_json,
        env.AZURE_SEARCH_TITLE_COLUMN: document.title,
        env.AZURE_SEARCH_SOURCE_COLUMN: document.source,
        env.AZURE_SEARCH_CHUNK_COLUMN: document.chunk,
        env.AZURE_SEARCH_OFFSET_COLUMN: document.offset,
        env.AZURE_SEARCH_SHAREPOINT_FILE_ID_COLUMN: sharepoint_file_id,
    }
    return search_document