in elasticsearch/helpers/vectorstore/_sync/vectorstore.py [0:0]
def _create_index_if_not_exists(self) -> None:
    """Create the index ``self.index`` unless it already exists.

    Mappings and settings are obtained from
    ``self.retrieval_strategy.es_mappings_settings`` and then extended with
    ``self.custom_index_settings`` and ``self.metadata_mappings`` when those
    are set. ``self.retrieval_strategy.before_index_creation`` is invoked
    right before the index is created.

    When the strategy reports that it needs inference and
    ``self.num_dimensions`` is not set, the dimension count is taken from
    the length of a probe embedding produced by ``self.embedding_service``
    and stored on ``self.num_dimensions``.

    Raises:
        ValueError: if the strategy needs inference but neither
            ``num_dimensions`` nor ``embedding_service`` is available; if a
            custom index setting uses a key already present in the
            strategy's settings; or if a metadata mapping key is already
            defined among the strategy's metadata field mappings.
    """
    exists = self.client.indices.exists(index=self.index)
    if exists.meta.status == 200:
        logger.debug("Index %s already exists. Skipping creation.", self.index)
        return

    if self.retrieval_strategy.needs_inference():
        if not self.num_dimensions and not self.embedding_service:
            raise ValueError(
                "retrieval strategy requires embeddings; either embedding_service "
                "or num_dimensions need to be specified"
            )
        if not self.num_dimensions:
            # The guard above guarantees an embedding service here; embed a
            # throwaway query only to learn the vector length.
            vector = self.embedding_service.embed_query("get num dimensions")
            self.num_dimensions = len(vector)

    mappings, settings = self.retrieval_strategy.es_mappings_settings(
        text_field=self.text_field,
        vector_field=self.vector_field,
        num_dimensions=self.num_dimensions,
    )

    if self.custom_index_settings:
        conflicting_keys = set(self.custom_index_settings.keys()) & set(
            settings.keys()
        )
        if conflicting_keys:
            raise ValueError(f"Conflicting settings: {conflicting_keys}")
        settings.update(self.custom_index_settings)

    if self.metadata_mappings:
        metadata = mappings["properties"].get("metadata", {"properties": {}})
        # Field definitions live under the "properties" entry of the metadata
        # mapping, so duplicates must be looked up there and not on the
        # wrapper dict (whose only guaranteed key is "properties" itself).
        existing_fields = metadata.get("properties", {})
        for key in self.metadata_mappings:
            if key in existing_fields:
                raise ValueError(f"metadata key {key} already exists in mappings")
        merged_fields = {**existing_fields, **self.metadata_mappings}
        mappings["properties"]["metadata"] = {"properties": merged_fields}

    self.retrieval_strategy.before_index_creation(
        client=self.client,
        text_field=self.text_field,
        vector_field=self.vector_field,
    )
    self.client.indices.create(
        index=self.index, mappings=mappings, settings=settings
    )