in migrate.py [0:0]
import json

import tqdm
from azure.search.documents.indexes.models import SearchIndex, VectorSearch


def backup_and_restore_index(source_endpoint, source_key, source_index_name, target_endpoint, target_key, target_index_name):
    # Create search and index clients for the source and target services
    source_search_client, source_index_client = create_clients(source_endpoint, source_key, source_index_name)
    target_search_client, target_index_client = create_clients(target_endpoint, target_key, target_index_name)

    # Load the target vector search profiles and inject the API key
    with open("vectors.json") as f:
        vectors = json.load(f)
    vectors = add_api_key(vectors)
    vector_search = VectorSearch.from_dict(vectors)

    # Load the JSON file that maps source text columns to target vector fields
    with open("vector_mapping.json") as f:
        vector_mapping = json.load(f)
    embeddings_model = initialize_embedding_model()
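    # For reference, each vector_mapping.json entry is expected to carry the
    # keys used below; field names and dimension here are hypothetical:
    #   {"source": "content", "target": "content_vector", "vector_length": 1536}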
    # Get the source index definition and adapt its fields for the target
    source_index = source_index_client.get_index(name=source_index_name)
    target_fields = []
    non_retrievable_fields = []
    key_field = None
    for field in source_index.fields:
        if field.hidden:
            non_retrievable_fields.append(field)
        if field.key:
            key_field = field
        if field.vector_search_dimensions is not None:
            # Resize vector fields to match the dimensions of the new embeddings
            for mapping in vector_mapping:
                if mapping["target"] == field.name:
                    field.vector_search_dimensions = mapping["vector_length"]
        target_fields.append(field)
    if key_field is None:
        raise Exception("Key field not found in source index")
    if len(non_retrievable_fields) > 0:
        print(f"WARNING: The following fields are not marked as retrievable and cannot be backed up and restored: {', '.join(f.name for f in non_retrievable_fields)}")
    # Create the target index with the same definition as the source, swapping
    # in the new vector search configuration
    target_index = SearchIndex(
        name=target_index_name,
        fields=target_fields,
        vector_search=vector_search,
        semantic_search=source_index.semantic_search,
    )
    target_index_client.create_or_update_index(target_index)
    document_count = total_count(source_search_client)
    # Paging beyond 100,000 results requires sorting and range-filtering on the key field
    can_use_filter = key_field.sortable and key_field.filterable
    if not can_use_filter:
        print("WARNING: The key field is not filterable or not sortable. A maximum of 100,000 records can be backed up and restored.")

    # Back up and restore documents, re-embedding the mapped columns along the way
    all_documents = search_results_with_filter(source_search_client, key_field.name) if can_use_filter else search_results_without_filter(source_search_client)
    print("Backing up and restoring documents:")
    failed_documents = 0
    failed_keys = []
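    # upload_documents sends each page as one batch; the service returns an
    # IndexingResult per document with its key and success status.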
    with tqdm.tqdm(total=document_count) as progress_bar:
        for page in all_documents:
            new_page = []
            for document in page:
                # Re-embed each mapped source column into its target vector field
                for mapping in vector_mapping:
                    source = mapping["source"]
                    embedding = get_embedding(embeddings_model, document[source])
                    document[mapping["target"]] = embedding
                new_page.append(document)
            result = target_search_client.upload_documents(documents=new_page)
            progress_bar.update(len(result))
            for item in result:
                if not item.succeeded:
                    failed_documents += 1
                    failed_keys.append(item.key)
                    print(f"Document upload error: {item.error_message}")
    if failed_documents > 0:
        print(f"Failed documents: {failed_documents}")
        print(f"Failed document keys: {failed_keys}")
    else:
        print("All documents uploaded successfully.")
    print(f"Successfully backed up '{source_index_name}' and restored to '{target_index_name}'")
    return source_search_client, target_search_client, all_documents
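

# ---------------------------------------------------------------------------
# The helpers used above are not part of this excerpt. What follows is a
# minimal sketch of how they could be implemented with the
# azure-search-documents SDK and the OpenAI client; the actual definitions in
# migrate.py may differ. add_api_key depends on the layout of vectors.json and
# is omitted here.
# ---------------------------------------------------------------------------

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient


def create_clients(endpoint, key, index_name):
    # One client for documents, one for index definitions, sharing a credential
    credential = AzureKeyCredential(key)
    search_client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)
    index_client = SearchIndexClient(endpoint=endpoint, credential=credential)
    return search_client, index_client


def total_count(search_client):
    # A wildcard query with include_total_count=True reports the index size
    results = search_client.search(search_text="*", include_total_count=True)
    return results.get_count()


def search_results_without_filter(search_client):
    # Single pass through the pager; the service caps this at 100,000 results
    results = search_client.search(search_text="*", top=100000)
    yield from results.by_page()


def search_results_with_filter(search_client, key_field_name):
    # Works around the 100,000-result window by sorting on the (string) key
    # field and restarting each pass with a range filter past the last key seen
    last_key = None
    while True:
        key_filter = f"{key_field_name} gt '{last_key}'" if last_key else None
        results = search_client.search(
            search_text="*",
            top=100000,
            filter=key_filter,
            order_by=[key_field_name],
        )
        got_results = False
        for page in results.by_page():
            page = list(page)
            if page:
                got_results = True
                last_key = page[-1][key_field_name]
                yield page
        if not got_results:
            break


def initialize_embedding_model():
    # Hypothetical: any embedding client works here; shown with the OpenAI SDK
    from openai import OpenAI
    return OpenAI()


def get_embedding(model, text):
    # The model name is an assumption; its output dimension must match the
    # vector_length values declared in vector_mapping.json
    response = model.embeddings.create(model="text-embedding-3-small", input=text)
    return response.data[0].embedding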