def upload_file()

in sample_app/cerebral_genai/code/rag-on-edge-vectorDB/modules/VDBModule/main.py [0:0]

Flask request handler that accepts a Base64-encoded PDF, extracts and chunks its text, and uploads the chunks to a ChromaDB index in batches of at most 50 records.


import base64
import logging
import uuid
from io import BytesIO

import numpy as np
import pandas as pd
from flask import jsonify, request

# NormalizeText, LangChanSplitter, and chromaHelper are assumed to be
# defined elsewhere in this module.


def upload_file():
    # Parse the JSON body; fall back to an empty dict so missing fields
    # trigger the 400 below rather than an unhandled AttributeError.
    data = request.get_json(silent=True) or {}
    index_name = data.get('index_name')
    base64_data = data.get('file_data')

    if not index_name or not base64_data:
        return jsonify({'status': 'error', 'message': 'index_name or file_data not provided'}), 400

    try:
        # Decode the Base64-encoded string back to raw PDF bytes
        bytes_data = base64.b64decode(base64_data)
        pdf_file = BytesIO(bytes_data)

        # Extract the plain-text content of the PDF
        pdf_reader = NormalizeText()
        long_text = pdf_reader.get_doc_content_txt(pdf_file)

        # Split the text into chunks of 100 tokens with a 10-token overlap
        splitter = LangChanSplitter()
        string_list = splitter.TokenTextSplitter(100, 10, long_text)

        # One chunk per row, each tagged with a UUID for the vector DB
        df = pd.DataFrame({'document': string_list})
        df = df.dropna()
        df['id'] = df.apply(lambda x: str(uuid.uuid4()), axis=1)

        # Split the DataFrame into batches of at most 50 records
        # (e.g. 120 chunks -> 120 // 50 + 1 = 3 batches of 40 rows each)
        df_array = np.array_split(df, len(df) // 50 + 1)
        data_array_count = len(df_array)

        # Upload each batch in turn and keep track of what was sent
        new_df_array = []
        current_job_number = 1
        for sub_df in df_array:
            logging.info("working on: %d/%d", current_job_number, data_array_count)

            documents = sub_df["document"].to_list()
            ids = sub_df["id"].to_list()
            chromaHelper.upload_documents(index_name, ids, documents)

            new_df_array.append(sub_df)
            current_job_number += 1

        new_df = pd.concat(new_df_array, axis=0, ignore_index=True)
        logging.info("%d records uploaded.", len(new_df))

        return jsonify({'status': 'success', 'message': f'{len(new_df)} records uploaded successfully'})
    except Exception as e:
        logging.exception("Error uploading file")
        return jsonify({'status': 'error', 'message': f'Error uploading file: {str(e)}'}), 500
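
For illustration, a minimal client-side sketch of exercising this handler. The route path, host, and port are assumptions, since this excerpt does not show how the function is registered with Flask; `chromaHelper.upload_documents` is presumed to wrap a ChromaDB collection insert.

import base64
import requests

# Hypothetical endpoint; adjust to match the module's actual route registration.
URL = "http://localhost:5000/upload_file"

# Base64-encode the PDF so it can travel inside a JSON body
with open("manual.pdf", "rb") as f:
    file_b64 = base64.b64encode(f.read()).decode("utf-8")

resp = requests.post(URL, json={
    "index_name": "edge-docs",   # target vector DB index (example value)
    "file_data": file_b64,       # Base64-encoded PDF bytes
})
print(resp.status_code, resp.json())
# Expected on success: 200 with
# {'status': 'success', 'message': 'N records uploaded successfully'}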