in src/backend/main.py

# Imports inferred from usage in fold() below; the kfp compiler import path
# assumes the KFP v2 SDK, and vertex_ai assumes the google-cloud-aiplatform
# client aliased as in Google's Vertex AI samples.
import os
from datetime import datetime

from flask import Response, flash, request
from werkzeug.utils import secure_filename
from google.cloud import aiplatform as vertex_ai
from kfp.v2 import compiler

import fasta_utils

# Module-level config (PROJECT_ID, REGION, BUCKET_NAME, IMAGE_URI,
# FILESTORE_IP, FILESTORE_SHARE, FILESTORE_NETWORK, MODEL_PARAMS) and the
# helpers valid_user, save_file_locally, upload_to_bucket and
# decide_accelerator_type are defined elsewhere in this file or imported
# from sibling modules; they are not part of this snippet.

def fold():
    user_info = valid_user()
    if user_info is not None:
        # Check that the POST request actually carries a file part
        if 'file' not in request.files:
            flash('No file part')
            return Response('{"status": "uploaded file is missing."}',
                            status=400, mimetype='application/json')
        f = request.files['file']
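        # Note: request.files is only populated when the client posts
        # multipart/form-data; other encodings land in request.form/data.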
        timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        form = dict(request.form)
        print(f"Folding request received on {timestamp}, with parameters {form}")

        # Sanitize the client-supplied filename before using it on disk or in GCS
        filename = secure_filename(f.filename)
        print(f"The received filename was: {filename}")
        # Stage the received file in Cloud Storage and keep the resulting path
        gcs_sequence_path = f'fasta/{filename}'  # TODO add username subfolder
        # Save the file locally first, for FASTA validation purposes
        save_file_locally(f, filename)
        gcs_path = upload_to_bucket(gcs_sequence_path, filename, BUCKET_NAME)
        print(f"Protein file uploaded to: {gcs_path}")
        # is_monomer, sequences = fasta_utils.validate_fasta_file_gcs(gcs_path, gcs_sequence_path, BUCKET_NAME, storage_client)
        is_monomer, sequences = fasta_utils.validate_fasta_file(filename)
        # TODO delete the local FASTA file after validation
        print(f'IS MONOMER {is_monomer}\n{sequences}')
        # Capture the running parameters of the pipeline
        params = {
            'sequence_path': gcs_path,
            'max_template_date': '2030-01-01',
            'use_small_bfd': form["smallBFD"] == "yes",
            'num_multimer_predictions_per_model': int(form["predictionCount"]),
            'is_run_relax': 'relax' if str(form["relaxation"]).lower() == "yes" else '',
            'model_preset': str(form["proteinType"]).lower(),
            'project': PROJECT_ID,
            'region': REGION,
        }
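        # These keys must match the parameter names of the pipeline function;
        # they are passed verbatim as parameter_values to the PipelineJob below.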
        os.environ['PREDICT_MACHINE_TYPE'] = str(form["predictMachineType"]).lower()
        os.environ['PREDICT_ACCELERATOR_COUNT'] = str(form["acceleratorCount"]).lower()
        os.environ['PREDICT_ACCELERATOR_TYPE'] = decide_accelerator_type(str(form["predictMachineType"]))
        os.environ['RELAX_MACHINE_TYPE'] = str(form["relaxMachineType"]).lower()
        os.environ['RELAX_ACCELERATOR_COUNT'] = str(form["relaxAcceleratorCount"]).lower()
        os.environ['RELAX_ACCELERATOR_TYPE'] = decide_accelerator_type(str(form["relaxMachineType"]))
        os.environ['ALPHAFOLD_COMPONENTS_IMAGE'] = IMAGE_URI
        os.environ['NFS_SERVER'] = FILESTORE_IP
        os.environ['NFS_PATH'] = FILESTORE_SHARE
        os.environ['NETWORK'] = FILESTORE_NETWORK
        os.environ['MODEL_PARAMS_GCS_LOCATION'] = MODEL_PARAMS
        os.environ['PARALLELISM'] = '5'
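        # The environment variables above are presumably read by the pipeline
        # module at import time, which is why the import on the next line is
        # deferred to this point rather than placed at the top of the file.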
        # Compile the pipeline
        from pipelines.alphafold_inference_pipeline import alphafold_inference_pipeline as pipeline
        run_tag = str(form["runTag"]).lower()
        experiment_id = str(form["experimentId"]).lower()
        pipeline_name = f'universal-pipeline-{experiment_id}'
        compiler.Compiler().compile(
            pipeline_func=pipeline,
            package_path=f'{pipeline_name}.json')
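        # Compilation writes a JSON pipeline spec to the local working
        # directory; that file is consumed as template_path below.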
        # Run the folding pipeline
        labels = {
            'run_tag': run_tag,
            'experiment_id': experiment_id,
            'sequence_id': filename.split(sep='.')[0].lower(),
            'user': f'{user_info["given_name"].lower()}_{user_info["family_name"].lower()}',
        }
        pipeline_job = vertex_ai.PipelineJob(
            display_name=pipeline_name,
            template_path=f'{pipeline_name}.json',  # pipeline names must be unique; each run gets its own GCS subfolder
            pipeline_root=f'gs://{BUCKET_NAME}/pipeline_runs/{pipeline_name}',
            parameter_values=params,
            enable_caching=True,
            labels=labels)
        pipeline_job.run(sync=False)
        pipeline_job.wait_for_resource_creation()
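        # run(sync=False) returns as soon as the job is submitted, and
        # wait_for_resource_creation() blocks only until the PipelineJob
        # resource exists on Vertex AI, so the HTTP response below is sent
        # while folding continues in the background.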
        # Log the folding job
        message = f"Folding started for experiment ID {experiment_id} with parameters {params}"
        print(f'/fold MESSAGE {message}')
        return Response('{"status": "folding is in progress..."}',
                        status=200, mimetype='application/json')
    else:
        return Response('{"status": "Unauthorized"}', status=401,
                        mimetype='application/json')