in modules/genomics_dsub/scripts/build/cloud_functions/function-source/main.py [0:0]
def ngs_qc_trigger(event, context):
    """Launch a FastQC job through dsub for a newly uploaded Cloud Storage object.

    Cloud Function entry point for a Cloud Storage trigger. It uses dsub
    (https://github.com/DataBiosphere/dsub) with the ``google-cls-v2``
    provider to run ``fastqc`` on the object described by ``event``. dsub is
    executed as a subprocess, so it must be installed in the function's
    environment (e.g. through requirements.txt).

    Configuration is read from these environment variables: GCP_PROJECT,
    GCS_OUTPUT_BUCKET, GCS_LOG_LOCATION, CONTAINER_IMAGE, REGION, NETWORK,
    SUBNETWORK, ZONES, DISK_SIZE and SERVICE_ACCOUNT. A variable that is not
    set is replaced by a descriptive placeholder string, which is passed to
    dsub unchanged. GCS_LOG_LOCATION falls back to GCS_OUTPUT_BUCKET when it
    is not set.

    Args:
        event: Mapping describing the storage event. The ``bucket`` and
            ``name`` keys are read to build the ``gs://`` input path.
        context: Event metadata supplied by the runtime; not used.

    Returns:
        None. The dsub command line and the subprocess result are printed.
    """
    # Function-scope import: only needed to render the command for the log.
    import shlex

    input_bucket = event['bucket']
    input_fastq = event['name']

    setting = os.environ.get
    project = setting('GCP_PROJECT', 'Project id not set')
    output_bucket = setting('GCS_OUTPUT_BUCKET', 'Output GCS bucket not set')
    # Prefer the dedicated log variable; fall back to the lookup the function
    # used historically so existing deployments keep their log location.
    log_location = setting(
        'GCS_LOG_LOCATION',
        setting('GCS_OUTPUT_BUCKET', 'log GCS bucket not set'),
    )
    image = setting('CONTAINER_IMAGE', 'Unable to find container image')
    region = setting('REGION', 'Region for lifesciences api not set')
    network = setting('NETWORK', 'NETWORK for lifesciences api not set')
    subnetwork = setting('SUBNETWORK', 'SUBNETWORK for lifesciences api not set')
    zones = setting('ZONES', 'ZONES for lifesciences api not set')
    disk_size = setting('DISK_SIZE', 'DISK_SIZE for lifesciences api not set')
    service_account = setting('SERVICE_ACCOUNT', 'NGS Service Account not set')

    print(f"Processing fastq file: {input_fastq}.")
    print(event)

    # Build an argument vector instead of a shell string: the object name is
    # chosen by whoever uploads the file and must never be parsed by a shell.
    cmd = [
        'dsub',
        '--provider', 'google-cls-v2',
        '--project', project,
        '--network', network,
        '--subnetwork', subnetwork,
        '--disk-size', disk_size,
        '--logging', log_location,
        '--location', region,
        '--zones', zones,
        '--input', f'FASTQ=gs://{input_bucket}/{input_fastq}',
        '--output', f'HTML={output_bucket}/*',
        '--image', image,
        '--service-account', service_account,
        # This script runs inside the job container, where ${FASTQ} and
        # ${HTML} are expanded; it is passed to dsub as one literal argument.
        '--command', 'fastqc ${FASTQ} --outdir=$(dirname ${HTML})',
        '--enable-stackdriver-monitoring',
    ]
    # Log a shell-quoted rendering so the command can be copied and re-run.
    print(' '.join(shlex.quote(arg) for arg in cmd))

    try:
        result = subprocess.run(cmd, shell=False)
    except OSError as err:
        # Without a shell, a missing/unrunnable dsub raises instead of giving
        # exit status 127; report it rather than crash the function.
        print(f"Unable to execute dsub: {err}")
        return
    print(result)