def ngs_qc_trigger()

in modules/genomics_dsub/scripts/build/cloud_functions/function-source/main.py [0:0]


def ngs_qc_trigger(event, context):

    """
    Cloud function that uses dsub (https://github.com/DataBiosphere/dsub) to execute
    pipeline jobs using lifesciences api in GCP.

    GCS EVENT TRIGGER:
    Triggered by a change to a Cloud Storage bucket.
    Function is triggered when a (e.g Fastq) file is added to a GCS bucket.
    The trigger event and bucket to be selected during the function creation

    Requirements.txt:
    dsub is included in the requirements.txt and gets installed through pip

    Python libraries:
    function executes dsub as a python subprocess. The function uses subprocess packaged already with python 3.8

    Args:
    To be defined: GCP_PROJECT,GCS_INPUT_BUCKET,GCS_OUTPUT_BUCKET,GCS_LOG_LOCATION,CONTAINER_IMAGE
    Based on the file events, variables around file name, path and bucket information are pulled from the event info
    """

    file = event
    GCS_INPUT_BUCKET = file['bucket']
    GCS_INPUT_FASTQ_FILE = file['name']

    GCP_PROJECT = os.environ.get('GCP_PROJECT', 'Project id not set')
    GCS_OUTPUT_BUCKET = os.environ.get('GCS_OUTPUT_BUCKET', 'Output GCS bucket not set')
    GCS_LOG_LOCATION = os.environ.get('GCS_OUTPUT_BUCKET', 'log GCS bucket not set')
    CONTAINER_IMAGE = os.environ.get('CONTAINER_IMAGE', 'Unable to find container image')
    REGION = os.environ.get('REGION', 'Region for lifesciences api not set')
    NETWORK = os.environ.get('NETWORK', 'NETWORK for lifesciences api not set')
    SUBNETWORK = os.environ.get('SUBNETWORK', 'SUBNETWORK for lifesciences api not set')
    ZONES =os.environ.get('ZONES', 'ZONES for lifesciences api not set')
    DISK_SIZE =os.environ.get('DISK_SIZE', 'ZONES for lifesciences api not set')
    SERVICE_ACCOUNT = os.environ.get('SERVICE_ACCOUNT','NGS Service Account not set')

    print(f"Processing fastq file: {GCS_INPUT_FASTQ_FILE}.")
    print(file)

    dsub_params = f"dsub --provider google-cls-v2 --project {GCP_PROJECT} --network {NETWORK} --subnetwork {SUBNETWORK} --disk-size {DISK_SIZE} --logging {GCS_LOG_LOCATION} --location {REGION} --zones {ZONES} --input FASTQ=gs://{GCS_INPUT_BUCKET}/{GCS_INPUT_FASTQ_FILE} --output HTML={GCS_OUTPUT_BUCKET}/*  --image {CONTAINER_IMAGE} --service-account {SERVICE_ACCOUNT} "
    fastq_cmd = "--command 'fastqc ${FASTQ} --outdir=$(dirname ${HTML})' --enable-stackdriver-monitoring"
    cmd = dsub_params + fastq_cmd
    print(cmd)
    p = subprocess.run(cmd, shell=True)
    print(p)