datasets/deepmind/pipelines/alphafold/alphafold_dag.py [157:306]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        source_objects=["metadata/*.json"],
        source_format="NEWLINE_DELIMITED_JSON",
        destination_project_dataset_table="deepmind_alphafold.metadata",
        write_disposition="WRITE_TRUNCATE",
        schema_fields=[
            {
                "description": "An array of AFDB versions this prediction has had",
                "mode": "REPEATED",
                "name": "allVersions",
                "type": "INTEGER",
            },
            {
                "description": "The latest AFDB version for this prediction",
                "mode": "NULLABLE",
                "name": "latestVersion",
                "type": "INTEGER",
            },
            {
                "description": "List of common organism names",
                "mode": "REPEATED",
                "name": "organismCommonNames",
                "type": "STRING",
            },
            {
                "description": "Number of the last residue in the entry relative to the UniProt entry. This is equal to the length of the protein unless we are dealing with protein fragments",
                "mode": "NULLABLE",
                "name": "uniprotEnd",
                "type": "INTEGER",
            },
            {
                "description": "Short names of the protein",
                "mode": "REPEATED",
                "name": "proteinShortNames",
                "type": "STRING",
            },
            {
                "description": "Number of the first residue in the entry relative to the UniProt entry. This is 1 unless we are dealing with protein fragments",
                "mode": "NULLABLE",
                "name": "uniprotStart",
                "type": "INTEGER",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT between 70 and 90",
                "mode": "NULLABLE",
                "name": "fractionPlddtConfident",
                "type": "FLOAT",
            },
            {
                "description": "List of synonyms for the organism",
                "mode": "REPEATED",
                "name": "organismSynonyms",
                "type": "STRING",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT greater than 90",
                "mode": "NULLABLE",
                "name": "fractionPlddtVeryHigh",
                "type": "FLOAT",
            },
            {
                "description": "Full names of the protein",
                "mode": "REPEATED",
                "name": "proteinFullNames",
                "type": "STRING",
            },
            {
                "description": "The mean pLDDT of this prediction",
                "mode": "NULLABLE",
                "name": "globalMetricValue",
                "type": "FLOAT",
            },
            {
                "description": "The scientific name of the organism",
                "mode": "NULLABLE",
                "name": "organismScientificName",
                "type": "STRING",
            },
            {
                "description": "The name recommended by the UniProt consortium",
                "mode": "NULLABLE",
                "name": "uniprotDescription",
                "type": "STRING",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT between 50 and 70",
                "mode": "NULLABLE",
                "name": "fractionPlddtLow",
                "type": "FLOAT",
            },
            {
                "description": "Uniprot accession ID",
                "mode": "NULLABLE",
                "name": "uniprotAccession",
                "type": "STRING",
            },
            {
                "description": "CRC64 hash of the sequence. Can be used for cheaper lookups.",
                "mode": "NULLABLE",
                "name": "sequenceChecksum",
                "type": "STRING",
            },
            {
                "description": "NCBI taxonomic id of the originating species",
                "mode": "NULLABLE",
                "name": "taxId",
                "type": "INTEGER",
            },
            {
                "description": "The Uniprot EntryName field",
                "mode": "NULLABLE",
                "name": "uniprotId",
                "type": "STRING",
            },
            {
                "description": 'The date of creation for this entry, e.g. "2022-06-01"',
                "mode": "NULLABLE",
                "name": "modelCreatedDate",
                "type": "DATE",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT less than 50",
                "mode": "NULLABLE",
                "name": "fractionPlddtVeryLow",
                "type": "FLOAT",
            },
            {
                "description": "Date when the sequence data was last modified in UniProt",
                "mode": "NULLABLE",
                "name": "sequenceVersionDate",
                "type": "DATE",
            },
            {
                "description": 'The AFDB entry ID, e.g. "AF-Q1HGU3-F1"',
                "mode": "NULLABLE",
                "name": "entryId",
                "type": "STRING",
            },
            {
                "description": "Additional synonyms for the gene",
                "mode": "REPEATED",
                "name": "geneSynonyms",
                "type": "STRING",
            },
            {
                "description": "Amino acid sequence for this prediction",
                "mode": "NULLABLE",
                "name": "uniprotSequence",
                "type": "STRING",
            },
            {
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



datasets/deepmind/pipelines/alphafold_v4/alphafold_v4_dag.py [151:300]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        source_objects=["metadata/*.json"],
        source_format="NEWLINE_DELIMITED_JSON",
        destination_project_dataset_table="deepmind_alphafold.metadata",
        write_disposition="WRITE_TRUNCATE",
        schema_fields=[
            {
                "description": "An array of AFDB versions this prediction has had",
                "mode": "REPEATED",
                "name": "allVersions",
                "type": "INTEGER",
            },
            {
                "description": "The latest AFDB version for this prediction",
                "mode": "NULLABLE",
                "name": "latestVersion",
                "type": "INTEGER",
            },
            {
                "description": "List of common organism names",
                "mode": "REPEATED",
                "name": "organismCommonNames",
                "type": "STRING",
            },
            {
                "description": "Number of the last residue in the entry relative to the UniProt entry. This is equal to the length of the protein unless we are dealing with protein fragments",
                "mode": "NULLABLE",
                "name": "uniprotEnd",
                "type": "INTEGER",
            },
            {
                "description": "Short names of the protein",
                "mode": "REPEATED",
                "name": "proteinShortNames",
                "type": "STRING",
            },
            {
                "description": "Number of the first residue in the entry relative to the UniProt entry. This is 1 unless we are dealing with protein fragments",
                "mode": "NULLABLE",
                "name": "uniprotStart",
                "type": "INTEGER",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT between 70 and 90",
                "mode": "NULLABLE",
                "name": "fractionPlddtConfident",
                "type": "FLOAT",
            },
            {
                "description": "List of synonyms for the organism",
                "mode": "REPEATED",
                "name": "organismSynonyms",
                "type": "STRING",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT greater than 90",
                "mode": "NULLABLE",
                "name": "fractionPlddtVeryHigh",
                "type": "FLOAT",
            },
            {
                "description": "Full names of the protein",
                "mode": "REPEATED",
                "name": "proteinFullNames",
                "type": "STRING",
            },
            {
                "description": "The mean pLDDT of this prediction",
                "mode": "NULLABLE",
                "name": "globalMetricValue",
                "type": "FLOAT",
            },
            {
                "description": "The scientific name of the organism",
                "mode": "NULLABLE",
                "name": "organismScientificName",
                "type": "STRING",
            },
            {
                "description": "The name recommended by the UniProt consortium",
                "mode": "NULLABLE",
                "name": "uniprotDescription",
                "type": "STRING",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT between 50 and 70",
                "mode": "NULLABLE",
                "name": "fractionPlddtLow",
                "type": "FLOAT",
            },
            {
                "description": "Uniprot accession ID",
                "mode": "NULLABLE",
                "name": "uniprotAccession",
                "type": "STRING",
            },
            {
                "description": "CRC64 hash of the sequence. Can be used for cheaper lookups.",
                "mode": "NULLABLE",
                "name": "sequenceChecksum",
                "type": "STRING",
            },
            {
                "description": "NCBI taxonomic id of the originating species",
                "mode": "NULLABLE",
                "name": "taxId",
                "type": "INTEGER",
            },
            {
                "description": "The Uniprot EntryName field",
                "mode": "NULLABLE",
                "name": "uniprotId",
                "type": "STRING",
            },
            {
                "description": 'The date of creation for this entry, e.g. "2022-06-01"',
                "mode": "NULLABLE",
                "name": "modelCreatedDate",
                "type": "DATE",
            },
            {
                "description": "Fraction of the residues in the prediction with pLDDT less than 50",
                "mode": "NULLABLE",
                "name": "fractionPlddtVeryLow",
                "type": "FLOAT",
            },
            {
                "description": "Date when the sequence data was last modified in UniProt",
                "mode": "NULLABLE",
                "name": "sequenceVersionDate",
                "type": "DATE",
            },
            {
                "description": 'The AFDB entry ID, e.g. "AF-Q1HGU3-F1"',
                "mode": "NULLABLE",
                "name": "entryId",
                "type": "STRING",
            },
            {
                "description": "Additional synonyms for the gene",
                "mode": "REPEATED",
                "name": "geneSynonyms",
                "type": "STRING",
            },
            {
                "description": "Amino acid sequence for this prediction",
                "mode": "NULLABLE",
                "name": "uniprotSequence",
                "type": "STRING",
            },
            {
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



