def _get_client_id

def _get_client_id_field()

in bigquery_etl/shredder/config.py [0:0]
19 lines of code
13 McCabe index (conditional complexity)

    def _get_client_id_field(table, deletion_request_view=False, study_name=None):
        """Determine which column should be used as client id for a given table."""
        if table.dataset_id.startswith("rally_"):
            # `rally_zero_one` is a special case where top-level rally_id is used
            # both in the ping tables and the deletion_requests view
            if table.dataset_id in ["rally_zero_one_stable", "rally_zero_one_derived"]:
                return RALLY_ID_TOP_LEVEL
            # deletion request views expose rally_id as a top-level field
            if deletion_request_view:
                return RALLY_ID_TOP_LEVEL
            else:
                return RALLY_ID
        elif table.dataset_id == "analysis":
            # Rally analysis tables do not have schemas specified upfront,
            # analysts might decide to use either nested or top-level rally_id.
            # Shared datasets, like attention stream, may also have derived
            # datasets with rally IDs
            # See https://github.com/mozilla-services/cloudops-infra/blob/master/projects/data-pioneer/tf/prod/envs/prod/study-projects/main.tf#L60-L67 # noqa
            if any(_has_nested_rally_id(field) for field in table.schema):
                return RALLY_ID
            elif any(field.name == RALLY_ID_TOP_LEVEL for field in table.schema):
                return RALLY_ID_TOP_LEVEL
            # Pioneer derived tables will have a PIONEER_ID
            elif any(field.name == PIONEER_ID for field in table.schema):
                return PIONEER_ID
            else:
                logging.error(f"Failed to find client_id field for {table}")
        else:
            return PIONEER_ID