gen_backfill_report.py [250:387]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def query_activedata(query_json):
    """
    Run a query against ActiveData and return the "data" section of the response.
    """
    active_data_url = "http://activedata.allizom.org/query"

    req = urllib.request.Request(active_data_url)
    req.add_header("Content-Type", "application/json")
    jsondata = json.dumps(query_json)

    jsondataasbytes = jsondata.encode("utf-8")
    req.add_header("Content-Length", len(jsondataasbytes))

    print("Querying Active-data...")
    response = urllib.request.urlopen(req, jsondataasbytes)
    print("Status:" + str(response.getcode()))

    data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
    return data
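
# Sketch of the result shape (an assumption inferred from how results are
# indexed below, since the AD_* query templates are defined earlier in the
# file): ActiveData's default format returns "data" as a mapping from each
# selected column name to a parallel array, e.g.
#   result = query_activedata({"from": "task", "select": ["task.id"], "limit": 3})
#   result["task.id"]  # -> a list of up to 3 task IDs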


def get_owner_information(owners, taskids):
    """
    Uses the given task IDs to determine the owner or
    person who created them.
    """
    filter_by_owners = {}

    AD_BK_OWNER_QUERY["where"]["and"].append(
        {"in": {"task.id": taskids}},
    )
    owner_data = query_activedata(AD_BK_OWNER_QUERY)

    for c, taskid in enumerate(owner_data["task.id"]):
        possible_owners = [o for o in owner_data["task.tags.value"][c] if o]
        if not possible_owners:
            # Missing owner information
            continue

        # There should only ever be one owner. If
        # any of the requested owners matches it,
        # then we keep this task and download
        # artifacts from it.
        task_owner = possible_owners[0]
        for owner in owners:
            if owner in task_owner:
                filter_by_owners[taskid] = True
                break

    return filter_by_owners


def generate_backfill_report(
    start_date="",
    end_date="",
    task_name_regex="",
    talos=False,
    raptor=False,
    browsertime=False,
    awsy=False,
    symbols=[],
    branches=["autoland"],
    find_long_tasks=False,
    owners=[],
    additional_conditions=[],
    no_cache=False,
    clobber_cache=False,
):
    """
    The report generation works as follows:
        (i):   Find all backfill tasks between the given dates.
               If no dates are given, we look over the past year.
               If only a start date is given, we look from then to now.
               If only an end date is given, we look from 1 year ago up
               to the end date.

        (ii):  Using the backfill tasks that were found, download all
               the to-run-<PUSH_ID>.json and label-to-taskid-<PUSH_ID>.json
               files.

        (iii): For each to-run file, find the tests that are being
               retriggered and their task IDs. Then, obtain the sum of
               the runtimes for all of those tasks.
    """
    if clobber_cache and os.path.exists(BACKFILL_CACHE):
        shutil.rmtree(BACKFILL_CACHE)

    if no_cache:
        print("Not caching downloaded data")
    else:
        print("Downloaded data will be cached here: %s" % BACKFILL_CACHE)
        os.makedirs(BACKFILL_CACHE, exist_ok=True)

    conditions = [
        {"eq": {"job.type.symbol": "Bk"}},
        {"in": {"repo.branch.name": branches}},
    ]

    where_clause = {"and": conditions}

    # Set up the time range
    if end_date:
        conditions.append({"lt": {"action.start_time": {"date": str(end_date)}}})
    if start_date:
        conditions.append({"gte": {"action.start_time": {"date": str(start_date)}}})
    else:
        # Restrict the query to 1 year back
        print("Setting the start date to 1 year ago. This query will take some time...")
        conditions.append({"gte": {"action.start_time": {"date": "today-year"}}})

    if start_date or end_date:
        print(
            "Date specifications detected. "
            "Ensure that they follow these guidelines: "
            "https://github.com/mozilla/ActiveData/blob/dev/docs/jx_time.md"
        )

    # Query ActiveData for the backfilled tasks
    AD_BACKFILL_QUERY["where"] = where_clause
    debug(json.dumps(AD_BACKFILL_QUERY, indent=4))
    data = query_activedata(AD_BACKFILL_QUERY)

    if "build.revision" not in data:
        print("No backfill tasks found for the given time range")
        return

    debug("Analyzing backfills performed on the revisions: %s" % data["build.revision"])

    # Find the tasks that are specific to the requested owners
    filter_by_owners = {}
    if owners:
        # Get the owners of the backfills if needed
        print("Getting backfill task owner information...")
        filter_by_owners = get_owner_information(owners, data["run.taskcluster.id"])

    # Go through all the URL groupings and match up data from each PUSHID
    alltaskids = []
    total_groups = len(data["job.details.url"])
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
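
For context, the AD_BACKFILL_QUERY and AD_BK_OWNER_QUERY templates referenced
above are defined earlier in the file and are not part of this excerpt. A
minimal sketch of the shape this excerpt relies on (the "from", "select", and
"limit" values are illustrative assumptions, not the real templates):

# Hypothetical stand-ins for the module-level query templates. Only the
# fields that this excerpt reads or mutates are shown.
AD_BK_OWNER_QUERY = {
    "from": "task",                        # assumed data source
    "select": ["task.id", "task.tags.value"],
    "where": {"and": []},                  # get_owner_information() appends here
    "limit": 10000,                        # assumed
}
AD_BACKFILL_QUERY = {
    "from": "jobs",                        # assumed data source
    "select": [
        "build.revision", "run.taskcluster.id", "job.details.url",
    ],
    "where": None,                         # generate_backfill_report() sets this
    "limit": 10000,                        # assumed
}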



gen_backfill_report_v2.py [269:406]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def query_activedata(query_json):
    """
    Run a query against ActiveData and return the "data" section of the response.
    """
    active_data_url = "http://activedata.allizom.org/query"

    req = urllib.request.Request(active_data_url)
    req.add_header("Content-Type", "application/json")
    jsondata = json.dumps(query_json)

    jsondataasbytes = jsondata.encode("utf-8")
    req.add_header("Content-Length", len(jsondataasbytes))

    print("Querying Active-data...")
    response = urllib.request.urlopen(req, jsondataasbytes)
    print("Status:" + str(response.getcode()))

    data = json.loads(response.read().decode("utf8").replace("'", '"'))["data"]
    return data


def get_owner_information(owners, taskids):
    """
    Uses the given task IDs to determine the owner or
    person who created them.
    """
    filter_by_owners = {}

    AD_BK_OWNER_QUERY["where"]["and"].append(
        {"in": {"task.id": taskids}},
    )
    owner_data = query_activedata(AD_BK_OWNER_QUERY)

    for c, taskid in enumerate(owner_data["task.id"]):
        possible_owners = [o for o in owner_data["task.tags.value"][c] if o]
        if not possible_owners:
            # Missing owner information
            continue

        # There should only ever be one owner. If
        # any of the requested owners matches it,
        # then we keep this task and download
        # artifacts from it.
        task_owner = possible_owners[0]
        for owner in owners:
            if owner in task_owner:
                filter_by_owners[taskid] = True
                break

    return filter_by_owners


def generate_backfill_report(
    start_date="",
    end_date="",
    task_name_regex="",
    talos=False,
    raptor=False,
    browsertime=False,
    awsy=False,
    symbols=[],
    branches=["autoland"],
    find_long_tasks=False,
    owners=[],
    additional_conditions=[],
    no_cache=False,
    clobber_cache=False,
):
    """
    The report generation works as follows:
        (i):   Find all backfill tasks between the given dates.
               If no dates are given, we look over the past year.
               If only a start date is given, we look from then to now.
               If only an end date is given, we look from 1 year ago up
               to the end date.

        (ii):  Using the backfill tasks that were found, download all
               the to-run-<PUSH_ID>.json and label-to-taskid-<PUSH_ID>.json
               files.

        (iii): For each to-run file, find the tests that are being
               retriggered and their task IDs. Then, obtain the sum of
               the runtimes for all of those tasks.
    """
    if clobber_cache and os.path.exists(BACKFILL_CACHE):
        shutil.rmtree(BACKFILL_CACHE)

    if no_cache:
        print("Not caching downloaded data")
    else:
        print("Downloaded data will be cached here: %s" % BACKFILL_CACHE)
        os.makedirs(BACKFILL_CACHE, exist_ok=True)

    conditions = [
        {"eq": {"job.type.symbol": "Bk"}},
        {"in": {"repo.branch.name": branches}},
    ]

    where_clause = {"and": conditions}

    # Set up the time range
    if end_date:
        conditions.append({"lt": {"action.start_time": {"date": str(end_date)}}})
    if start_date:
        conditions.append({"gte": {"action.start_time": {"date": str(start_date)}}})
    else:
        # Restrict the query to 1 year back
        print("Setting the start date to 1 year ago. This query will take some time...")
        conditions.append({"gte": {"action.start_time": {"date": "today-year"}}})

    if start_date or end_date:
        print(
            "Date specifications detected. "
            "Ensure that they follow these guidelines: "
            "https://github.com/mozilla/ActiveData/blob/dev/docs/jx_time.md"
        )

    # Query ActiveData for the backfilled tasks
    AD_BACKFILL_QUERY["where"] = where_clause
    debug(json.dumps(AD_BACKFILL_QUERY, indent=4))
    data = query_activedata(AD_BACKFILL_QUERY)

    if "build.revision" not in data:
        print("No backfill tasks found for the given time range")
        return

    debug("Analyzing backfills performed on the revisions: %s" % data["build.revision"])

    # Find the tasks that are specific to the requested owners
    filter_by_owners = {}
    if owners:
        # Get the owners of the backfills if needed
        print("Getting backfill task owner information...")
        filter_by_owners = get_owner_information(owners, data["run.taskcluster.id"])

    # Go through all the URL groupings and match up data from each PUSHID
    alltaskids = []
    total_groups = len(data["job.details.url"])
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
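
Both versions share the same entry point, so a hedged usage sketch applies to
either file. The argument values below are assumptions for illustration; the
real scripts wire these up from their own command-line parsing:

# Hypothetical driver. The relative-date strings follow ActiveData's jx_time
# syntax (see the guidelines URL printed above), matching the "today-year"
# default used in the code.
generate_backfill_report(
    start_date="today-week",            # last 7 days
    end_date="today",
    branches=["autoland"],
    owners=["someone@mozilla.com"],     # substring-matched against the task owner tag
)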



