def _schedule_log_parsing()

in treeherder/etl/jobs.py


def _schedule_log_parsing(job, job_logs, result, repository):
    """Kick off the initial task that parses the log data.

    log_data is a list of job log objects and the result for that job
    """

    # importing here to avoid an import loop
    from treeherder.log_parser.tasks import parse_logs

    # needed for the JobLog.PENDING status check below
    from treeherder.model.models import JobLog

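    # the only log types we schedule parsing for; anything else is
    # skipped below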
    task_types = {"errorsummary_json", "live_backing_log"}
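    # repos whose failure logs are routed to the dedicated *_sheriffed
    # queues; all other repos fall through to *_unsheriffed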
    sheriffed_repos = {
        "autoland",
        "mozilla-central",
        "mozilla-beta",
        "mozilla-release",
        "mozilla-esr115",
        "mozilla-esr128",
        "reference-browser",
        "toolchains",
    }

    for job_log in job_logs:
        # a log can be submitted already parsed, so only schedule a
        # parsing task if its status is ``pending``; for pre-parsed logs
        # the submitter is responsible for submitting the
        # text_log_summary artifact
        if job_log.status != JobLog.PENDING:
            continue

        # skip log types we don't know how to parse
        if job_log.name not in task_types:
            continue

        # TODO: Replace the use of different queues for failures vs not with the
        # RabbitMQ priority feature (since the idea behind separate queues was
        # only to ensure failures are dealt with first if there is a backlog).
        if result != "success":
            priority = "failures"
            if job_log.name == "errorsummary_json":
                queue = "log_parser_fail_json"
            else:
                queue = "log_parser_fail_raw"
            if repository.name in sheriffed_repos:
                queue += "_sheriffed"
            else:
                queue += "_unsheriffed"
        else:
            queue = "log_parser"
            priority = "normal"

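        # schedule a separate parse_logs task per log, on the queue and
        # at the priority chosen above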
        parse_logs.apply_async(queue=queue, args=[job.id, [job_log.id], priority])
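
For illustration, here is a standalone sketch of the queue/priority
routing above, with a few worked cases. The route() helper and the
"try" repo name are hypothetical stand-ins that mirror the selection
logic; they are not part of Treeherder's API.

def route(result, log_name, repo_name, sheriffed_repos):
    # mirrors the queue/priority selection in _schedule_log_parsing
    if result != "success":
        priority = "failures"
        if log_name == "errorsummary_json":
            queue = "log_parser_fail_json"
        else:
            queue = "log_parser_fail_raw"
        if repo_name in sheriffed_repos:
            queue += "_sheriffed"
        else:
            queue += "_unsheriffed"
    else:
        queue, priority = "log_parser", "normal"
    return queue, priority

sheriffed = {"autoland", "mozilla-central"}
assert route("testfailed", "errorsummary_json", "autoland", sheriffed) == (
    "log_parser_fail_json_sheriffed",
    "failures",
)
assert route("testfailed", "live_backing_log", "try", sheriffed) == (
    "log_parser_fail_raw_unsheriffed",
    "failures",
)
assert route("success", "live_backing_log", "autoland", sheriffed) == (
    "log_parser",
    "normal",
)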