in api/controllers/models.py [0:0]
def do_upload_via_predictions(credentials, tid, model_name):
    """Create a model entry for task `tid` from user-uploaded prediction files.

    Each uploaded file (one per dataset, keyed by dataset name in the multipart
    form) must be UTF-8 JSONL with a "uid" field on every example. Predictions
    are required for every leaderboard (scoring) dataset; other datasets are
    optional. Each accepted file is rewritten to a temp file ("uid" renamed to
    "id") and handed to `_eval_dataset` for evaluation.

    :param credentials: decoded auth payload; "id" is the requesting user's id.
    :param tid: id of the task the predictions are for.
    :param model_name: display name for the new model record.
    :return: JSON string ``{"success": "ok", "model_id": <id>}``.

    Aborts with 404 (unknown user), 403 (task disallows prediction uploads),
    429 (submission rate limit in prod), or 400 (missing/ill-formed uploads).
    """
    u = UserModel()
    user_id = credentials["id"]
    user = u.get(user_id)
    if not user:
        logger.error("Invalid user detail for id (%s)" % (user_id))
        bottle.abort(404, "User information not found")

    tm = TaskModel()
    task = tm.get(tid)
    if not task.has_predictions_upload:
        bottle.abort(
            403,
            """This task does not allow prediction uploads. Submit a model instead.""",
        )

    # Rate-limit: in prod, cap submissions per user within the task's
    # configured rolling window (dynalab_hr_diff hours).
    m = ModelModel()
    if (
        bottle.default_app().config["mode"] == "prod"
        and m.getCountByUidTidAndHrDiff(
            user_id, tid=task.id, hr_diff=task.dynalab_hr_diff
        )
        >= task.dynalab_threshold
    ):
        logger.error("Submission limit reached for user (%s)" % (user_id))
        bottle.abort(429, "Submission limit reached")

    dm = DatasetModel()
    datasets = list(dm.getByTid(tid))
    # Users don't need to upload preds for all datasets: keep only the
    # dataset names that actually have a file attached to the request.
    uploads = {}
    for dataset in datasets:
        upload = bottle.request.files.get(dataset.name)
        if upload is not None:
            uploads[dataset.name] = upload

    # Leaderboard (scoring) datasets are mandatory, though.
    for dataset in datasets:
        if (
            dataset.access_type == AccessTypeEnum.scoring
            and dataset.name not in uploads
        ):
            bottle.abort(400, "Need to upload predictions for all leaderboard datasets")

    # Ensure correct format: UTF-8 JSONL, "uid" on every example, and
    # task-specific verification of each prediction.
    parsed_uploads = {}
    for name, upload in uploads.items():
        try:
            parsed_upload = [
                util.json_decode(line)
                for line in upload.file.read().decode("utf-8").splitlines()
            ]
        except Exception as ex:
            logger.exception(ex)
            bottle.abort(400, "Could not parse prediction file. Is it a utf-8 jsonl?")
        for example in parsed_upload:
            # Explicit check instead of `assert`: asserts are stripped
            # under `python -O`, which would silently skip validation.
            if "uid" not in example:
                bottle.abort(400, "'uid' must be present for every example")
            verified, message = task.verify_annotation(
                example, mode=AnnotationVerifierMode.predictions_upload
            )
            if not verified:
                bottle.abort(400, message)
        parsed_uploads[name] = parsed_upload

    endpoint_name = f"ts{int(time.time())}-{model_name}"
    status_dict = {}
    # Create local model db object
    model = m.create(
        task_id=tid,
        user_id=user_id,
        name=model_name,
        shortname="",
        longdesc="",
        desc="",
        upload_datetime=db.sql.func.now(),
        endpoint_name=endpoint_name,
        deployment_status=DeploymentStatusEnum.predictions_upload,
        secret=secrets.token_hex(),
    )
    # NOTE: the original code wrapped this loop in a second, redundant
    # NamedTemporaryFile context that shadowed `tmp` and leaked an empty
    # temp file on every call; it has been removed.
    for dataset_name, parsed_upload in parsed_uploads.items():
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp:
            for datum in parsed_upload:
                # TODO: right now, dynalab models expect an input with
                # "uid" but output "id" in their predictions. Why do we
                # use two seperate names for the same thing? Can we make
                # this consistent?
                datum["id"] = datum["uid"]
                del datum["uid"]
                tmp.write(util.json_encode(datum) + "\n")
            tmp.close()
            # TODO(review): delete=False means these temp files are never
            # cleaned up — confirm whether _eval_dataset needs the file to
            # outlive this request before adding an os.remove here.
            ret = _eval_dataset(dataset_name, endpoint_name, model, task, tmp.name)
            status_dict.update(ret)
    return util.json_encode({"success": "ok", "model_id": model.id})