in api/controllers/models.py [0:0]
def do_upload_via_predictions(credentials, tid, model_name):
    """Create a model entry for task `tid` from user-uploaded prediction files.

    Each uploaded file (one per dataset, keyed by dataset name in the multipart
    form) must be UTF-8 JSONL with a "uid" field on every example. Predictions
    are required for every leaderboard (scoring) dataset; other datasets are
    optional. Each accepted file is rewritten to a temp file ("uid" renamed to
    "id") and handed to `_eval_dataset` for evaluation.

    :param credentials: decoded auth payload; "id" is the requesting user's id.
    :param tid: id of the task the predictions are for.
    :param model_name: display name for the new model record.
    :return: JSON string ``{"success": "ok", "model_id": <id>}``.

    Aborts with 404 (unknown user), 403 (task disallows prediction uploads),
    429 (submission rate limit in prod), or 400 (missing/ill-formed uploads).
    """
    u = UserModel()
    user_id = credentials["id"]
    user = u.get(user_id)
    if not user:
        logger.error("Invalid user detail for id (%s)" % (user_id))
        bottle.abort(404, "User information not found")

    tm = TaskModel()
    task = tm.get(tid)
    if not task.has_predictions_upload:
        bottle.abort(
            403,
            """This task does not allow prediction uploads. Submit a model instead.""",
        )

    # Rate-limit: in prod, cap submissions per user within the task's
    # configured rolling window (dynalab_hr_diff hours).
    m = ModelModel()
    if (
        bottle.default_app().config["mode"] == "prod"
        and m.getCountByUidTidAndHrDiff(
            user_id, tid=task.id, hr_diff=task.dynalab_hr_diff
        )
        >= task.dynalab_threshold
    ):
        logger.error("Submission limit reached for user (%s)" % (user_id))
        bottle.abort(429, "Submission limit reached")

    dm = DatasetModel()
    datasets = list(dm.getByTid(tid))
    # Users don't need to upload preds for all datasets: keep only the
    # dataset names that actually have a file attached to the request.
    uploads = {}
    for dataset in datasets:
        upload = bottle.request.files.get(dataset.name)
        if upload is not None:
            uploads[dataset.name] = upload

    # Leaderboard (scoring) datasets are mandatory, though.
    for dataset in datasets:
        if (
            dataset.access_type == AccessTypeEnum.scoring
            and dataset.name not in uploads
        ):
            bottle.abort(400, "Need to upload predictions for all leaderboard datasets")

    # Ensure correct format: UTF-8 JSONL, "uid" on every example, and
    # task-specific verification of each prediction.
    parsed_uploads = {}
    for name, upload in uploads.items():
        try:
            parsed_upload = [
                util.json_decode(line)
                for line in upload.file.read().decode("utf-8").splitlines()
            ]
        except Exception as ex:
            logger.exception(ex)
            bottle.abort(400, "Could not parse prediction file. Is it a utf-8 jsonl?")
        for example in parsed_upload:
            # Explicit check instead of `assert`: asserts are stripped
            # under `python -O`, which would silently skip validation.
            if "uid" not in example:
                bottle.abort(400, "'uid' must be present for every example")
            verified, message = task.verify_annotation(
                example, mode=AnnotationVerifierMode.predictions_upload
            )
            if not verified:
                bottle.abort(400, message)
        parsed_uploads[name] = parsed_upload

    endpoint_name = f"ts{int(time.time())}-{model_name}"
    status_dict = {}
    # Create local model db object
    model = m.create(
        task_id=tid,
        user_id=user_id,
        name=model_name,
        shortname="",
        longdesc="",
        desc="",
        upload_datetime=db.sql.func.now(),
        endpoint_name=endpoint_name,
        deployment_status=DeploymentStatusEnum.predictions_upload,
        secret=secrets.token_hex(),
    )
    # NOTE: the original code wrapped this loop in a second, redundant
    # NamedTemporaryFile context that shadowed `tmp` and leaked an empty
    # temp file on every call; it has been removed.
    for dataset_name, parsed_upload in parsed_uploads.items():
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp:
            for datum in parsed_upload:
                # TODO: right now, dynalab models expect an input with
                # "uid" but output "id" in their predictions. Why do we
                # use two seperate names for the same thing? Can we make
                # this consistent?
                datum["id"] = datum["uid"]
                del datum["uid"]
                tmp.write(util.json_encode(datum) + "\n")
            tmp.close()
            # TODO(review): delete=False means these temp files are never
            # cleaned up — confirm whether _eval_dataset needs the file to
            # outlive this request before adding an os.remove here.
            ret = _eval_dataset(dataset_name, endpoint_name, model, task, tmp.name)
            status_dict.update(ret)
    return util.json_encode({"success": "ok", "model_id": model.id})