in data_annotation_platform/app/utils/datasets.py [0:0]
def validate_dataset(filename):
    """Validate a dataset file and report the first problem found.

    The file must exist, parse as JSON, validate against the schema returned
    by ``load_schema()``, and pass the consistency checks performed below:
    the number of series must equal ``n_dim``, the time axis and every series
    must have ``n_obs`` entries, and JSON ``null`` values are rejected.

    Parameters
    ----------
    filename
        Path of the dataset file to check.

    Returns
    -------
    None or str
        ``None`` when every check passes, otherwise a human-readable message
        describing the first check that failed.
    """
    # isfile() rather than exists(): a directory "exists" but cannot be
    # opened for reading, which would raise instead of returning a message.
    if not os.path.isfile(filename):
        return "File not found."

    with open(filename, "rb") as fp:
        try:
            data = json.load(fp)
        except json.JSONDecodeError as err:
            return "JSON decoding error: %s" % err.msg
        except UnicodeDecodeError as err:
            # json decodes the raw bytes before parsing; invalid byte
            # sequences raise this instead of JSONDecodeError.
            return "JSON decoding error: %s" % err.reason

    try:
        schema = load_schema()
    except FileNotFoundError:
        return "Schema file not found."

    try:
        jsonschema.validate(instance=data, schema=schema)
    except jsonschema.ValidationError as err:
        return "JSONSchema validation error: %s" % err.message

    n_obs = data["n_obs"]
    n_dim = data["n_dim"]

    if len(data["series"]) != n_dim:
        return "Number of dimensions and number of series don't match"

    if "time" in data:
        time_axis = data["time"]

        # "raw" and "format" are only meaningful together.
        if "format" not in time_axis and "raw" in time_axis:
            return "'raw' must be accompanied by format"
        if "format" in time_axis and "raw" not in time_axis:
            return "Format must be accompanied by 'raw'"

        if "index" in time_axis:
            index = time_axis["index"]
            # An empty index cannot start at zero; guard before indexing so
            # an empty list yields a message rather than an IndexError.
            if not index or index[0] != 0:
                return "Index should start at zero."
            if len(index) != n_obs:
                return "Number of indices must match number of observations"

        if "raw" in time_axis:
            raw_time = time_axis["raw"]
            if len(raw_time) != n_obs:
                return "Number of time points doesn't match number of observations"
            if None in raw_time:
                return "Null is not supported in time axis. Use 'NaN' instead."

    has_missing = False
    for var in data["series"]:
        values = var["raw"]
        if len(values) != n_obs:
            return "Number of observations doesn't match for %s" % var["label"]
        if None in values:
            return "Null is not supported in series. Use 'NaN' instead."
        # Short-circuit: once a NaN has been seen, later series are not
        # scanned again.
        has_missing = has_missing or any(map(math.isnan, values))

    # this doesn't happen in any dataset yet, so let's not implement it until
    # we need it.
    if n_dim > 1 and has_missing:
        return "Missing values are not yet supported for multidimensional data"

    return None