in training/data.py [0:0]
def __call__(self, x) -> bool:
    """Decide whether a sample passes the configured metadata filters.

    The sample's ``"json"`` entry is decoded and checked, in order, against
    the size, watermark, aesthetic, spawning, getty and nsfw requirements.
    For every requirement, if the necessary key(s) are not present the
    requirement is assumed not to hold. Several alternative keys are
    consulted per requirement because different datasets ship different
    metadata dicts.

    Args:
        x: Sample supporting ``in`` and item access; its ``"json"`` entry
            is expected to be a JSON-encoded object.

    Returns:
        ``True`` if every enabled requirement holds, ``False`` otherwise.
        A missing, undecodable or non-object ``"json"`` payload yields
        ``False``.
    """
    if "json" not in x:
        return False
    try:
        x_json = json.loads(x["json"])
    except (TypeError, ValueError, RecursionError):
        # TypeError: payload is not str/bytes/bytearray. ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError. RecursionError:
        # pathologically nested input. Anything else (KeyboardInterrupt,
        # SystemExit, ...) is deliberately allowed to propagate.
        return False
    if not isinstance(x_json, dict):
        # Valid JSON that is not an object (e.g. ``123`` or ``null``)
        # carries none of the required keys.
        return False

    # A missing, null or otherwise non-mapping "stability_metadata" entry is
    # treated as having no keys at all.
    stability_metadata = x_json.get("stability_metadata")
    if not isinstance(stability_metadata, dict):
        stability_metadata = {}

    # size
    if "original_width" not in x_json or "original_height" not in x_json:
        return False
    if x_json["original_width"] < self.min_size or x_json["original_height"] < self.min_size:
        return False

    # watermark: at least one score must be present, and every present score
    # must not exceed the threshold. A null "pwatermark" counts as absent.
    watermark_scores = []
    if x_json.get("pwatermark") is not None:
        watermark_scores.append(x_json["pwatermark"])
    if "watermark_score" in x_json:
        watermark_scores.append(x_json["watermark_score"])
    if "p_watermarkdf" in stability_metadata:
        watermark_scores.append(stability_metadata["p_watermarkdf"])
    if not watermark_scores:
        return False
    if any(score > self.max_pwatermark for score in watermark_scores):
        return False

    # aesthetic: at least one score must be present, and every present score
    # must reach the minimum.
    aesthetic_scores = [
        x_json[key]
        for key in ("aesthetic", "AESTHETIC_SCORE", "aesthetic_score_laion_v2")
        if key in x_json
    ]
    if "aes_scorelv2" in stability_metadata:
        aesthetic_scores.append(stability_metadata["aes_scorelv2"])
    if not aesthetic_scores:
        return False
    if any(score < self.min_aesthetic_score for score in aesthetic_scores):
        return False

    # spawning: the flag must be present and falsy.
    if self.require_marked_as_ok_by_spawning:
        if "is_spawning" not in stability_metadata or stability_metadata["is_spawning"]:
            return False

    # getty: the flag must be present and falsy.
    if self.require_marked_as_not_getty:
        if "is_getty" not in stability_metadata or stability_metadata["is_getty"]:
            return False

    # nsfw: only enforced when a threshold is configured.
    if self.max_pnsfw is not None:
        if "p_nsfwdf" not in stability_metadata or stability_metadata["p_nsfwdf"] > self.max_pnsfw:
            return False

    return True