# training/data.py
def sdxl_synthetic_dataset_map(sample):
    """Reduce a multi-image synthetic SDXL webdataset sample to its best image.

    The sample is expected to contain a ``clip_scores.txt`` entry holding a
    comma-separated list of floats, and one ``<x>.png`` entry per score, where
    ``<x>`` is the 0-based index into that list. Only the image with the
    highest clip score is kept (ties resolve to the earliest index, matching
    the first-maximum convention).

    Args:
        sample: webdataset sample dict with at least ``__key__``, ``__url__``,
            ``txt``, ``clip_scores.txt``, and the indexed ``.png`` entries.

    Returns:
        A dict with ``__key__``, ``__url__``, ``txt``, the single best ``png``,
        and a ``json`` entry carrying micro-conditioning metadata.

    Raises:
        ValueError: if the ``.png`` entry for the best clip score is missing.
    """
    clip_scores_text = sample["clip_scores.txt"].decode("utf-8")
    clip_scores = [float(x) for x in clip_scores_text.split(",")]
    # argmax over the scores; max(..., key=...) returns the first maximal
    # index, preserving the original strict-'>' tie-breaking behavior.
    index_of_max = max(range(len(clip_scores)), key=clip_scores.__getitem__)
    key_of_best_clip_score_image = f"{index_of_max}.png"
    if key_of_best_clip_score_image not in sample:
        raise ValueError(
            f"{key_of_best_clip_score_image} was not found in sample. The dataset should have files <sample"
            " key>.<x>.png where <x> corresponds to an index of the clip scores in clip_scores.txt"
        )
    return {
        "__key__": sample["__key__"],
        "__url__": sample["__url__"],
        "txt": sample["txt"],
        "png": sample[key_of_best_clip_score_image],  # only include the image with the best clip score
        # For other datasets, we rely on the following for micro conditioning.
        # The original height and width are known because we create the dataset with
        # sdxl. The laion aesthetic score of 5 seems like a reasonable approximation
        # NOTE: we unfortunately have to serialize and encode the json so it looks like
        # it was read out of a file since wds decoders will need to decode it. There
        # is probably some way to avoid this but it is not obvious with the wds apis.
        "json": json.dumps({"aesthetic": 5, "original_width": 1024, "original_height": 1024}).encode(),
    }