# in datasets.py [0:0]
def fashionGenSetup(fashionGenPath,
                    outputPath):
    """Build the partition index and label statistics for a FashionGen HDF5 dump.

    Scans every image entry of the input HDF5 file, groups indices by the
    ``input_department`` attribute, and counts label occurrences for the
    gender / category / pose attributes, both per department and globally.

    Args:
        fashionGenPath: path to the FashionGen HDF5 file (read-only).
        outputPath: base path for the outputs; a directory named after its
            stem is created, and the two output files are written inside it.

    Returns:
        (pathPartition, pathStats): path of the HDF5 file mapping each
        department name to its numpy array of image indices, and path of the
        JSON file holding the label statistics.
    """
    # Create <stem>/ next to outputPath and redirect outputs into it,
    # keeping the stem as the file-name prefix (original behavior).
    basePath = os.path.splitext(outputPath)[0]
    if not os.path.isdir(basePath):
        os.mkdir(basePath)
    outputPath = os.path.join(basePath, os.path.basename(basePath))

    imgKey = 'input_image'
    validClasses = ["input_gender", "input_category", "input_pose"]
    partitionCategory = "input_department"

    outIndexes = {}
    statsPartition = {"GLOBAL": {"input_department": {},
                                 "totalSize": 0}}
    for attribute in validClasses:
        statsPartition["GLOBAL"][attribute] = {}

    print("Building the partition..")
    # Explicit read-only mode; context manager guarantees the handle is
    # closed even if an entry in the loop raises.
    with h5py.File(fashionGenPath, 'r') as h5file:
        nImgs = h5file[imgKey].shape[0]
        for index in range(nImgs):
            # Values come back as the repr of a bytes object, e.g. "b'MEN'";
            # rawVal keeps that form (used as the GLOBAL stats key, as in the
            # original), strVal is the cleaned department name.
            rawVal = str(h5file[partitionCategory][index][0])
            strVal = rawVal.replace("b'", "").replace("'", "")

            # Hand-made fix for the clothing dataset : some pose attributes
            # correspond only to miss-labelled data
            if strVal == "CLOTHING" \
                    and str(h5file["input_pose"][index][0]) in \
                    ["b'id_gridfs_6'", "b'id_gridfs_5'"]:
                continue

            # First time this department is seen: initialize its buckets.
            if strVal not in statsPartition:
                statsPartition[strVal] = {attribute: {}
                                          for attribute in validClasses}
                statsPartition[strVal]["totalSize"] = 0
                outIndexes[strVal] = []
                statsPartition["GLOBAL"]["input_department"][rawVal] = 0

            outIndexes[strVal].append(index)
            statsPartition[strVal]["totalSize"] += 1
            statsPartition["GLOBAL"]["input_department"][rawVal] += 1
            statsPartition["GLOBAL"]["totalSize"] += 1

            for attribute in validClasses:
                label = str(h5file[attribute][index][0])
                if label not in statsPartition[strVal][attribute]:
                    statsPartition[strVal][attribute][label] = 0
                if label not in statsPartition["GLOBAL"][attribute]:
                    statsPartition["GLOBAL"][attribute][label] = 0
                statsPartition[strVal][attribute][label] += 1
                statsPartition["GLOBAL"][attribute][label] += 1
            printProgressBar(index, nImgs)
        printProgressBar(nImgs, nImgs)

    # One dataset per department: the indices of its images.
    pathPartition = outputPath + "_partition.h5"
    with h5py.File(pathPartition, 'w') as fOut:
        for key, value in outIndexes.items():
            fOut.create_dataset(key, data=np.array(value))

    pathStats = outputPath + "_stats.json"
    with open(pathStats, 'w') as statsFile:
        json.dump(statsPartition, statsFile, indent=2)

    return pathPartition, pathStats