in obelics/utils/simplification_utils.py [0:0]
def format_filename(filename):
    """Turn a file name (or the last component of a path/URL) into plain words.

    The directory part and everything from the first ``.`` onward are dropped,
    then separators, percent-encoded spaces/pluses, digits and image
    dimensions are replaced so that only space-separated words remain.

    Args:
        filename: File name, path, or URL whose last component is cleaned up.

    Returns:
        The simplified name, or ``""`` when the name looks like a query string
        or when at most one character is left after cleaning.
    """
    # TODO: refine this function, it is fairly imperfect.
    # Potential improvements: `Untitled`, `untitled`, `blank`, check whether each word is in a dictionary
    _, simp_filename = os.path.split(filename)
    # Keep only what precedes the first dot (drops the extension and any further suffixes).
    simp_filename = simp_filename.split(".")[0]
    # Example `it?ids=2019042515182454151475%3A027064510%3A001&ca=n&coo=y`
    if re.search(r"\?[A-Za-z0-9]+=", simp_filename):
        return ""
    simp_filename = re.sub(r"[_-]", " ", simp_filename)  # Example: `Chocolate_Berry_Frozen_Yogurt_Bark`
    # Example: `hearts%2Band%2Bhome%20Bbadge`. `%2B` (encoded `+`) must be consumed
    # as a whole, otherwise its `B` is left glued to the following word.
    simp_filename = re.sub(r"%2(?:[Bb]|0*)", " ", simp_filename)
    simp_filename = re.sub(r"[0-9]+x[0-9]+", "", simp_filename)  # Example: `104x403`
    simp_filename = re.sub(r"[0-9]+", " ", simp_filename)  # Example: `icon18_wrench_allbkg`
    for r in ["\n", "+", "%B", "%"]:
        simp_filename = simp_filename.replace(r, " ")
    # Collapse runs of spaces only after every substitution above has run,
    # since each of them can introduce new consecutive spaces.
    simp_filename = re.sub(r"[ ]{2,}", " ", simp_filename).strip()
    return simp_filename if len(simp_filename) > 1 else ""