in distilvit/curate_gpt.py [0:0]
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the alt-text generation script.

    Args:
        argv: Optional sequence of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            reads ``sys.argv[1:]``, so existing ``parse_args()`` callers
            behave exactly as before.

    Returns:
        The populated ``argparse.Namespace``. Dashes in option names become
        underscores in attribute names (``--data-dir`` -> ``data_dir``).

    Raises:
        SystemExit: Raised by argparse on ``--help`` or on invalid/unknown
            arguments.
    """
    parser = argparse.ArgumentParser(
        description="Generate alternative text for images in a dataset.",
        # Append each option's default value to its --help entry.
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # The only non-string option; it stays None unless given explicitly.
    parser.add_argument(
        "--sample",
        type=int,
        default=None,
        help="Run on a sample",
    )
    parser.add_argument(
        "--data-dir",
        type=str,
        default="./dummy_data",
        help="Dataset dir",
    )

    # Source / destination dataset selection.
    parser.add_argument(
        "--dataset",
        type=str,
        default="nlphuji/flickr30k",
        help="Name of the dataset to use",
    )
    parser.add_argument(
        "--target-dataset",
        type=str,
        default="mozilla/flickr30k-transformed-captions-gpt4o",
        help="Name of the target dataset to save to",
    )
    parser.add_argument(
        "--dataset-split",
        type=str,
        default="test",
        help="Name of the dataset split",
    )

    # Column names.
    parser.add_argument(
        "--image-column",
        type=str,
        default="image",
        help="Name of the image column in the dataset",
    )
    parser.add_argument(
        "--generated-alt-text-column",
        type=str,
        default="alt_text",
        help="Name of the resulting alt text column",
    )
    parser.add_argument(
        "--image-id-column",
        type=str,
        default="img_id",
        help="Name of the image id column in the dataset",
    )

    # argv=None makes argparse fall back to sys.argv[1:].
    return parser.parse_args(argv)