in build_and_upload.py [0:0]
def main():
    """Build a video dataset from local files and upload it to Hugging Face.

    Command-line flags:
        --video-dir            Directory containing source video files (required).
        --metadata-dir         Directory containing metadata JSON files (required).
        --hf-dataset-name      Target dataset name, e.g. 'username/dataset-name'
                               (required).
        --examples-per-folder  Maximum examples per folder; must be in 1..10000
                               (default 9500).
        --max-examples         Optional cap on the total number of examples; must
                               not be negative.
        --temp-dir             Working directory for the dataset. When omitted a
                               fresh temporary directory is created and removed
                               again on exit; a user-supplied directory is never
                               deleted.

    Exits with status 1 when an argument fails validation or when
    ``create_video_dataset`` reports that no examples were processed.
    """
    import os  # local import so this block does not rely on a file-level one

    parser = argparse.ArgumentParser(
        description="Create and upload a video dataset to Hugging Face"
    )
    parser.add_argument("--video-dir", required=True,
                        help="Directory containing source video files")
    parser.add_argument("--metadata-dir", required=True,
                        help="Directory containing metadata JSON files")
    parser.add_argument("--hf-dataset-name", required=True,
                        help="Hugging Face dataset name (e.g., 'username/dataset-name')")
    parser.add_argument("--examples-per-folder", type=int, default=9500,
                        help="Maximum examples per folder (max 10000)")
    parser.add_argument("--max-examples", type=int,
                        help="Maximum total examples to process")
    parser.add_argument("--temp-dir",
                        help="Temporary directory for dataset creation "
                             "(default: system temp directory)")
    args = parser.parse_args()

    # Validate everything up front, before any directory is created, so a bad
    # invocation leaves nothing behind. Diagnostics go to stderr.
    if args.examples_per_folder > 10000:
        print("Error: examples-per-folder cannot exceed 10000", file=sys.stderr)
        sys.exit(1)
    if args.examples_per_folder <= 0:
        print("Error: examples-per-folder must be greater than 0", file=sys.stderr)
        sys.exit(1)
    if args.max_examples is not None and args.max_examples < 0:
        print("Error: max-examples cannot be negative", file=sys.stderr)
        sys.exit(1)

    # Remember who owns the directory: only one we created ourselves is removed.
    created_temp_dir = not args.temp_dir
    if created_temp_dir:
        temp_base_dir = tempfile.mkdtemp()
    else:
        temp_base_dir = args.temp_dir
        # A user-supplied working directory may not exist yet.
        os.makedirs(temp_base_dir, exist_ok=True)
    print(f"Using temporary directory: {temp_base_dir}")

    try:
        # Create dataset
        processed_examples = create_video_dataset(
            source_video_dir=args.video_dir,
            source_metadata_dir=args.metadata_dir,
            output_base_dir=temp_base_dir,
            examples_per_folder=args.examples_per_folder,
            max_total_examples=args.max_examples,
        )

        if processed_examples > 0:
            # Upload to Hugging Face
            upload_to_huggingface(temp_base_dir, args.hf_dataset_name)
        else:
            print("No examples were processed. Aborting upload.")
            # SystemExit still passes through the finally clause below.
            sys.exit(1)
    finally:
        if created_temp_dir:
            shutil.rmtree(temp_base_dir, ignore_errors=True)