def main()

in build_and_upload.py [0:0]


def main() -> None:
    """Build a video dataset from local files and upload it to Hugging Face.

    Parses the command-line arguments, validates the numeric limits, calls
    ``create_video_dataset`` to materialize the dataset under a working
    directory, and, if at least one example was processed, hands that
    directory to ``upload_to_huggingface``.

    The working directory is ``--temp-dir`` when given; otherwise a fresh
    directory from ``tempfile.mkdtemp()`` is used and removed on the way
    out, whether the run succeeded or failed. A user-supplied directory is
    never deleted.

    Exits with status 1 when ``--examples-per-folder`` is outside 1..10000,
    when ``--max-examples`` is negative, or when no examples were processed.
    """
    parser = argparse.ArgumentParser(description="Create and upload a video dataset to Hugging Face")
    parser.add_argument("--video-dir", required=True, help="Directory containing source video files")
    parser.add_argument("--metadata-dir", required=True, help="Directory containing metadata JSON files")
    parser.add_argument("--hf-dataset-name", required=True, help="Hugging Face dataset name (e.g., 'username/dataset-name')")
    parser.add_argument("--examples-per-folder", type=int, default=9500,
                        help="Maximum examples per folder (max 10000)")
    parser.add_argument("--max-examples", type=int, help="Maximum total examples to process")
    parser.add_argument("--temp-dir", help="Temporary directory for dataset creation (default: system temp directory)")

    args = parser.parse_args()

    # Validate all numeric limits before touching the filesystem so a bad
    # invocation fails fast and leaves no temporary directory behind.
    if args.examples_per_folder > 10000:
        print("Error: examples-per-folder cannot exceed 10000", file=sys.stderr)
        sys.exit(1)
    elif args.examples_per_folder <= 0:
        print("Error: examples-per-folder must be greater than 0", file=sys.stderr)
        sys.exit(1)

    # A negative cap can never be meaningful. Zero is passed through
    # unchanged: its interpretation is left to create_video_dataset.
    if args.max_examples is not None and args.max_examples < 0:
        print("Error: max-examples cannot be negative", file=sys.stderr)
        sys.exit(1)

    # Use provided temp directory or create one
    temp_base_dir = args.temp_dir or tempfile.mkdtemp()
    print(f"Using temporary directory: {temp_base_dir}")

    try:
        # Create dataset
        processed_examples = create_video_dataset(
            source_video_dir=args.video_dir,
            source_metadata_dir=args.metadata_dir,
            output_base_dir=temp_base_dir,
            examples_per_folder=args.examples_per_folder,
            max_total_examples=args.max_examples,
        )

        if processed_examples > 0:
            # Upload to Hugging Face
            upload_to_huggingface(temp_base_dir, args.hf_dataset_name)
        else:
            print("No examples were processed. Aborting upload.", file=sys.stderr)
            sys.exit(1)

    finally:
        # sys.exit() raises SystemExit, so this cleanup also runs on the
        # abort path above.
        if not args.temp_dir:  # Only remove if we created the temp directory
            shutil.rmtree(temp_base_dir, ignore_errors=True)