in get_clusters.py [0:0]
def parse_args():
def str2bool(v):
    """Convert a textual truth value into a ``bool``.

    Matching is case-insensitive and ignores surrounding whitespace.
    Accepted true spellings are ``yes``, ``true``, ``t`` and ``1``;
    accepted false spellings are ``no``, ``false``, ``f`` and ``0``.

    Args:
        v: The value to convert. Normally a string; a value that is
            already a ``bool`` is returned unchanged.

    Returns:
        ``True`` or ``False`` according to the spelling of ``v``.

    Raises:
        ValueError: If ``v`` is a string that matches none of the accepted
            spellings. The message quotes the value as it was passed in.
    """
    # A real bool needs no parsing; calling .lower() on it would fail.
    if isinstance(v, bool):
        return v

    normalized = v.strip().lower()
    if normalized in ('yes', 'true', 't', '1'):
        return True
    if normalized in ('no', 'false', 'f', '0'):
        return False

    # Report the original input, not the normalized form, so the user sees
    # exactly what was rejected.
    raise ValueError('Boolean argument needs to be true or false. '
                     'Instead, it is %s.' % v)
import argparse
parser = argparse.ArgumentParser(description='Video Cluster Fit')
parser.register('type', 'bool', str2bool)
parser.add_argument('--output_dir', default='.', type=str,
help='path where to save')
parser.add_argument('--weights_path', default='', type=str,
help='Path to weights file')
parser.add_argument('--exp_desc', default='vggsound_clusters', type=str,
help='desc of exp')
parser.add_argument('--pretrained', default='False', type='bool',
help="Use pre-trained models from the modelzoo")
parser.add_argument('--dataset', default='vggsound', type=str,
choices=['kinetics', 'vggsound', 'kinetics_sound', 'ave'],
help='name of dataset')
parser.add_argument("--root_dir", type=str, default="/path/to/dataset",
help="root dir of dataset")
parser.add_argument('--mode', default='val', type=str,
help='mode of dataset')
parser.add_argument('--num_data_samples', default=14032, type=int,
help='number of samples in dataset')
# AUDIO UTILS
parser.add_argument("--num_sec_aud", type=int, default=1,
help="number of seconds of audio")
parser.add_argument("--aud_sample_rate", type=int, default=24000,
help="audio sample rate")
parser.add_argument("--aud_spec_type", type=int, default=2,
help="audio spec type")
parser.add_argument('--use_volume_jittering', type='bool', default='False',
help='use volume jittering')
parser.add_argument('--use_audio_temp_jittering', type='bool', default='False',
help='use audio temporal jittering')
parser.add_argument('--z_normalize', type='bool', default='True',
help='z-normalize the audio')
### DATA
parser.add_argument('--batch_size', default=96, type=int)
parser.add_argument('--workers', default=10, type=int,
help='number of data loading workers (default: 16)')
### MODEL
parser.add_argument("--vid_base_arch", default="r2plus1d_18", type=str,
help="video architecture", choices=['r2plus1d_18'])
parser.add_argument("--aud_base_arch", default="resnet9", type=str,
help="audio architecture", choices=['resnet9', 'resnet18'])
parser.add_argument('--use_mlp', type='bool', default='True',
help='use MLP head')
parser.add_argument('--norm_feat', type='bool', default='False',
help='normalize pre-mlp features')
parser.add_argument("--num_clusters", default=256, type=int,
help="final layer dimension in projection head")
parser.add_argument("--headcount", default=1, type=int,
help="number of heads")
# distributed training parameters
parser.add_argument("--dist_url", default="env://", type=str,
help="""url used to set up distributed