in ludwig/collect.py [0:0]
def cli_collect_activations(sys_argv):
"""Command Line Interface to communicate with the collection of tensors and
there are several options that can specified when calling this function:
--data_csv: Filepath for the input csv
--data_hdf5: Filepath for the input hdf5 file, if there is a csv file, this
is not read
--d: Refers to the dataset type of the file being read, by default is
*generic*
--s: Refers to the split of the data, can be one of: train, test,
validation, full
--m: Input model that is necessary to collect to the tensors, this is a
required *option*
--t: Tensors to collect
--od: Output directory of the model, defaults to results
--bs: Batch size
--g: Number of gpus that are to be used
--gf: Fraction of each GPUs memory to use.
--dbg: Debug if the model is to be started with python debugger
--v: Verbose: Defines the logging level that the user will be exposed to
"""
parser = argparse.ArgumentParser(
description='This script loads a pretrained model and uses it collect '
'tensors for each datapoint in the dataset.',
prog='ludwig collect_activations',
usage='%(prog)s [options]')
# ---------------
# Data parameters
# ---------------
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('--data_csv', help='input data CSV file')
group.add_argument('--data_hdf5', help='input data HDF5 file')
parser.add_argument(
'-s',
'--split',
default=TEST,
choices=[TRAINING, VALIDATION, TEST, FULL],
help='the split to test the model on'
)
# ----------------
# Model parameters
# ----------------
parser.add_argument(
'-m',
'--model_path',
help='model to load',
required=True
)
parser.add_argument(
'-t',
'--tensors',
help='tensors to collect',
nargs='+',
required=True
)
# -------------------------
# Output results parameters
# -------------------------
parser.add_argument(
'-od',
'--output_directory',
type=str,
default='results',
help='directory that contains the results'
)
# ------------------
# Generic parameters
# ------------------
parser.add_argument(
'-bs',
'--batch_size',
type=int,
default=128,
help='size of batches'
)
# ------------------
# Runtime parameters
# ------------------
parser.add_argument(
'-g',
'--gpus',
type=int,
default=0,
help='list of gpu to use'
)
parser.add_argument(
'-gml',
'--gpu_memory_limit',
type=int,
default=None,
help='maximum memory in MB to allocate per GPU device'
)
parser.add_argument(
'-dpt',
'--disable_parallel_threads',
action='store_false',
dest='allow_parallel_threads',
help='disable TensorFlow from using multithreading for reproducibility'
)
parser.add_argument(
'-dbg',
'--debug',
action='store_true',
default=False,
help='enables debugging mode'
)
parser.add_argument(
'-l',
'--logging_level',
default='info',
help='the level of logging to use',
choices=['critical', 'error', 'warning', 'info', 'debug', 'notset']
)
args = parser.parse_args(sys_argv)
logging.getLogger('ludwig').setLevel(
logging_level_registry[args.logging_level]
)
global logger
logger = logging.getLogger('ludwig.collect')
print_ludwig('Collect Activations', LUDWIG_VERSION)
collect_activations(**vars(args))