in petastorm/benchmark/cli.py [0:0]
def _parse_args(args):
    """Parse the command line arguments of the benchmark tool.

    :param args: A list of command line argument strings (without the program name). Passed as-is to
      ``argparse.ArgumentParser.parse_args``, which falls back to ``sys.argv[1:]`` when given ``None``.
    :return: An ``argparse.Namespace`` with the parsed values. ``min_after_dequeue`` is always populated with
      an integer: either the value given on the command line, or a default derived from
      ``shuffling_queue_size``.
    """
    # If min-after-dequeue value is not explicitly set from the command line, it will be calculated from the total
    # shuffling queue size multiplied by this ratio
    DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO = 0.8

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('dataset_path', type=str, help='Path to a petastorm dataset')
    parser.add_argument('--field-regex', type=str, nargs='+',
                        help='A list of regular expressions. Only fields that match one of the regex patterns will '
                             'be used during the benchmark.')
    parser.add_argument('-w', '--workers-count', type=int, default=3,
                        help='Number of workers used by the reader')
    parser.add_argument('-p', '--pool-type', type=WorkerPoolType, default=WorkerPoolType.THREAD,
                        choices=list(WorkerPoolType),
                        help='Type of a worker pool used by the reader')

    parser.add_argument('-m', '--warmup-cycles', type=int, default=200,
                        help='Number of warmup read cycles. Warmup read cycles run before measurement cycles and '
                             'the throughput during these cycles is not accounted for in the reported results.')
    parser.add_argument('-n', '--measure-cycles', type=int, default=1000,
                        help='Number cycles used for benchmark measurements. Measurements cycles are run after '
                             'warmup cycles.')

    parser.add_argument('--profile-threads', dest='profile_threads', action='store_true',
                        help='Enables profiling threads. Will print result when thread pool is shut down.')

    parser.add_argument('-d', '--read-method', type=ReadMethod, choices=list(ReadMethod),
                        default=ReadMethod.PYTHON,
                        help='Which read mode to use: \'python\': using python implementation. '
                             '\'tf\': constructing a small TF graph streaming data from pure python implementation.')

    parser.add_argument('-q', '--shuffling-queue-size', type=int, default=500, required=False,
                        help='Size of the shuffling queue used to decorrelate row-group chunks. ')

    # The literal '%%' is intentional: argparse applies %-formatting to help strings, so it renders as a single '%'.
    parser.add_argument('--min-after-dequeue', type=int, default=None, required=False,
                        help='Minimum number of elements in a shuffling queue before entries can be read from it. '
                             'Default value is set to {}%% of the --shuffling-queue-size '
                             'parameter'.format(100 * DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO))

    parser.add_argument('--pyarrow-serialize', action='store_true', required=False,
                        help='When specified, faster pyarrow.serialize library is used. However, it does not support '
                             'all data types and implicitly converts some datatypes (e.g. int64->int32) which may '
                             'trigger errors when reading the data from Tensorflow.')

    parser.add_argument('-vv', action='store_true', default=False, help='Sets logging level to DEBUG.')
    parser.add_argument('-v', action='store_true', default=False, help='Sets logging level to INFO.')

    args = parser.parse_args(args)

    # Compare against None rather than relying on truthiness: an explicit '--min-after-dequeue 0' is a valid
    # user choice and must not be replaced by the computed default.
    if args.min_after_dequeue is None:
        # The option is declared with type=int, so keep the derived default an int as well (the ratio is a float).
        args.min_after_dequeue = int(DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO * args.shuffling_queue_size)

    return args