def _parse_args()

in petastorm/benchmark/cli.py [0:0]


def _parse_args(args):
    """Parse command line arguments of the benchmark tool.

    :param args: A list of command line argument strings (without the program name). Passed as-is to
      ``argparse.ArgumentParser.parse_args``.
    :return: An ``argparse.Namespace`` with the parsed values. ``min_after_dequeue`` is always populated with an
      integer: either the value given on the command line, or a default derived from ``shuffling_queue_size``.
    """
    # If min-after-dequeue value is not explicitly set from the command line, it will be calculated from the total
    # shuffling queue size multiplied by this ratio
    DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO = 0.8

    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('dataset_path', type=str, help='Path to a petastorm dataset')
    parser.add_argument('--field-regex', type=str, nargs='+',
                        help='A list of regular expressions. Only fields that match one of the regex patterns will '
                             'be used during the benchmark.')

    parser.add_argument('-w', '--workers-count', type=int, default=3,
                        help='Number of workers used by the reader')
    parser.add_argument('-p', '--pool-type', type=WorkerPoolType, default=WorkerPoolType.THREAD,
                        choices=list(WorkerPoolType),
                        help='Type of a worker pool used by the reader')

    parser.add_argument('-m', '--warmup-cycles', type=int, default=200,
                        help='Number of warmup read cycles. Warmup read cycles run before measurement cycles and '
                             'the throughput during these cycles is not accounted for in the reported results.')
    parser.add_argument('-n', '--measure-cycles', type=int, default=1000,
                        help='Number cycles used for benchmark measurements. Measurements cycles are run after '
                             'warmup cycles.')

    parser.add_argument('--profile-threads', dest='profile_threads', action='store_true',
                        help='Enables profiling threads. Will print result when thread pool is shut down.')

    parser.add_argument('-d', '--read-method', type=ReadMethod, choices=list(ReadMethod),
                        default=ReadMethod.PYTHON,
                        help='Which read mode to use: \'python\': using python implementation. '
                             '\'tf\': constructing a small TF graph streaming data from pure python implementation.')

    parser.add_argument('-q', '--shuffling-queue-size', type=int, default=500, required=False,
                        help='Size of the shuffling queue used to decorrelate row-group chunks. ')

    parser.add_argument('--min-after-dequeue', type=int, default=None, required=False,
                        help='Minimum number of elements in a shuffling queue before entries can be read from it. '
                             'Default value is set to {}%% of the --shuffling-queue-size '
                             'parameter'.format(100 * DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO))

    # Note the explicit trailing space after 'which may': adjacent string literals are concatenated verbatim.
    parser.add_argument('--pyarrow-serialize', action='store_true', required=False,
                        help='When specified, faster pyarrow.serialize library is used. However, it does not support '
                             'all data types and implicitly converts some datatypes (e.g. int64->int32) which may '
                             'trigger errors when reading the data from Tensorflow.')

    parser.add_argument('-vv', action='store_true', default=False, help='Sets logging level to DEBUG.')
    parser.add_argument('-v', action='store_true', default=False, help='Sets logging level to INFO.')

    args = parser.parse_args(args)

    # Compare against None (the argparse default) rather than relying on truthiness, so that an explicit
    # '--min-after-dequeue 0' is honored instead of being silently replaced by the computed default.
    if args.min_after_dequeue is None:
        # The option is declared as an int; truncate the scaled queue size so the derived default is an int too.
        args.min_after_dequeue = int(DEFAULT_MIN_AFTER_DEQUEUE_TO_QUEUE_SIZE_RATIO * args.shuffling_queue_size)

    return args