def parse_arguments()

in ambari-infra-solr-client/src/main/python/solrDataManager.py [0:0]


def parse_arguments():
  """Parse, validate and echo the command line options of the Solr Data Manager.

  The options are read from the process command line through optparse. After
  parsing, the combinations are validated in a fixed order; on the first
  violation a message and the usage help are printed and the process exits via
  sys.exit(). When every check passes, a summary of the effective arguments is
  printed. If an additional filter is given without a name, the user is asked
  interactively on stdin whether to proceed; answering "no" exits.

  Returns:
    The optparse values object holding the parsed options.

  Raises:
    SystemExit: on any validation failure, or when the user declines to proceed.
  """
  parser = optparse.OptionParser("usage: %prog [options]", version="Solr Data Manager {0}".format(VERSION))

  parser.add_option("-m", "--mode", dest="mode", type="string", help="archive | delete | save")
  parser.add_option("-s", "--solr-url", dest="solr_url", type="string", help="the url of the solr server including the port and protocol")
  parser.add_option("-c", "--collection", dest="collection", type="string", help="the name of the solr collection")
  parser.add_option("-f", "--filter-field", dest="filter_field", type="string", help="the name of the field to filter on")
  parser.add_option("-r", "--read-block-size", dest="read_block_size", type="int", help="block size to use for reading from solr",
                    default=1000)
  parser.add_option("-w", "--write-block-size", dest="write_block_size", type="int", help="number of records in the output files",
                    default=100000)
  parser.add_option("-i", "--id-field", dest="id_field", type="string", help="the name of the id field", default="id")

  end_group = optparse.OptionGroup(parser, "specifying the end of the range")
  end_group.add_option("-e", "--end", dest="end", type="string", help="end of the range")
  end_group.add_option("-d", "--days", dest="days", type="int", help="number of days to keep")
  parser.add_option_group(end_group)

  parser.add_option("-o", "--date-format", dest="date_format", type="string", help="the date format to use for --days",
                    default="%Y-%m-%dT%H:%M:%S.%fZ")

  parser.add_option("-q", "--additional-filter", dest="additional_filter", type="string", help="additional solr filter")
  parser.add_option("-j", "--name", dest="name", type="string", help="name included in result files")

  parser.add_option("-g", "--ignore-unfinished-uploading", dest="ignore_unfinished_uploading", action="store_true", default=False)

  parser.add_option("--json-file", dest="json_file", help="create a json file instead of line delimited json", action="store_true", default=False)
  parser.add_option("-z", "--compression", dest="compression", help="none | tar.gz | tar.bz2 | zip | gz", default="gz")

  parser.add_option("-k", "--solr-keytab", dest="solr_keytab", type="string", help="the keytab for a kerberized solr")
  parser.add_option("-n", "--solr-principal", dest="solr_principal", type="string", help="the principal for a kerberized solr")

  parser.add_option("-a", "--hdfs-keytab", dest="hdfs_keytab", type="string", help="the keytab for a kerberized hdfs")
  parser.add_option("-l", "--hdfs-principal", dest="hdfs_principal", type="string", help="the principal for a kerberized hdfs")

  parser.add_option("-u", "--hdfs-user", dest="hdfs_user", type="string", help="the user for accessing hdfs")
  parser.add_option("-p", "--hdfs-path", dest="hdfs_path", type="string", help="the hdfs path to upload to")

  parser.add_option("-t", "--key-file-path", dest="key_file_path", type="string", help="the file that contains S3 <accessKey>,<secretKey>")
  parser.add_option("-b", "--bucket", dest="bucket", type="string", help="the bucket name for S3 upload")
  parser.add_option("-y", "--key-prefix", dest="key_prefix", type="string", help="the key prefix for S3 upload")

  parser.add_option("-x", "--local-path", dest="local_path", type="string", help="the local path to save the files to")

  parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False)

  parser.add_option("--solr-output-collection", dest="solr_output_collection", help="target output solr collection for archive", type="string", default=None)
  parser.add_option("--solr-output-url", dest="solr_output_url", default=None, type="string", help="the url of the output solr server including the port and protocol")
  parser.add_option("--exclude-fields", dest="exclude_fields", help="Comma separated list of excluded fields from json response", type="string", default=None)
  parser.add_option("--skip-date-usage", dest="skip_date_usage", action="store_true", default=False, help="datestamp field won't be used for queries (sort based on id field)")

  # Positional arguments are not used by this tool.
  options, _ = parser.parse_args()

  def fail(message):
    """Print the problem and the usage help, then terminate the process."""
    print(message)
    parser.print_help()
    # NOTE(review): exits with the default status (0), as the code always did;
    # consider a non-zero status once callers are confirmed not to rely on it.
    sys.exit()

  # --- validation (the order of the checks determines which message is shown) ---

  for required in ["mode", "solr_url", "collection"]:
    if getattr(options, required) is None:
      fail("argument '{0}' is mandatory".format(required))

  # The filter field is only needed when queries are based on the date field.
  if not options.skip_date_usage and options.filter_field is None:
    fail("argument 'filter_field' is mandatory")

  mode_values = ["archive", "delete", "save"]
  if options.mode not in mode_values:
    fail("mode must be one of {0}".format(" | ".join(mode_values)))

  if options.mode == "delete":
    # Delete mode produces no output files, so no target may be configured.
    for forbidden in ["name", "hdfs_keytab", "hdfs_principal", "hdfs_user", "hdfs_path", "key_file_path", "bucket",
                      "key_prefix", "local_path"]:
      if getattr(options, forbidden) is not None:
        fail("argument '{0}' may not be specified in delete mode".format(forbidden))

  has_end = options.end is not None
  has_days = options.days is not None
  # Parenthesised to make the operator precedence explicit: giving neither is
  # only tolerated with --skip-date-usage, giving both is always an error.
  if (not options.skip_date_usage and not has_end and not has_days) or (has_end and has_days):
    fail("exactly one of 'end' or 'days' must be specfied")

  solr_kerberos = [options.solr_keytab is not None, options.solr_principal is not None]
  if any(solr_kerberos) and not all(solr_kerberos):
    fail("either both 'solr-keytab' and 'solr-principal' must be specfied, or neither of them")

  compression_values = ["none", "tar.gz", "tar.bz2", "zip", "gz"]
  if options.compression not in compression_values:
    fail("compression must be one of {0}".format(" | ".join(compression_values)))

  is_any_solr_output_property = options.solr_output_collection is not None

  hdfs_kerberos = [options.hdfs_keytab is not None, options.hdfs_principal is not None]
  if any(hdfs_kerberos) and not all(hdfs_kerberos):
    fail("either both 'hdfs_keytab' and 'hdfs_principal' must be specfied, or neither of them")

  hdfs_properties = [options.hdfs_user is not None, options.hdfs_path is not None]
  is_any_hdfs_property = any(hdfs_properties)
  if is_any_hdfs_property and not all(hdfs_properties):
    fail("either both 'hdfs_user' and 'hdfs_path' must be specfied, or neither of them")

  s3_properties = [options.key_file_path is not None, options.bucket is not None, options.key_prefix is not None]
  is_any_s3_property = any(s3_properties)
  if is_any_s3_property and not all(s3_properties):
    fail("either all the S3 arguments ('key_file_path', 'bucket', 'key_prefix') must be specfied, or none of them")

  writes_output = options.mode in ["archive", "save"]
  if writes_output:
    # Exactly one output target (solr collection, HDFS, S3 or local path) is allowed.
    targets = [is_any_solr_output_property, is_any_hdfs_property, is_any_s3_property, options.local_path is not None]
    if sum(1 for target in targets if target) != 1:
      fail("exactly one of the HDFS arguments ('hdfs_user', 'hdfs_path') or the S3 arguments ('key_file_path', 'bucket', 'key_prefix') or the solr arguments ('solr_output_collection') or the 'local_path' argument must be specified")

  if options.hdfs_keytab is not None and options.hdfs_user is None:
    fail("HDFS kerberos keytab and principal may only be specified if the upload target is HDFS")

  # --- summary of the effective arguments ---

  print("You are running Solr Data Manager {0} with arguments:".format(VERSION))
  print("  mode: " + options.mode)
  print("  solr-url: " + options.solr_url)
  print("  collection: " + options.collection)
  if options.filter_field is not None:
    print("  filter-field: " + options.filter_field)
  if writes_output:
    print("  id-field: " + options.id_field)
  if options.exclude_fields is not None:
    print("  exclude fields: " + options.exclude_fields)
  if has_end:
    print("  end: " + options.end)
  else:
    print("  days: " + str(options.days))
    print("  date-format: " + options.date_format)
  if options.additional_filter is not None:
    print("  additional-filter: " + str(options.additional_filter))
  if options.name is not None:
    print("  name: " + str(options.name))
  if writes_output:
    print("  read-block-size: " + str(options.read_block_size))
    print("  write-block-size: " + str(options.write_block_size))
    print("  ignore-unfinished-uploading: " + str(options.ignore_unfinished_uploading))
  if options.solr_keytab is not None:
    # The principal is guaranteed to be set as well by the validation above.
    print("  solr-keytab: " + options.solr_keytab)
    print("  solr-principal: " + options.solr_principal)
  if writes_output:
    print("  output: " + ("json" if options.json_file else "line-delimited-json"))
    print("  compression: " + options.compression)
  if options.solr_output_collection is not None:
    print("  solr output collection: " + options.solr_output_collection)
  if options.solr_output_url is not None:
    # Print the url itself; printing the output collection here showed the wrong
    # value and failed with a TypeError when no output collection was given.
    print("  solr output url: " + options.solr_output_url)
  if options.hdfs_keytab is not None:
    print("  hdfs-keytab: " + options.hdfs_keytab)
    print("  hdfs-principal: " + options.hdfs_principal)
  if options.hdfs_user is not None:
    print("  hdfs-user: " + options.hdfs_user)
    print("  hdfs-path: " + options.hdfs_path)
  if options.key_file_path is not None:
    print("  key-file-path: " + options.key_file_path)
    print("  bucket: " + options.bucket)
    print("  key-prefix: " + options.key_prefix)
  if options.local_path is not None:
    print("  local-path: " + options.local_path)
  print("  skip-date-usage: " + str(options.skip_date_usage))
  print("  verbose: " + str(options.verbose))
  print()

  # --- interactive confirmation ---

  if options.additional_filter is not None and options.name is None:
    # Keep asking until a recognisable yes/no answer is read from stdin.
    while True:
      sys.stdout.write("It is recommended to set --name in case of any additional filter is set.\n")
      sys.stdout.write("Are you sure that you want to proceed without a name (yes/no)? ")
      choice = input().lower()
      if choice in ['yes', 'ye', 'y']:
        break
      if choice in ['no', 'n']:
        sys.exit()

  return options