in ambari-infra-solr-client/src/main/python/solrDataManager.py [0:0]
def parse_arguments():
    """Parse, validate and echo the command line options of the Solr Data Manager.

    The options are read from ``sys.argv`` through ``optparse``. After parsing,
    the combination of options is validated (mandatory options, mode specific
    restrictions, "all or nothing" option groups, exactly one output target for
    the archive/save modes). The effective settings are then printed.

    If an additional filter is given without ``--name`` the user is asked
    interactively whether to proceed; answering "no" terminates the process.

    Returns:
        The ``optparse.Values`` object holding the parsed options.

    Raises:
        SystemExit: with status 1 (after printing a message and the usage text)
            when the options are invalid; with the default status when the user
            declines the interactive confirmation.
    """
    parser = optparse.OptionParser("usage: %prog [options]", version="Solr Data Manager {0}".format(VERSION))

    parser.add_option("-m", "--mode", dest="mode", type="string", help="archive | delete | save")
    parser.add_option("-s", "--solr-url", dest="solr_url", type="string", help="the url of the solr server including the port and protocol")
    parser.add_option("-c", "--collection", dest="collection", type="string", help="the name of the solr collection")
    parser.add_option("-f", "--filter-field", dest="filter_field", type="string", help="the name of the field to filter on")
    parser.add_option("-r", "--read-block-size", dest="read_block_size", type="int", help="block size to use for reading from solr",
                      default=1000)
    parser.add_option("-w", "--write-block-size", dest="write_block_size", type="int", help="number of records in the output files",
                      default=100000)
    parser.add_option("-i", "--id-field", dest="id_field", type="string", help="the name of the id field", default="id")

    end_group = optparse.OptionGroup(parser, "specifying the end of the range")
    end_group.add_option("-e", "--end", dest="end", type="string", help="end of the range")
    end_group.add_option("-d", "--days", dest="days", type="int", help="number of days to keep")
    parser.add_option_group(end_group)

    parser.add_option("-o", "--date-format", dest="date_format", type="string", help="the date format to use for --days",
                      default="%Y-%m-%dT%H:%M:%S.%fZ")

    parser.add_option("-q", "--additional-filter", dest="additional_filter", type="string", help="additional solr filter")
    parser.add_option("-j", "--name", dest="name", type="string", help="name included in result files")

    parser.add_option("-g", "--ignore-unfinished-uploading", dest="ignore_unfinished_uploading", action="store_true", default=False)

    parser.add_option("--json-file", dest="json_file", help="create a json file instead of line delimited json", action="store_true", default=False)
    parser.add_option("-z", "--compression", dest="compression", help="none | tar.gz | tar.bz2 | zip | gz", default="gz")

    parser.add_option("-k", "--solr-keytab", dest="solr_keytab", type="string", help="the keytab for a kerberized solr")
    parser.add_option("-n", "--solr-principal", dest="solr_principal", type="string", help="the principal for a kerberized solr")

    parser.add_option("-a", "--hdfs-keytab", dest="hdfs_keytab", type="string", help="the keytab for a kerberized hdfs")
    parser.add_option("-l", "--hdfs-principal", dest="hdfs_principal", type="string", help="the principal for a kerberized hdfs")

    parser.add_option("-u", "--hdfs-user", dest="hdfs_user", type="string", help="the user for accessing hdfs")
    parser.add_option("-p", "--hdfs-path", dest="hdfs_path", type="string", help="the hdfs path to upload to")

    parser.add_option("-t", "--key-file-path", dest="key_file_path", type="string", help="the file that contains S3 <accessKey>,<secretKey>")
    parser.add_option("-b", "--bucket", dest="bucket", type="string", help="the bucket name for S3 upload")
    parser.add_option("-y", "--key-prefix", dest="key_prefix", type="string", help="the key prefix for S3 upload")

    parser.add_option("-x", "--local-path", dest="local_path", type="string", help="the local path to save the files to")

    parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False)

    parser.add_option("--solr-output-collection", dest="solr_output_collection", help="target output solr collection for archive", type="string", default=None)
    parser.add_option("--solr-output-url", dest="solr_output_url", default=None, type="string", help="the url of the output solr server including the port and protocol")
    parser.add_option("--exclude-fields", dest="exclude_fields", help="Comma separated list of excluded fields from json response", type="string", default=None)
    parser.add_option("--skip-date-usage", dest="skip_date_usage", action="store_true", default=False, help="datestamp field won't be used for queries (sort based on id field)")

    (options, args) = parser.parse_args()

    def is_set(name):
        """Return True when the option stored under dest `name` has a value."""
        return getattr(options, name) is not None

    def fail(message):
        """Print `message` and the usage text, then exit with a failure status."""
        print(message)
        parser.print_help()
        # Non-zero status so that calling scripts can detect the invalid invocation.
        sys.exit(1)

    # ---- validation -------------------------------------------------------

    for r in ["mode", "solr_url", "collection"]:
        if not is_set(r):
            fail("argument '{0}' is mandatory".format(r))

    if not options.skip_date_usage and options.filter_field is None:
        fail("argument 'filter_field' is mandatory")

    mode_values = ["archive", "delete", "save"]
    if options.mode not in mode_values:
        fail("mode must be one of {0}".format(" | ".join(mode_values)))

    if options.mode == "delete":
        # Nothing is written anywhere in delete mode, so output options are rejected.
        for r in ["name", "hdfs_keytab", "hdfs_principal", "hdfs_user", "hdfs_path", "key_file_path", "bucket", "key_prefix", "local_path"]:
            if is_set(r):
                fail("argument '{0}' may not be specified in delete mode".format(r))

    # Giving neither 'end' nor 'days' is only tolerated with --skip-date-usage;
    # giving both is always rejected.
    neither_end_nor_days = not is_set("end") and not is_set("days")
    both_end_and_days = is_set("end") and is_set("days")
    if (not options.skip_date_usage and neither_end_nor_days) or both_end_and_days:
        fail("exactly one of 'end' or 'days' must be specfied")

    if is_set("solr_keytab") != is_set("solr_principal"):
        fail("either both 'solr-keytab' and 'solr-principal' must be specfied, or neither of them")

    compression_values = ["none", "tar.gz", "tar.bz2", "zip", "gz"]
    if options.compression not in compression_values:
        fail("compression must be one of {0}".format(" | ".join(compression_values)))

    is_any_solr_output_property = is_set("solr_output_collection")

    if is_set("hdfs_keytab") != is_set("hdfs_principal"):
        fail("either both 'hdfs_keytab' and 'hdfs_principal' must be specfied, or neither of them")

    is_any_hdfs_property = is_set("hdfs_user") or is_set("hdfs_path")
    is_all_hdfs_property = is_set("hdfs_user") and is_set("hdfs_path")
    if is_any_hdfs_property and not is_all_hdfs_property:
        fail("either both 'hdfs_user' and 'hdfs_path' must be specfied, or neither of them")

    s3_properties_set = [is_set(r) for r in ["key_file_path", "bucket", "key_prefix"]]
    is_any_s3_property = any(s3_properties_set)
    is_all_s3_property = all(s3_properties_set)
    if is_any_s3_property and not is_all_s3_property:
        fail("either all the S3 arguments ('key_file_path', 'bucket', 'key_prefix') must be specfied, or none of them")

    if options.mode in ["archive", "save"]:
        # Count the selected output targets; exactly one is required.
        targets = [is_any_solr_output_property, is_any_hdfs_property, is_any_s3_property, is_set("local_path")]
        count = sum(1 for selected in targets if selected)
        if count != 1:
            fail("exactly one of the HDFS arguments ('hdfs_user', 'hdfs_path') or the S3 arguments ('key_file_path', 'bucket', 'key_prefix') or the solr arguments ('solr_output_collection') or the 'local_path' argument must be specified")

    if is_set("hdfs_keytab") and not is_set("hdfs_user"):
        fail("HDFS kerberos keytab and principal may only be specified if the upload target is HDFS")

    # ---- echo the effective settings --------------------------------------

    print("You are running Solr Data Manager {0} with arguments:".format(VERSION))
    print(" mode: " + options.mode)
    print(" solr-url: " + options.solr_url)
    print(" collection: " + options.collection)
    if is_set("filter_field"):
        print(" filter-field: " + options.filter_field)
    if options.mode in ["archive", "save"]:
        print(" id-field: " + options.id_field)
    if is_set("exclude_fields"):
        print(" exclude fields: " + options.exclude_fields)
    if is_set("end"):
        print(" end: " + options.end)
    else:
        print(" days: " + str(options.days))
        print(" date-format: " + options.date_format)
    if is_set("additional_filter"):
        print(" additional-filter: " + str(options.additional_filter))
    if is_set("name"):
        print(" name: " + str(options.name))
    if options.mode in ["archive", "save"]:
        print(" read-block-size: " + str(options.read_block_size))
        print(" write-block-size: " + str(options.write_block_size))
        print(" ignore-unfinished-uploading: " + str(options.ignore_unfinished_uploading))
    if is_set("solr_keytab"):
        print(" solr-keytab: " + options.solr_keytab)
        print(" solr-principal: " + options.solr_principal)
    if options.mode in ["archive", "save"]:
        print(" output: " + ("json" if options.json_file else "line-delimited-json"))
        print(" compression: " + options.compression)
    if is_set("solr_output_collection"):
        print(" solr output collection: " + options.solr_output_collection)
    if is_set("solr_output_url"):
        # Print the url itself; the collection name may well be unset here.
        print(" solr output url: " + options.solr_output_url)
    if is_set("hdfs_keytab"):
        print(" hdfs-keytab: " + options.hdfs_keytab)
        print(" hdfs-principal: " + options.hdfs_principal)
    if is_set("hdfs_user"):
        print(" hdfs-user: " + options.hdfs_user)
        print(" hdfs-path: " + options.hdfs_path)
    if is_set("key_file_path"):
        print(" key-file-path: " + options.key_file_path)
        print(" bucket: " + options.bucket)
        print(" key-prefix: " + options.key_prefix)
    if is_set("local_path"):
        print(" local-path: " + options.local_path)
    print(" skip-date-usage: " + str(options.skip_date_usage))
    print(" verbose: " + str(options.verbose))
    print("")

    # ---- interactive confirmation -----------------------------------------

    if is_set("additional_filter") and not is_set("name"):
        try:
            read_answer = raw_input
        except NameError:
            # raw_input does not exist on Python 3, where input() reads a plain line.
            read_answer = input

        go = False
        while not go:
            sys.stdout.write("It is recommended to set --name in case of any additional filter is set.\n")
            sys.stdout.write("Are you sure that you want to proceed without a name (yes/no)? ")
            choice = read_answer().lower()
            if choice in ['yes', 'ye', 'y']:
                go = True
            elif choice in ['no', 'n']:
                # Deliberate abort by the user, not a validation error.
                sys.exit()
            # Any other answer repeats the question.

    return options