in tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java [532:637]
/**
 * Prints the command-line usage message to standard output.
 *
 * <p>The message is written line by line to {@link System#out} and covers, in
 * order: the general options, the configuration options, the output-format
 * options, the {@code --list-*} and {@code --compare-file-magic} commands, a
 * description of how input is selected, the GUI mode section and the batch
 * mode section with its options.
 */
private void usage() {
    PrintStream out = System.out;

    // Synopsis and general options.
    out.println("usage: java -jar tika-app.jar [option...] [file...]");
    out.println();
    out.println("Options:");
    out.println(" -? or --help Print this usage message");
    out.println(" -v or --verbose Print debug level messages");
    out.println(" -V or --version Print the Apache Tika version number");
    out.println();
    out.println(" -g or --gui Start the Apache Tika GUI");
    out.println(" -f or --fork Use Fork Mode for out-of-process extraction");
    out.println();

    // Configuration options.
    out.println(" --config=<tika-config.xml>");
    out.println(" TikaConfig file. Must be specified before -g, -s, -f or the dump-x-config !");
    out.println(" --dump-minimal-config Print minimal TikaConfig");
    out.println(" --dump-current-config Print current TikaConfig");
    out.println(" --dump-static-config Print static config");
    out.println(" --dump-static-full-config Print static explicit config");
    out.println();

    // Output-format and extraction options.
    out.println(" -x or --xml Output XHTML content (default)");
    out.println(" -h or --html Output HTML content");
    out.println(" -t or --text Output plain text content (body)");
    out.println(" -T or --text-main Output plain text content (main content only via boilerpipe handler)");
    out.println(" -A or --text-all Output all text content");
    out.println(" -m or --metadata Output only metadata");
    out.println(" -j or --json Output metadata in JSON");
    out.println(" -y or --xmp Output metadata in XMP");
    out.println(" -J or --jsonRecursive Output metadata and content from all");
    out.println(" embedded files (choose content type");
    out.println(" with -x, -h, -t or -m; default is -x)");
    out.println(" -a or --async Run Tika in async mode; must specify details in a tikaConfig file");
    out.println(" -l or --language Output only language");
    out.println(" -d or --detect Detect document type");
    out.println(" --digest=X Include digest X (md2, md5, sha1,");
    // The parenthesis opened on the previous line is closed here.
    out.println(" sha256, sha384, sha512)");
    out.println(" -eX or --encoding=X Use output encoding X");
    out.println(" -pX or --password=X Use document password X");
    out.println(" -z or --extract Extract all attachments into current directory");
    out.println(" --extract-dir=<dir> Specify target directory for -z");
    out.println(" -r or --pretty-print For JSON, XML and XHTML outputs, adds newlines and");
    out.println(" whitespace, for better readability");
    out.println();

    // Listing commands.
    out.println(" --list-parsers");
    out.println(" List the available document parsers");
    out.println(" --list-parser-details");
    out.println(" List the available document parsers and their supported mime types");
    out.println(" --list-parser-details-apt");
    out.println(" List the available document parsers and their supported mime types in apt format.");
    out.println(" --list-detectors");
    out.println(" List the available document detectors");
    out.println(" --list-met-models");
    out.println(" List the available metadata models, and their supported keys");
    out.println(" --list-supported-types");
    out.println(" List all known media types and related information");
    out.println();
    out.println(" --compare-file-magic=<dir>");
    out.println(" Compares Tika's known media types to the File(1) tool's magic directory");
    // Blank line so the "Description:" heading is not glued to the options list.
    out.println();

    // Description of input handling.
    out.println("Description:");
    out.println(" Apache Tika will parse the file(s) specified on the");
    out.println(" command line and output the extracted text content");
    out.println(" or metadata to standard output.");
    out.println();
    out.println(" Instead of a file name you can also specify the URL");
    out.println(" of a document to be parsed.");
    out.println();
    out.println(" If no file name or URL is specified (or the special");
    out.println(" name \"-\" is used), then the standard input stream");
    out.println(" is parsed. If no arguments were given and no input");
    out.println(" data is available, the GUI is started instead.");
    out.println();

    // GUI mode.
    out.println("- GUI mode");
    out.println();
    out.println(" Use the \"--gui\" (or \"-g\") option to start the");
    out.println(" Apache Tika GUI. You can drag and drop files from");
    out.println(" a normal file explorer to the GUI window to extract");
    out.println(" text content and metadata from the files.");
    out.println();

    // Batch mode and its options.
    out.println("- Batch mode");
    out.println();
    out.println(" Simplest method.");
    out.println(" Specify two directories as args with no other args:");
    out.println(" java -jar tika-app.jar <inputDirectory> <outputDirectory>");
    out.println();
    out.println("Batch Options:");
    out.println(" -i or --inputDir Input directory");
    out.println(" -o or --outputDir Output directory");
    out.println(" -numConsumers Number of processing threads");
    out.println(" -bc Batch config file");
    out.println(" -maxRestarts Maximum number of times the ");
    out.println(" watchdog process will restart the forked process.");
    out.println(" -timeoutThresholdMillis Number of milliseconds allowed to a parse");
    out.println(" before the process is terminated and restarted");
    out.println(" -fileList List of files to process, with");
    out.println(" paths relative to the input directory");
    out.println(" -includeFilePat Regular expression to determine which");
    out.println(" files to process, e.g. \"(?i)\\.pdf\"");
    out.println(" -excludeFilePat Regular expression to determine which");
    out.println(" files to avoid processing, e.g. \"(?i)\\.pdf\"");
    out.println(" -maxFileSizeBytes Skip files longer than this value");
    out.println();
    out.println(" Control the type of output with -x, -h, -t and/or -J.");
    out.println();
    out.println(" To modify forked process jvm args, prepend \"J\" as in:");
    out.println(" -JXmx4g or -JDlog4j.configuration=file:log4j.xml.");
}