in tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java [333:458]
public void embed(final Metadata metadata, final InputStream inputStream,
        final OutputStream outputStream, final ParseContext context)
        throws IOException, TikaException {
    // Runs the configured external command against the given document.
    //
    // metadata     - passed to getCommandMetadataSegments() to build the metadata
    //                command-line segments (only when metadataCommandArguments is
    //                non-empty)
    // inputStream  - the document; piped to the process' stdin unless a command
    //                segment contains ExternalParser.INPUT_FILE_TOKEN, in which case
    //                the token is replaced by the path of a file backing the stream
    // outputStream - receives the process' stdout, or the content of a temporary
    //                output file when a segment contains
    //                ExternalParser.OUTPUT_FILE_TOKEN; it is closed before returning
    // context      - not read by this method
    //
    // Throws TikaException when the process exits with a non-zero status (the
    // message carries the command and the captured stderr) or when the wait for
    // the process was interrupted and the process had to be terminated.
    boolean inputToStdIn = true;
    boolean outputFromStdOut = true;
    boolean hasMetadataCommandArguments =
            (metadataCommandArguments != null && !metadataCommandArguments.isEmpty());
    boolean serializeMetadataCommandArgumentsToken = false;
    boolean replacedMetadataCommandArgumentsToken = false;
    TikaInputStream tikaInputStream = TikaInputStream.get(inputStream);
    File tempOutputFile = null;
    List<String> commandMetadataSegments = null;
    if (hasMetadataCommandArguments) {
        commandMetadataSegments = getCommandMetadataSegments(metadata);
    }

    // Build our command
    List<String> cmd = new ArrayList<>();
    for (String commandSegment : command) {
        String segment = commandSegment;
        if (segment.contains(ExternalParser.INPUT_FILE_TOKEN)) {
            segment = segment.replace(ExternalParser.INPUT_FILE_TOKEN,
                    tikaInputStream.getFile().toString());
            inputToStdIn = false;
        }
        if (segment.contains(ExternalParser.OUTPUT_FILE_TOKEN)) {
            tempOutputFile = tmp.createTemporaryFile();
            segment = segment.replace(ExternalParser.OUTPUT_FILE_TOKEN,
                    tempOutputFile.toString());
            outputFromStdOut = false;
        }
        if (segment.contains(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN)) {
            // Only remembered here; the actual replacement happens below
            serializeMetadataCommandArgumentsToken = true;
        }
        if (segment.contains(METADATA_COMMAND_ARGUMENTS_TOKEN)) {
            // The whole segment is replaced by the metadata segments (or dropped
            // when no metadata command arguments are configured)
            if (hasMetadataCommandArguments) {
                cmd.addAll(commandMetadataSegments);
            }
            replacedMetadataCommandArgumentsToken = true;
        } else {
            cmd.add(segment);
        }
    }
    if (hasMetadataCommandArguments) {
        if (serializeMetadataCommandArgumentsToken) {
            // Find all metadata tokens and replace with encapsulated metadata
            for (int i = 0; i < cmd.size(); i++) {
                String segment = cmd.get(i);
                if (segment.contains(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN)) {
                    cmd.set(i, segment.replace(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN,
                            serializeMetadata(commandMetadataSegments)));
                }
            }
        } else if (!replacedMetadataCommandArgumentsToken) {
            // Tack metadata onto the end of the cmd as arguments
            cmd.addAll(commandMetadataSegments);
        }
    }

    // Execute. A single-element command goes through exec(String) so that the
    // runtime tokenizes it; otherwise each segment is passed as one argument.
    Process process;
    if (cmd.size() == 1) {
        process = Runtime.getRuntime().exec(cmd.get(0));
    } else {
        process = Runtime.getRuntime().exec(cmd.toArray(new String[0]));
    }
    UnsynchronizedByteArrayOutputStream stdErrOutputStream =
            UnsynchronizedByteArrayOutputStream.builder().get();
    // Set when a waitFor() was interrupted; the flag is restored in finally
    boolean interrupted = false;
    // Set once the try body ran to its end, i.e. no exception is propagating
    boolean completed = false;
    try {
        sendStdErrToOutputStream(process, stdErrOutputStream);
        if (inputToStdIn) {
            sendInputStreamToStdIn(inputStream, process);
        } else {
            // We're not writing to std in this case so close
            process.getOutputStream().close();
        }
        if (outputFromStdOut) {
            sendStdOutToOutputStream(process, outputStream);
        } else {
            // NOTE(review): resources tracked by tmp are released before waiting
            // for the process; presumably intentional -- confirm against
            // TemporaryResources before moving this call.
            tmp.dispose();
            try {
                process.waitFor();
            } catch (InterruptedException e) {
                interrupted = true;
            }
            // Read the output file into the given output stream; the file stream
            // is closed here instead of being leaked
            try (InputStream tempOutputFileInputStream =
                         TikaInputStream.get(tempOutputFile)) {
                IOUtils.copy(tempOutputFileInputStream, outputStream);
            }
        }
        completed = true;
    } finally {
        if (outputFromStdOut) {
            try {
                process.waitFor();
            } catch (InterruptedException e) {
                interrupted = true;
            }
        } else {
            try {
                // Clean up temp output files
                tempOutputFile.delete();
            } catch (Exception e) {
                // best effort: a failed delete must not hide the command result
            }
        }
        if (!inputToStdIn) {
            // Close the stream that backed the input file.
            // NOTE(review): presumably this also removes a temporary file created
            // for the input -- confirm against TikaInputStream.
            IOUtils.closeQuietly(tikaInputStream);
        }
        IOUtils.closeQuietly(outputStream);
        IOUtils.closeQuietly(stdErrOutputStream);
        if (interrupted) {
            // Do not swallow the interruption: hand it back to the caller's thread
            Thread.currentThread().interrupt();
        }
        if (process.isAlive()) {
            // exitValue() would throw IllegalThreadStateException here; the process
            // is abandoned by this method, so terminate it instead of leaking it.
            process.destroyForcibly();
            if (completed) {
                // No other exception is propagating, so report the aborted run
                throw new TikaException(
                        "Interrupted while waiting for the command line to finish" +
                                "\nExecutable Command:\n\n" + cmd);
            }
        } else if (process.exitValue() != 0) {
            throw new TikaException("There was an error executing the command line" +
                    "\nExecutable Command:\n\n" + cmd + "\nExecutable Error:\n\n" +
                    stdErrOutputStream.toString(UTF_8.name()));
        }
    }
}