public void embed()

in tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java [333:458]


    /**
     * Runs the configured external command to embed metadata into a document and copies
     * the command's result to {@code outputStream}.
     * <p>
     * The command line is built from the {@code command} segments:
     * <ul>
     *   <li>a segment containing {@code ExternalParser.INPUT_FILE_TOKEN} gets the path of the
     *       input file substituted, and the input is then not piped to the process' stdin;</li>
     *   <li>a segment containing {@code ExternalParser.OUTPUT_FILE_TOKEN} gets the path of a
     *       temporary output file substituted, and the result is then read from that file
     *       instead of the process' stdout;</li>
     *   <li>a segment containing {@code METADATA_COMMAND_ARGUMENTS_TOKEN} is replaced as a
     *       whole by the metadata argument segments;</li>
     *   <li>{@code METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN} is replaced inside its segment
     *       by the serialized metadata arguments;</li>
     *   <li>if neither metadata token occurs, the metadata argument segments are appended.</li>
     * </ul>
     * {@code outputStream} is closed before this method returns or throws once the process
     * has been started.
     *
     * @param metadata     the metadata to embed
     * @param inputStream  the document to embed the metadata into
     * @param outputStream receives the output produced by the command; closed by this method
     * @param context      the parse context (not read by this method)
     * @throws IOException   if starting the process or transferring data fails
     * @throws TikaException if the command exits with a non-zero status, or if the thread
     *                       is interrupted while the command is still running
     */
    public void embed(final Metadata metadata, final InputStream inputStream,
                      final OutputStream outputStream, final ParseContext context)
            throws IOException, TikaException {

        boolean inputToStdIn = true;
        boolean outputFromStdOut = true;
        boolean hasMetadataCommandArguments =
                (metadataCommandArguments != null && !metadataCommandArguments.isEmpty());
        boolean serializeMetadataCommandArgumentsToken = false;
        boolean replacedMetadataCommandArgumentsToken = false;

        TikaInputStream tikaInputStream = TikaInputStream.get(inputStream);
        File tempOutputFile = null;

        List<String> commandMetadataSegments = null;
        if (hasMetadataCommandArguments) {
            commandMetadataSegments = getCommandMetadataSegments(metadata);
        }

        // Build our command
        List<String> cmd = new ArrayList<>();
        for (String commandSegment : command) {
            if (commandSegment.contains(ExternalParser.INPUT_FILE_TOKEN)) {
                commandSegment = commandSegment.replace(ExternalParser.INPUT_FILE_TOKEN,
                        tikaInputStream.getFile().toString());
                inputToStdIn = false;
            }
            if (commandSegment.contains(ExternalParser.OUTPUT_FILE_TOKEN)) {
                tempOutputFile = tmp.createTemporaryFile();
                commandSegment = commandSegment
                        .replace(ExternalParser.OUTPUT_FILE_TOKEN, tempOutputFile.toString());
                outputFromStdOut = false;
            }
            if (commandSegment.contains(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN)) {
                // Only remembered here; the substitution happens after the whole command is built
                serializeMetadataCommandArgumentsToken = true;
            }
            if (commandSegment.contains(METADATA_COMMAND_ARGUMENTS_TOKEN)) {
                // The whole segment is replaced by the metadata arguments (or dropped if none)
                if (hasMetadataCommandArguments) {
                    cmd.addAll(commandMetadataSegments);
                }
                replacedMetadataCommandArgumentsToken = true;
            } else {
                cmd.add(commandSegment);
            }
        }
        if (hasMetadataCommandArguments) {
            if (serializeMetadataCommandArgumentsToken) {
                // Find all metadata tokens and replace with encapsulated metadata
                for (int i = 0; i < cmd.size(); i++) {
                    String commandSegment = cmd.get(i);
                    if (commandSegment.contains(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN)) {
                        cmd.set(i, commandSegment
                                .replace(METADATA_COMMAND_ARGUMENTS_SERIALIZED_TOKEN,
                                        serializeMetadata(commandMetadataSegments)));
                    }
                }
            } else if (!replacedMetadataCommandArgumentsToken) {
                // Tack metadata onto the end of the cmd as arguments
                cmd.addAll(commandMetadataSegments);
            }
        }

        // Execute
        Process process;
        if (cmd.size() == 1) {
            // A lone segment is handed over as a single command line string
            process = Runtime.getRuntime().exec(cmd.get(0));
        } else {
            process = Runtime.getRuntime().exec(cmd.toArray(new String[0]));
        }

        UnsynchronizedByteArrayOutputStream stdErrOutputStream =
                UnsynchronizedByteArrayOutputStream.builder().get();

        // Set when a wait on the process is interrupted; the interrupt status is restored
        // only after the streams have been handled so it cannot disturb that I/O.
        boolean interrupted = false;

        try {
            sendStdErrToOutputStream(process, stdErrOutputStream);

            if (inputToStdIn) {
                sendInputStreamToStdIn(inputStream, process);
            } else {
                // We're not writing to std in this case so close
                process.getOutputStream().close();
            }

            if (outputFromStdOut) {
                sendStdOutToOutputStream(process, outputStream);
            } else {
                // NOTE(review): tmp is declared outside this method; presumably disposing it
                // removes the placeholder temp file so the command can create the file
                // itself. This runs after the process was started -- confirm the ordering.
                tmp.dispose();
                try {
                    process.waitFor();
                } catch (InterruptedException e) {
                    interrupted = true;
                }
                // The command is finished, read the output file into the given output stream.
                // The stream is closed here so the file can be deleted in the finally block.
                try (InputStream tempOutputFileInputStream =
                             TikaInputStream.get(tempOutputFile)) {
                    IOUtils.copy(tempOutputFileInputStream, outputStream);
                }
            }
        } finally {
            if (outputFromStdOut) {
                try {
                    process.waitFor();
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            } else {
                try {
                    // Clean up temp output files
                    tempOutputFile.delete();
                } catch (Exception ignored) {
                    // best effort: a failed delete must not hide the command's outcome
                }
            }
            if (!inputToStdIn) {
                // Close input file (and delete if created by
                // TemporaryResources.createTemporaryFile)
                IOUtils.closeQuietly(tikaInputStream);
            }
            IOUtils.closeQuietly(outputStream);
            IOUtils.closeQuietly(stdErrOutputStream);
            if (interrupted) {
                // Hand the interruption back to the caller instead of swallowing it
                Thread.currentThread().interrupt();
            }
            if (process.isAlive()) {
                // Reached only if waiting was interrupted or the try block failed before the
                // wait. exitValue() would throw IllegalThreadStateException here and mask the
                // real failure, so stop the orphaned command instead.
                process.destroy();
                if (interrupted) {
                    throw new TikaException(
                            "Interrupted while waiting for the command line to finish" +
                                    "\nExecutable Command:\n\n" + cmd);
                }
            } else if (process.exitValue() != 0) {
                throw new TikaException("There was an error executing the command line" +
                        "\nExecutable Command:\n\n" + cmd + "\nExecutable Error:\n\n" +
                        stdErrOutputStream.toString(UTF_8.name()));
            }
        }
    }