in tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java [163:252]
/**
 * Runs the configured command line against a single file and redirects the
 * child process's standard output to a matching file under {@code outputRoot}.
 * <p>
 * When {@code totalThreads > 1}, a file is only handled if the hash of its
 * absolute path maps to this visitor's {@code threadNum}; other files are skipped.
 * Files that do not start with {@code inputRoot} are skipped with a warning.
 * <p>
 * The output file name is the path relative to {@code inputRoot} plus a suffix:
 * {@code .txt} by default, {@code .json} if an argument equals {@code -J},
 * {@code .html} if an argument contains {@code -x}.
 * <p>
 * The child process is given three minutes to finish and is forcibly destroyed
 * after that.
 *
 * @param file the file being visited
 * @param attr the file's basic attributes (not used)
 * @return {@link FileVisitResult#CONTINUE} in the normal case;
 *         {@link FileVisitResult#TERMINATE} if this thread was interrupted while
 *         waiting for the child process
 * @throws RuntimeException if the parent directory of the output file cannot be created
 */
public FileVisitResult visitFile(Path file, BasicFileAttributes attr) {
    if (totalThreads > 1) {
        int hashCode = file
                .toAbsolutePath()
                .toString()
                .hashCode();
        // The remainder is strictly between -totalThreads and totalThreads,
        // so Math.abs cannot hit the Integer.MIN_VALUE corner case here.
        if (Math.abs(hashCode % totalThreads) != threadNum) {
            return FileVisitResult.CONTINUE;
        }
    }
    if (!file.startsWith(inputRoot)) {
        LOG.warn("File ({}) doesn't start with input root ({})", file.toAbsolutePath(), inputRoot.toAbsolutePath());
        return FileVisitResult.CONTINUE;
    }
    Path relPath = inputRoot.relativize(file);

    // Copy the configured arguments and pick the output suffix from them.
    String suffix = ".txt";
    List<String> commandLine = new ArrayList<>();
    for (String arg : args) {
        commandLine.add(arg);
        if (arg.equals("-J")) {
            suffix = ".json";
        } else if (arg.contains("-x")) {
            // NOTE(review): substring match -- any argument that merely contains "-x"
            // (e.g. inside a path) also selects ".html"; confirm whether equals() was intended.
            suffix = ".html";
        }
    }

    String fullPath = file
            .toAbsolutePath()
            .toString();
    // NOTE(review): ProcessBuilder hands each list element to the child as one argument,
    // so these literal quotes may reach the child verbatim on non-Windows platforms -- verify.
    if (fullPath.contains(" ")) {
        fullPath = "\"" + fullPath + "\"";
    }
    commandLine.add(fullPath);

    Path outputFile = Paths.get(outputRoot
            .toAbsolutePath()
            .toString(), relPath.toString() + suffix);
    try {
        Files.createDirectories(outputFile.getParent());
    } catch (IOException e) {
        // Log and chain the cause so the underlying I/O failure is not lost.
        LOG.error(MarkerFactory.getMarker("FATAL"), "parent directory for {} was not made!", outputFile, e);
        throw new RuntimeException("couldn't make parent file for " + outputFile, e);
    }

    ProcessBuilder builder = new ProcessBuilder();
    builder.command(commandLine);
    LOG.info("about to process: {}", file.toAbsolutePath());
    builder.redirectOutput(outputFile.toFile());
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process proc;
    try {
        proc = builder.start();
    } catch (IOException e) {
        LOG.error(e.getMessage(), e);
        return FileVisitResult.CONTINUE;
    }

    final long timeoutMillis = 180000; // 3 minutes
    boolean finished = false;
    boolean interrupted = false;
    try {
        // Blocks until the child exits or the timeout elapses; returns false on timeout.
        finished = proc.waitFor(timeoutMillis, java.util.concurrent.TimeUnit.MILLISECONDS);
    } catch (InterruptedException e) {
        // Restore the flag so the code that owns this thread can see the interruption.
        interrupted = true;
        Thread.currentThread().interrupt();
    }
    if (!finished) {
        LOG.warn("Had to terminate process working on: {}", file.toAbsolutePath());
        proc.destroyForcibly();
    }

    // Release the pipe connected to the child's standard input.
    try {
        proc
                .getOutputStream()
                .flush();
        proc
                .getOutputStream()
                .close();
    } catch (IOException e) {
        LOG.warn("couldn't close process outputstream", e);
    }

    if (interrupted) {
        // Stop the walk: with the interrupt flag set, every later wait would fail immediately.
        return FileVisitResult.TERMINATE;
    }
    processed.incrementAndGet();
    return FileVisitResult.CONTINUE;
}