tika/tika-config.xml (35 lines of code) (raw):
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<properties>
<server>
<!-- Settings for the Tika server.
As of Tika 2.7.0, the params wrapper element is no longer required here;
this file keeps it. -->
<params>
<!-- NOTE(review): host is "localhost"; presumably the server then only
accepts connections from the same machine/container. Confirm that this
is what the deployment needs. -->
<port>9998</port>
<host>localhost</host>
<!-- If specified, this is the id that is used in the /status endpoint
and elsewhere. If an id is specified and more than one forked process
is invoked, each process will have the id followed by its port,
e.g. my-id-9998. If a forked server has to restart, it will keep its
original id. If not specified, a UUID will be generated.
-->
<id>my-id</id>
<!-- Origin URL for CORS requests. Set to '*' to allow all CORS
requests. Leave blank (as here) or remove the element if you do not
want to enable CORS. -->
<cors></cors>
<!-- Which digests to calculate, comma-delimited (e.g. md5,sha256);
optionally specify the encoding after a colon (e.g. "sha1:32").
Can be empty if you don't want to calculate a digest. -->
<digest>sha256</digest>
<!-- How much to read into memory during the digest phase before
spooling to disk. Only applies if a digest is selected.
NOTE(review): the unit is presumably bytes; confirm. -->
<digestMarkLimit>1000000</digestMarkLimit>
<!-- Log level for request URIs: 'debug' or 'info'. To change the
general log level, edit the "log4j2.xml" file instead. -->
<logLevel>info</logLevel>
<!-- Whether to include the stack trace in the data returned to the
user when a parse exception happens. -->
<returnStackTrace>false</returnStackTrace>
<!-- If set to 'true', Tika server runs "in process" in the legacy 1.x
mode. This means that the server will be susceptible to infinite loops
and crashes.
If set to 'false', the server will spawn a forked process and restart
the forked process on catastrophic failures (this was called
-spawnChild mode in 1.x).
noFork=false is the default in 2.x.
-->
<noFork>false</noFork>
<!-- Maximum time, in milliseconds, to allow per parse before shutting
down and restarting the forked parser. Not allowed if noFork=true. -->
<taskTimeoutMillis>300000</taskTimeoutMillis>
<!-- Lower bound, in milliseconds, on client-requested timeouts:
clients cannot specify a timeout less than this amount. -->
<minimumTimeoutMillis>30000</minimumTimeoutMillis>
<!-- How often, in milliseconds, to check whether a parse has timed out.
Not allowed if noFork=true. -->
<taskPulseMillis>10000</taskPulseMillis>
<!-- Maximum amount of time, in milliseconds, to wait for a forked
process to start up.
Not allowed if noFork=true. -->
<maxForkedStartupMillis>120000</maxForkedStartupMillis>
<!-- Maximum number of times a specific forked process may be
restarted.
Not allowed if noFork=true.
NOTE(review): -1 presumably means "no limit"; confirm against the
Tika server documentation. -->
<maxRestarts>-1</maxRestarts>
<!-- Maximum number of files to parse per forked process before
restarting the forked process to clear potential memory leaks.
Not allowed if noFork=true. -->
<maxFiles>100000</maxFiles>
<!-- Use this to specify a particular Java executable for the forked
process. This should be the full path including the executable,
e.g.: /usr/bin/java
Not allowed if noFork=true.
NOTE(review): the value here is the bare command "java", not a full
path; presumably it is resolved through the PATH of the server
process. Confirm. -->
<javaPath>java</javaPath>
<!-- Enables unsecure features, i.e. content extraction by specifying
local file names.
NOTE(review): this is switched on while the file system fetcher in the
fetchers section of this file uses "/" as its basePath; confirm that
only trusted clients can reach this server. -->
<enableUnsecureFeatures>true</enableUnsecureFeatures>
<!-- JVM arguments for the forked process, one arg element per
argument. The single argument here sets the log4j.configurationFile
system property to /app/log4j2.xml. -->
<forkedJvmArgs>
<arg>-Dlog4j.configurationFile=/app/log4j2.xml</arg>
</forkedJvmArgs>
</params>
</server>
<fetchers>
<!-- Settings for local file extraction: declares one fetcher backed by
the FileSystemFetcher class. -->
<fetcher class="org.apache.tika.pipes.fetcher.fs.FileSystemFetcher">
<params>
<!-- Name of this fetcher.
NOTE(review): presumably clients select the fetcher by this name;
confirm against the callers. -->
<name>fsf</name>
<!-- Base path of the fetcher, set here to the file system root.
NOTE(review): presumably requested file names are resolved against
this path, which would make every file readable by the server process
fetchable. Confirm this is intended, or narrow it to the directory
that actually holds the documents. -->
<basePath>/</basePath>
</params>
</fetcher>
</fetchers>
</properties>