in web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx [615:1323]
/**
 * Builds the list of form field descriptors used to edit the `ioConfig` portion of an
 * ingestion spec for the given ingestion combo type.
 *
 * All `index_parallel:*` variants start with the shared "Source type" field followed by the
 * fields specific to that input source; the `kafka` and `kinesis` variants return their own
 * stream-specific fields.
 *
 * @param ingestionComboType - the combo type (e.g. `'index_parallel:s3'`, `'kafka'`) selecting
 *   which set of fields to return
 * @returns the field descriptors for that combo type, in display order
 * @throws Error if `ingestionComboType` is not handled by any case below
 */
export function getIoConfigFormFields(ingestionComboType: IngestionComboType): Field<IoConfig>[] {
  // Shared by every `index_parallel:*` case: lets the user switch the input source type.
  const inputSourceType: Field<IoConfig> = {
    name: 'inputSource.type',
    label: 'Source type',
    type: 'string',
    suggestions: ['local', 'http', 'inline', 'delta', 's3', 'azureStorage', 'google', 'hdfs'],
    info: (
      <p>
        Druid connects to raw data through{' '}
        <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources`}>
          inputSources
        </ExternalLink>
        . You can change your selected inputSource here.
      </p>
    ),
  };

  // Shared by the cloud object store cases (s3, azureStorage, google).
  const inputSourceObjectGlob: Field<IoConfig> = {
    name: 'inputSource.objectGlob',
    label: 'Object glob',
    type: 'string',
    suggestions: OBJECT_GLOB_SUGGESTIONS,
    placeholder: '(all files)',
    info: (
      <>
        <p>A glob for the object part of the URI.</p>
        <p>
          The glob must match the entire object part, not just the filename. For example, the
          glob <Code>*.json</Code> does not match <Code>/bar/file.json</Code>, because the{' '}
          <Code>*</Code> does not match the slash. To match all objects ending in{' '}
          <Code>.json</Code>, use <Code>**.json</Code> instead.
        </p>
        <p>
          For more information, refer to the documentation for{' '}
          <ExternalLink href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)">
            FileSystem#getPathMatcher
          </ExternalLink>
          .
        </p>
      </>
    ),
  };

  switch (ingestionComboType) {
    case 'index_parallel:http':
      return [
        inputSourceType,
        {
          name: 'inputSource.uris',
          label: 'URIs',
          type: 'string-array',
          placeholder:
            'https://example.com/path/to/file1.ext, https://example.com/path/to/file2.ext',
          required: true,
          info: (
            <p>
              The full URI of your file. To ingest from multiple URIs, use commas to separate each
              individual URI.
            </p>
          ),
        },
        {
          name: 'inputSource.httpAuthenticationUsername',
          label: 'HTTP auth username',
          type: 'string',
          placeholder: '(optional)',
          info: <p>Username to use for authentication with specified URIs</p>,
        },
        {
          name: 'inputSource.httpAuthenticationPassword',
          label: 'HTTP auth password',
          type: 'string',
          placeholder: '(optional)',
          info: <p>Password to use for authentication with specified URIs</p>,
        },
      ];

    case 'index_parallel:local':
      return [
        inputSourceType,
        {
          name: 'inputSource.baseDir',
          label: 'Base directory',
          type: 'string',
          placeholder: '/path/to/files/',
          required: true,
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources`}>
                inputSource.baseDir
              </ExternalLink>
              <p>Specifies the directory to search recursively for files to be ingested.</p>
            </>
          ),
        },
        {
          name: 'inputSource.filter',
          label: 'File filter',
          type: 'string',
          required: true,
          suggestions: FILTER_SUGGESTIONS,
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/native-batch#local-input-source`}>
                inputSource.filter
              </ExternalLink>
              <p>
                A wildcard filter for files. See{' '}
                <ExternalLink href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter">
                  here
                </ExternalLink>{' '}
                for format information. Files matching the filter criteria are considered for
                ingestion. Files not matching the filter criteria are ignored.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:druid':
      return [
        inputSourceType,
        {
          name: 'inputSource.dataSource',
          label: 'Datasource',
          type: 'string',
          required: true,
          info: <p>The datasource to fetch rows from.</p>,
        },
        {
          name: 'inputSource.interval',
          label: 'Interval',
          type: 'interval',
          placeholder: `${CURRENT_YEAR}-01-01/${CURRENT_YEAR + 1}-01-01`,
          required: true,
          info: (
            <p>
              A String representing ISO-8601 Interval. This defines the time range to fetch the data
              over.
            </p>
          ),
        },
        {
          name: 'inputSource.filter',
          label: 'Filter',
          type: 'json',
          placeholder: '(optional)',
          hideInMore: true,
          info: (
            <p>
              The <ExternalLink href={`${getLink('DOCS')}/querying/filters`}>filter</ExternalLink>{' '}
              to apply to the data as part of querying.
            </p>
          ),
        },
      ];

    case 'index_parallel:inline':
      return [
        inputSourceType,
        // do not add 'data' here as it has special handling in the load-data view
      ];

    case 'index_parallel:s3':
      return [
        inputSourceType,
        // uris / prefixes / objects are alternatives: the `defined` predicate of uris and of
        // prefixes is true only while neither of the other two is set on the ioConfig.
        {
          name: 'inputSource.uris',
          label: 'S3 URIs',
          type: 'string-array',
          placeholder: 's3://your-bucket/some-file1.ext, s3://your-bucket/some-file2.ext',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full S3 URI of your file. To ingest from multiple URIs, use commas to separate
                each individual URI.
              </p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'S3 prefixes',
          type: 'string-array',
          placeholder: 's3://your-bucket/some-path1, s3://your-bucket/some-path2',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'S3 objects',
          type: 'json',
          placeholder: '{"bucket":"your-bucket", "path":"some-file.ext"}',
          required: true,
          // Predicate is truthy only when the ioConfig already carries an `objects` value.
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources#s3-input-source`}>
                  S3 Objects
                </ExternalLink>
                .
              </p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
        {
          name: 'inputSource.properties.accessKeyId.type',
          label: 'Access key ID type',
          type: 'string',
          suggestions: [undefined, 'environment', 'default'],
          placeholder: '(none)',
          info: (
            <>
              <p>S3 access key type.</p>
              <p>Setting this will override the default configuration provided in the config.</p>
              <p>
                The access key can be pulled from an environment variable or inlined in the
                ingestion spec (default).
              </p>
              <p>
                Note: Inlining the access key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
          // Copies the access key ID type onto the secret access key type so the two match.
          adjustment: ioConfig => {
            return deepSet(
              ioConfig,
              'inputSource.properties.secretAccessKey.type',
              deepGet(ioConfig, 'inputSource.properties.accessKeyId.type'),
            );
          },
        },
        {
          name: 'inputSource.properties.accessKeyId.variable',
          label: 'Access key ID environment variable',
          type: 'string',
          placeholder: '(environment variable name)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.accessKeyId.type') === 'environment',
          info: <p>The environment variable containing the S3 access key for this S3 bucket.</p>,
        },
        {
          name: 'inputSource.properties.accessKeyId.password',
          label: 'Access key ID value',
          type: 'string',
          placeholder: '(access key)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.accessKeyId.type') === 'default',
          info: (
            <>
              <p>S3 access key for this S3 bucket.</p>
              <p>
                Note: Inlining the access key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
        {
          name: 'inputSource.properties.secretAccessKey.type',
          label: 'Secret access key type',
          type: 'string',
          suggestions: [undefined, 'environment', 'default'],
          placeholder: '(none)',
          info: (
            <>
              <p>S3 secret key type.</p>
              <p>Setting this will override the default configuration provided in the config.</p>
              <p>
                The secret key can be pulled from an environment variable or inlined in the
                ingestion spec (default).
              </p>
              <p>
                Note: Inlining the secret key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
        {
          name: 'inputSource.properties.secretAccessKey.variable',
          label: 'Secret access key environment variable',
          type: 'string',
          placeholder: '(environment variable name)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.secretAccessKey.type') === 'environment',
          info: <p>The environment variable containing the S3 secret key for this S3 bucket.</p>,
        },
        {
          name: 'inputSource.properties.secretAccessKey.password',
          label: 'Secret access key value',
          type: 'string',
          placeholder: '(secret key)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.secretAccessKey.type') === 'default',
          info: (
            <>
              <p>S3 secret key for this S3 bucket.</p>
              <p>
                Note: Inlining the secret key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:azureStorage':
      return [
        inputSourceType,
        // Same uris / prefixes / objects alternation as the S3 case.
        {
          name: 'inputSource.uris',
          label: 'Azure URIs',
          type: 'string-array',
          placeholder:
            'azureStorage://your-storage-account/your-container/some-file1.ext, azureStorage://your-storage-account/your-container/some-file2.ext',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full Azure URI of your file. To ingest from multiple URIs, use commas to
                separate each individual URI.
              </p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'Azure prefixes',
          type: 'string-array',
          placeholder:
            'azureStorage://your-storage-account/your-container/some-path1, azureStorage://your-storage-account/your-container/some-path2',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'Azure objects',
          type: 'json',
          placeholder: '{"bucket":"your-storage-account", "path":"your-container/some-file.ext"}',
          required: true,
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink
                  href={`${getLink('DOCS')}/ingestion/input-sources#azure-input-source`}
                >
                  Azure Objects
                </ExternalLink>
                .
              </p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
        {
          name: 'inputSource.properties.sharedAccessStorageToken',
          label: 'Shared Access Storage Token',
          type: 'string',
          placeholder: '(sas token)',
          info: (
            <>
              <p>Shared Access Storage Token for this storage account.</p>
              <p>
                Note: Inlining the sas token into the ingestion spec can be dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:google':
      return [
        inputSourceType,
        // Same uris / prefixes / objects alternation as the S3 case.
        {
          name: 'inputSource.uris',
          label: 'Google Cloud Storage URIs',
          type: 'string-array',
          placeholder: 'gs://your-bucket/some-file1.ext, gs://your-bucket/some-file2.ext',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full Google Cloud Storage URI of your file. To ingest from multiple URIs, use
                commas to separate each individual URI.
              </p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'Google Cloud Storage prefixes',
          type: 'string-array',
          placeholder: 'gs://your-bucket/some-path1, gs://your-bucket/some-path2',
          required: true,
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'Google Cloud Storage objects',
          type: 'json',
          placeholder: '{"bucket":"your-bucket", "path":"some-file.ext"}',
          required: true,
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink
                  href={`${getLink(
                    'DOCS',
                  )}/ingestion/input-sources#google-cloud-storage-input-source`}
                >
                  Google Cloud Storage Objects
                </ExternalLink>
                .
              </p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
      ];

    case 'index_parallel:delta':
      return [
        inputSourceType,
        {
          name: 'inputSource.tablePath',
          label: 'Delta table path',
          type: 'string',
          placeholder: '/path/to/deltaTable',
          required: true,
          info: <p>A full path to the Delta Lake table.</p>,
        },
        {
          name: 'inputSource.filter',
          label: 'Delta filter',
          type: 'json',
          placeholder: '{"type": "=", "column": "name", "value": "foo"}',
          info: (
            <>
              <ExternalLink
                href={`${getLink('DOCS')}/ingestion/input-sources/#delta-filter-object`}
              >
                filter
              </ExternalLink>
              <p>A Delta filter json object to filter Delta Lake scan files.</p>
            </>
          ),
        },
        {
          name: 'inputSource.snapshotVersion',
          label: 'Delta snapshot version',
          type: 'number',
          placeholder: '(latest)',
          info: (
            <>
              The snapshot version to read from the Delta table. By default, the latest snapshot is
              read.
            </>
          ),
        },
      ];

    case 'index_parallel:hdfs':
      return [
        inputSourceType,
        {
          name: 'inputSource.paths',
          label: 'Paths',
          type: 'string',
          placeholder: '/path/to/file.ext',
          required: true,
        },
      ];

    case 'kafka':
      return [
        {
          name: 'consumerProperties.{bootstrap.servers}',
          label: 'Bootstrap servers',
          type: 'string',
          required: true,
          placeholder: 'kafka_broker_host:9092',
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/kafka-ingestion#io-configuration`}>
                consumerProperties
              </ExternalLink>
              <p>
                A list of Kafka brokers in the form:{' '}
                <Code>{`<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`}</Code>
              </p>
            </>
          ),
        },
        // topic and topicPattern are alternatives: each predicate is false once the other is set.
        {
          name: 'topic',
          type: 'string',
          required: true,
          defined: ioConfig =>
            oneOfKnown(ioConfig.type, KNOWN_TYPES, 'kafka') && !ioConfig.topicPattern,
          placeholder: 'your_kafka_topic',
          info: 'The name of the Kafka topic to ingest from.',
        },
        {
          name: 'topicPattern',
          type: 'string',
          required: true,
          defined: ioConfig => oneOfKnown(ioConfig.type, KNOWN_TYPES, 'kafka') && !ioConfig.topic,
          placeholder: 'topic1|topic2',
          info: (
            <>
              <p>
                A regular expression that represents all topics to be ingested from. For example, to
                ingest data from <Code>clicks</Code> and <Code>impressions</Code>, you can set this
                to <Code>clicks|impressions</Code>. Alternatively, to ingest from all topics
                starting with <Code>metrics-</Code> set this to <Code>metrics-.*</Code>.
              </p>
              <p>
                If new topics are added to the cluster that match the regex, Druid will
                automatically start ingesting from those new topics.
              </p>
            </>
          ),
        },
        {
          name: 'consumerProperties',
          type: 'json',
          defaultValue: {},
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/kafka-ingestion#io-configuration`}>
                consumerProperties
              </ExternalLink>
              <p>A map of properties to be passed to the Kafka consumer.</p>
            </>
          ),
        },
      ];

    case 'kinesis':
      return [
        {
          name: 'stream',
          type: 'string',
          placeholder: 'your-kinesis-stream',
          required: true,
          info: <>The Kinesis stream to read.</>,
        },
        {
          name: 'endpoint',
          type: 'string',
          defaultValue: 'kinesis.us-east-1.amazonaws.com',
          // Endpoint suggestions, grouped by geography.
          suggestions: [
            {
              group: 'US East',
              suggestions: [
                'kinesis.us-east-1.amazonaws.com',
                'kinesis-fips.us-east-1.amazonaws.com',
                'kinesis.us-east-2.amazonaws.com',
                'kinesis-fips.us-east-2.amazonaws.com',
              ],
            },
            {
              group: 'US Gameday Northeast',
              suggestions: ['kinesis.us-northeast-1.amazonaws.com'],
            },
            {
              group: 'US West',
              suggestions: [
                'kinesis.us-west-1.amazonaws.com',
                'kinesis-fips.us-west-1.amazonaws.com',
                'kinesis.us-west-2.amazonaws.com',
                'kinesis-fips.us-west-2.amazonaws.com',
              ],
            },
            { group: 'Africa', suggestions: ['kinesis.af-south-1.amazonaws.com'] },
            {
              group: 'Asia Pacific',
              suggestions: [
                'kinesis.ap-east-1.amazonaws.com',
                'kinesis.ap-south-2.amazonaws.com',
                'kinesis.ap-southeast-3.amazonaws.com',
                'kinesis.ap-southeast-5.amazonaws.com',
                'kinesis.ap-southeast-4.amazonaws.com',
                'kinesis.ap-south-1.amazonaws.com',
                'kinesis.ap-northeast-3.amazonaws.com',
                'kinesis.ap-northeast-2.amazonaws.com',
                'kinesis.ap-southeast-1.amazonaws.com',
                'kinesis.ap-southeast-2.amazonaws.com',
                'kinesis.ap-northeast-1.amazonaws.com',
              ],
            },
            {
              group: 'Canada',
              suggestions: [
                'kinesis.ca-central-1.amazonaws.com',
                'kinesis.ca-west-1.amazonaws.com',
              ],
            },
            {
              group: 'China',
              suggestions: [
                'kinesis.cn-north-1.amazonaws.com.cn',
                'kinesis.cn-northwest-1.amazonaws.com.cn',
              ],
            },
            {
              group: 'Europe',
              suggestions: [
                'kinesis.eu-central-1.amazonaws.com',
                'kinesis.eu-west-1.amazonaws.com',
                'kinesis.eu-west-2.amazonaws.com',
                'kinesis.eu-south-1.amazonaws.com',
                'kinesis.eu-west-3.amazonaws.com',
                'kinesis.eu-south-2.amazonaws.com',
                'kinesis.eu-north-1.amazonaws.com',
                'kinesis.eu-central-2.amazonaws.com',
              ],
            },
            { group: 'Israel', suggestions: ['kinesis.il-central-1.amazonaws.com'] },
            {
              group: 'Middle East',
              suggestions: [
                'kinesis.me-south-1.amazonaws.com',
                'kinesis.me-central-1.amazonaws.com',
              ],
            },
            { group: 'South America', suggestions: ['kinesis.sa-east-1.amazonaws.com'] },
            {
              group: 'AWS GovCloud',
              suggestions: [
                'kinesis.us-gov-east-1.amazonaws.com',
                'kinesis.us-gov-west-1.amazonaws.com',
              ],
            },
          ],
          info: (
            <>
              The Amazon Kinesis stream endpoint for a region. You can find a list of endpoints{' '}
              <ExternalLink href="https://docs.aws.amazon.com/general/latest/gr/ak">
                here
              </ExternalLink>
              .
            </>
          ),
        },
        {
          name: 'awsAssumedRoleArn',
          label: 'AWS assumed role ARN',
          type: 'string',
          placeholder: 'optional',
          info: <>The AWS assumed role to use for additional permissions.</>,
        },
        {
          name: 'awsExternalId',
          label: 'AWS external ID',
          type: 'string',
          placeholder: 'optional',
          info: <>The AWS external id to use for additional permissions.</>,
        },
      ];
  }

  // Reached only when no case above matched the supplied combo type.
  throw new Error(`unknown input type ${ingestionComboType}`);
}