export function getIoConfigFormFields()

in web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx [615:1323]


/**
 * Builds the form field definitions used to edit the `ioConfig` of an ingestion spec.
 *
 * The returned list depends on the ingestion combo type: every `index_parallel:*` variant starts
 * with the shared "Source type" field followed by the `inputSource.*` fields for that input
 * source, while `kafka` and `kinesis` return their own streaming-specific fields.
 *
 * @param ingestionComboType - the combined ingestion / input source type to build fields for
 * @returns the ordered list of fields for the given type
 * @throws Error if `ingestionComboType` is not one of the cases handled below
 */
export function getIoConfigFormFields(ingestionComboType: IngestionComboType): Field<IoConfig>[] {
  // Shared by every `index_parallel:*` case: lets the user switch the input source type.
  const inputSourceType: Field<IoConfig> = {
    name: 'inputSource.type',
    label: 'Source type',
    type: 'string',
    suggestions: ['local', 'http', 'inline', 'delta', 's3', 'azureStorage', 'google', 'hdfs'],
    info: (
      <p>
        Druid connects to raw data through{' '}
        <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources`}>
          inputSources
        </ExternalLink>
        . You can change your selected inputSource here.
      </p>
    ),
  };

  // Shared by the cloud object store cases (S3, Azure, Google).
  const inputSourceObjectGlob: Field<IoConfig> = {
    name: 'inputSource.objectGlob',
    label: 'Object glob',
    type: 'string',
    suggestions: OBJECT_GLOB_SUGGESTIONS,
    placeholder: '(all files)',
    info: (
      <>
        <p>A glob for the object part of the URI.</p>
        <p>
          The glob must match the entire object part, not just the filename. For example, the glob{' '}
          <Code>*.json</Code> does not match <Code>/bar/file.json</Code>, because the{' '}
          <Code>*</Code> does not match the slash. To match all objects ending in <Code>.json</Code>
          , use <Code>**.json</Code> instead.
        </p>
        <p>
          For more information, refer to the documentation for{' '}
          <ExternalLink href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/nio/file/FileSystem.html#getPathMatcher(java.lang.String)">
            FileSystem#getPathMatcher
          </ExternalLink>
          .
        </p>
      </>
    ),
  };

  switch (ingestionComboType) {
    case 'index_parallel:http':
      return [
        inputSourceType,
        {
          name: 'inputSource.uris',
          label: 'URIs',
          type: 'string-array',
          placeholder:
            'https://example.com/path/to/file1.ext, https://example.com/path/to/file2.ext',
          required: true,
          info: (
            <p>
              The full URI of your file. To ingest from multiple URIs, use commas to separate each
              individual URI.
            </p>
          ),
        },
        {
          name: 'inputSource.httpAuthenticationUsername',
          label: 'HTTP auth username',
          type: 'string',
          placeholder: '(optional)',
          info: <p>Username to use for authentication with specified URIs</p>,
        },
        {
          name: 'inputSource.httpAuthenticationPassword',
          label: 'HTTP auth password',
          type: 'string',
          placeholder: '(optional)',
          info: <p>Password to use for authentication with specified URIs</p>,
        },
      ];

    case 'index_parallel:local':
      return [
        inputSourceType,
        {
          name: 'inputSource.baseDir',
          label: 'Base directory',
          type: 'string',
          placeholder: '/path/to/files/',
          required: true,
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources`}>
                inputSource.baseDir
              </ExternalLink>
              <p>Specifies the directory to search recursively for files to be ingested.</p>
            </>
          ),
        },
        {
          name: 'inputSource.filter',
          label: 'File filter',
          type: 'string',
          required: true,
          suggestions: FILTER_SUGGESTIONS,
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/native-batch#local-input-source`}>
                inputSource.filter
              </ExternalLink>
              <p>
                A wildcard filter for files. See{' '}
                <ExternalLink href="https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/filefilter/WildcardFileFilter">
                  here
                </ExternalLink>{' '}
                for format information. Files matching the filter criteria are considered for
                ingestion. Files not matching the filter criteria are ignored.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:druid':
      return [
        inputSourceType,
        {
          name: 'inputSource.dataSource',
          label: 'Datasource',
          type: 'string',
          required: true,
          info: <p>The datasource to fetch rows from.</p>,
        },
        {
          name: 'inputSource.interval',
          label: 'Interval',
          type: 'interval',
          placeholder: `${CURRENT_YEAR}-01-01/${CURRENT_YEAR + 1}-01-01`,
          required: true,
          info: (
            <p>
              A String representing ISO-8601 Interval. This defines the time range to fetch the data
              over.
            </p>
          ),
        },
        {
          name: 'inputSource.filter',
          label: 'Filter',
          type: 'json',
          placeholder: '(optional)',
          hideInMore: true,
          info: (
            <p>
              The <ExternalLink href={`${getLink('DOCS')}/querying/filters`}>filter</ExternalLink>{' '}
              to apply to the data as part of querying.
            </p>
          ),
        },
      ];

    case 'index_parallel:inline':
      return [
        inputSourceType,
        // do not add 'data' here as it has special handling in the load-data view
      ];

    case 'index_parallel:s3':
      return [
        inputSourceType,
        {
          name: 'inputSource.uris',
          label: 'S3 URIs',
          type: 'string-array',
          placeholder: 's3://your-bucket/some-file1.ext, s3://your-bucket/some-file2.ext',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full S3 URI of your file. To ingest from multiple URIs, use commas to separate
                each individual URI.
              </p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'S3 prefixes',
          type: 'string-array',
          placeholder: 's3://your-bucket/some-path1, s3://your-bucket/some-path2',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'S3 objects',
          type: 'json',
          placeholder: '{"bucket":"your-bucket", "path":"some-file.ext"}',
          required: true,
          // Only offered when the spec already carries an `objects` value.
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink href={`${getLink('DOCS')}/ingestion/input-sources#s3-input-source`}>
                  S3 Objects
                </ExternalLink>
                .
              </p>
              <p>Either S3 URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
        {
          name: 'inputSource.properties.accessKeyId.type',
          label: 'Access key ID type',
          type: 'string',
          suggestions: [undefined, 'environment', 'default'],
          placeholder: '(none)',
          info: (
            <>
              <p>S3 access key type.</p>
              <p>Setting this will override the default configuration provided in the config.</p>
              <p>
                The access key can be pulled from an environment variable or inlined in the
                ingestion spec (default).
              </p>
              <p>
                Note: Inlining the access key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
          // Mirror the chosen access key ID type onto the secret access key type.
          adjustment: ioConfig => {
            return deepSet(
              ioConfig,
              'inputSource.properties.secretAccessKey.type',
              deepGet(ioConfig, 'inputSource.properties.accessKeyId.type'),
            );
          },
        },
        {
          name: 'inputSource.properties.accessKeyId.variable',
          label: 'Access key ID environment variable',
          type: 'string',
          placeholder: '(environment variable name)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.accessKeyId.type') === 'environment',
          info: <p>The environment variable containing the S3 access key for this S3 bucket.</p>,
        },
        {
          name: 'inputSource.properties.accessKeyId.password',
          label: 'Access key ID value',
          type: 'string',
          placeholder: '(access key)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.accessKeyId.type') === 'default',
          info: (
            <>
              <p>S3 access key for this S3 bucket.</p>
              <p>
                Note: Inlining the access key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },

        {
          name: 'inputSource.properties.secretAccessKey.type',
          label: 'Secret access key type',
          type: 'string',
          suggestions: [undefined, 'environment', 'default'],
          placeholder: '(none)',
          info: (
            <>
              <p>S3 secret key type.</p>
              <p>Setting this will override the default configuration provided in the config.</p>
              <p>
                The secret key can be pulled from an environment variable or inlined in the
                ingestion spec (default).
              </p>
              <p>
                Note: Inlining the secret key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
        {
          name: 'inputSource.properties.secretAccessKey.variable',
          label: 'Secret access key environment variable',
          type: 'string',
          placeholder: '(environment variable name)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.secretAccessKey.type') === 'environment',
          info: <p>The environment variable containing the S3 secret key for this S3 bucket.</p>,
        },
        {
          name: 'inputSource.properties.secretAccessKey.password',
          label: 'Secret access key value',
          type: 'string',
          placeholder: '(secret key)',
          defined: ioConfig =>
            deepGet(ioConfig, 'inputSource.properties.secretAccessKey.type') === 'default',
          info: (
            <>
              <p>S3 secret key for this S3 bucket.</p>
              <p>
                Note: Inlining the secret key into the ingestion spec is dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:azureStorage':
      return [
        inputSourceType,
        {
          name: 'inputSource.uris',
          label: 'Azure URIs',
          type: 'string-array',
          placeholder:
            'azureStorage://your-storage-account/your-container/some-file1.ext, azureStorage://your-storage-account/your-container/some-file2.ext',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full Azure URI of your file. To ingest from multiple URIs, use commas to
                separate each individual URI.
              </p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'Azure prefixes',
          type: 'string-array',
          placeholder:
            'azureStorage://your-storage-account/your-container/some-path1, azureStorage://your-storage-account/your-container/some-path2',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'Azure objects',
          type: 'json',
          placeholder: '{"bucket":"your-storage-account", "path":"your-container/some-file.ext"}',
          required: true,
          // Only offered when the spec already carries an `objects` value.
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink
                  href={`${getLink('DOCS')}/ingestion/input-sources#azure-input-source`}
                >
                  Azure Objects
                </ExternalLink>
                .
              </p>
              <p>Either Azure URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
        {
          name: 'inputSource.properties.sharedAccessStorageToken',
          label: 'Shared Access Storage Token',
          type: 'string',
          placeholder: '(sas token)',
          info: (
            <>
              <p>Shared Access Storage Token for this storage account.</p>
              <p>
                Note: Inlining the sas token into the ingestion spec can be dangerous as it might
                appear in server log files and can be seen by anyone accessing this console.
              </p>
            </>
          ),
        },
      ];

    case 'index_parallel:google':
      return [
        inputSourceType,
        {
          name: 'inputSource.uris',
          label: 'Google Cloud Storage URIs',
          type: 'string-array',
          placeholder: 'gs://your-bucket/some-file1.ext, gs://your-bucket/some-file2.ext',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.prefixes') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                The full Google Cloud Storage URI of your file. To ingest from multiple URIs, use
                commas to separate each individual URI.
              </p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.prefixes',
          label: 'Google Cloud Storage prefixes',
          type: 'string-array',
          placeholder: 'gs://your-bucket/some-path1, gs://your-bucket/some-path2',
          required: true,
          // Only offered while neither of the alternative ways to locate the data is set.
          defined: ioConfig =>
            !deepGet(ioConfig, 'inputSource.uris') && !deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>A list of paths (with bucket) where your files are stored.</p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        {
          name: 'inputSource.objects',
          label: 'Google Cloud Storage objects',
          type: 'json',
          placeholder: '{"bucket":"your-bucket", "path":"some-file.ext"}',
          required: true,
          // Only offered when the spec already carries an `objects` value.
          defined: ioConfig => deepGet(ioConfig, 'inputSource.objects'),
          info: (
            <>
              <p>
                JSON array of{' '}
                <ExternalLink
                  href={`${getLink(
                    'DOCS',
                  )}/ingestion/input-sources#google-cloud-storage-input-source`}
                >
                  Google Cloud Storage Objects
                </ExternalLink>
                .
              </p>
              <p>Either Google Cloud Storage URIs or prefixes or objects must be set.</p>
            </>
          ),
        },
        inputSourceObjectGlob,
      ];

    case 'index_parallel:delta':
      return [
        inputSourceType,
        {
          name: 'inputSource.tablePath',
          label: 'Delta table path',
          type: 'string',
          placeholder: '/path/to/deltaTable',
          required: true,
          info: <p>A full path to the Delta Lake table.</p>,
        },
        {
          name: 'inputSource.filter',
          label: 'Delta filter',
          type: 'json',
          placeholder: '{"type": "=", "column": "name", "value": "foo"}',
          info: (
            <>
              <ExternalLink
                href={`${getLink('DOCS')}/ingestion/input-sources/#delta-filter-object`}
              >
                filter
              </ExternalLink>
              <p>A Delta filter json object to filter Delta Lake scan files.</p>
            </>
          ),
        },
        {
          name: 'inputSource.snapshotVersion',
          label: 'Delta snapshot version',
          type: 'number',
          placeholder: '(latest)',
          info: (
            <>
              The snapshot version to read from the Delta table. By default, the latest snapshot is
              read.
            </>
          ),
        },
      ];

    case 'index_parallel:hdfs':
      return [
        inputSourceType,
        {
          name: 'inputSource.paths',
          label: 'Paths',
          type: 'string',
          placeholder: '/path/to/file.ext',
          required: true,
        },
      ];

    case 'kafka':
      return [
        {
          name: 'consumerProperties.{bootstrap.servers}',
          label: 'Bootstrap servers',
          type: 'string',
          required: true,
          placeholder: 'kafka_broker_host:9092',
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/kafka-ingestion#io-configuration`}>
                consumerProperties
              </ExternalLink>
              <p>
                A list of Kafka brokers in the form:{' '}
                <Code>{`<BROKER_1>:<PORT_1>,<BROKER_2>:<PORT_2>,...`}</Code>
              </p>
            </>
          ),
        },
        {
          name: 'topic',
          type: 'string',
          required: true,
          // `topic` and `topicPattern` are alternatives: each is hidden once the other is set.
          defined: ioConfig =>
            oneOfKnown(ioConfig.type, KNOWN_TYPES, 'kafka') && !ioConfig.topicPattern,
          placeholder: 'your_kafka_topic',
          info: 'The name of the Kafka topic to ingest from.',
        },
        {
          name: 'topicPattern',
          type: 'string',
          required: true,
          defined: ioConfig => oneOfKnown(ioConfig.type, KNOWN_TYPES, 'kafka') && !ioConfig.topic,
          placeholder: 'topic1|topic2',
          info: (
            <>
              <p>
                A regular expression that represents all topics to be ingested from. For example, to
                ingest data from <Code>clicks</Code> and <Code>impressions</Code>, you can set this
                to <Code>clicks|impressions</Code>. Alternatively, to ingest from all topics
                starting with <Code>metrics-</Code> set this to <Code>metrics-.*</Code>.
              </p>
              <p>
                If new topics are added to the cluster that match the regex, Druid will
                automatically start ingesting from those new topics.
              </p>
            </>
          ),
        },
        {
          name: 'consumerProperties',
          type: 'json',
          defaultValue: {},
          info: (
            <>
              <ExternalLink href={`${getLink('DOCS')}/ingestion/kafka-ingestion#io-configuration`}>
                consumerProperties
              </ExternalLink>
              <p>A map of properties to be passed to the Kafka consumer.</p>
            </>
          ),
        },
      ];

    case 'kinesis':
      return [
        {
          name: 'stream',
          type: 'string',
          placeholder: 'your-kinesis-stream',
          required: true,
          info: <>The Kinesis stream to read.</>,
        },
        {
          name: 'endpoint',
          type: 'string',
          defaultValue: 'kinesis.us-east-1.amazonaws.com',
          suggestions: [
            {
              group: 'US East',
              suggestions: [
                'kinesis.us-east-1.amazonaws.com',
                'kinesis-fips.us-east-1.amazonaws.com',
                'kinesis.us-east-2.amazonaws.com',
                'kinesis-fips.us-east-2.amazonaws.com',
              ],
            },
            {
              group: 'US Gameday Northeast',
              suggestions: ['kinesis.us-northeast-1.amazonaws.com'],
            },
            {
              group: 'US West',
              suggestions: [
                'kinesis.us-west-1.amazonaws.com',
                'kinesis-fips.us-west-1.amazonaws.com',
                'kinesis.us-west-2.amazonaws.com',
                'kinesis-fips.us-west-2.amazonaws.com',
              ],
            },
            { group: 'Africa', suggestions: ['kinesis.af-south-1.amazonaws.com'] },
            {
              group: 'Asia Pacific',
              suggestions: [
                'kinesis.ap-east-1.amazonaws.com',
                'kinesis.ap-south-2.amazonaws.com',
                'kinesis.ap-southeast-3.amazonaws.com',
                'kinesis.ap-southeast-5.amazonaws.com',
                'kinesis.ap-southeast-4.amazonaws.com',
                'kinesis.ap-south-1.amazonaws.com',
                'kinesis.ap-northeast-3.amazonaws.com',
                'kinesis.ap-northeast-2.amazonaws.com',
                'kinesis.ap-southeast-1.amazonaws.com',
                'kinesis.ap-southeast-2.amazonaws.com',
                'kinesis.ap-northeast-1.amazonaws.com',
              ],
            },
            {
              group: 'Canada',
              suggestions: [
                'kinesis.ca-central-1.amazonaws.com',
                'kinesis.ca-west-1.amazonaws.com',
              ],
            },
            {
              group: 'China',
              suggestions: [
                'kinesis.cn-north-1.amazonaws.com.cn',
                'kinesis.cn-northwest-1.amazonaws.com.cn',
              ],
            },
            {
              group: 'Europe',
              suggestions: [
                'kinesis.eu-central-1.amazonaws.com',
                'kinesis.eu-west-1.amazonaws.com',
                'kinesis.eu-west-2.amazonaws.com',
                'kinesis.eu-south-1.amazonaws.com',
                'kinesis.eu-west-3.amazonaws.com',
                'kinesis.eu-south-2.amazonaws.com',
                'kinesis.eu-north-1.amazonaws.com',
                'kinesis.eu-central-2.amazonaws.com',
              ],
            },
            { group: 'Israel', suggestions: ['kinesis.il-central-1.amazonaws.com'] },
            {
              group: 'Middle East',
              suggestions: [
                'kinesis.me-south-1.amazonaws.com',
                'kinesis.me-central-1.amazonaws.com',
              ],
            },
            { group: 'South America', suggestions: ['kinesis.sa-east-1.amazonaws.com'] },
            {
              group: 'AWS GovCloud',
              suggestions: [
                'kinesis.us-gov-east-1.amazonaws.com',
                'kinesis.us-gov-west-1.amazonaws.com',
              ],
            },
          ],
          info: (
            <>
              The Amazon Kinesis stream endpoint for a region. You can find a list of endpoints{' '}
              <ExternalLink href="https://docs.aws.amazon.com/general/latest/gr/ak">
                here
              </ExternalLink>
              .
            </>
          ),
        },
        {
          name: 'awsAssumedRoleArn',
          label: 'AWS assumed role ARN',
          type: 'string',
          placeholder: 'optional',
          info: <>The AWS assumed role to use for additional permissions.</>,
        },
        {
          name: 'awsExternalId',
          label: 'AWS external ID',
          type: 'string',
          placeholder: 'optional',
          info: <>The AWS external id to use for additional permissions.</>,
        },
      ];
  }

  // Reached only when none of the cases above returned.
  throw new Error(`unknown input type ${ingestionComboType}`);
}