export function getSecondaryPartitionRelatedFormFields()

in web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx [1769:1990]
218 lines of code
23 McCabe index (conditional complexity)

/**
 * Builds the form field definitions for the secondary partitioning settings of an ingestion spec.
 *
 * For `index_parallel` specs the fields live under `spec.tuningConfig.partitionsSpec` and are
 * shown or hidden depending on the selected partitioning type. For `kafka` and `kinesis` specs
 * only the two row limit fields directly under `spec.tuningConfig` are returned.
 *
 * @param spec - the (possibly partial) ingestion spec; only used to determine the spec type
 * @param dimensionSuggestions - dimension names offered as suggestions for the partition
 *   dimension(s); the first one is also used to prefill the partition dimension(s) by the
 *   `adjustment` callback of the partitioning type field
 * @returns the field definitions for the detected spec type
 * @throws Error if the spec type is not `index_parallel`, `kafka` or `kinesis`
 */
export function getSecondaryPartitionRelatedFormFields(
  spec: Partial<IngestionSpec>,
  dimensionSuggestions: string[] | undefined,
): Field<IngestionSpec>[] {
  const specType = getSpecType(spec);
  switch (specType) {
    case 'index_parallel':
      return getBatchSecondaryPartitionFields(dimensionSuggestions);

    case 'kafka':
    case 'kinesis':
      return getStreamingSecondaryPartitionFields();
  }

  throw new Error(`unknown spec type ${specType}`);
}

/** Whether `partitionsSpec.type` of the given spec is exactly `type`. */
function hasPartitionsSpecType(s: Partial<IngestionSpec>, type: string): boolean {
  return deepGet(s, 'spec.tuningConfig.partitionsSpec.type') === type;
}

/** Whether `partitionsSpec.type` of the given spec is `single_dim` or `range`. */
function hasDimensionPartitionsSpecType(s: Partial<IngestionSpec>): boolean {
  return oneOf(deepGet(s, 'spec.tuningConfig.partitionsSpec.type'), 'single_dim', 'range');
}

/** Whether the `partitionsSpec` property `key` is unset or otherwise falsy (so `0` counts as unset). */
function lacksPartitionsSpecValue(s: Partial<IngestionSpec>, key: string): boolean {
  return !deepGet(s, `spec.tuningConfig.partitionsSpec.${key}`);
}

/** Whether neither `targetRowsPerSegment` nor `maxRowsPerSegment` is set in `partitionsSpec`. */
function lacksPartitionsSpecRowLimit(s: Partial<IngestionSpec>): boolean {
  return (
    lacksPartitionsSpecValue(s, 'targetRowsPerSegment') &&
    lacksPartitionsSpecValue(s, 'maxRowsPerSegment')
  );
}

/**
 * Prefills the partition dimension(s) with the first suggested dimension when the partitioning
 * type is `range` (sets `partitionDimensions`) or `single_dim` (sets `partitionDimension`).
 * Returns the spec unchanged when there are no suggestions or for any other partitioning type.
 */
function prefillPartitionDimensions(
  s: Partial<IngestionSpec>,
  dimensionSuggestions: string[] | undefined,
): Partial<IngestionSpec> {
  if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) return s;

  switch (deepGet(s, 'spec.tuningConfig.partitionsSpec.type')) {
    case 'range':
      return deepSet(s, 'spec.tuningConfig.partitionsSpec.partitionDimensions', [
        dimensionSuggestions[0],
      ]);

    case 'single_dim':
      return deepSet(
        s,
        'spec.tuningConfig.partitionsSpec.partitionDimension',
        dimensionSuggestions[0],
      );

    default:
      return s;
  }
}

/** Field definitions for `index_parallel` specs, all located under `spec.tuningConfig.partitionsSpec`. */
function getBatchSecondaryPartitionFields(
  dimensionSuggestions: string[] | undefined,
): Field<IngestionSpec>[] {
  return [
    {
      name: 'spec.tuningConfig.partitionsSpec.type',
      label: 'Partitioning type',
      type: 'string',
      required: true,
      suggestions: ['dynamic', 'hashed', 'range'],
      info: (
        <p>
          For perfect rollup, you should use either <Code>hashed</Code> (partitioning based on
          the hash of dimensions in each row) or <Code>range</Code> (based on several
          dimensions). For best-effort rollup, you should use <Code>dynamic</Code>.
        </p>
      ),
      adjustment: s => prefillPartitionDimensions(s, dimensionSuggestions),
    },
    // partitionsSpec type: dynamic
    {
      name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
      type: 'number',
      defaultValue: 5000000,
      defined: s => hasPartitionsSpecType(s, 'dynamic'),
      info: <>Determines how many rows are in each segment.</>,
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.maxTotalRows',
      type: 'number',
      defaultValue: 20000000,
      defined: s => hasPartitionsSpecType(s, 'dynamic'),
      info: <>Total number of rows in segments waiting for being pushed.</>,
    },
    // partitionsSpec type: hashed
    // targetRowsPerSegment and numShards hide each other: each is only shown while the other is unset
    {
      name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defaultValue: 5000000,
      defined: s => hasPartitionsSpecType(s, 'hashed') && lacksPartitionsSpecValue(s, 'numShards'),
      info: (
        <>
          <p>
            If the segments generated are a sub-optimal size for the requested partition
            dimensions, consider setting this field.
          </p>
          <p>
            A target row count for each partition. Each partition will have a row count close to
            the target assuming evenly distributed keys. Defaults to 5 million if numShards is
            null.
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.numShards',
      type: 'number',
      zeroMeansUndefined: true,
      hideInMore: true,
      defined: s =>
        hasPartitionsSpecType(s, 'hashed') && lacksPartitionsSpecValue(s, 'targetRowsPerSegment'),
      info: (
        <>
          <p>
            If you know the optimal number of shards and want to speed up the time it takes for
            compaction to run, set this field.
          </p>
          <p>
            Directly specify the number of shards to create. If this is specified and
            &apos;intervals&apos; is specified in the granularitySpec, the index task can skip
            the determine intervals/partitions pass through the data.
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimensions',
      type: 'string-array',
      placeholder: '(all dimensions)',
      defined: s => hasPartitionsSpecType(s, 'hashed'),
      info: (
        <>
          <p>The dimensions to partition on.</p>
          <p>Leave blank to select all dimensions.</p>
          <p>
            If you want to partition on specific dimensions then you would likely be better off
            using <Code>range</Code> partitioning instead.
          </p>
        </>
      ),
      hideInMore: true,
    },
    // partitionsSpec type: single_dim, range
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimension',
      type: 'string',
      defined: s => hasPartitionsSpecType(s, 'single_dim'),
      required: true,
      suggestions: dimensionSuggestions,
      info: (
        <>
          <p>The dimension to partition on.</p>
          <p>
            This should be the first dimension in your schema which would make it first in the
            sort order. As{' '}
            <ExternalLink
              href={`${getLink('DOCS')}/ingestion/partitioning#partitioning-and-sorting`}
            >
              Partitioning and sorting are best friends!
            </ExternalLink>
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimensions',
      type: 'string-array',
      defined: s => hasPartitionsSpecType(s, 'range'),
      required: true,
      // Only offer the dimensions that have not been picked yet
      suggestions: dimensionSuggestions
        ? s => {
            const existingDimensions =
              deepGet(s, 'spec.tuningConfig.partitionsSpec.partitionDimensions') || [];
            return dimensionSuggestions.filter(
              dimensionSuggestion => !existingDimensions.includes(dimensionSuggestion),
            );
          }
        : undefined,
      info: <p>The dimensions to partition on.</p>,
    },
    // targetRowsPerSegment and maxRowsPerSegment hide each other, and each is flagged as required
    // for as long as neither of them is set
    {
      name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defined: s =>
        hasDimensionPartitionsSpecType(s) && lacksPartitionsSpecValue(s, 'maxRowsPerSegment'),
      required: lacksPartitionsSpecRowLimit,
      info: (
        <p>
          Target number of rows to include in a partition, should be a number that targets
          segments of 500MB~1GB.
        </p>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defined: s =>
        hasDimensionPartitionsSpecType(s) && lacksPartitionsSpecValue(s, 'targetRowsPerSegment'),
      required: lacksPartitionsSpecRowLimit,
      info: <p>Maximum number of rows to include in a partition.</p>,
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.assumeGrouped',
      type: 'boolean',
      defaultValue: false,
      hideInMore: true,
      defined: hasDimensionPartitionsSpecType,
      info: (
        <p>
          Assume that input data has already been grouped on time and dimensions. Ingestion will
          run faster, but may choose sub-optimal partitions if this assumption is violated.
        </p>
      ),
    },
  ];
}

/** Field definitions for `kafka` and `kinesis` specs, located directly under `spec.tuningConfig`. */
function getStreamingSecondaryPartitionFields(): Field<IngestionSpec>[] {
  return [
    {
      name: 'spec.tuningConfig.maxRowsPerSegment',
      type: 'number',
      defaultValue: 5000000,
      info: <>Determines how many rows are in each segment.</>,
    },
    {
      name: 'spec.tuningConfig.maxTotalRows',
      type: 'number',
      defaultValue: 20000000,
      info: <>Total number of rows in segments waiting for being pushed.</>,
    },
  ];
}