in web-console/src/druid-models/ingestion-spec/ingestion-spec.tsx [1769:1990]
/**
 * Builds the form field definitions used to edit the secondary partitioning settings of an
 * ingestion spec.
 *
 * - `index_parallel`: a required `partitionsSpec.type` selector followed by the fields under
 *   `spec.tuningConfig.partitionsSpec`; every one of those remaining fields carries a `defined`
 *   predicate keyed on the partitioning type (`dynamic`, `hashed`, `single_dim`, `range`).
 * - `kafka` / `kinesis`: `maxRowsPerSegment` and `maxTotalRows` directly under
 *   `spec.tuningConfig`.
 *
 * @param spec - the (possibly partial) ingestion spec; only its type, as reported by
 *   `getSpecType`, decides which field list is returned
 * @param dimensionSuggestions - candidate dimension names offered as suggestions on the partition
 *   dimension fields; the first entry is also what the type field's `adjustment` callback writes
 *   into the partition dimension(s) for the `range` and `single_dim` types
 * @returns the field list matching the spec type
 * @throws Error when the spec type is none of the types handled above
 */
export function getSecondaryPartitionRelatedFormFields(
  spec: Partial<IngestionSpec>,
  dimensionSuggestions: string[] | undefined,
): Field<IngestionSpec>[] {
  const specType = getSpecType(spec);

  // Streaming specs keep their row limits directly on the tuningConfig.
  if (specType === 'kafka' || specType === 'kinesis') {
    return [
      {
        name: 'spec.tuningConfig.maxRowsPerSegment',
        type: 'number',
        defaultValue: 5000000,
        info: <>Determines how many rows are in each segment.</>,
      },
      {
        name: 'spec.tuningConfig.maxTotalRows',
        type: 'number',
        defaultValue: 20000000,
        info: <>Total number of rows in segments waiting for being pushed.</>,
      },
    ];
  }

  if (specType !== 'index_parallel') {
    throw new Error(`unknown spec type ${specType}`);
  }

  // Reads a single key from underneath spec.tuningConfig.partitionsSpec.
  const readPartitionsSpec = (s: Partial<IngestionSpec>, key: string) =>
    deepGet(s, `spec.tuningConfig.partitionsSpec.${key}`);

  // True when the partitioning type is one of the two dimension-based types.
  const isSingleDimOrRange = (s: Partial<IngestionSpec>) =>
    oneOf(readPartitionsSpec(s, 'type'), 'single_dim', 'range');

  // Shared by the single_dim / range row-limit fields: one of the two has to be provided.
  const neitherRowLimitSet = (s: Partial<IngestionSpec>) =>
    !readPartitionsSpec(s, 'targetRowsPerSegment') && !readPartitionsSpec(s, 'maxRowsPerSegment');

  return [
    {
      name: 'spec.tuningConfig.partitionsSpec.type',
      label: 'Partitioning type',
      type: 'string',
      required: true,
      suggestions: ['dynamic', 'hashed', 'range'],
      info: (
        <p>
          For perfect rollup, you should use either <Code>hashed</Code> (partitioning based on
          the hash of dimensions in each row) or <Code>range</Code> (based on several
          dimensions). For best-effort rollup, you should use <Code>dynamic</Code>.
        </p>
      ),
      adjustment: s => {
        // Without any suggested dimension there is nothing to pre-fill.
        if (!Array.isArray(dimensionSuggestions) || !dimensionSuggestions.length) {
          return s;
        }

        const firstSuggestion = dimensionSuggestions[0];
        switch (readPartitionsSpec(s, 'type')) {
          case 'range':
            return deepSet(s, 'spec.tuningConfig.partitionsSpec.partitionDimensions', [
              firstSuggestion,
            ]);

          case 'single_dim':
            return deepSet(
              s,
              'spec.tuningConfig.partitionsSpec.partitionDimension',
              firstSuggestion,
            );

          default:
            return s;
        }
      },
    },

    // ---- partitionsSpec type: dynamic ----
    {
      name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
      type: 'number',
      defaultValue: 5000000,
      defined: s => readPartitionsSpec(s, 'type') === 'dynamic',
      info: <>Determines how many rows are in each segment.</>,
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.maxTotalRows',
      type: 'number',
      defaultValue: 20000000,
      defined: s => readPartitionsSpec(s, 'type') === 'dynamic',
      info: <>Total number of rows in segments waiting for being pushed.</>,
    },

    // ---- partitionsSpec type: hashed ----
    // targetRowsPerSegment and numShards each drop out once the other one holds a value.
    {
      name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defaultValue: 5000000,
      defined: s =>
        readPartitionsSpec(s, 'type') === 'hashed' && !readPartitionsSpec(s, 'numShards'),
      info: (
        <>
          <p>
            If the segments generated are a sub-optimal size for the requested partition
            dimensions, consider setting this field.
          </p>
          <p>
            A target row count for each partition. Each partition will have a row count close to
            the target assuming evenly distributed keys. Defaults to 5 million if numShards is
            null.
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.numShards',
      type: 'number',
      zeroMeansUndefined: true,
      hideInMore: true,
      defined: s =>
        readPartitionsSpec(s, 'type') === 'hashed' &&
        !readPartitionsSpec(s, 'targetRowsPerSegment'),
      info: (
        <>
          <p>
            If you know the optimal number of shards and want to speed up the time it takes for
            compaction to run, set this field.
          </p>
          <p>
            Directly specify the number of shards to create. If this is specified and
            'intervals' is specified in the granularitySpec, the index task can skip
            the determine intervals/partitions pass through the data.
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimensions',
      type: 'string-array',
      placeholder: '(all dimensions)',
      defined: s => readPartitionsSpec(s, 'type') === 'hashed',
      info: (
        <>
          <p>The dimensions to partition on.</p>
          <p>Leave blank to select all dimensions.</p>
          <p>
            If you want to partition on specific dimensions then you would likely be better off
            using <Code>range</Code> partitioning instead.
          </p>
        </>
      ),
      hideInMore: true,
    },

    // ---- partitionsSpec type: single_dim, range ----
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimension',
      type: 'string',
      defined: s => readPartitionsSpec(s, 'type') === 'single_dim',
      required: true,
      suggestions: dimensionSuggestions,
      info: (
        <>
          <p>The dimension to partition on.</p>
          <p>
            This should be the first dimension in your schema which would make it first in the
            sort order. As{' '}
            <ExternalLink
              href={`${getLink('DOCS')}/ingestion/partitioning#partitioning-and-sorting`}
            >
              Partitioning and sorting are best friends!
            </ExternalLink>
          </p>
        </>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.partitionDimensions',
      type: 'string-array',
      defined: s => readPartitionsSpec(s, 'type') === 'range',
      required: true,
      // Only suggest dimensions that are not already part of the current selection.
      suggestions: dimensionSuggestions
        ? s => {
            const alreadyPicked = readPartitionsSpec(s, 'partitionDimensions') || [];
            return dimensionSuggestions.filter(candidate => !alreadyPicked.includes(candidate));
          }
        : undefined,
      info: <p>The dimensions to partition on.</p>,
    },
    // targetRowsPerSegment and maxRowsPerSegment each drop out once the other one holds a value.
    {
      name: 'spec.tuningConfig.partitionsSpec.targetRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defined: s => isSingleDimOrRange(s) && !readPartitionsSpec(s, 'maxRowsPerSegment'),
      required: neitherRowLimitSet,
      info: (
        <p>
          Target number of rows to include in a partition, should be a number that targets
          segments of 500MB~1GB.
        </p>
      ),
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.maxRowsPerSegment',
      type: 'number',
      zeroMeansUndefined: true,
      defined: s => isSingleDimOrRange(s) && !readPartitionsSpec(s, 'targetRowsPerSegment'),
      required: neitherRowLimitSet,
      info: <p>Maximum number of rows to include in a partition.</p>,
    },
    {
      name: 'spec.tuningConfig.partitionsSpec.assumeGrouped',
      type: 'boolean',
      defaultValue: false,
      hideInMore: true,
      defined: isSingleDimOrRange,
      info: (
        <p>
          Assume that input data has already been grouped on time and dimensions. Ingestion will
          run faster, but may choose sub-optimal partitions if this assumption is violated.
        </p>
      ),
    },
  ];
}