in code/KustoCopyConsole/Runner/ExportingRunner.cs [165:210]
/// <summary>
/// Chooses which of the <paramref name="candidates"/> should be lined up next.
/// </summary>
/// <remarks>
/// If the iteration already holds <c>MIN_STAT_COUNT</c> blocks that are in state
/// <see cref="BlockState.Exported"/> or later, have a recorded export duration and
/// exported more than <c>MIN_ROW_COUNT_STATS</c> rows, their observed throughput is used
/// to derive a target row count and the decision is delegated to
/// <c>GetReplannedLineup</c>.  Otherwise the first <paramref name="freeCapacity"/>
/// candidates, ordered by <c>IngestionTimeStart</c>, are returned unchanged.
/// </remarks>
/// <param name="iterationKey">Activity name / iteration id used to look up the cached blocks.</param>
/// <param name="candidates">Blocks eligible for the lineup.</param>
/// <param name="freeCapacity">
/// Maximum number of blocks taken in the fallback path; also forwarded to
/// <c>GetReplannedLineup</c>.
/// </param>
/// <returns>The blocks selected for the lineup.</returns>
private IEnumerable<BlockRowItem> GetLineup(
    IterationKey iterationKey,
    IEnumerable<BlockRowItem> candidates,
    int freeCapacity)
{
    const int MIN_STAT_COUNT = 5;
    const long MIN_ROW_COUNT_STATS = 100000;
    const long MAX_ROW_COUNT = 16000000;
    const int LEAP_RATIO = 3;
    var MAX_EXPORT_DURATION = TimeSpan.FromMinutes(4);

    // Statistics sample:  the largest already-exported blocks of this iteration
    var statBlocks = RowItemGateway.InMemoryCache
        .ActivityMap[iterationKey.ActivityName]
        .IterationMap[iterationKey.IterationId]
        .BlockMap
        .Values
        .Select(c => c.RowItem)
        .Where(b => b.State >= BlockState.Exported)
        // A block without a recorded duration can't contribute to throughput stats
        // (and would otherwise make the duration sum below throw)
        .Where(b => b.ExportDuration != null)
        // We want representative exports, i.e. of meaningful size
        .Where(b => b.ExportedRowCount > MIN_ROW_COUNT_STATS)
        .OrderByDescending(b => b.ExportedRowCount)
        .Take(MIN_STAT_COUNT)
        .ToImmutableArray();

    if (statBlocks.Length != MIN_STAT_COUNT)
    {   // Not enough statistics yet:  just return the top blocks
        return candidates
            .OrderBy(b => b.IngestionTimeStart)
            .Take(freeCapacity)
            .ToImmutableArray();
    }

    // Replan blocks
    var totalSeconds = statBlocks.Sum(b => b.ExportDuration!.Value.TotalSeconds);
    var totalRows = statBlocks.Sum(b => b.ExportedRowCount);
    var maxRowCount = statBlocks.Max(b => b.ExportedRowCount);
    // Number of rows that fit in MAX_EXPORT_DURATION at the observed throughput.
    // Computed directly in floating point:  a per-row duration is far too small to
    // round-trip through TimeSpan without losing precision.
    // totalRows is strictly positive (every sampled block has > MIN_ROW_COUNT_STATS rows);
    // a zero total duration yields +Infinity, which the Math.Min below caps.
    var rowCountWithinDuration =
        MAX_EXPORT_DURATION.TotalSeconds * totalRows / totalSeconds;
    // Never less than one row, never more than MAX_ROW_COUNT, and never more than
    // LEAP_RATIO times the biggest export observed so far
    var targetRowCount = Math.Max(
        1,
        Math.Min(
            Math.Min(MAX_ROW_COUNT, LEAP_RATIO * maxRowCount),
            rowCountWithinDuration));

    return GetReplannedLineup(candidates, targetRowCount, freeCapacity);
}