in XForm/XForm/Accessory/PerformanceComparisons.cs [357:447]
/// <summary>
///  Benchmark comparing several ways of walking a generated TSV sample:
///  a raw buffered read, StreamReader.ReadLine + string.Split, the Elfie
///  tabular reader, and the XForm native splitter (serial and parallel).
/// </summary>
public void TsvSplit()
{
    // Swap in a FileStream to benchmark against a file on disk instead of memory:
    //   new FileStream("Sample.tsv", FileMode.Create)
    using (Stream tsvStream = new MemoryStream())
    {
        int rowCount = 1000 * 1000;

        // Use rowCount here so the benchmark label and the generated sample cannot drift apart.
        // NOTE(review): arguments are presumably (stream, columnCount, rowCount) - confirm against WriteSampleTsv.
        WriteSampleTsv(tsvStream, 5, rowCount);

        // Length is fixed once the sample is written; compute the item count for the benchmarks once.
        int byteCount = (int)tsvStream.Length;

        // Scratch buffer and bit vectors for the block-at-a-time measurements.
        byte[] content = new byte[64 * 1024];
        BitVector cells = new BitVector(content.Length);
        BitVector rows = new BitVector(content.Length);
        int[] rowEnds = new int[1024];

        // Whole-sample copy and bit vectors for the parallel measurement.
        byte[] allContent = new byte[tsvStream.Length];
        tsvStream.Seek(0, SeekOrigin.Begin);

        // Stream.Read may return fewer bytes than requested, so keep reading until the buffer is full.
        int filled = 0;
        while (filled < allContent.Length)
        {
            int justRead = tsvStream.Read(allContent, filled, allContent.Length - filled);
            if (justRead == 0) break;
            filled += justRead;
        }

        BitVector allCells = new BitVector(allContent.Length);
        BitVector allRows = new BitVector(allContent.Length);

        using (Benchmarker b = new Benchmarker($"Tsv Parse [{rowCount:n0}] | count", DefaultMeasureMilliseconds))
        {
            // Baseline: pull the bytes through the buffer without parsing anything.
            b.Measure("Read only", byteCount, () =>
            {
                tsvStream.Seek(0, SeekOrigin.Begin);

                while (true)
                {
                    int lengthRead = tsvStream.Read(content, 0, content.Length);
                    if (lengthRead == 0) break;
                }

                return rowCount;
            });

            b.Measure("ReadLine | Split", byteCount, () =>
            {
                tsvStream.Seek(0, SeekOrigin.Begin);
                int count = 0;

                // Deliberately not disposed: disposing a StreamReader closes the underlying
                // stream, and tsvStream is reused by the measurements that follow.
                StreamReader reader = new StreamReader(tsvStream);

                // Header row
                reader.ReadLine();

                while (!reader.EndOfStream)
                {
                    string line = reader.ReadLine();

                    // The split result is discarded; only the cost of splitting is being measured.
                    line.Split('\t');
                    count++;
                }

                return count;
            });

            b.Measure("Elfie TsvReader", byteCount, () =>
            {
                tsvStream.Seek(0, SeekOrigin.Begin);
                int count = 0;

                // NOTE(review): reader is not disposed, presumably so tsvStream stays open - confirm.
                ITabularReader reader = TabularFactory.BuildReader(tsvStream, "Unused.tsv");
                while (reader.NextRow()) count++;

                return count;
            });

            Func<byte[], int, int, ulong[], ulong[], int> splitTsvN = NativeAccelerator.GetMethod<Func<byte[], int, int, ulong[], ulong[], int>>("XForm.Native.String8N", "SplitTsv");

            b.Measure("XForm Native Split", byteCount, () =>
            {
                tsvStream.Seek(0, SeekOrigin.Begin);

                // NOTE(review): starts at -1, presumably so the header row is not counted - confirm.
                int count = -1;

                while (true)
                {
                    int lengthRead = tsvStream.Read(content, 0, content.Length);
                    if (lengthRead == 0) break;

                    // Zero the tail after a short read so bytes left over from the previous block are gone.
                    if (lengthRead < content.Length) Array.Clear(content, lengthRead, content.Length - lengthRead);

                    int lineCount = splitTsvN(content, 0, lengthRead, cells.Array, rows.Array);
                    count += lineCount;

                    // Page result is discarded; the call is kept because it is part of the measured work.
                    int fromRow = 0;
                    cells.Page(rowEnds, ref fromRow);
                }

                return count;
            });

            // NOTE(review): one is subtracted on every invocation and all invocations write to the same
            // allCells/allRows arrays; whether that is correct depends on how MeasureParallel
            // partitions the range - confirm.
            b.MeasureParallel("XForm Native Split Parallel", byteCount, (index, length) =>
            {
                return splitTsvN(allContent, index, length, allCells.Array, allRows.Array) - 1;
            });
        }
    }
}