in mavibot/src/main/java/org/apache/directory/mavibot/btree/BulkLoader.java [102:179]
/**
 * Read the tuples supplied by the iterator in chunks holding at most <code>chunkSize</code>
 * distinct keys each. Key uniqueness is only tracked within a chunk : a key showing up in
 * two different chunks is counted once per chunk.
 * <ul>
 * <li>If the iterator is exhausted during the first chunk, nothing is flushed and the tuples
 * that were read are left in <code>tuples</code> for the caller.</li>
 * <li>Otherwise every chunk, including the last one, is handed to <code>flushToDisk</code>, the
 * returned file is appended to <code>sortedFiles</code>, and <code>tuples</code> is emptied
 * before returning.</li>
 * </ul>
 *
 * @param btree The BTree, passed through to <code>flushToDisk</code>
 * @param iterator The source of tuples
 * @param sortedFiles The list receiving one file per flushed chunk
 * @param tuples The working buffer ; cleared at the start of every chunk
 * @param chunkSize The maximum number of distinct keys per chunk ; must be positive as soon as
 * the iterator has something to return
 * @return The sum, over all the chunks, of the number of distinct keys read in each chunk
 * @throws IOException If <code>flushToDisk</code> fails
 * @throws IllegalArgumentException If <code>chunkSize</code> is not positive while the iterator
 * still has elements
 */
private static <K, V> int readElements( BTree<K, V> btree, Iterator<Tuple<K, V>> iterator, List<File> sortedFiles,
    List<Tuple<K, V>> tuples, int chunkSize ) throws IOException
{
    // With a non-positive chunk size the read loop below never consumes anything, so the
    // outer loop would flush empty chunks forever as long as the iterator has data.
    if ( ( chunkSize <= 0 ) && iterator.hasNext() )
    {
        throw new IllegalArgumentException( "chunkSize must be positive, got " + chunkSize );
    }

    int nbRead = 0;
    int nbIteration = 0;
    int nbElems = 0;
    boolean inMemory = true;
    Set<K> keys = new HashSet<K>();

    while ( true )
    {
        nbIteration++;
        tuples.clear();
        keys.clear();

        // Read up to chunkSize distinct keys. Tuples carrying an already seen key are
        // still stored, but they don't count toward the chunk size.
        while ( iterator.hasNext() && ( nbRead < chunkSize ) )
        {
            Tuple<K, V> tuple = iterator.next();
            tuples.add( tuple );

            // Set.add() returns true only when the key was not already present
            if ( keys.add( tuple.getKey() ) )
            {
                nbRead++;
            }
        }

        // Update the number of read elements
        nbElems += nbRead;

        // The input is exhausted either because the chunk is only partially filled, or
        // because the data ended exactly on a chunk boundary.
        if ( ( nbRead < chunkSize ) || !iterator.hasNext() )
        {
            if ( nbIteration > 1 )
            {
                // Earlier chunks are already on disk : this last one has to join them
                inMemory = false;
                sortedFiles.add( flushToDisk( nbIteration, tuples, btree ) );
            }

            // Otherwise everything was read in one round trip : the data stay in memory
            break;
        }

        // We have a full chunk and more data are pending : flush it and start a new chunk
        nbRead = 0;
        sortedFiles.add( flushToDisk( nbIteration, tuples, btree ) );
    }

    if ( !inMemory )
    {
        // Everything has been written to files, the buffer content is no longer needed
        tuples.clear();
    }

    return nbElems;
}