in poi-scratchpad/src/main/java/org/apache/poi/hwpf/model/PAPBinTable.java [101:264]
static void rebuild( final StringBuilder docText,
ComplexFileTable complexFileTable, List<PAPX> paragraphs )
{
long start = currentTimeMillis();
if ( complexFileTable != null )
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding PAPX from fast-saved SPRMs
for ( TextPiece textPiece : complexFileTable.getTextPieceTable()
.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
continue;
int igrpprl = prm.getIgrpprl();
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
{
LOG.atWarn().log("{}'s PRM references to unknown grpprl", textPiece);
continue;
}
boolean hasPap = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
.hasNext(); )
{
SprmOperation sprmOperation = iterator.next();
if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
{
hasPap = true;
break;
}
}
if ( hasPap )
{
SprmBuffer newSprmBuffer = new SprmBuffer( 2 );
newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer );
paragraphs.add( papx );
}
}
LOG.atDebug().log("Merged (?) with PAPX from complex file table in {} ms ({} elements in total)", box(currentTimeMillis() - start),box(paragraphs.size()));
start = currentTimeMillis();
}
List<PAPX> oldPapxSortedByEndPos = new ArrayList<>(paragraphs);
oldPapxSortedByEndPos.sort(PropertyNode.EndComparator);
LOG.atDebug().log("PAPX sorted by end position in {} ms", box(currentTimeMillis() - start));
start = currentTimeMillis();
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<>();
{
int counter = 0;
for ( PAPX papx : paragraphs )
{
papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
}
}
final Comparator<PAPX> papxFileOrderComparator = (o1, o2) -> {
Integer i1 = papxToFileOrder.get( o1 );
Integer i2 = papxToFileOrder.get( o2 );
return i1.compareTo( i2 );
};
LOG.atDebug().log("PAPX's order map created in {} ms", box(currentTimeMillis() - start));
start = currentTimeMillis();
List<PAPX> newPapxs = new LinkedList<>();
int lastParStart = 0;
int lastPapxIndex = 0;
for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
{
final char c = docText.charAt( charIndex );
if ( c != 13 && c != 7 && c != 12 )
continue;
final int startInclusive = lastParStart;
final int endExclusive = charIndex + 1;
boolean broken = false;
List<PAPX> papxs = new LinkedList<>();
for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
.size(); papxIndex++ )
{
broken = false;
PAPX papx = oldPapxSortedByEndPos.get( papxIndex );
assert startInclusive == 0
|| papxIndex + 1 == oldPapxSortedByEndPos.size()
|| papx.getEnd() > startInclusive;
if ( papx.getEnd() - 1 > charIndex )
{
lastPapxIndex = papxIndex;
broken = true;
break;
}
papxs.add( papx );
}
if ( !broken )
{
lastPapxIndex = oldPapxSortedByEndPos.size() - 1;
}
if ( papxs.isEmpty() )
{
LOG.atWarn().log("Paragraph [{}; {}) has no PAPX. Creating new one.", box(startInclusive),box(endExclusive));
// create it manually
PAPX papx = new PAPX( startInclusive, endExclusive,
new SprmBuffer( 2 ) );
newPapxs.add( papx );
lastParStart = endExclusive;
continue;
}
if ( papxs.size() == 1 )
{
// can we reuse existing?
PAPX existing = papxs.get( 0 );
if ( existing.getStart() == startInclusive
&& existing.getEnd() == endExclusive )
{
newPapxs.add( existing );
lastParStart = endExclusive;
continue;
}
}
// restore file order of PAPX
papxs.sort(papxFileOrderComparator);
SprmBuffer sprmBuffer = null;
for ( PAPX papx : papxs )
{
if ( papx.getGrpprl() == null || papx.getGrpprl().length <= 2 )
continue;
if ( sprmBuffer == null ) {
sprmBuffer = papx.getSprmBuf().copy();
} else {
sprmBuffer.append( papx.getGrpprl(), 2 );
}
}
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer );
newPapxs.add( newPapx );
lastParStart = endExclusive;
continue;
}
paragraphs.clear();
paragraphs.addAll( newPapxs );
LOG.atDebug().log("PAPX rebuilded from document text in {} ms ({} elements)", box(currentTimeMillis() - start),box(paragraphs.size()));
}