in tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/chm/ChmLzxBlock.java [300:445]
/**
 * Decompresses an LZX aligned-offset block into {@code content}.
 * <p>
 * Decoding starts at the current content length and proceeds until index
 * {@code len} is reached. Each main-tree symbol is either a literal byte or a
 * match (length + offset). Match offsets are either freshly decoded (partly via
 * the aligned tree) or taken from the three offsets kept in the state
 * (R0, R1, R2), which are rotated as they are used.
 * <p>
 * When a match source lies before the start of {@code content}, the bytes are
 * taken from the tail of {@code prevcontent}; a match that straddles the
 * boundary continues from the beginning of {@code content}.
 *
 * @param len         exclusive end index to decode up to; becomes the new content length
 * @param prevcontent bytes that precede {@code content}, used as the source for
 *                    matches that reach back past index 0; may only be {@code null}
 *                    if no match reaches back that far
 * @throws TikaException if the section/state is missing, the main-tree lookup index
 *                       is out of range, or a match refers to data that is not
 *                       available in {@code prevcontent}
 */
private void decompressAlignedBlock(int len, byte[] prevcontent) throws TikaException {
    if ((getChmSection() == null) || (getState() == null) ||
            (getState().getMainTreeTable() == null)) {
        throw new ChmParsingException("chm section is null");
    }
    short s;
    int x, i, border;
    int matchlen = 0, matchfooter = 0, extra, rundest, runsrc;
    int matchoffset = 0;
    for (i = getContentLength(); i < len; i++) {
        // Peek (without consuming) the lookup index into the main tree table and
        // make sure it is usable before indexing with it.
        border = getChmSection().peekBits(ChmConstants.LZX_MAINTREE_TABLEBITS);
        if (border >= getState().mainTreeTable.length) {
            throw new ChmParsingException("error decompressing aligned block.");
        }
        s = getState().mainTreeTable[border];
        if (s >= getState().getMainTreeElements()) {
            // Table entry is not a final symbol: walk the tree one bit at a time.
            x = ChmConstants.LZX_MAINTREE_TABLEBITS;
            do {
                x++;
                s <<= 1;
                s += getChmSection().checkBit(x);
            } while ((s = getState().mainTreeTable[s]) >= getState().getMainTreeElements());
        }
        // Consume the number of bits recorded for the decoded symbol.
        getChmSection().getSyncBits(getState().getMainTreeLengtsTable()[s]);
        if (s < ChmConstants.LZX_NUM_CHARS) {
            // Literal byte.
            content[i] = (byte) s;
        } else {
            // Match: low bits of the symbol carry the length header, the rest the
            // offset slot.
            s -= ChmConstants.LZX_NUM_CHARS;
            matchlen = s & ChmConstants.LZX_NUM_PRIMARY_LENGTHS;
            if (matchlen == ChmConstants.LZX_NUM_PRIMARY_LENGTHS) {
                // Length header saturated: the remainder comes from the length tree.
                matchfooter = getState().lengthTreeTable[getChmSection().peekBits(
                        ChmConstants.LZX_LENGTH_TABLEBITS)];
                if (matchfooter >= ChmConstants.LZX_LENGTH_MAXSYMBOLS) {
                    x = ChmConstants.LZX_LENGTH_TABLEBITS;
                    do {
                        x++;
                        matchfooter <<= 1;
                        matchfooter += getChmSection().checkBit(x);
                    } while ((matchfooter = getState().lengthTreeTable[matchfooter]) >=
                            ChmConstants.LZX_NUM_SECONDARY_LENGTHS);
                }
                getChmSection().getSyncBits(getState().lengthTreeLengtsTable[matchfooter]);
                matchlen += matchfooter;
            }
            matchlen += ChmConstants.LZX_MIN_MATCH;
            matchoffset = s >>> 3;
            if (matchoffset > 2) {
                // Freshly encoded offset: base value plus extra bits.
                extra = ChmConstants.EXTRA_BITS[matchoffset];
                matchoffset = (ChmConstants.POSITION_BASE[matchoffset] - 2);
                if (extra > 3) {
                    // High bits are read verbatim, the low three via the aligned tree.
                    extra -= 3;
                    long verbatim_bits = getChmSection().getSyncBits(extra);
                    matchoffset += (verbatim_bits << 3);
                    matchoffset += decodeAlignedOffsetSymbol();
                } else if (extra == 3) {
                    // All extra bits come from the aligned tree.
                    matchoffset += decodeAlignedOffsetSymbol();
                } else if (extra > 0) {
                    // Too few extra bits for the aligned tree: read them verbatim.
                    long l = getChmSection().getSyncBits(extra);
                    matchoffset += l;
                } else {
                    matchoffset = 1;
                }
                // Push the new offset onto the front of R0/R1/R2.
                getState().setR2(getState().getR1());
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else if (matchoffset == 0) {
                matchoffset = (int) getState().getR0();
            } else if (matchoffset == 1) {
                // Reuse R1 and swap it with R0.
                matchoffset = (int) getState().getR1();
                getState().setR1(getState().getR0());
                getState().setR0(matchoffset);
            } else /* matchoffset == 2 */ {
                // Reuse R2 and swap it with R0.
                matchoffset = (int) getState().getR2();
                getState().setR2(getState().getR0());
                getState().setR0(matchoffset);
            }
            rundest = i;
            runsrc = rundest - matchoffset;
            // Advance to the last byte of the match; the loop's i++ moves past it.
            i += (matchlen - 1);
            // NOTE(review): a match whose last byte lands exactly on index len is
            // still copied below - confirm content always has room for that index.
            if (i > len) {
                break;
            }
            if (runsrc < 0) {
                // The match starts before content[0]; those bytes must come from
                // the tail of prevcontent. Fail with a parsing error instead of a
                // NullPointerException / ArrayIndexOutOfBoundsException when the
                // referenced data does not exist.
                if (prevcontent == null || prevcontent.length + runsrc < 0) {
                    throw new ChmParsingException(
                            "error decompressing aligned block: match source is out of range.");
                }
                // Bytes taken from prevcontent: the whole match if it ends before
                // content[0], otherwise only the part preceding content[0].
                int fromPrev = Math.min(matchlen, -runsrc);
                // Whatever is left continues from the start of content. (Previously
                // the full match length was copied here again, overshooting the
                // destination by the number of bytes already taken from prevcontent.)
                int fromContent = matchlen - fromPrev;
                runsrc = prevcontent.length + runsrc;
                while (fromPrev-- > 0) {
                    content[rundest++] = prevcontent[runsrc++];
                }
                runsrc = 0;
                // Byte-by-byte forward copy: source and destination may overlap.
                while (fromContent-- > 0) {
                    content[rundest++] = content[runsrc++];
                }
            } else {
                // Source lies entirely inside content. Byte-by-byte forward copy:
                // source and destination may overlap.
                while (matchlen-- > 0) {
                    content[rundest++] = content[runsrc++];
                }
            }
        }
    }
    setContentLength(len);
}

/**
 * Decodes one symbol from the aligned tree and consumes the bits it occupies.
 * <p>
 * NOTE(review): the "needs further tree walk" threshold is the main tree element
 * count, exactly as in the code this helper was extracted from - confirm that this
 * is the intended bound for the aligned tree.
 *
 * @return the decoded aligned-tree symbol
 * @throws TikaException if reading from the underlying section fails
 */
private int decodeAlignedOffsetSymbol() throws TikaException {
    int aligned_bits = getChmSection().peekBits(ChmConstants.LZX_NUM_PRIMARY_LENGTHS);
    int t = getState().getAlignedTreeTable()[aligned_bits];
    if (t >= getState().getMainTreeElements()) {
        int x = ChmConstants.LZX_ALIGNED_TABLEBITS;
        do {
            x++;
            t <<= 1;
            t += getChmSection().checkBit(x);
        } while ((t = getState().getAlignedTreeTable()[t]) >=
                getState().getMainTreeElements());
    }
    getChmSection().getSyncBits(getState().getAlignedLenTable()[t]);
    return t;
}