in phrasecount/src/main/java/phrasecount/DocumentLoader.java [44:70]
public void load(TransactionBase tx, Context context) throws Exception {
  // TODO Need a strategy for dealing w/ large documents. If a worker processes many large
  // documents concurrently, it could cause memory exhaustion. Could break up large documents
  // into pieces. However, not sure if the example should be complicated with this.
  TypedTransactionBase ttx = TYPEL.wrap(tx);
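  // read the content hash currently stored for this URI, if any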
  String storedHash = ttx.get().row("uri:" + document.getURI()).col(DOC_HASH_COL).toString();
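
  // only do work when the URI is new or its content hash has changed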
  if (storedHash == null || !storedHash.equals(document.getHash())) {
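    // associate this URI with the new content hash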
    ttx.mutate().row("uri:" + document.getURI()).col(DOC_HASH_COL).set(document.getHash());
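
    // see whether this content hash is already referenced by another URI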
    Integer refCount =
        ttx.get().row("doc:" + document.getHash()).col(DOC_REF_COUNT_COL).toInteger();
    if (refCount == null) {
      // this document was never seen before
      addNewDocument(ttx, document);
    } else {
      setRefCount(ttx, document.getHash(), refCount, refCount + 1);
    }
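
    // the URI previously referenced different content, so release that reference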
    if (storedHash != null) {
      decrementRefCount(ttx, refCount, storedHash);
    }
  }
}