in zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/CommitProcessor.java [197:420]
public void run() {
try {
/*
* In each iteration of the following loop we process at most
* requestsToProcess requests of queuedRequests. We have to limit
* the number of request we poll from queuedRequests, since it is
* possible to endlessly poll read requests from queuedRequests, and
* that will lead to a starvation of non-local committed requests.
*/
int requestsToProcess = 0;
boolean commitIsWaiting = false;
do {
/*
* Since requests are placed in the queue before being sent to
* the leader, if commitIsWaiting = true, the commit belongs to
* the first update operation in the queuedRequests or to a
* request from a client on another server (i.e., the order of
* the following two lines is important!).
*/
synchronized (this) {
commitIsWaiting = !committedRequests.isEmpty();
requestsToProcess = queuedRequests.size();
if (requestsToProcess == 0 && !commitIsWaiting) {
// Waiting for requests to process
while (!stopped && requestsToProcess == 0 && !commitIsWaiting) {
wait();
commitIsWaiting = !committedRequests.isEmpty();
requestsToProcess = queuedRequests.size();
}
}
}
ServerMetrics.getMetrics().READS_QUEUED_IN_COMMIT_PROCESSOR.add(numReadQueuedRequests.get());
ServerMetrics.getMetrics().WRITES_QUEUED_IN_COMMIT_PROCESSOR.add(numWriteQueuedRequests.get());
ServerMetrics.getMetrics().COMMITS_QUEUED_IN_COMMIT_PROCESSOR.add(committedRequests.size());
long time = Time.currentElapsedTime();
/*
* Processing up to requestsToProcess requests from the incoming
* queue (queuedRequests). If maxReadBatchSize is set then no
* commits will be processed until maxReadBatchSize number of
* reads are processed (or no more reads remain in the queue).
* After the loop a single committed request is processed if
* one is waiting (or a batch of commits if maxCommitBatchSize
* is set).
*/
Request request;
int readsProcessed = 0;
while (!stopped
&& requestsToProcess > 0
&& (maxReadBatchSize < 0 || readsProcessed <= maxReadBatchSize)
&& (request = queuedRequests.poll()) != null) {
requestsToProcess--;
if (needCommit(request) || pendingRequests.containsKey(request.sessionId)) {
// Add request to pending
Deque<Request> requests = pendingRequests.computeIfAbsent(request.sessionId, sid -> new ArrayDeque<>());
requests.addLast(request);
ServerMetrics.getMetrics().REQUESTS_IN_SESSION_QUEUE.add(requests.size());
} else {
readsProcessed++;
numReadQueuedRequests.decrementAndGet();
sendToNextProcessor(request);
}
/*
* Stop feeding the pool if there is a local pending update
* and a committed request that is ready. Once we have a
* pending request with a waiting committed request, we know
* we can process the committed one. This is because commits
* for local requests arrive in the order they appeared in
* the queue, so if we have a pending request and a
* committed request, the committed request must be for that
* pending write or for a write originating at a different
* server. We skip this if maxReadBatchSize is set.
*/
if (maxReadBatchSize < 0 && !pendingRequests.isEmpty() && !committedRequests.isEmpty()) {
/*
* We set commitIsWaiting so that we won't check
* committedRequests again.
*/
commitIsWaiting = true;
break;
}
}
ServerMetrics.getMetrics().READS_ISSUED_IN_COMMIT_PROC.add(readsProcessed);
if (!commitIsWaiting) {
commitIsWaiting = !committedRequests.isEmpty();
}
/*
* Handle commits, if any.
*/
if (commitIsWaiting && !stopped) {
/*
* Drain outstanding reads
*/
waitForEmptyPool();
if (stopped) {
return;
}
int commitsToProcess = maxCommitBatchSize;
/*
* Loop through all the commits, and try to drain them.
*/
Set<Long> queuesToDrain = new HashSet<>();
long startWriteTime = Time.currentElapsedTime();
int commitsProcessed = 0;
while (commitIsWaiting && !stopped && commitsToProcess > 0) {
// Process committed head
request = committedRequests.peek();
if (request.isThrottled()) {
LOG.error("Throttled request in committed pool: {}. Exiting.", request);
ServiceUtils.requestSystemExit(ExitCode.UNEXPECTED_ERROR.getValue());
}
/*
* Check if this is a local write request is pending,
* if so, update it with the committed info. If the commit matches
* the first write queued in the blockedRequestQueue, we know this is
* a commit for a local write, as commits are received in order. Else
* it must be a commit for a remote write.
*/
if (!queuedWriteRequests.isEmpty()
&& queuedWriteRequests.peek().sessionId == request.sessionId
&& queuedWriteRequests.peek().cxid == request.cxid) {
/*
* Commit matches the earliest write in our write queue.
*/
Deque<Request> sessionQueue = pendingRequests.get(request.sessionId);
ServerMetrics.getMetrics().PENDING_SESSION_QUEUE_SIZE.add(pendingRequests.size());
if (sessionQueue == null || sessionQueue.isEmpty() || !needCommit(sessionQueue.peek())) {
/*
* Can't process this write yet.
* Either there are reads pending in this session, or we
* haven't gotten to this write yet.
*/
break;
} else {
ServerMetrics.getMetrics().REQUESTS_IN_SESSION_QUEUE.add(sessionQueue.size());
// If session queue != null, then it is also not empty.
Request topPending = sessionQueue.poll();
/*
* Generally, we want to send to the next processor our version of the request,
* since it contains the session information that is needed for post update processing.
* In more details, when a request is in the local queue, there is (or could be) a client
* attached to this server waiting for a response, and there is other bookkeeping of
* requests that are outstanding and have originated from this server
* (e.g., for setting the max outstanding requests) - we need to update this info when an
* outstanding request completes. Note that in the other case, the operation
* originated from a different server and there is no local bookkeeping or a local client
* session that needs to be notified.
*/
topPending.setHdr(request.getHdr());
topPending.setTxn(request.getTxn());
topPending.setTxnDigest(request.getTxnDigest());
topPending.zxid = request.zxid;
topPending.commitRecvTime = request.commitRecvTime;
request = topPending;
if (request.isThrottled()) {
LOG.error("Throttled request in committed & pending pool: {}. Exiting.", request);
ServiceUtils.requestSystemExit(ExitCode.UNEXPECTED_ERROR.getValue());
}
// Only decrement if we take a request off the queue.
numWriteQueuedRequests.decrementAndGet();
queuedWriteRequests.poll();
queuesToDrain.add(request.sessionId);
}
}
/*
* Pull the request off the commit queue, now that we are going
* to process it.
*/
committedRequests.remove();
commitsToProcess--;
commitsProcessed++;
// Process the write inline.
processWrite(request);
commitIsWaiting = !committedRequests.isEmpty();
}
ServerMetrics.getMetrics().WRITE_BATCH_TIME_IN_COMMIT_PROCESSOR
.add(Time.currentElapsedTime() - startWriteTime);
ServerMetrics.getMetrics().WRITES_ISSUED_IN_COMMIT_PROC.add(commitsProcessed);
/*
* Process following reads if any, remove session queue(s) if
* empty.
*/
readsProcessed = 0;
for (Long sessionId : queuesToDrain) {
Deque<Request> sessionQueue = pendingRequests.get(sessionId);
int readsAfterWrite = 0;
while (!stopped && !sessionQueue.isEmpty() && !needCommit(sessionQueue.peek())) {
numReadQueuedRequests.decrementAndGet();
sendToNextProcessor(sessionQueue.poll());
readsAfterWrite++;
}
ServerMetrics.getMetrics().READS_AFTER_WRITE_IN_SESSION_QUEUE.add(readsAfterWrite);
readsProcessed += readsAfterWrite;
// Remove empty queues
if (sessionQueue.isEmpty()) {
pendingRequests.remove(sessionId);
}
}
ServerMetrics.getMetrics().SESSION_QUEUES_DRAINED.add(queuesToDrain.size());
ServerMetrics.getMetrics().READ_ISSUED_FROM_SESSION_QUEUE.add(readsProcessed);
}
ServerMetrics.getMetrics().COMMIT_PROCESS_TIME.add(Time.currentElapsedTime() - time);
endOfIteration();
} while (!stoppedMainLoop);
} catch (Throwable e) {
handleException(this.getName(), e);
}
LOG.info("CommitProcessor exited loop!");
}