in server/src/main/java/org/apache/uniffle/server/netty/ShuffleServerNettyHandler.java [524:639]
/**
 * Handles a {@code GetLocalShuffleIndexRequest} and writes exactly one response to the client's
 * channel.
 *
 * <p>Outcomes, in the order they are checked:
 *
 * <ul>
 *   <li>{@code verifyRequest(appId)} does not return {@code SUCCESS}: a {@code
 *       GetLocalShuffleIndexResponse} carrying that status and an empty buffer is sent.
 *   <li>Read memory of the configured index size hint cannot be reserved: a {@code NO_BUFFER}
 *       response with an empty buffer is sent.
 *   <li>The index is read: the index buffer, data file length and storage ids are sent, and a
 *       {@code ReleaseMemoryAndRecordReadTimeListener} is attached to the write.
 *   <li>{@code FileNotFoundException} while reading: the reserved memory is released and an empty
 *       buffer is sent with the status left unchanged (the read is not treated as an error).
 *   <li>Any other exception: the reserved memory is released and an {@code INTERNAL_ERROR}
 *       response with an empty buffer is sent.
 * </ul>
 *
 * @param client transport client whose channel receives the response
 * @param req the index request (app id, shuffle id, partition id and partition range info)
 */
public void handleGetLocalShuffleIndexRequest(
    TransportClient client, GetLocalShuffleIndexRequest req) {
  try (ServerRpcAuditContext auditContext = createAuditContext("getLocalShuffleIndex", client)) {
    String appId = req.getAppId();
    int shuffleId = req.getShuffleId();
    int partitionId = req.getPartitionId();
    int partitionNumPerRange = req.getPartitionNumPerRange();
    int partitionNum = req.getPartitionNum();
    String requestInfo =
        "appId[" + appId + "], shuffleId[" + shuffleId + "], partitionId[" + partitionId + "]";
    auditContext.withAppId(appId);
    auditContext.withShuffleId(shuffleId);
    auditContext.withArgs(
        "requestId="
            + req.getRequestId()
            + ", partitionId="
            + partitionId
            + ", partitionNumPerRange="
            + partitionNumPerRange
            + ", partitionNum="
            + partitionNum);

    // Reject early, before any storage lookup or memory reservation.
    StatusCode status = verifyRequest(appId);
    if (status != StatusCode.SUCCESS) {
      auditContext.withStatusCode(status);
      GetLocalShuffleIndexResponse response =
          new GetLocalShuffleIndexResponse(
              req.getRequestId(), status, status.toString(), Unpooled.EMPTY_BUFFER, 0L);
      client.getChannel().writeAndFlush(response);
      return;
    }

    String msg = "OK";
    GetLocalShuffleIndexV2Response response;
    int[] range =
        ShuffleStorageUtils.getPartitionRange(partitionId, partitionNumPerRange, partitionNum);
    Storage storage =
        shuffleServer
            .getStorageManager()
            .selectStorage(new ShuffleDataReadEvent(appId, shuffleId, partitionId, range[0]));
    if (storage != null) {
      storage.updateReadMetrics(new StorageReadMetrics(appId, shuffleId));
    }

    // Index file is expected small size and won't cause oom problem with the assumed size. An
    // index segment is 40B,
    // with the default size - 2MB, it can support 50k blocks for shuffle data.
    long assumedFileSize =
        shuffleServer
            .getShuffleServerConf()
            .getLong(ShuffleServerConf.SERVER_SHUFFLE_INDEX_SIZE_HINT);
    if (shuffleServer.getShuffleBufferManager().requireReadMemory(assumedFileSize)) {
      ShuffleIndexResult shuffleIndexResult = null;
      try {
        final long start = System.currentTimeMillis();
        shuffleIndexResult =
            shuffleServer
                .getShuffleTaskManager()
                .getShuffleIndex(
                    appId, shuffleId, partitionId, partitionNumPerRange, partitionNum);
        ManagedBuffer data = shuffleIndexResult.getManagedBuffer();
        ShuffleServerMetrics.counterTotalReadDataSize.inc(data.size());
        ShuffleServerMetrics.counterTotalReadLocalIndexFileSize.inc(data.size());
        auditContext.withStatusCode(status);
        auditContext.withReturnValue("len=" + data.size());
        response =
            new GetLocalShuffleIndexV2Response(
                req.getRequestId(),
                status,
                msg,
                data,
                shuffleIndexResult.getDataFileLen(),
                shuffleIndexResult.getStorageIds());
        ReleaseMemoryAndRecordReadTimeListener listener =
            new ReleaseMemoryAndRecordReadTimeListener(
                start, assumedFileSize, data.size(), requestInfo, req, response, client);
        // Bump the in-flight gauges only once the response and listener exist, right before the
        // write. The catch blocks below release the read memory but never touch these gauges, so
        // incrementing them any earlier would leave them inflated if building the audit record,
        // response or listener failed.
        // NOTE(review): presumably the listener decrements both gauges when the write
        // completes - confirm against ReleaseMemoryAndRecordReadTimeListener.
        ShuffleServerMetrics.gaugeReadLocalIndexFileThreadNum.inc();
        ShuffleServerMetrics.gaugeReadLocalIndexFileBufferSize.inc(assumedFileSize);
        client.getChannel().writeAndFlush(response).addListener(listener);
        // The listener now owns the reserved memory; skip the plain write at the bottom.
        return;
      } catch (FileNotFoundException indexFileNotFoundException) {
        // No listener was attached, so the reservation has to be returned here.
        shuffleServer.getShuffleBufferManager().releaseReadMemory(assumedFileSize);
        if (shuffleIndexResult != null) {
          shuffleIndexResult.release();
        }
        LOG.warn(
            "Index file for {} is not found, maybe the data has been flushed to cold storage "
                + "or partial data still in another shuffle server(when partition split is enabled).",
            requestInfo,
            indexFileNotFoundException);
        // Deliberately keeps the current status: a missing index file is answered with an empty
        // buffer rather than an error.
        response =
            new GetLocalShuffleIndexV2Response(
                req.getRequestId(), status, msg, Unpooled.EMPTY_BUFFER, 0L);
      } catch (Exception e) {
        // No listener was attached, so the reservation has to be returned here.
        shuffleServer.getShuffleBufferManager().releaseReadMemory(assumedFileSize);
        if (shuffleIndexResult != null) {
          shuffleIndexResult.release();
        }
        status = StatusCode.INTERNAL_ERROR;
        msg = "Error happened when get shuffle index for " + requestInfo + ", " + e.getMessage();
        LOG.error(msg, e);
        response =
            new GetLocalShuffleIndexV2Response(
                req.getRequestId(), status, msg, Unpooled.EMPTY_BUFFER, 0L);
      }
    } else {
      // Nothing was reserved, so there is nothing to release on this path.
      status = StatusCode.NO_BUFFER;
      msg = "Can't require memory to get shuffle index";
      LOG.warn("{} for {}", msg, requestInfo);
      response =
          new GetLocalShuffleIndexV2Response(
              req.getRequestId(), status, msg, Unpooled.EMPTY_BUFFER, 0L);
    }
    // Shared tail for every path that did not hand the write over to the listener.
    auditContext.withStatusCode(response.getStatusCode());
    client.getChannel().writeAndFlush(response);
  }
}