in src/main/java/com/microsoft/azure/spark/tools/job/YarnContainerLogFetcher.java [293:321]
/**
 * Requests one chunk of the {@code type} log from the YarnUI log page located under
 * {@code baseUrl} and emits the log text parsed out of the returned HTML.
 *
 * <p>Side effect: every time a page is parsed, {@code setFetched(type, start + n)} is called,
 * where {@code n} is the length of the parsed text (0 when nothing was extracted).
 *
 * @param baseUrl base URL of the log page; it is passed through
 *                {@code UriUtils.normalizeWithSlashEnding} before {@code type} is resolved against it
 * @param type    log type; used both as the relative path segment of the request URL and as
 *                the key to look up in the parsed result
 * @param start   value sent as the {@code start} query parameter
 * @param size    value sent as the {@code size} query parameter; the parameter is omitted
 *                when this is not positive
 * @return an Observable emitting the parsed log text, or completing without emission when
 *         the parsed text is empty
 */
private Observable<String> getContentFromYarnLogDom(final String baseUrl,
                                                    final String type,
                                                    final long start,
                                                    final int size) {
    final URI url = UriUtils.normalizeWithSlashEnding(baseUrl).resolve(type);

    final List<NameValuePair> params = new ArrayList<>();
    params.add(new BasicNameValuePair("start", Long.toString(start)));
    if (size > 0) {
        // A non-positive size means "no explicit limit": leave the parameter out entirely.
        params.add(new BasicNameValuePair("size", Integer.toString(size)));
    }

    return getRequest(url, params)
            .map(response -> {
                try {
                    return response.getMessage();
                } catch (IOException e) {
                    // The upstream requestWithHttpResponse() has already buffered the message,
                    // so this branch is not expected to run. Keep the original exception as the
                    // cause so that the failure is diagnosable if it ever does.
                    throw propagate(new AssertionError("The upstream has got messages.", e));
                }
            })
            .flatMap(html -> {
                final String logs = parseLogsFromHtml(type, html).getOrDefault(type, StringUtils.EMPTY);

                // NOTE(review): the offset is advanced by the character count of the parsed text.
                // If the server treats `start` as a byte offset, multi-byte content would make
                // the two drift apart -- TODO confirm.
                setFetched(type, start + logs.length());

                return StringUtils.isEmpty(logs) ? Observable.empty() : Observable.just(logs);
            })
            .doOnError(err -> log().warn("Can't parse information from YarnUI log page " + url, err));
}