in src/main/java/org/apache/tomee/website/AbstractDownloadsNG.java [298:348]
/**
 * Downloads the page behind {@code url}, parses it as HTML and collects the
 * download links found on it.
 *
 * <p>Only links accepted by {@link #isDownloadLink(Link)} are kept, and links
 * sharing the same URI are reported once.</p>
 *
 * @param url location of the HTML page to scan
 * @return a {@code Results} built from the string form of {@code url} and an
 *         unmodifiable list of the retained links
 * @throws RuntimeException if opening, reading or parsing the page fails for
 *         any reason; the original exception is attached as the cause
 */
public Results fetch(final URL url) {
    try (final InputStream input = url.openStream()) {
        final LinkContentHandler linkHandler = new LinkContentHandler();
        // Body text is not used afterwards; the handler is passed -1 as its write limit.
        final ContentHandler textHandler = new BodyContentHandler(-1);
        new HtmlParser().parse(input,
                new TeeContentHandler(linkHandler, textHandler),
                new Metadata(),
                new ParseContext());

        //TODO Possible Improvement: Filters (see isDownloadLink) as CLI parameters
        final List<Link> downloadLinks = linkHandler.getLinks().stream()
                .filter(link -> isDownloadLink(link))
                .filter(distinctBy(Link::getUri))
                .collect(Collectors.toList());

        return new Results(url.toString(), Collections.unmodifiableList(downloadLinks));
    } catch (Exception e) {
        // Name the page that failed so a broken URL can be identified without reading the cause chain.
        throw new RuntimeException("Could not obtain download link from " + url
                + ". See stacktrace for further information.", e);
    }
}

/**
 * Decides whether a parsed link should be reported as a download link.
 *
 * <p>A link is kept when it is an anchor with a non-empty, non-fragment URI
 * that contains {@code "tomee-"} or {@code "openejb-standalone"}, and that
 * matches none of the exclusions listed in the method body.</p>
 *
 * @param link link extracted from the parsed page
 * @return {@code true} if the link should be kept
 */
private boolean isDownloadLink(final Link link) {
    if (!link.isAnchor()) {
        return false;
    }

    final String uri = link.getUri();
    if (uri.isEmpty() || uri.startsWith("#")) {
        return false;
    }
    if (!uri.contains("tomee-") && !uri.contains("openejb-standalone")) {
        return false;
    }

    // Artifacts that should not be listed even though their URI matches the rule above.
    final String[] excludedFragments = {
            // present on the dist server but not linked on the website
            "arquillian",
            "openejb-standalone-4.7.5",
            "openejb-provisionning-4.7.5",
            "openejb-ssh-4.7.5",
            "tomee-webaccess",
            // releases 9.0.0-M1 to 9.0.0-M3 have no (real) source release
            "apache-tomee-9.0.0-M3-source-release",
            "apache-tomee-9.0.0-M2-source-release",
            "apache-tomee-9.0.0-M1-source-release"
    };
    for (final String fragment : excludedFragments) {
        if (uri.contains(fragment)) {
            return false;
        }
    }

    // No need to have hashes of signature files.
    final String[] excludedSuffixes = {"asc.sha1", "asc.sha256", "asc.sha512"};
    for (final String suffix : excludedSuffixes) {
        if (uri.endsWith(suffix)) {
            return false;
        }
    }

    return true;
}