public List retrieveListing()

in src/java/org/apache/ivy/util/url/ApacheURLLister.java [105:213]


    /**
     * Retrieves the child URLs linked from the HTML listing page served at the given URL.
     * <p>
     * Every anchor matched by {@code PATTERN} is examined; a link is kept only when its href
     * resolves to a name below the listed URL and its displayed text agrees with that name
     * (compared case-insensitively, ignoring one optional trailing slash, or as a prefix when
     * the displayed text ends with the truncation marker {@code ..>} / {@code ..&gt;}).
     * Links to parent directories, hrefs that are not valid URIs and hrefs without a path
     * component (for instance {@code mailto:} links) are ignored.
     * </p>
     *
     * @param url
     *            the URL whose listing is read; a trailing slash is appended when its path ends
     *            neither with "/" nor with ".html"
     * @param includeFiles
     *            whether links whose href does not end with "/" are returned
     * @param includeDirectories
     *            whether links whose href ends with "/" are returned
     * @return the matching child URLs, resolved against <code>url</code>; an empty list when the
     *         URL handler reports the URL as unavailable
     * @throws IOException
     *             if the listing page cannot be opened or read, or if a child URL cannot be
     *             built
     */
    public List<URL> retrieveListing(URL url, boolean includeFiles, boolean includeDirectories)
            throws IOException {
        List<URL> urlList = new ArrayList<>();

        // add trailing slash for relative urls
        if (!url.getPath().endsWith("/") && !url.getPath().endsWith(".html")) {
            url = new URL(url.getProtocol(), url.getHost(), url.getPort(), url.getPath() + "/");
        }

        URLHandler urlHandler = URLHandlerRegistry.getDefault();
        URLInfo urlInfo = urlHandler.getURLInfo(url);
        if (urlInfo == URLHandler.UNAVAILABLE) {
            return urlList; // not found => return empty list
        }
        // here, urlInfo is valid
        String charset = urlInfo.getBodyCharset();

        String htmlText;
        // try-with-resources releases the stream even when the reader cannot be created
        // (e.g. unsupported charset) or when reading fails part-way through
        try (InputStream contentStream = urlHandler.openStream(url)) {
            // no charset announced for the body: fall back to the platform default
            InputStreamReader reader = (charset == null)
                    ? new InputStreamReader(contentStream)
                    : new InputStreamReader(contentStream, charset);
            htmlText = FileUtil.readEntirely(new BufferedReader(reader));
        }

        // path of the (possibly slash-terminated) listing URL; invariant for the whole scan
        String basePath = url.getPath();

        Matcher matcher = PATTERN.matcher(htmlText);

        while (matcher.find()) {
            // get the href text and the displayed text
            String href = matcher.group(1);
            String text = matcher.group(2);

            if (href == null || text == null) {
                // the groups were not found (shouldn't happen, really)
                continue;
            }

            text = text.trim();

            try {
                // URI methods decode the URL
                URI uri = new URI(href);
                href = uri.getPath();
                if (href == null) {
                    // opaque URI (mailto:, javascript:, ...): it has no path component, so it
                    // cannot designate a child of the listed URL
                    continue;
                }
                // handle complete URL listings
                if (uri.getScheme() != null) {
                    if (!href.startsWith(basePath)) {
                        // ignore URLs which aren't children of the base URL
                        continue;
                    }
                    href = href.substring(basePath.length());
                }
            } catch (URISyntaxException e) {
                // incorrect URL, ignore
                continue;
            }

            if (href.startsWith("../")) {
                // we are only interested in sub-URLs, not parent URLs, so skip this one
                continue;
            }

            // absolute href: convert to relative one by keeping only the last path segment
            // (the final character is left out of the search so that a directory's trailing
            // slash is not mistaken for the segment separator)
            if (href.startsWith("/")) {
                int slashIndex = href.substring(0, href.length() - 1).lastIndexOf('/');
                href = href.substring(slashIndex + 1);
            }

            // relative to current href: convert to simple relative one
            if (href.startsWith("./")) {
                href = href.substring("./".length());
            }

            // exclude those where they do not match
            // href will never be truncated, text may be truncated by apache
            if (text.endsWith("..>")) {
                // text is probably truncated, we can only check if the href starts with text
                if (!href.startsWith(text.substring(0, text.length() - 3))) {
                    continue;
                }
            } else if (text.endsWith("..&gt;")) {
                // text is probably truncated, we can only check if the href starts with text
                if (!href.startsWith(text.substring(0, text.length() - 6))) {
                    continue;
                }
            } else {
                // text is not truncated, so it must match the url after stripping optional
                // trailing slashes
                String strippedHref = href.endsWith("/") ? href.substring(0, href.length() - 1)
                        : href;
                String strippedText = text.endsWith("/") ? text.substring(0, text.length() - 1)
                        : text;
                if (!strippedHref.equalsIgnoreCase(strippedText)) {
                    continue;
                }
            }

            // a trailing slash on the href is what marks an entry as a directory
            boolean directory = href.endsWith("/");

            if ((directory && includeDirectories) || (!directory && includeFiles)) {
                URL child = new URL(url, href);
                urlList.add(child);
                Message.debug("ApacheURLLister found URL=[" + child + "].");
            }
        }

        return urlList;
    }