public Set crawl()

in src/main/java/org/apache/creadur/tentacles/NexusClient.java [127:175]


    /**
     * Recursively crawls an HTML index page and collects the URIs of every
     * linked resource.
     *
     * <p>Each {@code <a ...>...</a>} element on the page is inspected. Links
     * whose name or href equals {@code ONE_UP} are skipped (presumably the
     * parent-directory entry, which would otherwise send the crawl back up
     * the tree). Links whose display name ends with {@code SLASH} are treated
     * as nested indexes and crawled in turn once the current page has been
     * fully read; every other link is recorded as a resource.</p>
     *
     * @param index the URI of the index page to crawl; relative links are
     *              resolved against it
     * @return the resources found on this page followed by those found in
     *         nested indexes, in encounter order
     * @throws IOException if fetching or reading an index page fails
     */
    public Set<URI> crawl(final URI index) throws IOException {
        log.info("Crawl {}", index);
        final Set<URI> resources = new LinkedHashSet<>();
        final Set<URI> crawl = new LinkedHashSet<>();

        // try-with-resources guarantees the stream and the HTTP response are
        // released even when parsing fails part-way through; resources are
        // closed in reverse order (content first, then response).
        try (CloseableHttpResponse response = get(index);
             InputStream content = response.getEntity().getContent()) {

            final StreamLexer lexer = new StreamLexer(content);

            // <a
            // href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
            while (lexer.readAndMark("<a ", "/a>")) {

                try {
                    final String link = lexer.peek("href=\"", "\"");
                    final String name = lexer.peek(">", "<");

                    if (name.equals(ONE_UP) || link.equals(ONE_UP)) {
                        continue;
                    }

                    // Resolve only links we intend to keep.
                    final URI uri = index.resolve(link);

                    if (name.endsWith(SLASH)) {
                        // Nested index: defer until this page's response is closed.
                        crawl.add(uri);
                    } else {
                        resources.add(uri);
                    }

                } finally {
                    // Always release the mark, including on 'continue'.
                    lexer.unmark();
                }
            }
        }

        // Recurse only after the current response has been closed so that a
        // deep tree does not hold one open connection per level.
        for (final URI uri : crawl) {
            resources.addAll(crawl(uri));
        }

        return resources;
    }