private void downloadWikiEntry()

in wiki-export/src/wiki/export/WikiEntriesDownloader.java [198:272]


    private void downloadWikiEntry(String wikiEntry, File wikiDest) throws Exception {

        String referer = "http://wiki.netbeans.org/" + wikiEntry;

        // This URL returns the wiki entry in XML format
        // The wikitext content is returned in the <export> element.
        URL url = new URL(String.format("http://wiki.netbeans.org/wiki/api.php?action=query&titles=%s&export&format=xml", wikiEntry));
        HttpURLConnection http = (HttpURLConnection) url.openConnection();
        http.setDefaultUseCaches(true);
        http.setDoInput(true);
        http.setUseCaches(true);
        http.addRequestProperty("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0");
        http.addRequestProperty("Accept-Language", "en");
        http.addRequestProperty("Referer", referer);
        http.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");

        http.connect();

        log("  Fetching url " + url);
        log("    to " + wikiDest.getAbsolutePath());

        if (http.getResponseCode() == 200) {
            log("  Sleeping...");
            Thread.currentThread().sleep(500L);

            /*
            Parse the HTTP input, which is a MediaWiki XML document.
            From the document we just want to retrieve the 'export' tag text.
             */
            String exportTagText = null;
            DocumentBuilder db = getDocumentBuilderFactory().newDocumentBuilder();

            Document dom = db.parse(http.getInputStream());
            NodeList exportElements = dom.getElementsByTagName("export");
            if (exportElements.getLength() == 1) {
                exportTagText = exportElements.item(0).getTextContent();
            } else {
                throw new Exception("Cannot retrieve 'export' element for wiki name " + wikiEntry);
            }

            /* Now parse the exportTagText, which is itself a XML document */
            StringReader exportContent = new StringReader(exportTagText);
            InputSource inputSource = new InputSource(exportContent);
            dom = db.parse(inputSource);
            /* Add a comment and save it */
            Comment comment = dom.createComment(APACHE_LICENSE_HEADER);
            Element e = dom.getDocumentElement();
            dom.insertBefore(comment, e);
            saveXML(dom, wikiDest);
            exportContent.close();

            /* Fetch the wikitext, inside the 'text' element */
            NodeList textElements = dom.getElementsByTagName("text");
            if (textElements.getLength() == 1) {
                String wikiText = textElements.item(0).getTextContent();
                Map<String, String> images = getImageLinks(wikiText);
                System.out.println("IMAGES: " + images);
                for (Map.Entry<String, String> imageEntry : images.entrySet()) {
                    String imageName = imageEntry.getKey();
                    String imageValue = imageEntry.getKey();
                    File imageDest = new File(destDir, imageName);
                    if (skipExisting && imageDest.exists()) {
                        log("  Skipping already existing " + imageName);
                    } else {
                        downloadImage(wikiEntry, imageValue, imageDest);
                    }
                }
            } else {
                log("WARNING: Empty WikiEntry " + wikiEntry);
            }

        } else {
            log("BAD RESPONSE CODE: " + http.getResponseCode());
        }
    }