in wiki-export/src/wiki/export/WikiEntriesDownloader.java [198:272]
/**
 * Downloads one wiki entry through the MediaWiki export API, stores the
 * exported XML in {@code wikiDest}, and then fetches every image that the
 * entry's wikitext links to.
 *
 * <p>The HTTP response is an XML document whose single {@code <export>}
 * element carries, as text, a second XML document. That inner document is
 * parsed, gets a comment built from {@code APACHE_LICENSE_HEADER} inserted
 * before its root element, and is handed to {@code saveXML}. The wikitext
 * is then read from the inner document's single {@code <text>} element and
 * passed to {@code getImageLinks}; each returned entry is downloaded into
 * {@code destDir} unless {@code skipExisting} is set and the file exists.
 *
 * <p>A response code other than 200 is only logged; no exception is thrown.
 *
 * @param wikiEntry title of the wiki page; it is inserted verbatim into the
 *                  request URL and the Referer header (no URL-encoding is
 *                  applied here)
 * @param wikiDest  file that receives the exported XML
 * @throws Exception if the response does not contain exactly one
 *                   {@code <export>} element, or if connecting, parsing,
 *                   saving, sleeping or an image download fails
 */
private void downloadWikiEntry(String wikiEntry, File wikiDest) throws Exception {
    String referer = "http://wiki.netbeans.org/" + wikiEntry;
    // This URL returns the wiki entry in XML format.
    // The wikitext content is returned in the <export> element.
    URL url = new URL(String.format("http://wiki.netbeans.org/wiki/api.php?action=query&titles=%s&export&format=xml", wikiEntry));
    HttpURLConnection http = (HttpURLConnection) url.openConnection();
    http.setDefaultUseCaches(true);
    http.setDoInput(true);
    http.setUseCaches(true);
    http.addRequestProperty("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:57.0) Gecko/20100101 Firefox/57.0");
    http.addRequestProperty("Accept-Language", "en");
    http.addRequestProperty("Referer", referer);
    http.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    http.connect();
    log(" Fetching url " + url);
    log(" to " + wikiDest.getAbsolutePath());

    // Query the status once so the logged code is the one that was tested.
    int responseCode = http.getResponseCode();
    if (responseCode != 200) {
        log("BAD RESPONSE CODE: " + responseCode);
        return;
    }

    // Pause before reading each successful response (presumably to throttle
    // requests against the wiki server).
    log(" Sleeping...");
    Thread.sleep(500L);

    /*
     * Parse the HTTP input, which is a MediaWiki XML document.
     * From the document we just want to retrieve the 'export' tag text.
     */
    DocumentBuilder db = getDocumentBuilderFactory().newDocumentBuilder();
    Document dom;
    // Close the response stream even when parsing fails.
    try (java.io.InputStream responseStream = http.getInputStream()) {
        dom = db.parse(responseStream);
    }
    NodeList exportElements = dom.getElementsByTagName("export");
    if (exportElements.getLength() != 1) {
        throw new Exception("Cannot retrieve 'export' element for wiki name " + wikiEntry);
    }
    String exportTagText = exportElements.item(0).getTextContent();

    /* Now parse the exportTagText, which is itself an XML document. */
    try (StringReader exportContent = new StringReader(exportTagText)) {
        dom = db.parse(new InputSource(exportContent));
    }

    /* Add the license comment ahead of the root element and save it. */
    Comment comment = dom.createComment(APACHE_LICENSE_HEADER);
    dom.insertBefore(comment, dom.getDocumentElement());
    saveXML(dom, wikiDest);

    /* Fetch the wikitext, inside the 'text' element. */
    NodeList textElements = dom.getElementsByTagName("text");
    if (textElements.getLength() != 1) {
        log("WARNING: Empty WikiEntry " + wikiEntry);
        return;
    }
    String wikiText = textElements.item(0).getTextContent();

    // NOTE(review): the map key is used as the local file name and the map
    // value as the image reference given to downloadImage; confirm this
    // matches what getImageLinks actually returns.
    Map<String, String> images = getImageLinks(wikiText);
    log("IMAGES: " + images);
    for (Map.Entry<String, String> imageEntry : images.entrySet()) {
        String imageName = imageEntry.getKey();
        // The original read getKey() here as well, so the map value was never used.
        String imageValue = imageEntry.getValue();
        File imageDest = new File(destDir, imageName);
        if (skipExisting && imageDest.exists()) {
            log(" Skipping already existing " + imageName);
        } else {
            downloadImage(wikiEntry, imageValue, imageDest);
        }
    }
}