in core/cocoon-cli/src/main/java/org/apache/cocoon/bean/CocoonBean.java [408:572]
/**
 * Generates a single target: resolves its outgoing links, renders the page
 * into an in-memory buffer and writes that buffer to the target's
 * destination source unless the content is found to be unchanged.
 *
 * <p>Two link-handling modes are supported, selected by the target:</p>
 * <ul>
 *   <li>When the target confirms extensions, links are collected up front
 *       with <code>getLinks</code>, each link's mime type is looked up, and a
 *       map of original URI to translated URI is handed to
 *       <code>getPage</code>.</li>
 *   <li>When the target does not confirm extensions but follows links,
 *       <code>getPage</code> fills a list of gathered links, which are queued
 *       on the crawler after the page has been rendered.</li>
 * </ul>
 *
 * <p>Exceptions raised while determining the target's own type or while
 * collecting its links propagate to the caller. Failures while rendering or
 * writing the page are logged and reported as broken-link warnings instead
 * of being rethrown.</p>
 *
 * @param crawler the crawler that tracks translated links and queues newly
 *                discovered targets
 * @param target  the target to generate
 * @throws Exception if type lookup or link collection fails before rendering
 */
private void processTarget(Crawler crawler, Target target) throws Exception {
    int status = 0;
    int linkCount = 0;
    int newLinkCount = 0;
    long startTimeMillis = System.currentTimeMillis();

    // Register the target itself as a translated link the first time it is seen.
    if (target.confirmExtensions()) {
        if (!crawler.hasTranslatedLink(target)) {
            final String mimeType = getType(target.getDeparameterizedSourceURI(), target.getParameters());
            target.setMimeType(mimeType);
            crawler.addTranslatedLink(target);
        }
    }

    // Process links: build the original-URI -> translated-URI map passed to getPage.
    final HashMap translatedLinks = new HashMap();
    if (target.followLinks() && target.confirmExtensions() && isCrawlablePage(target)) {
        final Iterator i =
            this.getLinks(target.getDeparameterizedSourceURI(), target.getParameters()).iterator();
        while (i.hasNext()) {
            String linkURI = (String) i.next();
            Target linkTarget = deriveIncludedLinkTarget(target, linkURI);
            if (linkTarget == null) {
                // Skip already reported by the helper.
                continue;
            }
            if (!crawler.hasTranslatedLink(linkTarget)) {
                try {
                    final String mimeType =
                        getType(linkTarget.getDeparameterizedSourceURI(), linkTarget.getParameters());
                    linkTarget.setMimeType(mimeType);
                    crawler.addTranslatedLink(linkTarget);
                    log.info("  Link translated: " + linkTarget.getSourceURI());
                    if (crawler.addTarget(linkTarget)) {
                        newLinkCount++;
                    }
                } catch (ProcessingException pe) {
                    this.sendBrokenLinkWarning(linkTarget, pe);
                    // Broken links are only queued when broken-link generation is enabled.
                    if (this.brokenLinkGenerate) {
                        if (crawler.addTarget(linkTarget)) {
                            newLinkCount++;
                        }
                    }
                }
            } else {
                // Reuse the already-translated target, but keep the URI as it
                // was written in this page so the map key matches the link.
                String originalURI = linkTarget.getOriginalSourceURI();
                linkTarget = crawler.getTranslatedLink(linkTarget);
                linkTarget.setOriginalURI(originalURI);
            }
            translatedLinks.put(linkTarget.getOriginalSourceURI(), linkTarget.getTranslatedURI(target.getPath()));
        }
        linkCount = translatedLinks.size();
    }

    try {
        // Process URI: render into a buffer first so nothing is written on failure.
        DelayedOutputStream output = new DelayedOutputStream();
        try {
            // Links are gathered by getPage only when they were not translated up front.
            List gatheredLinks;
            if (!target.confirmExtensions() && target.followLinks() && isCrawlablePage(target)) {
                gatheredLinks = new ArrayList();
            } else {
                gatheredLinks = null;
            }

            final TreeMap headers = new TreeMap();
            headers.put("user-agent", userAgent);
            headers.put("accept", accept);

            status =
                getPage(
                    target.getDeparameterizedSourceURI(),
                    getLastModified(target),
                    target.getParameters(),
                    headers,
                    target.confirmExtensions() ? translatedLinks : null,
                    gatheredLinks,
                    output);

            if (status >= 400) {
                throw new ProcessingException(
                    "Resource not found: " + status);
            }

            if (gatheredLinks != null) {
                for (Iterator it = gatheredLinks.iterator(); it.hasNext();) {
                    String linkURI = (String) it.next();
                    Target linkTarget = deriveIncludedLinkTarget(target, linkURI);
                    if (linkTarget == null) {
                        // Skip already reported by the helper.
                        continue;
                    }
                    if (crawler.addTarget(linkTarget)) {
                        newLinkCount++;
                    }
                }
                linkCount = gatheredLinks.size();
            }
        } catch (ProcessingException pe) {
            // Discard the buffer; nulling it tells the finally block not to write.
            output.close();
            output = null;
            this.resourceUnavailable(target);
            this.sendBrokenLinkWarning(target,
                DefaultNotifyingBuilder.getRootCause(pe));
        } finally {
            // NOTE(review): presumably getPage returns -1 when there is nothing
            // to write (e.g. page not modified) -- confirm against getPage.
            // NOTE(review): this block also runs when the try body fails with
            // something other than ProcessingException; unless status is -1 the
            // buffered content is still written before the outer catch reports
            // the failure -- confirm this is intended.
            if (output != null && status != -1) {
                ModifiableSource source = getSource(target);
                try {
                    final int pageSize = output.size();

                    if (this.checksumsURI == null || !isSameContent(output, target)) {
                        OutputStream stream = source.getOutputStream();
                        output.setFileOutputStream(stream);
                        output.flush();
                        output.close();
                        pageGenerated(target.getSourceURI(),
                                      target.getAuthlessDestURI(),
                                      pageSize,
                                      linkCount,
                                      newLinkCount,
                                      crawler.getRemainingCount(),
                                      crawler.getProcessedCount(),
                                      System.currentTimeMillis() - startTimeMillis);
                    } else {
                        output.close();
                        pageSkipped(target.getSourceURI(), "Page not changed");
                    }
                } catch (IOException ioex) {
                    // Same message as before, now with the stack trace attached.
                    log.warn(ioex.toString(), ioex);
                } finally {
                    releaseSource(source);
                }
            }
        }
    } catch (Exception rnfe) {
        // Route the stack trace through the logger instead of stderr.
        log.warn("Could not process URI: " + target.getSourceURI(), rnfe);
        this.sendBrokenLinkWarning(target.getSourceURI(), "URI not found: " + rnfe.getMessage());
    }
}

/**
 * Derives the target for a link found in a page and applies the
 * include/exclude rules to it, reporting the link through
 * <code>pageSkipped</code> when it is rejected.
 *
 * @param target  the page the link was found in
 * @param linkURI the link URI as found in the page
 * @return the derived target, or <code>null</code> if the link was skipped
 * @throws Exception if deriving the target or reporting the skip fails
 */
private Target deriveIncludedLinkTarget(Target target, String linkURI) throws Exception {
    Target linkTarget = target.getDerivedTarget(linkURI);
    if (linkTarget == null) {
        pageSkipped(linkURI, "link does not share same root as parent");
        return null;
    }
    if (!isIncluded(linkTarget.getSourceURI())) {
        pageSkipped(linkTarget.getSourceURI(), "matched include/exclude rules");
        return null;
    }
    return linkTarget;
}