in src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java [84:312]
/**
 * Fetches the resource behind an {@code ftp://} URL and records the outcome
 * in this response.
 *
 * <p>
 * The constructor (re)uses the client held by {@code ftp}: it drops a client
 * whose renewal time has passed, creates a new one if needed, switches sites
 * when the connected remote address differs from the requested host, logs in,
 * forces binary transfer mode and selects a directory-listing parser from the
 * server's SYST reply. Paths ending in {@code /} are handled by
 * {@code getDirAsHttpResponse}, all others by {@code getFileAsHttpResponse}.
 *
 * <p>
 * Connection-setup failures are reported through HTTP-like codes instead of
 * exceptions: {@code 500} when the connect reply is not a positive
 * completion, when binary mode cannot be set or when no listing parser could
 * be created; {@code 401} when login is refused.
 *
 * @param url
 *          the URL to fetch; must use the {@code ftp} protocol, a query part
 *          is ignored with a warning
 * @param datum
 *          crawl datum whose modified time is passed on to the directory/file
 *          handlers
 * @param ftp
 *          protocol instance holding the shared client, parser, credentials
 *          and timeout settings; its {@code client}, {@code parser} and
 *          {@code renewalTime} fields are updated here
 * @param conf
 *          configuration; {@code store.ip.address} controls whether the
 *          resolved address is added as the {@code _ip_} header
 * @throws FtpException
 *           if {@code url} is not an ftp URL, or wrapping any exception
 *           raised while talking to the server (the client is then closed
 *           and discarded)
 * @throws IOException
 *           declared by the signature; exceptions raised inside the fetch
 *           logic are wrapped in {@link FtpException}
 */
public FtpResponse(URL url, CrawlDatum datum, Ftp ftp, Configuration conf)
    throws FtpException, IOException {

  this.orig = url.toString();
  this.base = url.toString();
  this.ftp = ftp;
  this.conf = conf;

  if (!"ftp".equals(url.getProtocol())) {
    throw new FtpException("Not a ftp url:" + url);
  }

  if (url.getQuery() != null) {
    Ftp.LOG.warn(
        "ftp:// URL may not include a query (query part ignored): {}", url);
  }

  String path = url.getPath().isEmpty() ? "/" : url.getPath();

  try {

    // announce the fetch at info level only when the ftp talk is followed
    if (ftp.followTalk) {
      Ftp.LOG.info("fetching {}", url);
    } else {
      Ftp.LOG.trace("fetching {}", url);
    }

    // getByName() never returns null, it throws if the host cannot be
    // resolved
    InetAddress addr = InetAddress.getByName(url.getHost());
    if (conf.getBoolean("store.ip.address", false)) {
      headers.add("_ip_", addr.getHostAddress());
    }

    // idled too long, remote server or ourselves may have timed out,
    // should start anew.
    if (ftp.client != null && ftp.keepConnection
        && ftp.renewalTime < System.currentTimeMillis()) {
      Ftp.LOG.info("delete client because idled too long");
      // close the stale control connection instead of leaking its socket
      discardClient(ftp);
    }

    // start anew if needed
    if (ftp.client == null) {
      if (ftp.followTalk) {
        Ftp.LOG.info("start client");
      }
      // the real client
      ftp.client = new Client();
      // timeout for control connection
      ftp.client.setDefaultTimeout(ftp.timeout);
      // timeout for data connection
      ftp.client.setDataTimeout(ftp.timeout);

      // follow ftp talk?
      if (ftp.followTalk) {
        ftp.client.addProtocolCommandListener(new PrintCommandListener(
            Ftp.LOG));
      }
    }

    // quit from previous site if at a different site now
    if (ftp.client.isConnected()) {
      InetAddress remoteAddress = ftp.client.getRemoteAddress();
      if (!addr.equals(remoteAddress)) {
        if (ftp.followTalk) {
          Ftp.LOG.info("disconnect from {} before connect to {}",
              remoteAddress, addr);
        }
        // quit from current site
        ftp.client.logout();
        ftp.client.disconnect();
      }
    }

    // connect to current site if needed
    if (!ftp.client.isConnected()) {

      if (ftp.followTalk) {
        Ftp.LOG.info("connect to {}", addr);
      }

      ftp.client.connect(addr);
      if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
        ftp.client.disconnect();
        Ftp.LOG.warn("ftp.client.connect() failed: {} {}", addr,
            ftp.client.getReplyString());
        this.code = 500; // http Internal Server Error
        return;
      }

      if (ftp.followTalk) {
        Ftp.LOG.info("log into {}", addr);
      }

      if (!ftp.client.login(ftp.userName, ftp.passWord)) {
        // login failed.
        // please note that some server may return 421 immediately
        // after USER anonymous, thus ftp.client.login() won't return false,
        // but throw exception, which then will be handled by the outer
        // catch block below.
        ftp.client.disconnect();
        Ftp.LOG.warn("ftp.client.login() failed: {}", addr);
        this.code = 401; // http Unauthorized
        return;
      }

      // insist on binary file type
      if (!ftp.client.setFileType(FTP.BINARY_FILE_TYPE)) {
        ftp.client.logout();
        ftp.client.disconnect();
        Ftp.LOG.warn("ftp.client.setFileType() failed: {}", addr);
        this.code = 500; // http Internal Server Error
        return;
      }

      if (ftp.followTalk) {
        Ftp.LOG.info("set parser for {}", addr);
      }

      // SYST is valid only after login.
      // Any failure here leaves ftp.parser null and is answered with code
      // 500 below; the cause is always logged (throwables are rendered via
      // toString() to keep the messages on a single line).
      ftp.parser = null;
      try {
        String parserKey = ftp.client.getSystemName();
        // some server reports as UNKNOWN Type: L8, but in fact UNIX Type: L8
        if (parserKey.startsWith("UNKNOWN Type: L8")) {
          parserKey = "UNIX Type: L8";
        }
        ftp.parser = (new DefaultFTPFileEntryParserFactory())
            .createFileEntryParser(parserKey);
      } catch (FtpExceptionBadSystResponse e) {
        Ftp.LOG.warn("ftp.client.getSystemName() failed: {} {}", addr,
            e.toString());
      } catch (ParserInitializationException e) {
        // ParserInitializationException is RuntimeException defined in
        // org.apache.commons.net.ftp.parser.ParserInitializationException
        Ftp.LOG.warn("createFileEntryParser() failed. {} {}", addr,
            e.toString());
      } catch (Exception e) {
        // anything else (I/O error, missing system name, ...) is treated
        // like the cases above rather than being dropped without a trace
        Ftp.LOG.warn("selecting ftp parser failed: {} {}", addr,
            e.toString());
      }

      if (ftp.parser == null) {
        // do not log as severe, otherwise
        // FetcherThread/RequestScheduler will abort
        Ftp.LOG.warn("ftp.parser is null: {}", addr);
        ftp.client.logout();
        ftp.client.disconnect();
        this.code = 500; // http Internal Server Error
        return;
      }

    } else {
      if (ftp.followTalk) {
        Ftp.LOG.info("use existing connection");
      }
    }

    this.content = null;

    path = java.net.URLDecoder.decode(path, "UTF-8");

    // a trailing slash marks a directory listing request
    if (path.endsWith("/")) {
      getDirAsHttpResponse(path, datum.getModifiedTime());
    } else {
      getFileAsHttpResponse(path, datum.getModifiedTime());
    }

    // reset next renewalTime, take the lesser of the two timeouts
    if (ftp.client != null && ftp.keepConnection) {
      ftp.renewalTime = System.currentTimeMillis()
          + Math.min(ftp.timeout, ftp.serverTimeout);
      if (ftp.followTalk && Ftp.LOG.isInfoEnabled()) {
        Ftp.LOG.info("reset renewalTime to {}",
            HttpDateFormat.toString(ftp.renewalTime));
      }
    }

    // getDirAsHttpResponse() or getFileAsHttpResponse() above
    // may have deleted ftp.client
    if (ftp.client != null && !ftp.keepConnection) {
      if (ftp.followTalk) {
        Ftp.LOG.info("disconnect from {}", addr);
      }
      ftp.client.logout();
      ftp.client.disconnect();
    }

  } catch (Exception e) {
    Ftp.LOG.warn("Error: ", e);
    // for any un-foreseen exception (run time exception or not),
    // do ultimate clean: close the connection (best effort) and drop the
    // client so that the next request starts with a fresh one
    if (ftp.followTalk) {
      Ftp.LOG.info("delete client due to exception");
    }
    discardClient(ftp);
    throw new FtpException(e);
  }
}

/**
 * Closes (best effort) and forgets the client currently held by {@code ftp}.
 *
 * <p>
 * No QUIT is sent: this is only used for connections that are considered
 * stale or broken. Failures while closing are logged at debug level and
 * otherwise ignored so that they cannot mask the condition that led here.
 *
 * @param ftp
 *          protocol instance whose {@code client} is to be discarded; a
 *          {@code null} client is tolerated
 */
private static void discardClient(Ftp ftp) {
  if (ftp.client == null) {
    return;
  }
  try {
    if (ftp.client.isConnected()) {
      ftp.client.disconnect();
    }
  } catch (Exception ignored) {
    // deliberate best effort, the client is thrown away in any case
    Ftp.LOG.debug("ignoring failure while closing discarded ftp client",
        ignored);
  } finally {
    ftp.client = null;
  }
}