public FtpResponse()

in src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java [84:312]


  public FtpResponse(URL url, CrawlDatum datum, Ftp ftp, Configuration conf)
      throws FtpException, IOException {

    this.orig = url.toString();
    this.base = url.toString();
    this.ftp = ftp;
    this.conf = conf;

    if (!"ftp".equals(url.getProtocol()))
      throw new FtpException("Not a ftp url:" + url);

    if (url.getQuery() != null) {
      Ftp.LOG.warn(
          "ftp:// URL may not include a query (query part ignored): {}", url);
    }

    String path = url.getPath().isEmpty() ? "/" : url.getPath();

    try {

      if (ftp.followTalk) {
        if (Ftp.LOG.isInfoEnabled()) {
          Ftp.LOG.info("fetching " + url);
        }
      } else {
        if (Ftp.LOG.isTraceEnabled()) {
          Ftp.LOG.trace("fetching " + url);
        }
      }

      InetAddress addr = InetAddress.getByName(url.getHost());
      if (addr != null && conf.getBoolean("store.ip.address", false) == true) {
        headers.add("_ip_", addr.getHostAddress());
      }

      // idled too long, remote server or ourselves may have timed out,
      // should start anew.
      if (ftp.client != null && ftp.keepConnection
          && ftp.renewalTime < System.currentTimeMillis()) {
        if (Ftp.LOG.isInfoEnabled()) {
          Ftp.LOG.info("delete client because idled too long");
        }
        ftp.client = null;
      }

      // start anew if needed
      if (ftp.client == null) {
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("start client");
        }
        // the real client
        ftp.client = new Client();
        // when to renew, take the lesser
        // ftp.renewalTime = System.currentTimeMillis()
        // + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout :
        // ftp.serverTimeout);

        // timeout for control connection
        ftp.client.setDefaultTimeout(ftp.timeout);
        // timeout for data connection
        ftp.client.setDataTimeout(ftp.timeout);

        // follow ftp talk?
        if (ftp.followTalk)
          ftp.client.addProtocolCommandListener(new PrintCommandListener(
              Ftp.LOG));
      }

      // quit from previous site if at a different site now
      if (ftp.client.isConnected()) {
        InetAddress remoteAddress = ftp.client.getRemoteAddress();
        if (!addr.equals(remoteAddress)) {
          if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
            Ftp.LOG.info("disconnect from " + remoteAddress
                + " before connect to " + addr);
          }
          // quit from current site
          ftp.client.logout();
          ftp.client.disconnect();
        }
      }

      // connect to current site if needed
      if (!ftp.client.isConnected()) {

        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("connect to " + addr);
        }

        ftp.client.connect(addr);
        if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
          ftp.client.disconnect();
          if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("ftp.client.connect() failed: " + addr + " "
                + ftp.client.getReplyString());
          }
          this.code = 500; // http Internal Server Error
          return;
        }

        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("log into " + addr);
        }

        if (!ftp.client.login(ftp.userName, ftp.passWord)) {
          // login failed.
          // please note that some server may return 421 immediately
          // after USER anonymous, thus ftp.client.login() won't return false,
          // but throw exception, which then will be handled by caller
          // (not dealt with here at all) .
          ftp.client.disconnect();
          if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("ftp.client.login() failed: " + addr);
          }
          this.code = 401; // http Unauthorized
          return;
        }

        // insist on binary file type
        if (!ftp.client.setFileType(FTP.BINARY_FILE_TYPE)) {
          ftp.client.logout();
          ftp.client.disconnect();
          if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("ftp.client.setFileType() failed: " + addr);
          }
          this.code = 500; // http Internal Server Error
          return;
        }

        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("set parser for " + addr);
        }

        // SYST is valid only after login
        try {
          ftp.parser = null;
          String parserKey = ftp.client.getSystemName();
          // some server reports as UNKNOWN Type: L8, but in fact UNIX Type: L8
          if (parserKey.startsWith("UNKNOWN Type: L8"))
            parserKey = "UNIX Type: L8";
          ftp.parser = (new DefaultFTPFileEntryParserFactory())
              .createFileEntryParser(parserKey);
        } catch (FtpExceptionBadSystResponse e) {
          if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG
                .warn("ftp.client.getSystemName() failed: " + addr + " " + e);
          }
          ftp.parser = null;
        } catch (ParserInitializationException e) {
          // ParserInitializationException is RuntimeException defined in
          // org.apache.commons.net.ftp.parser.ParserInitializationException
          if (Ftp.LOG.isWarnEnabled()) {
            Ftp.LOG.warn("createFileEntryParser() failed. " + addr + " " + e);
          }
          ftp.parser = null;
        } finally {
          if (ftp.parser == null) {
            // do not log as severe, otherwise
            // FetcherThread/RequestScheduler will abort
            if (Ftp.LOG.isWarnEnabled()) {
              Ftp.LOG.warn("ftp.parser is null: " + addr);
            }
            ftp.client.logout();
            ftp.client.disconnect();
            this.code = 500; // http Internal Server Error
            return;
          }
        }

      } else {
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("use existing connection");
        }
      }

      this.content = null;
      
      path = java.net.URLDecoder.decode(path, "UTF-8");

      if (path.endsWith("/")) {
        getDirAsHttpResponse(path, datum.getModifiedTime());
      } else {
        getFileAsHttpResponse(path, datum.getModifiedTime());
      }

      // reset next renewalTime, take the lesser
      if (ftp.client != null && ftp.keepConnection) {
        ftp.renewalTime = System.currentTimeMillis()
            + ((ftp.timeout < ftp.serverTimeout) ? ftp.timeout
                : ftp.serverTimeout);
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("reset renewalTime to "
              + HttpDateFormat.toString(ftp.renewalTime));
        }
      }

      // getDirAsHttpResponse() or getFileAsHttpResponse() above
      // may have deleted ftp.client
      if (ftp.client != null && !ftp.keepConnection) {
        if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
          Ftp.LOG.info("disconnect from " + addr);
        }
        ftp.client.logout();
        ftp.client.disconnect();
      }

    } catch (Exception e) {
      if (Ftp.LOG.isWarnEnabled()) {
        Ftp.LOG.warn("Error: ", e);
      }
      // for any un-foreseen exception (run time exception or not),
      // do ultimate clean and leave ftp.client for garbage collection
      if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
        Ftp.LOG.info("delete client due to exception");
      }
      ftp.client = null;
      // or do explicit garbage collection?
      // System.gc();
      // can we be less dramatic, using the following instead?
      // probably unnecessary for our practical purpose here
      // try {
      // ftp.client.logout();
      // ftp.client.disconnect();
      // }
      throw new FtpException(e);
      // throw e;
    }

  }