You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 07:34:37 UTC

svn commit: r1650447 [20/25] - in /nutch/branches/2.x: ./ src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/ src/java/org/apache/nutch/api/impl/db/ src/java/org/apache/nutch/api/model/response/ src/java/org/apache/nutch/api/resources/ s...

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java Fri Jan  9 06:34:33 2015
@@ -27,7 +27,6 @@ import java.net.Socket;
 import java.util.List;
 //import java.util.LinkedList;
 
-
 import org.apache.commons.net.MalformedServerReplyException;
 import org.apache.commons.net.ftp.FTP;
 import org.apache.commons.net.ftp.FTPCommand;
@@ -37,561 +36,551 @@ import org.apache.commons.net.ftp.FTPRep
 import org.apache.commons.net.ftp.FTPConnectionClosedException;
 
 /***********************************************
- * Client.java encapsulates functionalities necessary for nutch to
- * get dir list and retrieve file from an FTP server.
- * This class takes care of all low level details of interacting
- * with an FTP server and provides a convenient higher level interface.
- *
+ * Client.java encapsulates functionalities necessary for nutch to get dir list
+ * and retrieve file from an FTP server. This class takes care of all low level
+ * details of interacting with an FTP server and provides a convenient higher
+ * level interface.
+ * 
  * Modified from FtpClient.java in apache commons-net.
  * 
- * Notes by John Xing:
- * ftp server implementations are hardly uniform and none seems to follow
- * RFCs whole-heartedly. We have no choice, but assume common denominator
- * as following:
- * (1) Use stream mode for data transfer. Block mode will be better for
- *     multiple file downloading and partial file downloading. However
- *     not every ftpd has block mode support.
- * (2) Use passive mode for data connection.
- *     So nutch will work if we run behind firewall.
- * (3) Data connection is opened/closed per ftp command for the reasons
- *     listed in (1). There are ftp servers out there,
- *     when partial downloading is enforeced by closing data channel
- *     socket on our client side, the server side immediately closes
- *     control channel (socket). Our codes deal with such a bad behavior.
- * (4) LIST is used to obtain remote file attributes if possible.
- *     MDTM & SIZE would be nice, but not as ubiquitously implemented as LIST.
- * (5) Avoid using ABOR in single thread? Do not use it at all.
- *
- * About exceptions:
- * Some specific exceptions are re-thrown as one of FtpException*.java
- * In fact, each function throws FtpException*.java or pass IOException.
- *
+ * Notes by John Xing: ftp server implementations are hardly uniform and none
+ * seems to follow RFCs whole-heartedly. We have no choice, but assume common
+ * denominator as following: (1) Use stream mode for data transfer. Block mode
+ * will be better for multiple file downloading and partial file downloading.
+ * However not every ftpd has block mode support. (2) Use passive mode for data
+ * connection. So nutch will work if we run behind firewall. (3) Data connection
+ * is opened/closed per ftp command for the reasons listed in (1). There are ftp
+ * servers out there, when partial downloading is enforced by closing data
+ * channel socket on our client side, the server side immediately closes control
+ * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
+ * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but
+ * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
+ * thread? Do not use it at all.
+ * 
+ * About exceptions: Some specific exceptions are re-thrown as one of
+ * FtpException*.java. In fact, each function throws FtpException*.java or
+ * passes on IOException.
+ * 
  * @author John Xing
  ***********************************************/
 
-public class Client extends FTP
-{
-    private int __dataTimeout;
-    private int __passivePort;
-    private String __passiveHost;
-//    private int __fileType, __fileFormat;
-    private boolean __remoteVerificationEnabled;
-//    private FTPFileEntryParser __entryParser;
-    private String __systemName;
-
-    // constructor
-    public Client()
-    {
-        __initDefaults();
-        __dataTimeout = -1;
-        __remoteVerificationEnabled = true;
-    }
-
-    // defaults when initialize
-    private void __initDefaults()
-    {
-        __passiveHost        = null;
-        __passivePort        = -1;
-        __systemName         = null;
-/*        __fileType           = FTP.ASCII_FILE_TYPE;
-        __fileFormat         = FTP.NON_PRINT_TEXT_FORMAT;
-        __entryParser        = null;
-*/    }
-
-    // parse reply for pass()
-    private void __parsePassiveModeReply(String reply)
-    throws MalformedServerReplyException
-    {
-        int i, index, lastIndex;
-        String octet1, octet2;
-        StringBuffer host;
-
-        reply = reply.substring(reply.indexOf('(') + 1,
-                                reply.indexOf(')')).trim();
-
-        host = new StringBuffer(24);
-        lastIndex = 0;
-        index = reply.indexOf(',');
-        host.append(reply.substring(lastIndex, index));
-
-        for (i = 0; i < 3; i++)
-        {
-            host.append('.');
-            lastIndex = index + 1;
-            index = reply.indexOf(',', lastIndex);
-            host.append(reply.substring(lastIndex, index));
-        }
-
-        lastIndex = index + 1;
-        index = reply.indexOf(',', lastIndex);
-
-        octet1 = reply.substring(lastIndex, index);
-        octet2 = reply.substring(index + 1);
-
-        // index and lastIndex now used as temporaries
-        try
-        {
-            index = Integer.parseInt(octet1);
-            lastIndex = Integer.parseInt(octet2);
-        }
-        catch (NumberFormatException e)
-        {
-            throw new MalformedServerReplyException(
-                "Could not parse passive host information.\nServer Reply: " + reply);
-        }
-
-        index <<= 8;
-        index |= lastIndex;
-
-        __passiveHost = host.toString();
-        __passivePort = index;
-    }
-
-    /**
-     * open a passive data connection socket
-     * @param command
-     * @param arg
-     * @return
-     * @throws IOException
-     * @throws FtpExceptionCanNotHaveDataConnection
-     */
-    protected Socket __openPassiveDataConnection(int command, String arg)
+public class Client extends FTP {
+  private int __dataTimeout;
+  private int __passivePort;
+  private String __passiveHost;
+  // private int __fileType, __fileFormat;
+  private boolean __remoteVerificationEnabled;
+  // private FTPFileEntryParser __entryParser;
+  private String __systemName;
+
+  // constructor
+  public Client() {
+    __initDefaults();
+    __dataTimeout = -1;
+    __remoteVerificationEnabled = true;
+  }
+
+  // defaults when initialize
+  private void __initDefaults() {
+    __passiveHost = null;
+    __passivePort = -1;
+    __systemName = null;
+    /*
+     * __fileType = FTP.ASCII_FILE_TYPE; __fileFormat =
+     * FTP.NON_PRINT_TEXT_FORMAT; __entryParser = null;
+     */}
+
+  // parse reply for pasv()
+  private void __parsePassiveModeReply(String reply)
+      throws MalformedServerReplyException {
+    int i, index, lastIndex;
+    String octet1, octet2;
+    StringBuffer host;
+
+    reply = reply.substring(reply.indexOf('(') + 1, reply.indexOf(')')).trim();
+
+    host = new StringBuffer(24);
+    lastIndex = 0;
+    index = reply.indexOf(',');
+    host.append(reply.substring(lastIndex, index));
+
+    for (i = 0; i < 3; i++) {
+      host.append('.');
+      lastIndex = index + 1;
+      index = reply.indexOf(',', lastIndex);
+      host.append(reply.substring(lastIndex, index));
+    }
+
+    lastIndex = index + 1;
+    index = reply.indexOf(',', lastIndex);
+
+    octet1 = reply.substring(lastIndex, index);
+    octet2 = reply.substring(index + 1);
+
+    // index and lastIndex now used as temporaries
+    try {
+      index = Integer.parseInt(octet1);
+      lastIndex = Integer.parseInt(octet2);
+    } catch (NumberFormatException e) {
+      throw new MalformedServerReplyException(
+          "Could not parse passive host information.\nServer Reply: " + reply);
+    }
+
+    index <<= 8;
+    index |= lastIndex;
+
+    __passiveHost = host.toString();
+    __passivePort = index;
+  }
+
+  /**
+   * open a passive data connection socket
+   * 
+   * @param command
+   * @param arg
+   * @return
+   * @throws IOException
+   * @throws FtpExceptionCanNotHaveDataConnection
+   */
+  protected Socket __openPassiveDataConnection(int command, String arg)
       throws IOException, FtpExceptionCanNotHaveDataConnection {
-        Socket socket;
+    Socket socket;
 
-//        // 20040317, xing, accommodate ill-behaved servers, see below
-//        int port_previous = __passivePort;
+    // // 20040317, xing, accommodate ill-behaved servers, see below
+    // int port_previous = __passivePort;
 
-        if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-          throw new FtpExceptionCanNotHaveDataConnection(
-            "pasv() failed. " + getReplyString());
-
-        try {
-          __parsePassiveModeReply(getReplyStrings()[0]);
-        } catch (MalformedServerReplyException e) {
-          throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-        }
-
-//        // 20040317, xing, accommodate ill-behaved servers, see above
-//        int count = 0;
-//        System.err.println("__passivePort "+__passivePort);
-//        System.err.println("port_previous "+port_previous);
-//        while (__passivePort == port_previous) {
-//          // just quit if too many tries. make it an exception here?
-//          if (count++ > 10)
-//            return null;
-//          // slow down further for each new try
-//          Thread.sleep(500*count);
-//          if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-//            throw new FtpExceptionCanNotHaveDataConnection(
-//              "pasv() failed. " + getReplyString());
-//            //return null;
-//          try {
-//            __parsePassiveModeReply(getReplyStrings()[0]);
-//          } catch (MalformedServerReplyException e) {
-//            throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-//          }
-//        }
-
-        socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
-
-        if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
-          socket.close();
-          return null;
-        }
-
-        if (__remoteVerificationEnabled && !verifyRemote(socket))
-        {
-            InetAddress host1, host2;
-
-            host1 = socket.getInetAddress();
-            host2 = getRemoteAddress();
-
-            socket.close();
-
-            // our precaution
-            throw new FtpExceptionCanNotHaveDataConnection(
-                "Host attempting data connection " + host1.getHostAddress() +
-                " is not same as server " + host2.getHostAddress() +
-                " So we intentionally close it for security precaution."
-                );
-        }
-
-        if (__dataTimeout >= 0)
-            socket.setSoTimeout(__dataTimeout);
-
-        return socket;
-    }
-
-    /***
-     * Sets the timeout in milliseconds to use for data connection.
-     * set immediately after opening the data connection.
-     ***/
-    public void setDataTimeout(int timeout)
-    {
-        __dataTimeout = timeout;
-    }
-
-    /***
-     * Closes the connection to the FTP server and restores
-     * connection parameters to the default values.
-     * <p>
-     * @exception IOException If an error occurs while disconnecting.
-     ***/
-    public void disconnect() throws IOException
-    {
-        __initDefaults();
-        super.disconnect();
-        // no worry for data connection, since we always close it
-        // in every ftp command that invloves data connection
-    }
-
-    /***
-     * Enable or disable verification that the remote host taking part
-     * of a data connection is the same as the host to which the control
-     * connection is attached.  The default is for verification to be
-     * enabled.  You may set this value at any time, whether the
-     * FTPClient is currently connected or not.
-     * <p>
-     * @param enable True to enable verification, false to disable verification.
-     ***/
-    public void setRemoteVerificationEnabled(boolean enable)
-    {
-        __remoteVerificationEnabled = enable;
-    }
-
-    /***
-     * Return whether or not verification of the remote host participating
-     * in data connections is enabled.  The default behavior is for
-     * verification to be enabled.
-     * <p>
-     * @return True if verification is enabled, false if not.
-     ***/
-    public boolean isRemoteVerificationEnabled()
-    {
-        return __remoteVerificationEnabled;
-    }
-
-    /***
-     * Login to the FTP server using the provided username and password.
-     * <p>
-     * @param username The username to login under.
-     * @param password The password to use.
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean login(String username, String password) throws IOException
-    {
-        user(username);
-
-        if (FTPReply.isPositiveCompletion(getReplyCode()))
-            return true;
-
-        // If we get here, we either have an error code, or an intermmediate
-        // reply requesting password.
-        if (!FTPReply.isPositiveIntermediate(getReplyCode()))
-            return false;
-
-        return FTPReply.isPositiveCompletion(pass(password));
-    }
-
-    /***
-     * Logout of the FTP server by sending the QUIT command.
-     * <p>
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean logout() throws IOException
-    {
-        return FTPReply.isPositiveCompletion(quit());
-    }
-
-    /**
-     * Retrieve a list reply for path
-     * @param path
-     * @param entries
-     * @param limit
-     * @param parser
-     * @throws IOException
-     * @throws FtpExceptionCanNotHaveDataConnection
-     * @throws FtpExceptionUnknownForcedDataClose
-     * @throws FtpExceptionControlClosedByForcedDataClose
-     */
-    public void retrieveList(String path, List<FTPFile> entries, int limit,
-      FTPFileEntryParser parser)
-      throws IOException,
-        FtpExceptionCanNotHaveDataConnection,
-        FtpExceptionUnknownForcedDataClose,
-        FtpExceptionControlClosedByForcedDataClose {
-      Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
+    if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+      throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. "
+          + getReplyString());
+
+    try {
+      __parsePassiveModeReply(getReplyStrings()[0]);
+    } catch (MalformedServerReplyException e) {
+      throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+    }
+
+    // // 20040317, xing, accommodate ill-behaved servers, see above
+    // int count = 0;
+    // System.err.println("__passivePort "+__passivePort);
+    // System.err.println("port_previous "+port_previous);
+    // while (__passivePort == port_previous) {
+    // // just quit if too many tries. make it an exception here?
+    // if (count++ > 10)
+    // return null;
+    // // slow down further for each new try
+    // Thread.sleep(500*count);
+    // if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+    // throw new FtpExceptionCanNotHaveDataConnection(
+    // "pasv() failed. " + getReplyString());
+    // //return null;
+    // try {
+    // __parsePassiveModeReply(getReplyStrings()[0]);
+    // } catch (MalformedServerReplyException e) {
+    // throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+    // }
+    // }
 
-      if (socket == null)
-        throw new FtpExceptionCanNotHaveDataConnection("LIST "
-          + ((path == null) ? "" : path));
+    socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
 
-      BufferedReader reader =
-          new BufferedReader(new InputStreamReader(socket.getInputStream()));
+    if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
+      socket.close();
+      return null;
+    }
 
-      // force-close data channel socket, when download limit is reached
-//      boolean mandatory_close = false;
+    if (__remoteVerificationEnabled && !verifyRemote(socket)) {
+      InetAddress host1, host2;
 
-      //List entries = new LinkedList();
-      int count = 0;
-      String line = parser.readNextEntry(reader);
-      while (line != null) {
-        FTPFile ftpFile = parser.parseFTPEntry(line);
-        // skip non-formatted lines
-        if (ftpFile == null) {
-          line = parser.readNextEntry(reader);
-          continue;
-        }
-        entries.add(ftpFile);
-        count += line.length();
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is up to the line when total bytes is just over limit
-        if (limit >= 0 && count > limit) {
-//          mandatory_close = true;
-          break;
-        }
-        line = parser.readNextEntry(reader);
-      }
+      host1 = socket.getInetAddress();
+      host2 = getRemoteAddress();
 
-      //if (mandatory_close)
-      // you always close here, no matter mandatory_close or not.
-      // however different ftp servers respond differently, see below.
       socket.close();
 
-      // scenarios:
-      // (1) mandatory_close is false, download limit not reached
-      //     no special care here
-      // (2) mandatory_close is true, download limit is reached
-      //     different servers have different reply codes:
-
-      try {
-        int reply = getReply();
-        if (!_notBadReply(reply))
-          throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-      } catch (FTPConnectionClosedException e) {
-        // some ftp servers will close control channel if data channel socket
-        // is closed by our end before all data has been read out. Check:
-        // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-        // so must catch FTPConnectionClosedException thrown by getReply() above
-        //disconnect();
-        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-      }
+      // our precaution
+      throw new FtpExceptionCanNotHaveDataConnection(
+          "Host attempting data connection " + host1.getHostAddress()
+              + " is not same as server " + host2.getHostAddress()
+              + " So we intentionally close it for security precaution.");
+    }
+
+    if (__dataTimeout >= 0)
+      socket.setSoTimeout(__dataTimeout);
+
+    return socket;
+  }
+
+  /***
+   * Sets the timeout in milliseconds to use for data connection. set
+   * immediately after opening the data connection.
+   ***/
+  public void setDataTimeout(int timeout) {
+    __dataTimeout = timeout;
+  }
+
+  /***
+   * Closes the connection to the FTP server and restores connection parameters
+   * to the default values.
+   * <p>
+   * 
+   * @exception IOException
+   *              If an error occurs while disconnecting.
+   ***/
+  public void disconnect() throws IOException {
+    __initDefaults();
+    super.disconnect();
+    // no worry for data connection, since we always close it
+    // in every ftp command that involves data connection
+  }
+
+  /***
+   * Enable or disable verification that the remote host taking part of a data
+   * connection is the same as the host to which the control connection is
+   * attached. The default is for verification to be enabled. You may set this
+   * value at any time, whether the FTPClient is currently connected or not.
+   * <p>
+   * 
+   * @param enable
+   *          True to enable verification, false to disable verification.
+   ***/
+  public void setRemoteVerificationEnabled(boolean enable) {
+    __remoteVerificationEnabled = enable;
+  }
+
+  /***
+   * Return whether or not verification of the remote host participating in data
+   * connections is enabled. The default behavior is for verification to be
+   * enabled.
+   * <p>
+   * 
+   * @return True if verification is enabled, false if not.
+   ***/
+  public boolean isRemoteVerificationEnabled() {
+    return __remoteVerificationEnabled;
+  }
+
+  /***
+   * Login to the FTP server using the provided username and password.
+   * <p>
+   * 
+   * @param username
+   *          The username to login under.
+   * @param password
+   *          The password to use.
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean login(String username, String password) throws IOException {
+    user(username);
 
-    }
-
-    /**
-     * Retrieve a file for path
-     * @param path
-     * @param os
-     * @param limit
-     * @throws IOException
-     * @throws FtpExceptionCanNotHaveDataConnection
-     * @throws FtpExceptionUnknownForcedDataClose
-     * @throws FtpExceptionControlClosedByForcedDataClose
-     */
-    public void retrieveFile(String path, OutputStream os, int limit)
-      throws IOException,
-        FtpExceptionCanNotHaveDataConnection,
-        FtpExceptionUnknownForcedDataClose,
-        FtpExceptionControlClosedByForcedDataClose {
+    if (FTPReply.isPositiveCompletion(getReplyCode()))
+      return true;
 
-      Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
+    // If we get here, we either have an error code, or an intermediate
+    // reply requesting password.
+    if (!FTPReply.isPositiveIntermediate(getReplyCode()))
+      return false;
+
+    return FTPReply.isPositiveCompletion(pass(password));
+  }
+
+  /***
+   * Logout of the FTP server by sending the QUIT command.
+   * <p>
+   * 
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean logout() throws IOException {
+    return FTPReply.isPositiveCompletion(quit());
+  }
+
+  /**
+   * Retrieve a list reply for path
+   * 
+   * @param path
+   * @param entries
+   * @param limit
+   * @param parser
+   * @throws IOException
+   * @throws FtpExceptionCanNotHaveDataConnection
+   * @throws FtpExceptionUnknownForcedDataClose
+   * @throws FtpExceptionControlClosedByForcedDataClose
+   */
+  public void retrieveList(String path, List<FTPFile> entries, int limit,
+      FTPFileEntryParser parser) throws IOException,
+      FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
+      FtpExceptionControlClosedByForcedDataClose {
+    Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
 
-      if (socket == null)
-        throw new FtpExceptionCanNotHaveDataConnection("RETR "
+    if (socket == null)
+      throw new FtpExceptionCanNotHaveDataConnection("LIST "
           + ((path == null) ? "" : path));
 
-      InputStream input = socket.getInputStream();
+    BufferedReader reader = new BufferedReader(new InputStreamReader(
+        socket.getInputStream()));
+
+    // force-close data channel socket, when download limit is reached
+    // boolean mandatory_close = false;
 
-      // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
-      // do we ever need ASCII_FILE_TYPE?
-      //if (__fileType == ASCII_FILE_TYPE)
-      // input = new FromNetASCIIInputStream(input);
-
-      // fixme, should we instruct server here for binary file type?
-
-      // force-close data channel socket
-//      boolean mandatory_close = false;
-
-      int len; int count = 0;
-      byte[] buf =
-        new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
-      while((len=input.read(buf,0,buf.length)) != -1){
-        count += len;
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is exactly of limit bytes
-        if (limit >= 0 && count > limit) {
-          os.write(buf,0,len-(count-limit));
- //         mandatory_close = true;
-          break;
-        }
-        os.write(buf,0,len);
-        os.flush();
+    // List entries = new LinkedList();
+    int count = 0;
+    String line = parser.readNextEntry(reader);
+    while (line != null) {
+      FTPFile ftpFile = parser.parseFTPEntry(line);
+      // skip non-formatted lines
+      if (ftpFile == null) {
+        line = parser.readNextEntry(reader);
+        continue;
+      }
+      entries.add(ftpFile);
+      count += line.length();
+      // impose download limit if limit >= 0, otherwise no limit
+      // here, cut off is up to the line when total bytes is just over limit
+      if (limit >= 0 && count > limit) {
+        // mandatory_close = true;
+        break;
       }
+      line = parser.readNextEntry(reader);
+    }
 
-      //if (mandatory_close)
-      // you always close here, no matter mandatory_close or not.
-      // however different ftp servers respond differently, see below.
-      socket.close();
+    // if (mandatory_close)
+    // you always close here, no matter mandatory_close or not.
+    // however different ftp servers respond differently, see below.
+    socket.close();
+
+    // scenarios:
+    // (1) mandatory_close is false, download limit not reached
+    // no special care here
+    // (2) mandatory_close is true, download limit is reached
+    // different servers have different reply codes:
+
+    try {
+      int reply = getReply();
+      if (!_notBadReply(reply))
+        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+    } catch (FTPConnectionClosedException e) {
+      // some ftp servers will close control channel if data channel socket
+      // is closed by our end before all data has been read out. Check:
+      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+      // so must catch FTPConnectionClosedException thrown by getReply() above
+      // disconnect();
+      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
+    }
+
+  }
+
+  /**
+   * Retrieve a file for path
+   * 
+   * @param path
+   * @param os
+   * @param limit
+   * @throws IOException
+   * @throws FtpExceptionCanNotHaveDataConnection
+   * @throws FtpExceptionUnknownForcedDataClose
+   * @throws FtpExceptionControlClosedByForcedDataClose
+   */
+  public void retrieveFile(String path, OutputStream os, int limit)
+      throws IOException, FtpExceptionCanNotHaveDataConnection,
+      FtpExceptionUnknownForcedDataClose,
+      FtpExceptionControlClosedByForcedDataClose {
 
-      // scenarios:
-      // (1) mandatory_close is false, download limit not reached
-      //     no special care here
-      // (2) mandatory_close is true, download limit is reached
-      //     different servers have different reply codes:
-
-      // do not need this
-      //sendCommand("ABOR");
-
-      try {
-        int reply = getReply();
-        if (!_notBadReply(reply))
-          throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-      } catch (FTPConnectionClosedException e) {
-        // some ftp servers will close control channel if data channel socket
-        // is closed by our end before all data has been read out. Check:
-        // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-        // so must catch FTPConnectionClosedException thrown by getReply() above
-        //disconnect();
-        throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-      }
+    Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
 
-    }
+    if (socket == null)
+      throw new FtpExceptionCanNotHaveDataConnection("RETR "
+          + ((path == null) ? "" : path));
 
-    // reply check after closing data connection
-    private boolean _notBadReply(int reply) {
+    InputStream input = socket.getInputStream();
 
-      if (FTPReply.isPositiveCompletion(reply)) {
-        // do nothing
-      } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
+    // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
+    // do we ever need ASCII_FILE_TYPE?
+    // if (__fileType == ASCII_FILE_TYPE)
+    // input = new FromNetASCIIInputStream(input);
+
+    // fixme, should we instruct server here for binary file type?
+
+    // force-close data channel socket
+    // boolean mandatory_close = false;
+
+    int len;
+    int count = 0;
+    byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
+    while ((len = input.read(buf, 0, buf.length)) != -1) {
+      count += len;
+      // impose download limit if limit >= 0, otherwise no limit
+      // here, cut off is exactly of limit bytes
+      if (limit >= 0 && count > limit) {
+        os.write(buf, 0, len - (count - limit));
+        // mandatory_close = true;
+        break;
+      }
+      os.write(buf, 0, len);
+      os.flush();
+    }
+
+    // if (mandatory_close)
+    // you always close here, no matter mandatory_close or not.
+    // however different ftp servers respond differently, see below.
+    socket.close();
+
+    // scenarios:
+    // (1) mandatory_close is false, download limit not reached
+    // no special care here
+    // (2) mandatory_close is true, download limit is reached
+    // different servers have different reply codes:
+
+    // do not need this
+    // sendCommand("ABOR");
+
+    try {
+      int reply = getReply();
+      if (!_notBadReply(reply))
+        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+    } catch (FTPConnectionClosedException e) {
+      // some ftp servers will close control channel if data channel socket
+      // is closed by our end before all data has been read out. Check:
+      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+      // so must catch FTPConnectionClosedException thrown by getReply() above
+      // disconnect();
+      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
+    }
+
+  }
+
+  // reply check after closing data connection
+  private boolean _notBadReply(int reply) {
+
+    if (FTPReply.isPositiveCompletion(reply)) {
+      // do nothing
+    } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
       // some ftp servers reply 426, e.g.,
       // foggy FTP server (Version wu-2.6.2(2)
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
+      // is there a second reply waiting? no!
+      // getReply();
+    } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
       // some ftp servers reply 450, e.g.,
       // ProFTPD [ftp.kernel.org]
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 451) { // FTPReply.ACTION_ABORTED
+      // is there a second reply waiting? no!
+      // getReply();
+    } else if (reply == 451) { // FTPReply.ACTION_ABORTED
       // some ftp servers reply 451, e.g.,
       // ProFTPD [ftp.kernel.org]
-        // there is second reply witing? no!
-        //getReply();
-      } else if (reply == 451) { // FTPReply.ACTION_ABORTED
-      } else {
+      // is there a second reply waiting? no!
+      // getReply();
+    } else if (reply == 451) { // FTPReply.ACTION_ABORTED
+    } else {
       // what other kind of ftp server out there?
-        return false;
-      }
+      return false;
+    }
+
+    return true;
+  }
 
+  /***
+   * Sets the file type to be transferred. This should be one of
+   * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
+   * etc. The file type only needs to be set when you want to change the type.
+   * After changing it, the new type stays in effect until you change it again.
+   * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method
+   * is never called.
+   * <p>
+   * 
+   * @param fileType
+   *          The <code> _FILE_TYPE </code> constant indicating the type of file.
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean setFileType(int fileType) throws IOException {
+    if (FTPReply.isPositiveCompletion(type(fileType))) {
+      /*
+       * __fileType = fileType; __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+       */
       return true;
     }
+    return false;
+  }
 
-    /***
-     * Sets the file type to be transferred.  This should be one of 
-     * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
-     * etc.  The file type only needs to be set when you want to change the
-     * type.  After changing it, the new type stays in effect until you change
-     * it again.  The default file type is <code> FTP.ASCII_FILE_TYPE </code>
-     * if this method is never called.
-     * <p>
-     * @param fileType The <code> _FILE_TYPE </code> constant indcating the
-     *                 type of file.
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean setFileType(int fileType) throws IOException
-    {
-        if (FTPReply.isPositiveCompletion(type(fileType)))
-        {
-/*            __fileType = fileType;
-            __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;*/
-            return true;
-        }
-        return false;
-    }
-
-    /***
-     * Fetches the system type name from the server and returns the string.
-     * This value is cached for the duration of the connection after the
-     * first call to this method.  In other words, only the first time
-     * that you invoke this method will it issue a SYST command to the
-     * FTP server.  FTPClient will remember the value and return the
-     * cached value until a call to disconnect.
-     * <p>
-     * @return The system type name obtained from the server.  null if the
-     *       information could not be obtained.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *  command to the server or receiving a reply from the server.
-     ***/
-    public String getSystemName()
-      throws IOException, FtpExceptionBadSystResponse
-    {
-      //if (syst() == FTPReply.NAME_SYSTEM_TYPE)
-      // Technically, we should expect a NAME_SYSTEM_TYPE response, but
-      // in practice FTP servers deviate, so we soften the condition to
-      // a positive completion.
-        if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
-            __systemName = (getReplyStrings()[0]).substring(4);
-        } else {
-            throw new FtpExceptionBadSystResponse(
-              "Bad response of SYST: " + getReplyString());
-        }
-
-        return __systemName;
-    }
-
-    /***
-     * Sends a NOOP command to the FTP server.  This is useful for preventing
-     * server timeouts.
-     * <p>
-     * @return True if successfully completed, false if not.
-     * @exception FTPConnectionClosedException
-     *      If the FTP server prematurely closes the connection as a result
-     *      of the client being idle or some other reason causing the server
-     *      to send FTP reply code 421.  This exception may be caught either
-     *      as an IOException or independently as itself.
-     * @exception IOException  If an I/O error occurs while either sending a
-     *      command to the server or receiving a reply from the server.
-     ***/
-    public boolean sendNoOp() throws IOException
-    {
-        return FTPReply.isPositiveCompletion(noop());
-    }
-
-//    client.stat(path);
-//    client.sendCommand("STAT");
-//    client.sendCommand("STAT",path);
-//    client.sendCommand("MDTM",path);
-//    client.sendCommand("SIZE",path);
-//    client.sendCommand("HELP","SITE");
-//    client.sendCommand("SYST");
-//    client.setRestartOffset(120);
+  /***
+   * Fetches the system type name from the server and returns the string. This
+   * value is cached for the duration of the connection after the first call to
+   * this method. In other words, only the first time that you invoke this
+   * method will it issue a SYST command to the FTP server. FTPClient will
+   * remember the value and return the cached value until a call to disconnect.
+   * <p>
+   * 
+   * @return The system type name obtained from the server. null if the
+   *         information could not be obtained.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
+    // if (syst() == FTPReply.NAME_SYSTEM_TYPE)
+    // Technically, we should expect a NAME_SYSTEM_TYPE response, but
+    // in practice FTP servers deviate, so we soften the condition to
+    // a positive completion.
+    if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
+      __systemName = (getReplyStrings()[0]).substring(4);
+    } else {
+      throw new FtpExceptionBadSystResponse("Bad response of SYST: "
+          + getReplyString());
+    }
+
+    return __systemName;
+  }
+
+  /***
+   * Sends a NOOP command to the FTP server. This is useful for preventing
+   * server timeouts.
+   * <p>
+   * 
+   * @return True if successfully completed, false if not.
+   * @exception FTPConnectionClosedException
+   *              If the FTP server prematurely closes the connection as a
+   *              result of the client being idle or some other reason causing
+   *              the server to send FTP reply code 421. This exception may be
+   *              caught either as an IOException or independently as itself.
+   * @exception IOException
+   *              If an I/O error occurs while either sending a command to the
+   *              server or receiving a reply from the server.
+   ***/
+  public boolean sendNoOp() throws IOException {
+    return FTPReply.isPositiveCompletion(noop());
+  }
+
+  // client.stat(path);
+  // client.sendCommand("STAT");
+  // client.sendCommand("STAT",path);
+  // client.sendCommand("MDTM",path);
+  // client.sendCommand("SIZE",path);
+  // client.sendCommand("HELP","SITE");
+  // client.sendCommand("SYST");
+  // client.setRestartOffset(120);
 
 }

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Fri Jan  9 06:34:33 2015
@@ -33,13 +33,12 @@ import java.util.Collection;
 import java.util.HashSet;
 
 /**
- * This class is a protocol plugin used for ftp: scheme.
- * It creates {@link FtpResponse} object and gets the content of the url from it.
+ * This class is a protocol plugin used for ftp: scheme. It creates
+ * {@link FtpResponse} object and gets the content of the url from it.
  * Configurable parameters are {@code ftp.username}, {@code ftp.password},
- *                             {@code ftp.content.limit}, {@code ftp.timeout}, 
- *                             {@code ftp.server.timeout}, {@code ftp.password}, 
- *                             {@code ftp.keep.connection} and {@code ftp.follow.talk}.
- * For details see "FTP properties" section in {@code nutch-default.xml}.
+ * {@code ftp.content.limit}, {@code ftp.timeout}, {@code ftp.server.timeout},
+ * {@code ftp.keep.connection} and {@code ftp.follow.talk}. For details see the
+ * "FTP properties" section in {@code nutch-default.xml}.
  */
 public class Ftp implements Protocol {
 
@@ -82,7 +81,7 @@ public class Ftp implements Protocol {
   private Configuration conf;
 
   private FtpRobotRulesParser robots = null;
-      
+
   // constructor
   public Ftp() {
     robots = new FtpRobotRulesParser();
@@ -108,12 +107,14 @@ public class Ftp implements Protocol {
     this.keepConnection = keepConnection;
   }
 
-  /** 
-   * Creates a {@link FtpResponse} object corresponding to the url and 
-   * returns a {@link ProtocolOutput} object as per the content received
+  /**
+   * Creates a {@link FtpResponse} object corresponding to the url and returns a
+   * {@link ProtocolOutput} object as per the content received
    * 
-   * @param url Text containing the ftp url
-   * @param datum The CrawlDatum object corresponding to the url
+   * @param url
+   *          Text containing the ftp url
+   * @param datum
+   *          The CrawlDatum object corresponding to the url
    * 
    * @return {@link ProtocolOutput} object for the url
    */
@@ -233,8 +234,8 @@ public class Ftp implements Protocol {
     if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength
       ftp.setMaxContentLength(maxContentLength);
 
-    Content content = ftp.getProtocolOutput(urlString, WebPage.newBuilder().build())
-        .getContent();
+    Content content = ftp.getProtocolOutput(urlString,
+        WebPage.newBuilder().build()).getContent();
 
     System.err.println("Content-Type: " + content.getContentType());
     System.err.println("Content-Length: "
@@ -252,7 +253,7 @@ public class Ftp implements Protocol {
     return FIELDS;
   }
 
-  /** 
+  /**
    * Get the robots rules for a given url
    */
   public BaseRobotRules getRobotRules(String url, WebPage page) {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java Fri Jan  9 06:34:33 2015
@@ -17,13 +17,16 @@
 
 package org.apache.nutch.protocol.ftp;
 
-/** Thrown for Ftp error codes.
+/**
+ * Thrown for Ftp error codes.
  */
 public class FtpError extends FtpException {
 
   private int code;
-  
-  public int getCode(int code) { return code; }
+
+  public int getCode(int code) {
+    return code;
+  }
 
   public FtpError(int code) {
     super("Ftp Error: " + code);

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java Fri Jan  9 06:34:33 2015
@@ -20,9 +20,9 @@ package org.apache.nutch.protocol.ftp;
 import org.apache.nutch.protocol.ProtocolException;
 
 /***
- * Superclass for important exceptions thrown during FTP talk,
- * that must be handled with care.
- *
+ * Superclass for important exceptions thrown during FTP talk, that must be
+ * handled with care.
+ * 
  * @author John Xing
  */
 public class FtpException extends ProtocolException {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java Fri Jan  9 06:34:33 2015
@@ -19,7 +19,7 @@ package org.apache.nutch.protocol.ftp;
 
 /**
  * Exception indicating bad reply of SYST command.
- *
+ * 
  * @author John Xing
  */
 public class FtpExceptionBadSystResponse extends FtpException {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java Fri Jan  9 06:34:33 2015
@@ -19,7 +19,7 @@ package org.apache.nutch.protocol.ftp;
 
 /**
  * Exception indicating failure of opening data connection.
- *
+ * 
  * @author John Xing
  */
 public class FtpExceptionCanNotHaveDataConnection extends FtpException {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java Fri Jan  9 06:34:33 2015
@@ -18,9 +18,9 @@
 package org.apache.nutch.protocol.ftp;
 
 /**
- * Exception indicating control channel is closed by server end, due to
- * forced closure of data channel at client (our) end.
- *
+ * Exception indicating control channel is closed by server end, due to forced
+ * closure of data channel at client (our) end.
+ * 
  * @author John Xing
  */
 public class FtpExceptionControlClosedByForcedDataClose extends FtpException {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java Fri Jan  9 06:34:33 2015
@@ -18,9 +18,9 @@
 package org.apache.nutch.protocol.ftp;
 
 /**
- * Exception indicating unrecognizable reply from server after
- * forced closure of data channel by client (our) side.
- *
+ * Exception indicating unrecognizable reply from server after forced closure of
+ * data channel by client (our) side.
+ * 
  * @author John Xing
  */
 public class FtpExceptionUnknownForcedDataClose extends FtpException {

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Fri Jan  9 06:34:33 2015
@@ -17,7 +17,6 @@
 
 package org.apache.nutch.protocol.ftp;
 
-
 import org.apache.avro.util.Utf8;
 import org.apache.commons.net.ftp.FTP;
 import org.apache.commons.net.ftp.FTPFile;
@@ -39,18 +38,14 @@ import java.nio.ByteBuffer;
 import java.util.LinkedList;
 import java.util.List;
 
-
 /************************************
- * FtpResponse.java mimics ftp replies as http response.
- * It tries its best to follow http's way for headers, response codes
- * as well as exceptions.
- *
- * Comments:
- * In this class, all FtpException*.java thrown by Client.java
- * and some important commons-net exceptions passed by Client.java
- * must have been properly dealt with. They'd better not be leaked
- * to the caller of this class.
- *
+ * FtpResponse.java mimics ftp replies as http response. It tries its best to
+ * follow http's way for headers, response codes as well as exceptions.
+ * 
+ * Comments: In this class, all FtpException*.java thrown by Client.java and
+ * some important commons-net exceptions passed by Client.java must have been
+ * properly dealt with. They'd better not be leaked to the caller of this class.
+ * 
  * @author John Xing
  ***********************************/
 public class FtpResponse {
@@ -66,23 +61,26 @@ public class FtpResponse {
   private Configuration conf;
 
   /** Returns the response code. */
-  public int getCode() { return code; }
+  public int getCode() {
+    return code;
+  }
 
   /** Returns the value of a named header. */
   public String getHeader(String name) {
     return headers.get(name);
   }
 
-  public byte[] getContent() { return content; }
+  public byte[] getContent() {
+    return content;
+  }
 
   public Content toContent() {
     return new Content(orig, base, (content != null ? content : EMPTY_CONTENT),
-                       getHeader(Response.CONTENT_TYPE),
-                       headers, this.conf);
+        getHeader(Response.CONTENT_TYPE), headers, this.conf);
   }
 
   public FtpResponse(URL url, WebPage page, Ftp ftp, Configuration conf)
-    throws FtpException, IOException {
+      throws FtpException, IOException {
 
     this.orig = url.toString();
     this.base = url.toString();
@@ -104,27 +102,26 @@ public class FtpResponse {
 
       if (ftp.followTalk) {
         if (Ftp.LOG.isInfoEnabled()) {
-          Ftp.LOG.info("fetching "+url);
+          Ftp.LOG.info("fetching " + url);
         }
       } else {
         if (Ftp.LOG.isTraceEnabled()) {
-          Ftp.LOG.trace("fetching "+url);
+          Ftp.LOG.trace("fetching " + url);
         }
       }
 
       InetAddress addr = InetAddress.getByName(url.getHost());
-      if (addr != null
-          && conf.getBoolean("store.ip.address", false) == true) {
-        String ipString = addr.getHostAddress(); //get the ip address
+      if (addr != null && conf.getBoolean("store.ip.address", false) == true) {
+        String ipString = addr.getHostAddress(); // get the ip address
         page.getMetadata().put(new Utf8("_ip_"),
-          ByteBuffer.wrap(ipString.getBytes()));
+            ByteBuffer.wrap(ipString.getBytes()));
       }
 
       // idled too long, remote server or ourselves may have timed out,
       // should start anew.
       if (ftp.client != null && ftp.keepConnection
           && ftp.renewalTime < System.currentTimeMillis()) {
-        if (Ftp.LOG.isInfoEnabled()) { 
+        if (Ftp.LOG.isInfoEnabled()) {
           Ftp.LOG.info("delete client because idled too long");
         }
         ftp.client = null;
@@ -138,8 +135,9 @@ public class FtpResponse {
         // the real client
         ftp.client = new Client();
         // when to renew, take the lesser
-        //ftp.renewalTime = System.currentTimeMillis()
-        //  + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+        // ftp.renewalTime = System.currentTimeMillis()
+        // + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout :
+        // ftp.serverTimeout);
 
         // timeout for control connection
         ftp.client.setDefaultTimeout(ftp.timeout);
@@ -148,8 +146,8 @@ public class FtpResponse {
 
         // follow ftp talk?
         if (ftp.followTalk)
-          ftp.client.addProtocolCommandListener(
-            new PrintCommandListener(Ftp.LOG));
+          ftp.client.addProtocolCommandListener(new PrintCommandListener(
+              Ftp.LOG));
       }
 
       // quit from previous site if at a different site now
@@ -157,8 +155,8 @@ public class FtpResponse {
         InetAddress remoteAddress = ftp.client.getRemoteAddress();
         if (!addr.equals(remoteAddress)) {
           if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-            Ftp.LOG.info("disconnect from "+remoteAddress
-            +" before connect to "+addr);
+            Ftp.LOG.info("disconnect from " + remoteAddress
+                + " before connect to " + addr);
           }
           // quit from current site
           ftp.client.logout();
@@ -170,22 +168,22 @@ public class FtpResponse {
       if (!ftp.client.isConnected()) {
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("connect to "+addr);
+          Ftp.LOG.info("connect to " + addr);
         }
 
         ftp.client.connect(addr);
         if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.connect() failed: "
-              + addr + " " + ftp.client.getReplyString());
+            Ftp.LOG.warn("ftp.client.connect() failed: " + addr + " "
+                + ftp.client.getReplyString());
           }
           this.code = 500; // http Internal Server Error
           return;
         }
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("log into "+addr);
+          Ftp.LOG.info("log into " + addr);
         }
 
         if (!ftp.client.login(ftp.userName, ftp.passWord)) {
@@ -196,9 +194,9 @@ public class FtpResponse {
           // (not dealt with here at all) .
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.login() failed: "+addr);
+            Ftp.LOG.warn("ftp.client.login() failed: " + addr);
           }
-          this.code = 401;  // http Unauthorized
+          this.code = 401; // http Unauthorized
           return;
         }
 
@@ -207,14 +205,14 @@ public class FtpResponse {
           ftp.client.logout();
           ftp.client.disconnect();
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
+            Ftp.LOG.warn("ftp.client.setFileType() failed: " + addr);
           }
           this.code = 500; // http Internal Server Error
           return;
         }
 
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("set parser for "+addr);
+          Ftp.LOG.info("set parser for " + addr);
         }
 
         // SYST is valid only after login
@@ -225,17 +223,18 @@ public class FtpResponse {
           if (parserKey.startsWith("UNKNOWN Type: L8"))
             parserKey = "UNIX Type: L8";
           ftp.parser = (new DefaultFTPFileEntryParserFactory())
-            .createFileEntryParser(parserKey);
+              .createFileEntryParser(parserKey);
         } catch (FtpExceptionBadSystResponse e) {
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
+            Ftp.LOG
+                .warn("ftp.client.getSystemName() failed: " + addr + " " + e);
           }
           ftp.parser = null;
         } catch (ParserInitializationException e) {
           // ParserInitializationException is RuntimeException defined in
           // org.apache.commons.net.ftp.parser.ParserInitializationException
           if (Ftp.LOG.isWarnEnabled()) {
-            Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
+            Ftp.LOG.warn("createFileEntryParser() failed. " + addr + " " + e);
           }
           ftp.parser = null;
         } finally {
@@ -243,7 +242,7 @@ public class FtpResponse {
             // do not log as severe, otherwise
             // FetcherThread/RequestScheduler will abort
             if (Ftp.LOG.isWarnEnabled()) {
-              Ftp.LOG.warn("ftp.parser is null: "+addr);
+              Ftp.LOG.warn("ftp.parser is null: " + addr);
             }
             ftp.client.logout();
             ftp.client.disconnect();
@@ -269,10 +268,11 @@ public class FtpResponse {
       // reset next renewalTime, take the lesser
       if (ftp.client != null && ftp.keepConnection) {
         ftp.renewalTime = System.currentTimeMillis()
-          + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+            + ((ftp.timeout < ftp.serverTimeout) ? ftp.timeout
+                : ftp.serverTimeout);
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
           Ftp.LOG.info("reset renewalTime to "
-            + HttpDateFormat.toString(ftp.renewalTime));
+              + HttpDateFormat.toString(ftp.renewalTime));
         }
       }
 
@@ -280,15 +280,15 @@ public class FtpResponse {
       // may have deleted ftp.client
       if (ftp.client != null && !ftp.keepConnection) {
         if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-          Ftp.LOG.info("disconnect from "+addr);
+          Ftp.LOG.info("disconnect from " + addr);
         }
         ftp.client.logout();
         ftp.client.disconnect();
       }
-      
+
     } catch (Exception e) {
       if (Ftp.LOG.isWarnEnabled()) {
-        Ftp.LOG.warn(""+e);
+        Ftp.LOG.warn("" + e);
       }
       // for any un-foreseen exception (run time exception or not),
       // do ultimate clean and leave ftp.client for garbage collection
@@ -298,21 +298,21 @@ public class FtpResponse {
       ftp.client = null;
       // or do explicit garbage collection?
       // System.gc();
-// can we be less dramatic, using the following instead?
-// probably unnecessary for our practical purpose here
-//      try {
-//        ftp.client.logout();
-//        ftp.client.disconnect();
-//      }
+      // can we be less dramatic, using the following instead?
+      // probably unnecessary for our practical purpose here
+      // try {
+      // ftp.client.logout();
+      // ftp.client.disconnect();
+      // }
       throw new FtpException(e);
-      //throw e;
+      // throw e;
     }
 
   }
 
   // get ftp file as http response
   private void getFileAsHttpResponse(String path, long lastModified)
-    throws IOException {
+      throws IOException {
 
     ByteArrayOutputStream os = null;
     List<FTPFile> list = null;
@@ -324,9 +324,9 @@ public class FtpResponse {
 
       FTPFile ftpFile = (FTPFile) list.get(0);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Long(ftpFile.getSize()).toString());
+          new Long(ftpFile.getSize()).toString());
       this.headers.set(Response.LAST_MODIFIED,
-                       HttpDateFormat.toString(ftpFile.getTimestamp()));
+          HttpDateFormat.toString(ftpFile.getTimestamp()));
       // don't retrieve the file if not changed.
       if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
         code = 304;
@@ -337,11 +337,11 @@ public class FtpResponse {
 
       this.content = os.toByteArray();
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -350,64 +350,64 @@ public class FtpResponse {
       // control connection is off, clean up
       // ftp.client.disconnect();
       if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-        Ftp.LOG.info("delete client because server cut off control channel: "+e);
+        Ftp.LOG.info("delete client because server cut off control channel: "
+            + e);
       }
       ftp.client = null;
 
       // in case this FtpExceptionControlClosedByForcedDataClose is
       // thrown by retrieveList() (not retrieveFile()) above,
       if (os == null) { // indicating throwing by retrieveList()
-        //throw new FtpException("fail to get attibutes: "+path);
+        // throw new FtpException("fail to get attibutes: "+path);
         if (Ftp.LOG.isWarnEnabled()) {
-          Ftp.LOG.warn(
-              "Please try larger maxContentLength for ftp.client.retrieveList(). "
-            + e);
+          Ftp.LOG
+              .warn("Please try larger maxContentLength for ftp.client.retrieveList(). "
+                  + e);
         }
         // in a way, this is our request fault
-        this.code = 400;  // http Bad request
+        this.code = 400; // http Bad request
         return;
       }
 
       FTPFile ftpFile = (FTPFile) list.get(0);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Long(ftpFile.getSize()).toString());
-      //this.headers.put("content-type", "text/html");
+          new Long(ftpFile.getSize()).toString());
+      // this.headers.put("content-type", "text/html");
       this.headers.set(Response.LAST_MODIFIED,
-                      HttpDateFormat.toString(ftpFile.getTimestamp()));
+          HttpDateFormat.toString(ftpFile.getTimestamp()));
       this.content = os.toByteArray();
       if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
         code = 304;
         return;
       }
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
     } catch (FtpExceptionCanNotHaveDataConnection e) {
 
       if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
-      // it is not a file, but dir, so redirect as a dir
+        // it is not a file, but dir, so redirect as a dir
         this.headers.set(Response.LOCATION, path + "/");
-        this.code = 300;  // http redirect
+        this.code = 300; // http redirect
         // fixme, should we do ftp.client.cwd("/"), back to top dir?
       } else {
-      // it is not a dir either
-        this.code = 404;  // http Not Found
+        // it is not a dir either
+        this.code = 404; // http Not Found
       }
 
     } catch (FtpExceptionUnknownForcedDataClose e) {
       // Please note control channel is still live.
       // in a way, this is our request fault
       if (Ftp.LOG.isWarnEnabled()) {
-        Ftp.LOG.warn(
-            "Unrecognized reply after forced close of data channel. "
-          + "If this is acceptable, please modify Client.java accordingly. "
-          + e);
+        Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+            + "If this is acceptable, please modify Client.java accordingly. "
+            + e);
       }
       this.code = 400; // http Bad Request
     }
@@ -416,14 +416,14 @@ public class FtpResponse {
 
   // get ftp dir list as http response
   private void getDirAsHttpResponse(String path, long lastModified)
-    throws IOException {
+      throws IOException {
     List<FTPFile> list = new LinkedList<FTPFile>();
 
     try {
 
       // change to that dir first
       if (!FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
-        this.code = 404;  // http Not Found
+        this.code = 404; // http Not Found
         return;
       }
 
@@ -432,15 +432,15 @@ public class FtpResponse {
       ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
       this.content = list2html(list, path, "/".equals(path) ? false : true);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Integer(this.content.length).toString());
+          new Integer(this.content.length).toString());
       this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -449,21 +449,22 @@ public class FtpResponse {
       // control connection is off, clean up
       // ftp.client.disconnect();
       if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
-        Ftp.LOG.info("delete client because server cut off control channel: "+e);
+        Ftp.LOG.info("delete client because server cut off control channel: "
+            + e);
       }
       ftp.client = null;
 
       this.content = list2html(list, path, "/".equals(path) ? false : true);
       this.headers.set(Response.CONTENT_LENGTH,
-                       new Integer(this.content.length).toString());
+          new Integer(this.content.length).toString());
       this.headers.set(Response.CONTENT_TYPE, "text/html");
       // this.headers.put("Last-Modified", null);
 
-//      // approximate bytes sent and read
-//      if (this.httpAccounting != null) {
-//        this.httpAccounting.incrementBytesSent(path.length());
-//        this.httpAccounting.incrementBytesRead(this.content.length);
-//      }
+      // // approximate bytes sent and read
+      // if (this.httpAccounting != null) {
+      // this.httpAccounting.incrementBytesSent(path.length());
+      // this.httpAccounting.incrementBytesRead(this.content.length);
+      // }
 
       this.code = 200; // http OK
 
@@ -471,32 +472,35 @@ public class FtpResponse {
       // Please note control channel is still live.
       // in a way, this is our request fault
       if (Ftp.LOG.isWarnEnabled()) {
-        Ftp.LOG.warn(
-            "Unrecognized reply after forced close of data channel. "
-          + "If this is acceptable, please modify Client.java accordingly. "
-          + e);
+        Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+            + "If this is acceptable, please modify Client.java accordingly. "
+            + e);
       }
       this.code = 400; // http Bad Request
     } catch (FtpExceptionCanNotHaveDataConnection e) {
-      if (Ftp.LOG.isWarnEnabled()) { Ftp.LOG.warn(""+ e); }
+      if (Ftp.LOG.isWarnEnabled()) {
+        Ftp.LOG.warn("" + e);
+      }
       this.code = 500; // http Iternal Server Error
     }
 
   }
 
   // generate html page from ftp dir list
-  private byte[] list2html(List<FTPFile> list, String path, boolean includeDotDot) {
+  private byte[] list2html(List<FTPFile> list, String path,
+      boolean includeDotDot) {
 
-    //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
+    // StringBuffer x = new
+    // StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
     StringBuffer x = new StringBuffer("<html><head>");
-    x.append("<title>Index of "+path+"</title></head>\n");
-    x.append("<body><h1>Index of "+path+"</h1><pre>\n");
+    x.append("<title>Index of " + path + "</title></head>\n");
+    x.append("<body><h1>Index of " + path + "</h1><pre>\n");
 
     if (includeDotDot) {
       x.append("<a href='../'>../</a>\t-\t-\t-\n");
     }
 
-    for (int i=0; i<list.size(); i++) {
+    for (int i = 0; i < list.size(); i++) {
       FTPFile f = (FTPFile) list.get(i);
       String name = f.getName();
       String time = HttpDateFormat.toString(f.getTimestamp());
@@ -504,11 +508,11 @@ public class FtpResponse {
         // some ftp server LIST "." and "..", we skip them here
         if (name.equals(".") || name.equals(".."))
           continue;
-        x.append("<a href='"+name+"/"+"'>"+name+"/</a>\t");
-        x.append(time+"\t-\n");
+        x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t");
+        x.append(time + "\t-\n");
       } else if (f.isFile()) {
-        x.append("<a href='"+name+    "'>"+name+"</a>\t");
-        x.append(time+"\t"+f.getSize()+"\n");
+        x.append("<a href='" + name + "'>" + name + "</a>\t");
+        x.append(time + "\t" + f.getSize() + "\n");
       } else {
         // ignore isSymbolicLink()
         // ignore isUnknown()

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java Fri Jan  9 06:34:33 2015
@@ -31,56 +31,63 @@ import org.slf4j.LoggerFactory;
 import java.net.URL;
 
 /**
- * This class is used for parsing robots for urls belonging to FTP protocol.
- * It extends the generic {@link RobotRulesParser} class and contains 
- * Ftp protocol specific implementation for obtaining the robots file.
+ * This class is used for parsing robots for urls belonging to FTP protocol. It
+ * extends the generic {@link RobotRulesParser} class and contains Ftp protocol
+ * specific implementation for obtaining the robots file.
  */
 public class FtpRobotRulesParser extends RobotRulesParser {
 
   private static final String CONTENT_TYPE = "text/plain";
-  public static final Logger LOG = LoggerFactory.getLogger(FtpRobotRulesParser.class);
+  public static final Logger LOG = LoggerFactory
+      .getLogger(FtpRobotRulesParser.class);
 
-  FtpRobotRulesParser() { }
+  FtpRobotRulesParser() {
+  }
 
   public FtpRobotRulesParser(Configuration conf) {
     super(conf);
   }
 
   /**
-   * The hosts for which the caching of robots rules is yet to be done,
-   * it sends a Ftp request to the host corresponding to the {@link URL} 
-   * passed, gets robots file, parses the rules and caches the rules object
-   * to avoid re-work in future.
+   * The hosts for which the caching of robots rules is yet to be done, it sends
+   * a Ftp request to the host corresponding to the {@link URL} passed, gets
+   * robots file, parses the rules and caches the rules object to avoid re-work
+   * in future.
+   * 
+   * @param ftp
+   *          The {@link Protocol} object
+   * @param url
+   *          URL
    * 
-   *  @param ftp The {@link Protocol} object
-   *  @param url URL 
-   *  
-   *  @return robotRules A {@link BaseRobotRules} object for the rules
+   * @return robotRules A {@link BaseRobotRules} object for the rules
    */
   public BaseRobotRules getRobotRulesSet(Protocol ftp, URL url) {
 
-    String protocol = url.getProtocol().toLowerCase();  // normalize to lower case
-    String host = url.getHost().toLowerCase();          // normalize to lower case
+    String protocol = url.getProtocol().toLowerCase(); // normalize to lower
+                                                       // case
+    String host = url.getHost().toLowerCase(); // normalize to lower case
 
-    BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":" + host);
+    BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":"
+        + host);
 
     boolean cacheRule = true;
 
-    if (robotRules == null) {                     // cache miss
+    if (robotRules == null) { // cache miss
 
       if (LOG.isTraceEnabled())
         LOG.trace("cache miss " + url);
 
       try {
-        String robotsUrl = new URL(url, "/robots.txt").toString();        
-        ProtocolOutput output = ((Ftp)ftp).getProtocolOutput(robotsUrl, WebPage.newBuilder().build());
+        String robotsUrl = new URL(url, "/robots.txt").toString();
+        ProtocolOutput output = ((Ftp) ftp).getProtocolOutput(robotsUrl,
+            WebPage.newBuilder().build());
         int statusCode = output.getStatus().getCode();
 
         if (statusCode == ProtocolStatusCodes.SUCCESS) {
-          robotRules =  parseRules(url.toString(), output.getContent().getContent(), 
-                                  CONTENT_TYPE, agentNames);
-        } else {                                       
-          robotRules = EMPTY_RULES;                 // use default rules
+          robotRules = parseRules(url.toString(), output.getContent()
+              .getContent(), CONTENT_TYPE, agentNames);
+        } else {
+          robotRules = EMPTY_RULES; // use default rules
         }
       } catch (Throwable t) {
         if (LOG.isInfoEnabled()) {
@@ -91,7 +98,7 @@ public class FtpRobotRulesParser extends
       }
 
       if (cacheRule)
-        CACHE.put(protocol + ":" + host, robotRules);  // cache rules for host
+        CACHE.put(protocol + ":" + host, robotRules); // cache rules for host
     }
     return robotRules;
   }

Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java Fri Jan  9 06:34:33 2015
@@ -28,45 +28,44 @@ import org.apache.commons.net.ProtocolCo
 
 /***
  * This is a support class for logging all ftp command/reply traffic.
- *
+ * 
  * @author John Xing
  ***/
-public class PrintCommandListener implements ProtocolCommandListener
-{
-    private Logger __logger;
+public class PrintCommandListener implements ProtocolCommandListener {
+  private Logger __logger;
 
-    public PrintCommandListener(Logger logger)
-    {
-        __logger = logger;
-    }
+  public PrintCommandListener(Logger logger) {
+    __logger = logger;
+  }
 
-    public void protocolCommandSent(ProtocolCommandEvent event) {
-      try {
-        __logIt(event);
-      } catch (IOException e) {
-        if (__logger.isInfoEnabled()) {
-          __logger.info("PrintCommandListener.protocolCommandSent(): "+e);
-        }
+  public void protocolCommandSent(ProtocolCommandEvent event) {
+    try {
+      __logIt(event);
+    } catch (IOException e) {
+      if (__logger.isInfoEnabled()) {
+        __logger.info("PrintCommandListener.protocolCommandSent(): " + e);
       }
     }
+  }
 
-    public void protocolReplyReceived(ProtocolCommandEvent event) {
-      try {
-        __logIt(event);
-      } catch (IOException e) {
-        if (__logger.isInfoEnabled()) {
-          __logger.info("PrintCommandListener.protocolReplyReceived(): "+e);
-        }
+  public void protocolReplyReceived(ProtocolCommandEvent event) {
+    try {
+      __logIt(event);
+    } catch (IOException e) {
+      if (__logger.isInfoEnabled()) {
+        __logger.info("PrintCommandListener.protocolReplyReceived(): " + e);
       }
     }
+  }
 
-    private void __logIt(ProtocolCommandEvent event) throws IOException {
-      if (!__logger.isInfoEnabled()) { return; }
-      BufferedReader br =
-        new BufferedReader(new StringReader(event.getMessage()));
-      String line;
-      while ((line = br.readLine()) != null) {
-        __logger.info("ftp> "+line);
-      }
+  private void __logIt(ProtocolCommandEvent event) throws IOException {
+    if (!__logger.isInfoEnabled()) {
+      return;
+    }
+    BufferedReader br = new BufferedReader(new StringReader(event.getMessage()));
+    String line;
+    while ((line = br.readLine()) != null) {
+      __logger.info("ftp> " + line);
     }
+  }
 }

Modified: nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java Fri Jan  9 06:34:33 2015
@@ -49,11 +49,11 @@ public class Http extends HttpBase {
   @Override
   public void setConf(Configuration conf) {
     super.setConf(conf);
-//    Level logLevel = Level.WARNING;
-//    if (conf.getBoolean("http.verbose", false)) {
-//      logLevel = Level.FINE;
-//    }
-//    LOG.setLevel(logLevel);
+    // Level logLevel = Level.WARNING;
+    // if (conf.getBoolean("http.verbose", false)) {
+    // logLevel = Level.FINE;
+    // }
+    // LOG.setLevel(logLevel);
   }
 
   public static void main(String[] args) throws Exception {
@@ -64,7 +64,7 @@ public class Http extends HttpBase {
 
   @Override
   protected Response getResponse(URL url, WebPage page, boolean redirect)
-    throws ProtocolException, IOException {
+      throws ProtocolException, IOException {
     return new HttpResponse(this, url, page);
   }