You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 07:34:37 UTC
svn commit: r1650447 [20/25] - in /nutch/branches/2.x: ./
src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/
src/java/org/apache/nutch/api/impl/db/
src/java/org/apache/nutch/api/model/response/
src/java/org/apache/nutch/api/resources/ s...
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java Fri Jan 9 06:34:33 2015
@@ -27,7 +27,6 @@ import java.net.Socket;
import java.util.List;
//import java.util.LinkedList;
-
import org.apache.commons.net.MalformedServerReplyException;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPCommand;
@@ -37,561 +36,551 @@ import org.apache.commons.net.ftp.FTPRep
import org.apache.commons.net.ftp.FTPConnectionClosedException;
/***********************************************
- * Client.java encapsulates functionalities necessary for nutch to
- * get dir list and retrieve file from an FTP server.
- * This class takes care of all low level details of interacting
- * with an FTP server and provides a convenient higher level interface.
- *
+ * Client.java encapsulates functionalities necessary for nutch to get dir list
+ * and retrieve file from an FTP server. This class takes care of all low level
+ * details of interacting with an FTP server and provides a convenient higher
+ * level interface.
+ *
* Modified from FtpClient.java in apache commons-net.
*
- * Notes by John Xing:
- * ftp server implementations are hardly uniform and none seems to follow
- * RFCs whole-heartedly. We have no choice, but assume common denominator
- * as following:
- * (1) Use stream mode for data transfer. Block mode will be better for
- * multiple file downloading and partial file downloading. However
- * not every ftpd has block mode support.
- * (2) Use passive mode for data connection.
- * So nutch will work if we run behind firewall.
- * (3) Data connection is opened/closed per ftp command for the reasons
- * listed in (1). There are ftp servers out there,
- * when partial downloading is enforeced by closing data channel
- * socket on our client side, the server side immediately closes
- * control channel (socket). Our codes deal with such a bad behavior.
- * (4) LIST is used to obtain remote file attributes if possible.
- * MDTM & SIZE would be nice, but not as ubiquitously implemented as LIST.
- * (5) Avoid using ABOR in single thread? Do not use it at all.
- *
- * About exceptions:
- * Some specific exceptions are re-thrown as one of FtpException*.java
- * In fact, each function throws FtpException*.java or pass IOException.
- *
+ * Notes by John Xing: ftp server implementations are hardly uniform and none
+ * seems to follow RFCs whole-heartedly. We have no choice, but assume common
+ * denominator as following: (1) Use stream mode for data transfer. Block mode
+ * will be better for multiple file downloading and partial file downloading.
+ * However not every ftpd has block mode support. (2) Use passive mode for data
+ * connection. So nutch will work if we run behind firewall. (3) Data connection
+ * is opened/closed per ftp command for the reasons listed in (1). There are ftp
+ * servers out there, when partial downloading is enforced by closing data
+ * channel socket on our client side, the server side immediately closes control
+ * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
+ * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but
+ * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
+ * thread? Do not use it at all.
+ *
+ * About exceptions: Some specific exceptions are re-thrown as one of
+ * FtpException*.java. In fact, each function throws FtpException*.java or
+ * passes IOException.
+ *
* @author John Xing
***********************************************/
-public class Client extends FTP
-{
- private int __dataTimeout;
- private int __passivePort;
- private String __passiveHost;
-// private int __fileType, __fileFormat;
- private boolean __remoteVerificationEnabled;
-// private FTPFileEntryParser __entryParser;
- private String __systemName;
-
- // constructor
- public Client()
- {
- __initDefaults();
- __dataTimeout = -1;
- __remoteVerificationEnabled = true;
- }
-
- // defaults when initialize
- private void __initDefaults()
- {
- __passiveHost = null;
- __passivePort = -1;
- __systemName = null;
-/* __fileType = FTP.ASCII_FILE_TYPE;
- __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
- __entryParser = null;
-*/ }
-
- // parse reply for pass()
- private void __parsePassiveModeReply(String reply)
- throws MalformedServerReplyException
- {
- int i, index, lastIndex;
- String octet1, octet2;
- StringBuffer host;
-
- reply = reply.substring(reply.indexOf('(') + 1,
- reply.indexOf(')')).trim();
-
- host = new StringBuffer(24);
- lastIndex = 0;
- index = reply.indexOf(',');
- host.append(reply.substring(lastIndex, index));
-
- for (i = 0; i < 3; i++)
- {
- host.append('.');
- lastIndex = index + 1;
- index = reply.indexOf(',', lastIndex);
- host.append(reply.substring(lastIndex, index));
- }
-
- lastIndex = index + 1;
- index = reply.indexOf(',', lastIndex);
-
- octet1 = reply.substring(lastIndex, index);
- octet2 = reply.substring(index + 1);
-
- // index and lastIndex now used as temporaries
- try
- {
- index = Integer.parseInt(octet1);
- lastIndex = Integer.parseInt(octet2);
- }
- catch (NumberFormatException e)
- {
- throw new MalformedServerReplyException(
- "Could not parse passive host information.\nServer Reply: " + reply);
- }
-
- index <<= 8;
- index |= lastIndex;
-
- __passiveHost = host.toString();
- __passivePort = index;
- }
-
- /**
- * open a passive data connection socket
- * @param command
- * @param arg
- * @return
- * @throws IOException
- * @throws FtpExceptionCanNotHaveDataConnection
- */
- protected Socket __openPassiveDataConnection(int command, String arg)
+public class Client extends FTP {
+ private int __dataTimeout;
+ private int __passivePort;
+ private String __passiveHost;
+ // private int __fileType, __fileFormat;
+ private boolean __remoteVerificationEnabled;
+ // private FTPFileEntryParser __entryParser;
+ private String __systemName;
+
+ // constructor
+ public Client() {
+ __initDefaults();
+ __dataTimeout = -1;
+ __remoteVerificationEnabled = true;
+ }
+
+ // defaults when initialize
+ private void __initDefaults() {
+ __passiveHost = null;
+ __passivePort = -1;
+ __systemName = null;
+ /*
+ * __fileType = FTP.ASCII_FILE_TYPE; __fileFormat =
+ * FTP.NON_PRINT_TEXT_FORMAT; __entryParser = null;
+ */}
+
+ // parse reply for pass()
+ private void __parsePassiveModeReply(String reply)
+ throws MalformedServerReplyException {
+ int i, index, lastIndex;
+ String octet1, octet2;
+ StringBuffer host;
+
+ reply = reply.substring(reply.indexOf('(') + 1, reply.indexOf(')')).trim();
+
+ host = new StringBuffer(24);
+ lastIndex = 0;
+ index = reply.indexOf(',');
+ host.append(reply.substring(lastIndex, index));
+
+ for (i = 0; i < 3; i++) {
+ host.append('.');
+ lastIndex = index + 1;
+ index = reply.indexOf(',', lastIndex);
+ host.append(reply.substring(lastIndex, index));
+ }
+
+ lastIndex = index + 1;
+ index = reply.indexOf(',', lastIndex);
+
+ octet1 = reply.substring(lastIndex, index);
+ octet2 = reply.substring(index + 1);
+
+ // index and lastIndex now used as temporaries
+ try {
+ index = Integer.parseInt(octet1);
+ lastIndex = Integer.parseInt(octet2);
+ } catch (NumberFormatException e) {
+ throw new MalformedServerReplyException(
+ "Could not parse passive host information.\nServer Reply: " + reply);
+ }
+
+ index <<= 8;
+ index |= lastIndex;
+
+ __passiveHost = host.toString();
+ __passivePort = index;
+ }
+
+ /**
+ * open a passive data connection socket
+ *
+ * @param command
+ * @param arg
+ * @return
+ * @throws IOException
+ * @throws FtpExceptionCanNotHaveDataConnection
+ */
+ protected Socket __openPassiveDataConnection(int command, String arg)
throws IOException, FtpExceptionCanNotHaveDataConnection {
- Socket socket;
+ Socket socket;
-// // 20040317, xing, accommodate ill-behaved servers, see below
-// int port_previous = __passivePort;
+ // // 20040317, xing, accommodate ill-behaved servers, see below
+ // int port_previous = __passivePort;
- if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
- throw new FtpExceptionCanNotHaveDataConnection(
- "pasv() failed. " + getReplyString());
-
- try {
- __parsePassiveModeReply(getReplyStrings()[0]);
- } catch (MalformedServerReplyException e) {
- throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
- }
-
-// // 20040317, xing, accommodate ill-behaved servers, see above
-// int count = 0;
-// System.err.println("__passivePort "+__passivePort);
-// System.err.println("port_previous "+port_previous);
-// while (__passivePort == port_previous) {
-// // just quit if too many tries. make it an exception here?
-// if (count++ > 10)
-// return null;
-// // slow down further for each new try
-// Thread.sleep(500*count);
-// if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-// throw new FtpExceptionCanNotHaveDataConnection(
-// "pasv() failed. " + getReplyString());
-// //return null;
-// try {
-// __parsePassiveModeReply(getReplyStrings()[0]);
-// } catch (MalformedServerReplyException e) {
-// throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-// }
-// }
-
- socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
-
- if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
- socket.close();
- return null;
- }
-
- if (__remoteVerificationEnabled && !verifyRemote(socket))
- {
- InetAddress host1, host2;
-
- host1 = socket.getInetAddress();
- host2 = getRemoteAddress();
-
- socket.close();
-
- // our precaution
- throw new FtpExceptionCanNotHaveDataConnection(
- "Host attempting data connection " + host1.getHostAddress() +
- " is not same as server " + host2.getHostAddress() +
- " So we intentionally close it for security precaution."
- );
- }
-
- if (__dataTimeout >= 0)
- socket.setSoTimeout(__dataTimeout);
-
- return socket;
- }
-
- /***
- * Sets the timeout in milliseconds to use for data connection.
- * set immediately after opening the data connection.
- ***/
- public void setDataTimeout(int timeout)
- {
- __dataTimeout = timeout;
- }
-
- /***
- * Closes the connection to the FTP server and restores
- * connection parameters to the default values.
- * <p>
- * @exception IOException If an error occurs while disconnecting.
- ***/
- public void disconnect() throws IOException
- {
- __initDefaults();
- super.disconnect();
- // no worry for data connection, since we always close it
- // in every ftp command that invloves data connection
- }
-
- /***
- * Enable or disable verification that the remote host taking part
- * of a data connection is the same as the host to which the control
- * connection is attached. The default is for verification to be
- * enabled. You may set this value at any time, whether the
- * FTPClient is currently connected or not.
- * <p>
- * @param enable True to enable verification, false to disable verification.
- ***/
- public void setRemoteVerificationEnabled(boolean enable)
- {
- __remoteVerificationEnabled = enable;
- }
-
- /***
- * Return whether or not verification of the remote host participating
- * in data connections is enabled. The default behavior is for
- * verification to be enabled.
- * <p>
- * @return True if verification is enabled, false if not.
- ***/
- public boolean isRemoteVerificationEnabled()
- {
- return __remoteVerificationEnabled;
- }
-
- /***
- * Login to the FTP server using the provided username and password.
- * <p>
- * @param username The username to login under.
- * @param password The password to use.
- * @return True if successfully completed, false if not.
- * @exception FTPConnectionClosedException
- * If the FTP server prematurely closes the connection as a result
- * of the client being idle or some other reason causing the server
- * to send FTP reply code 421. This exception may be caught either
- * as an IOException or independently as itself.
- * @exception IOException If an I/O error occurs while either sending a
- * command to the server or receiving a reply from the server.
- ***/
- public boolean login(String username, String password) throws IOException
- {
- user(username);
-
- if (FTPReply.isPositiveCompletion(getReplyCode()))
- return true;
-
- // If we get here, we either have an error code, or an intermmediate
- // reply requesting password.
- if (!FTPReply.isPositiveIntermediate(getReplyCode()))
- return false;
-
- return FTPReply.isPositiveCompletion(pass(password));
- }
-
- /***
- * Logout of the FTP server by sending the QUIT command.
- * <p>
- * @return True if successfully completed, false if not.
- * @exception FTPConnectionClosedException
- * If the FTP server prematurely closes the connection as a result
- * of the client being idle or some other reason causing the server
- * to send FTP reply code 421. This exception may be caught either
- * as an IOException or independently as itself.
- * @exception IOException If an I/O error occurs while either sending a
- * command to the server or receiving a reply from the server.
- ***/
- public boolean logout() throws IOException
- {
- return FTPReply.isPositiveCompletion(quit());
- }
-
- /**
- * Retrieve a list reply for path
- * @param path
- * @param entries
- * @param limit
- * @param parser
- * @throws IOException
- * @throws FtpExceptionCanNotHaveDataConnection
- * @throws FtpExceptionUnknownForcedDataClose
- * @throws FtpExceptionControlClosedByForcedDataClose
- */
- public void retrieveList(String path, List<FTPFile> entries, int limit,
- FTPFileEntryParser parser)
- throws IOException,
- FtpExceptionCanNotHaveDataConnection,
- FtpExceptionUnknownForcedDataClose,
- FtpExceptionControlClosedByForcedDataClose {
- Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
+ if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+ throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. "
+ + getReplyString());
+
+ try {
+ __parsePassiveModeReply(getReplyStrings()[0]);
+ } catch (MalformedServerReplyException e) {
+ throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+ }
+
+ // // 20040317, xing, accommodate ill-behaved servers, see above
+ // int count = 0;
+ // System.err.println("__passivePort "+__passivePort);
+ // System.err.println("port_previous "+port_previous);
+ // while (__passivePort == port_previous) {
+ // // just quit if too many tries. make it an exception here?
+ // if (count++ > 10)
+ // return null;
+ // // slow down further for each new try
+ // Thread.sleep(500*count);
+ // if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
+ // throw new FtpExceptionCanNotHaveDataConnection(
+ // "pasv() failed. " + getReplyString());
+ // //return null;
+ // try {
+ // __parsePassiveModeReply(getReplyStrings()[0]);
+ // } catch (MalformedServerReplyException e) {
+ // throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
+ // }
+ // }
- if (socket == null)
- throw new FtpExceptionCanNotHaveDataConnection("LIST "
- + ((path == null) ? "" : path));
+ socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
- BufferedReader reader =
- new BufferedReader(new InputStreamReader(socket.getInputStream()));
+ if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
+ socket.close();
+ return null;
+ }
- // force-close data channel socket, when download limit is reached
-// boolean mandatory_close = false;
+ if (__remoteVerificationEnabled && !verifyRemote(socket)) {
+ InetAddress host1, host2;
- //List entries = new LinkedList();
- int count = 0;
- String line = parser.readNextEntry(reader);
- while (line != null) {
- FTPFile ftpFile = parser.parseFTPEntry(line);
- // skip non-formatted lines
- if (ftpFile == null) {
- line = parser.readNextEntry(reader);
- continue;
- }
- entries.add(ftpFile);
- count += line.length();
- // impose download limit if limit >= 0, otherwise no limit
- // here, cut off is up to the line when total bytes is just over limit
- if (limit >= 0 && count > limit) {
-// mandatory_close = true;
- break;
- }
- line = parser.readNextEntry(reader);
- }
+ host1 = socket.getInetAddress();
+ host2 = getRemoteAddress();
- //if (mandatory_close)
- // you always close here, no matter mandatory_close or not.
- // however different ftp servers respond differently, see below.
socket.close();
- // scenarios:
- // (1) mandatory_close is false, download limit not reached
- // no special care here
- // (2) mandatory_close is true, download limit is reached
- // different servers have different reply codes:
-
- try {
- int reply = getReply();
- if (!_notBadReply(reply))
- throw new FtpExceptionUnknownForcedDataClose(getReplyString());
- } catch (FTPConnectionClosedException e) {
- // some ftp servers will close control channel if data channel socket
- // is closed by our end before all data has been read out. Check:
- // tux414.q-tam.hp.com FTP server (hp.com version whp02)
- // so must catch FTPConnectionClosedException thrown by getReply() above
- //disconnect();
- throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
- }
+ // our precaution
+ throw new FtpExceptionCanNotHaveDataConnection(
+ "Host attempting data connection " + host1.getHostAddress()
+ + " is not same as server " + host2.getHostAddress()
+ + " So we intentionally close it for security precaution.");
+ }
+
+ if (__dataTimeout >= 0)
+ socket.setSoTimeout(__dataTimeout);
+
+ return socket;
+ }
+
+ /***
+ * Sets the timeout in milliseconds to use for data connection. It is set
+ * immediately after opening the data connection.
+ ***/
+ public void setDataTimeout(int timeout) {
+ __dataTimeout = timeout;
+ }
+
+ /***
+ * Closes the connection to the FTP server and restores connection parameters
+ * to the default values.
+ * <p>
+ *
+ * @exception IOException
+ * If an error occurs while disconnecting.
+ ***/
+ public void disconnect() throws IOException {
+ __initDefaults();
+ super.disconnect();
+ // no worry for data connection, since we always close it
+ // in every ftp command that involves data connection
+ }
+
+ /***
+ * Enable or disable verification that the remote host taking part of a data
+ * connection is the same as the host to which the control connection is
+ * attached. The default is for verification to be enabled. You may set this
+ * value at any time, whether the FTPClient is currently connected or not.
+ * <p>
+ *
+ * @param enable
+ * True to enable verification, false to disable verification.
+ ***/
+ public void setRemoteVerificationEnabled(boolean enable) {
+ __remoteVerificationEnabled = enable;
+ }
+
+ /***
+ * Return whether or not verification of the remote host participating in data
+ * connections is enabled. The default behavior is for verification to be
+ * enabled.
+ * <p>
+ *
+ * @return True if verification is enabled, false if not.
+ ***/
+ public boolean isRemoteVerificationEnabled() {
+ return __remoteVerificationEnabled;
+ }
+
+ /***
+ * Login to the FTP server using the provided username and password.
+ * <p>
+ *
+ * @param username
+ * The username to login under.
+ * @param password
+ * The password to use.
+ * @return True if successfully completed, false if not.
+ * @exception FTPConnectionClosedException
+ * If the FTP server prematurely closes the connection as a
+ * result of the client being idle or some other reason causing
+ * the server to send FTP reply code 421. This exception may be
+ * caught either as an IOException or independently as itself.
+ * @exception IOException
+ * If an I/O error occurs while either sending a command to the
+ * server or receiving a reply from the server.
+ ***/
+ public boolean login(String username, String password) throws IOException {
+ user(username);
- }
-
- /**
- * Retrieve a file for path
- * @param path
- * @param os
- * @param limit
- * @throws IOException
- * @throws FtpExceptionCanNotHaveDataConnection
- * @throws FtpExceptionUnknownForcedDataClose
- * @throws FtpExceptionControlClosedByForcedDataClose
- */
- public void retrieveFile(String path, OutputStream os, int limit)
- throws IOException,
- FtpExceptionCanNotHaveDataConnection,
- FtpExceptionUnknownForcedDataClose,
- FtpExceptionControlClosedByForcedDataClose {
+ if (FTPReply.isPositiveCompletion(getReplyCode()))
+ return true;
- Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
+ // If we get here, we either have an error code, or an intermediate
+ // reply requesting password.
+ if (!FTPReply.isPositiveIntermediate(getReplyCode()))
+ return false;
+
+ return FTPReply.isPositiveCompletion(pass(password));
+ }
+
+ /***
+ * Logout of the FTP server by sending the QUIT command.
+ * <p>
+ *
+ * @return True if successfully completed, false if not.
+ * @exception FTPConnectionClosedException
+ * If the FTP server prematurely closes the connection as a
+ * result of the client being idle or some other reason causing
+ * the server to send FTP reply code 421. This exception may be
+ * caught either as an IOException or independently as itself.
+ * @exception IOException
+ * If an I/O error occurs while either sending a command to the
+ * server or receiving a reply from the server.
+ ***/
+ public boolean logout() throws IOException {
+ return FTPReply.isPositiveCompletion(quit());
+ }
+
+ /**
+ * Retrieve a list reply for path
+ *
+ * @param path
+ * @param entries
+ * @param limit
+ * @param parser
+ * @throws IOException
+ * @throws FtpExceptionCanNotHaveDataConnection
+ * @throws FtpExceptionUnknownForcedDataClose
+ * @throws FtpExceptionControlClosedByForcedDataClose
+ */
+ public void retrieveList(String path, List<FTPFile> entries, int limit,
+ FTPFileEntryParser parser) throws IOException,
+ FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
+ FtpExceptionControlClosedByForcedDataClose {
+ Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
- if (socket == null)
- throw new FtpExceptionCanNotHaveDataConnection("RETR "
+ if (socket == null)
+ throw new FtpExceptionCanNotHaveDataConnection("LIST "
+ ((path == null) ? "" : path));
- InputStream input = socket.getInputStream();
+ BufferedReader reader = new BufferedReader(new InputStreamReader(
+ socket.getInputStream()));
+
+ // force-close data channel socket, when download limit is reached
+ // boolean mandatory_close = false;
- // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
- // do we ever need ASCII_FILE_TYPE?
- //if (__fileType == ASCII_FILE_TYPE)
- // input = new FromNetASCIIInputStream(input);
-
- // fixme, should we instruct server here for binary file type?
-
- // force-close data channel socket
-// boolean mandatory_close = false;
-
- int len; int count = 0;
- byte[] buf =
- new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
- while((len=input.read(buf,0,buf.length)) != -1){
- count += len;
- // impose download limit if limit >= 0, otherwise no limit
- // here, cut off is exactly of limit bytes
- if (limit >= 0 && count > limit) {
- os.write(buf,0,len-(count-limit));
- // mandatory_close = true;
- break;
- }
- os.write(buf,0,len);
- os.flush();
+ // List entries = new LinkedList();
+ int count = 0;
+ String line = parser.readNextEntry(reader);
+ while (line != null) {
+ FTPFile ftpFile = parser.parseFTPEntry(line);
+ // skip non-formatted lines
+ if (ftpFile == null) {
+ line = parser.readNextEntry(reader);
+ continue;
+ }
+ entries.add(ftpFile);
+ count += line.length();
+ // impose download limit if limit >= 0, otherwise no limit
+ // here, cut off is up to the line when total bytes is just over limit
+ if (limit >= 0 && count > limit) {
+ // mandatory_close = true;
+ break;
}
+ line = parser.readNextEntry(reader);
+ }
- //if (mandatory_close)
- // you always close here, no matter mandatory_close or not.
- // however different ftp servers respond differently, see below.
- socket.close();
+ // if (mandatory_close)
+ // you always close here, no matter mandatory_close or not.
+ // however different ftp servers respond differently, see below.
+ socket.close();
+
+ // scenarios:
+ // (1) mandatory_close is false, download limit not reached
+ // no special care here
+ // (2) mandatory_close is true, download limit is reached
+ // different servers have different reply codes:
+
+ try {
+ int reply = getReply();
+ if (!_notBadReply(reply))
+ throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+ } catch (FTPConnectionClosedException e) {
+ // some ftp servers will close control channel if data channel socket
+ // is closed by our end before all data has been read out. Check:
+ // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+ // so must catch FTPConnectionClosedException thrown by getReply() above
+ // disconnect();
+ throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
+ }
+
+ }
+
+ /**
+ * Retrieve a file for path
+ *
+ * @param path
+ * @param os
+ * @param limit
+ * @throws IOException
+ * @throws FtpExceptionCanNotHaveDataConnection
+ * @throws FtpExceptionUnknownForcedDataClose
+ * @throws FtpExceptionControlClosedByForcedDataClose
+ */
+ public void retrieveFile(String path, OutputStream os, int limit)
+ throws IOException, FtpExceptionCanNotHaveDataConnection,
+ FtpExceptionUnknownForcedDataClose,
+ FtpExceptionControlClosedByForcedDataClose {
- // scenarios:
- // (1) mandatory_close is false, download limit not reached
- // no special care here
- // (2) mandatory_close is true, download limit is reached
- // different servers have different reply codes:
-
- // do not need this
- //sendCommand("ABOR");
-
- try {
- int reply = getReply();
- if (!_notBadReply(reply))
- throw new FtpExceptionUnknownForcedDataClose(getReplyString());
- } catch (FTPConnectionClosedException e) {
- // some ftp servers will close control channel if data channel socket
- // is closed by our end before all data has been read out. Check:
- // tux414.q-tam.hp.com FTP server (hp.com version whp02)
- // so must catch FTPConnectionClosedException thrown by getReply() above
- //disconnect();
- throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
- }
+ Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
- }
+ if (socket == null)
+ throw new FtpExceptionCanNotHaveDataConnection("RETR "
+ + ((path == null) ? "" : path));
- // reply check after closing data connection
- private boolean _notBadReply(int reply) {
+ InputStream input = socket.getInputStream();
- if (FTPReply.isPositiveCompletion(reply)) {
- // do nothing
- } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
+ // 20040318, xing, treat everything as BINARY_FILE_TYPE for now
+ // do we ever need ASCII_FILE_TYPE?
+ // if (__fileType == ASCII_FILE_TYPE)
+ // input = new FromNetASCIIInputStream(input);
+
+ // fixme, should we instruct server here for binary file type?
+
+ // force-close data channel socket
+ // boolean mandatory_close = false;
+
+ int len;
+ int count = 0;
+ byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
+ while ((len = input.read(buf, 0, buf.length)) != -1) {
+ count += len;
+ // impose download limit if limit >= 0, otherwise no limit
+ // here, cut off is exactly of limit bytes
+ if (limit >= 0 && count > limit) {
+ os.write(buf, 0, len - (count - limit));
+ // mandatory_close = true;
+ break;
+ }
+ os.write(buf, 0, len);
+ os.flush();
+ }
+
+ // if (mandatory_close)
+ // you always close here, no matter mandatory_close or not.
+ // however different ftp servers respond differently, see below.
+ socket.close();
+
+ // scenarios:
+ // (1) mandatory_close is false, download limit not reached
+ // no special care here
+ // (2) mandatory_close is true, download limit is reached
+ // different servers have different reply codes:
+
+ // do not need this
+ // sendCommand("ABOR");
+
+ try {
+ int reply = getReply();
+ if (!_notBadReply(reply))
+ throw new FtpExceptionUnknownForcedDataClose(getReplyString());
+ } catch (FTPConnectionClosedException e) {
+ // some ftp servers will close control channel if data channel socket
+ // is closed by our end before all data has been read out. Check:
+ // tux414.q-tam.hp.com FTP server (hp.com version whp02)
+ // so must catch FTPConnectionClosedException thrown by getReply() above
+ // disconnect();
+ throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
+ }
+
+ }
+
+ // reply check after closing data connection
+ private boolean _notBadReply(int reply) {
+
+ if (FTPReply.isPositiveCompletion(reply)) {
+ // do nothing
+ } else if (reply == 426) { // FTPReply.TRANSFER_ABORTED
// some ftp servers reply 426, e.g.,
// foggy FTP server (Version wu-2.6.2(2)
- // there is second reply witing? no!
- //getReply();
- } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
+ // there is second reply waiting? no!
+ // getReply();
+ } else if (reply == 450) { // FTPReply.FILE_ACTION_NOT_TAKEN
// some ftp servers reply 450, e.g.,
// ProFTPD [ftp.kernel.org]
- // there is second reply witing? no!
- //getReply();
- } else if (reply == 451) { // FTPReply.ACTION_ABORTED
+ // there is second reply waiting? no!
+ // getReply();
+ } else if (reply == 451) { // FTPReply.ACTION_ABORTED
// some ftp servers reply 451, e.g.,
// ProFTPD [ftp.kernel.org]
- // there is second reply witing? no!
- //getReply();
- } else if (reply == 451) { // FTPReply.ACTION_ABORTED
- } else {
+ // there is second reply waiting? no!
+ // getReply();
+ } else if (reply == 451) { // FTPReply.ACTION_ABORTED
+ } else {
// what other kind of ftp server out there?
- return false;
- }
+ return false;
+ }
+
+ return true;
+ }
+ /***
+ * Sets the file type to be transferred. This should be one of
+ * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
+ * etc. The file type only needs to be set when you want to change the type.
+ * After changing it, the new type stays in effect until you change it again.
+ * The default file type is <code> FTP.ASCII_FILE_TYPE </code> if this method
+ * is never called.
+ * <p>
+ *
+ * @param fileType
+ * The <code> _FILE_TYPE </code> constant indicating the type of file.
+ * @return True if successfully completed, false if not.
+ * @exception FTPConnectionClosedException
+ * If the FTP server prematurely closes the connection as a
+ * result of the client being idle or some other reason causing
+ * the server to send FTP reply code 421. This exception may be
+ * caught either as an IOException or independently as itself.
+ * @exception IOException
+ * If an I/O error occurs while either sending a command to the
+ * server or receiving a reply from the server.
+ ***/
+ public boolean setFileType(int fileType) throws IOException {
+ if (FTPReply.isPositiveCompletion(type(fileType))) {
+ /*
+ * __fileType = fileType; __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+ */
return true;
}
+ return false;
+ }
- /***
- * Sets the file type to be transferred. This should be one of
- * <code> FTP.ASCII_FILE_TYPE </code>, <code> FTP.IMAGE_FILE_TYPE </code>,
- * etc. The file type only needs to be set when you want to change the
- * type. After changing it, the new type stays in effect until you change
- * it again. The default file type is <code> FTP.ASCII_FILE_TYPE </code>
- * if this method is never called.
- * <p>
- * @param fileType The <code> _FILE_TYPE </code> constant indcating the
- * type of file.
- * @return True if successfully completed, false if not.
- * @exception FTPConnectionClosedException
- * If the FTP server prematurely closes the connection as a result
- * of the client being idle or some other reason causing the server
- * to send FTP reply code 421. This exception may be caught either
- * as an IOException or independently as itself.
- * @exception IOException If an I/O error occurs while either sending a
- * command to the server or receiving a reply from the server.
- ***/
- public boolean setFileType(int fileType) throws IOException
- {
- if (FTPReply.isPositiveCompletion(type(fileType)))
- {
-/* __fileType = fileType;
- __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;*/
- return true;
- }
- return false;
- }
-
- /***
- * Fetches the system type name from the server and returns the string.
- * This value is cached for the duration of the connection after the
- * first call to this method. In other words, only the first time
- * that you invoke this method will it issue a SYST command to the
- * FTP server. FTPClient will remember the value and return the
- * cached value until a call to disconnect.
- * <p>
- * @return The system type name obtained from the server. null if the
- * information could not be obtained.
- * @exception FTPConnectionClosedException
- * If the FTP server prematurely closes the connection as a result
- * of the client being idle or some other reason causing the server
- * to send FTP reply code 421. This exception may be caught either
- * as an IOException or independently as itself.
- * @exception IOException If an I/O error occurs while either sending a
- * command to the server or receiving a reply from the server.
- ***/
- public String getSystemName()
- throws IOException, FtpExceptionBadSystResponse
- {
- //if (syst() == FTPReply.NAME_SYSTEM_TYPE)
- // Technically, we should expect a NAME_SYSTEM_TYPE response, but
- // in practice FTP servers deviate, so we soften the condition to
- // a positive completion.
- if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
- __systemName = (getReplyStrings()[0]).substring(4);
- } else {
- throw new FtpExceptionBadSystResponse(
- "Bad response of SYST: " + getReplyString());
- }
-
- return __systemName;
- }
-
- /***
- * Sends a NOOP command to the FTP server. This is useful for preventing
- * server timeouts.
- * <p>
- * @return True if successfully completed, false if not.
- * @exception FTPConnectionClosedException
- * If the FTP server prematurely closes the connection as a result
- * of the client being idle or some other reason causing the server
- * to send FTP reply code 421. This exception may be caught either
- * as an IOException or independently as itself.
- * @exception IOException If an I/O error occurs while either sending a
- * command to the server or receiving a reply from the server.
- ***/
- public boolean sendNoOp() throws IOException
- {
- return FTPReply.isPositiveCompletion(noop());
- }
-
-// client.stat(path);
-// client.sendCommand("STAT");
-// client.sendCommand("STAT",path);
-// client.sendCommand("MDTM",path);
-// client.sendCommand("SIZE",path);
-// client.sendCommand("HELP","SITE");
-// client.sendCommand("SYST");
-// client.setRestartOffset(120);
+ /***
+ * Fetches the system type name from the server and returns the string. This
+ * value is cached for the duration of the connection after the first call to
+ * this method. In other words, only the first time that you invoke this
+ * method will it issue a SYST command to the FTP server. FTPClient will
+ * remember the value and return the cached value until a call to disconnect.
+ * <p>
+ *
+ * @return The system type name obtained from the server. null if the
+ * information could not be obtained.
+ * @exception FTPConnectionClosedException
+ * If the FTP server prematurely closes the connection as a
+ * result of the client being idle or some other reason causing
+ * the server to send FTP reply code 421. This exception may be
+ * caught either as an IOException or independently as itself.
+ * @exception IOException
+ * If an I/O error occurs while either sending a command to the
+ * server or receiving a reply from the server.
+ ***/
+ public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
+ // if (syst() == FTPReply.NAME_SYSTEM_TYPE)
+ // Technically, we should expect a NAME_SYSTEM_TYPE response, but
+ // in practice FTP servers deviate, so we soften the condition to
+ // a positive completion.
+ if (__systemName == null && FTPReply.isPositiveCompletion(syst())) {
+ __systemName = (getReplyStrings()[0]).substring(4);
+ } else {
+ throw new FtpExceptionBadSystResponse("Bad response of SYST: "
+ + getReplyString());
+ }
+
+ return __systemName;
+ }
+
+ /***
+ * Sends a NOOP command to the FTP server. This is useful for preventing
+ * server timeouts.
+ * <p>
+ *
+ * @return True if successfully completed, false if not.
+ * @exception FTPConnectionClosedException
+ * If the FTP server prematurely closes the connection as a
+ * result of the client being idle or some other reason causing
+ * the server to send FTP reply code 421. This exception may be
+ * caught either as an IOException or independently as itself.
+ * @exception IOException
+ * If an I/O error occurs while either sending a command to the
+ * server or receiving a reply from the server.
+ ***/
+ public boolean sendNoOp() throws IOException {
+ return FTPReply.isPositiveCompletion(noop());
+ }
+
+ // client.stat(path);
+ // client.sendCommand("STAT");
+ // client.sendCommand("STAT",path);
+ // client.sendCommand("MDTM",path);
+ // client.sendCommand("SIZE",path);
+ // client.sendCommand("HELP","SITE");
+ // client.sendCommand("SYST");
+ // client.setRestartOffset(120);
}
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Fri Jan 9 06:34:33 2015
@@ -33,13 +33,12 @@ import java.util.Collection;
import java.util.HashSet;
/**
- * This class is a protocol plugin used for ftp: scheme.
- * It creates {@link FtpResponse} object and gets the content of the url from it.
+ * This class is a protocol plugin used for ftp: scheme. It creates
+ * {@link FtpResponse} object and gets the content of the url from it.
* Configurable parameters are {@code ftp.username}, {@code ftp.password},
- * {@code ftp.content.limit}, {@code ftp.timeout},
- * {@code ftp.server.timeout}, {@code ftp.password},
- * {@code ftp.keep.connection} and {@code ftp.follow.talk}.
- * For details see "FTP properties" section in {@code nutch-default.xml}.
+ * {@code ftp.content.limit}, {@code ftp.timeout}, {@code ftp.server.timeout},
+ * {@code ftp.password}, {@code ftp.keep.connection} and
+ * {@code ftp.follow.talk}. For details see "FTP properties" section in {@code nutch-default.xml}.
*/
public class Ftp implements Protocol {
@@ -82,7 +81,7 @@ public class Ftp implements Protocol {
private Configuration conf;
private FtpRobotRulesParser robots = null;
-
+
// constructor
public Ftp() {
robots = new FtpRobotRulesParser();
@@ -108,12 +107,14 @@ public class Ftp implements Protocol {
this.keepConnection = keepConnection;
}
- /**
- * Creates a {@link FtpResponse} object corresponding to the url and
- * returns a {@link ProtocolOutput} object as per the content received
+ /**
+ * Creates a {@link FtpResponse} object corresponding to the url and returns a
+ * {@link ProtocolOutput} object as per the content received
*
- * @param url Text containing the ftp url
- * @param datum The CrawlDatum object corresponding to the url
+ * @param url
+ * Text containing the ftp url
+ * @param datum
+ * The CrawlDatum object corresponding to the url
*
* @return {@link ProtocolOutput} object for the url
*/
@@ -233,8 +234,8 @@ public class Ftp implements Protocol {
if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength
ftp.setMaxContentLength(maxContentLength);
- Content content = ftp.getProtocolOutput(urlString, WebPage.newBuilder().build())
- .getContent();
+ Content content = ftp.getProtocolOutput(urlString,
+ WebPage.newBuilder().build()).getContent();
System.err.println("Content-Type: " + content.getContentType());
System.err.println("Content-Length: "
@@ -252,7 +253,7 @@ public class Ftp implements Protocol {
return FIELDS;
}
- /**
+ /**
* Get the robots rules for a given url
*/
public BaseRobotRules getRobotRules(String url, WebPage page) {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java Fri Jan 9 06:34:33 2015
@@ -17,13 +17,16 @@
package org.apache.nutch.protocol.ftp;
-/** Thrown for Ftp error codes.
+/**
+ * Thrown for Ftp error codes.
*/
public class FtpError extends FtpException {
private int code;
-
- public int getCode(int code) { return code; }
+
+ public int getCode(int code) {
+ return code;
+ }
public FtpError(int code) {
super("Ftp Error: " + code);
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java Fri Jan 9 06:34:33 2015
@@ -20,9 +20,9 @@ package org.apache.nutch.protocol.ftp;
import org.apache.nutch.protocol.ProtocolException;
/***
- * Superclass for important exceptions thrown during FTP talk,
- * that must be handled with care.
- *
+ * Superclass for important exceptions thrown during FTP talk, that must be
+ * handled with care.
+ *
* @author John Xing
*/
public class FtpException extends ProtocolException {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java Fri Jan 9 06:34:33 2015
@@ -19,7 +19,7 @@ package org.apache.nutch.protocol.ftp;
/**
* Exception indicating bad reply of SYST command.
- *
+ *
* @author John Xing
*/
public class FtpExceptionBadSystResponse extends FtpException {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java Fri Jan 9 06:34:33 2015
@@ -19,7 +19,7 @@ package org.apache.nutch.protocol.ftp;
/**
* Exception indicating failure of opening data connection.
- *
+ *
* @author John Xing
*/
public class FtpExceptionCanNotHaveDataConnection extends FtpException {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java Fri Jan 9 06:34:33 2015
@@ -18,9 +18,9 @@
package org.apache.nutch.protocol.ftp;
/**
- * Exception indicating control channel is closed by server end, due to
- * forced closure of data channel at client (our) end.
- *
+ * Exception indicating control channel is closed by server end, due to forced
+ * closure of data channel at client (our) end.
+ *
* @author John Xing
*/
public class FtpExceptionControlClosedByForcedDataClose extends FtpException {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java Fri Jan 9 06:34:33 2015
@@ -18,9 +18,9 @@
package org.apache.nutch.protocol.ftp;
/**
- * Exception indicating unrecognizable reply from server after
- * forced closure of data channel by client (our) side.
- *
+ * Exception indicating unrecognizable reply from server after forced closure of
+ * data channel by client (our) side.
+ *
* @author John Xing
*/
public class FtpExceptionUnknownForcedDataClose extends FtpException {
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Fri Jan 9 06:34:33 2015
@@ -17,7 +17,6 @@
package org.apache.nutch.protocol.ftp;
-
import org.apache.avro.util.Utf8;
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPFile;
@@ -39,18 +38,14 @@ import java.nio.ByteBuffer;
import java.util.LinkedList;
import java.util.List;
-
/************************************
- * FtpResponse.java mimics ftp replies as http response.
- * It tries its best to follow http's way for headers, response codes
- * as well as exceptions.
- *
- * Comments:
- * In this class, all FtpException*.java thrown by Client.java
- * and some important commons-net exceptions passed by Client.java
- * must have been properly dealt with. They'd better not be leaked
- * to the caller of this class.
- *
+ * FtpResponse.java mimics ftp replies as http response. It tries its best to
+ * follow http's way for headers, response codes as well as exceptions.
+ *
+ * Comments: In this class, all FtpException*.java thrown by Client.java and
+ * some important commons-net exceptions passed by Client.java must have been
+ * properly dealt with. They'd better not be leaked to the caller of this class.
+ *
* @author John Xing
***********************************/
public class FtpResponse {
@@ -66,23 +61,26 @@ public class FtpResponse {
private Configuration conf;
/** Returns the response code. */
- public int getCode() { return code; }
+ public int getCode() {
+ return code;
+ }
/** Returns the value of a named header. */
public String getHeader(String name) {
return headers.get(name);
}
- public byte[] getContent() { return content; }
+ public byte[] getContent() {
+ return content;
+ }
public Content toContent() {
return new Content(orig, base, (content != null ? content : EMPTY_CONTENT),
- getHeader(Response.CONTENT_TYPE),
- headers, this.conf);
+ getHeader(Response.CONTENT_TYPE), headers, this.conf);
}
public FtpResponse(URL url, WebPage page, Ftp ftp, Configuration conf)
- throws FtpException, IOException {
+ throws FtpException, IOException {
this.orig = url.toString();
this.base = url.toString();
@@ -104,27 +102,26 @@ public class FtpResponse {
if (ftp.followTalk) {
if (Ftp.LOG.isInfoEnabled()) {
- Ftp.LOG.info("fetching "+url);
+ Ftp.LOG.info("fetching " + url);
}
} else {
if (Ftp.LOG.isTraceEnabled()) {
- Ftp.LOG.trace("fetching "+url);
+ Ftp.LOG.trace("fetching " + url);
}
}
InetAddress addr = InetAddress.getByName(url.getHost());
- if (addr != null
- && conf.getBoolean("store.ip.address", false) == true) {
- String ipString = addr.getHostAddress(); //get the ip address
+ if (addr != null && conf.getBoolean("store.ip.address", false) == true) {
+ String ipString = addr.getHostAddress(); // get the ip address
page.getMetadata().put(new Utf8("_ip_"),
- ByteBuffer.wrap(ipString.getBytes()));
+ ByteBuffer.wrap(ipString.getBytes()));
}
// idled too long, remote server or ourselves may have timed out,
// should start anew.
if (ftp.client != null && ftp.keepConnection
&& ftp.renewalTime < System.currentTimeMillis()) {
- if (Ftp.LOG.isInfoEnabled()) {
+ if (Ftp.LOG.isInfoEnabled()) {
Ftp.LOG.info("delete client because idled too long");
}
ftp.client = null;
@@ -138,8 +135,9 @@ public class FtpResponse {
// the real client
ftp.client = new Client();
// when to renew, take the lesser
- //ftp.renewalTime = System.currentTimeMillis()
- // + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+ // ftp.renewalTime = System.currentTimeMillis()
+ // + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout :
+ // ftp.serverTimeout);
// timeout for control connection
ftp.client.setDefaultTimeout(ftp.timeout);
@@ -148,8 +146,8 @@ public class FtpResponse {
// follow ftp talk?
if (ftp.followTalk)
- ftp.client.addProtocolCommandListener(
- new PrintCommandListener(Ftp.LOG));
+ ftp.client.addProtocolCommandListener(new PrintCommandListener(
+ Ftp.LOG));
}
// quit from previous site if at a different site now
@@ -157,8 +155,8 @@ public class FtpResponse {
InetAddress remoteAddress = ftp.client.getRemoteAddress();
if (!addr.equals(remoteAddress)) {
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("disconnect from "+remoteAddress
- +" before connect to "+addr);
+ Ftp.LOG.info("disconnect from " + remoteAddress
+ + " before connect to " + addr);
}
// quit from current site
ftp.client.logout();
@@ -170,22 +168,22 @@ public class FtpResponse {
if (!ftp.client.isConnected()) {
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("connect to "+addr);
+ Ftp.LOG.info("connect to " + addr);
}
ftp.client.connect(addr);
if (!FTPReply.isPositiveCompletion(ftp.client.getReplyCode())) {
ftp.client.disconnect();
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("ftp.client.connect() failed: "
- + addr + " " + ftp.client.getReplyString());
+ Ftp.LOG.warn("ftp.client.connect() failed: " + addr + " "
+ + ftp.client.getReplyString());
}
this.code = 500; // http Internal Server Error
return;
}
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("log into "+addr);
+ Ftp.LOG.info("log into " + addr);
}
if (!ftp.client.login(ftp.userName, ftp.passWord)) {
@@ -196,9 +194,9 @@ public class FtpResponse {
// (not dealt with here at all) .
ftp.client.disconnect();
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("ftp.client.login() failed: "+addr);
+ Ftp.LOG.warn("ftp.client.login() failed: " + addr);
}
- this.code = 401; // http Unauthorized
+ this.code = 401; // http Unauthorized
return;
}
@@ -207,14 +205,14 @@ public class FtpResponse {
ftp.client.logout();
ftp.client.disconnect();
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("ftp.client.setFileType() failed: "+addr);
+ Ftp.LOG.warn("ftp.client.setFileType() failed: " + addr);
}
this.code = 500; // http Internal Server Error
return;
}
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("set parser for "+addr);
+ Ftp.LOG.info("set parser for " + addr);
}
// SYST is valid only after login
@@ -225,17 +223,18 @@ public class FtpResponse {
if (parserKey.startsWith("UNKNOWN Type: L8"))
parserKey = "UNIX Type: L8";
ftp.parser = (new DefaultFTPFileEntryParserFactory())
- .createFileEntryParser(parserKey);
+ .createFileEntryParser(parserKey);
} catch (FtpExceptionBadSystResponse e) {
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("ftp.client.getSystemName() failed: "+addr+" "+e);
+ Ftp.LOG
+ .warn("ftp.client.getSystemName() failed: " + addr + " " + e);
}
ftp.parser = null;
} catch (ParserInitializationException e) {
// ParserInitializationException is RuntimeException defined in
// org.apache.commons.net.ftp.parser.ParserInitializationException
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("createFileEntryParser() failed. "+addr+" "+e);
+ Ftp.LOG.warn("createFileEntryParser() failed. " + addr + " " + e);
}
ftp.parser = null;
} finally {
@@ -243,7 +242,7 @@ public class FtpResponse {
// do not log as severe, otherwise
// FetcherThread/RequestScheduler will abort
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn("ftp.parser is null: "+addr);
+ Ftp.LOG.warn("ftp.parser is null: " + addr);
}
ftp.client.logout();
ftp.client.disconnect();
@@ -269,10 +268,11 @@ public class FtpResponse {
// reset next renewalTime, take the lesser
if (ftp.client != null && ftp.keepConnection) {
ftp.renewalTime = System.currentTimeMillis()
- + ((ftp.timeout<ftp.serverTimeout) ? ftp.timeout : ftp.serverTimeout);
+ + ((ftp.timeout < ftp.serverTimeout) ? ftp.timeout
+ : ftp.serverTimeout);
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
Ftp.LOG.info("reset renewalTime to "
- + HttpDateFormat.toString(ftp.renewalTime));
+ + HttpDateFormat.toString(ftp.renewalTime));
}
}
@@ -280,15 +280,15 @@ public class FtpResponse {
// may have deleted ftp.client
if (ftp.client != null && !ftp.keepConnection) {
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("disconnect from "+addr);
+ Ftp.LOG.info("disconnect from " + addr);
}
ftp.client.logout();
ftp.client.disconnect();
}
-
+
} catch (Exception e) {
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn(""+e);
+ Ftp.LOG.warn("" + e);
}
// for any un-foreseen exception (run time exception or not),
// do ultimate clean and leave ftp.client for garbage collection
@@ -298,21 +298,21 @@ public class FtpResponse {
ftp.client = null;
// or do explicit garbage collection?
// System.gc();
-// can we be less dramatic, using the following instead?
-// probably unnecessary for our practical purpose here
-// try {
-// ftp.client.logout();
-// ftp.client.disconnect();
-// }
+ // can we be less dramatic, using the following instead?
+ // probably unnecessary for our practical purpose here
+ // try {
+ // ftp.client.logout();
+ // ftp.client.disconnect();
+ // }
throw new FtpException(e);
- //throw e;
+ // throw e;
}
}
// get ftp file as http response
private void getFileAsHttpResponse(String path, long lastModified)
- throws IOException {
+ throws IOException {
ByteArrayOutputStream os = null;
List<FTPFile> list = null;
@@ -324,9 +324,9 @@ public class FtpResponse {
FTPFile ftpFile = (FTPFile) list.get(0);
this.headers.set(Response.CONTENT_LENGTH,
- new Long(ftpFile.getSize()).toString());
+ new Long(ftpFile.getSize()).toString());
this.headers.set(Response.LAST_MODIFIED,
- HttpDateFormat.toString(ftpFile.getTimestamp()));
+ HttpDateFormat.toString(ftpFile.getTimestamp()));
// don't retrieve the file if not changed.
if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
code = 304;
@@ -337,11 +337,11 @@ public class FtpResponse {
this.content = os.toByteArray();
-// // approximate bytes sent and read
-// if (this.httpAccounting != null) {
-// this.httpAccounting.incrementBytesSent(path.length());
-// this.httpAccounting.incrementBytesRead(this.content.length);
-// }
+ // // approximate bytes sent and read
+ // if (this.httpAccounting != null) {
+ // this.httpAccounting.incrementBytesSent(path.length());
+ // this.httpAccounting.incrementBytesRead(this.content.length);
+ // }
this.code = 200; // http OK
@@ -350,64 +350,64 @@ public class FtpResponse {
// control connection is off, clean up
// ftp.client.disconnect();
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("delete client because server cut off control channel: "+e);
+ Ftp.LOG.info("delete client because server cut off control channel: "
+ + e);
}
ftp.client = null;
// in case this FtpExceptionControlClosedByForcedDataClose is
// thrown by retrieveList() (not retrieveFile()) above,
if (os == null) { // indicating throwing by retrieveList()
- //throw new FtpException("fail to get attibutes: "+path);
+ // throw new FtpException("fail to get attibutes: "+path);
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn(
- "Please try larger maxContentLength for ftp.client.retrieveList(). "
- + e);
+ Ftp.LOG
+ .warn("Please try larger maxContentLength for ftp.client.retrieveList(). "
+ + e);
}
// in a way, this is our request fault
- this.code = 400; // http Bad request
+ this.code = 400; // http Bad request
return;
}
FTPFile ftpFile = (FTPFile) list.get(0);
this.headers.set(Response.CONTENT_LENGTH,
- new Long(ftpFile.getSize()).toString());
- //this.headers.put("content-type", "text/html");
+ new Long(ftpFile.getSize()).toString());
+ // this.headers.put("content-type", "text/html");
this.headers.set(Response.LAST_MODIFIED,
- HttpDateFormat.toString(ftpFile.getTimestamp()));
+ HttpDateFormat.toString(ftpFile.getTimestamp()));
this.content = os.toByteArray();
if (ftpFile.getTimestamp().getTimeInMillis() <= lastModified) {
code = 304;
return;
}
-// // approximate bytes sent and read
-// if (this.httpAccounting != null) {
-// this.httpAccounting.incrementBytesSent(path.length());
-// this.httpAccounting.incrementBytesRead(this.content.length);
-// }
+ // // approximate bytes sent and read
+ // if (this.httpAccounting != null) {
+ // this.httpAccounting.incrementBytesSent(path.length());
+ // this.httpAccounting.incrementBytesRead(this.content.length);
+ // }
this.code = 200; // http OK
} catch (FtpExceptionCanNotHaveDataConnection e) {
if (FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
- // it is not a file, but dir, so redirect as a dir
+ // it is not a file, but dir, so redirect as a dir
this.headers.set(Response.LOCATION, path + "/");
- this.code = 300; // http redirect
+ this.code = 300; // http redirect
// fixme, should we do ftp.client.cwd("/"), back to top dir?
} else {
- // it is not a dir either
- this.code = 404; // http Not Found
+ // it is not a dir either
+ this.code = 404; // http Not Found
}
} catch (FtpExceptionUnknownForcedDataClose e) {
// Please note control channel is still live.
// in a way, this is our request fault
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn(
- "Unrecognized reply after forced close of data channel. "
- + "If this is acceptable, please modify Client.java accordingly. "
- + e);
+ Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+ + "If this is acceptable, please modify Client.java accordingly. "
+ + e);
}
this.code = 400; // http Bad Request
}
@@ -416,14 +416,14 @@ public class FtpResponse {
// get ftp dir list as http response
private void getDirAsHttpResponse(String path, long lastModified)
- throws IOException {
+ throws IOException {
List<FTPFile> list = new LinkedList<FTPFile>();
try {
// change to that dir first
if (!FTPReply.isPositiveCompletion(ftp.client.cwd(path))) {
- this.code = 404; // http Not Found
+ this.code = 404; // http Not Found
return;
}
@@ -432,15 +432,15 @@ public class FtpResponse {
ftp.client.retrieveList(null, list, ftp.maxContentLength, ftp.parser);
this.content = list2html(list, path, "/".equals(path) ? false : true);
this.headers.set(Response.CONTENT_LENGTH,
- new Integer(this.content.length).toString());
+ new Integer(this.content.length).toString());
this.headers.set(Response.CONTENT_TYPE, "text/html");
// this.headers.put("Last-Modified", null);
-// // approximate bytes sent and read
-// if (this.httpAccounting != null) {
-// this.httpAccounting.incrementBytesSent(path.length());
-// this.httpAccounting.incrementBytesRead(this.content.length);
-// }
+ // // approximate bytes sent and read
+ // if (this.httpAccounting != null) {
+ // this.httpAccounting.incrementBytesSent(path.length());
+ // this.httpAccounting.incrementBytesRead(this.content.length);
+ // }
this.code = 200; // http OK
@@ -449,21 +449,22 @@ public class FtpResponse {
// control connection is off, clean up
// ftp.client.disconnect();
if ((ftp.followTalk) && (Ftp.LOG.isInfoEnabled())) {
- Ftp.LOG.info("delete client because server cut off control channel: "+e);
+ Ftp.LOG.info("delete client because server cut off control channel: "
+ + e);
}
ftp.client = null;
this.content = list2html(list, path, "/".equals(path) ? false : true);
this.headers.set(Response.CONTENT_LENGTH,
- new Integer(this.content.length).toString());
+ new Integer(this.content.length).toString());
this.headers.set(Response.CONTENT_TYPE, "text/html");
// this.headers.put("Last-Modified", null);
-// // approximate bytes sent and read
-// if (this.httpAccounting != null) {
-// this.httpAccounting.incrementBytesSent(path.length());
-// this.httpAccounting.incrementBytesRead(this.content.length);
-// }
+ // // approximate bytes sent and read
+ // if (this.httpAccounting != null) {
+ // this.httpAccounting.incrementBytesSent(path.length());
+ // this.httpAccounting.incrementBytesRead(this.content.length);
+ // }
this.code = 200; // http OK
@@ -471,32 +472,35 @@ public class FtpResponse {
// Please note control channel is still live.
// in a way, this is our request fault
if (Ftp.LOG.isWarnEnabled()) {
- Ftp.LOG.warn(
- "Unrecognized reply after forced close of data channel. "
- + "If this is acceptable, please modify Client.java accordingly. "
- + e);
+ Ftp.LOG.warn("Unrecognized reply after forced close of data channel. "
+ + "If this is acceptable, please modify Client.java accordingly. "
+ + e);
}
this.code = 400; // http Bad Request
} catch (FtpExceptionCanNotHaveDataConnection e) {
- if (Ftp.LOG.isWarnEnabled()) { Ftp.LOG.warn(""+ e); }
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("" + e);
+ }
this.code = 500; // http Iternal Server Error
}
}
// generate html page from ftp dir list
- private byte[] list2html(List<FTPFile> list, String path, boolean includeDotDot) {
+ private byte[] list2html(List<FTPFile> list, String path,
+ boolean includeDotDot) {
- //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
+ // StringBuffer x = new
+ // StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
StringBuffer x = new StringBuffer("<html><head>");
- x.append("<title>Index of "+path+"</title></head>\n");
- x.append("<body><h1>Index of "+path+"</h1><pre>\n");
+ x.append("<title>Index of " + path + "</title></head>\n");
+ x.append("<body><h1>Index of " + path + "</h1><pre>\n");
if (includeDotDot) {
x.append("<a href='../'>../</a>\t-\t-\t-\n");
}
- for (int i=0; i<list.size(); i++) {
+ for (int i = 0; i < list.size(); i++) {
FTPFile f = (FTPFile) list.get(i);
String name = f.getName();
String time = HttpDateFormat.toString(f.getTimestamp());
@@ -504,11 +508,11 @@ public class FtpResponse {
// some ftp server LIST "." and "..", we skip them here
if (name.equals(".") || name.equals(".."))
continue;
- x.append("<a href='"+name+"/"+"'>"+name+"/</a>\t");
- x.append(time+"\t-\n");
+ x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t");
+ x.append(time + "\t-\n");
} else if (f.isFile()) {
- x.append("<a href='"+name+ "'>"+name+"</a>\t");
- x.append(time+"\t"+f.getSize()+"\n");
+ x.append("<a href='" + name + "'>" + name + "</a>\t");
+ x.append(time + "\t" + f.getSize() + "\n");
} else {
// ignore isSymbolicLink()
// ignore isUnknown()
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpRobotRulesParser.java Fri Jan 9 06:34:33 2015
@@ -31,56 +31,63 @@ import org.slf4j.LoggerFactory;
import java.net.URL;
/**
- * This class is used for parsing robots for urls belonging to FTP protocol.
- * It extends the generic {@link RobotRulesParser} class and contains
- * Ftp protocol specific implementation for obtaining the robots file.
+ * This class is used for parsing robots for urls belonging to FTP protocol. It
+ * extends the generic {@link RobotRulesParser} class and contains Ftp protocol
+ * specific implementation for obtaining the robots file.
*/
public class FtpRobotRulesParser extends RobotRulesParser {
private static final String CONTENT_TYPE = "text/plain";
- public static final Logger LOG = LoggerFactory.getLogger(FtpRobotRulesParser.class);
+ public static final Logger LOG = LoggerFactory
+ .getLogger(FtpRobotRulesParser.class);
- FtpRobotRulesParser() { }
+ FtpRobotRulesParser() {
+ }
public FtpRobotRulesParser(Configuration conf) {
super(conf);
}
/**
- * The hosts for which the caching of robots rules is yet to be done,
- * it sends a Ftp request to the host corresponding to the {@link URL}
- * passed, gets robots file, parses the rules and caches the rules object
- * to avoid re-work in future.
+ * The hosts for which the caching of robots rules is yet to be done, it sends
+ * a Ftp request to the host corresponding to the {@link URL} passed, gets
+ * robots file, parses the rules and caches the rules object to avoid re-work
+ * in future.
+ *
+ * @param ftp
+ * The {@link Protocol} object
+ * @param url
+ * URL
*
- * @param ftp The {@link Protocol} object
- * @param url URL
- *
- * @return robotRules A {@link BaseRobotRules} object for the rules
+ * @return robotRules A {@link BaseRobotRules} object for the rules
*/
public BaseRobotRules getRobotRulesSet(Protocol ftp, URL url) {
- String protocol = url.getProtocol().toLowerCase(); // normalize to lower case
- String host = url.getHost().toLowerCase(); // normalize to lower case
+ String protocol = url.getProtocol().toLowerCase(); // normalize to lower
+ // case
+ String host = url.getHost().toLowerCase(); // normalize to lower case
- BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":" + host);
+ BaseRobotRules robotRules = (SimpleRobotRules) CACHE.get(protocol + ":"
+ + host);
boolean cacheRule = true;
- if (robotRules == null) { // cache miss
+ if (robotRules == null) { // cache miss
if (LOG.isTraceEnabled())
LOG.trace("cache miss " + url);
try {
- String robotsUrl = new URL(url, "/robots.txt").toString();
- ProtocolOutput output = ((Ftp)ftp).getProtocolOutput(robotsUrl, WebPage.newBuilder().build());
+ String robotsUrl = new URL(url, "/robots.txt").toString();
+ ProtocolOutput output = ((Ftp) ftp).getProtocolOutput(robotsUrl,
+ WebPage.newBuilder().build());
int statusCode = output.getStatus().getCode();
if (statusCode == ProtocolStatusCodes.SUCCESS) {
- robotRules = parseRules(url.toString(), output.getContent().getContent(),
- CONTENT_TYPE, agentNames);
- } else {
- robotRules = EMPTY_RULES; // use default rules
+ robotRules = parseRules(url.toString(), output.getContent()
+ .getContent(), CONTENT_TYPE, agentNames);
+ } else {
+ robotRules = EMPTY_RULES; // use default rules
}
} catch (Throwable t) {
if (LOG.isInfoEnabled()) {
@@ -91,7 +98,7 @@ public class FtpRobotRulesParser extends
}
if (cacheRule)
- CACHE.put(protocol + ":" + host, robotRules); // cache rules for host
+ CACHE.put(protocol + ":" + host, robotRules); // cache rules for host
}
return robotRules;
}
Modified: nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/PrintCommandListener.java Fri Jan 9 06:34:33 2015
@@ -28,45 +28,44 @@ import org.apache.commons.net.ProtocolCo
/***
* This is a support class for logging all ftp command/reply traffic.
- *
+ *
* @author John Xing
***/
-public class PrintCommandListener implements ProtocolCommandListener
-{
- private Logger __logger;
+public class PrintCommandListener implements ProtocolCommandListener {
+ private Logger __logger;
- public PrintCommandListener(Logger logger)
- {
- __logger = logger;
- }
+ public PrintCommandListener(Logger logger) {
+ __logger = logger;
+ }
- public void protocolCommandSent(ProtocolCommandEvent event) {
- try {
- __logIt(event);
- } catch (IOException e) {
- if (__logger.isInfoEnabled()) {
- __logger.info("PrintCommandListener.protocolCommandSent(): "+e);
- }
+ public void protocolCommandSent(ProtocolCommandEvent event) {
+ try {
+ __logIt(event);
+ } catch (IOException e) {
+ if (__logger.isInfoEnabled()) {
+ __logger.info("PrintCommandListener.protocolCommandSent(): " + e);
}
}
+ }
- public void protocolReplyReceived(ProtocolCommandEvent event) {
- try {
- __logIt(event);
- } catch (IOException e) {
- if (__logger.isInfoEnabled()) {
- __logger.info("PrintCommandListener.protocolReplyReceived(): "+e);
- }
+ public void protocolReplyReceived(ProtocolCommandEvent event) {
+ try {
+ __logIt(event);
+ } catch (IOException e) {
+ if (__logger.isInfoEnabled()) {
+ __logger.info("PrintCommandListener.protocolReplyReceived(): " + e);
}
}
+ }
- private void __logIt(ProtocolCommandEvent event) throws IOException {
- if (!__logger.isInfoEnabled()) { return; }
- BufferedReader br =
- new BufferedReader(new StringReader(event.getMessage()));
- String line;
- while ((line = br.readLine()) != null) {
- __logger.info("ftp> "+line);
- }
+ private void __logIt(ProtocolCommandEvent event) throws IOException {
+ if (!__logger.isInfoEnabled()) {
+ return;
+ }
+ BufferedReader br = new BufferedReader(new StringReader(event.getMessage()));
+ String line;
+ while ((line = br.readLine()) != null) {
+ __logger.info("ftp> " + line);
}
+ }
}
Modified: nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java Fri Jan 9 06:34:33 2015
@@ -49,11 +49,11 @@ public class Http extends HttpBase {
@Override
public void setConf(Configuration conf) {
super.setConf(conf);
-// Level logLevel = Level.WARNING;
-// if (conf.getBoolean("http.verbose", false)) {
-// logLevel = Level.FINE;
-// }
-// LOG.setLevel(logLevel);
+ // Level logLevel = Level.WARNING;
+ // if (conf.getBoolean("http.verbose", false)) {
+ // logLevel = Level.FINE;
+ // }
+ // LOG.setLevel(logLevel);
}
public static void main(String[] args) throws Exception {
@@ -64,7 +64,7 @@ public class Http extends HttpBase {
@Override
protected Response getResponse(URL url, WebPage page, boolean redirect)
- throws ProtocolException, IOException {
+ throws ProtocolException, IOException {
return new HttpResponse(this, url, page);
}