You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by th...@apache.org on 2016/07/05 22:48:50 UTC
[06/69] [abbrv] [partial] nutch git commit: Re arranged the source
code as per maven conventions for build
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/plugin.dtd
----------------------------------------------------------------------
diff --git a/src/plugin/plugin.dtd b/src/plugin/plugin.dtd
deleted file mode 100644
index 9b67da7..0000000
--- a/src/plugin/plugin.dtd
+++ /dev/null
@@ -1,206 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-
-<!--
- ! Licensed to the Apache Software Foundation (ASF) under one or more
- ! contributor license agreements. See the NOTICE file distributed with
- ! this work for additional information regarding copyright ownership.
- ! The ASF licenses this file to You under the Apache License, Version 2.0
- ! (the "License"); you may not use this file except in compliance with
- ! the License. You may obtain a copy of the License at
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !
- !
- ! Document : plugin.dtd
- ! Created on : 14 avril 2006, 22:14
- ! Author : Chris Mattmann, Jerome Charron
- ! Description: Nutch plug-in manifest DTD
- !
- ! PUBLIC ID : -//Apache Software Fundation//DTD Nutch Plugin Manifest 1.0//EN
- ! SYSTEM ID : http://lucene.apache.org/nutch/plugin.dtd
--->
-
-
-
-<!--
- ! The <plugin> element defines the body of the manifest.
- ! It optionally contains definitions for the plug-in runtime,
- ! definitions of other plug-ins required by this one,
- ! declarations of any new extension points being introduced by the plug-in,
- ! as well as configuration of functional extensions
- ! (configured into extension points defined by other plug-ins,
- ! or introduced by this plug-in).
- !-->
-<!ELEMENT plugin (runtime?, requires?, extension-point*, extension*)>
-
-<!-- A user displayable name for the plug-in -->
-<!ATTLIST plugin name CDATA #REQUIRED>
-
-<!--
- ! A unique identifier for the plug-in.
- ! To minimize potential for naming collisions,
- ! the identifier should be derived from the internet domain id
- ! of the supplying provider (reversing the domain name tokens and
- ! appending additional name tokens separated by dot [.]).
- ! For example, provider nutch.org could define plug-in identifier
- ! org.nutch.myplugin
- !-->
-<!ATTLIST plugin id CDATA #REQUIRED>
-
-<!--
- ! The plug-in version number.
- ! NOTE : Version number compatibility is not yet implemented.
- !-->
-<!ATTLIST plugin version CDATA #REQUIRED>
-
-<!-- The user-displayable name of the provider supplying the plug-in. -->
-<!ATTLIST plugin provider-name CDATA #IMPLIED>
-
-<!--
- ! The name of the plug-in class for this plug-in.
- ! The class must be a subclass of org.apache.nutch.plugin.Plugin
- !-->
-<!ATTLIST plugin class CDATA #IMPLIED>
-
-
-<!--
- ! The <requires> section of the manifest declares
- ! any dependencies on other plug-ins.
- !-->
-<!ELEMENT requires (import+)>
-
-
-<!-- Each dependency is specified using an <import> element. -->
-<!ELEMENT import EMPTY>
-
-<!-- The identifier of the required plug-in. -->
-<!ATTLIST import plugin CDATA #REQUIRED>
-
-
-<!--
- ! The <runtime> section of the manifest contains a definition of one or more
- ! libraries that make up the plug-in runtime.
- ! The referenced libraries are used by the plugin execution mechanisms
- ! (the plug-in class loader) to load and execute the correct code required by
- ! the plug-in.
- !-->
-<!ELEMENT runtime (library+)>
-
-
-<!--
- !The <library> elements collectively define the plug-in runtime.
- ! At least one <library> must be specified.
- !-->
-<!ELEMENT library (export*)>
-
-<!--
- ! A string reference to a library file or directory containing classes
- ! (relative to the plug-in install directory).
- ! Directory references must contain trailing file separator.
- !-->
-<!ATTLIST library name CDATA #REQUIRED>
-
-
-<!--
- ! Each <library> element can specify which portion
- ! of the library should be exported.
- ! The export rules are specified as a set of export masks.
- ! By default (no export rules specified),
- ! the library is considered to be private.
- ! Each export mask is specified using the name attribute.
- !-->
-<!ELEMENT export EMPTY>
-
-<!--
- ! The export mask can have the following values:
- ! * - indicates all contents of library are exported (public)
- ! package.name.* - indicates all classes in the specified package
- ! are exported. The matching rules are the same as in the
- ! Java import statement.
- ! package.name.ClassName - fully qualified java class name
- !
- ! NOTE : export mask is not yet implemented in Nutch.
- !-->
-<!ATTLIST export name CDATA #REQUIRED>
-
-
-<!--
- ! Nutch's architecture is based on the notion of configurable extension points.
- ! Nutch itself predefines a set of extension points that cover the task of
- ! extending it (for example, adding parser, indexing filter, ...).
- ! In addition to the predefined extension points, each supplied plug-in can
- ! declare additional extension points. By declaring an extension point the
- ! plug-in is essentially advertising the ability to configure the plug-in
- ! function with externally supplied extensions.
- !-->
-<!ELEMENT extension-point EMPTY>
-
-<!-- A user-displayable name for the extension point. -->
-<!ATTLIST extension-point name CDATA #REQUIRED>
-
-<!-- A simple id, unique within this plug-in -->
-<!ATTLIST extension-point id CDATA #REQUIRED>
-
-
-<!--
- ! Actual extensions are configured into extension points
- ! (predefined, or newly declared in this plug-in) in the <extension> section.
- !
- ! The configuration information is specified by at least one implementation
- ! with some parameters.
- !-->
-<!ELEMENT extension (implementation+)>
-
-<!--
- ! A reference to an extension point being configured.
- ! The extension point can be one defined in this plug-in or another plug-in.
- !-->
-<!ATTLIST extension point CDATA #REQUIRED>
-
-<!--
- ! Optional identifier for this extension point configuration instance.
- ! This is used by extension points that need to uniquely identify
- ! (rather than just enumerate) the specific configured extensions.
- ! The identifier is specified as a simple token unique within the definition
- ! of the declaring plug-in. When used globally, the extension identifier
- ! is qualified by the plug-in identifier.
- ! FIXME : Seems it is never read in the code.
- !-->
-<!ATTLIST extension id CDATA #IMPLIED>
-
-<!--
- ! A user-displayable name for the extension.
- ! FIXME : Seems it is never read in the code.
- !-->
-<!ATTLIST extension name CDATA #IMPLIED>
-
-
-<!--
- ! Defines a specific implementation for the extension.
- ! This implementation can define some special name/value parameters
- ! used at runtime.
- !-->
-<!ELEMENT implementation (parameter*)>
-
-<!-- A unique identifier for this implementation -->
-<!ATTLIST implementation id CDATA #REQUIRED>
-
-<!-- The fully-qualified Java Class that implements this extension-point -->
-<!ATTLIST implementation class CDATA #REQUIRED>
-
-
-<!-- Defines a name/value parameter -->
-<!ELEMENT parameter EMPTY>
-
-<!-- The parameter's name (should be unique for an extension) -->
-<!ATTLIST parameter name CDATA #REQUIRED>
-
-<!-- The parameter's value -->
-<!ATTLIST parameter value CDATA #REQUIRED>
-
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/build.xml b/src/plugin/protocol-file/build.xml
deleted file mode 100644
index 121b1fe..0000000
--- a/src/plugin/protocol-file/build.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="protocol-file" default="jar-core">
-
- <import file="../build-plugin.xml"/>
-
- <!-- for junit test -->
- <mkdir dir="${build.test}/data"/>
- <copy todir="${build.test}/data">
- <fileset dir="sample">
- <include name="*.txt"/>
- </fileset>
- </copy>
-</project>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/ivy.xml b/src/plugin/protocol-file/ivy.xml
deleted file mode 100644
index 1a86d68..0000000
--- a/src/plugin/protocol-file/ivy.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<ivy-module version="1.0">
- <info organisation="org.apache.nutch" module="${ant.project.name}">
- <license name="Apache 2.0"/>
- <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
- <description>
- Apache Nutch
- </description>
- </info>
-
- <configurations>
- <include file="../../..//ivy/ivy-configurations.xml"/>
- </configurations>
-
- <publications>
- <!--get the artifact from our module name-->
- <artifact conf="master"/>
- </publications>
-
- <dependencies>
- </dependencies>
-
-</ivy-module>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/plugin.xml b/src/plugin/protocol-file/plugin.xml
deleted file mode 100644
index 1647ce4..0000000
--- a/src/plugin/protocol-file/plugin.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<plugin
- id="protocol-file"
- name="File Protocol Plug-in"
- version="1.0.0"
- provider-name="nutch.org">
-
-
- <runtime>
- <library name="protocol-file.jar">
- <export name="*"/>
- </library>
- </runtime>
-
- <requires>
- <import plugin="nutch-extensionpoints"/>
- </requires>
-
- <extension id="org.apache.nutch.protocol.file"
- name="FileProtocol"
- point="org.apache.nutch.protocol.Protocol">
-
- <implementation id="org.apache.nutch.protocol.file.File"
- class="org.apache.nutch.protocol.file.File">
- <parameter name="protocolName" value="file"/>
- </implementation>
-
- </extension>
-
-</plugin>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/sample/testprotocolfile.txt
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/sample/testprotocolfile.txt b/src/plugin/protocol-file/sample/testprotocolfile.txt
deleted file mode 100644
index fbe8a8a..0000000
--- a/src/plugin/protocol-file/sample/testprotocolfile.txt
+++ /dev/null
@@ -1 +0,0 @@
-Protocol File Test
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt b/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
deleted file mode 100644
index fbe8a8a..0000000
--- a/src/plugin/protocol-file/sample/testprotocolfile_(encoded).txt
+++ /dev/null
@@ -1 +0,0 @@
-Protocol File Test
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
deleted file mode 100644
index 2712218..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
+++ /dev/null
@@ -1,228 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-import java.net.URL;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatus;
-import org.apache.nutch.protocol.RobotRulesParser;
-import org.apache.nutch.util.NutchConfiguration;
-
-import crawlercommons.robots.BaseRobotRules;
-
-/**
- * This class is a protocol plugin used for file: scheme. It creates
- * {@link FileResponse} object and gets the content of the url from it.
- * Configurable parameters are {@code file.content.limit} and
- * {@code file.crawl.parent} in nutch-default.xml defined under
- * "file properties" section.
- *
- * @author John Xing
- */
-public class File implements Protocol {
-
- public static final Logger LOG = LoggerFactory.getLogger(File.class);
-
- static final int MAX_REDIRECTS = 5;
-
- int maxContentLength;
- boolean crawlParents;
-
- /**
- * if true return a redirect for symbolic links and do not resolve the links
- * internally
- */
- boolean symlinksAsRedirects = true;
-
- private Configuration conf;
-
- public File() {
- }
-
- /**
- * Set the {@link Configuration} object
- */
- public void setConf(Configuration conf) {
- this.conf = conf;
- this.maxContentLength = conf.getInt("file.content.limit", 64 * 1024);
- this.crawlParents = conf.getBoolean("file.crawl.parent", true);
- this.symlinksAsRedirects = conf.getBoolean(
- "file.crawl.redirect_noncanonical", true);
- }
-
- /**
- * Get the {@link Configuration} object
- */
- public Configuration getConf() {
- return this.conf;
- }
-
- /**
- * Set the length after which content is truncated.
- */
- public void setMaxContentLength(int maxContentLength) {
- this.maxContentLength = maxContentLength;
- }
-
- /**
- * Creates a {@link FileResponse} object corresponding to the url and return a
- * {@link ProtocolOutput} object as per the content received
- *
- * @param url
- * Text containing the url
- * @param datum
- * The CrawlDatum object corresponding to the url
- *
- * @return {@link ProtocolOutput} object for the content of the file indicated
- * by url
- */
- public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) {
- String urlString = url.toString();
- try {
- URL u = new URL(urlString);
-
- int redirects = 0;
-
- while (true) {
- FileResponse response;
- response = new FileResponse(u, datum, this, getConf()); // make a
- // request
-
- int code = response.getCode();
-
- if (code == 200) { // got a good response
- return new ProtocolOutput(response.toContent()); // return it
-
- } else if (code == 304) { // got not modified
- return new ProtocolOutput(response.toContent(),
- ProtocolStatus.STATUS_NOTMODIFIED);
-
- } else if (code == 401) { // access denied / no read permissions
- return new ProtocolOutput(response.toContent(), new ProtocolStatus(
- ProtocolStatus.ACCESS_DENIED));
-
- } else if (code == 404) { // no such file
- return new ProtocolOutput(response.toContent(),
- ProtocolStatus.STATUS_NOTFOUND);
-
- } else if (code >= 300 && code < 400) { // handle redirect
- u = new URL(response.getHeader("Location"));
- if (LOG.isTraceEnabled()) {
- LOG.trace("redirect to " + u);
- }
- if (symlinksAsRedirects) {
- return new ProtocolOutput(response.toContent(), new ProtocolStatus(
- ProtocolStatus.MOVED, u));
- } else if (redirects == MAX_REDIRECTS) {
- LOG.trace("Too many redirects: {}", url);
- return new ProtocolOutput(response.toContent(), new ProtocolStatus(
- ProtocolStatus.REDIR_EXCEEDED, u));
- }
- redirects++;
-
- } else { // convert to exception
- throw new FileError(code);
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- return new ProtocolOutput(null, new ProtocolStatus(e));
- }
- }
-
- /**
- * Quick way for running this class. Useful for debugging.
- */
- public static void main(String[] args) throws Exception {
- int maxContentLength = Integer.MIN_VALUE;
- String logLevel = "info";
- boolean dumpContent = false;
- String urlString = null;
-
- String usage = "Usage: File [-logLevel level] [-maxContentLength L] [-dumpContent] url";
-
- if (args.length == 0) {
- System.err.println(usage);
- System.exit(-1);
- }
-
- for (int i = 0; i < args.length; i++) {
- if (args[i].equals("-logLevel")) {
- logLevel = args[++i];
- } else if (args[i].equals("-maxContentLength")) {
- maxContentLength = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-dumpContent")) {
- dumpContent = true;
- } else if (i != args.length - 1) {
- System.err.println(usage);
- System.exit(-1);
- } else
- urlString = args[i];
- }
-
- File file = new File();
- file.setConf(NutchConfiguration.create());
-
- if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength
- file.setMaxContentLength(maxContentLength);
-
- // set log level
- // LOG.setLevel(Level.parse((new String(logLevel)).toUpperCase()));
-
- ProtocolOutput output = file.getProtocolOutput(new Text(urlString),
- new CrawlDatum());
- Content content = output.getContent();
-
- System.err.println("URL: " + content.getUrl());
- System.err.println("Status: " + output.getStatus());
- System.err.println("Content-Type: " + content.getContentType());
- System.err.println("Content-Length: "
- + content.getMetadata().get(Response.CONTENT_LENGTH));
- System.err.println("Last-Modified: "
- + content.getMetadata().get(Response.LAST_MODIFIED));
- String redirectLocation = content.getMetadata().get("Location");
- if (redirectLocation != null) {
- System.err.println("Location: " + redirectLocation);
- }
-
- if (dumpContent) {
- System.out.print(new String(content.getContent()));
- }
-
- file = null;
- }
-
- /**
- * No robots parsing is done for file protocol. So this returns a set of empty
- * rules which will allow every url.
- */
- public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) {
- return RobotRulesParser.EMPTY_RULES;
- }
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java
deleted file mode 100644
index 4fef340..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileError.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-/**
- * Thrown for File error codes.
- */
-public class FileError extends FileException {
-
- private int code;
-
- public int getCode(int code) {
- return code;
- }
-
- public FileError(int code) {
- super("File Error: " + code);
- this.code = code;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java
deleted file mode 100644
index f0467de..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileException.java
+++ /dev/null
@@ -1,40 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-import org.apache.nutch.protocol.ProtocolException;
-
-public class FileException extends ProtocolException {
-
- public FileException() {
- super();
- }
-
- public FileException(String message) {
- super(message);
- }
-
- public FileException(String message, Throwable cause) {
- super(message, cause);
- }
-
- public FileException(Throwable cause) {
- super(cause);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
deleted file mode 100644
index b6e74ff..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
+++ /dev/null
@@ -1,317 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-// JDK imports
-import java.net.URL;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-
-// Nutch imports
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.MimeUtil;
-import org.apache.nutch.metadata.Metadata;
-import org.apache.nutch.net.protocols.HttpDateFormat;
-import org.apache.nutch.net.protocols.Response;
-
-// Tika imports
-import org.apache.tika.Tika;
-
-// Hadoop imports
-import org.apache.hadoop.conf.Configuration;
-
-/************************************
- * FileResponse.java mimics file replies as http response. It tries its best to
- * follow http's way for headers, response codes as well as exceptions.
- *
- * Comments: (1) java.net.URL and java.net.URLConnection can handle file:
- * scheme. However they are not flexible enough, so not used in this
- * implementation.
- *
- * (2) java.io.File is used for its abstractness across platforms. Warning:
- * java.io.File API (1.4.2) does not elaborate on how special files, such as
- * /dev/* in unix and /proc/* on linux, are treated. Tests show (a)
- * java.io.File.isFile() returns false for /dev/*, (b) java.io.File.isFile()
- * returns true for /proc/*, (c) java.io.File.length() returns 0 for /proc/*. We are
- * probably okay for now. Could be buggy here. How about special files on
- * windows?
- *
- * (3) java.io.File API (1.4.2) does not seem to know unix hard link files. They
- * are just treated as individual files.
- *
- * (4) No fancy POSIX file attributes yet. May never need?
- *
- * @author John Xing
- ***********************************/
-public class FileResponse {
-
- private String orig;
- private String base;
- private byte[] content;
- private static final byte[] EMPTY_CONTENT = new byte[0];
- private int code;
- private Metadata headers = new Metadata();
-
- private final File file;
- private Configuration conf;
-
- private MimeUtil MIME;
- private Tika tika;
-
- /** Returns the response code. */
- public int getCode() {
- return code;
- }
-
- /** Returns the value of a named header. */
- public String getHeader(String name) {
- return headers.get(name);
- }
-
- public byte[] getContent() {
- return content;
- }
-
- public Content toContent() {
- return new Content(orig, base, (content != null ? content : EMPTY_CONTENT),
- getHeader(Response.CONTENT_TYPE), headers, this.conf);
- }
-
- /**
- * Default public constructor
- *
- * @param url
- * @param datum
- * @param file
- * @param conf
- * @throws FileException
- * @throws IOException
- */
- public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf)
- throws FileException, IOException {
-
- this.orig = url.toString();
- this.base = url.toString();
- this.file = file;
- this.conf = conf;
-
- MIME = new MimeUtil(conf);
- tika = new Tika();
-
- if (!"file".equals(url.getProtocol()))
- throw new FileException("Not a file url:" + url);
-
- if (File.LOG.isTraceEnabled()) {
- File.LOG.trace("fetching " + url);
- }
-
- if (url.getPath() != url.getFile()) {
- if (File.LOG.isWarnEnabled()) {
- File.LOG.warn("url.getPath() != url.getFile(): " + url);
- }
- }
-
- String path = "".equals(url.getPath()) ? "/" : url.getPath();
-
- try {
- // specify the encoding via the config later?
- path = java.net.URLDecoder.decode(path, "UTF-8");
- } catch (UnsupportedEncodingException ex) {
- }
-
- try {
-
- this.content = null;
-
- // url.toURI() is only in j2se 1.5.0
- // java.io.File f = new java.io.File(url.toURI());
- java.io.File f = new java.io.File(path);
-
- if (!f.exists()) {
- this.code = 404; // http Not Found
- return;
- }
-
- if (!f.canRead()) {
- this.code = 401; // http Unauthorized
- return;
- }
-
- // symbolic link or relative path on unix
- // fix me: what's the consequence on windows platform
- // where case is insensitive
- if (!f.equals(f.getCanonicalFile())) {
- // set headers
- // hdrs.put("Location", f.getCanonicalFile().toURI());
- //
- // we want to automatically escape characters that are illegal in URLs.
- // It is recommended that new code convert an abstract pathname into a
- // URL
- // by first converting it into a URI, via the toURI method, and then
- // converting the URI into a URL via the URI.toURL method.
- headers.set(Response.LOCATION, f.getCanonicalFile().toURI().toURL()
- .toString());
-
- this.code = 300; // http redirect
- return;
- }
- if (f.lastModified() <= datum.getModifiedTime()) {
- this.code = 304;
- this.headers.set("Last-Modified",
- HttpDateFormat.toString(f.lastModified()));
- return;
- }
-
- if (f.isDirectory()) {
- getDirAsHttpResponse(f);
- } else if (f.isFile()) {
- getFileAsHttpResponse(f);
- } else {
- this.code = 500; // http Internal Server Error
- return;
- }
-
- } catch (IOException e) {
- throw e;
- }
-
- }
-
- // get file as http response
- private void getFileAsHttpResponse(java.io.File f) throws FileException,
- IOException {
-
- // ignore file of size larger than
- // Integer.MAX_VALUE = 2^31-1 = 2147483647
- long size = f.length();
- if (size > Integer.MAX_VALUE) {
- throw new FileException("file is too large, size: " + size);
- // or we can do this?
- // this.code = 400; // http Bad request
- // return;
- }
-
- // capture content
- int len = (int) size;
-
- if (this.file.maxContentLength >= 0 && len > this.file.maxContentLength)
- len = this.file.maxContentLength;
-
- this.content = new byte[len];
-
- java.io.InputStream is = new java.io.FileInputStream(f);
- int offset = 0;
- int n = 0;
- while (offset < len
- && (n = is.read(this.content, offset, len - offset)) >= 0) {
- offset += n;
- }
- if (offset < len) { // keep whatever already have, but issue a warning
- if (File.LOG.isWarnEnabled()) {
- File.LOG.warn("not enough bytes read from file: " + f.getPath());
- }
- }
- is.close();
-
- // set headers
- headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
- headers.set(Response.LAST_MODIFIED,
- HttpDateFormat.toString(f.lastModified()));
-
- String mimeType = tika.detect(f);
-
- headers.set(Response.CONTENT_TYPE, mimeType != null ? mimeType : "");
-
- // response code
- this.code = 200; // http OK
- }
-
- /**
- * get dir list as http response
- *
- * @param f
- * @throws IOException
- */
- private void getDirAsHttpResponse(java.io.File f) throws IOException {
-
- String path = f.toString();
- if (this.file.crawlParents)
- this.content = list2html(f.listFiles(), path, "/".equals(path) ? false
- : true);
- else
- this.content = list2html(f.listFiles(), path, false);
-
- // set headers
- headers.set(Response.CONTENT_LENGTH,
- new Integer(this.content.length).toString());
- headers.set(Response.CONTENT_TYPE, "text/html");
- headers.set(Response.LAST_MODIFIED,
- HttpDateFormat.toString(f.lastModified()));
-
- // response code
- this.code = 200; // http OK
- }
-
- /**
- * generate html page from dir list
- *
- * @param list
- * @param path
- * @param includeDotDot
- * @return
- */
- private byte[] list2html(java.io.File[] list, String path,
- boolean includeDotDot) {
-
- StringBuffer x = new StringBuffer("<html><head>");
- x.append("<title>Index of " + path + "</title></head>\n");
- x.append("<body><h1>Index of " + path + "</h1><pre>\n");
-
- if (includeDotDot) {
- x.append("<a href='../'>../</a>\t-\t-\t-\n");
- }
-
- // fix me: we might want to sort list here! but not now.
-
- java.io.File f;
- for (int i = 0; i < list.length; i++) {
- f = list[i];
- String name = f.getName();
- String time = HttpDateFormat.toString(f.lastModified());
- if (f.isDirectory()) {
- // java 1.4.2 api says dir itself and parent dir are not listed
- // so the following is not needed.
- // if (name.equals(".") || name.equals(".."))
- // continue;
- x.append("<a href='" + name + "/" + "'>" + name + "/</a>\t");
- x.append(time + "\t-\n");
- } else if (f.isFile()) {
- x.append("<a href='" + name + "'>" + name + "</a>\t");
- x.append(time + "\t" + f.length() + "\n");
- } else {
- // ignore any other
- }
- }
-
- x.append("</pre></body></html>\n");
-
- return new String(x).getBytes();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html b/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html
deleted file mode 100644
index 221c79c..0000000
--- a/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/package.html
+++ /dev/null
@@ -1,5 +0,0 @@
-<html>
-<body>
-<p>Protocol plugin which supports retrieving local file resources.</p><p></p>
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java b/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
deleted file mode 100644
index 5f95377..0000000
--- a/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.file;
-
-// Hadoop imports
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
-// Nutch imports
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.net.protocols.Response;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolException;
-import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatus;
-import org.apache.nutch.util.NutchConfiguration;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-/**
- * Unit tests for the {@link File} protocol plugin.
- *
- * @author mattmann
- * @version $Revision$
- */
-public class TestProtocolFile {
-
-  private static final String expectedMimeType = "text/plain";
-
-  private static final CrawlDatum datum = new CrawlDatum();
-
-  private static final String[] testTextFiles = { "testprotocolfile.txt",
-      "testprotocolfile_(encoded).txt", "testprotocolfile_%28encoded%29.txt" };
-
-  private String fileSeparator = System.getProperty("file.separator");
-  private String sampleDir = System.getProperty("test.data", ".");
-
-  private Configuration conf;
-
-  @Before
-  public void setUp() {
-    conf = NutchConfiguration.create();
-  }
-
-  /** Runs the content-type check against every sample file. */
-  @Test
-  public void testSetContentType() throws ProtocolException {
-    for (int i = 0; i < testTextFiles.length; i++) {
-      setContentType(testTextFiles[i]);
-    }
-  }
-
-  /**
-   * Fetches one sample file through the protocol plugin and checks that the
-   * fetch succeeds and that the <code>Response.CONTENT_TYPE</code> metadata
-   * field and the content type are both set to the expected MIME type.
-   *
-   * @param testTextFile
-   *          name of the sample file, relative to the test data directory
-   * @since NUTCH-384
-   */
-  public void setContentType(String testTextFile) throws ProtocolException {
-    String urlString = "file:" + sampleDir + fileSeparator + testTextFile;
-    Assert.assertNotNull(urlString);
-
-    Protocol protocol = new ProtocolFactory(conf).getProtocol(urlString);
-    ProtocolOutput output = protocol.getProtocolOutput(new Text(urlString),
-        datum);
-    Assert.assertNotNull(output);
-
-    ProtocolStatus status = output.getStatus();
-    String failureMessage = "Status code: [" + status.getCode()
-        + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: ["
-        + status.getArgs() + "]";
-    Assert.assertEquals(failureMessage, ProtocolStatus.SUCCESS,
-        status.getCode());
-
-    Assert.assertNotNull(output.getContent());
-    Assert.assertNotNull(output.getContent().getContentType());
-    Assert.assertEquals(expectedMimeType, output.getContent().getContentType());
-    Assert.assertNotNull(output.getContent().getMetadata());
-    Assert.assertEquals(expectedMimeType,
-        output.getContent().getMetadata().get(Response.CONTENT_TYPE));
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/build.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/build.xml b/src/plugin/protocol-ftp/build.xml
deleted file mode 100644
index 79314d4..0000000
--- a/src/plugin/protocol-ftp/build.xml
+++ /dev/null
@@ -1,22 +0,0 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project name="protocol-ftp" default="jar-core">
-
- <import file="../build-plugin.xml"/>
-
-</project>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/ivy.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/ivy.xml b/src/plugin/protocol-ftp/ivy.xml
deleted file mode 100644
index 214c445..0000000
--- a/src/plugin/protocol-ftp/ivy.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-<?xml version="1.0" ?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<ivy-module version="1.0">
- <info organisation="org.apache.nutch" module="${ant.project.name}">
- <license name="Apache 2.0"/>
- <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
- <description>
- Apache Nutch
- </description>
- </info>
-
- <configurations>
- <include file="../../..//ivy/ivy-configurations.xml"/>
- </configurations>
-
- <publications>
- <!--get the artifact from our module name-->
- <artifact conf="master"/>
- </publications>
-
- <dependencies>
- <dependency org="commons-net" name="commons-net" rev="1.2.2" conf="*->master"/>
- </dependencies>
-
-</ivy-module>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/plugin.xml
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/plugin.xml b/src/plugin/protocol-ftp/plugin.xml
deleted file mode 100644
index 1421e37..0000000
--- a/src/plugin/protocol-ftp/plugin.xml
+++ /dev/null
@@ -1,46 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<plugin
-   id="protocol-ftp"
-   name="Ftp Protocol Plug-in"
-   version="1.0.0"
-   provider-name="nutch.org">
-
-   <runtime>
-      <library name="protocol-ftp.jar">
-         <export name="*"/>
-      </library>
-      <!-- Must name the commons-net jar that this plugin's ivy.xml resolves (rev 1.2.2). -->
-      <library name="commons-net-1.2.2.jar"/>
-   </runtime>
-
-   <requires>
-      <import plugin="nutch-extensionpoints"/>
-   </requires>
-
-   <extension id="org.apache.nutch.protocol.ftp"
-              name="FtpProtocol"
-              point="org.apache.nutch.protocol.Protocol">
-
-      <implementation id="org.apache.nutch.protocol.ftp.Ftp"
-                      class="org.apache.nutch.protocol.ftp.Ftp">
-        <parameter name="protocolName" value="ftp"/>
-      </implementation>
-
-   </extension>
-
-</plugin>
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
deleted file mode 100644
index da25d87..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
+++ /dev/null
@@ -1,595 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-
-import java.net.InetAddress;
-import java.net.Socket;
-
-import java.util.List;
-//import java.util.LinkedList;
-
-import org.apache.commons.net.MalformedServerReplyException;
-
-import org.apache.commons.net.ftp.FTP;
-import org.apache.commons.net.ftp.FTPCommand;
-import org.apache.commons.net.ftp.FTPFile;
-import org.apache.commons.net.ftp.FTPFileEntryParser;
-import org.apache.commons.net.ftp.FTPReply;
-
-import org.apache.commons.net.ftp.FTPConnectionClosedException;
-
-/***********************************************
- * Client.java encapsulates functionalities necessary for nutch to get dir list
- * and retrieve file from an FTP server. This class takes care of all low level
- * details of interacting with an FTP server and provides a convenient higher
- * level interface.
- *
- * Modified from FtpClient.java in apache commons-net.
- *
- * Notes by John Xing: ftp server implementations are hardly uniform and none
- * seems to follow RFCs whole-heartedly. We have no choice, but assume common
- * denominator as following: (1) Use stream mode for data transfer. Block mode
- * will be better for multiple file downloading and partial file downloading.
- * However not every ftpd has block mode support. (2) Use passive mode for data
- * connection. So Nutch will work if we run behind firewall. (3) Data connection
- * is opened/closed per ftp command for the reasons listed in (1). There are ftp
- * servers out there, when partial downloading is enforced by closing data
- * channel socket on our client side, the server side immediately closes control
- * channel (socket). Our codes deal with such a bad behavior. (4) LIST is used
- * to obtain remote file attributes if possible. MDTM & SIZE would be nice, but
- * not as ubiquitously implemented as LIST. (5) Avoid using ABOR in single
- * thread? Do not use it at all.
- *
- * About exceptions: Some specific exceptions are re-thrown as one of
- * FtpException*.java In fact, each function throws FtpException*.java or pass
- * IOException.
- *
- * @author John Xing
- ***********************************************/
-
-public class Client extends FTP {
- private int __dataTimeout;
- private int __passivePort;
- private String __passiveHost;
- // private int __fileType, __fileFormat;
- private boolean __remoteVerificationEnabled;
- // private FTPFileEntryParser __entryParser;
- private String __systemName;
-
-  /**
-   * Creates a client with remote verification enabled, no data timeout
-   * (-1) and all per-connection state reset.
-   */
-  public Client() {
-    __remoteVerificationEnabled = true;
-    __dataTimeout = -1;
-    __initDefaults();
-  }
-
-  // Resets the per-connection state (cached system name and passive-mode
-  // endpoint). File type, file format and entry parser are not tracked here.
-  private void __initDefaults() {
-    __systemName = null;
-    __passivePort = -1;
-    __passiveHost = null;
-  }
-
-  /**
-   * Parses the reply to pasv() and stores the announced data endpoint in
-   * {@code __passiveHost} and {@code __passivePort}.
-   * <p>
-   * The reply is expected to contain {@code (h1,h2,h3,h4,p1,p2)}: the first
-   * four fields are joined with dots to form the host, the last two are the
-   * high and low byte of the port.
-   *
-   * @param reply
-   *          first line of the server reply to PASV
-   * @throws MalformedServerReplyException
-   *           if the parenthesised part is missing, does not consist of
-   *           exactly six comma separated fields, or the port fields are not
-   *           integers
-   */
-  private void __parsePassiveModeReply(String reply)
-      throws MalformedServerReplyException {
-    int open = reply.indexOf('(');
-    int close = (open < 0) ? -1 : reply.indexOf(')', open + 1);
-    if (close < 0) {
-      // Without this check substring() would fail with an unchecked
-      // StringIndexOutOfBoundsException that callers do not expect.
-      throw new MalformedServerReplyException(
-          "Could not parse passive host information.\nServer Reply: " + reply);
-    }
-
-    String endpoint = reply.substring(open + 1, close).trim();
-    String[] fields = endpoint.split(",", -1);
-    if (fields.length != 6) {
-      throw new MalformedServerReplyException(
-          "Could not parse passive host information.\nServer Reply: "
-              + endpoint);
-    }
-
-    int highByte;
-    int lowByte;
-    try {
-      highByte = Integer.parseInt(fields[4].trim());
-      lowByte = Integer.parseInt(fields[5].trim());
-    } catch (NumberFormatException e) {
-      throw new MalformedServerReplyException(
-          "Could not parse passive host information.\nServer Reply: "
-              + endpoint);
-    }
-
-    StringBuffer host = new StringBuffer(24);
-    host.append(fields[0].trim());
-    for (int i = 1; i < 4; i++) {
-      host.append('.').append(fields[i].trim());
-    }
-
-    __passiveHost = host.toString();
-    __passivePort = (highByte << 8) | lowByte;
-  }
-
-  /**
-   * Opens a passive-mode data connection and then sends the given command
-   * on the control connection.
-   *
-   * @param command
-   *          FTP command code to send once the data socket is connected
-   * @param arg
-   *          argument of the command
-   * @return the connected data socket, or {@code null} if the server did not
-   *         answer the command with a positive preliminary reply
-   * @throws IOException
-   *           if an I/O error occurs on the control or data connection
-   * @throws FtpExceptionCanNotHaveDataConnection
-   *           if PASV is refused, its reply cannot be parsed, or the remote
-   *           host of the data connection fails verification
-   */
-  protected Socket __openPassiveDataConnection(int command, String arg)
-      throws IOException, FtpExceptionCanNotHaveDataConnection {
-
-    if (pasv() != FTPReply.ENTERING_PASSIVE_MODE)
-      throw new FtpExceptionCanNotHaveDataConnection("pasv() failed. "
-          + getReplyString());
-
-    try {
-      __parsePassiveModeReply(getReplyStrings()[0]);
-    } catch (MalformedServerReplyException e) {
-      throw new FtpExceptionCanNotHaveDataConnection(e.getMessage());
-    }
-
-    // NOTE: a retry loop (2004-03-17, xing) that re-issued PASV while the
-    // server kept announcing the previous passive port used to live here in
-    // commented-out form; it is intentionally not in use.
-
-    Socket socket = _socketFactory_.createSocket(__passiveHost, __passivePort);
-
-    // The socket is only handed to the caller on full success; on every
-    // other exit path (including exceptions) it is closed here so it cannot
-    // leak.
-    boolean handedOver = false;
-    try {
-      if (!FTPReply.isPositivePreliminary(sendCommand(command, arg))) {
-        return null;
-      }
-
-      if (__remoteVerificationEnabled && !verifyRemote(socket)) {
-        InetAddress host1 = socket.getInetAddress();
-        InetAddress host2 = getRemoteAddress();
-
-        // our precaution
-        throw new FtpExceptionCanNotHaveDataConnection(
-            "Host attempting data connection " + host1.getHostAddress()
-                + " is not same as server " + host2.getHostAddress()
-                + " So we intentionally close it for security precaution.");
-      }
-
-      if (__dataTimeout >= 0)
-        socket.setSoTimeout(__dataTimeout);
-
-      handedOver = true;
-      return socket;
-    } finally {
-      if (!handedOver) {
-        socket.close();
-      }
-    }
-  }
-
-  /***
-   * Sets the timeout, in milliseconds, applied to a data connection right
-   * after it has been opened. A negative value leaves the socket timeout
-   * untouched.
-   ***/
-  public void setDataTimeout(int timeout) {
-    this.__dataTimeout = timeout;
-  }
-
-  /***
-   * Resets the connection parameters to their default values and then closes
-   * the connection to the FTP server.
-   * <p>
-   *
-   * @exception IOException
-   *              If an error occurs while disconnecting.
-   ***/
-  public void disconnect() throws IOException {
-    // Data connections need no cleanup here: every FTP command that involves
-    // a data connection closes it itself.
-    this.__initDefaults();
-    super.disconnect();
-  }
-
-  /***
-   * Turns on or off the check that the remote host of a data connection is
-   * the same host the control connection is attached to. Verification is
-   * enabled by default. The value may be changed at any time, connected or
-   * not.
-   * <p>
-   *
-   * @param enable
-   *          True to enable verification, false to disable verification.
-   ***/
-  public void setRemoteVerificationEnabled(boolean enable) {
-    this.__remoteVerificationEnabled = enable;
-  }
-
-  /***
-   * Tells whether the remote host taking part in data connections is
-   * verified. Verification is enabled by default.
-   * <p>
-   *
-   * @return True if verification is enabled, false if not.
-   ***/
-  public boolean isRemoteVerificationEnabled() {
-    return this.__remoteVerificationEnabled;
-  }
-
-  /***
-   * Logs in to the FTP server with the given username and password. The
-   * password is only sent if the server asks for it after the username.
-   * <p>
-   *
-   * @param username
-   *          The username to login under.
-   * @param password
-   *          The password to use.
-   * @return True if successfully completed, false if not.
-   * @exception FTPConnectionClosedException
-   *              If the FTP server prematurely closes the connection as a
-   *              result of the client being idle or some other reason causing
-   *              the server to send FTP reply code 421. This exception may be
-   *              caught either as an IOException or independently as itself.
-   * @exception IOException
-   *              If an I/O error occurs while either sending a command to the
-   *              server or receiving a reply from the server.
-   ***/
-  public boolean login(String username, String password) throws IOException {
-    user(username);
-    int userReply = getReplyCode();
-
-    // The username alone was sufficient.
-    if (FTPReply.isPositiveCompletion(userReply)) {
-      return true;
-    }
-
-    // Otherwise the reply is either an intermediate one requesting the
-    // password, or an error code.
-    if (FTPReply.isPositiveIntermediate(userReply)) {
-      return FTPReply.isPositiveCompletion(pass(password));
-    }
-
-    return false;
-  }
-
-  /***
-   * Logs out of the FTP server by sending the QUIT command.
-   * <p>
-   *
-   * @return True if successfully completed, false if not.
-   * @exception FTPConnectionClosedException
-   *              If the FTP server prematurely closes the connection as a
-   *              result of the client being idle or some other reason causing
-   *              the server to send FTP reply code 421. This exception may be
-   *              caught either as an IOException or independently as itself.
-   * @exception IOException
-   *              If an I/O error occurs while either sending a command to the
-   *              server or receiving a reply from the server.
-   ***/
-  public boolean logout() throws IOException {
-    int quitReply = quit();
-    return FTPReply.isPositiveCompletion(quitReply);
-  }
-
-  /**
-   * Issues LIST for a path and collects the parsed entries.
-   *
-   * @param path
-   *          remote path to list
-   * @param entries
-   *          list that receives every entry the parser recognises; lines the
-   *          parser cannot parse are skipped
-   * @param limit
-   *          download limit counted in characters of the accepted listing
-   *          lines; reading stops after the line that pushes the total over
-   *          the limit. A negative value means no limit.
-   * @param parser
-   *          parser used to read and parse the listing lines
-   * @throws IOException
-   *           if an I/O error occurs on the control or data connection
-   * @throws FtpExceptionCanNotHaveDataConnection
-   *           if no data connection could be established
-   * @throws FtpExceptionUnknownForcedDataClose
-   *           if the server answers the closed data connection with a reply
-   *           that is not known to be harmless
-   * @throws FtpExceptionControlClosedByForcedDataClose
-   *           if the server closes the control connection in response to the
-   *           data connection being closed
-   */
-  public void retrieveList(String path, List<FTPFile> entries, int limit,
-      FTPFileEntryParser parser) throws IOException,
-      FtpExceptionCanNotHaveDataConnection, FtpExceptionUnknownForcedDataClose,
-      FtpExceptionControlClosedByForcedDataClose {
-    Socket socket = __openPassiveDataConnection(FTPCommand.LIST, path);
-
-    if (socket == null)
-      throw new FtpExceptionCanNotHaveDataConnection("LIST "
-          + ((path == null) ? "" : path));
-
-    try {
-      BufferedReader reader = new BufferedReader(new InputStreamReader(
-          socket.getInputStream()));
-
-      int count = 0;
-      String line;
-      while ((line = parser.readNextEntry(reader)) != null) {
-        FTPFile ftpFile = parser.parseFTPEntry(line);
-        // skip non-formatted lines
-        if (ftpFile == null) {
-          continue;
-        }
-        entries.add(ftpFile);
-        count += line.length();
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is up to the line when total bytes is just over limit
-        if (limit >= 0 && count > limit) {
-          break;
-        }
-      }
-    } finally {
-      // The data socket is always closed here, whether or not the limit was
-      // reached and also when reading fails, so it cannot leak. Servers
-      // react differently to an early close; see the reply handling below.
-      socket.close();
-    }
-
-    try {
-      int reply = getReply();
-      if (!_notBadReply(reply))
-        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-    } catch (FTPConnectionClosedException e) {
-      // some ftp servers will close control channel if data channel socket
-      // is closed by our end before all data has been read out. Check:
-      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-      // so must catch FTPConnectionClosedException thrown by getReply() above
-      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-    }
-
-  }
-
-  /**
-   * Issues RETR for a path and copies the file content to an output stream.
-   * The transfer is read as raw bytes; no ASCII conversion is applied.
-   *
-   * @param path
-   *          remote path of the file to retrieve
-   * @param os
-   *          stream that receives the file content
-   * @param limit
-   *          maximum number of bytes to write; the content is cut off at
-   *          exactly this many bytes. A negative value means no limit.
-   * @throws IOException
-   *           if an I/O error occurs on the control or data connection or
-   *           while writing to {@code os}
-   * @throws FtpExceptionCanNotHaveDataConnection
-   *           if no data connection could be established
-   * @throws FtpExceptionUnknownForcedDataClose
-   *           if the server answers the closed data connection with a reply
-   *           that is not known to be harmless
-   * @throws FtpExceptionControlClosedByForcedDataClose
-   *           if the server closes the control connection in response to the
-   *           data connection being closed
-   */
-  public void retrieveFile(String path, OutputStream os, int limit)
-      throws IOException, FtpExceptionCanNotHaveDataConnection,
-      FtpExceptionUnknownForcedDataClose,
-      FtpExceptionControlClosedByForcedDataClose {
-
-    Socket socket = __openPassiveDataConnection(FTPCommand.RETR, path);
-
-    if (socket == null)
-      throw new FtpExceptionCanNotHaveDataConnection("RETR "
-          + ((path == null) ? "" : path));
-
-    try {
-      InputStream input = socket.getInputStream();
-
-      // fixme, should we instruct server here for binary file type?
-
-      int len;
-      int count = 0;
-      byte[] buf = new byte[org.apache.commons.net.io.Util.DEFAULT_COPY_BUFFER_SIZE];
-      while ((len = input.read(buf, 0, buf.length)) != -1) {
-        count += len;
-        // impose download limit if limit >= 0, otherwise no limit
-        // here, cut off is exactly of limit bytes
-        if (limit >= 0 && count > limit) {
-          os.write(buf, 0, len - (count - limit));
-          // flush the truncated tail as well, like every other chunk
-          os.flush();
-          break;
-        }
-        os.write(buf, 0, len);
-        os.flush();
-      }
-    } finally {
-      // The data socket is always closed here, whether or not the limit was
-      // reached and also when copying fails, so it cannot leak. Servers
-      // react differently to an early close; see the reply handling below.
-      // ABOR is deliberately not sent.
-      socket.close();
-    }
-
-    try {
-      int reply = getReply();
-      if (!_notBadReply(reply))
-        throw new FtpExceptionUnknownForcedDataClose(getReplyString());
-    } catch (FTPConnectionClosedException e) {
-      // some ftp servers will close control channel if data channel socket
-      // is closed by our end before all data has been read out. Check:
-      // tux414.q-tam.hp.com FTP server (hp.com version whp02)
-      // so must catch FTPConnectionClosedException thrown by getReply() above
-      throw new FtpExceptionControlClosedByForcedDataClose(e.getMessage());
-    }
-
-  }
-
-  /**
-   * Checks the reply received after the data connection has been closed.
-   * <p>
-   * Besides any positive completion reply, the following codes are accepted
-   * because servers send them when the client closes the data connection
-   * early; no second reply follows any of them:
-   * <ul>
-   * <li>426 (FTPReply.TRANSFER_ABORTED), e.g. foggy FTP server (Version
-   * wu-2.6.2(2))</li>
-   * <li>450 (FTPReply.FILE_ACTION_NOT_TAKEN), e.g. ProFTPD [ftp.kernel.org]</li>
-   * <li>451 (FTPReply.ACTION_ABORTED), e.g. ProFTPD [ftp.kernel.org]</li>
-   * </ul>
-   *
-   * @param reply
-   *          reply code read from the control connection
-   * @return true if the reply is acceptable, false for any other reply
-   */
-  private boolean _notBadReply(int reply) {
-    return FTPReply.isPositiveCompletion(reply)
-        || reply == 426
-        || reply == 450
-        || reply == 451;
-  }
-
-  /***
-   * Sends the TYPE command to select the file type for subsequent transfers,
-   * e.g. <code> FTP.ASCII_FILE_TYPE </code> or
-   * <code> FTP.IMAGE_FILE_TYPE </code>. The type only needs to be set when it
-   * should change; it then stays in effect until changed again. This class
-   * does not remember the selected type.
-   * <p>
-   *
-   * @param fileType
-   *          The <code> _FILE_TYPE </code> constant indicating the type of
-   *          file.
-   * @return True if successfully completed, false if not.
-   * @exception FTPConnectionClosedException
-   *              If the FTP server prematurely closes the connection as a
-   *              result of the client being idle or some other reason causing
-   *              the server to send FTP reply code 421. This exception may be
-   *              caught either as an IOException or independently as itself.
-   * @exception IOException
-   *              If an I/O error occurs while either sending a command to the
-   *              server or receiving a reply from the server.
-   ***/
-  public boolean setFileType(int fileType) throws IOException {
-    int typeReply = type(fileType);
-    return FTPReply.isPositiveCompletion(typeReply);
-  }
-
-  /***
-   * Fetches the system type name from the server and returns the string. The
-   * value is cached after the first successful call: only that call issues a
-   * SYST command, later calls return the cached value until
-   * {@link #disconnect()} resets it.
-   * <p>
-   *
-   * @return The system type name obtained from the server.
-   * @exception FtpExceptionBadSystResponse
-   *              If the server does not answer SYST with a positive
-   *              completion reply, or the reply is too short to carry a
-   *              system name.
-   * @exception FTPConnectionClosedException
-   *              If the FTP server prematurely closes the connection as a
-   *              result of the client being idle or some other reason causing
-   *              the server to send FTP reply code 421. This exception may be
-   *              caught either as an IOException or independently as itself.
-   * @exception IOException
-   *              If an I/O error occurs while either sending a command to the
-   *              server or receiving a reply from the server.
-   ***/
-  public String getSystemName() throws IOException, FtpExceptionBadSystResponse {
-    // Serve the cached value; previously a cached name fell through to the
-    // error branch and every call after the first one threw.
-    if (__systemName != null) {
-      return __systemName;
-    }
-
-    // Technically, we should expect a NAME_SYSTEM_TYPE response, but
-    // in practice FTP servers deviate, so we soften the condition to
-    // a positive completion.
-    if (!FTPReply.isPositiveCompletion(syst())) {
-      throw new FtpExceptionBadSystResponse("Bad response of SYST: "
-          + getReplyString());
-    }
-
-    // The name follows the three digit reply code and its separator.
-    String firstLine = getReplyStrings()[0];
-    if (firstLine.length() < 4) {
-      throw new FtpExceptionBadSystResponse("Bad response of SYST: "
-          + getReplyString());
-    }
-    __systemName = firstLine.substring(4);
-
-    return __systemName;
-  }
-
-  /***
-   * Sends a NOOP command to the FTP server, which is useful to keep the
-   * server from timing out the connection.
-   * <p>
-   *
-   * @return True if successfully completed, false if not.
-   * @exception FTPConnectionClosedException
-   *              If the FTP server prematurely closes the connection as a
-   *              result of the client being idle or some other reason causing
-   *              the server to send FTP reply code 421. This exception may be
-   *              caught either as an IOException or independently as itself.
-   * @exception IOException
-   *              If an I/O error occurs while either sending a command to the
-   *              server or receiving a reply from the server.
-   ***/
-  public boolean sendNoOp() throws IOException {
-    int noopReply = noop();
-    return FTPReply.isPositiveCompletion(noopReply);
-  }
-
- // client.stat(path);
- // client.sendCommand("STAT");
- // client.sendCommand("STAT",path);
- // client.sendCommand("MDTM",path);
- // client.sendCommand("SIZE",path);
- // client.sendCommand("HELP","SITE");
- // client.sendCommand("SYST");
- // client.setRestartOffset(120);
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
deleted file mode 100644
index 772f3bb..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
+++ /dev/null
@@ -1,267 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.commons.net.ftp.FTPFileEntryParser;
-
-import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.hadoop.io.Text;
-import org.apache.nutch.net.protocols.Response;
-
-import org.apache.hadoop.conf.Configuration;
-
-import org.apache.nutch.protocol.Content;
-import org.apache.nutch.metadata.Nutch;
-import org.apache.nutch.protocol.Protocol;
-import org.apache.nutch.protocol.ProtocolOutput;
-import org.apache.nutch.protocol.ProtocolStatus;
-import crawlercommons.robots.BaseRobotRules;
-
-import java.net.URL;
-
-import java.io.IOException;
-
-/**
- * Protocol plugin for the {@code ftp:} scheme. Each request is served by
- * creating an {@link FtpResponse} and turning it into a
- * {@link ProtocolOutput}.
- * <p>
- * Configurable parameters are {@code ftp.username}, {@code ftp.password},
- * {@code ftp.content.limit}, {@code ftp.timeout}, {@code ftp.server.timeout},
- * {@code ftp.keep.connection} and {@code ftp.follow.talk}. For details see the
- * "FTP properties" section in {@code nutch-default.xml}.
- */
-public class Ftp implements Protocol {
-
-  public static final Logger LOG = LoggerFactory.getLogger(Ftp.class);
-
-  private static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
-
-  /** Maximum number of redirects followed for a single request. */
-  static final int MAX_REDIRECTS = 5;
-
-  /** Command-line help printed by {@link #main(String[])}. */
-  private static final String USAGE = "Usage: Ftp [-logLevel level] [-followTalk] [-keepConnection] [-timeout N] [-maxContentLength L] [-dumpContent] url";
-
-  /** Value of {@code ftp.timeout}; {@code main} passes seconds * 1000. */
-  int timeout;
-
-  /** Value of {@code ftp.content.limit}: point at which content is truncated. */
-  int maxContentLength;
-
-  String userName;
-  String passWord;
-
-  // typical/default server timeout is 120*1000 millisec.
-  // better be conservative here
-  int serverTimeout;
-
-  // when to have client start anew
-  long renewalTime = -1;
-
-  boolean keepConnection;
-
-  boolean followTalk;
-
-  // ftp client
-  Client client = null;
-  // ftp dir list entry parser
-  FTPFileEntryParser parser = null;
-
-  private Configuration conf;
-
-  private FtpRobotRulesParser robots = null;
-
-  /** Creates the plugin together with its robots rules parser. */
-  public Ftp() {
-    robots = new FtpRobotRulesParser();
-  }
-
-  /** Set the timeout. */
-  public void setTimeout(int to) {
-    timeout = to;
-  }
-
-  /** Set the point at which content is truncated. */
-  public void setMaxContentLength(int length) {
-    maxContentLength = length;
-  }
-
-  /** Set followTalk */
-  public void setFollowTalk(boolean followTalk) {
-    this.followTalk = followTalk;
-  }
-
-  /** Set keepConnection */
-  public void setKeepConnection(boolean keepConnection) {
-    this.keepConnection = keepConnection;
-  }
-
-  /**
-   * Creates a {@link FtpResponse} object corresponding to the url and returns a
-   * {@link ProtocolOutput} object as per the content received. Redirects
-   * (codes 300-399) are followed up to {@link #MAX_REDIRECTS} times. Any
-   * failure is reported through the returned object, never thrown: the output
-   * then has {@code null} content and a {@link ProtocolStatus} built from the
-   * exception.
-   *
-   * @param url
-   *          Text containing the ftp url
-   * @param datum
-   *          The CrawlDatum object corresponding to the url; the response code
-   *          of every request is stored in its metadata under
-   *          {@link Nutch#PROTOCOL_STATUS_CODE_KEY}
-   *
-   * @return {@link ProtocolOutput} object for the url
-   */
-  public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) {
-    String urlString = url.toString();
-    try {
-      URL u = new URL(urlString);
-
-      int redirects = 0;
-
-      while (true) {
-        // make a request
-        FtpResponse response = new FtpResponse(u, datum, this, getConf());
-
-        int code = response.getCode();
-        datum.getMetaData().put(Nutch.PROTOCOL_STATUS_CODE_KEY,
-            new Text(Integer.toString(code)));
-
-        if (code == 200) { // got a good response
-          return new ProtocolOutput(response.toContent()); // return it
-        }
-        if (code < 300 || code >= 400) { // convert to exception
-          throw new FtpError(code);
-        }
-
-        // handle redirect
-        if (redirects == MAX_REDIRECTS) {
-          throw new FtpException("Too many redirects: " + url);
-        }
-        u = new URL(response.getHeader("Location"));
-        redirects++;
-        LOG.trace("redirect to {}", u);
-      }
-    } catch (Exception e) {
-      // Boundary of the plugin: report the failure as a status.
-      return new ProtocolOutput(null, new ProtocolStatus(e));
-    }
-  }
-
-  /**
-   * Best-effort cleanup of a still-connected client. The disconnect is
-   * attempted even when the logout fails, so the connection is not left open
-   * because of a failed logout.
-   */
-  protected void finalize() {
-    if (this.client == null || !this.client.isConnected()) {
-      return;
-    }
-    try {
-      try {
-        this.client.logout();
-      } finally {
-        this.client.disconnect();
-      }
-    } catch (IOException e) {
-      // Nothing useful can be done at this point; keep it best-effort.
-      LOG.debug("Could not close the ftp connection cleanly", e);
-    }
-  }
-
-  /** Prints the command-line help and terminates the JVM. */
-  private static void usageAndExit() {
-    System.err.println(USAGE);
-    System.exit(-1);
-  }
-
-  /**
-   * For debugging: fetches one url and prints its content type, length and
-   * last-modified metadata to stderr, optionally dumping the content to
-   * stdout.
-   * <p>
-   * NOTE(review): {@link #setConf(Configuration)} is never called here, so
-   * {@link #getConf()} returns {@code null} and the fields normally filled
-   * from the configuration keep their Java defaults - confirm that
-   * {@link FtpResponse} copes with that.
-   */
-  public static void main(String[] args) throws Exception {
-    int timeout = Integer.MIN_VALUE;
-    int maxContentLength = Integer.MIN_VALUE;
-    boolean followTalk = false;
-    boolean keepConnection = false;
-    boolean dumpContent = false;
-    String urlString = null;
-
-    for (int i = 0; i < args.length; i++) {
-      String arg = args[i];
-      boolean isLast = (i == args.length - 1);
-
-      if (arg.equals("-followTalk")) {
-        followTalk = true;
-      } else if (arg.equals("-keepConnection")) {
-        keepConnection = true;
-      } else if (arg.equals("-dumpContent")) {
-        dumpContent = true;
-      } else if (arg.equals("-logLevel") || arg.equals("-timeout")
-          || arg.equals("-maxContentLength")) {
-        // These options need a value; without one show the usage instead of
-        // failing with an ArrayIndexOutOfBoundsException.
-        if (isLast) {
-          usageAndExit();
-          return;
-        }
-        String value = args[++i];
-        if (arg.equals("-timeout")) {
-          timeout = Integer.parseInt(value) * 1000;
-        } else if (arg.equals("-maxContentLength")) {
-          maxContentLength = Integer.parseInt(value);
-        }
-        // -logLevel: the value is accepted for command-line compatibility
-        // but not applied (the call that used it was commented out).
-      } else if (!isLast) {
-        usageAndExit();
-        return;
-      } else {
-        urlString = arg;
-      }
-    }
-
-    // Covers both an empty command line and one made of options only.
-    if (urlString == null) {
-      usageAndExit();
-      return;
-    }
-
-    Ftp ftp = new Ftp();
-
-    ftp.setFollowTalk(followTalk);
-    ftp.setKeepConnection(keepConnection);
-
-    if (timeout != Integer.MIN_VALUE) { // set timeout
-      ftp.setTimeout(timeout);
-    }
-
-    if (maxContentLength != Integer.MIN_VALUE) { // set maxContentLength
-      ftp.setMaxContentLength(maxContentLength);
-    }
-
-    ProtocolOutput output = ftp.getProtocolOutput(new Text(urlString),
-        new CrawlDatum());
-    Content content = output.getContent();
-
-    // getProtocolOutput reports failures as an output without content.
-    if (content == null) {
-      System.err.println("Fetch failed for " + urlString + ": "
-          + output.getStatus());
-      System.exit(-1);
-      return;
-    }
-
-    System.err.println("Content-Type: " + content.getContentType());
-    System.err.println("Content-Length: "
-        + content.getMetadata().get(Response.CONTENT_LENGTH));
-    System.err.println("Last-Modified: "
-        + content.getMetadata().get(Response.LAST_MODIFIED));
-    if (dumpContent) {
-      // NOTE(review): decodes with the platform default charset.
-      System.out.print(new String(content.getContent()));
-    }
-  }
-
-  /**
-   * Set the {@link Configuration} object and read the {@code ftp.*}
-   * properties from it, falling back to built-in defaults. The configuration
-   * is also handed to the robots rules parser.
-   */
-  public void setConf(Configuration conf) {
-    this.conf = conf;
-    this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024);
-    this.timeout = conf.getInt("ftp.timeout", 10000);
-    this.userName = conf.get("ftp.username", "anonymous");
-    this.passWord = conf.get("ftp.password", "anonymous@example.com");
-    this.serverTimeout = conf.getInt("ftp.server.timeout", 60 * 1000);
-    this.keepConnection = conf.getBoolean("ftp.keep.connection", false);
-    this.followTalk = conf.getBoolean("ftp.follow.talk", false);
-    this.robots.setConf(conf);
-  }
-
-  /**
-   * Get the {@link Configuration} object
-   */
-  public Configuration getConf() {
-    return this.conf;
-  }
-
-  /**
-   * Get the robots rules for a given url
-   */
-  public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) {
-    return robots.getRobotRulesSet(this, url);
-  }
-
-  /** Returns the fixed buffer size ({@value #BUFFER_SIZE} bytes). */
-  public int getBufferSize() {
-    return BUFFER_SIZE;
-  }
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
deleted file mode 100644
index b63a67e..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpError.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-/**
- * Thrown for Ftp error codes.
- */
-public class FtpError extends FtpException {
-
-  /** The code this error was created with. */
-  private final int code;
-
-  /**
-   * Creates an error for the given code; the message is
-   * {@code "Ftp Error: " + code}.
-   *
-   * @param code
-   *          the code to report
-   */
-  public FtpError(int code) {
-    super("Ftp Error: " + code);
-    this.code = code;
-  }
-
-  /**
-   * Returns the code this error was created with.
-   *
-   * @return the stored code
-   */
-  public int getCode() {
-    return code;
-  }
-
-  /**
-   * Legacy accessor kept for source compatibility. It used to return its own
-   * argument, because the parameter shadowed the field; it now returns the
-   * stored code and ignores the argument.
-   *
-   * @param code
-   *          ignored
-   * @return the stored code
-   * @deprecated use {@link #getCode()} instead
-   */
-  @Deprecated
-  public int getCode(int code) {
-    return getCode();
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
deleted file mode 100644
index 5a29668..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpException.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-import org.apache.nutch.protocol.ProtocolException;
-
-/**
- * Base class for the significant failures that can occur during an FTP
- * conversation and that have to be handled with care.
- *
- * @author John Xing
- */
-public class FtpException extends ProtocolException {
-
-  /** Creates an exception with the given message and cause. */
-  public FtpException(String message, Throwable cause) {
-    super(message, cause);
-  }
-
-  /** Creates an exception with the given cause. */
-  public FtpException(Throwable cause) {
-    super(cause);
-  }
-
-  /** Creates an exception with the given message. */
-  public FtpException(String message) {
-    super(message);
-  }
-
-  /** Creates an exception without message or cause. */
-  public FtpException() {
-    super();
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
deleted file mode 100644
index 689ac8e..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionBadSystResponse.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-/**
- * Signals that the server answered the SYST command with a bad reply.
- *
- * @author John Xing
- */
-public class FtpExceptionBadSystResponse extends FtpException {
-
-  /** Package-private: created only from within this package. */
-  FtpExceptionBadSystResponse(String message) {
-    super(message);
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
deleted file mode 100644
index 9f35b74..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionCanNotHaveDataConnection.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-/**
- * Signals that a data connection could not be opened.
- *
- * @author John Xing
- */
-public class FtpExceptionCanNotHaveDataConnection extends FtpException {
-
-  /** Package-private: created only from within this package. */
-  FtpExceptionCanNotHaveDataConnection(String message) {
-    super(message);
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
deleted file mode 100644
index c058fcb..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionControlClosedByForcedDataClose.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-/**
- * Signals that the server end closed the control channel because the client
- * (our) end forcibly closed the data channel.
- *
- * @author John Xing
- */
-public class FtpExceptionControlClosedByForcedDataClose extends FtpException {
-
-  /** Package-private: created only from within this package. */
-  FtpExceptionControlClosedByForcedDataClose(String message) {
-    super(message);
-  }
-
-}
http://git-wip-us.apache.org/repos/asf/nutch/blob/ffa16784/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
----------------------------------------------------------------------
diff --git a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java b/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
deleted file mode 100644
index 9083d7c..0000000
--- a/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpExceptionUnknownForcedDataClose.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nutch.protocol.ftp;
-
-/**
- * Signals that the server sent an unrecognizable reply after the client (our)
- * side forcibly closed the data channel.
- *
- * @author John Xing
- */
-public class FtpExceptionUnknownForcedDataClose extends FtpException {
-
-  /** Package-private: created only from within this package. */
-  FtpExceptionUnknownForcedDataClose(String message) {
-    super(message);
-  }
-
-}