You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2007/03/10 07:52:34 UTC
svn commit: r516660 - in /lucene/nutch/trunk: ./
src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/
src/plugin/protocol-file/src/test/ src/plugin/protocol-file/src/test/org/
src/plugin/protocol-file/src/test/org/apache/ src/plugin/protoc...
Author: mattmann
Date: Fri Mar 9 22:52:31 2007
New Revision: 516660
URL: http://svn.apache.org/viewvc?view=rev&rev=516660
Log:
fix for NUTCH-384 (contributed by Heiko Dietze)
Added:
lucene/nutch/trunk/src/plugin/protocol-file/src/test/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=516660&r1=516659&r2=516660
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Mar 9 22:52:31 2007
@@ -155,6 +155,9 @@
52. NUTCH-167 - Observation of robots "noarchive" directive. (ab)
+53. NUTCH-384 - Protocol-file plugin does not allow the parse plugins
+ framework to operate properly (Heiko Dietze via mattmann)
+
Release 0.8 - 2006-07-25
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?view=diff&rev=516660&r1=516659&r2=516660
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Fri Mar 9 22:52:31 2007
@@ -34,6 +34,7 @@
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.protocol.RobotRules;
+import org.apache.nutch.util.NutchConfiguration;
import java.net.URL;
@@ -100,6 +101,7 @@
}
}
} catch (Exception e) {
+ e.printStackTrace();
return new ProtocolOutput(null, new ProtocolStatus(e));
}
}
@@ -137,6 +139,7 @@
}
File file = new File();
+ file.setConf(NutchConfiguration.create());
if (maxContentLength != Integer.MIN_VALUE) // set maxContentLength
file.setMaxContentLength(maxContentLength);
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?view=diff&rev=516660&r1=516659&r2=516660
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Fri Mar 9 22:52:31 2007
@@ -26,6 +26,8 @@
// Nutch imports
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.mime.MimeType;
+import org.apache.nutch.util.mime.MimeTypes;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
@@ -191,9 +193,12 @@
// set headers
headers.set(Response.CONTENT_LENGTH, new Long(size).toString());
- headers.set(Response.LAST_MODIFIED,
- this.file.httpDateFormat.toString(f.lastModified()));
- headers.set(Response.CONTENT_TYPE, ""); // No Content-Type at file protocol level
+ headers.set(Response.LAST_MODIFIED, this.file.httpDateFormat.toString(f
+ .lastModified()));
+ MimeTypes mimeTypes = MimeTypes.get(conf.get("mime.types.file"));
+ MimeType mimeType = mimeTypes.getMimeType(f);
+ String mimeTypeString = mimeType != null ? mimeType.getName() : "";
+ headers.set(Response.CONTENT_TYPE, mimeTypeString);
// response code
this.code = 200; // http OK
Added: lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java?view=auto&rev=516660
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java (added)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java Fri Mar 9 22:52:31 2007
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.protocol.file;
+
+// Hadoop imports
+import org.apache.hadoop.io.Text;
+
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.net.protocols.Response;
+import org.apache.nutch.protocol.ProtocolOutput;
+import org.apache.nutch.protocol.ProtocolStatus;
+import org.apache.nutch.util.NutchConfiguration;
+
+// Junit imports
+import junit.framework.TestCase;
+
+/**
+ * <p>
+ * Unit tests for the {@link File} protocol plugin.
+ * </p>
+ *
+ * @author mattmann
+ * @version $Revision$
+ */
+public class TestProtocolFile extends TestCase {
+
+  private static final org.apache.nutch.protocol.file.File fileProtocol =
+    new org.apache.nutch.protocol.file.File();
+
+  // Fixture name; looked up relative to this class's package on the classpath.
+  private static final String testTextFile = "testprotocolfile.txt";
+
+  private static final CrawlDatum datum = new CrawlDatum();
+
+  private static final String expectedMimeType = "text/plain";
+
+  static {
+    fileProtocol.setConf(NutchConfiguration.create());
+  }
+
+  /**
+   * Tests the setting of the <code>Response.CONTENT_TYPE</code> metadata
+   * field when the file protocol fetches the plain-text fixture.
+   *
+   * @since NUTCH-384
+   */
+  public void testSetContentType() {
+    // Check the lookup itself: a missing fixture would otherwise throw an NPE.
+    java.net.URL resource = getClass().getResource(testTextFile);
+    assertNotNull("Missing test resource: " + testTextFile, resource);
+    Text fileUrl = new Text(resource.toString());
+    ProtocolOutput output = fileProtocol.getProtocolOutput(fileUrl, datum);
+    assertNotNull(output);
+    assertEquals("Status code: [" + output.getStatus().getCode()
+        + "], not equal to: [" + ProtocolStatus.SUCCESS + "]: args: ["
+        + output.getStatus().getArgs() + "]", ProtocolStatus.SUCCESS, output
+        .getStatus().getCode());
+    assertNotNull(output.getContent());
+    assertNotNull(output.getContent().getContentType());
+    assertEquals(expectedMimeType, output.getContent().getContentType());
+    assertNotNull(output.getContent().getMetadata());
+    assertEquals(expectedMimeType, output.getContent().getMetadata().get(
+        Response.CONTENT_TYPE));
+  }
+
+}
Added: lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt?view=auto&rev=516660
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt (added)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/testprotocolfile.txt Fri Mar 9 22:52:31 2007
@@ -0,0 +1 @@
+Protocol File Test