You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/12/08 21:27:07 UTC
svn commit: r1418750 - in /nutch/branches/2.x: ./ conf/
src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/
src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/
Author: lewismc
Date: Sat Dec 8 20:27:06 2012
New Revision: 1418750
URL: http://svn.apache.org/viewvc?rev=1418750&view=rev
Log:
backport NUTCH-1232 Remove site field from index-basic
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/conf/schema-solr4.xml
nutch/branches/2.x/conf/schema.xml
nutch/branches/2.x/conf/solrindex-mapping.xml
nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Dec 8 20:27:06 2012
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* backport NUTCH-1232 Remove site field from index-basic (lewismc)
+
* NUTCH-1370 Expose exact number of urls injected @runtime (ferdy, snagel and lewismc)
(includes commit for NUTCH-1471 make explicit which datastore urls are injected to)
Modified: nutch/branches/2.x/conf/schema-solr4.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema-solr4.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/schema-solr4.xml (original)
+++ nutch/branches/2.x/conf/schema-solr4.xml Sat Dec 8 20:27:06 2012
@@ -312,7 +312,6 @@
<field name="host" type="url" stored="false" indexed="true"/>
<field name="url" type="url" stored="true" indexed="true" required="true"/>
<field name="orig" type="url" stored="true" indexed="true" />
- <field name="site" type="string" stored="false" indexed="true"/>
<!-- stored=true for highlighting, use term vectors and positions for fast highlighting -->
<field name="content" type="text_general" stored="true" indexed="true"/>
<field name="title" type="text_general" stored="true" indexed="true"/>
Modified: nutch/branches/2.x/conf/schema.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/schema.xml (original)
+++ nutch/branches/2.x/conf/schema.xml Sat Dec 8 20:27:06 2012
@@ -22,6 +22,7 @@
https://issues.apache.org/jira/browse/NUTCH-994
https://issues.apache.org/jira/browse/NUTCH-997
https://issues.apache.org/jira/browse/NUTCH-1058
+ https://issues.apache.org/jira/browse/NUTCH-1394
and
http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/
example/solr/conf/schema.xml?view=markup
@@ -74,7 +75,6 @@
<!-- fields for index-basic plugin -->
<field name="host" type="url" stored="false" indexed="true"/>
- <field name="site" type="string" stored="false" indexed="true"/>
<field name="url" type="url" stored="true" indexed="true"
required="true"/>
<field name="content" type="text" stored="false" indexed="true"/>
Modified: nutch/branches/2.x/conf/solrindex-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/solrindex-mapping.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/solrindex-mapping.xml (original)
+++ nutch/branches/2.x/conf/solrindex-mapping.xml Sat Dec 8 20:27:06 2012
@@ -32,7 +32,6 @@
-->
<fields>
<field dest="content" source="content"/>
- <field dest="site" source="site"/>
<field dest="title" source="title"/>
<field dest="host" source="host"/>
<field dest="segment" source="segment"/>
Modified: nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Sat Dec 8 20:27:06 2012
@@ -38,7 +38,6 @@ import org.apache.solr.common.util.DateU
/** Adds basic searchable fields to a document. The fields are:
* host - add host as un-stored, indexed and tokenized
- * site - add site as un-stored, indexed and un-tokenized
* url - url is both stored and indexed, so it's both searchable and returned.
* This is also a required field.
* orig - also store original url as both stored and indexed
@@ -95,8 +94,6 @@ public class BasicIndexingFilter impleme
if (host != null) {
// add host as un-stored, indexed and tokenized
doc.add("host", host);
- // add site as un-stored, indexed and un-tokenized
- doc.add("site", host);
}
// url is both stored and indexed, so it's both searchable and returned
Modified: nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java Sat Dec 8 20:27:06 2012
@@ -30,7 +30,7 @@ import junit.framework.TestCase;
/**
* JUnit test case which tests
- * 1. that the host, site, url, orig, content, title, cache and tstamp fields
+ * 1. that the host, url, orig, content, title, cache and tstamp fields
* are obtained by the filter.
* 2. that configurable maximum length functionality for titles actually works.
* This property defaults at 100 characters @see {@code indexer.max.title.length}
@@ -64,7 +64,6 @@ public class TestBasicIndexingFilter ext
}
assertNotNull(doc);
assertTrue("check for host field ", doc.getFieldNames().contains("host"));
- assertTrue("check for site field", doc.getFieldNames().contains("site"));
assertTrue("check for url field", doc.getFieldNames().contains("url"));
assertTrue("check for orig field", doc.getFieldNames().contains("orig"));
assertTrue("check for content field", doc.getFieldNames().contains("content"));