You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/12/08 21:27:07 UTC

svn commit: r1418750 - in /nutch/branches/2.x: ./ conf/ src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/ src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/

Author: lewismc
Date: Sat Dec  8 20:27:06 2012
New Revision: 1418750

URL: http://svn.apache.org/viewvc?rev=1418750&view=rev
Log:
backport NUTCH-1232 Remove site field from index-basic

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/schema-solr4.xml
    nutch/branches/2.x/conf/schema.xml
    nutch/branches/2.x/conf/solrindex-mapping.xml
    nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
    nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Dec  8 20:27:06 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* backport NUTCH-1232 Remove site field from index-basic (lewismc)
+
 * NUTCH-1370 Expose exact number of urls injected @runtime (ferdy, snagel and lewismc)
    (includes commit for NUTCH-1471 make explicit which datastore urls are injected to)
 

Modified: nutch/branches/2.x/conf/schema-solr4.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema-solr4.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/schema-solr4.xml (original)
+++ nutch/branches/2.x/conf/schema-solr4.xml Sat Dec  8 20:27:06 2012
@@ -312,7 +312,6 @@
     <field name="host" type="url" stored="false" indexed="true"/>
     <field name="url" type="url" stored="true" indexed="true" required="true"/>
     <field name="orig" type="url" stored="true" indexed="true" />
-    <field name="site" type="string" stored="false" indexed="true"/>
     <!-- stored=true for highlighting, use term vectors  and positions for fast highlighting -->
     <field name="content" type="text_general" stored="true" indexed="true"/>
     <field name="title" type="text_general" stored="true" indexed="true"/>

Modified: nutch/branches/2.x/conf/schema.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/schema.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/schema.xml (original)
+++ nutch/branches/2.x/conf/schema.xml Sat Dec  8 20:27:06 2012
@@ -22,6 +22,7 @@
         https://issues.apache.org/jira/browse/NUTCH-994
         https://issues.apache.org/jira/browse/NUTCH-997
         https://issues.apache.org/jira/browse/NUTCH-1058
+        https://issues.apache.org/jira/browse/NUTCH-1394
         and
         http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/
         example/solr/conf/schema.xml?view=markup
@@ -74,7 +75,6 @@
 
         <!-- fields for index-basic plugin -->
         <field name="host" type="url" stored="false" indexed="true"/>
-        <field name="site" type="string" stored="false" indexed="true"/>
         <field name="url" type="url" stored="true" indexed="true"
             required="true"/>
         <field name="content" type="text" stored="false" indexed="true"/>

Modified: nutch/branches/2.x/conf/solrindex-mapping.xml
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/conf/solrindex-mapping.xml?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/conf/solrindex-mapping.xml (original)
+++ nutch/branches/2.x/conf/solrindex-mapping.xml Sat Dec  8 20:27:06 2012
@@ -32,7 +32,6 @@
          -->
 	<fields>
 		<field dest="content" source="content"/>
-		<field dest="site" source="site"/>
 		<field dest="title" source="title"/>
 		<field dest="host" source="host"/>
 		<field dest="segment" source="segment"/>

Modified: nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Sat Dec  8 20:27:06 2012
@@ -38,7 +38,6 @@ import org.apache.solr.common.util.DateU
 
 /** Adds basic searchable fields to a document. The fields are:
  * host - add host as un-stored, indexed and tokenized
- * site - add site as un-stored, indexed and un-tokenized
  * url - url is both stored and indexed, so it's both searchable and returned. 
  * This is also a required field.
  * orig - also store original url as both stored and indexed
@@ -95,8 +94,6 @@ public class BasicIndexingFilter impleme
     if (host != null) {
       // add host as un-stored, indexed and tokenized
       doc.add("host", host);
-      // add site as un-stored, indexed and un-tokenized
-      doc.add("site", host);
     }
 
     // url is both stored and indexed, so it's both searchable and returned

Modified: nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java?rev=1418750&r1=1418749&r2=1418750&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java Sat Dec  8 20:27:06 2012
@@ -30,7 +30,7 @@ import junit.framework.TestCase;
 
 /**
  * JUnit test case which tests
- * 1. that the host, site, url, orig, content, title, cache and tstamp fields 
+ * 1. that the host, url, orig, content, title, cache and tstamp fields 
  * are obtained by the filter.
  * 2. that configurable maximum length functionality for titles actually works. .
  * This property defaults at 100 characters @see {@code indexer.max.title.length} 
@@ -64,7 +64,6 @@ public class TestBasicIndexingFilter ext
 	}
 	assertNotNull(doc);
 	assertTrue("check for host field ", doc.getFieldNames().contains("host"));
-	assertTrue("check for site field", doc.getFieldNames().contains("site"));
 	assertTrue("check for url field", doc.getFieldNames().contains("url"));
 	assertTrue("check for orig field", doc.getFieldNames().contains("orig"));
 	assertTrue("check for content field", doc.getFieldNames().contains("content"));