You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/02 19:19:45 UTC
svn commit: r960064 - in /nutch/trunk: ./ conf/ docs/ src/engines/ src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/clustering/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/html/ src/java/org/apache/nutch/indexer/ src/java/or...

Author: ab
Date: Fri Jul  2 17:19:43 2010
New Revision: 960064

URL: http://svn.apache.org/viewvc?rev=960064&view=rev
Log:
NUTCH-837 Remove search servers and Lucene dependencies.

Added:
    nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java   (with props)
Removed:
    nutch/trunk/conf/common-terms.utf8
    nutch/trunk/conf/custom-fields.xml
    nutch/trunk/docs/
    nutch/trunk/src/engines/
    nutch/trunk/src/java/org/apache/nutch/analysis/
    nutch/trunk/src/java/org/apache/nutch/clustering/
    nutch/trunk/src/java/org/apache/nutch/html/
    nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
    nutch/trunk/src/java/org/apache/nutch/indexer/HighFreqTerms.java
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
    nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
    nutch/trunk/src/java/org/apache/nutch/indexer/NutchSimilarity.java
    nutch/trunk/src/java/org/apache/nutch/indexer/field/
    nutch/trunk/src/java/org/apache/nutch/indexer/lucene/
    nutch/trunk/src/java/org/apache/nutch/ontology/
    nutch/trunk/src/java/org/apache/nutch/searcher/
    nutch/trunk/src/java/org/apache/nutch/servlet/
    nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
    nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
    nutch/trunk/src/java/org/apache/nutch/tools/compat/
    nutch/trunk/src/plugin/analysis-de/
    nutch/trunk/src/plugin/analysis-fr/
    nutch/trunk/src/plugin/clustering-carrot2/
    nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
    nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
    nutch/trunk/src/plugin/field-basic/
    nutch/trunk/src/plugin/field-boost/
    nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java
    nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagQueryFilter.java
    nutch/trunk/src/plugin/ontology/
    nutch/trunk/src/plugin/query-basic/
    nutch/trunk/src/plugin/query-custom/
    nutch/trunk/src/plugin/query-more/
    nutch/trunk/src/plugin/query-site/
    nutch/trunk/src/plugin/query-url/
    nutch/trunk/src/plugin/response-json/
    nutch/trunk/src/plugin/response-xml/
    nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/searcher/
    nutch/trunk/src/plugin/summary-basic/
    nutch/trunk/src/plugin/summary-lucene/
    nutch/trunk/src/test/org/apache/nutch/analysis/
    nutch/trunk/src/test/org/apache/nutch/clustering/
    nutch/trunk/src/test/org/apache/nutch/db/
    nutch/trunk/src/test/org/apache/nutch/indexer/TestDeleteDuplicates.java
    nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexSorter.java
    nutch/trunk/src/test/org/apache/nutch/ontology/
    nutch/trunk/src/test/org/apache/nutch/searcher/
    nutch/trunk/src/web/
    nutch/trunk/src/xmlcatalog/
Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/build.xml
    nutch/trunk/conf/log4j.properties
    nutch/trunk/conf/nutch-default.xml
    nutch/trunk/default.properties
    nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
    nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
    nutch/trunk/src/java/org/apache/nutch/indexer/NutchDocument.java
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
    nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
    nutch/trunk/src/plugin/build.xml
    nutch/trunk/src/plugin/creativecommons/plugin.xml
    nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
    nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
    nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
    nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
    nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
    nutch/trunk/src/plugin/languageidentifier/plugin.xml
    nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
    nutch/trunk/src/plugin/microformats-reltag/plugin.xml
    nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java
    nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
    nutch/trunk/src/plugin/subcollection/plugin.xml
    nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java
    nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java
    nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jul  2 17:19:43 2010
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.0 - Current Development
 
+* NUTCH-837 Remove search servers and Lucene dependencies (ab)
+
 * NUTCH-836 Remove deprecated parse plugins (jnioche)
 
 * NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)

Modified: nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Fri Jul  2 17:19:43 2010
@@ -43,12 +43,6 @@
     <pathelement location="${build.dir}/${final.name}.job" />
   </path>
 
-  <!-- xmlcatalog definition for xslt task -->
-  <xmlcatalog id="docDTDs">
-     <dtd publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"            
-          location="${xmlcatalog.dir}/xhtml1-transitional.dtd"/> 
-  </xmlcatalog> 
-
   <!-- ====================================================== -->
   <!-- Stuff needed by all targets                            -->
   <!-- ====================================================== -->
@@ -113,18 +107,6 @@
     <ant dir="src/plugin" target="deploy" inheritAll="false"/>
   </target>
 
-  <target name="generate-src" depends="init">
-    <javacc target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"
-            javacchome="${javacc.home}">
-    </javacc>
-
-    <fixcrlf srcdir="${src.dir}" eol="lf" includes="**/*.java"/>
-
-  </target>
-
-  <target name="dynamic" depends="generate-src, compile">
-  </target>
-
   <!-- ================================================================== -->
   <!-- Make nutch.jar                                                     -->
   <!-- ================================================================== -->
@@ -163,57 +145,6 @@
   </target>
 
   <!-- ================================================================== -->
-  <!-- Make nutch.war                                                     -->
-  <!-- ================================================================== -->
-  <!--                                                                    -->
-  <!-- ================================================================== -->
-  <target name="war" depends="jar,compile,generate-docs">
-
-    <!-- generate the nutch.xml (servlet context) file -->
-    <xslt in="${basedir}/conf/nutch-default.xml"
-          out="${build.dir}/nutch.xml"
-          style="${basedir}/conf/context.xsl">
-        <xmlcatalog refid="docDTDs"/>
-    	<outputproperty name="indent" value="yes"/>
-    </xslt>
-    <war destfile="${build.dir}/${final.name}.war"
-    	webxml="${web.src.dir}/web.xml">
-      <fileset dir="${web.src.dir}/jsp"/>
-      <zipfileset dir="${docs.src}" includes="include/*.html"/>
-      <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
-      <fileset dir="${docs.dir}"/>
-      <lib dir="${lib.dir}">
-        <include name="lucene*.jar"/>
-        <include name="taglibs-*.jar"/>
-        <include name="hadoop-*.jar"/>
-        <include name="dom4j-*.jar"/>
-        <include name="xerces-*.jar"/>
-        <include name="tika-*.jar"/>
-        <include name="apache-solr-*.jar"/>
-        <include name="commons-httpclient-*.jar"/>
-        <include name="commons-codec-*.jar"/>
-        <include name="commons-collections-*.jar"/>
-        <include name="commons-beanutils-*.jar"/>
-        <include name="commons-cli-*.jar"/>
-        <include name="commons-lang-*.jar"/>
-        <include name="commons-logging-*.jar"/>
-        <include name="log4j-*.jar"/>
-      </lib>
-      <lib dir="${build.dir}">
-	      <include name="${final.name}.jar"/>
-      </lib>
-      <classes dir="${conf.dir}" excludes="**/*.template"/>
-      <classes dir="${web.src.dir}/locale"/>
-      <classes file="${web.src.dir}/log4j.properties"/>
-      <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
-      <webinf dir="${lib.dir}">
-	      <include name="taglibs-*.tld"/>
-      </webinf>
-    </war>
-   </target>
-
-
-  <!-- ================================================================== -->
   <!-- Compile test code                                                  --> 
   <!-- ================================================================== -->
   <target name="compile-core-test" depends="compile-core">
@@ -254,9 +185,6 @@
 	<fileset dir="${basedir}/src">
         	<include name="java/**/*.java"/>
 	        <include name="plugin/**/*.java"/>
-		<!-- Exclude generated sources -->
-		<exclude name="**/NutchAnalysis.java" />
-		<exclude name="**/NutchAnalysisTokenManager.java" />
       </fileset>
     </pmd>
 	<condition property="pmd.stop" value="true">
@@ -337,44 +265,25 @@
 
       <packageset dir="${src.dir}"/>
       <packageset dir="${plugins.dir}/lib-http/src/java"/>
-      <packageset dir="${plugins.dir}/lib-parsems/src/java"/>
       <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
       <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
-      <packageset dir="${plugins.dir}/ontology/src/java"/>
       <packageset dir="${plugins.dir}/protocol-file/src/java"/>
       <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
       <packageset dir="${plugins.dir}/protocol-http/src/java"/>
       <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
       <packageset dir="${plugins.dir}/parse-ext/src/java"/>
-      <packageset dir="${plugins.dir}/parse-html/src/java"/>
       <packageset dir="${plugins.dir}/parse-js/src/java"/>
-      <packageset dir="${plugins.dir}/parse-text/src/java"/>
-      <packageset dir="${plugins.dir}/parse-pdf/src/java"/>
-<!--  <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
-<!--  <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
-      <packageset dir="${plugins.dir}/parse-msexcel/src/java"/>
-      <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/>
-      <packageset dir="${plugins.dir}/parse-msword/src/java"/>
-      <packageset dir="${plugins.dir}/parse-oo/src/java"/>
       <packageset dir="${plugins.dir}/parse-rss/src/java"/>
       <packageset dir="${plugins.dir}/parse-swf/src/java"/>
       <packageset dir="${plugins.dir}/parse-zip/src/java"/>
       <packageset dir="${plugins.dir}/index-basic/src/java"/>
       <packageset dir="${plugins.dir}/index-more/src/java"/>
-      <packageset dir="${plugins.dir}/query-basic/src/java"/>
-      <packageset dir="${plugins.dir}/query-more/src/java"/>
-      <packageset dir="${plugins.dir}/query-site/src/java"/>
-      <packageset dir="${plugins.dir}/query-url/src/java"/>
       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
-      <packageset dir="${plugins.dir}/summary-basic/src/java"/>
-      <packageset dir="${plugins.dir}/summary-lucene/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
       <packageset dir="${plugins.dir}/creativecommons/src/java"/>
       <packageset dir="${plugins.dir}/languageidentifier/src/java"/>
-      <packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>
-      <packageset dir="${plugins.dir}/ontology/src/java"/>
       
       <link href="${javadoc.link.java}"/>
       <link href="${javadoc.link.lucene}"/>
@@ -393,12 +302,7 @@
       <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
       <group title="Scoring Plugins" packages="${plugins.scoring}"/>
       <group title="Parse Plugins" packages="${plugins.parse}"/>
-      <group title="Analysis Plugins" packages="${plugins.analysis}"/>
       <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
-      <group title="Query Filter Plugins" packages="${plugins.query}"/>
-      <group title="Summary Plugins" packages="${plugins.summary}"/>
-      <group title="Clustering Plugins" packages="${plugins.clustering}"/>
-      <group title="Ontology Plugins" packages="${plugins.ontology}"/>
       <group title="Misc. Plugins" packages="${plugins.misc}"/>
     </javadoc>
     <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
@@ -411,129 +315,12 @@
            includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
   </target>
 
-  <target name="generate-locale" if="doc.locale">
-    <echo message="Generating docs for locale=${doc.locale}"/>
-
-    <mkdir dir="${build.docs}/${doc.locale}/include"/>
-    <xslt in="${docs.src}/include/${doc.locale}/header.xml"
-          out="${build.docs}/${doc.locale}/include/header.html"
-          style="${docs.src}/style/nutch-header.xsl">
-        <xmlcatalog refid="docDTDs"/>
-    </xslt>
-
-    <dependset>
-       <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
-       <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
-       <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
-    </dependset>  
-
-    <copy file="${docs.src}/style/nutch-page.xsl"
-          todir="${build.docs}/${doc.locale}"
-          preservelastmodified="true"/>
-
-    <xslt basedir="${docs.src}/pages/${doc.locale}"
-          destdir="${docs.dir}/${doc.locale}"
-          includes="*.xml"
-          style="${build.docs}/${doc.locale}/nutch-page.xsl">
-         <xmlcatalog refid="docDTDs"/>
-    </xslt>
-  </target>
-
-
-  <target name="generate-docs" depends="init">
-    <dependset>
-       <srcfileset dir="${docs.src}/include" includes="*.html"/>
-       <targetfileset dir="${docs.dir}" includes="**/*.html"/>
-    </dependset>  
-
-    <mkdir dir="${build.docs}/include"/>
-    <copy todir="${build.docs}/include">
-      <fileset dir="${docs.src}/include"/>
-    </copy>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="ca"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="de"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="en"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="es"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="fi"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="fr"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="hu"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="it"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="jp"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="ms"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="nl"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="pl"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="pt"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="sh"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="sr"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="sv"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="th"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="zh"/>
-    </antcall>
-
-    <fixcrlf srcdir="${docs.dir}" eol="lf" encoding="utf-8"
-             includes="**/*.html"/>
-
-  </target>
-
   <!-- ================================================================== -->
   <!-- D I S T R I B U T I O N                                            -->
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="package" depends="jar, job, war, javadoc">
+  <target name="package" depends="jar, job, javadoc">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
     <mkdir dir="${dist.dir}/bin"/>
@@ -549,13 +336,8 @@
       <fileset dir="${build.plugins}"/>
     </copy>
 
-    <copy todir="${dist.dir}/webapps">
-      <fileset dir="${build.webapps}"/>
-    </copy>
-
     <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
     <copy file="${build.dir}/${final.name}.job" todir="${dist.dir}"/>
-    <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>
 
     <copy todir="${dist.dir}/bin">
       <fileset dir="bin"/>

Modified: nutch/trunk/conf/log4j.properties
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/log4j.properties?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/conf/log4j.properties (original)
+++ nutch/trunk/conf/log4j.properties Fri Jul  2 17:19:43 2010
@@ -22,9 +22,6 @@ log4j.logger.org.apache.nutch.segment.Se
 log4j.logger.org.apache.nutch.crawl.CrawlDb=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.LinkDb=INFO,cmdstdout
 log4j.logger.org.apache.nutch.crawl.LinkDbMerger=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.Indexer=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.DeleteDuplicates=INFO,cmdstdout
-log4j.logger.org.apache.nutch.indexer.IndexMerger=INFO,cmdstdout
 
 log4j.logger.org.apache.nutch=INFO
 log4j.logger.org.apache.hadoop=WARN

Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Fri Jul  2 17:19:43 2010
@@ -769,129 +769,6 @@
   </description>
 </property>
 
-
-<!-- analysis properties -->
-
-<property>
-  <name>analysis.common.terms.file</name>
-  <value>common-terms.utf8</value>
-  <description>The name of a file containing a list of common terms
-  that should be indexed in n-grams.</description>
-</property>
-
-<!-- searcher properties -->
-
-<property>
-  <name>searcher.dir</name>
-  <value>crawl</value>
-  <description>
-  Path to root of crawl.  This directory is searched (in
-  order) for either the file search-servers.txt, containing a list of
-  distributed search servers, or the directory "index" containing
-  merged indexes, or the directory "segments" containing segment
-  indexes.
-  </description>
-</property>
-
-<property>
-  <name>searcher.filter.cache.size</name>
-  <value>16</value>
-  <description>
-  Maximum number of filters to cache.  Filters can accelerate certain
-  field-based queries, like language, document format, etc.  Each
-  filter requires one bit of RAM per page.  So, with a 10 million page
-  index, a cache size of 16 consumes two bytes per page, or 20MB.
-  </description>
-</property>
-
-<property>
-  <name>searcher.filter.cache.threshold</name>
-  <value>0.05</value>
-  <description>
-  Filters are cached when their term is matched by more than this
-  fraction of pages.  For example, with a threshold of 0.05, and 10
-  million pages, the term must match more than 1/20, or 50,000 pages.
-  So, if out of 10 million pages, 50% of pages are in English, and 2%
-  are in Finnish, then, with a threshold of 0.05, searches for
-  "lang:en" will use a cached filter, while searches for "lang:fi"
-  will score all 20,000 finnish documents.
-  </description>
-</property>
-
-<property>
-  <name>searcher.hostgrouping.rawhits.factor</name>
-  <value>2.0</value>
-  <description>
-  A factor that is used to determine the number of raw hits
-  initially fetched, before host grouping is done.
-  </description>
-</property>
-
-<property>
-  <name>searcher.summary.context</name>
-  <value>5</value>
-  <description>
-  The number of context terms to display preceding and following
-  matching terms in a hit summary.
-  </description>
-</property>
-
-<property>
-  <name>searcher.summary.length</name>
-  <value>20</value>
-  <description>
-  The total number of terms to display in a hit summary.
-  </description>
-</property>
-
-<property>
-  <name>searcher.max.hits</name>
-  <value>-1</value>
-  <description>If positive, search stops after this many hits are
-  found.  Setting this to small, positive values (e.g., 1000) can make
-  searches much faster.  With a sorted index, the quality of the hits
-  suffers little.</description>
-</property>
-
-<property>
-  <name>searcher.max.time.tick_count</name>
-  <value>-1</value>
-  <description>If positive value is defined here, limit search time for
-  every request to this number of elapsed ticks (see the tick_length
-  property below). The total maximum time for any search request will be
-  then limited to tick_count * tick_length milliseconds. When search time
-  is exceeded, partial results will be returned, and the total number of
-  hits will be estimated.
-  </description>
-</property>
-
-<property>
-  <name>searcher.max.time.tick_length</name>
-  <value>200</value>
-  <description>The number of milliseconds between ticks. Larger values
-  reduce the timer granularity (precision). Smaller values bring more
-  overhead.
-  </description>
-</property>
-
-<property>
-  <name>searcher.num.handlers</name>
-  <value>10</value>
-  <description>The number of handlers for the distributed search server.
-  </description>
-</property>
-
-<property>
-  <name>searcher.max.hits.per.page</name>
-  <value>1000</value>
-  <description> The maximum number of hits to show per page. -1 if
-    unlimited. If the number of hits requested by the user (via
-    hitsPerPage parameter in the query string) is more than the value
-    specified in this property, then this value is assumed as the number
-    of hits per page.
-  </description>
-</property>
-
 <!-- URL normalizer properties -->
 
 <property>
@@ -956,7 +833,7 @@
 
 <property>
   <name>plugin.includes</name>
-  <value>protocol-http|urlfilter-regex|parse-tika|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
+  <value>protocol-http|urlfilter-regex|parse-tika|index-(basic|anchor)|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
   <description>Regular expression naming plugin directory names to
   include.  Any plugin not matching this expression is excluded.
   In any case you need at least include the nutch-extensionpoints plugin. By
@@ -1115,137 +992,6 @@
   </description>
 </property>
 
-<!-- clustering extension properties -->
-
-<property>
-  <name>extension.clustering.hits-to-cluster</name>
-  <value>100</value>
-  <description>Number of snippets retrieved for the clustering extension
-  if clustering extension is available and user requested results
-  to be clustered.</description>
-</property>
-
-<property>
-  <name>extension.clustering.extension-name</name>
-  <value></value>
-  <description>Use the specified online clustering extension. If empty,
-  the first available extension will be used. The "name" here refers to an 'id'
-  attribute of the 'implementation' element in the plugin descriptor XML
-  file.</description>
-</property>
-
-<!-- ontology extension properties -->
-
-<property>
-  <name>extension.ontology.extension-name</name>
-  <value></value>
-  <description>Use the specified online ontology extension. If empty,
-  the first available extension will be used. The "name" here refers to an 'id'
-  attribute of the 'implementation' element in the plugin descriptor XML
-  file.</description>
-</property>
-
-<property>
-  <name>extension.ontology.urls</name>
-  <value>
-  </value>
-  <description>Urls of owl files, separated by spaces, such as
-  http://www.example.com/ontology/time.owl
-  http://www.example.com/ontology/space.owl
-  http://www.example.com/ontology/wine.owl
-  Or
-  file:/ontology/time.owl
-  file:/ontology/space.owl
-  file:/ontology/wine.owl
-  You have to make sure each url is valid.
-  By default, there is no owl file, so query refinement based on ontology
-  is silently ignored.
-  </description>
-</property>
-
-<!-- query-basic plugin properties -->
-
-<property>
-  <name>query.url.boost</name>
-  <value>4.0</value>
-  <description> Used as a boost for url field in Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.anchor.boost</name>
-  <value>2.0</value>
-  <description> Used as a boost for anchor field in Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.title.boost</name>
-  <value>1.5</value>
-  <description> Used as a boost for title field in Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.host.boost</name>
-  <value>2.0</value>
-  <description> Used as a boost for host field in Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.phrase.boost</name>
-  <value>1.0</value>
-  <description> Used as a boost for phrase in Lucene query.
-  Multiplied by boost for field phrase is matched in.
-  </description>
-</property>
-
-<!--
-<property>
-  <name>query.basic.description.boost</name>
-  <value>1.0</value>
-  <description> Declares a custom field and its boost to be added to the default fields of the Lucene query.
-  </description>
-</property>
--->
-
-<!-- creative-commons plugin properties -->
-
-<property>
-  <name>query.cc.boost</name>
-  <value>0.0</value>
-  <description> Used as a boost for cc field in Lucene query.
-  </description>
-</property>
-
-<!-- query-more plugin properties -->
-
-<property>
-  <name>query.type.boost</name>
-  <value>0.0</value>
-  <description> Used as a boost for type field in Lucene query.
-  </description>
-</property>
-
-<!-- query-site plugin properties -->
-
-<property>
-  <name>query.site.boost</name>
-  <value>0.0</value>
-  <description> Used as a boost for site field in Lucene query.
-  </description>
-</property>
-
-<!-- microformats-reltag plugin properties -->
-
-<property>
-  <name>query.tag.boost</name>
-  <value>1.0</value>
-  <description> Used as a boost for tag field in Lucene query.
-  </description>
-</property>
-
 <!-- language-identifier plugin properties -->
 
 <property>
@@ -1280,13 +1026,6 @@
   </description>
 </property>
 
-<property>
-  <name>query.lang.boost</name>
-  <value>0.0</value>
-  <description> Used as a boost for lang field in Lucene query.
-  </description>
-</property>
-
 <!-- Temporary Hadoop 0.17.x workaround. -->
 
 <property>
@@ -1300,65 +1039,6 @@
   </description>
 </property>
 
-<!-- response writer properties -->
-
-<property>
-  <name>search.response.default.type</name>
-  <value>xml</value>
-  <description>
-  The default response type returned if none is specified.
-  </description>
-</property>
-
-<property>
-  <name>search.response.default.lang</name>
-  <value>en</value>
-  <description>
-  The default response language if none is specified.
-  </description>
-</property>
-
-<property>
-  <name>search.response.default.numrows</name>
-  <value>10</value>
-  <description>
-  The default number of rows to return if none is specified.
-  </description>
-</property>
-
-<property>
-  <name>search.response.default.dedupfield</name>
-  <value>site</value>
-  <description>
-  The default dedup field if none is specified.
-  </description>
-</property>
-
-<property>
-  <name>search.response.default.numdupes</name>
-  <value>1</value>
-  <description>
-  The default number of duplicates returned if none is specified.
-  </description>
-</property>
-
-<property>
-  <name>searcher.response.maxage</name>
-  <value>86400</value>
-  <description>
-  The maxage of a response in seconds. Used in caching headers.
-  </description>
-</property>
-
-<property>
-  <name>searcher.response.prettyprint</name>
-  <value>true</value>
-  <description>
-  Should the response output be pretty printed.  Setting to true enables better
-  debugging, false removes unneeded spaces and gives better throughput.
-  </description>
-</property>
-
 <!-- solr index properties -->
 <property>
   <name>solrindex.mapping.file</name>

Modified: nutch/trunk/default.properties
URL: http://svn.apache.org/viewvc/nutch/trunk/default.properties?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/default.properties (original)
+++ nutch/trunk/default.properties Fri Jul  2 17:19:43 2010
@@ -9,16 +9,11 @@ src.dir = ./src/java
 lib.dir = ./lib
 conf.dir = ./conf
 plugins.dir = ./src/plugin
-docs.dir = ./docs
-docs.src = ${basedir}/src/web
-xmlcatalog.dir = ${basedir}/src/xmlcatalog
 
 build.dir = ./build
 build.classes = ${build.dir}/classes
-build.webapps = ${build.dir}/webapps
 build.plugins = ${build.dir}/plugins
-build.docs = ${build.dir}/docs
-build.javadoc = ${build.docs}/api
+build.javadoc = ${build.dir}/docs/api
 build.encoding = UTF-8
 
 test.src.dir = ./src/test
@@ -29,9 +24,6 @@ test.build.javadoc = ${test.build.dir}/d
 
 javacc.home=/usr/java/javacc
 
-web.src.dir = ./src/web
-src.webapps = ./src/webapps
-
 # Proxy Host and Port to use for building JavaDoc
 javadoc.proxy.host=-J-DproxyHost=
 javadoc.proxy.port=-J-DproxyPort=
@@ -45,7 +37,7 @@ dist.dir=${build.dir}/${final.name}
 javac.debug=on
 javac.optimize=on
 javac.deprecation=off
-javac.version= 1.5
+javac.version= 1.6
 
 #
 # Plugins API
@@ -89,13 +81,6 @@ plugins.parse=\
    org.apache.nutch.parse.zip
 
 #
-# Analysis Plugins
-#
-plugins.analysis=\
-#  ${plugin.analysis-de}:\
-#  ${plugin.analysis-fr}
-
-#
 # Indexing Filter Plugins
 #
 plugins.index=\
@@ -112,25 +97,6 @@ plugins.query=\
    org.apache.nutch.searcher.url*
 
 #
-# Ontology Plugins
-#
-plugins.ontology=\
-   org.apache.nutch.ontology.jena*
-
-#
-# Online Clusterer Plugins
-#
-plugins.clustering=\
-   org.apache.nutch.clustering.carrot2*
-
-#
-# Summary Plugins
-#
-plugins.summary=\
-   org.apache.nutch.summary.basic*:\
-   org.apache.nutch.summary.lucene*
-
-#
 # Misc. Plugins
 #
 # (gathers plugins that cannot be dispatched
@@ -138,6 +104,5 @@ plugins.summary=\
 # many extension points)
 #
 plugins.misc=\
-   org.apache.nutch.analysis.lang*:\
    org.apache.nutch.microformats.reltag*:\
    org.creativecommons.nutch*

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Fri Jul  2 17:19:43 2010
@@ -29,9 +29,7 @@ import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.mapred.*;
 import org.apache.nutch.parse.ParseSegment;
-import org.apache.nutch.indexer.DeleteDuplicates;
-import org.apache.nutch.indexer.IndexMerger;
-import org.apache.nutch.indexer.Indexer;
+import org.apache.nutch.indexer.solr.SolrDeleteDuplicates;
 import org.apache.nutch.indexer.solr.SolrIndexer;
 import org.apache.nutch.util.HadoopFSUtil;
 import org.apache.nutch.util.NutchConfiguration;
@@ -52,8 +50,7 @@ public class Crawl {
   public static void main(String args[]) throws Exception {
     if (args.length < 1) {
       System.out.println
-      ("Usage: Crawl <urlDir> [-dir d] [-threads n] [-depth i] [-topN N]" +
-        " [-solr solrURL]");
+      ("Usage: Crawl <urlDir> -solr <solrURL> [-dir d] [-threads n] [-depth i] [-topN N]");
       return;
     }
 
@@ -65,7 +62,6 @@ public class Crawl {
     int threads = job.getInt("fetcher.threads.fetch", 10);
     int depth = 5;
     long topN = Long.MAX_VALUE;
-    String indexerName = "lucene";
     String solrUrl = null;
     
     for (int i = 0; i < args.length; i++) {
@@ -82,15 +78,17 @@ public class Crawl {
           topN = Integer.parseInt(args[i+1]);
           i++;
       } else if ("-solr".equals(args[i])) {
-        indexerName = "solr";
         solrUrl = StringUtils.lowerCase(args[i + 1]);
         i++;
       } else if (args[i] != null) {
         rootUrlDir = new Path(args[i]);
       }
     }
+    
+    if (solrUrl == null) {
+      LOG.warn("solrUrl is not set, indexing will be skipped...");
+    }
 
-    boolean isSolrIndex = StringUtils.equalsIgnoreCase(indexerName, "solr");
     FileSystem fs = FileSystem.get(job);
 
     if (LOG.isInfoEnabled()) {
@@ -98,10 +96,7 @@ public class Crawl {
       LOG.info("rootUrlDir = " + rootUrlDir);
       LOG.info("threads = " + threads);
       LOG.info("depth = " + depth);      
-      LOG.info("indexer=" + indexerName);
-      if (isSolrIndex) {
-        LOG.info("solrUrl=" + solrUrl);
-      }
+      LOG.info("solrUrl=" + solrUrl);
       if (topN != Long.MAX_VALUE)
         LOG.info("topN = " + topN);
     }
@@ -139,41 +134,16 @@ public class Crawl {
     if (i > 0) {
       linkDbTool.invert(linkDb, segments, true, true, false); // invert links
 
-      // index, dedup & merge
-      FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
-      if (isSolrIndex) {
+      if (solrUrl != null) {
+        // index, dedup & merge
+        FileStatus[] fstats = fs.listStatus(segments, HadoopFSUtil.getPassDirectoriesFilter(fs));
         SolrIndexer indexer = new SolrIndexer(conf);
         indexer.indexSolr(solrUrl, crawlDb, linkDb, 
-            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
+          Arrays.asList(HadoopFSUtil.getPaths(fstats)));
+        SolrDeleteDuplicates dedup = new SolrDeleteDuplicates();
+        dedup.setConf(conf);
+        dedup.dedup(solrUrl);
       }
-      else {
-        
-        DeleteDuplicates dedup = new DeleteDuplicates(conf);        
-        if(indexes != null) {
-          // Delete old indexes
-          if (fs.exists(indexes)) {
-            LOG.info("Deleting old indexes: " + indexes);
-            fs.delete(indexes, true);
-          }
-
-          // Delete old index
-          if (fs.exists(index)) {
-            LOG.info("Deleting old merged index: " + index);
-            fs.delete(index, true);
-          }
-        }
-        
-        Indexer indexer = new Indexer(conf);
-        indexer.index(indexes, crawlDb, linkDb, 
-            Arrays.asList(HadoopFSUtil.getPaths(fstats)));
-        
-        IndexMerger merger = new IndexMerger(conf);
-        if(indexes != null) {
-          dedup.dedup(new Path[] { indexes });
-          fstats = fs.listStatus(indexes, HadoopFSUtil.getPassDirectoriesFilter(fs));
-          merger.merge(HadoopFSUtil.getPaths(fstats), index, tmpDir);
-        }
-      }    
       
     } else {
       LOG.warn("No URLs to fetch - check your seed list and URL filters.");

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java Fri Jul  2 17:19:43 2010
@@ -45,9 +45,6 @@ public class NutchWritable extends Gener
       org.apache.nutch.parse.ParseStatus.class,
       org.apache.nutch.protocol.Content.class,
       org.apache.nutch.protocol.ProtocolStatus.class,
-      org.apache.nutch.searcher.Hit.class,
-      org.apache.nutch.searcher.HitDetails.class,
-      org.apache.nutch.searcher.Hits.class
     };
   }
 

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexerMapReduce.java Fri Jul  2 17:19:43 2010
@@ -152,7 +152,7 @@ implements Mapper<Text, Writable, Text, 
       return;
     }
     // apply boost to all indexed fields.
-    doc.setScore(boost);
+    doc.setWeight(boost);
     // store boost for use by explain and dedup
     doc.add("boost", Float.toString(boost));
 

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -52,13 +52,4 @@ public interface IndexingFilter extends 
    */
   NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException;
-
-  /** Adds index-level configuraition options.
-   * Implementations can update given configuration to pass document-independent
-   * information to indexing backends. As a rule of thumb, prefix meta keys
-   * with the name of the backend intended. For example, when
-   * passing information to lucene backend, prefix keys with "lucene.".
-   * @param conf Configuration instance.
-   * */
-  public void addIndexBackendOptions(Configuration conf);
 }

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java Fri Jul  2 17:19:43 2010
@@ -70,7 +70,6 @@ public class IndexingFilters {
               .getExtensionInstance();
           LOG.info("Adding " + filter.getClass().getName());
           if (!filterMap.containsKey(filter.getClass().getName())) {
-            filter.addIndexBackendOptions(conf);
             filterMap.put(filter.getClass().getName(), filter);
           }
         }
@@ -89,7 +88,6 @@ public class IndexingFilters {
             IndexingFilter filter = filterMap
                 .get(orderedFilters[i]);
             if (filter != null) {
-              filter.addIndexBackendOptions(conf);
               filters.add(filter);
             }
           }

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/NutchDocument.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/NutchDocument.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/NutchDocument.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/NutchDocument.java Fri Jul  2 17:19:43 2010
@@ -19,11 +19,9 @@ package org.apache.nutch.indexer;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
@@ -35,51 +33,48 @@ import org.apache.nutch.metadata.Metadat
 
 /** A {@link NutchDocument} is the unit of indexing.*/
 public class NutchDocument
-implements Writable, Iterable<Entry<String, List<String>>> {
+implements Writable, Iterable<Entry<String, NutchField>> {
 
-  public static final byte VERSION = 1;
-
-  private Map<String, List<String>> fields;
+  public static final byte VERSION = 2;
+  
+  private Map<String, NutchField> fields;
 
   private Metadata documentMeta;
 
-  private float score;
+  private float weight;
 
   public NutchDocument() {
-    fields = new HashMap<String, List<String>>();
+    fields = new HashMap<String, NutchField>();
     documentMeta = new Metadata();
-    score = 0.0f;
+    weight = 1.0f;
   }
 
-  public void add(String name, String value) {
-    List<String> fieldValues = fields.get(name);
-    if (fieldValues == null) {
-      fieldValues = new ArrayList<String>();
+  public void add(String name, Object value) {
+    NutchField field = fields.get(name);
+    if (field == null) {
+      field = new NutchField(value);
+      fields.put(name, field);
+    } else {
+      field.add(value);
     }
-    fieldValues.add(value);
-    fields.put(name, fieldValues);
-  }
-
-  private void addFieldUnprotected(String name, String value) {
-    fields.get(name).add(value);
   }
 
-  public String getFieldValue(String name) {
-    List<String> fieldValues = fields.get(name);
-    if (fieldValues == null) {
+  public Object getFieldValue(String name) {
+    NutchField field = fields.get(name);
+    if (field == null) {
       return null;
     }
-    if (fieldValues.size() == 0) {
+    if (field.getValues().size() == 0) {
       return null;
     }
-    return fieldValues.get(0);
+    return field.getValues().get(0);
   }
 
-  public List<String> getFieldValues(String name) {
+  public NutchField getField(String name) {
     return fields.get(name);
   }
 
-  public List<String> removeField(String name) {
+  public NutchField removeField(String name) {
     return fields.remove(name);
   }
 
@@ -88,16 +83,16 @@ implements Writable, Iterable<Entry<Stri
   }
 
   /** Iterate over all fields. */
-  public Iterator<Entry<String, List<String>>> iterator() {
+  public Iterator<Entry<String, NutchField>> iterator() {
     return fields.entrySet().iterator();
   }
 
-  public float getScore() {
-    return score;
+  public float getWeight() {
+    return weight;
   }
 
-  public void setScore(float score) {
-    this.score = score;
+  public void setWeight(float weight) {
+    this.weight = weight;
   }
 
   public Metadata getDocumentMeta() {
@@ -105,6 +100,7 @@ implements Writable, Iterable<Entry<Stri
   }
 
   public void readFields(DataInput in) throws IOException {
+    fields.clear();
     byte version = in.readByte();
     if (version != VERSION) {
       throw new VersionMismatchException(VERSION, version);
@@ -112,30 +108,23 @@ implements Writable, Iterable<Entry<Stri
     int size = WritableUtils.readVInt(in);
     for (int i = 0; i < size; i++) {
       String name = Text.readString(in);
-      int numValues = WritableUtils.readVInt(in);
-      fields.put(name, new ArrayList<String>());
-      for (int j = 0; j < numValues; j++) {
-        String value = Text.readString(in);
-        addFieldUnprotected(name, value);
-      }
+      NutchField field = new NutchField();
+      field.readFields(in);
+      fields.put(name, field);
     }
-    score = in.readFloat();
+    weight = in.readFloat();
     documentMeta.readFields(in);
   }
 
   public void write(DataOutput out) throws IOException {
     out.writeByte(VERSION);
     WritableUtils.writeVInt(out, fields.size());
-    for (Map.Entry<String, List<String>> entry : fields.entrySet()) {
+    for (Map.Entry<String, NutchField> entry : fields.entrySet()) {
       Text.writeString(out, entry.getKey());
-      List<String> values = entry.getValue();
-      WritableUtils.writeVInt(out, values.size());
-      for (String value : values) {
-        Text.writeString(out, value);
-      }
+      NutchField field = entry.getValue();
+      field.write(out);
     }
-    out.writeFloat(score);
+    out.writeFloat(weight);
     documentMeta.write(out);
   }
-
 }

Added: nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java?rev=960064&view=auto
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java (added)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java Fri Jul  2 17:19:43 2010
@@ -0,0 +1,64 @@
+package org.apache.nutch.indexer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A multi-valued field with a weight; values are arbitrary objects. NOTE(review):
+ * readFields/write below are empty, so no state of this class is (de)serialized.
+ */
+public class NutchField implements Writable {
+  private float weight; // no initializer: stays 0.0f until a constructor, setWeight or reset assigns it
+  private List<Object> values = new ArrayList<Object>();
+  /** Creates a field with no values; weight is not assigned here, so it is 0.0f rather than 1.0f. */
+  public NutchField() {
+    // nothing to initialize: the values list is created by its field initializer
+  }
+  /** Creates a field from {@code value} with weight 1.0f; see the two-argument constructor. */
+  public NutchField(Object value) {
+    this(value, 1.0f);
+  }
+  /** Creates a field with the given weight; a Collection is copied element by element, anything else is one value. */
+  public NutchField(Object value, float weight) {
+    this.weight = weight;
+    if (value instanceof Collection) {
+      values.addAll((Collection<Object>)value);
+    } else {
+      values.add(value);
+    }
+  }
+  /** Appends {@code value} as one element; unlike the constructor, a Collection is not flattened here. */
+  public void add(Object value) {
+    values.add(value);
+  }
+  /** Returns the current weight of this field. */
+  public float getWeight() {
+    return weight;
+  }
+  /** Replaces the weight of this field. */
+  public void setWeight(float weight) {
+    this.weight = weight;
+  }
+  /** Returns the internal value list itself (not a copy), so caller changes are visible to this field. */
+  public List<Object> getValues() {
+    return values;
+  }
+  /** Sets the weight to 1.0f and removes all values. */
+  public void reset() {
+    weight = 1.0f;
+    values.clear();
+  }
+  /** Empty body: reads nothing from {@code in} and leaves weight and values untouched. TODO(review): implement. */
+  public void readFields(DataInput in) throws IOException {
+  }
+  /** Empty body: writes nothing to {@code out}, so weight and values are not persisted. TODO(review): implement. */
+  public void write(DataOutput out) throws IOException {
+  }
+
+}

Propchange: nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Fri Jul  2 17:19:43 2010
@@ -39,7 +39,6 @@ import org.apache.hadoop.mapred.lib.Iden
 import org.apache.hadoop.mapred.lib.NullOutputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
-import org.apache.nutch.indexer.DeleteDuplicates;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 import org.apache.solr.client.solrj.SolrQuery;

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/solr/SolrWriter.java Fri Jul  2 17:19:43 2010
@@ -23,6 +23,7 @@ import java.util.Map.Entry;
 
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.NutchField;
 import org.apache.nutch.indexer.NutchIndexWriter;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
@@ -47,16 +48,16 @@ public class SolrWriter implements Nutch
 
   public void write(NutchDocument doc) throws IOException {
     final SolrInputDocument inputDoc = new SolrInputDocument();
-    for(final Entry<String, List<String>> e : doc) {
-      for (final String val : e.getValue()) {
-        inputDoc.addField(solrMapping.mapKey(e.getKey()), val);
+    for(final Entry<String, NutchField> e : doc) {
+      for (final Object val : e.getValue().getValues()) {
+        inputDoc.addField(solrMapping.mapKey(e.getKey()), val, e.getValue().getWeight());
         String sCopy = solrMapping.mapCopyKey(e.getKey());
         if (sCopy != e.getKey()) {
         	inputDoc.addField(sCopy, val);	
         }
       }
     }
-    inputDoc.setDocumentBoost(doc.getScore());
+    inputDoc.setDocumentBoost(doc.getWeight());
     inputDocs.add(inputDoc);
     if (inputDocs.size() > commitSize) {
       try {

Modified: nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/build.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/build.xml (original)
+++ nutch/trunk/src/plugin/build.xml Fri Jul  2 17:19:43 2010
@@ -26,14 +26,11 @@
   <!-- Build & deploy all the plugin jars.                    -->
   <!-- ====================================================== -->
   <target name="deploy">
-     <ant dir="clustering-carrot2" target="deploy"/>
      <ant dir="creativecommons" target="deploy"/>
      <ant dir="feed" target="deploy"/>
      <ant dir="index-basic" target="deploy"/>
      <ant dir="index-anchor" target="deploy"/>
      <ant dir="index-more" target="deploy"/>
-     <ant dir="field-basic" target="deploy"/>
-     <ant dir="field-boost" target="deploy"/>
      <ant dir="languageidentifier" target="deploy"/>
      <ant dir="lib-http" target="deploy"/>
      <ant dir="lib-lucene-analyzers" target="deploy"/>
@@ -42,7 +39,6 @@
      <ant dir="lib-xml" target="deploy"/>
      <ant dir="microformats-reltag" target="deploy"/>
      <ant dir="nutch-extensionpoints" target="deploy"/>
-     <ant dir="ontology" target="deploy"/>
      <ant dir="protocol-file" target="deploy"/>
      <ant dir="protocol-ftp" target="deploy"/>
      <ant dir="protocol-http" target="deploy"/>
@@ -53,18 +49,9 @@
      <ant dir="parse-swf" target="deploy"/>
      <ant dir="parse-tika" target="deploy"/>
      <ant dir="parse-zip" target="deploy"/>
-     <ant dir="query-basic" target="deploy"/>
-     <ant dir="query-more" target="deploy"/>
-     <ant dir="query-site" target="deploy"/>
-     <ant dir="query-custom" target="deploy"/>
-     <ant dir="query-url" target="deploy"/>
-     <ant dir="response-json" target="deploy"/>
-     <ant dir="response-xml" target="deploy"/>
      <ant dir="scoring-opic" target="deploy"/>
      <ant dir="scoring-link" target="deploy"/>
-     <ant dir="summary-basic" target="deploy"/>
      <ant dir="subcollection" target="deploy"/>
-     <ant dir="summary-lucene" target="deploy"/>
      <ant dir="tld" target="deploy"/>
      <ant dir="urlfilter-automaton" target="deploy"/>
      <ant dir="urlfilter-domain" target="deploy" />
@@ -86,7 +73,6 @@
      <ant dir="index-more" target="test"/>
      <ant dir="languageidentifier" target="test"/>
      <ant dir="lib-http" target="test"/>
-     <ant dir="ontology" target="test"/>
      <ant dir="protocol-httpclient" target="test"/>
      <!--ant dir="parse-ext" target="test"/-->
      <ant dir="parse-rss" target="test"/>
@@ -94,7 +80,6 @@
      <ant dir="parse-swf" target="test"/>
      <ant dir="parse-tika" target="test"/>
      <ant dir="parse-zip" target="test"/>
-     <ant dir="query-url" target="test"/>
      <ant dir="subcollection" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test" />
@@ -110,16 +95,11 @@
   <!-- Clean all of the plugins.                              -->
   <!-- ====================================================== -->
   <target name="clean">
-    <ant dir="analysis-de" target="clean"/>
-    <ant dir="analysis-fr" target="clean"/>
-    <ant dir="clustering-carrot2" target="clean"/>
     <ant dir="creativecommons" target="clean"/>
     <ant dir="feed" target="clean"/>
     <ant dir="index-basic" target="clean"/>
     <ant dir="index-anchor" target="clean"/>
     <ant dir="index-more" target="clean"/>
-    <ant dir="field-basic" target="clean"/>
-    <ant dir="field-boost" target="clean"/>  	
     <ant dir="languageidentifier" target="clean"/>
     <ant dir="lib-commons-httpclient" target="clean"/>
     <ant dir="lib-http" target="clean"/>
@@ -129,7 +109,6 @@
     <ant dir="lib-xml" target="clean"/>
     <ant dir="microformats-reltag" target="clean"/>
     <ant dir="nutch-extensionpoints" target="clean"/>
-    <ant dir="ontology" target="clean"/>
     <ant dir="protocol-file" target="clean"/>
     <ant dir="protocol-ftp" target="clean"/>
     <ant dir="protocol-http" target="clean"/>
@@ -140,18 +119,9 @@
     <ant dir="parse-swf" target="clean"/>
     <ant dir="parse-tika" target="clean"/>
     <ant dir="parse-zip" target="clean"/>
-    <ant dir="query-basic" target="clean"/>
-    <ant dir="query-more" target="clean"/>
-    <ant dir="query-site" target="clean"/>
-    <ant dir="query-url" target="clean"/>
-    <ant dir="query-custom" target="clean"/>
-    <ant dir="response-json" target="clean"/>
-    <ant dir="response-xml" target="clean"/>
     <ant dir="scoring-opic" target="clean"/>
     <ant dir="scoring-link" target="clean"/>
     <ant dir="subcollection" target="clean"/>
-    <ant dir="summary-basic" target="clean"/>
-    <ant dir="summary-lucene" target="clean"/>
     <ant dir="tld" target="clean"/>
     <ant dir="urlfilter-automaton" target="clean"/>
     <ant dir="urlfilter-domain" target="clean" />

Modified: nutch/trunk/src/plugin/creativecommons/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/plugin.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/creativecommons/plugin.xml (original)
+++ nutch/trunk/src/plugin/creativecommons/plugin.xml Fri Jul  2 17:19:43 2010
@@ -45,13 +45,4 @@
                       class="org.creativecommons.nutch.CCIndexingFilter"/>
    </extension>
 
-   <extension id="org.creativecommons.nutch.CCQueryFilter"
-              name="Creative Commmons Query Filter"
-              point="org.apache.nutch.searcher.QueryFilter">
-      <implementation id="CCQueryFilter"
-                      class="org.creativecommons.nutch.CCQueryFilter">
-        <parameter name="fields" value="cc"/>
-      </implementation>
-   </extension>
-
 </plugin>

Modified: nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -24,7 +24,6 @@ import org.apache.nutch.parse.Parse;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.hadoop.io.Text;
 
 import org.apache.nutch.crawl.CrawlDatum;
@@ -111,10 +110,6 @@ public class CCIndexingFilter implements
     doc.add(FIELD, feature);
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions(FIELD, LuceneWriter.STORE.YES, LuceneWriter.INDEX.UNTOKENIZED, conf);
-  }
-
   public void setConf(Configuration conf) {
     this.conf = conf;
   }

Modified: nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -30,7 +30,6 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.nutch.metadata.Feed;
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
@@ -122,24 +121,6 @@ public class FeedIndexingFilter implemen
     return conf;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions(Feed.FEED_AUTHOR,
-        LuceneWriter.STORE.YES, LuceneWriter.INDEX.TOKENIZED, conf);
-
-    LuceneWriter.addFieldOptions(Feed.FEED_TAGS,
-        LuceneWriter.STORE.YES, LuceneWriter.INDEX.TOKENIZED, conf);
-
-    LuceneWriter.addFieldOptions(Feed.FEED,
-        LuceneWriter.STORE.YES, LuceneWriter.INDEX.TOKENIZED, conf);
-
-    LuceneWriter.addFieldOptions(PUBLISHED_DATE,
-        LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO_NORMS, conf);
-
-    LuceneWriter.addFieldOptions(UPDATED_DATE,
-        LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO_NORMS, conf);
-
-  }
-
   /**
    * Sets the {@link Configuration} object used to configure this
    * {@link IndexingFilter}.

Modified: nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -25,7 +25,6 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.nutch.parse.Parse;
 
 /**
@@ -57,9 +56,4 @@ public class AnchorIndexingFilter
     return doc;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions("anchor", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-  }
-
 }

Modified: nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -28,7 +28,6 @@ import org.apache.nutch.parse.Parse;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.hadoop.io.Text;
 
 import org.apache.nutch.crawl.CrawlDatum;
@@ -94,40 +93,6 @@ public class BasicIndexingFilter impleme
     return doc;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-
-    ///////////////////////////
-    //    add lucene options   //
-    ///////////////////////////
-
-    // host is un-stored, indexed and tokenized
-    LuceneWriter.addFieldOptions("host", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-
-    // site is un-stored, indexed and un-tokenized
-    LuceneWriter.addFieldOptions("site", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-
-    // url is both stored and indexed, so it's both searchable and returned
-    LuceneWriter.addFieldOptions("url", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-
-    // content is indexed, so that it's searchable, but not stored in index
-    LuceneWriter.addFieldOptions("content", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-
-    // anchors are indexed, so they're searchable, but not stored in index
-    LuceneWriter.addFieldOptions("anchor", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-
-    // title is indexed and stored so that it can be displayed
-    LuceneWriter.addFieldOptions("title", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-
-    LuceneWriter.addFieldOptions("cache", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, conf);
-    LuceneWriter.addFieldOptions("tstamp", LuceneWriter.STORE.YES, LuceneWriter.INDEX.NO, conf);
-  }
-
   public void setConf(Configuration conf) {
     this.conf = conf;
     this.MAX_TITLE_LENGTH = conf.getInt("indexer.max.title.length", 100);

Modified: nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -38,7 +38,6 @@ import org.apache.nutch.parse.Parse;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
@@ -287,32 +286,6 @@ public class MoreIndexingFilter implemen
     return doc;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-
-    ///////////////////////////
-    //    add lucene options //
-    ///////////////////////////
-
-    LuceneWriter.addFieldOptions("type", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-
-    // primaryType and subType are stored, indexed and un-tokenized
-    LuceneWriter.addFieldOptions("primaryType", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-    LuceneWriter.addFieldOptions("subType", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-
-    LuceneWriter.addFieldOptions("contentLength", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.NO, conf);
-
-    LuceneWriter.addFieldOptions("lastModified", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.NO, conf);
-
-    // un-stored, indexed and un-tokenized
-    LuceneWriter.addFieldOptions("date", LuceneWriter.STORE.NO,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-  }
-
   public void setConf(Configuration conf) {
     this.conf = conf;
     MIME = new MimeUtil(conf);

Modified: nutch/trunk/src/plugin/languageidentifier/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/languageidentifier/plugin.xml (original)
+++ nutch/trunk/src/plugin/languageidentifier/plugin.xml Fri Jul  2 17:19:43 2010
@@ -45,16 +45,5 @@
                       class="org.apache.nutch.analysis.lang.LanguageIndexingFilter"/>
    </extension>
 
-
-   <extension id="org.apache.nutch.analysis.lang.LanguageQueryFilter"
-              name="Nutch Language Query Filter"
-              point="org.apache.nutch.searcher.QueryFilter">
-      <implementation id="LanguageQueryFilter"
-                      class="org.apache.nutch.analysis.lang.LanguageQueryFilter">
-        <parameter name="raw-fields" value="lang"/>
-      </implementation>
-   </extension>
-
-
 </plugin>
 

Modified: nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -23,7 +23,6 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.metadata.Metadata;
@@ -95,11 +94,6 @@ public class LanguageIndexingFilter impl
     return doc;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions("lang", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-  }
-  
   public void setConf(Configuration conf) {
     this.conf = conf;
     this.languageIdentifier = new LanguageIdentifier(conf);

Modified: nutch/trunk/src/plugin/microformats-reltag/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/microformats-reltag/plugin.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/microformats-reltag/plugin.xml (original)
+++ nutch/trunk/src/plugin/microformats-reltag/plugin.xml Fri Jul  2 17:19:43 2010
@@ -45,17 +45,5 @@
                       class="org.apache.nutch.microformats.reltag.RelTagIndexingFilter"/>
    </extension>
 
-
-   <extension id="org.apache.nutch.microformats.reltag.RelTagQueryFilter"
-              name="Rel-Tag query filter"
-              point="org.apache.nutch.searcher.QueryFilter">
-      <implementation id="RelTagQueryFilter"
-                      class="org.apache.nutch.microformats.reltag.RelTagQueryFilter">
-        <parameter name="raw-fields" value="tag"/>
-      </implementation>
-      
-   </extension>
-
-
 </plugin>
 

Modified: nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -23,7 +23,6 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.parse.Parse;
 
@@ -60,11 +59,6 @@ public class RelTagIndexingFilter implem
     return doc;
   }
 
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions("tag", LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.UNTOKENIZED, conf);
-  }
-  
   /* ----------------------------- *
    * <implementation:Configurable> *
    * ----------------------------- */

Modified: nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Jul  2 17:19:43 2010
@@ -25,22 +25,10 @@
    Please not that plugins can define extension points as well to be extendable.-->
 
 <extension-point
-      id="org.apache.nutch.clustering.OnlineClusterer"
-      name="Nutch Online Search Results Clustering Plugin"/>
-
-<extension-point
-      id="org.apache.nutch.indexer.field.FieldFilter"
-      name="Nutch Field Filter"/>
-      
-<extension-point
       id="org.apache.nutch.indexer.IndexingFilter"
       name="Nutch Indexing Filter"/>
 
 <extension-point
-      id="org.apache.nutch.ontology.Ontology"
-      name="Ontology Model Loader"/>
-
-<extension-point
       id="org.apache.nutch.parse.Parser"
       name="Nutch Content Parser"/>
  
@@ -53,10 +41,6 @@
       name="Nutch Protocol"/>
 
 <extension-point
-      id="org.apache.nutch.searcher.QueryFilter"
-      name="Nutch Query Filter"/>
-
-<extension-point
       id="org.apache.nutch.net.URLFilter"
       name="Nutch URL Filter"/>
 
@@ -65,18 +49,6 @@
       name="Nutch URL Normalizer"/>
 
 <extension-point
-      id="org.apache.nutch.analysis.NutchAnalyzer"
-      name="Nutch Analysis"/>
-
-<extension-point
-      id="org.apache.nutch.searcher.response.ResponseWriter"
-      name="Nutch Search Results Response Writer"/>
-      
-<extension-point
-      id="org.apache.nutch.searcher.Summarizer"
-      name="Nutch Summarizer"/>
-
-<extension-point
       id="org.apache.nutch.scoring.ScoringFilter"
       name="Nutch Scoring"/>
 

Modified: nutch/trunk/src/plugin/subcollection/plugin.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/subcollection/plugin.xml?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/subcollection/plugin.xml (original)
+++ nutch/trunk/src/plugin/subcollection/plugin.xml Fri Jul  2 17:19:43 2010
@@ -31,16 +31,6 @@
       </library>
    </runtime>
    
-   <extension id="org.apache.nutch.searcher.subcollection.query"
-              name="Subcollection Query Filter"
-              point="org.apache.nutch.searcher.QueryFilter">
-    <implementation id="SubcollectionQueryFilter"
-               class="org.apache.nutch.searcher.subcollection.SubcollectionQueryFilter">
-         <parameter name="raw-fields" value="subcollection"/>
-         </implementation>
-               
-   </extension>      
-
    <extension id="org.apache.nutch.indexer.subcollection.indexing"
               name="Subcollection Indexing Filter"
               point="org.apache.nutch.indexer.IndexingFilter">

Modified: nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/subcollection/src/java/org/apache/nutch/indexer/subcollection/SubcollectionIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -29,7 +29,6 @@ import org.apache.nutch.util.NutchConfig
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 
 import org.apache.nutch.collection.CollectionManager;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -72,9 +71,4 @@ public class SubcollectionIndexingFilter
     addSubCollectionField(doc, sUrl);
     return doc;
   }
-
-  public void addIndexBackendOptions(Configuration conf) {
-    LuceneWriter.addFieldOptions(FIELD_NAME, LuceneWriter.STORE.YES,
-        LuceneWriter.INDEX.TOKENIZED, conf);
-  }
 }

Modified: nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/indexer/tld/TLDIndexingFilter.java Fri Jul  2 17:19:43 2010
@@ -28,7 +28,6 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.IndexingException;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.NutchDocument;
-import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.util.URLUtil;
 import org.apache.nutch.util.domain.DomainSuffix;
@@ -65,10 +64,4 @@ public class TLDIndexingFilter implement
   public Configuration getConf() {
     return this.conf;
   }
-
-  public void addIndexBackendOptions(Configuration conf) {
-    // store, no index
-    LuceneWriter.addFieldOptions("tld", LuceneWriter.STORE.YES,
-                                 LuceneWriter.INDEX.NO, conf);
-  }
 }

Modified: nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java?rev=960064&r1=960063&r2=960064&view=diff
==============================================================================
--- nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java (original)
+++ nutch/trunk/src/plugin/tld/src/java/org/apache/nutch/scoring/tld/TLDScoringFilter.java Fri Jul  2 17:19:43 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.indexer.NutchField;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.protocol.Content;
@@ -52,12 +53,12 @@ public class TLDScoringFilter implements
       CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
       throws ScoringFilterException {
 
-    List<String> tlds = doc.getFieldValues("tld");
+    NutchField tlds = doc.getField("tld");
     float boost = 1.0f;
 
     if(tlds != null) {
-      for(String tld : tlds) {
-        DomainSuffix entry = tldEntries.get(tld);
+      for(Object tld : tlds.getValues()) {
+        DomainSuffix entry = tldEntries.get(tld.toString());
         if(entry != null)
           boost *= entry.getBoost();
       }