You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 19:02:52 UTC

svn commit: r823614 [3/6] - in /lucene/nutch/trunk: ./ conf/ docs/ca/ docs/de/ docs/en/ docs/es/ docs/fi/ docs/fr/ docs/hu/ docs/jp/ docs/ms/ docs/nl/ docs/pl/ docs/pt/ docs/sv/ docs/th/ docs/zh/ lib/ site/ site/skin/ site/skin/images/ src/java/ src/ja...

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/CommandRunner.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/CommandRunner.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DeflateUtils.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/EncodingDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/EncodingDetector.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/FSUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/FSUtils.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GZIPUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GZIPUtils.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java
------------------------------------------------------------------------------
--- svn:keywords (original)
+++ svn:keywords Fri Oct  9 17:02:32 2009
@@ -1 +1 @@
-Date Revision
+Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchJob.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchJob.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/ObjectCache.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/ObjectCache.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/TrieStringMatcher.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/TrieStringMatcher.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/analysis-de/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/analysis-de/src/java/org/apache/nutch/analysis/de/GermanAnalyzer.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/src/java/org/apache/nutch/analysis/fr/FrenchAnalyzer.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/build-plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/build-plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml Fri Oct  9 17:02:32 2009
@@ -1,56 +1,56 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<plugin
-   id="clustering-carrot2"
-   name="Online Search Results Clustering using Carrot2's components"
-   version="1.0.3"
-   provider-name="www.carrot2.org">
-
-   <runtime>
-      <library name="clustering-carrot2.jar">
-         <export name="*"/>
-      </library>
-
-      <!--
-	   The defaults for Lingo. If you plan to use another clustering
-	   algorithm from the Carrot2 project, you'll need all the JARs
-	   required for that algorithm.
-	-->
-      <library name="carrot2-filter-lingo.jar"/>
-      <library name="carrot2-local-core.jar"/>
-      <library name="carrot2-snowball-stemmers.jar"/>
-      <library name="carrot2-util-common.jar"/>
-      <library name="carrot2-util-tokenizer.jar"/>
-
-      <library name="commons-collections-3.2.jar"/>
-      <library name="commons-pool-1.3.jar"/>
-      <library name="Jama-1.0.2.jar"/>
-      <library name="violinstrings-1.0.2.jar"/>
-   </runtime>
-
-   <requires>
-      <import plugin="nutch-extensionpoints"/>
-   </requires>
-
-   <extension id="org.apache.nutch.clustering.carrot2"
-              name="Carrot2 Clusterer"
-              point="org.apache.nutch.clustering.OnlineClusterer">
-      <implementation id="Carrot2"
-                      class="org.apache.nutch.clustering.carrot2.Clusterer"/>
-   </extension>
-</plugin>
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="clustering-carrot2"
+   name="Online Search Results Clustering using Carrot2's components"
+   version="1.0.3"
+   provider-name="www.carrot2.org">
+
+   <runtime>
+      <library name="clustering-carrot2.jar">
+         <export name="*"/>
+      </library>
+
+      <!--
+	   The defaults for Lingo. If you plan to use another clustering
+	   algorithm from the Carrot2 project, you'll need all the JARs
+	   required for that algorithm.
+	-->
+      <library name="carrot2-filter-lingo.jar"/>
+      <library name="carrot2-local-core.jar"/>
+      <library name="carrot2-snowball-stemmers.jar"/>
+      <library name="carrot2-util-common.jar"/>
+      <library name="carrot2-util-tokenizer.jar"/>
+
+      <library name="commons-collections-3.2.jar"/>
+      <library name="commons-pool-1.3.jar"/>
+      <library name="Jama-1.0.2.jar"/>
+      <library name="violinstrings-1.0.2.jar"/>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.clustering.carrot2"
+              name="Carrot2 Clusterer"
+              point="org.apache.nutch.clustering.OnlineClusterer">
+      <implementation id="Carrot2"
+                      class="org.apache.nutch.clustering.carrot2.Clusterer"/>
+   </extension>
+</plugin>

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt Fri Oct  9 17:02:32 2009
@@ -1,7 +1,7 @@
-This plugin extension adds search results clustering capability to Nutch search 
-frontend.
-
-Carrot2 JARs come from codebase in version: 2.1
-
-See the WIKI for more information about configuration and installation
-of this plugin.
+This plugin extension adds search results clustering capability to Nutch search 
+frontend.
+
+Carrot2 JARs come from codebase in version: 2.1
+
+See the WIKI for more information about configuration and installation
+of this plugin.

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java Fri Oct  9 17:02:32 2009
@@ -1,330 +1,330 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.nutch.clustering.HitsCluster;
-import org.apache.nutch.clustering.OnlineClusterer;
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.DuplicatedKeyException;
-import org.carrot2.core.InitializationException;
-import org.carrot2.core.LocalComponent;
-import org.carrot2.core.LocalComponentFactory;
-import org.carrot2.core.LocalControllerBase;
-import org.carrot2.core.LocalProcess;
-import org.carrot2.core.LocalProcessBase;
-import org.carrot2.core.MissingComponentException;
-import org.carrot2.core.MissingProcessException;
-import org.carrot2.core.ProcessingResult;
-import org.carrot2.core.clustering.RawCluster;
-import org.carrot2.core.controller.ControllerHelper;
-import org.carrot2.core.controller.LoaderExtensionUnknownException;
-import org.carrot2.core.impl.ArrayOutputComponent;
-import org.carrot2.core.linguistic.Language;
-import org.carrot2.filter.lingo.local.LingoLocalFilterComponent;
-import org.carrot2.util.tokenizer.languages.AllKnownLanguages;
-
-
-
-/**
- * This plugin provides an implementation of {@link OnlineClusterer} 
- * extension using clustering components of the Carrot2 project
- * (<a href="http://www.carrot2.org">http://www.carrot2.org</a>).
- * 
- * <p>This class hardcodes an equivalent of the following Carrot2 process:
- * <pre><![CDATA[
- * <local-process id="yahoo-lingo">
- *   <name>Yahoo Search API -- Lingo Classic Clusterer</name>
- * 
- *   <input  component-key="input-nutch" />
- *   <filter component-key="filter-lingo" />
- *   <output component-key="output-clustersConsumer" />
- * </local-process>
- * ]]></pre>
- */
-public class Clusterer implements OnlineClusterer, Configurable {
-  /** Default language property name. */
-  private final static String CONF_PROP_DEFAULT_LANGUAGE =
-    "extension.clustering.carrot2.defaultLanguage";
-
-  /** Recognizable languages property name. */
-  private final static String CONF_PROP_LANGUAGES =
-    "extension.clustering.carrot2.languages";
-
-  /** Internal clustering process ID in Carrot2 LocalController */
-  private final static String PROCESS_ID = "nutch-lingo";
-  
-  public static final Log logger = LogFactory.getLog(Clusterer.class);  
-
-  /** The LocalController instance used for clustering */
-  private LocalControllerBase controller;
-
-  /** Nutch configuration. */
-  private Configuration conf;
-
-  /** 
-   * Default language for hits. English by default, but may be changed
-   * via a property in Nutch configuration. 
-   */
-  private String defaultLanguage = "en";
-
-  /** 
-   * A list of recognizable languages..
-   * English only by default, but configurable via Nutch configuration.
-   */
-  private String [] languages = new String [] {defaultLanguage};
-
-  /**
-   * An empty public constructor for making new instances
-   * of the clusterer.
-   */
-  public Clusterer() {
-    // Don't forget to call {@link #setConf(Configuration)}.
-  }
-
-  /**
-   * See {@link OnlineClusterer} for documentation.
-   */
-  public HitsCluster [] clusterHits(HitDetails [] hitDetails, String [] descriptions) {
-    if (this.controller == null) {
-      logger.error("initialize() not called.");
-      return new HitsCluster[0];
-    }
-
-    final Map requestParams = new HashMap();
-    requestParams.put(NutchInputComponent.NUTCH_INPUT_HIT_DETAILS_ARRAY,
-      hitDetails);
-    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
-      descriptions);
-
-    try {
-      // The input component takes Nutch's results so we don't need the query argument.
-      final ProcessingResult result = 
-        controller.query(PROCESS_ID, "no-query", requestParams);
-
-      final ArrayOutputComponent.Result output =
-        (ArrayOutputComponent.Result) result.getQueryResult();
-
-      final List outputClusters = output.clusters;
-      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];
-
-      int j = 0;
-      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
-        RawCluster rcluster = (RawCluster) i.next();
-        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
-      }
-
-      // invoke Carrot2 process here.
-      return clusters;
-    } catch (MissingProcessException e) {
-      throw new RuntimeException("Missing clustering process.", e);
-    } catch (Exception e) {
-      throw new RuntimeException("Unidentified problems with the clustering.", e);
-    }
-  }
-
-  /**
-   * Implementation of {@link Configurable}
-   */
-  public void setConf(Configuration conf) {
-    this.conf = conf;
-
-    // Configure default language and other component settings.
-    if (conf.get(CONF_PROP_DEFAULT_LANGUAGE) != null) {
-      // Change the default language.
-      this.defaultLanguage = conf.get(CONF_PROP_DEFAULT_LANGUAGE);
-    } 
-    if (conf.getStrings(CONF_PROP_LANGUAGES) != null) {
-      this.languages = conf.getStrings(CONF_PROP_LANGUAGES);
-    }
-
-    if (logger.isInfoEnabled()) {
-      logger.info("Default language: " + defaultLanguage);
-      logger.info("Enabled languages: " + Arrays.asList(languages));
-    }
-
-    initialize();
-  }
-
-  /**
-   * Implementation of {@link Configurable}
-   */
-  public Configuration getConf() {
-    return conf;
-  }
-  
-  /**
-   * Initialize clustering processes and Carrot2 components.
-   */
-  private synchronized void initialize() {
-    // Initialize language list, temporarily switching off logging
-    // of warnings. This is a bit of a hack, but we don't want to
-    // redistribute the entire Carrot2 distro and this prevents
-    // nasty ClassNotFound warnings.
-    final Logger c2Logger = Logger.getLogger("org.carrot2");
-    final Level original = c2Logger.getLevel();
-    c2Logger.setLevel(Level.ERROR);
-    AllKnownLanguages.getLanguageCodes();
-    c2Logger.setLevel(original);
-
-    // Initialize the controller.    
-    controller = new LocalControllerBase();
-
-    final Configuration nutchConf = getConf();
-    final String processResource = nutchConf.get(
-        "extension.clustering.carrot2.process-resource");
-
-    if (processResource == null) {
-      logger.info("Using default clustering algorithm (Lingo).");
-      addDefaultProcess();
-    } else {
-      logger.info("Using custom clustering process: " + processResource);
-      controller.setComponentAutoload(true);
-      
-      final ControllerHelper helper = new ControllerHelper();
-      final InputStream is = Thread.currentThread()
-        .getContextClassLoader().getResourceAsStream(processResource);
-      if (is != null) {
-        try {
-          final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
-            public LocalComponent getInstance() {
-              return new NutchInputComponent(defaultLanguage);
-            }
-          };
-          controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
-          
-          final LocalProcess process = helper.loadProcess(
-              helper.getExtension(processResource), is).getProcess();
-          controller.addProcess(PROCESS_ID, process);
-          is.close();
-        } catch (IOException e) {
-          logger.error("Could not load process resource: " + processResource, e);
-        } catch (LoaderExtensionUnknownException e) {
-          logger.error("Unrecognized extension of process resource: " + processResource);
-        } catch (InstantiationException e) {
-          logger.error("Could not instantiate process: " + processResource, e);
-        } catch (InitializationException e) {
-          logger.error("Could not initialize process: " + processResource, e);
-        } catch (DuplicatedKeyException e) {
-          logger.error("Duplicated key (unreachable?): " + processResource, e);
-        } catch (MissingComponentException e) {
-          logger.error("Some components are missing, could not initialize process: " 
-              + processResource, e);
-        }
-      } else {
-        logger.error("Could not find process resource: " + processResource);
-      }
-    }
-  }
-
-  /**
-   * Adds a default clustering process using Lingo algorithm.
-   */
-  private void addDefaultProcess() {
-    try {
-      addComponentFactories();
-      addProcesses();
-    } catch (DuplicatedKeyException e) {
-      logger.fatal("Duplicated component or process identifier.", e);
-    }
-  }
-
-  /** Adds the required component factories to a local Carrot2 controller. */
-  private void addComponentFactories() throws DuplicatedKeyException {
-    //  *   <input  component-key="input-nutch" />
-    LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
-      public LocalComponent getInstance() {
-        return new NutchInputComponent(defaultLanguage);
-      }
-    };
-    controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
-
-    // *   <filter component-key="filter-lingo" />
-    LocalComponentFactory lingoFactory = new LocalComponentFactory() {
-      public LocalComponent getInstance() {
-        final HashMap defaults = new HashMap();
-
-        // These are adjustments settings for the clustering algorithm.
-        // If you try the live WebStart demo of Carrot2 you can see how they affect
-        // the final clustering: http://www.carrot2.org 
-        defaults.put("lsi.threshold.clusterAssignment", "0.150");
-        defaults.put("lsi.threshold.candidateCluster",  "0.775");
-
-        // Initialize a new Lingo clustering component.
-        ArrayList languageList = new ArrayList(languages.length);
-        for (int i = 0; i < languages.length; i++) {
-          final String lcode = languages[i];
-          try {
-            final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
-            if (lang == null) {
-              logger.warn("Language not supported in Carrot2: " + lcode);
-            } else {
-              languageList.add(lang);
-              logger.debug("Language loaded: " + lcode);
-            }
-          } catch (Throwable t) {
-              logger.warn("Language could not be loaded: " + lcode, t);
-          }
-        }
-        return new LingoLocalFilterComponent(
-          (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
-      }
-    };
-    controller.addLocalComponentFactory("filter-lingo", lingoFactory);
-
-    // *   <output component-key="output-clustersConsumer" />
-    LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
-      public LocalComponent getInstance() {
-        return new ArrayOutputComponent();
-      }
-    };
-    controller.addLocalComponentFactory("output-array", 
-      clusterConsumerOutputFactory);
-  }
-
-  /** 
-   * Adds a hardcoded clustering process to the local controller.
-   */  
-  private void addProcesses() {
-    final LocalProcessBase process = new LocalProcessBase(
-        "input-nutch",
-        "output-array",
-        new String [] {"filter-lingo"},
-        "The Lingo clustering algorithm (www.carrot2.org).",
-        "");
-
-    try {
-      controller.addProcess(PROCESS_ID, process);
-    } catch (Exception e) {
-      throw new RuntimeException("Could not assemble clustering process.", e);
-    }
-  }  
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.nutch.clustering.HitsCluster;
+import org.apache.nutch.clustering.OnlineClusterer;
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.DuplicatedKeyException;
+import org.carrot2.core.InitializationException;
+import org.carrot2.core.LocalComponent;
+import org.carrot2.core.LocalComponentFactory;
+import org.carrot2.core.LocalControllerBase;
+import org.carrot2.core.LocalProcess;
+import org.carrot2.core.LocalProcessBase;
+import org.carrot2.core.MissingComponentException;
+import org.carrot2.core.MissingProcessException;
+import org.carrot2.core.ProcessingResult;
+import org.carrot2.core.clustering.RawCluster;
+import org.carrot2.core.controller.ControllerHelper;
+import org.carrot2.core.controller.LoaderExtensionUnknownException;
+import org.carrot2.core.impl.ArrayOutputComponent;
+import org.carrot2.core.linguistic.Language;
+import org.carrot2.filter.lingo.local.LingoLocalFilterComponent;
+import org.carrot2.util.tokenizer.languages.AllKnownLanguages;
+
+
+
+/**
+ * This plugin provides an implementation of {@link OnlineClusterer} 
+ * extension using clustering components of the Carrot2 project
+ * (<a href="http://www.carrot2.org">http://www.carrot2.org</a>).
+ * 
+ * <p>This class hardcodes an equivalent of the following Carrot2 process:
+ * <pre><![CDATA[
+ * <local-process id="yahoo-lingo">
+ *   <name>Yahoo Search API -- Lingo Classic Clusterer</name>
+ * 
+ *   <input  component-key="input-nutch" />
+ *   <filter component-key="filter-lingo" />
+ *   <output component-key="output-clustersConsumer" />
+ * </local-process>
+ * ]]></pre>
+ */
+public class Clusterer implements OnlineClusterer, Configurable {
+  /** Default language property name. */
+  private final static String CONF_PROP_DEFAULT_LANGUAGE =
+    "extension.clustering.carrot2.defaultLanguage";
+
+  /** Recognizable languages property name. */
+  private final static String CONF_PROP_LANGUAGES =
+    "extension.clustering.carrot2.languages";
+
+  /** Internal clustering process ID in Carrot2 LocalController */
+  private final static String PROCESS_ID = "nutch-lingo";
+  
+  public static final Log logger = LogFactory.getLog(Clusterer.class);  
+
+  /** The LocalController instance used for clustering */
+  private LocalControllerBase controller;
+
+  /** Nutch configuration. */
+  private Configuration conf;
+
+  /** 
+   * Default language for hits. English by default, but may be changed
+   * via a property in Nutch configuration. 
+   */
+  private String defaultLanguage = "en";
+
+  /** 
+   * A list of recognizable languages.
+   * English only by default, but configurable via Nutch configuration.
+   */
+  private String [] languages = new String [] {defaultLanguage};
+
+  /**
+   * An empty public constructor for making new instances
+   * of the clusterer.
+   */
+  public Clusterer() {
+    // Don't forget to call {@link #setConf(Configuration)}.
+  }
+
+  /**
+   * See {@link OnlineClusterer} for documentation.
+   */
+  public HitsCluster [] clusterHits(HitDetails [] hitDetails, String [] descriptions) {
+    if (this.controller == null) {
+      logger.error("initialize() not called.");
+      return new HitsCluster[0];
+    }
+
+    final Map requestParams = new HashMap();
+    requestParams.put(NutchInputComponent.NUTCH_INPUT_HIT_DETAILS_ARRAY,
+      hitDetails);
+    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
+      descriptions);
+
+    try {
+      // The input component takes Nutch's results so we don't need the query argument.
+      final ProcessingResult result = 
+        controller.query(PROCESS_ID, "no-query", requestParams);
+
+      final ArrayOutputComponent.Result output =
+        (ArrayOutputComponent.Result) result.getQueryResult();
+
+      final List outputClusters = output.clusters;
+      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];
+
+      int j = 0;
+      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
+        RawCluster rcluster = (RawCluster) i.next();
+        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
+      }
+
+      // The Carrot2 process has already been invoked above; return the adapted clusters.
+      return clusters;
+    } catch (MissingProcessException e) {
+      throw new RuntimeException("Missing clustering process.", e);
+    } catch (Exception e) {
+      throw new RuntimeException("Unidentified problems with the clustering.", e);
+    }
+  }
+
+  /**
+   * Implementation of {@link Configurable}
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+
+    // Configure default language and other component settings.
+    if (conf.get(CONF_PROP_DEFAULT_LANGUAGE) != null) {
+      // Change the default language.
+      this.defaultLanguage = conf.get(CONF_PROP_DEFAULT_LANGUAGE);
+    } 
+    if (conf.getStrings(CONF_PROP_LANGUAGES) != null) {
+      this.languages = conf.getStrings(CONF_PROP_LANGUAGES);
+    }
+
+    if (logger.isInfoEnabled()) {
+      logger.info("Default language: " + defaultLanguage);
+      logger.info("Enabled languages: " + Arrays.asList(languages));
+    }
+
+    initialize();
+  }
+
+  /**
+   * Implementation of {@link Configurable}
+   */
+  public Configuration getConf() {
+    return conf;
+  }
+  
+  /**
+   * Initialize clustering processes and Carrot2 components.
+   */
+  private synchronized void initialize() {
+    // Initialize language list, temporarily switching off logging
+    // of warnings. This is a bit of a hack, but we don't want to
+    // redistribute the entire Carrot2 distro and this prevents
+    // nasty ClassNotFound warnings.
+    final Logger c2Logger = Logger.getLogger("org.carrot2");
+    final Level original = c2Logger.getLevel();
+    c2Logger.setLevel(Level.ERROR);
+    AllKnownLanguages.getLanguageCodes();
+    c2Logger.setLevel(original);
+
+    // Initialize the controller.    
+    controller = new LocalControllerBase();
+
+    final Configuration nutchConf = getConf();
+    final String processResource = nutchConf.get(
+        "extension.clustering.carrot2.process-resource");
+
+    if (processResource == null) {
+      logger.info("Using default clustering algorithm (Lingo).");
+      addDefaultProcess();
+    } else {
+      logger.info("Using custom clustering process: " + processResource);
+      controller.setComponentAutoload(true);
+      
+      final ControllerHelper helper = new ControllerHelper();
+      final InputStream is = Thread.currentThread()
+        .getContextClassLoader().getResourceAsStream(processResource);
+      if (is != null) {
+        try {
+          final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
+            public LocalComponent getInstance() {
+              return new NutchInputComponent(defaultLanguage);
+            }
+          };
+          controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
+          
+          final LocalProcess process = helper.loadProcess(
+              helper.getExtension(processResource), is).getProcess();
+          controller.addProcess(PROCESS_ID, process);
+          is.close();
+        } catch (IOException e) {
+          logger.error("Could not load process resource: " + processResource, e);
+        } catch (LoaderExtensionUnknownException e) {
+          logger.error("Unrecognized extension of process resource: " + processResource);
+        } catch (InstantiationException e) {
+          logger.error("Could not instantiate process: " + processResource, e);
+        } catch (InitializationException e) {
+          logger.error("Could not initialize process: " + processResource, e);
+        } catch (DuplicatedKeyException e) {
+          logger.error("Duplicated key (unreachable?): " + processResource, e);
+        } catch (MissingComponentException e) {
+          logger.error("Some components are missing, could not initialize process: " 
+              + processResource, e);
+        }
+      } else {
+        logger.error("Could not find process resource: " + processResource);
+      }
+    }
+  }
+
+  /**
+   * Adds a default clustering process using Lingo algorithm.
+   */
+  private void addDefaultProcess() {
+    try {
+      addComponentFactories();
+      addProcesses();
+    } catch (DuplicatedKeyException e) {
+      logger.fatal("Duplicated component or process identifier.", e);
+    }
+  }
+
+  /** Adds the required component factories to a local Carrot2 controller. */
+  private void addComponentFactories() throws DuplicatedKeyException {
+    //  *   <input  component-key="input-nutch" />
+    LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        return new NutchInputComponent(defaultLanguage);
+      }
+    };
+    controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
+
+    // *   <filter component-key="filter-lingo" />
+    LocalComponentFactory lingoFactory = new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        final HashMap defaults = new HashMap();
+
+        // These are adjustment settings for the clustering algorithm.
+        // If you try the live WebStart demo of Carrot2 you can see how they affect
+        // the final clustering: http://www.carrot2.org 
+        defaults.put("lsi.threshold.clusterAssignment", "0.150");
+        defaults.put("lsi.threshold.candidateCluster",  "0.775");
+
+        // Initialize a new Lingo clustering component.
+        ArrayList languageList = new ArrayList(languages.length);
+        for (int i = 0; i < languages.length; i++) {
+          final String lcode = languages[i];
+          try {
+            final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
+            if (lang == null) {
+              logger.warn("Language not supported in Carrot2: " + lcode);
+            } else {
+              languageList.add(lang);
+              logger.debug("Language loaded: " + lcode);
+            }
+          } catch (Throwable t) {
+              logger.warn("Language could not be loaded: " + lcode, t);
+          }
+        }
+        return new LingoLocalFilterComponent(
+          (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
+      }
+    };
+    controller.addLocalComponentFactory("filter-lingo", lingoFactory);
+
+    // *   <output component-key="output-clustersConsumer" /> (registered below as "output-array")
+    LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        return new ArrayOutputComponent();
+      }
+    };
+    controller.addLocalComponentFactory("output-array", 
+      clusterConsumerOutputFactory);
+  }
+
+  /** 
+   * Adds a hardcoded clustering process to the local controller.
+   */  
+  private void addProcesses() {
+    final LocalProcessBase process = new LocalProcessBase(
+        "input-nutch",
+        "output-array",
+        new String [] {"filter-lingo"},
+        "The Lingo clustering algorithm (www.carrot2.org).",
+        "");
+
+    try {
+      controller.addProcess(PROCESS_ID, process);
+    } catch (Exception e) {
+      throw new RuntimeException("Could not assemble clustering process.", e);
+    }
+  }  
+}

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java Fri Oct  9 17:02:32 2009
@@ -1,108 +1,108 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.nutch.clustering.HitsCluster;
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.clustering.RawCluster;
-import org.carrot2.core.clustering.RawDocument;
-
-/**
- * An adapter of Carrot2's {@link RawCluster} interface to
- * {@link HitsCluster} interface. 
- */
-public class HitsClusterAdapter implements HitsCluster {
-  private RawCluster rawCluster;
-  private HitDetails [] hits;
-
-  /**
-   * Lazily initialized subclusters array.
-   */
-  private HitsCluster [] subclusters;
-  
-  /**
-   * Lazily initialized documents array.
-   */
-  private HitDetails [] documents;
-  
-  /**
-   * Creates a new adapter.
-   */
-  public HitsClusterAdapter(RawCluster rawCluster, HitDetails [] hits) {
-    this.rawCluster = rawCluster;
-    this.hits = hits;
-  }
-
-  /*
-   * @see org.apache.nutch.clustering.HitsCluster#getSubclusters()
-   */
-  public HitsCluster[] getSubclusters() {
-    if (this.subclusters == null) {
-      final List rawSubclusters = rawCluster.getSubclusters();
-      if (rawSubclusters == null || rawSubclusters.size() == 0) {
-        subclusters = null;
-      } else {
-        subclusters = new HitsCluster[rawSubclusters.size()];
-        int j = 0;
-        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
-          RawCluster c = (RawCluster) i.next();
-          subclusters[j] = new HitsClusterAdapter(c, hits);
-        }
-      }
-    }
-
-    return subclusters;
-  }
-
-  /*
-   * @see org.apache.nutch.clustering.HitsCluster#getHits()
-   */
-  public HitDetails[] getHits() {
-    if (documents == null) {
-      List rawDocuments = this.rawCluster.getDocuments();
-      documents = new HitDetails[ rawDocuments.size() ];
-      
-      int j = 0;
-      for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
-        RawDocument doc = (RawDocument) i.next();
-        Integer offset = (Integer) doc.getId();
-        documents[j] = this.hits[offset.intValue()];
-      }
-    }
-
-    return documents;
-  }
-
-  /*
-   * @see org.apache.nutch.clustering.HitsCluster#getDescriptionLabels()
-   */
-  public String[] getDescriptionLabels() {
-    List phrases = this.rawCluster.getClusterDescription();
-    return (String []) phrases.toArray( new String [ phrases.size() ]);
-  }
-
-  /*
-   * @see org.apache.nutch.clustering.HitsCluster#isJunkCluster()
-   */
-  public boolean isJunkCluster() {
-    return rawCluster.getProperty(RawCluster.PROPERTY_JUNK_CLUSTER) != null;
-  }
-}
-
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.nutch.clustering.HitsCluster;
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.clustering.RawCluster;
+import org.carrot2.core.clustering.RawDocument;
+
+/**
+ * An adapter of Carrot2's {@link RawCluster} interface to
+ * {@link HitsCluster} interface. 
+ */
+public class HitsClusterAdapter implements HitsCluster {
+  private RawCluster rawCluster;
+  private HitDetails [] hits;
+
+  /**
+   * Lazily initialized subclusters array.
+   */
+  private HitsCluster [] subclusters;
+  
+  /**
+   * Lazily initialized documents array.
+   */
+  private HitDetails [] documents;
+  
+  /**
+   * Creates a new adapter.
+   */
+  public HitsClusterAdapter(RawCluster rawCluster, HitDetails [] hits) {
+    this.rawCluster = rawCluster;
+    this.hits = hits;
+  }
+
+  /*
+   * @see org.apache.nutch.clustering.HitsCluster#getSubclusters()
+   */
+  public HitsCluster[] getSubclusters() {
+    if (this.subclusters == null) {
+      final List rawSubclusters = rawCluster.getSubclusters();
+      if (rawSubclusters == null || rawSubclusters.size() == 0) {
+        subclusters = null;
+      } else {
+        subclusters = new HitsCluster[rawSubclusters.size()];
+        int j = 0;
+        for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
+          RawCluster c = (RawCluster) i.next();
+          subclusters[j] = new HitsClusterAdapter(c, hits);
+        }
+      }
+    }
+
+    return subclusters;
+  }
+
+  /*
+   * @see org.apache.nutch.clustering.HitsCluster#getHits()
+   */
+  public HitDetails[] getHits() {
+    if (documents == null) {
+      List rawDocuments = this.rawCluster.getDocuments();
+      documents = new HitDetails[ rawDocuments.size() ];
+      
+      int j = 0;
+      for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
+        RawDocument doc = (RawDocument) i.next();
+        Integer offset = (Integer) doc.getId();
+        documents[j] = this.hits[offset.intValue()];
+      }
+    }
+
+    return documents;
+  }
+
+  /*
+   * @see org.apache.nutch.clustering.HitsCluster#getDescriptionLabels()
+   */
+  public String[] getDescriptionLabels() {
+    List phrases = this.rawCluster.getClusterDescription();
+    return (String []) phrases.toArray( new String [ phrases.size() ]);
+  }
+
+  /*
+   * @see org.apache.nutch.clustering.HitsCluster#isJunkCluster()
+   */
+  public boolean isJunkCluster() {
+    return rawCluster.getProperty(RawCluster.PROPERTY_JUNK_CLUSTER) != null;
+  }
+}
+

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java Fri Oct  9 17:02:32 2009
@@ -1,65 +1,65 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.clustering.RawDocument;
-import org.carrot2.core.clustering.RawDocumentBase;
-
-/**
- * An adapter class that implements {@link RawDocument} required for Carrot2.  
- */
-public class NutchDocument extends RawDocumentBase {
-  /**
-   * Integer identifier of this document. We need a subclass of 
-   * {@link java.lang.Object}, so this should do.
-   */
-  private final Integer id;
-  
-  /**
-   * Creates a new document with the given id, <code>summary</code> and wrapping
-   * a <code>details</code> hit details.
-   */
-  public NutchDocument(int id, HitDetails details, String summary, String defaultLanguage) {
-    super(details.getValue("url"), details.getValue("title"), summary);
-
-    // Handle document language -- attempt to extract it from the details,
-    // otherwise set to the default.
-    String lang = details.getValue("lang");
-    if (lang == null) {
-      // No default language. Take the default from the configuration file.
-      lang = defaultLanguage;
-    }
-
-    // Use this language for the snippet. Truncate longer ISO codes
-    // to only include two-letter language code.
-    if (lang.length() > 2) {
-      lang = lang.substring(0, 2);
-    }
-    lang = lang.toLowerCase();    
-    super.setProperty(RawDocument.PROPERTY_LANGUAGE, lang);
-
-    this.id = Integer.valueOf(id);
-  }
-
-  /*
-   * @see com.dawidweiss.carrot.core.local.clustering.RawDocument#getId()
-   */
-  public Object getId() {
-    return id;
-  }
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.clustering.RawDocument;
+import org.carrot2.core.clustering.RawDocumentBase;
+
+/**
+ * An adapter class that implements {@link RawDocument} required for Carrot2.  
+ */
+public class NutchDocument extends RawDocumentBase {
+  /**
+   * Integer identifier of this document. We need a subclass of 
+   * {@link java.lang.Object}, so this should do.
+   */
+  private final Integer id;
+  
+  /**
+   * Creates a new document with the given id, <code>summary</code> and wrapping
+   * a <code>details</code> hit details.
+   */
+  public NutchDocument(int id, HitDetails details, String summary, String defaultLanguage) {
+    super(details.getValue("url"), details.getValue("title"), summary);
+
+    // Handle document language -- attempt to extract it from the details,
+    // otherwise set to the default.
+    String lang = details.getValue("lang");
+    if (lang == null) {
+      // No language in the hit details. Take the default from the configuration file.
+      lang = defaultLanguage;
+    }
+
+    // Use this language for the snippet. Truncate longer ISO codes
+    // to only include two-letter language code.
+    if (lang.length() > 2) {
+      lang = lang.substring(0, 2);
+    }
+    lang = lang.toLowerCase();    
+    super.setProperty(RawDocument.PROPERTY_LANGUAGE, lang);
+
+    this.id = Integer.valueOf(id);
+  }
+
+  /*
+   * @see org.carrot2.core.clustering.RawDocument#getId()
+   */
+  public Object getId() {
+    return id;
+  }
 }
\ No newline at end of file

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java Fri Oct  9 17:02:32 2009
@@ -1,108 +1,108 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.LocalInputComponentBase;
-import org.carrot2.core.ProcessingException;
-import org.carrot2.core.RequestContext;
-import org.carrot2.core.clustering.RawDocumentsConsumer;
-import org.carrot2.core.clustering.RawDocumentsProducer;
-
-/**
- * An input component that ignores the query passed from the
- * controller and instead looks for data stored in the request context.
- * This enables us to reuse the same physical component implementation
- * for data that has already been acquired from Nutch.
- */
-public class NutchInputComponent extends LocalInputComponentBase {
-  public final static String NUTCH_INPUT_HIT_DETAILS_ARRAY
-    = "NUTCH_INPUT_HIT_DETAILS_ARRAY";
-
-  public final static String NUTCH_INPUT_SUMMARIES_ARRAY 
-    = "NUTCH_INPUT_SUMMARIES_ARRAY";
-
-  /** Capabilities required from the next component in the chain */
-  private final static Set SUCCESSOR_CAPABILITIES = toSet(RawDocumentsConsumer.class);
-
-  /** This component's capabilities */
-  private final static Set COMPONENT_CAPABILITIES = toSet(RawDocumentsProducer.class);
-
-  /**
-   * Default language code for hits that don't have their own.
-   */
-  private String defaultLanguage;
-
-  /**
-   * Creates an input component with the given default language code.
-   */
-  public NutchInputComponent(String defaultLanguage) {
-    this.defaultLanguage = defaultLanguage;
-  }
-
-  /*
-   * @see com.dawidweiss.carrot.core.local.LocalInputComponent#setQuery(java.lang.String)
-   */
-  public void setQuery(String query) {
-      // ignore the query; data will be provided from the request context.
-  }
-
-  /**
-   * A callback hook that starts the processing.
-   */
-  public void startProcessing(RequestContext context) throws ProcessingException {
-    // let successor components know that the processing has started.
-    super.startProcessing(context);
-    
-    // get the information about documents from the context.
-    final Map params = context.getRequestParameters();
-    final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
-    final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);
-
-    if (details == null)
-      throw new ProcessingException("Details array must not be null.");
-
-    if (summaries == null)
-      throw new ProcessingException("Summaries array must not be null.");
-
-    if (summaries.length != details.length)
-      throw new ProcessingException("Summaries and details must be of the same length.");
-    
-    // produce 'documents' for successor components.
-    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
-    for (int i = 0; i < summaries.length; i++) {
-      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
-    }
-  }
-
-  /**
-   * Returns the capabilities provided by this component.
-   */
-  public Set getComponentCapabilities() {
-    return COMPONENT_CAPABILITIES;
-  }
-    
-  /**
-   * Returns the capabilities required from the successor component.
-   */
-  public Set getRequiredSuccessorCapabilities() {
-    return SUCCESSOR_CAPABILITIES;
-  }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.LocalInputComponentBase;
+import org.carrot2.core.ProcessingException;
+import org.carrot2.core.RequestContext;
+import org.carrot2.core.clustering.RawDocumentsConsumer;
+import org.carrot2.core.clustering.RawDocumentsProducer;
+
+/**
+ * An input component that ignores the query passed from the controller and
+ * reads its data from the request context instead: a HitDetails[] and a
+ * String[] of summaries stored under the two NUTCH_INPUT_* keys below. Each
+ * details/summary pair becomes one NutchDocument handed to the next component.
+ */
+public class NutchInputComponent extends LocalInputComponentBase {
+  public final static String NUTCH_INPUT_HIT_DETAILS_ARRAY
+    = "NUTCH_INPUT_HIT_DETAILS_ARRAY"; // request-parameter key for the HitDetails[]
+
+  public final static String NUTCH_INPUT_SUMMARIES_ARRAY 
+    = "NUTCH_INPUT_SUMMARIES_ARRAY"; // request-parameter key for the String[] of summaries
+
+  /** Returned by getRequiredSuccessorCapabilities(): the successor must offer RawDocumentsConsumer. */
+  private final static Set SUCCESSOR_CAPABILITIES = toSet(RawDocumentsConsumer.class);
+
+  /** Returned by getComponentCapabilities(): this component advertises RawDocumentsProducer. */
+  private final static Set COMPONENT_CAPABILITIES = toSet(RawDocumentsProducer.class);
+
+  /**
+   * Default language code for hits that don't have their own; passed to every NutchDocument created.
+   */
+  private String defaultLanguage;
+
+  /**
+   * Creates an input component; {@code defaultLanguage} is stored as given (no validation here).
+   */
+  public NutchInputComponent(String defaultLanguage) {
+    this.defaultLanguage = defaultLanguage;
+  }
+
+  /* Deliberate no-op: this component never uses the query string.
+   * @see com.dawidweiss.carrot.core.local.LocalInputComponent#setQuery(java.lang.String) (NOTE(review): imports here use org.carrot2.core; package likely stale, verify)
+   */
+  public void setQuery(String query) {
+      // ignore the query; startProcessing() takes its data from the request context.
+  }
+
+  /** Callback that starts the processing: feeds one NutchDocument per hit to the next component.
+   * @throws ProcessingException if either input array is missing or the two lengths differ
+   */
+  public void startProcessing(RequestContext context) throws ProcessingException {
+    // let successor components know that the processing has started.
+    super.startProcessing(context);
+    
+    // fetch both arrays from the request parameters (a raw Map, hence the casts).
+    final Map params = context.getRequestParameters();
+    final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
+    final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);
+
+    if (details == null)
+      throw new ProcessingException("Details array must not be null.");
+
+    if (summaries == null)
+      throw new ProcessingException("Summaries array must not be null.");
+
+    if (summaries.length != details.length) // the arrays are paired by index below
+      throw new ProcessingException("Summaries and details must be of the same length.");
+    
+    // produce one document per hit for the successor; index i pairs details[i] with summaries[i].
+    final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next; // 'next' is not declared here (presumably inherited)
+    for (int i = 0; i < summaries.length; i++) {
+      consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
+    }
+  }
+
+  /**
+   * Returns the capabilities provided by this component (the shared constant set, not a copy).
+   */
+  public Set getComponentCapabilities() {
+    return COMPONENT_CAPABILITIES;
+  }
+    
+  /**
+   * Returns the capabilities required from the successor component (the shared constant set, not a copy).
+   */
+  public Set getRequiredSuccessorCapabilities() {
+    return SUCCESSOR_CAPABILITIES;
+  }
+}

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/TestClusterer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/TestClusterer.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/test-input.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/test-input.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/README.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/crawl-urlfilter.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/nutch-site.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/nutch-site.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/anchor.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/rdf.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/rel.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/footer.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/header.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/style.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/search.jsp
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/web.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/web.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/feed/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/feed/lib/rome-0.9.LICENSE.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-basic/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-basic/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-basic/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-basic/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-boost/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-boost/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-boost/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-boost/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-basic/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-basic/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-more/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/build.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL