You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 19:02:52 UTC
svn commit: r823614 [3/6] - in /lucene/nutch/trunk: ./ conf/ docs/ca/
docs/de/ docs/en/ docs/es/ docs/fi/ docs/fr/ docs/hu/ docs/jp/ docs/ms/
docs/nl/ docs/pl/ docs/pt/ docs/sv/ docs/th/ docs/zh/ lib/ site/ site/skin/
site/skin/images/ src/java/ src/ja...
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcInputFormat.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/CommandRunner.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/CommandRunner.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DeflateUtils.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/DomUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/EncodingDetector.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/EncodingDetector.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/FSUtils.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/FSUtils.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GZIPUtils.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GZIPUtils.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/HadoopFSUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/LockUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/LogUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/MimeUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NodeWalker.java
------------------------------------------------------------------------------
--- svn:keywords (original)
+++ svn:keywords Fri Oct 9 17:02:32 2009
@@ -1 +1 @@
-Date Revision
+Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchJob.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchJob.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/ObjectCache.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/ObjectCache.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/StringUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/TrieStringMatcher.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/TrieStringMatcher.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/URLUtil.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffix.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixes.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainSuffixesReader.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/TopLevelDomain.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/util/domain/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/analysis-de/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/analysis-de/src/java/org/apache/nutch/analysis/de/GermanAnalyzer.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/analysis-fr/src/java/org/apache/nutch/analysis/fr/FrenchAnalyzer.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/build-plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/build-plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml Fri Oct 9 17:02:32 2009
@@ -1,56 +1,56 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<plugin
- id="clustering-carrot2"
- name="Online Search Results Clustering using Carrot2's components"
- version="1.0.3"
- provider-name="www.carrot2.org">
-
- <runtime>
- <library name="clustering-carrot2.jar">
- <export name="*"/>
- </library>
-
- <!--
- The defaults for Lingo. If you plan to use another clustering
- algorithm from the Carrot2 project, you'll need all the JARs
- required for that algorithm.
- -->
- <library name="carrot2-filter-lingo.jar"/>
- <library name="carrot2-local-core.jar"/>
- <library name="carrot2-snowball-stemmers.jar"/>
- <library name="carrot2-util-common.jar"/>
- <library name="carrot2-util-tokenizer.jar"/>
-
- <library name="commons-collections-3.2.jar"/>
- <library name="commons-pool-1.3.jar"/>
- <library name="Jama-1.0.2.jar"/>
- <library name="violinstrings-1.0.2.jar"/>
- </runtime>
-
- <requires>
- <import plugin="nutch-extensionpoints"/>
- </requires>
-
- <extension id="org.apache.nutch.clustering.carrot2"
- name="Carrot2 Clusterer"
- point="org.apache.nutch.clustering.OnlineClusterer">
- <implementation id="Carrot2"
- class="org.apache.nutch.clustering.carrot2.Clusterer"/>
- </extension>
-</plugin>
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+ id="clustering-carrot2"
+ name="Online Search Results Clustering using Carrot2's components"
+ version="1.0.3"
+ provider-name="www.carrot2.org">
+
+ <runtime>
+ <library name="clustering-carrot2.jar">
+ <export name="*"/>
+ </library>
+
+ <!--
+ The defaults for Lingo. If you plan to use another clustering
+ algorithm from the Carrot2 project, you'll need all the JARs
+ required for that algorithm.
+ -->
+ <library name="carrot2-filter-lingo.jar"/>
+ <library name="carrot2-local-core.jar"/>
+ <library name="carrot2-snowball-stemmers.jar"/>
+ <library name="carrot2-util-common.jar"/>
+ <library name="carrot2-util-tokenizer.jar"/>
+
+ <library name="commons-collections-3.2.jar"/>
+ <library name="commons-pool-1.3.jar"/>
+ <library name="Jama-1.0.2.jar"/>
+ <library name="violinstrings-1.0.2.jar"/>
+ </runtime>
+
+ <requires>
+ <import plugin="nutch-extensionpoints"/>
+ </requires>
+
+ <extension id="org.apache.nutch.clustering.carrot2"
+ name="Carrot2 Clusterer"
+ point="org.apache.nutch.clustering.OnlineClusterer">
+ <implementation id="Carrot2"
+ class="org.apache.nutch.clustering.carrot2.Clusterer"/>
+ </extension>
+</plugin>
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt Fri Oct 9 17:02:32 2009
@@ -1,7 +1,7 @@
-This plugin extension adds search results clustering capability to Nutch search
-frontend.
-
-Carrot2 JARs come from codebase in version: 2.1
-
-See the WIKI for more information about configuration and installation
-of this plugin.
+This plugin extension adds search results clustering capability to Nutch search
+frontend.
+
+Carrot2 JARs come from codebase in version: 2.1
+
+See the WIKI for more information about configuration and installation
+of this plugin.
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/readme.txt
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java Fri Oct 9 17:02:32 2009
@@ -1,330 +1,330 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
-import org.apache.nutch.clustering.HitsCluster;
-import org.apache.nutch.clustering.OnlineClusterer;
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.DuplicatedKeyException;
-import org.carrot2.core.InitializationException;
-import org.carrot2.core.LocalComponent;
-import org.carrot2.core.LocalComponentFactory;
-import org.carrot2.core.LocalControllerBase;
-import org.carrot2.core.LocalProcess;
-import org.carrot2.core.LocalProcessBase;
-import org.carrot2.core.MissingComponentException;
-import org.carrot2.core.MissingProcessException;
-import org.carrot2.core.ProcessingResult;
-import org.carrot2.core.clustering.RawCluster;
-import org.carrot2.core.controller.ControllerHelper;
-import org.carrot2.core.controller.LoaderExtensionUnknownException;
-import org.carrot2.core.impl.ArrayOutputComponent;
-import org.carrot2.core.linguistic.Language;
-import org.carrot2.filter.lingo.local.LingoLocalFilterComponent;
-import org.carrot2.util.tokenizer.languages.AllKnownLanguages;
-
-
-
-/**
- * This plugin provides an implementation of {@link OnlineClusterer}
- * extension using clustering components of the Carrot2 project
- * (<a href="http://www.carrot2.org">http://www.carrot2.org</a>).
- *
- * <p>This class hardcodes an equivalent of the following Carrot2 process:
- * <pre><![CDATA[
- * <local-process id="yahoo-lingo">
- * <name>Yahoo Search API -- Lingo Classic Clusterer</name>
- *
- * <input component-key="input-nutch" />
- * <filter component-key="filter-lingo" />
- * <output component-key="output-clustersConsumer" />
- * </local-process>
- * ]]></pre>
- */
-public class Clusterer implements OnlineClusterer, Configurable {
- /** Default language property name. */
- private final static String CONF_PROP_DEFAULT_LANGUAGE =
- "extension.clustering.carrot2.defaultLanguage";
-
- /** Recognizable languages property name. */
- private final static String CONF_PROP_LANGUAGES =
- "extension.clustering.carrot2.languages";
-
- /** Internal clustering process ID in Carrot2 LocalController */
- private final static String PROCESS_ID = "nutch-lingo";
-
- public static final Log logger = LogFactory.getLog(Clusterer.class);
-
- /** The LocalController instance used for clustering */
- private LocalControllerBase controller;
-
- /** Nutch configuration. */
- private Configuration conf;
-
- /**
- * Default language for hits. English by default, but may be changed
- * via a property in Nutch configuration.
- */
- private String defaultLanguage = "en";
-
- /**
- * A list of recognizable languages..
- * English only by default, but configurable via Nutch configuration.
- */
- private String [] languages = new String [] {defaultLanguage};
-
- /**
- * An empty public constructor for making new instances
- * of the clusterer.
- */
- public Clusterer() {
- // Don't forget to call {@link #setConf(Configuration)}.
- }
-
- /**
- * See {@link OnlineClusterer} for documentation.
- */
- public HitsCluster [] clusterHits(HitDetails [] hitDetails, String [] descriptions) {
- if (this.controller == null) {
- logger.error("initialize() not called.");
- return new HitsCluster[0];
- }
-
- final Map requestParams = new HashMap();
- requestParams.put(NutchInputComponent.NUTCH_INPUT_HIT_DETAILS_ARRAY,
- hitDetails);
- requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
- descriptions);
-
- try {
- // The input component takes Nutch's results so we don't need the query argument.
- final ProcessingResult result =
- controller.query(PROCESS_ID, "no-query", requestParams);
-
- final ArrayOutputComponent.Result output =
- (ArrayOutputComponent.Result) result.getQueryResult();
-
- final List outputClusters = output.clusters;
- final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];
-
- int j = 0;
- for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
- RawCluster rcluster = (RawCluster) i.next();
- clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
- }
-
- // invoke Carrot2 process here.
- return clusters;
- } catch (MissingProcessException e) {
- throw new RuntimeException("Missing clustering process.", e);
- } catch (Exception e) {
- throw new RuntimeException("Unidentified problems with the clustering.", e);
- }
- }
-
- /**
- * Implementation of {@link Configurable}
- */
- public void setConf(Configuration conf) {
- this.conf = conf;
-
- // Configure default language and other component settings.
- if (conf.get(CONF_PROP_DEFAULT_LANGUAGE) != null) {
- // Change the default language.
- this.defaultLanguage = conf.get(CONF_PROP_DEFAULT_LANGUAGE);
- }
- if (conf.getStrings(CONF_PROP_LANGUAGES) != null) {
- this.languages = conf.getStrings(CONF_PROP_LANGUAGES);
- }
-
- if (logger.isInfoEnabled()) {
- logger.info("Default language: " + defaultLanguage);
- logger.info("Enabled languages: " + Arrays.asList(languages));
- }
-
- initialize();
- }
-
- /**
- * Implementation of {@link Configurable}
- */
- public Configuration getConf() {
- return conf;
- }
-
- /**
- * Initialize clustering processes and Carrot2 components.
- */
- private synchronized void initialize() {
- // Initialize language list, temporarily switching off logging
- // of warnings. This is a bit of a hack, but we don't want to
- // redistribute the entire Carrot2 distro and this prevents
- // nasty ClassNotFound warnings.
- final Logger c2Logger = Logger.getLogger("org.carrot2");
- final Level original = c2Logger.getLevel();
- c2Logger.setLevel(Level.ERROR);
- AllKnownLanguages.getLanguageCodes();
- c2Logger.setLevel(original);
-
- // Initialize the controller.
- controller = new LocalControllerBase();
-
- final Configuration nutchConf = getConf();
- final String processResource = nutchConf.get(
- "extension.clustering.carrot2.process-resource");
-
- if (processResource == null) {
- logger.info("Using default clustering algorithm (Lingo).");
- addDefaultProcess();
- } else {
- logger.info("Using custom clustering process: " + processResource);
- controller.setComponentAutoload(true);
-
- final ControllerHelper helper = new ControllerHelper();
- final InputStream is = Thread.currentThread()
- .getContextClassLoader().getResourceAsStream(processResource);
- if (is != null) {
- try {
- final LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
- public LocalComponent getInstance() {
- return new NutchInputComponent(defaultLanguage);
- }
- };
- controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
-
- final LocalProcess process = helper.loadProcess(
- helper.getExtension(processResource), is).getProcess();
- controller.addProcess(PROCESS_ID, process);
- is.close();
- } catch (IOException e) {
- logger.error("Could not load process resource: " + processResource, e);
- } catch (LoaderExtensionUnknownException e) {
- logger.error("Unrecognized extension of process resource: " + processResource);
- } catch (InstantiationException e) {
- logger.error("Could not instantiate process: " + processResource, e);
- } catch (InitializationException e) {
- logger.error("Could not initialize process: " + processResource, e);
- } catch (DuplicatedKeyException e) {
- logger.error("Duplicated key (unreachable?): " + processResource, e);
- } catch (MissingComponentException e) {
- logger.error("Some components are missing, could not initialize process: "
- + processResource, e);
- }
- } else {
- logger.error("Could not find process resource: " + processResource);
- }
- }
- }
-
- /**
- * Adds a default clustering process using Lingo algorithm.
- */
- private void addDefaultProcess() {
- try {
- addComponentFactories();
- addProcesses();
- } catch (DuplicatedKeyException e) {
- logger.fatal("Duplicated component or process identifier.", e);
- }
- }
-
- /** Adds the required component factories to a local Carrot2 controller. */
- private void addComponentFactories() throws DuplicatedKeyException {
- // * <input component-key="input-nutch" />
- LocalComponentFactory nutchInputFactory = new LocalComponentFactory() {
- public LocalComponent getInstance() {
- return new NutchInputComponent(defaultLanguage);
- }
- };
- controller.addLocalComponentFactory("input-nutch", nutchInputFactory);
-
- // * <filter component-key="filter-lingo" />
- LocalComponentFactory lingoFactory = new LocalComponentFactory() {
- public LocalComponent getInstance() {
- final HashMap defaults = new HashMap();
-
- // These are adjustments settings for the clustering algorithm.
- // If you try the live WebStart demo of Carrot2 you can see how they affect
- // the final clustering: http://www.carrot2.org
- defaults.put("lsi.threshold.clusterAssignment", "0.150");
- defaults.put("lsi.threshold.candidateCluster", "0.775");
-
- // Initialize a new Lingo clustering component.
- ArrayList languageList = new ArrayList(languages.length);
- for (int i = 0; i < languages.length; i++) {
- final String lcode = languages[i];
- try {
- final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
- if (lang == null) {
- logger.warn("Language not supported in Carrot2: " + lcode);
- } else {
- languageList.add(lang);
- logger.debug("Language loaded: " + lcode);
- }
- } catch (Throwable t) {
- logger.warn("Language could not be loaded: " + lcode, t);
- }
- }
- return new LingoLocalFilterComponent(
- (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
- }
- };
- controller.addLocalComponentFactory("filter-lingo", lingoFactory);
-
- // * <output component-key="output-clustersConsumer" />
- LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
- public LocalComponent getInstance() {
- return new ArrayOutputComponent();
- }
- };
- controller.addLocalComponentFactory("output-array",
- clusterConsumerOutputFactory);
- }
-
- /**
- * Adds a hardcoded clustering process to the local controller.
- */
- private void addProcesses() {
- final LocalProcessBase process = new LocalProcessBase(
- "input-nutch",
- "output-array",
- new String [] {"filter-lingo"},
- "The Lingo clustering algorithm (www.carrot2.org).",
- "");
-
- try {
- controller.addProcess(PROCESS_ID, process);
- } catch (Exception e) {
- throw new RuntimeException("Could not assemble clustering process.", e);
- }
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.nutch.clustering.HitsCluster;
+import org.apache.nutch.clustering.OnlineClusterer;
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.DuplicatedKeyException;
+import org.carrot2.core.InitializationException;
+import org.carrot2.core.LocalComponent;
+import org.carrot2.core.LocalComponentFactory;
+import org.carrot2.core.LocalControllerBase;
+import org.carrot2.core.LocalProcess;
+import org.carrot2.core.LocalProcessBase;
+import org.carrot2.core.MissingComponentException;
+import org.carrot2.core.MissingProcessException;
+import org.carrot2.core.ProcessingResult;
+import org.carrot2.core.clustering.RawCluster;
+import org.carrot2.core.controller.ControllerHelper;
+import org.carrot2.core.controller.LoaderExtensionUnknownException;
+import org.carrot2.core.impl.ArrayOutputComponent;
+import org.carrot2.core.linguistic.Language;
+import org.carrot2.filter.lingo.local.LingoLocalFilterComponent;
+import org.carrot2.util.tokenizer.languages.AllKnownLanguages;
+
+
+
+/**
+ * This plugin provides an implementation of {@link OnlineClusterer}
+ * extension using clustering components of the Carrot2 project
+ * (<a href="http://www.carrot2.org">http://www.carrot2.org</a>).
+ *
+ * <p>This class hardcodes an equivalent of the following Carrot2 process:
+ * <pre><![CDATA[
+ * <local-process id="nutch-lingo">
+ * <name>The Lingo clustering algorithm (www.carrot2.org).</name>
+ *
+ * <input component-key="input-nutch" />
+ * <filter component-key="filter-lingo" />
+ * <output component-key="output-array" />
+ * </local-process>
+ * ]]></pre>
+ */
+public class Clusterer implements OnlineClusterer, Configurable {
+  /** Default language property name. */
+  private final static String CONF_PROP_DEFAULT_LANGUAGE =
+    "extension.clustering.carrot2.defaultLanguage";
+
+  /** Recognizable languages property name. */
+  private final static String CONF_PROP_LANGUAGES =
+    "extension.clustering.carrot2.languages";
+
+  /** Property naming an optional classpath resource with a custom process. */
+  private final static String CONF_PROP_PROCESS_RESOURCE =
+    "extension.clustering.carrot2.process-resource";
+
+  /** Internal clustering process ID in Carrot2 LocalController */
+  private final static String PROCESS_ID = "nutch-lingo";
+
+  public static final Log logger = LogFactory.getLog(Clusterer.class);
+
+  /** The LocalController instance used for clustering */
+  private LocalControllerBase controller;
+
+  /** Nutch configuration. */
+  private Configuration conf;
+
+  /**
+   * Default language for hits. English by default, but may be changed
+   * via a property in Nutch configuration.
+   */
+  private String defaultLanguage = "en";
+
+  /**
+   * A list of recognizable languages.
+   * English only by default, but configurable via Nutch configuration.
+   */
+  private String [] languages = new String [] {defaultLanguage};
+
+  /**
+   * An empty public constructor for making new instances
+   * of the clusterer. {@link #setConf(Configuration)} must be called
+   * before clustering, because it is what creates the controller.
+   */
+  public Clusterer() {
+    // Intentionally empty: all set-up happens in setConf(Configuration).
+  }
+
+  /**
+   * See {@link OnlineClusterer} for documentation.
+   *
+   * @param hitDetails hits handed to the input component
+   * @param descriptions summaries handed to the input component
+   * @return one adapter per cluster reported by the output component, or
+   *         an empty array when the controller has not been created yet
+   * @throws RuntimeException if the process is missing or clustering fails
+   */
+  public HitsCluster [] clusterHits(HitDetails [] hitDetails, String [] descriptions) {
+    if (this.controller == null) {
+      // The controller is created by initialize(), which only setConf() invokes.
+      logger.error("initialize() not called.");
+      return new HitsCluster[0];
+    }
+
+    final Map requestParams = new HashMap();
+    requestParams.put(NutchInputComponent.NUTCH_INPUT_HIT_DETAILS_ARRAY,
+      hitDetails);
+    requestParams.put(NutchInputComponent.NUTCH_INPUT_SUMMARIES_ARRAY,
+      descriptions);
+
+    try {
+      // The input component takes Nutch's results from the request
+      // parameters, so the query argument is only a placeholder.
+      final ProcessingResult result =
+        controller.query(PROCESS_ID, "no-query", requestParams);
+
+      final ArrayOutputComponent.Result output =
+        (ArrayOutputComponent.Result) result.getQueryResult();
+
+      // Wrap every Carrot2 cluster in a Nutch-facing adapter.
+      final List outputClusters = output.clusters;
+      final HitsCluster [] clusters = new HitsCluster[ outputClusters.size() ];
+
+      int j = 0;
+      for (Iterator i = outputClusters.iterator(); i.hasNext(); j++) {
+        RawCluster rcluster = (RawCluster) i.next();
+        clusters[j] = new HitsClusterAdapter(rcluster, hitDetails);
+      }
+
+      return clusters;
+    } catch (MissingProcessException e) {
+      throw new RuntimeException("Missing clustering process.", e);
+    } catch (Exception e) {
+      throw new RuntimeException("Unidentified problems with the clustering.", e);
+    }
+  }
+
+  /**
+   * Implementation of {@link Configurable}. Reads the language settings
+   * from the configuration and then (re)builds the Carrot2 controller.
+   */
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+
+    // Configure default language and other component settings; values
+    // absent from the configuration leave the built-in defaults in place.
+    final String configuredDefault = conf.get(CONF_PROP_DEFAULT_LANGUAGE);
+    if (configuredDefault != null) {
+      this.defaultLanguage = configuredDefault;
+    }
+    final String [] configuredLanguages = conf.getStrings(CONF_PROP_LANGUAGES);
+    if (configuredLanguages != null) {
+      this.languages = configuredLanguages;
+    }
+
+    if (logger.isInfoEnabled()) {
+      logger.info("Default language: " + defaultLanguage);
+      logger.info("Enabled languages: " + Arrays.asList(languages));
+    }
+
+    initialize();
+  }
+
+  /**
+   * Implementation of {@link Configurable}
+   */
+  public Configuration getConf() {
+    return conf;
+  }
+
+  /**
+   * Initialize clustering processes and Carrot2 components.
+   *
+   * Uses the built-in Lingo process unless the configuration names a
+   * custom process resource. Failures to load a custom process are logged
+   * and leave the controller without a registered process.
+   */
+  private synchronized void initialize() {
+    // Initialize language list, temporarily switching off logging
+    // of warnings. This is a bit of a hack, but we don't want to
+    // redistribute the entire Carrot2 distro and this prevents
+    // nasty ClassNotFound warnings.
+    final Logger c2Logger = Logger.getLogger("org.carrot2");
+    final Level original = c2Logger.getLevel();
+    c2Logger.setLevel(Level.ERROR);
+    try {
+      AllKnownLanguages.getLanguageCodes();
+    } finally {
+      // Restore the level even if language discovery fails, so the
+      // temporary override can never become permanent.
+      c2Logger.setLevel(original);
+    }
+
+    // Initialize the controller.
+    controller = new LocalControllerBase();
+
+    final String processResource = getConf().get(CONF_PROP_PROCESS_RESOURCE);
+    if (processResource == null) {
+      logger.info("Using default clustering algorithm (Lingo).");
+      addDefaultProcess();
+      return;
+    }
+
+    logger.info("Using custom clustering process: " + processResource);
+    controller.setComponentAutoload(true);
+
+    final ControllerHelper helper = new ControllerHelper();
+    final InputStream is = Thread.currentThread()
+      .getContextClassLoader().getResourceAsStream(processResource);
+    if (is == null) {
+      logger.error("Could not find process resource: " + processResource);
+      return;
+    }
+
+    try {
+      try {
+        controller.addLocalComponentFactory("input-nutch", newNutchInputFactory());
+
+        final LocalProcess process = helper.loadProcess(
+          helper.getExtension(processResource), is).getProcess();
+        controller.addProcess(PROCESS_ID, process);
+      } finally {
+        // Close on every path; previously the stream leaked whenever
+        // loading or registering the process threw.
+        is.close();
+      }
+    } catch (IOException e) {
+      logger.error("Could not load process resource: " + processResource, e);
+    } catch (LoaderExtensionUnknownException e) {
+      logger.error("Unrecognized extension of process resource: " + processResource, e);
+    } catch (InstantiationException e) {
+      logger.error("Could not instantiate process: " + processResource, e);
+    } catch (InitializationException e) {
+      logger.error("Could not initialize process: " + processResource, e);
+    } catch (DuplicatedKeyException e) {
+      logger.error("Duplicated key (unreachable?): " + processResource, e);
+    } catch (MissingComponentException e) {
+      logger.error("Some components are missing, could not initialize process: "
+        + processResource, e);
+    }
+  }
+
+  /**
+   * Creates a factory whose components are {@link NutchInputComponent}
+   * instances using the default language current at creation time of
+   * each component.
+   */
+  private LocalComponentFactory newNutchInputFactory() {
+    return new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        return new NutchInputComponent(defaultLanguage);
+      }
+    };
+  }
+
+  /**
+   * Adds a default clustering process using Lingo algorithm.
+   */
+  private void addDefaultProcess() {
+    try {
+      addComponentFactories();
+      addProcesses();
+    } catch (DuplicatedKeyException e) {
+      logger.fatal("Duplicated component or process identifier.", e);
+    }
+  }
+
+  /** Adds the required component factories to a local Carrot2 controller. */
+  private void addComponentFactories() throws DuplicatedKeyException {
+    // * <input component-key="input-nutch" />
+    controller.addLocalComponentFactory("input-nutch", newNutchInputFactory());
+
+    // * <filter component-key="filter-lingo" />
+    LocalComponentFactory lingoFactory = new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        final HashMap defaults = new HashMap();
+
+        // These are adjustments settings for the clustering algorithm.
+        // If you try the live WebStart demo of Carrot2 you can see how they affect
+        // the final clustering: http://www.carrot2.org
+        defaults.put("lsi.threshold.clusterAssignment", "0.150");
+        defaults.put("lsi.threshold.candidateCluster", "0.775");
+
+        // Initialize a new Lingo clustering component with every
+        // configured language that Carrot2 can actually provide.
+        ArrayList languageList = new ArrayList(languages.length);
+        for (int i = 0; i < languages.length; i++) {
+          final String lcode = languages[i];
+          try {
+            final Language lang = AllKnownLanguages.getLanguageForIsoCode(lcode);
+            if (lang == null) {
+              logger.warn("Language not supported in Carrot2: " + lcode);
+            } else {
+              languageList.add(lang);
+              logger.debug("Language loaded: " + lcode);
+            }
+          } catch (Throwable t) {
+            // NOTE(review): Throwable rather than Exception, presumably so
+            // that linkage errors from language classes missing in the
+            // trimmed Carrot2 distribution only skip that language - confirm.
+            logger.warn("Language could not be loaded: " + lcode, t);
+          }
+        }
+        return new LingoLocalFilterComponent(
+          (Language []) languageList.toArray(new Language [languageList.size()]), defaults);
+      }
+    };
+    controller.addLocalComponentFactory("filter-lingo", lingoFactory);
+
+    // * <output component-key="output-array" />
+    LocalComponentFactory clusterConsumerOutputFactory = new LocalComponentFactory() {
+      public LocalComponent getInstance() {
+        return new ArrayOutputComponent();
+      }
+    };
+    controller.addLocalComponentFactory("output-array",
+      clusterConsumerOutputFactory);
+  }
+
+  /**
+   * Adds a hardcoded clustering process to the local controller. The
+   * process chains the "input-nutch", "filter-lingo" and "output-array"
+   * components registered by {@link #addComponentFactories()}.
+   *
+   * @throws RuntimeException if the controller rejects the process
+   */
+  private void addProcesses() {
+    final LocalProcessBase process = new LocalProcessBase(
+      "input-nutch",
+      "output-array",
+      new String [] {"filter-lingo"},
+      "The Lingo clustering algorithm (www.carrot2.org).",
+      "");
+
+    try {
+      controller.addProcess(PROCESS_ID, process);
+    } catch (Exception e) {
+      throw new RuntimeException("Could not assemble clustering process.", e);
+    }
+  }
+}
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/Clusterer.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java Fri Oct 9 17:02:32 2009
@@ -1,108 +1,108 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.nutch.clustering.HitsCluster;
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.clustering.RawCluster;
-import org.carrot2.core.clustering.RawDocument;
-
-/**
- * An adapter of Carrot2's {@link RawCluster} interface to
- * {@link HitsCluster} interface.
- */
-public class HitsClusterAdapter implements HitsCluster {
- private RawCluster rawCluster;
- private HitDetails [] hits;
-
- /**
- * Lazily initialized subclusters array.
- */
- private HitsCluster [] subclusters;
-
- /**
- * Lazily initialized documents array.
- */
- private HitDetails [] documents;
-
- /**
- * Creates a new adapter.
- */
- public HitsClusterAdapter(RawCluster rawCluster, HitDetails [] hits) {
- this.rawCluster = rawCluster;
- this.hits = hits;
- }
-
- /*
- * @see org.apache.nutch.clustering.HitsCluster#getSubclusters()
- */
- public HitsCluster[] getSubclusters() {
- if (this.subclusters == null) {
- final List rawSubclusters = rawCluster.getSubclusters();
- if (rawSubclusters == null || rawSubclusters.size() == 0) {
- subclusters = null;
- } else {
- subclusters = new HitsCluster[rawSubclusters.size()];
- int j = 0;
- for (Iterator i = rawSubclusters.iterator(); i.hasNext(); j++) {
- RawCluster c = (RawCluster) i.next();
- subclusters[j] = new HitsClusterAdapter(c, hits);
- }
- }
- }
-
- return subclusters;
- }
-
- /*
- * @see org.apache.nutch.clustering.HitsCluster#getHits()
- */
- public HitDetails[] getHits() {
- if (documents == null) {
- List rawDocuments = this.rawCluster.getDocuments();
- documents = new HitDetails[ rawDocuments.size() ];
-
- int j = 0;
- for (Iterator i = rawDocuments.iterator(); i.hasNext(); j++) {
- RawDocument doc = (RawDocument) i.next();
- Integer offset = (Integer) doc.getId();
- documents[j] = this.hits[offset.intValue()];
- }
- }
-
- return documents;
- }
-
- /*
- * @see org.apache.nutch.clustering.HitsCluster#getDescriptionLabels()
- */
- public String[] getDescriptionLabels() {
- List phrases = this.rawCluster.getClusterDescription();
- return (String []) phrases.toArray( new String [ phrases.size() ]);
- }
-
- /*
- * @see org.apache.nutch.clustering.HitsCluster#isJunkCluster()
- */
- public boolean isJunkCluster() {
- return rawCluster.getProperty(RawCluster.PROPERTY_JUNK_CLUSTER) != null;
- }
-}
-
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.nutch.clustering.HitsCluster;
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.clustering.RawCluster;
+import org.carrot2.core.clustering.RawDocument;
+
+/**
+ * An adapter of Carrot2's {@link RawCluster} interface to
+ * {@link HitsCluster} interface.
+ */
+public class HitsClusterAdapter implements HitsCluster {
+  /** The Carrot2 cluster this adapter exposes through the Nutch interface. */
+  private RawCluster rawCluster;
+
+  /** The hits array given at construction; documents index into it by id. */
+  private HitDetails [] hits;
+
+  /**
+   * Cache for {@link #getSubclusters()}; stays <code>null</code> while the
+   * raw cluster reports no subclusters.
+   */
+  private HitsCluster [] subclusters;
+
+  /**
+   * Cache for {@link #getHits()}, filled on the first call.
+   */
+  private HitDetails [] documents;
+
+  /**
+   * Wraps <code>rawCluster</code>, resolving its documents against
+   * <code>hits</code>.
+   */
+  public HitsClusterAdapter(RawCluster rawCluster, HitDetails [] hits) {
+    this.rawCluster = rawCluster;
+    this.hits = hits;
+  }
+
+  /*
+   * Returns adapters for the raw subclusters, or null when there are none.
+   *
+   * @see org.apache.nutch.clustering.HitsCluster#getSubclusters()
+   */
+  public HitsCluster[] getSubclusters() {
+    if (this.subclusters != null) {
+      return this.subclusters;
+    }
+
+    final List children = rawCluster.getSubclusters();
+    if (children == null || children.size() == 0) {
+      // Nothing to cache: the field stays null and null is the result.
+      this.subclusters = null;
+      return this.subclusters;
+    }
+
+    this.subclusters = new HitsCluster[children.size()];
+    final Iterator childIterator = children.iterator();
+    for (int slot = 0; childIterator.hasNext(); slot++) {
+      final RawCluster child = (RawCluster) childIterator.next();
+      this.subclusters[slot] = new HitsClusterAdapter(child, hits);
+    }
+    return this.subclusters;
+  }
+
+  /*
+   * Returns the hits of this cluster, looked up by each document's id.
+   *
+   * @see org.apache.nutch.clustering.HitsCluster#getHits()
+   */
+  public HitDetails[] getHits() {
+    if (documents != null) {
+      return documents;
+    }
+
+    final List members = this.rawCluster.getDocuments();
+    documents = new HitDetails[ members.size() ];
+
+    final Iterator memberIterator = members.iterator();
+    for (int slot = 0; memberIterator.hasNext(); slot++) {
+      final RawDocument member = (RawDocument) memberIterator.next();
+      // The document id is an Integer offset into the hits array.
+      final int hitIndex = ((Integer) member.getId()).intValue();
+      documents[slot] = this.hits[hitIndex];
+    }
+    return documents;
+  }
+
+  /*
+   * Returns the cluster description phrases as a String array.
+   *
+   * @see org.apache.nutch.clustering.HitsCluster#getDescriptionLabels()
+   */
+  public String[] getDescriptionLabels() {
+    final List labels = this.rawCluster.getClusterDescription();
+    final String [] target = new String [ labels.size() ];
+    return (String []) labels.toArray(target);
+  }
+
+  /*
+   * A cluster is junk when the raw cluster carries the junk property at all.
+   *
+   * @see org.apache.nutch.clustering.HitsCluster#isJunkCluster()
+   */
+  public boolean isJunkCluster() {
+    final Object junkMarker = rawCluster.getProperty(RawCluster.PROPERTY_JUNK_CLUSTER);
+    return junkMarker != null;
+  }
+}
+
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/HitsClusterAdapter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java Fri Oct 9 17:02:32 2009
@@ -1,65 +1,65 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.clustering.RawDocument;
-import org.carrot2.core.clustering.RawDocumentBase;
-
-/**
- * An adapter class that implements {@link RawDocument} required for Carrot2.
- */
-public class NutchDocument extends RawDocumentBase {
- /**
- * Integer identifier of this document. We need a subclass of
- * {@link java.lang.Object}, so this should do.
- */
- private final Integer id;
-
- /**
- * Creates a new document with the given id, <code>summary</code> and wrapping
- * a <code>details</code> hit details.
- */
- public NutchDocument(int id, HitDetails details, String summary, String defaultLanguage) {
- super(details.getValue("url"), details.getValue("title"), summary);
-
- // Handle document language -- attempt to extract it from the details,
- // otherwise set to the default.
- String lang = details.getValue("lang");
- if (lang == null) {
- // No default language. Take the default from the configuration file.
- lang = defaultLanguage;
- }
-
- // Use this language for the snippet. Truncate longer ISO codes
- // to only include two-letter language code.
- if (lang.length() > 2) {
- lang = lang.substring(0, 2);
- }
- lang = lang.toLowerCase();
- super.setProperty(RawDocument.PROPERTY_LANGUAGE, lang);
-
- this.id = Integer.valueOf(id);
- }
-
- /*
- * @see com.dawidweiss.carrot.core.local.clustering.RawDocument#getId()
- */
- public Object getId() {
- return id;
- }
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.clustering.RawDocument;
+import org.carrot2.core.clustering.RawDocumentBase;
+
+/**
+ * An adapter class that implements {@link RawDocument} required for Carrot2.
+ */
+public class NutchDocument extends RawDocumentBase {
+  /**
+   * Integer identifier of this document. We need a subclass of
+   * {@link java.lang.Object}, so this should do.
+   */
+  private final Integer id;
+
+  /**
+   * Creates a new document with the given id, <code>summary</code> and wrapping
+   * a <code>details</code> hit details.
+   *
+   * @param id identifier later returned by {@link #getId()}
+   * @param details hit whose "url", "title" and "lang" values are read
+   * @param summary summary text passed on to the base class
+   * @param defaultLanguage language code used when the hit has no "lang" value
+   */
+  public NutchDocument(int id, HitDetails details, String summary, String defaultLanguage) {
+    super(details.getValue("url"), details.getValue("title"), summary);
+
+    // Handle document language -- attempt to extract it from the details,
+    // otherwise set to the default.
+    String lang = details.getValue("lang");
+    if (lang == null) {
+      // The hit carries no language. Take the default supplied by the caller.
+      lang = defaultLanguage;
+    }
+
+    // Use this language for the snippet. Truncate longer ISO codes
+    // to only include two-letter language code.
+    if (lang.length() > 2) {
+      lang = lang.substring(0, 2);
+    }
+    // Lower-case with a fixed locale: with the JVM default locale a code
+    // such as "IT" would become "\u0131t" (dotless i) on a Turkish system.
+    lang = lang.toLowerCase(java.util.Locale.ENGLISH);
+    super.setProperty(RawDocument.PROPERTY_LANGUAGE, lang);
+
+    this.id = Integer.valueOf(id);
+  }
+
+  /*
+   * @see org.carrot2.core.clustering.RawDocument#getId()
+   */
+  public Object getId() {
+    return id;
+  }
}
\ No newline at end of file
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchDocument.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java?rev=823614&r1=823613&r2=823614&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java Fri Oct 9 17:02:32 2009
@@ -1,108 +1,108 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.clustering.carrot2;
-
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.nutch.searcher.HitDetails;
-import org.carrot2.core.LocalInputComponentBase;
-import org.carrot2.core.ProcessingException;
-import org.carrot2.core.RequestContext;
-import org.carrot2.core.clustering.RawDocumentsConsumer;
-import org.carrot2.core.clustering.RawDocumentsProducer;
-
-/**
- * An input component that ignores the query passed from the
- * controller and instead looks for data stored in the request context.
- * This enables us to reuse the same physical component implementation
- * for data that has already been acquired from Nutch.
- */
-public class NutchInputComponent extends LocalInputComponentBase {
- public final static String NUTCH_INPUT_HIT_DETAILS_ARRAY
- = "NUTCH_INPUT_HIT_DETAILS_ARRAY";
-
- public final static String NUTCH_INPUT_SUMMARIES_ARRAY
- = "NUTCH_INPUT_SUMMARIES_ARRAY";
-
- /** Capabilities required from the next component in the chain */
- private final static Set SUCCESSOR_CAPABILITIES = toSet(RawDocumentsConsumer.class);
-
- /** This component's capabilities */
- private final static Set COMPONENT_CAPABILITIES = toSet(RawDocumentsProducer.class);
-
- /**
- * Default language code for hits that don't have their own.
- */
- private String defaultLanguage;
-
- /**
- * Creates an input component with the given default language code.
- */
- public NutchInputComponent(String defaultLanguage) {
- this.defaultLanguage = defaultLanguage;
- }
-
- /*
- * @see com.dawidweiss.carrot.core.local.LocalInputComponent#setQuery(java.lang.String)
- */
- public void setQuery(String query) {
- // ignore the query; data will be provided from the request context.
- }
-
- /**
- * A callback hook that starts the processing.
- */
- public void startProcessing(RequestContext context) throws ProcessingException {
- // let successor components know that the processing has started.
- super.startProcessing(context);
-
- // get the information about documents from the context.
- final Map params = context.getRequestParameters();
- final HitDetails [] details = (HitDetails[]) params.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
- final String [] summaries = (String[]) params.get(NUTCH_INPUT_SUMMARIES_ARRAY);
-
- if (details == null)
- throw new ProcessingException("Details array must not be null.");
-
- if (summaries == null)
- throw new ProcessingException("Summaries array must not be null.");
-
- if (summaries.length != details.length)
- throw new ProcessingException("Summaries and details must be of the same length.");
-
- // produce 'documents' for successor components.
- final RawDocumentsConsumer consumer = (RawDocumentsConsumer) next;
- for (int i = 0; i < summaries.length; i++) {
- consumer.addDocument(new NutchDocument(i, details[i], summaries[i], defaultLanguage));
- }
- }
-
- /**
- * Returns the capabilities provided by this component.
- */
- public Set getComponentCapabilities() {
- return COMPONENT_CAPABILITIES;
- }
-
- /**
- * Returns the capabilities required from the successor component.
- */
- public Set getRequiredSuccessorCapabilities() {
- return SUCCESSOR_CAPABILITIES;
- }
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.clustering.carrot2;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.nutch.searcher.HitDetails;
+import org.carrot2.core.LocalInputComponentBase;
+import org.carrot2.core.ProcessingException;
+import org.carrot2.core.RequestContext;
+import org.carrot2.core.clustering.RawDocumentsConsumer;
+import org.carrot2.core.clustering.RawDocumentsProducer;
+
+/**
+ * Carrot2 input component that feeds already-fetched Nutch search results into
+ * a clustering chain. The query passed from the controller is ignored; hit
+ * details and summaries are read from the request context instead, so the same
+ * component implementation can be reused for data already acquired from Nutch.
+ */
+public class NutchInputComponent extends LocalInputComponentBase {
+  /** Request parameter key under which the HitDetails[] array is looked up. */
+  public final static String NUTCH_INPUT_HIT_DETAILS_ARRAY = "NUTCH_INPUT_HIT_DETAILS_ARRAY";
+
+  /** Request parameter key under which the String[] summaries are looked up. */
+  public final static String NUTCH_INPUT_SUMMARIES_ARRAY = "NUTCH_INPUT_SUMMARIES_ARRAY";
+
+  /** What the next component in the chain must be able to do. */
+  private final static Set SUCCESSOR_CAPABILITIES = toSet(RawDocumentsConsumer.class);
+
+  /** What this component itself provides. */
+  private final static Set COMPONENT_CAPABILITIES = toSet(RawDocumentsProducer.class);
+
+  /** Language code passed to every created NutchDocument as its fallback. */
+  private String defaultLanguage;
+
+  /**
+   * Creates an input component.
+   *
+   * @param defaultLanguage language code for hits that don't have their own
+   */
+  public NutchInputComponent(String defaultLanguage) {
+    this.defaultLanguage = defaultLanguage;
+  }
+
+  /**
+   * Deliberately does nothing: the documents to cluster are taken from the
+   * request context in startProcessing, not derived from a query.
+   */
+  public void setQuery(String query) {
+    // intentionally empty
+  }
+
+  /**
+   * Callback that starts the processing. Delegates to the superclass first,
+   * then reads the hit details and summaries from the request parameters and
+   * hands one NutchDocument per hit to the next component.
+   *
+   * @throws ProcessingException if either array is missing or their lengths differ
+   */
+  public void startProcessing(RequestContext context) throws ProcessingException {
+    super.startProcessing(context);
+
+    final Map requestParams = context.getRequestParameters();
+    final HitDetails[] hits = (HitDetails[]) requestParams.get(NUTCH_INPUT_HIT_DETAILS_ARRAY);
+    final String[] snippets = (String[]) requestParams.get(NUTCH_INPUT_SUMMARIES_ARRAY);
+
+    if (hits == null) {
+      throw new ProcessingException("Details array must not be null.");
+    }
+    if (snippets == null) {
+      throw new ProcessingException("Summaries array must not be null.");
+    }
+    if (hits.length != snippets.length) {
+      throw new ProcessingException("Summaries and details must be of the same length.");
+    }
+
+    // The array position doubles as the document id.
+    final RawDocumentsConsumer sink = (RawDocumentsConsumer) next;
+    for (int docId = 0; docId < hits.length; docId++) {
+      sink.addDocument(new NutchDocument(docId, hits[docId], snippets[docId], defaultLanguage));
+    }
+  }
+
+  /** @return the capabilities provided by this component */
+  public Set getComponentCapabilities() {
+    return COMPONENT_CAPABILITIES;
+  }
+
+  /** @return the capabilities required from the successor component */
+  public Set getRequiredSuccessorCapabilities() {
+    return SUCCESSOR_CAPABILITIES;
+  }
+}
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/java/org/apache/nutch/clustering/carrot2/NutchInputComponent.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/TestClusterer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/TestClusterer.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/test-input.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/clustering-carrot2/src/test/org/apache/nutch/clustering/carrot2/test-input.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/README.txt
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/crawl-urlfilter.txt
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/nutch-site.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/conf/nutch-site.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/anchor.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/rdf.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/data/rel.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/footer.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/header.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/include/style.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/search.jsp
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/web.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/creativecommons/src/web/web.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/feed/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/feed/lib/rome-0.9.LICENSE.txt
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/indexer/feed/FeedIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/src/java/org/apache/nutch/parse/feed/FeedParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-basic/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-basic/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-basic/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-basic/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-basic/src/java/org/apache/nutch/indexer/field/basic/BasicFieldFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-boost/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-boost/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-boost/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-boost/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/field-boost/src/java/org/apache/nutch/indexer/field/boost/BoostFieldFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/AnchorIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-anchor/src/java/org/apache/nutch/indexer/anchor/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-basic/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-basic/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-more/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/build.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL