You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2005/08/27 00:47:12 UTC
svn commit: r240359 - in /lucene/nutch/trunk/src:
java/org/apache/nutch/analysis/ java/org/apache/nutch/indexer/
plugin/nutch-extensionpoints/
Author: jerome
Date: Fri Aug 26 15:47:04 2005
New Revision: 240359
URL: http://svn.apache.org/viewcvs?rev=240359&view=rev
Log:
Add an analysis extension point
Added:
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java (with props)
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java (with props)
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
Added: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java?rev=240359&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java Fri Aug 26 15:47:04 2005
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.analysis;
+
+// JDK imports
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Logger;
+
+// Nutch imports
+import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.plugin.ExtensionPoint;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.nutch.plugin.PluginRuntimeException;
+import org.apache.nutch.util.LogFormatter;
+
+
+/**
+ * Creates and caches {@link NutchAnalyzer} plugins.
+ *
+ * @author Jérôme Charron
+ */
+public class AnalyzerFactory {
+
+ public final static Logger LOG =
+ LogFormatter.getLogger(AnalyzerFactory.class.getName());
+
+ private final static ExtensionPoint X_POINT =
+ PluginRepository.getInstance()
+ .getExtensionPoint(NutchAnalyzer.X_POINT_ID);
+
+ private final static Map CACHE = new HashMap();
+
+ private final static NutchAnalyzer DEFAULT_ANALYSER =
+ new NutchDocumentAnalyzer();
+
+
+ static {
+ if (X_POINT == null) {
+ throw new RuntimeException("x point " + NutchAnalyzer.X_POINT_ID +
+ " not found.");
+ }
+ }
+
+
+ private AnalyzerFactory() {}
+
+
+ /**
+ * Returns the appropriate {@link NutchAnalyzer} implementation given a
+ * language code.
+ *
+ * <p>NutchAnalyzer extensions should define the attribute "lang". The first
+ * plugin whose "lang" attribute equals the lang parameter is used. If none
+ * match or it cannot be instantiated, {@link NutchDocumentAnalyzer} is used.
+ */
+ public static NutchAnalyzer get(String lang) {
+
+ NutchAnalyzer analyzer = DEFAULT_ANALYSER;
+ Extension extension = getExtension(lang);
+ if (extension != null) {
+ try {
+ analyzer = (NutchAnalyzer) extension.getExtensionInstance();
+ } catch (PluginRuntimeException pre) {
+ analyzer = DEFAULT_ANALYSER;
+ }
+ }
+ return analyzer;
+ }
+
+ private static Extension getExtension(String lang) {
+
+ Extension extension = (Extension) CACHE.get(lang);
+ if (extension == null) {
+ extension = findExtension(lang);
+ CACHE.put(lang, extension);
+ }
+ return extension;
+ }
+
+ private static Extension findExtension(String lang) {
+
+ if (lang != null) {
+ Extension[] extensions = X_POINT.getExtentens();
+ for (int i=0; i<extensions.length; i++) {
+ if (lang.equals(extensions[i].getAttribute("lang"))) {
+ return extensions[i];
+ }
+ }
+ }
+ return null;
+ }
+
+}
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java?rev=240359&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java Fri Aug 26 15:47:04 2005
@@ -0,0 +1,45 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.analysis;
+
+// JDK imports
+import java.io.Reader;
+
+// Lucene imports
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+
+
+/**
+ * Extension point for analysis.
+ * {@link AnalyzerFactory} picks a single implementation per language code,
+ * matching on the "lang" attribute declared by the plugin extension.
+ *
+ * @author Jérôme Charron
+ */
+public abstract class NutchAnalyzer extends Analyzer {
+
+ /** The id of the extension point (this class's fully qualified name). */
+ final static String X_POINT_ID = NutchAnalyzer.class.getName();
+
+
+ /**
+ * Creates a TokenStream which tokenizes all the text in the provided Reader.
+ */
+ public abstract TokenStream tokenStream(String fieldName, Reader reader);
+
+
+}
Propchange: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java?rev=240359&r1=240358&r2=240359&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java Fri Aug 26 15:47:04 2005
@@ -13,20 +13,25 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.nutch.analysis;
+// JDK imports
+import java.io.Reader;
+import java.io.IOException;
+
+// Lucene imports
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
-import java.io.Reader;
-import java.io.IOException;
-/** The analyzer used for Nutch documents. Uses the JavaCC-defined lexical
- * analyzer {@link NutchDocumentTokenizer}, with no stop list. This keeps it
- * consistent with query parsing. */
-public class NutchDocumentAnalyzer extends Analyzer {
+
+/**
+ * The analyzer used for Nutch documents.
+ * Uses the JavaCC-defined lexical analyzer {@link NutchDocumentTokenizer},
+ * with no stop list. This keeps it consistent with query parsing.
+ */
+public class NutchDocumentAnalyzer extends NutchAnalyzer {
/** Analyzer used to index textual content. */
private static class ContentAnalyzer extends Analyzer {
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java?rev=240359&r1=240358&r2=240359&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java Fri Aug 26 15:47:04 2005
@@ -16,25 +16,20 @@
package org.apache.nutch.indexer;
-import org.apache.nutch.pagedb.*;
-import org.apache.nutch.linkdb.*;
import org.apache.nutch.fetcher.*;
import org.apache.nutch.parse.*;
import org.apache.nutch.analysis.NutchDocumentAnalyzer;
-import org.apache.nutch.db.*;
-import org.apache.nutch.io.*;
import org.apache.nutch.fs.*;
import org.apache.nutch.segment.SegmentReader;
import org.apache.nutch.util.*;
-
-import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.util.logging.*;
-import java.util.*;
import java.io.*;
+import org.apache.nutch.analysis.AnalyzerFactory;
+import org.apache.nutch.analysis.NutchAnalyzer;
/** Creates an index for the output corresponding to a single fetcher run. */
public class IndexSegment {
@@ -149,7 +144,11 @@
doc = IndexingFilters.filter(doc, parse, fetcherOutput);
// add the document to the index
- writer.addDocument(doc);
+ NutchAnalyzer analyzer = AnalyzerFactory.get(doc.get("lang"));
+ LOG.info(" Indexing [" + doc.getField("url").stringValue() +
+ "] with analyzer " + analyzer + " (" + doc.getField("lang").stringValue() + ")");
+ //LOG.info(" Doc is " + doc);
+ writer.addDocument(doc, analyzer);
if (count > 0 && count % LOG_STEP == 0) {
curTime = System.currentTimeMillis();
LOG.info(" Processed " + count + " records (" +
Modified: lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=240359&r1=240358&r2=240359&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Aug 26 15:47:04 2005
@@ -40,5 +40,8 @@
id="org.apache.nutch.net.URLFilter"
name="Nutch URL Filter"/>
+<extension-point
+ id="org.apache.nutch.analysis.NutchAnalyzer"
+ name="Nutch Analysis"/>
</plugin>
Re: [Nutch-cvs] svn commit: r240359 - in /lucene/nutch/trunk/src: java/org/apache/nutch/analysis/ java/org/apache/nutch/indexer/ plugin/nutch-extensionpoints/
Posted by Jérôme Charron <je...@gmail.com>.
> I see several instances of 'analySer' in comments/javadoc and some
> variables. That should probably be changed to american version -
> analyzer, for consistency's sake.

Corrected/Committed
(http://svn.apache.org/viewcvs.cgi?rev=265020&view=rev)
Regards
Jérôme
--
http://motrech.free.fr/
http://www.frutch.org/
Re: [Nutch-cvs] svn commit: r240359 - in /lucene/nutch/trunk/src: java/org/apache/nutch/analysis/ java/org/apache/nutch/indexer/ plugin/nutch-extensionpoints/
Posted by Jérôme Charron <je...@gmail.com>.
>
> I see several instances of 'analySer' in comments/javadoc and some
> variables. That should probably be changed to american version -
> analyzer, for consistency's sake.
Yes, that's right.
Thanks.
Jérôme
--
http://motrech.free.fr/
http://www.frutch.org/
Re: [Nutch-cvs] svn commit: r240359 - in /lucene/nutch/trunk/src: java/org/apache/nutch/analysis/ java/org/apache/nutch/indexer/ plugin/nutch-extensionpoints/
Posted by og...@yahoo.com.
I see several instances of 'analySer' in comments/javadoc and some
variables. That should probably be changed to american version -
analyzer, for consistency's sake.
Otis
--- jerome@apache.org wrote:
> Author: jerome
> Date: Fri Aug 26 15:47:04 2005
> New Revision: 240359
>
> URL: http://svn.apache.org/viewcvs?rev=240359&view=rev
> Log:
> Add an analysis extension point
>
> Added:
>
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
> (with props)
>
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
> (with props)
> Modified:
>
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
>
>
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
> lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
>
> Added:
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
> URL:
>
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java?rev=240359&view=auto
>
==============================================================================
> ---
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
> (added)
> +++
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
> Fri Aug 26 15:47:04 2005
> @@ -0,0 +1,107 @@
> +/**
> + * Copyright 2005 The Apache Software Foundation
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions
> and
> + * limitations under the License.
> + */
> +package org.apache.nutch.analysis;
> +
> +// JDK imports
> +import java.util.HashMap;
> +import java.util.Map;
> +import java.util.logging.Logger;
> +
> +// Nutch imports
> +import org.apache.nutch.plugin.Extension;
> +import org.apache.nutch.plugin.ExtensionPoint;
> +import org.apache.nutch.plugin.PluginRepository;
> +import org.apache.nutch.plugin.PluginRuntimeException;
> +import org.apache.nutch.util.LogFormatter;
> +
> +
> +/**
> + * Creates and caches {@link NutchAnalyzer} plugins.
> + *
> + * @author Jérôme Charron
> + */
> +public class AnalyzerFactory {
> +
> + public final static Logger LOG =
> + LogFormatter.getLogger(AnalyzerFactory.class.getName());
> +
> + private final static ExtensionPoint X_POINT =
> + PluginRepository.getInstance()
> +
> .getExtensionPoint(NutchAnalyzer.X_POINT_ID);
> +
> + private final static Map CACHE = new HashMap();
> +
> + private final static NutchAnalyzer DEFAULT_ANALYSER =
> + new
> NutchDocumentAnalyzer();
> +
> +
> + static {
> + if (X_POINT == null) {
> + throw new RuntimeException("x point " +
> NutchAnalyzer.X_POINT_ID +
> + " not found.");
> + }
> + }
> +
> +
> + private AnalyzerFactory() {}
> +
> +
> + /**
> + * Returns the appropriate {@link Analyser} implementation given a
> language
> + * code.
> + *
> + * <p>NutchAnalyser extensions should define the attribute "lang".
> The first
> + * plugin found whose "lang" attribute equals the specified lang
> parameter is
> + * used. If none match, then the {@link NutchDocumentAnalyzer} is
> used.
> + */
> + public static NutchAnalyzer get(String lang) {
> +
> + NutchAnalyzer analyzer = DEFAULT_ANALYSER;
> + Extension extension = getExtension(lang);
> + if (extension != null) {
> + try {
> + analyzer = (NutchAnalyzer)
> extension.getExtensionInstance();
> + } catch (PluginRuntimeException pre) {
> + analyzer = DEFAULT_ANALYSER;
> + }
> + }
> + return analyzer;
> + }
> +
> + private static Extension getExtension(String lang) {
> +
> + Extension extension = (Extension) CACHE.get(lang);
> + if (extension == null) {
> + extension = findExtension(lang);
> + CACHE.put(lang, extension);
> + }
> + return extension;
> + }
> +
> + private static Extension findExtension(String lang) {
> +
> + if (lang != null) {
> + Extension[] extensions = X_POINT.getExtentens();
> + for (int i=0; i<extensions.length; i++) {
> + if (lang.equals(extensions[i].getAttribute("lang"))) {
> + return extensions[i];
> + }
> + }
> + }
> + return null;
> + }
> +
> +}
>
> Propchange:
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
>
------------------------------------------------------------------------------
> svn:eol-style = native
>
> Added:
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
> URL:
>
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java?rev=240359&view=auto
>
==============================================================================
> ---
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
> (added)
> +++
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
> Fri Aug 26 15:47:04 2005
> @@ -0,0 +1,45 @@
> +/**
> + * Copyright 2005 The Apache Software Foundation
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + * http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing,
> software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> implied.
> + * See the License for the specific language governing permissions
> and
> + * limitations under the License.
> + */
> +package org.apache.nutch.analysis;
> +
> +// JDK imports
> +import java.io.Reader;
> +
> +// Lucene imports
> +import org.apache.lucene.analysis.Analyzer;
> +import org.apache.lucene.analysis.TokenStream;
> +
> +
> +/**
> + * Extension point for analysis.
> + * All plugins found which implement this extension point are run
> + * sequentially on the parse.
> + *
> + * @author Jérôme Charron
> + */
> +public abstract class NutchAnalyzer extends Analyzer {
> +
> + /** The name of the extension point. */
> + final static String X_POINT_ID = NutchAnalyzer.class.getName();
> +
> +
> + /**
> + * Creates a TokenStream which tokenizes all the text in the
> provided Reader.
> + */
> + public abstract TokenStream tokenStream(String fieldName, Reader
> reader);
> +
> +
> +}
>
> Propchange:
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalyzer.java
>
------------------------------------------------------------------------------
> svn:eol-style = native
>
> Modified:
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
> URL:
>
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java?rev=240359&r1=240358&r2=240359&view=diff
>
==============================================================================
> ---
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
> (original)
> +++
>
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
> Fri Aug 26 15:47:04 2005
> @@ -13,20 +13,25 @@
> * See the License for the specific language governing permissions
> and
> * limitations under the License.
> */
> -
> package org.apache.nutch.analysis;
>
> +// JDK imports
> +import java.io.Reader;
> +import java.io.IOException;
> +
> +// Lucene imports
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.TokenFilter;
> import org.apache.lucene.analysis.TokenStream;
> import org.apache.lucene.analysis.Token;
> -import java.io.Reader;
> -import java.io.IOException;
>
> -/** The analyzer used for Nutch documents. Uses the JavaCC-defined
> lexical
> - * analyzer {@link NutchDocumentTokenizer}, with no stop list. This
> keeps it
> - * consistent with query parsing. */
> -public class NutchDocumentAnalyzer extends Analyzer {
> +
> +/**
> + * The analyzer used for Nutch documents.
> + * Uses the JavaCC-defined lexical analyzer {@link
> NutchDocumentTokenizer},
> + * with no stop list. This keeps it consistent with query parsing.
> + */
> +public class NutchDocumentAnalyzer extends NutchAnalyzer {
>
> /** Analyzer used to index textual content. */
> private static class ContentAnalyzer extends Analyzer {
>
> Modified:
>
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
> URL:
>
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java?rev=240359&r1=240358&r2=240359&view=diff
>
==============================================================================
> ---
>
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
> (original)
> +++
>
lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSegment.java
> Fri Aug 26 15:47:04 2005
> @@ -16,25 +16,20 @@
>
> package org.apache.nutch.indexer;
>
> -import org.apache.nutch.pagedb.*;
> -import org.apache.nutch.linkdb.*;
> import org.apache.nutch.fetcher.*;
> import org.apache.nutch.parse.*;
> import org.apache.nutch.analysis.NutchDocumentAnalyzer;
> -import org.apache.nutch.db.*;
> -import org.apache.nutch.io.*;
> import org.apache.nutch.fs.*;
> import org.apache.nutch.segment.SegmentReader;
> import org.apache.nutch.util.*;
> -
> -import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
>
> import java.util.logging.*;
> -import java.util.*;
> import java.io.*;
> +import org.apache.nutch.analysis.AnalyzerFactory;
> +import org.apache.nutch.analysis.NutchAnalyzer;
>
> /** Creates an index for the output corresponding to a single
> fetcher run. */
> public class IndexSegment {
> @@ -149,7 +144,11 @@
> doc = IndexingFilters.filter(doc, parse,
> fetcherOutput);
>
> // add the document to the index
> - writer.addDocument(doc);
> + NutchAnalyzer analyzer =
> AnalyzerFactory.get(doc.get("lang"));
> + LOG.info(" Indexing [" +
> doc.getField("url").stringValue() +
> + "] with analyzer " + analyzer + " (" +
> doc.getField("lang").stringValue() + ")");
> + //LOG.info(" Doc is " + doc);
> + writer.addDocument(doc, analyzer);
> if (count > 0 && count % LOG_STEP == 0) {
> curTime = System.currentTimeMillis();
> LOG.info(" Processed " + count + " records (" +
>
> Modified:
> lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
> URL:
>
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=240359&r1=240358&r2=240359&view=diff
>
==============================================================================
> --- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
> (original)
> +++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
> Fri Aug 26 15:47:04 2005
> @@ -40,5 +40,8 @@
> id="org.apache.nutch.net.URLFilter"
> name="Nutch URL Filter"/>
>
> +<extension-point
> + id="org.apache.nutch.analysis.NutchAnalyzer"
> + name="Nutch Analysis"/>
>
> </plugin>
>
>
>
>
> -------------------------------------------------------
> SF.Net email is Sponsored by the Better Software Conference & EXPO
> September 19-22, 2005 * San Francisco, CA * Development Lifecycle
> Practices
> Agile & Plan-Driven Development * Managing Projects & Teams * Testing
> & QA
> Security * Process Improvement & Measurement *
> http://www.sqe.com/bsce5sf
> _______________________________________________
> Nutch-cvs mailing list
> Nutch-cvs@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/nutch-cvs
>