You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/05/14 00:12:09 UTC
svn commit: r1337994 - in /mahout/trunk:
examples/src/main/java/org/apache/mahout/analysis/ integration/
integration/src/main/java/org/apache/mahout/text/wikipedia/
Author: robinanil
Date: Sun May 13 22:12:08 2012
New Revision: 1337994
URL: http://svn.apache.org/viewvc?rev=1337994&view=rev
Log:
MAHOUT-1010 cleaning up dependency and moving wikipedia to integration
Added:
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
- copied, changed from r1337932, mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
Removed:
mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/
Modified:
mahout/trunk/integration/pom.xml
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
Modified: mahout/trunk/integration/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/pom.xml?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/pom.xml (original)
+++ mahout/trunk/integration/pom.xml Sun May 13 22:12:08 2012
@@ -148,6 +148,16 @@
</dependency>
<dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-benchmark</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-analyzers</artifactId>
+ </dependency>
+
+
+ <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.0.1</version>
Copied: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (from r1337932, mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?p2=mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java&r1=1337932&r2=1337994&rev=1337994&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Sun May 13 22:12:08 2012
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.mahout.analysis;
+package org.apache.mahout.text.wikipedia;
import java.io.Reader;
import java.util.Set;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java Sun May 13 22:12:08 2012
@@ -41,12 +41,11 @@ import org.apache.hadoop.mapreduce.lib.o
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericsUtil;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.mahout.analysis.WikipediaAnalyzer;
import org.apache.mahout.common.ClassUtils;
import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.FileLineIterable;
-import org.apache.mahout.common.HadoopUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java Sun May 13 22:12:08 2012
@@ -25,7 +25,6 @@ import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
-import com.google.common.collect.Lists;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
@@ -35,13 +34,13 @@ import org.apache.hadoop.mapreduce.Mappe
import org.apache.hadoop.util.GenericsUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.mahout.analysis.WikipediaAnalyzer;
import org.apache.mahout.common.ClassUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import com.google.common.collect.Lists;
+
/**
* Maps over the Wikipedia XML format and outputs all documents having a category listed in the input category
* file