You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2012/05/14 00:12:09 UTC

svn commit: r1337994 - in /mahout/trunk: examples/src/main/java/org/apache/mahout/analysis/ integration/ integration/src/main/java/org/apache/mahout/text/wikipedia/

Author: robinanil
Date: Sun May 13 22:12:08 2012
New Revision: 1337994

URL: http://svn.apache.org/viewvc?rev=1337994&view=rev
Log:
MAHOUT-1010 cleaning up dependency and moving wikipedia to integration

Added:
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
      - copied, changed from r1337932, mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java
Removed:
    mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/
Modified:
    mahout/trunk/integration/pom.xml
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java

Modified: mahout/trunk/integration/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/pom.xml?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/pom.xml (original)
+++ mahout/trunk/integration/pom.xml Sun May 13 22:12:08 2012
@@ -148,6 +148,16 @@
     </dependency>
 
     <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-benchmark</artifactId>
+    </dependency>
+    <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-analyzers</artifactId>
+    </dependency>
+
+
+    <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>2.0.1</version>

Copied: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (from r1337932, mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?p2=mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java&p1=mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java&r1=1337932&r2=1337994&rev=1337994&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/analysis/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Sun May 13 22:12:08 2012
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.analysis;
+package org.apache.mahout.text.wikipedia;
 
 import java.io.Reader;
 import java.util.Set;

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorDriver.java Sun May 13 22:12:08 2012
@@ -41,12 +41,11 @@ import org.apache.hadoop.mapreduce.lib.o
 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.mahout.analysis.WikipediaAnalyzer;
 import org.apache.mahout.common.ClassUtils;
 import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.iterator.FileLineIterable;
-import org.apache.mahout.common.HadoopUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java?rev=1337994&r1=1337993&r2=1337994&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaDatasetCreatorMapper.java Sun May 13 22:12:08 2012
@@ -25,7 +25,6 @@ import java.util.Locale;
 import java.util.Set;
 import java.util.regex.Pattern;
 
-import com.google.common.collect.Lists;
 import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DefaultStringifier;
@@ -35,13 +34,13 @@ import org.apache.hadoop.mapreduce.Mappe
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.mahout.analysis.WikipediaAnalyzer;
 import org.apache.mahout.common.ClassUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Lists;
+
 /**
  * Maps over Wikipedia xml format and output all document having the category listed in the input category
  * file