You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by fr...@apache.org on 2013/12/18 14:20:43 UTC

svn commit: r1551935 - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/common/lucene/ core/src/main/java/org/apache/mahout/vectorizer/document/ core/src/main/java/org/apache/mahout/vectorizer/encoders/ core/src/test/java/org/apache/mahout/vec...

Author: frankscholten
Date: Wed Dec 18 13:20:42 2013
New Revision: 1551935

URL: http://svn.apache.org/r1551935
Log:
MAHOUT-1364: Upgrade Mahout codebase to Lucene 4.6.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DictionaryVectorizerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java
    mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
    mahout/trunk/pom.xml

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java Wed Dec 18 13:20:42 2013
@@ -32,7 +32,7 @@ public final class AnalyzerUtils {
    * @throws ClassNotFoundException - {@link ClassNotFoundException}
    */
   public static Analyzer createAnalyzer(String analyzerClassName) throws ClassNotFoundException {
-    return createAnalyzer(analyzerClassName, Version.LUCENE_45);
+    return createAnalyzer(analyzerClassName, Version.LUCENE_46);
   }
 
   public static Analyzer createAnalyzer(String analyzerClassName, Version version) throws ClassNotFoundException {
@@ -47,7 +47,7 @@ public final class AnalyzerUtils {
    * @return {@link Analyzer}
    */
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass) {
-    return createAnalyzer(analyzerClass, Version.LUCENE_45);
+    return createAnalyzer(analyzerClass, Version.LUCENE_46);
   }
 
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/lucene/TokenStreamIterator.java Wed Dec 18 13:20:42 2013
@@ -45,6 +45,8 @@ public final class TokenStreamIterator e
       if (tokenStream.incrementToken()) {
         return tokenStream.getAttribute(CharTermAttribute.class).toString();
       } else {
+        tokenStream.end();
+        tokenStream.close();
         return endOfData();
       }
     } catch (IOException e) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java Wed Dec 18 13:20:42 2013
@@ -42,7 +42,6 @@ public class SequenceFileTokenizerMapper
   @Override
   protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
     TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
-    stream.reset();
     CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
     stream.reset();
     StringTuple document = new StringTuple();

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java Wed Dec 18 13:20:42 2013
@@ -99,7 +99,7 @@ public class LuceneTextValueEncoder exte
       // do nothing
     }
   }
-  //GSI: TODO: we really need a way to make sure we call the TokenStream workflow here (i.e. end and close when done)
+
   private static final class LuceneTokenIterable implements Iterable<String> {
     private boolean firstTime = true;
     private final TokenStream tokenStream;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DictionaryVectorizerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DictionaryVectorizerTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DictionaryVectorizerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DictionaryVectorizerTest.java Wed Dec 18 13:20:42 2013
@@ -20,6 +20,8 @@ package org.apache.mahout.vectorizer;
 import java.io.IOException;
 import java.util.List;
 
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -45,6 +47,7 @@ import org.junit.Test;
 /**
  * Test the dictionary Vector
  */
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
 public final class DictionaryVectorizerTest extends MahoutTestCase {
 
   private static final int NUM_DOCS = 100;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java Wed Dec 18 13:20:42 2013
@@ -19,6 +19,7 @@ package org.apache.mahout.vectorizer;
 
 import java.util.Arrays;
 
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -38,6 +39,7 @@ import org.junit.Test;
  * by the {@link DocumentProcessor} into {@link SequenceFile}s of document ID and tokens (as
  * {@link StringTuple}).
  */
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
 public class DocumentProcessorTest extends MahoutTestCase {
 
   @Test

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java Wed Dec 18 13:20:42 2013
@@ -16,6 +16,7 @@ package org.apache.mahout.vectorizer;
  * limitations under the License.
  */
 
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.google.common.collect.Lists;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -39,6 +40,7 @@ import org.junit.Test;
 import java.util.Arrays;
 import java.util.List;
 
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
 public class HighDFWordsPrunerTest extends MahoutTestCase {
   private static final int NUM_DOCS = 100;
 

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java Wed Dec 18 13:20:42 2013
@@ -20,6 +20,7 @@ package org.apache.mahout.vectorizer;
 import java.io.IOException;
 import java.util.List;
 
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
 import com.google.common.collect.Lists;
 import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
@@ -36,6 +37,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.junit.Test;
 
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
 public class SparseVectorsFromSequenceFilesTest extends MahoutTestCase {
 
   private static final int NUM_DOCS = 100;

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java Wed Dec 18 13:20:42 2013
@@ -70,7 +70,7 @@ public final class TextValueEncoderTest 
   @Test
   public void testLuceneEncoding() throws Exception {
     LuceneTextValueEncoder enc = new LuceneTextValueEncoder("text");
-    enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_45));
+    enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_46));
     Vector v1 = new DenseVector(200);
     enc.addToVector("test1 and more", v1);
     enc.flush(1, v1);

Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java Wed Dec 18 13:20:42 2013
@@ -60,7 +60,7 @@ public final class NewsgroupHelper {
   private static final long WEEK = 7 * 24 * 3600;
   
   private final Random rand = RandomUtils.getRandom();  
-  private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
+  private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
   private final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
   private final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
   

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java Wed Dec 18 13:20:42 2013
@@ -27,7 +27,7 @@ import org.apache.hadoop.mapreduce.Input
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -57,7 +57,7 @@ public class LuceneSegmentInputFormat ex
       SegmentInfos segmentInfos = new SegmentInfos();
       segmentInfos.read(directory);
 
-      for (SegmentInfoPerCommit segmentInfo : segmentInfos) {
+      for (SegmentCommitInfo segmentInfo : segmentInfos) {
         LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name,
                                                                          segmentInfo.sizeInBytes());
         inputSplits.add(inputSplit);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java Wed Dec 18 13:20:42 2013
@@ -21,8 +21,8 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentInfoPerCommit;
 import org.apache.lucene.index.SegmentInfos;
 
 import java.io.DataInput;
@@ -87,14 +87,14 @@ public class LuceneSegmentInputSplit ext
    * @return the segment info or throws exception if not found
    * @throws IOException if an error occurs when accessing the directory
    */
-  public SegmentInfoPerCommit getSegment(Configuration configuration) throws IOException {
+  public SegmentCommitInfo getSegment(Configuration configuration) throws IOException {
     ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
                                                                             false, configuration);
 
     SegmentInfos segmentInfos = new SegmentInfos();
     segmentInfos.read(directory);
 
-    for (SegmentInfoPerCommit segmentInfo : segmentInfos) {
+    for (SegmentCommitInfo segmentInfo : segmentInfos) {
       if (segmentInfo.info.name.equals(segmentInfoName)) {
         return segmentInfo;
       }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java Wed Dec 18 13:20:42 2013
@@ -22,7 +22,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
@@ -52,7 +52,7 @@ public class LuceneSegmentRecordReader e
     Configuration configuration = context.getConfiguration();
     LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
 
-    SegmentInfoPerCommit segmentInfo = inputSplit.getSegment(configuration);
+    SegmentCommitInfo segmentInfo = inputSplit.getSegment(configuration);
     segmentReader = new SegmentReader(segmentInfo, USE_TERM_INFO, IOContext.READ);
 
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java Wed Dec 18 13:20:42 2013
@@ -44,7 +44,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
-import static org.apache.lucene.util.Version.LUCENE_45;
+import static org.apache.lucene.util.Version.LUCENE_46;
 
 /**
  * Holds all the configuration for {@link SequenceFilesFromLuceneStorage}, which generates a sequence file
@@ -212,7 +212,7 @@ public class LuceneStorageConfiguration 
       }
       idField = in.readUTF();
       fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
-      query = new QueryParser(LUCENE_45, "query", new StandardAnalyzer(LUCENE_45)).parse(in.readUTF());
+      query = new QueryParser(LUCENE_46, "query", new StandardAnalyzer(LUCENE_46)).parse(in.readUTF());
       maxHits = in.readInt();
     } catch (ParseException e) {
       throw new RuntimeException("Could not deserialize " + this.getClass().getName(), e);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java Wed Dec 18 13:20:42 2013
@@ -41,7 +41,7 @@ import org.apache.lucene.util.Version;
  * stop words, excluding non-alpha-numeric tokens, and porter stemming.
  */
 public final class MailArchivesClusteringAnalyzer extends StopwordAnalyzerBase {
-  private static final Version LUCENE_VERSION = Version.LUCENE_45;
+  private static final Version LUCENE_VERSION = Version.LUCENE_46;
   
   // extended set of stop words composed of common mail terms like "hi",
   // HTML tags, and Java keywords asmany of the messages in the archives

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java Wed Dec 18 13:20:42 2013
@@ -24,13 +24,7 @@ import org.apache.hadoop.fs.FSDataOutput
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.BufferedIndexInput;
-import org.apache.lucene.store.BufferedIndexOutput;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.*;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -193,11 +187,26 @@ public class ReadOnlyFileSystemDirectory
   }
 
   @Override
+  public void clearLock(String name) throws IOException {
+    // do nothing
+  }
+
+  @Override
   public void close() throws IOException {
     // do not close the file system
   }
 
   @Override
+  public void setLockFactory(LockFactory lockFactory) throws IOException {
+    // do nothing
+  }
+
+  @Override
+  public LockFactory getLockFactory() {
+    return null;
+  }
+
+  @Override
   public String toString() {
     return this.getClass().getName() + "@" + directory;
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java Wed Dec 18 13:20:42 2013
@@ -97,8 +97,8 @@ public class SequenceFilesFromLuceneStor
     if (hasOption(OPTION_QUERY)) {
       try {
         String queryString = COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
-        QueryParser queryParser = new QueryParser(Version.LUCENE_45, queryString,
-            new StandardAnalyzer(Version.LUCENE_45));
+        QueryParser queryParser = new QueryParser(Version.LUCENE_46, queryString,
+            new StandardAnalyzer(Version.LUCENE_46));
         query = queryParser.parse(queryString);
       } catch (ParseException e) {
         throw new IllegalArgumentException(e.getMessage(), e);

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java Wed Dec 18 13:20:42 2013
@@ -24,7 +24,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.IOContext;
 
@@ -49,7 +49,7 @@ public class SequenceFilesFromLuceneStor
     Configuration configuration = context.getConfiguration();
     l2sConf = new LuceneStorageConfiguration(configuration);
     LuceneSegmentInputSplit inputSplit = (LuceneSegmentInputSplit) context.getInputSplit();
-    SegmentInfoPerCommit segmentInfo = inputSplit.getSegment(configuration);
+    SegmentCommitInfo segmentInfo = inputSplit.getSegment(configuration);
     segmentReader = new SegmentReader(segmentInfo, LuceneSeqFileHelper.USE_TERM_INFOS, IOContext.READ);
   }
 

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java Wed Dec 18 13:20:42 2013
@@ -33,19 +33,19 @@ import org.apache.lucene.util.Version;
 public class WikipediaAnalyzer extends StopwordAnalyzerBase {
   
   public WikipediaAnalyzer() {
-    super(Version.LUCENE_45, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    super(Version.LUCENE_46, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
   }
   
   public WikipediaAnalyzer(CharArraySet stopSet) {
-    super(Version.LUCENE_45, stopSet);
+    super(Version.LUCENE_46, stopSet);
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
     Tokenizer tokenizer = new WikipediaTokenizer(reader);
-    TokenStream result = new StandardFilter(Version.LUCENE_45, tokenizer);
-    result = new LowerCaseFilter(Version.LUCENE_45, result);
-    result = new StopFilter(Version.LUCENE_45, result, getStopwordSet());
+    TokenStream result = new StandardFilter(Version.LUCENE_46, tokenizer);
+    result = new LowerCaseFilter(Version.LUCENE_46, result);
+    result = new StopFilter(Version.LUCENE_46, result, getStopwordSet());
     return new TokenStreamComponents(tokenizer, result);
   }
 }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java Wed Dec 18 13:20:42 2013
@@ -38,7 +38,7 @@ public class AnalyzerTransformer impleme
   private static final Logger log = LoggerFactory.getLogger(AnalyzerTransformer.class);
 
   public AnalyzerTransformer() {
-    this(new StandardAnalyzer(Version.LUCENE_45), "text");
+    this(new StandardAnalyzer(Version.LUCENE_46), "text");
   }
 
   public AnalyzerTransformer(Analyzer analyzer) {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Wed Dec 18 13:20:42 2013
@@ -97,7 +97,7 @@ public final class TestClusterDumper ext
     RAMDirectory directory = new RAMDirectory();
     
     IndexWriter writer = new IndexWriter(directory, 
-           new IndexWriterConfig(Version.LUCENE_45, new StandardAnalyzer(Version.LUCENE_45)));
+           new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
             
     try {
       for (int i = 0; i < docs2.length; i++) {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java Wed Dec 18 13:20:42 2013
@@ -60,7 +60,7 @@ public abstract class AbstractLuceneStor
   }
 
   protected void commitDocuments(Directory directory, Iterable<SingleFieldDocument> theDocs) throws IOException{
-    IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_45, new StandardAnalyzer(Version.LUCENE_45)));
+    IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
 
     for (SingleFieldDocument singleFieldDocument : theDocs) {
       indexWriter.addDocument(singleFieldDocument.asLuceneDocument());

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java Wed Dec 18 13:20:42 2013
@@ -17,7 +17,7 @@
 package org.apache.mahout.text;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.lucene.index.SegmentInfoPerCommit;
+import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.store.IOContext;
 import org.apache.mahout.common.HadoopUtil;
@@ -78,7 +78,7 @@ public class LuceneSegmentInputSplitTest
 
   private void assertSegmentContainsOneDoc(String segmentName) throws IOException {
     LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath1, segmentName, 1000);
-    SegmentInfoPerCommit segment = inputSplit.getSegment(configuration);
+    SegmentCommitInfo segment = inputSplit.getSegment(configuration);
     SegmentReader segmentReader = new SegmentReader(segment, 1, IOContext.READ);//SegmentReader.get(true, segment, 1);
     assertEquals(segmentName, segment.info.name);
     assertEquals(1, segmentReader.numDocs());

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java Wed Dec 18 13:20:42 2013
@@ -59,7 +59,7 @@ public class LuceneSegmentRecordReaderTe
 
   @Test
   public void testKey() throws Exception {
-    for (SegmentInfoPerCommit segmentInfo : segmentInfos) {
+    for (SegmentCommitInfo segmentInfo : segmentInfos) {
       int docId = 0;
       LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(getIndexPath1(), segmentInfo.info.name, segmentInfo.sizeInBytes());
       TaskAttemptContext context = getTaskAttemptContext(configuration, new TaskAttemptID());
@@ -77,7 +77,7 @@ public class LuceneSegmentRecordReaderTe
   @Test(expected = IllegalArgumentException.class)
   public void testNonExistingIdField() throws Exception {
     configuration = new LuceneStorageConfiguration(getConfiguration(), asList(getIndexPath1()), new Path("output"), "nonExistingId", asList(FIELD)).serialize();
-    SegmentInfoPerCommit segmentInfo = segmentInfos.iterator().next();
+    SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
     LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(getIndexPath1(), segmentInfo.info.name, segmentInfo.sizeInBytes());
     TaskAttemptContext context = getTaskAttemptContext(configuration, new TaskAttemptID());
     recordReader.initialize(inputSplit, context);
@@ -86,7 +86,7 @@ public class LuceneSegmentRecordReaderTe
   @Test(expected = IllegalArgumentException.class)
   public void testNonExistingField() throws Exception {
     configuration = new LuceneStorageConfiguration(getConfiguration(), asList(getIndexPath1()), new Path("output"), ID_FIELD, asList("nonExistingField")).serialize();
-    SegmentInfoPerCommit segmentInfo = segmentInfos.iterator().next();
+    SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
     LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(getIndexPath1(), segmentInfo.info.name, segmentInfo.sizeInBytes());
     TaskAttemptContext context = getTaskAttemptContext(configuration, new TaskAttemptID());
     recordReader.initialize(inputSplit, context);

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/TestSequenceFilesFromDirectory.java Wed Dec 18 13:20:42 2013
@@ -132,7 +132,9 @@ public final class TestSequenceFilesFrom
       "--chunkSize", "64",
       "--charset", Charsets.UTF_8.name(),
       "--method", "mapreduce",
-      "--keyPrefix", "UID"});
+      "--keyPrefix", "UID",
+      "--fileFilterClass", ""
+    });
 
     checkMRResultFiles(conf, mrOutputDir, DATA1, "UID");
 

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilterTest.java Wed Dec 18 13:20:42 2013
@@ -80,7 +80,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testAnalyzer() throws IOException {
     Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_45);
+    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
     TokenStream ts = analyzer.tokenStream(null, reader);
     ts.reset();
     validateTokens(allTokens, ts);
@@ -92,7 +92,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testNonKeepdAnalyzer() throws IOException {
     Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_45);
+    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
     TokenStream ts = analyzer.tokenStream(null, reader);
     ts.reset();
     TokenStream f = new BloomTokenFilter(getFilter(filterTokens), false /* toss matching tokens */, ts);
@@ -105,7 +105,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testKeepAnalyzer() throws IOException {
     Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_45);
+    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
     TokenStream ts = analyzer.tokenStream(null, reader);
     ts.reset();
     TokenStream f = new BloomTokenFilter(getFilter(filterTokens), true /* keep matching tokens */, ts);
@@ -118,7 +118,7 @@ public final class BloomTokenFilterTest 
   @Test
   public void testShingleFilteredAnalyzer() throws IOException {
     Reader reader = new StringReader(input);
-    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_45);
+    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
     TokenStream ts = analyzer.tokenStream(null, reader);
     ts.reset();
     ShingleFilter sf = new ShingleFilter(ts, 3);

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/CachedTermInfoTest.java Wed Dec 18 13:20:42 2013
@@ -100,7 +100,7 @@ public class CachedTermInfoTest extends 
   static RAMDirectory createTestIndex(FieldType fieldType,
                                       RAMDirectory directory,
                                       int startingId) throws IOException {
-    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_45, new WhitespaceAnalyzer(Version.LUCENE_45)));
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)));
 
     try {
       for (int i = 0; i < DOCS.length; i++) {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/DriverTest.java Wed Dec 18 13:20:42 2013
@@ -91,8 +91,8 @@ public class DriverTest extends MahoutTe
   public void sequenceFileDictionary() throws IOException {
 
     Directory index = new SimpleFSDirectory(indexDir);
-    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
-    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
+    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
+    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
     final IndexWriter writer = new IndexWriter(index, config);
 
     try {

Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Wed Dec 18 13:20:42 2013
@@ -177,7 +177,7 @@ public final class LuceneIterableTest ex
   static RAMDirectory createTestIndex(FieldType fieldType,
                                               RAMDirectory directory,
                                               int startingId) throws IOException {
-    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_45,new StandardAnalyzer(Version.LUCENE_45)));
+    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_46,new StandardAnalyzer(Version.LUCENE_46)));
 
     try {
       for (int i = 0; i < DOCS.length; i++) {

Modified: mahout/trunk/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1551935&r1=1551934&r2=1551935&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Wed Dec 18 13:20:42 2013
@@ -105,7 +105,7 @@
     <mfindbugs.version>2.5.2</mfindbugs.version>
     <mjavadoc.version>2.9.1</mjavadoc.version>
     <hadoop.version>1.2.1</hadoop.version>
-    <lucene.version>4.5.1</lucene.version>
+    <lucene.version>4.6.0</lucene.version>
   </properties>
   <issueManagement>
     <system>Jira</system>