You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ak...@apache.org on 2015/04/08 19:22:10 UTC
mahout git commit: Revert "MAHOUT-1649: Upgrade to Lucene 4.10.x,
this closes apache/mahout#114"
Repository: mahout
Updated Branches:
refs/heads/master b5c63caf9 -> 864ba1aea
Revert "MAHOUT-1649: Upgrade to Lucene 4.10.x, this closes apache/mahout#114"
This reverts commit 670a7d219e4eab8c7735083d52cefa13e81197fb.
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/864ba1ae
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/864ba1ae
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/864ba1ae
Branch: refs/heads/master
Commit: 864ba1aea23ece6491acff38adab0cd1b0d29354
Parents: b5c63ca
Author: Andrew Musselman <ak...@apache.org>
Authored: Wed Apr 8 10:21:39 2015 -0700
Committer: Andrew Musselman <ak...@apache.org>
Committed: Wed Apr 8 10:21:39 2015 -0700
----------------------------------------------------------------------
.../mahout/classifier/NewsgroupHelper.java | 2 +-
integration/pom.xml | 10 -
.../mahout/text/LuceneSegmentInputFormat.java | 4 +-
.../mahout/text/LuceneSegmentInputSplit.java | 4 +-
.../mahout/text/LuceneSegmentRecordReader.java | 3 +-
.../mahout/text/LuceneStorageConfiguration.java | 4 +-
.../text/MailArchivesClusteringAnalyzer.java | 22 +-
.../text/ReadOnlyFileSystemDirectory.java | 354 +++++++++++++++++++
.../text/SequenceFilesFromLuceneStorage.java | 1 +
.../SequenceFilesFromLuceneStorageDriver.java | 3 +-
.../SequenceFilesFromMailArchivesMapper.java | 29 +-
.../text/wikipedia/WikipediaAnalyzer.java | 10 +-
.../mahout/utils/regex/AnalyzerTransformer.java | 2 +-
.../mahout/common/lucene/AnalyzerUtils.java | 4 +-
.../encoders/InteractionValueEncoder.java | 6 +-
.../mahout/classifier/ConfusionMatrixTest.java | 4 +-
.../classifier/df/DecisionForestTest.java | 1 +
.../apache/mahout/classifier/df/data/Utils.java | 10 +-
.../mapreduce/partial/PartialBuilderTest.java | 16 +-
.../classifier/mlp/TestNeuralNetwork.java | 11 +-
.../classifier/naivebayes/NaiveBayesTest.java | 17 +-
.../encoders/TextValueEncoderTest.java | 2 +-
pom.xml | 4 +-
23 files changed, 448 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java b/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
index 2c857cc..3674a57 100644
--- a/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
+++ b/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
@@ -60,7 +60,7 @@ public final class NewsgroupHelper {
private static final long WEEK = 7 * 24 * 3600;
private final Random rand = RandomUtils.getRandom();
- private final Analyzer analyzer = new StandardAnalyzer();
+ private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
private final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
private final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/pom.xml
----------------------------------------------------------------------
diff --git a/integration/pom.xml b/integration/pom.xml
index 9dcc03a..fcb85cb 100644
--- a/integration/pom.xml
+++ b/integration/pom.xml
@@ -139,16 +139,6 @@
</dependency>
<dependency>
- <groupId>org.apache.solr</groupId>
- <artifactId>solr-core</artifactId>
- <version>${lucene.version}</version>
- </dependency>
- <dependency>
- <groupId>commons-httpclient</groupId>
- <artifactId>commons-httpclient</artifactId>
- <version>3.1</version>
- </dependency>
- <dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>2.11.2</version>
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
index 60d48ce..1c4f8de 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
@@ -32,7 +32,6 @@ import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
-import org.apache.solr.store.hdfs.HdfsDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -53,7 +52,8 @@ public class LuceneSegmentInputFormat extends InputFormat {
List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
for (Path indexPath : indexPaths) {
- HdfsDirectory directory = new HdfsDirectory(indexPath, configuration);
+ ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+ false, configuration);
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(directory);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
index f30c7fb..1441e32 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
@@ -24,7 +24,6 @@ import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentInfos;
-import org.apache.solr.store.hdfs.HdfsDirectory;
import java.io.DataInput;
import java.io.DataOutput;
@@ -89,7 +88,8 @@ public class LuceneSegmentInputSplit extends InputSplit implements Writable {
* @throws IOException if an error occurs when accessing the directory
*/
public SegmentCommitInfo getSegment(Configuration configuration) throws IOException {
- HdfsDirectory directory = new HdfsDirectory(indexPath, configuration);
+ ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+ false, configuration);
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(directory);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
index d41ead2..485e856 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
@@ -62,8 +62,9 @@ public class LuceneSegmentRecordReader extends RecordReader<Text, NullWritable>
for (String field : lucene2SeqConfiguration.getFields()) {
LuceneIndexHelper.fieldShouldExistInIndex(segmentReader, field);
}
+
Weight weight = lucene2SeqConfiguration.getQuery().createWeight(searcher);
- scorer = weight.scorer(segmentReader.getContext(), segmentReader.getLiveDocs());
+ scorer = weight.scorer(segmentReader.getContext(), false, false, null);
if (scorer == null) {
throw new IllegalArgumentException("Could not create query scorer for query: "
+ lucene2SeqConfiguration.getQuery());
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
index 7eed822..b36f3e9 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
@@ -40,12 +40,12 @@ import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
-import org.apache.lucene.util.Version;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
+import static org.apache.lucene.util.Version.LUCENE_46;
/**
* Holds all the configuration for {@link SequenceFilesFromLuceneStorage}, which generates a sequence file
@@ -213,7 +213,7 @@ public class LuceneStorageConfiguration implements Writable {
}
idField = in.readUTF();
fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
- query = new QueryParser(Version.LUCENE_4_10_3, "query", new StandardAnalyzer(Version.LUCENE_4_10_3)).parse(in.readUTF());
+ query = new QueryParser(LUCENE_46, "query", new StandardAnalyzer(LUCENE_46)).parse(in.readUTF());
maxHits = in.readInt();
} catch (ParseException e) {
throw new RuntimeException("Could not deserialize " + this.getClass().getName(), e);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
index 4f6ba78..8776c5f 100644
--- a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
@@ -21,6 +21,7 @@ import java.io.Reader;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@@ -41,13 +42,13 @@ import org.apache.lucene.util.Version;
* stop words, excluding non-alpha-numeric tokens, and porter stemming.
*/
public final class MailArchivesClusteringAnalyzer extends StopwordAnalyzerBase {
- private static final Version LUCENE_VERSION = Version.LUCENE_4_10_3;
-
+ private static final Version LUCENE_VERSION = Version.LUCENE_46;
+
// extended set of stop words composed of common mail terms like "hi",
// HTML tags, and Java keywords as many of the messages in the archives
// are subversion check-in notifications
- private static final CharArraySet STOP_SET = new CharArraySet(Arrays.asList(
+ private static final CharArraySet STOP_SET = new CharArraySet(LUCENE_VERSION, Arrays.asList(
"3d","7bit","a0","about","above","abstract","across","additional","after",
"afterwards","again","against","align","all","almost","alone","along",
"already","also","although","always","am","among","amongst","amoungst",
@@ -107,17 +108,22 @@ public final class MailArchivesClusteringAnalyzer extends StopwordAnalyzerBase {
private static final Matcher MATCHER = ALPHA_NUMERIC.matcher("");
public MailArchivesClusteringAnalyzer() {
- super(STOP_SET);
+ super(LUCENE_VERSION, STOP_SET);
}
+ public MailArchivesClusteringAnalyzer(CharArraySet stopSet) {
+ super(LUCENE_VERSION, stopSet);
+
+ }
+
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = new StandardTokenizer(reader);
- TokenStream result = new StandardFilter(tokenizer);
- result = new LowerCaseFilter(result);
+ Tokenizer tokenizer = new StandardTokenizer(LUCENE_VERSION, reader);
+ TokenStream result = new StandardFilter(LUCENE_VERSION, tokenizer);
+ result = new LowerCaseFilter(LUCENE_VERSION, result);
result = new ASCIIFoldingFilter(result);
result = new AlphaNumericMaxLengthFilter(result);
- result = new StopFilter(result, STOP_SET);
+ result = new StopFilter(LUCENE_VERSION, result, STOP_SET);
result = new PorterStemFilter(result);
return new TokenStreamComponents(tokenizer, result);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java b/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
new file mode 100644
index 0000000..e97e35b
--- /dev/null
+++ b/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
@@ -0,0 +1,354 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.text;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.store.BaseDirectory;
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.BufferedIndexOutput;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.LockFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Collection;
+
+//TODO: is there a better way of doing this in Lucene 4.x?
+
+/**
+ * This class implements a read-only Lucene Directory on top of a general FileSystem.
+ * Currently it does not support locking.
+ * <p/>
+ * // TODO: Rename to FileSystemReadOnlyDirectory
+ */
+public class ReadOnlyFileSystemDirectory extends BaseDirectory {
+
+ private final FileSystem fs;
+ private final Path directory;
+ private final int ioFileBufferSize;
+
+ private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);
+
+ /**
+ * Constructor
+ *
+ * @param fs - filesystem
+ * @param directory - directory path
+ * @param create - if true create the directory
+ * @param conf - MR Job Configuration
+ * @throws IOException
+ */
+
+ public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create,
+ Configuration conf) throws IOException {
+
+ this.fs = fs;
+ this.directory = directory;
+ this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);
+
+ if (create) {
+ create();
+ }
+
+ boolean isDir = false;
+ try {
+ FileStatus status = fs.getFileStatus(directory);
+ if (status != null) {
+ isDir = status.isDir();
+ }
+ } catch (IOException e) {
+ log.error(e.getMessage(), e);
+ }
+ if (!isDir) {
+ throw new IOException(directory + " is not a directory");
+ }
+ }
+
+
+ private void create() throws IOException {
+ if (!fs.exists(directory)) {
+ fs.mkdirs(directory);
+ }
+
+ boolean isDir = false;
+ try {
+ FileStatus status = fs.getFileStatus(directory);
+ if (status != null) {
+ isDir = status.isDir();
+ }
+ } catch (IOException e) {
+ log.error(e.getMessage(), e);
+ }
+ if (!isDir) {
+ throw new IOException(directory + " is not a directory");
+ }
+
+ // clear old index files
+ FileStatus[] fileStatus =
+ fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
+ for (FileStatus status : fileStatus) {
+ if (!fs.delete(status.getPath(), true)) {
+ throw new IOException("Cannot delete index file "
+ + status.getPath());
+ }
+ }
+ }
+
+ public String[] list() throws IOException {
+ FileStatus[] fileStatus =
+ fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
+ String[] result = new String[fileStatus.length];
+ for (int i = 0; i < fileStatus.length; i++) {
+ result[i] = fileStatus[i].getPath().getName();
+ }
+ return result;
+ }
+
+ @Override
+ public String[] listAll() throws IOException {
+ return list();
+ }
+
+ @Override
+ public boolean fileExists(String name) throws IOException {
+ return fs.exists(new Path(directory, name));
+ }
+
+ @Override
+ public long fileLength(String name) throws IOException {
+ return fs.getFileStatus(new Path(directory, name)).getLen();
+ }
+
+ @Override
+ public void deleteFile(String name) throws IOException {
+ if (!fs.delete(new Path(directory, name), true)) {
+ throw new IOException("Cannot delete index file " + name);
+ }
+ }
+
+ @Override
+ public IndexOutput createOutput(String name, IOContext context) throws IOException {
+ //TODO: What should we be doing with the IOContext here, if anything?
+ Path file = new Path(directory, name);
+ if (fs.exists(file) && !fs.delete(file, true)) {
+ // delete the existing one if applicable
+ throw new IOException("Cannot overwrite index file " + file);
+ }
+
+ return new FileSystemIndexOutput(file, ioFileBufferSize);
+ }
+
+ @Override
+ public void sync(Collection<String> names) throws IOException {
+ // do nothing, as this is read-only
+ }
+
+ @Override
+ public IndexInput openInput(String name, IOContext context) throws IOException {
+ return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize);
+ }
+
+ @Override
+ public Lock makeLock(final String name) {
+ return new Lock() {
+ public boolean obtain() {
+ return true;
+ }
+
+ public void release() {
+ }
+
+ public boolean isLocked() {
+ throw new UnsupportedOperationException();
+ }
+
+ public String toString() {
+ return "Lock@" + new Path(directory, name);
+ }
+ };
+ }
+
+ @Override
+ public void clearLock(String name) throws IOException {
+ // do nothing
+ }
+
+ @Override
+ public void close() throws IOException {
+ // do not close the file system
+ }
+
+ @Override
+ public void setLockFactory(LockFactory lockFactory) throws IOException {
+ // do nothing
+ }
+
+ @Override
+ public LockFactory getLockFactory() {
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return this.getClass().getName() + "@" + directory;
+ }
+
+ private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable {
+
+ // shared by clones
+ private class Descriptor {
+ public final FSDataInputStream in;
+ public long position; // cache of in.getPos()
+
+ public Descriptor(Path file, int ioFileBufferSize) throws IOException {
+ this.in = fs.open(file, ioFileBufferSize);
+ }
+ }
+
+ private final Path filePath; // for debugging
+ private final Descriptor descriptor;
+ private final long length;
+ private boolean isOpen;
+ private boolean isClone;
+
+ public FileSystemIndexInput(Path path, int ioFileBufferSize)
+ throws IOException {
+ super("FSII_" + path.getName(), ioFileBufferSize);
+ filePath = path;
+ descriptor = new Descriptor(path, ioFileBufferSize);
+ length = fs.getFileStatus(path).getLen();
+ isOpen = true;
+ }
+
+ @Override
+ protected void readInternal(byte[] b, int offset, int len)
+ throws IOException {
+ long position = getFilePointer();
+ if (position != descriptor.position) {
+ descriptor.in.seek(position);
+ descriptor.position = position;
+ }
+ int total = 0;
+ do {
+ int i = descriptor.in.read(b, offset + total, len - total);
+ if (i == -1) {
+ throw new IOException("Read past EOF");
+ }
+ descriptor.position += i;
+ total += i;
+ } while (total < len);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (!isClone) {
+ if (isOpen) {
+ descriptor.in.close();
+ isOpen = false;
+ } else {
+ throw new IOException("Index file " + filePath + " already closed");
+ }
+ }
+ }
+
+ @Override
+ protected void seekInternal(long position) {
+ // handled in readInternal()
+ }
+
+ @Override
+ public long length() {
+ return length;
+ }
+
+ @Override
+ protected void finalize() throws Throwable {
+ super.finalize();
+ if (!isClone && isOpen) {
+ close(); // close the file
+ }
+ }
+
+ @Override
+ public BufferedIndexInput clone() {
+ FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
+ clone.isClone = true;
+ return clone;
+ }
+ }
+
+ private class FileSystemIndexOutput extends BufferedIndexOutput {
+
+ private final Path filePath; // for debugging
+ private final FSDataOutputStream out;
+ private boolean isOpen;
+
+ public FileSystemIndexOutput(Path path, int ioFileBufferSize)
+ throws IOException {
+ filePath = path;
+ // overwrite is true by default
+ out = fs.create(path, true, ioFileBufferSize);
+ isOpen = true;
+ }
+
+ @Override
+ public void flushBuffer(byte[] b, int offset, int size) throws IOException {
+ out.write(b, offset, size);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (isOpen) {
+ super.close();
+ out.close();
+ isOpen = false;
+ } else {
+ throw new IOException("Index file " + filePath + " already closed");
+ }
+ }
+
+ @Override
+ public void seek(long pos) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long length() throws IOException {
+ return out.getPos();
+ }
+
+ @Override
+ protected void finalize() throws Throwable {
+ super.finalize();
+ if (isOpen) {
+ close(); // close the file
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
index 4906d3a..b7fd495 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
@@ -82,6 +82,7 @@ public class SequenceFilesFromLuceneStorage {
processedDocs = writerCollector.processedDocs;
Closeables.close(sequenceFileWriter, false);
directory.close();
+ //searcher.close();
reader.close();
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
index 4de372f..1bd3f3e 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
@@ -96,7 +96,8 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
if (hasOption(OPTION_QUERY)) {
try {
String queryString = COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
- QueryParser queryParser = new QueryParser(queryString, new StandardAnalyzer());
+ QueryParser queryParser = new QueryParser(Version.LUCENE_46, queryString,
+ new StandardAnalyzer(Version.LUCENE_46));
query = queryParser.parse(queryString);
} catch (ParseException e) {
throw new IllegalArgumentException(e.getMessage(), e);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
index 07226d3..203e8fb 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
@@ -17,21 +17,9 @@
package org.apache.mahout.text;
-import java.io.ByteArrayInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
@@ -44,6 +32,17 @@ import org.apache.mahout.common.iterator.FileLineIterable;
import org.apache.mahout.utils.email.MailOptions;
import org.apache.mahout.utils.email.MailProcessor;
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import static org.apache.mahout.text.SequenceFilesFromMailArchives.BODY_OPTION;
import static org.apache.mahout.text.SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION;
import static org.apache.mahout.text.SequenceFilesFromMailArchives.CHARSET_OPTION;
@@ -94,13 +93,13 @@ public class SequenceFilesFromMailArchivesMapper extends Mapper<IntWritable, Byt
options.setCharset(charset);
}
- List<Pattern> patterns = new ArrayList<>(5);
+ List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
// patternOrder is used downstream so that we can know what order the
// text is in instead
// of encoding it in the string, which
// would require more processing later to remove it pre feature
// selection.
- Map<String, Integer> patternOrder = new HashMap<>();
+ Map<String, Integer> patternOrder = Maps.newHashMap();
int order = 0;
if (!configuration.get(FROM_OPTION[1], "").equals("")) {
patterns.add(MailProcessor.FROM_PREFIX);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
index eae3d6d..ad55ba7 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
@@ -34,19 +34,19 @@ import org.apache.lucene.util.Version;
public class WikipediaAnalyzer extends StopwordAnalyzerBase {
public WikipediaAnalyzer() {
- super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+ super(Version.LUCENE_46, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
}
public WikipediaAnalyzer(CharArraySet stopSet) {
- super(stopSet);
+ super(Version.LUCENE_46, stopSet);
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new WikipediaTokenizer(reader);
- TokenStream result = new StandardFilter(tokenizer);
- result = new LowerCaseFilter(result);
- result = new StopFilter(result, getStopwordSet());
+ TokenStream result = new StandardFilter(Version.LUCENE_46, tokenizer);
+ result = new LowerCaseFilter(Version.LUCENE_46, result);
+ result = new StopFilter(Version.LUCENE_46, result, getStopwordSet());
return new TokenStreamComponents(tokenizer, result);
}
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
index 16623c9..36b166a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
+++ b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
@@ -37,7 +37,7 @@ public class AnalyzerTransformer implements RegexTransformer {
private static final Logger log = LoggerFactory.getLogger(AnalyzerTransformer.class);
public AnalyzerTransformer() {
- this(new StandardAnalyzer());
+ this(new StandardAnalyzer(Version.LUCENE_46), "text");
}
public AnalyzerTransformer(Analyzer analyzer) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java b/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
index cfaac07..37ca383 100644
--- a/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
+++ b/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
@@ -32,7 +32,7 @@ public final class AnalyzerUtils {
* @throws ClassNotFoundException - {@link ClassNotFoundException}
*/
public static Analyzer createAnalyzer(String analyzerClassName) throws ClassNotFoundException {
- return createAnalyzer(analyzerClassName, Version.LUCENE_4_10_3);
+ return createAnalyzer(analyzerClassName, Version.LUCENE_46);
}
public static Analyzer createAnalyzer(String analyzerClassName, Version version) throws ClassNotFoundException {
@@ -47,7 +47,7 @@ public final class AnalyzerUtils {
* @return {@link Analyzer}
*/
public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass) {
- return createAnalyzer(analyzerClass, Version.LUCENE_4_10_3);
+ return createAnalyzer(analyzerClass, Version.LUCENE_46);
}
public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java b/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
index e0f6ce1..0be8823 100644
--- a/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
+++ b/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
@@ -18,9 +18,11 @@
package org.apache.mahout.vectorizer.encoders;
import java.util.Locale;
-import org.apache.commons.io.Charsets;
+
import org.apache.mahout.math.Vector;
+import com.google.common.base.Charsets;
+
public class InteractionValueEncoder extends FeatureVectorEncoder {
private final FeatureVectorEncoder firstEncoder;
private final FeatureVectorEncoder secondEncoder;
@@ -86,7 +88,7 @@ public class InteractionValueEncoder extends FeatureVectorEncoder {
int n = (k + j) % data.size();
if (isTraceEnabled()) {
trace(String.format("%s:%s", new String(originalForm1, Charsets.UTF_8), new String(originalForm2,
- Charsets.UTF_8)), n);
+ Charsets.UTF_8)), n);
}
data.set(n, data.get(n) + w);
}
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java b/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
index 8edc99b..3ffff85 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
@@ -17,11 +17,11 @@
package org.apache.mahout.classifier;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
+import com.google.common.collect.Lists;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.Matrix;
import org.junit.Test;
@@ -102,7 +102,7 @@ public final class ConfusionMatrixTest extends MahoutTestCase {
}
private static ConfusionMatrix fillConfusionMatrix(int[][] values, String[] labels, String defaultLabel) {
- Collection<String> labelList = new ArrayList<>();
+ Collection<String> labelList = Lists.newArrayList();
labelList.add(labels[0]);
labelList.add(labels[1]);
ConfusionMatrix confusionMatrix = new ConfusionMatrix(labelList, defaultLabel);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
index d7ab09c..f1ec07f 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
@@ -143,6 +143,7 @@ public final class DecisionForestTest extends MahoutTestCase {
Data testData = DataLoader.loadData(dataset, TEST_DATA);
double noValue = dataset.valueOf(4, "no");
+ double yesValue = dataset.valueOf(4, "yes");
assertEquals(noValue, forest.classify(testData.getDataset(), rng, testData.get(0)), EPSILON);
// This one is tie-broken -- 1 is OK too
//assertEquals(yesValue, forest.classify(testData.getDataset(), rng, testData.get(1)), EPSILON);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
index db62d85..1cf8b6a 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
@@ -23,8 +23,9 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
+import com.google.common.base.Charsets;
+import com.google.common.io.Closeables;
import com.google.common.io.Files;
-import org.apache.commons.io.Charsets;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -233,12 +234,17 @@ public final class Utils {
}
private static void writeDataToFile(String[] sData, Path path) throws IOException {
- try (BufferedWriter output = Files.newWriter(new File(path.toString()), Charsets.UTF_8)){
+ BufferedWriter output = null;
+ try {
+ output = Files.newWriter(new File(path.toString()), Charsets.UTF_8);
for (String line : sData) {
output.write(line);
output.write('\n');
}
+ } finally {
+ Closeables.close(output, false);
}
+
}
public static Path writeDataToTestFile(String[] sData) throws IOException {
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
index e41071c..3903c33 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
@@ -18,24 +18,25 @@
package org.apache.mahout.classifier.df.mapreduce.partial;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Random;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.classifier.df.builder.DefaultTreeBuilder;
import org.apache.mahout.classifier.df.builder.TreeBuilder;
import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
import org.apache.mahout.classifier.df.node.Leaf;
import org.apache.mahout.classifier.df.node.Node;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
import org.junit.Test;
public final class PartialBuilderTest extends MahoutTestCase {
@@ -65,10 +66,15 @@ public final class PartialBuilderTest extends MahoutTestCase {
FileSystem fs = base.getFileSystem(conf);
Path outputFile = new Path(base, "PartialBuilderTest.seq");
- try (Writer writer = SequenceFile.createWriter(fs, conf, outputFile, TreeID.class, MapredOutput.class)){
+ Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
+ TreeID.class, MapredOutput.class);
+
+ try {
for (int index = 0; index < NUM_TREES; index++) {
writer.append(keys[index], values[index]);
}
+ } finally {
+ Closeables.close(writer, false);
}
// load the output and make sure its valid
@@ -110,7 +116,7 @@ public final class PartialBuilderTest extends MahoutTestCase {
private static void randomKeyValues(Random rng, TreeID[] keys, MapredOutput[] values, int[] firstIds) {
int index = 0;
int firstId = 0;
- Collection<Integer> partitions = new ArrayList<>();
+ Collection<Integer> partitions = Lists.newArrayList();
for (int p = 0; p < NUM_MAPS; p++) {
// select a random partition, not yet selected
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java b/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
index 917bf1a..ebe5424 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
@@ -19,14 +19,11 @@ package org.apache.mahout.classifier.mlp;
import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
-import com.google.common.io.Files;
import org.apache.commons.csv.CSVUtils;
-import org.apache.commons.io.Charsets;
import org.apache.mahout.classifier.mlp.NeuralNetwork.TrainingMethod;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseMatrix;
@@ -35,6 +32,10 @@ import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.Vector;
import org.junit.Test;
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
/** Test the functionality of {@link NeuralNetwork}. */
public class TestNeuralNetwork extends MahoutTestCase {
@@ -217,7 +218,7 @@ public class TestNeuralNetwork extends MahoutTestCase {
File cancerDataset = getTestTempFile("cancer.csv");
writeLines(cancerDataset, Datasets.CANCER);
- List<Vector> records = new ArrayList<>();
+ List<Vector> records = Lists.newArrayList();
// Returns a mutable list of the data
List<String> cancerDataSetList = Files.readLines(cancerDataset, Charsets.UTF_8);
// Skip the header line, hence remove the first element in the list
@@ -271,7 +272,7 @@ public class TestNeuralNetwork extends MahoutTestCase {
writeLines(irisDataset, Datasets.IRIS);
int numOfClasses = 3;
- List<Vector> records = new ArrayList<>();
+ List<Vector> records = Lists.newArrayList();
// Returns a mutable list of the data
List<String> irisDataSetList = Files.readLines(irisDataset, Charsets.UTF_8);
// Skip the header line, hence remove the first element in the list
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
index b0672bf..abd666e 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
@@ -19,6 +19,7 @@ package org.apache.mahout.classifier.naivebayes;
import java.io.File;
+import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -64,8 +65,10 @@ public class NaiveBayesTest extends MahoutTestCase {
outputDir.delete();
tempDir = getTestTempDir("tmp");
- try (SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
- new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class)) {
+ SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
+ new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class);
+
+ try {
writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
@@ -76,6 +79,8 @@ public class NaiveBayesTest extends MahoutTestCase {
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_DOMESTIC));
writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_IMPORTED));
writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
+ } finally {
+ Closeables.close(writer, false);
}
}
@@ -83,8 +88,8 @@ public class NaiveBayesTest extends MahoutTestCase {
public void toyData() throws Exception {
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
- trainNaiveBayes.run(new String[]{"--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
- "--tempDir", tempDir.getAbsolutePath()});
+ trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
+ "--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
@@ -102,9 +107,9 @@ public class NaiveBayesTest extends MahoutTestCase {
public void toyDataComplementary() throws Exception {
TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
trainNaiveBayes.setConf(conf);
- trainNaiveBayes.run(new String[]{"--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
+ trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
"--trainComplementary",
- "--tempDir", tempDir.getAbsolutePath()});
+ "--tempDir", tempDir.getAbsolutePath() });
NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java b/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
index 3b7c93e..4446fef 100644
--- a/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
+++ b/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
@@ -70,7 +70,7 @@ public final class TextValueEncoderTest extends MahoutTestCase {
@Test
public void testLuceneEncoding() throws Exception {
LuceneTextValueEncoder enc = new LuceneTextValueEncoder("text");
- enc.setAnalyzer(new WhitespaceAnalyzer());
+ enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_46));
Vector v1 = new DenseVector(200);
enc.addToVector("test1 and more", v1);
enc.flush(1, v1);
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ab1734d..80b4a20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,8 +115,8 @@
<mfindbugs.version>2.5.2</mfindbugs.version>
<mjavadoc.version>2.9.1</mjavadoc.version>
<hbase.version>1.0.0</hbase.version>
- <lucene.version>4.10.3</lucene.version>
- <slf4j.version>1.7.12</slf4j.version>
+ <lucene.version>4.6.1</lucene.version>
+ <slf4j.version>1.7.10</slf4j.version>
<scala.compat.version>2.10</scala.compat.version>
<scala.version>2.10.4</scala.version>
<spark.version>1.1.1</spark.version>