You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2013/06/09 22:50:53 UTC
svn commit: r1491293 - in /mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer: DictionaryVectorizer.java term/TFPartialVectorReducer.java
Author: gsingers
Date: Sun Jun 9 20:50:53 2013
New Revision: 1491293
URL: http://svn.apache.org/r1491293
Log:
MAHOUT-992: qualify the path to make sure we get it from the local file system properly
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java?rev=1491293&r1=1491292&r2=1491293&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java Sun Jun 9 20:50:53 2013
@@ -388,7 +388,7 @@ public final class DictionaryVectorizer
if (parseArguments(args) == null) {
return -1;
}
- String tfDirName = getOption("tfDirName");
+ String tfDirName = getOption("tfDirName", "tfDir");
int minSupport = getInt("minSupport", 2);
int maxNGramSize = getInt("maxNGramSize", 1);
float minLLRValue = getFloat("minLLR", LLRReducer.DEFAULT_MIN_LLR);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java?rev=1491293&r1=1491292&r2=1491293&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java Sun Jun 9 20:50:53 2013
@@ -47,6 +47,7 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
+import java.util.Arrays;
import java.util.Iterator;
/**
@@ -135,15 +136,19 @@ public class TFPartialVectorReducer exte
}
localFiles[0] = new Path(filesURIs[0].getPath());
}
+
dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);
-
- Path dictionaryFile = localFiles[0];
+ if (log.isInfoEnabled()) {
+ log.info("Cache Files: " + Arrays.asList(localFiles));
+ }
+ //MAHOUT-1247
+ localFiles[0] = localFs.makeQualified(localFiles[0]);
// key is word value is id
for (Pair<Writable, IntWritable> record
- : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
+ : new SequenceFileIterable<Writable, IntWritable>(localFiles[0], true, conf)) {
dictionary.put(record.getFirst().toString(), record.getSecond().get());
}
}