You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2015/04/23 05:03:46 UTC
mahout git commit: MAHOUT-1690: CLONE - Some vector dumper flags are
expecting arguments. This closes apache/mahout#122
Repository: mahout
Updated Branches:
refs/heads/master 6a3f93e6f -> a3f78bde9
MAHOUT-1690: CLONE - Some vector dumper flags are expecting arguments. This closes apache/mahout#122
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/a3f78bde
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/a3f78bde
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/a3f78bde
Branch: refs/heads/master
Commit: a3f78bde9bf87d3d37931f878015b490761e75ce
Parents: 6a3f93e
Author: Suneel Marthi <su...@gmail.com>
Authored: Wed Apr 22 23:04:56 2015 -0400
Committer: Suneel Marthi <su...@gmail.com>
Committed: Wed Apr 22 23:04:56 2015 -0400
----------------------------------------------------------------------
CHANGELOG | 4 +-
.../mahout/utils/vectors/VectorDumper.java | 114 ++++++++++---------
2 files changed, 61 insertions(+), 57 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/a3f78bde/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index 5611588..52799ba 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,9 +2,11 @@ Mahout Change Log
Release 0.11.0 - unreleased
+ MAHOUT-1690: CLONE - Some vector dumper flags are expecting arguments. (smarthi)
+
MAHOUT-1693: FunctionalMatrixView materializes row vectors in scala shell (apalumbo)
- MAHOUT-1680: Renamed mahout-distribution to apache-mahout-distribution
+ MAHOUT-1680: Renamed mahout-distribution to apache-mahout-distribution (sslavic)
Release 0.10.0 - 2015-04-11
http://git-wip-us.apache.org/repos/asf/mahout/blob/a3f78bde/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
index 93ad0d5..e1c3fbc 100644
--- a/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
+++ b/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
@@ -5,9 +5,9 @@
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -17,14 +17,7 @@
package org.apache.mahout.utils.vectors;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Set;
-
+import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
import org.apache.commons.io.Charsets;
@@ -46,6 +39,13 @@ import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Iterator;
+import java.util.Set;
+
/**
* Can read in a {@link org.apache.hadoop.io.SequenceFile} of {@link Vector}s and dump
* out the results using {@link Vector#asFormatString()} to either the console or to a
@@ -55,7 +55,8 @@ public final class VectorDumper extends AbstractJob {
private static final Logger log = LoggerFactory.getLogger(VectorDumper.class);
- private VectorDumper() {}
+ private VectorDumper() {
+ }
@Override
public int run(String[] args) throws Exception {
@@ -84,9 +85,9 @@ public final class VectorDumper extends AbstractJob {
addOption("sizeOnly", "sz", "Dump only the size of the vector");
addOption("numItems", "ni", "Output at most <n> vecors", false);
addOption("vectorSize", "vs", "Truncate vectors to <vs> length when dumping (most useful when in"
- + " conjunction with -sort", false);
- addOption(buildOption("filter", "fi", "Only dump out those vectors whose name matches the filter."
- + " Multiple items may be specified by repeating the argument.", true, 1, Integer.MAX_VALUE, false, null));
+ + " conjunction with -sort", false);
+ addOption(buildOption("filter", "fi", "Only dump out those vectors whose name matches the filter."
+ + " Multiple items may be specified by repeating the argument.", true, 1, Integer.MAX_VALUE, false, null));
if (parseArguments(args, false, true) == null) {
return -1;
@@ -120,19 +121,22 @@ public final class VectorDumper extends AbstractJob {
String[] dictionary = null;
if (hasOption("dictionary")) {
String dictFile = getOption("dictionary");
- if ("text".equals(dictionaryType)) {
- dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
- } else if ("sequencefile".equals(dictionaryType)) {
- dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
- } else {
- //TODO: support Lucene's FST as a dictionary type
- throw new IOException("Invalid dictionary type: " + dictionaryType);
+ switch (dictionaryType) {
+ case "text":
+ dictionary = VectorHelper.loadTermDictionary(new File(dictFile));
+ break;
+ case "sequencefile":
+ dictionary = VectorHelper.loadTermDictionary(conf, dictFile);
+ break;
+ default:
+ //TODO: support Lucene's FST as a dictionary type
+ throw new IOException("Invalid dictionary type: " + dictionaryType);
}
}
Set<String> filters;
if (hasOption("filter")) {
- filters = new HashSet<>(getOptions("filter"));
+ filters = Sets.newHashSet(getOptions("filter"));
} else {
filters = null;
}
@@ -175,8 +179,8 @@ public final class VectorDumper extends AbstractJob {
}
}
int maxIndexesPerVector = hasOption("vectorSize")
- ? Integer.parseInt(getOption("vectorSize"))
- : Integer.MAX_VALUE;
+ ? Integer.parseInt(getOption("vectorSize"))
+ : Integer.MAX_VALUE;
long itemCount = 0;
int fileCount = 0;
for (Path path : pathArr) {
@@ -201,10 +205,10 @@ public final class VectorDumper extends AbstractJob {
Vector vector;
try {
vector = ((VectorWritable)
- (transposeKeyValue ? keyWritable : valueWritable)).get();
+ (transposeKeyValue ? keyWritable : valueWritable)).get();
} catch (ClassCastException e) {
if ((transposeKeyValue ? keyWritable : valueWritable)
- instanceof WeightedPropertyVectorWritable) {
+ instanceof WeightedPropertyVectorWritable) {
vector =
((WeightedPropertyVectorWritable)
(transposeKeyValue ? keyWritable : valueWritable)).getVector();
@@ -212,39 +216,37 @@ public final class VectorDumper extends AbstractJob {
throw e;
}
}
- if (filters != null
- && vector instanceof NamedVector
- && !filters.contains(((NamedVector) vector).getName())) {
- //we are filtering out this item, skip
- continue;
- }
- if (sizeOnly) {
- if (vector instanceof NamedVector) {
- writer.write(((NamedVector) vector).getName());
- writer.write(":");
- } else {
- writer.write(String.valueOf(i++));
- writer.write(":");
- }
- writer.write(String.valueOf(vector.size()));
- writer.write('\n');
- } else if (nameOnly) {
- if (vector instanceof NamedVector) {
- writer.write(((NamedVector) vector).getName());
+ if (filters == null
+ || !(vector instanceof NamedVector)
+ || filters.contains(((NamedVector) vector).getName())) {
+ if (sizeOnly) {
+ if (vector instanceof NamedVector) {
+ writer.write(((NamedVector) vector).getName());
+ writer.write(":");
+ } else {
+ writer.write(String.valueOf(i++));
+ writer.write(":");
+ }
+ writer.write(String.valueOf(vector.size()));
writer.write('\n');
- }
- } else {
- String fmtStr;
- if (useCSV) {
- fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
+ } else if (nameOnly) {
+ if (vector instanceof NamedVector) {
+ writer.write(((NamedVector) vector).getName());
+ writer.write('\n');
+ }
} else {
- fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
- sortVectors);
+ String fmtStr;
+ if (useCSV) {
+ fmtStr = VectorHelper.vectorToCSVString(vector, namesAsComments);
+ } else {
+ fmtStr = VectorHelper.vectorToJson(vector, dictionary, maxIndexesPerVector,
+ sortVectors);
+ }
+ writer.write(fmtStr);
+ writer.write('\n');
}
- writer.write(fmtStr);
- writer.write('\n');
+ itemCount++;
}
- itemCount++;
}
}
writer.flush();