You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/07/05 00:01:20 UTC
svn commit: r1142821 - in /mahout/trunk:
integration/src/main/java/org/apache/mahout/benchmark/
integration/src/main/java/org/apache/mahout/text/
integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/
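integration/src/main/java/org/apache/mahout/utils/vectors/arff/
math/src/main/java/org/apache/mahout/math/
math/src/main/java/org/apache/mahout/math/decomposer/lanczos/
math/src/test/java/org/apache/mahout/math/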
Author: srowen
Date: Mon Jul 4 22:01:20 2011
New Revision: 1142821
URL: http://svn.apache.org/viewvc?rev=1142821&view=rev
Log:
Style, warnings, remove deprecated call
Removed:
mahout/trunk/math/src/main/java/org/apache/mahout/math/UnboundLabelException.java
Modified:
mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Mon Jul 4 22:01:20 2011
@@ -69,7 +69,9 @@ import org.slf4j.LoggerFactory;
public class VectorBenchmarks {
private static final Logger log = LoggerFactory.getLogger(VectorBenchmarks.class);
- private static final Pattern TAB_PATTERN = Pattern.compile("\t");
+
+ private static final Pattern TAB_NEWLINE_PATTERN = Pattern.compile("[\n\t]");
+ private static final String[] EMPTY = new String[0];
private final Vector[][] vectors;
private final Vector[] clusters;
@@ -136,8 +138,7 @@ public class VectorBenchmarks {
float opsPerSec = loop * numVectors * 1000000000.0f / stats.getSumTime();
log.info("{} {} \n{} {} \nSpeed: {} UnitsProcessed/sec {} MBytes/sec ",
new Object[] {benchmarkName, implName, content, stats.toString(), opsPerSec, speed});
- String info = stats.toString().replaceAll("\n", "\t") + "\tSpeed = " + opsPerSec + " /sec\tRate = "
- + speed + " MB/s";
+
if (!implType.containsKey(implName)) {
implType.put(implName, implType.size());
}
@@ -147,9 +148,10 @@ public class VectorBenchmarks {
}
List<String[]> implStats = statsMap.get(benchmarkName);
while (implStats.size() < implId + 1) {
- implStats.add(new String[] {});
+ implStats.add(EMPTY);
}
- implStats.set(implId, TAB_PATTERN.split(info));
+ implStats.set(implId,
+ TAB_NEWLINE_PATTERN.split(stats + "\tSpeed = " + opsPerSec + " /sec\tRate = " + speed + " MB/s"));
}
public void createBenchmark() {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java Mon Jul 4 22:01:20 2011
@@ -43,23 +43,25 @@ public final class PrefixAdditionFilter
@Override
protected void process(FileStatus fst, Path current) throws IOException {
+ FileSystem fs = getFs();
+ ChunkedWriter writer = getWriter();
if (fst.isDir()) {
fs.listStatus(fst.getPath(),
- new PrefixAdditionFilter(conf, prefix + Path.SEPARATOR + current.getName(),
- options, writer, fs));
+ new PrefixAdditionFilter(getConf(), getPrefix() + Path.SEPARATOR + current.getName(),
+ getOptions(), writer, fs));
} else {
InputStream in = null;
try {
in = fs.open(fst.getPath());
StringBuilder file = new StringBuilder();
- for (String aFit : new FileLineIterable(in, charset, false)) {
+ for (String aFit : new FileLineIterable(in, getCharset(), false)) {
file.append(aFit).append('\n');
}
String name = current.getName().equals(fst.getPath().getName())
? current.getName()
: current.getName() + Path.SEPARATOR + fst.getPath().getName();
- writer.write(prefix + Path.SEPARATOR + name, file.toString());
+ writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
} finally {
Closeables.closeQuietly(in);
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java Mon Jul 4 22:01:20 2011
@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
public final class SequenceFilesFromCsvFilter extends SequenceFilesFromDirectoryFilter {
private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromCsvFilter.class);
- private static final Pattern TAB = Pattern.compile("\\t");
+ private static final Pattern TAB = Pattern.compile("\t");
public static final String[] KEY_COLUMN_OPTION = {"keyColumn", "kcol"};
public static final String[] VALUE_COLUMN_OPTION = {"valueColumn", "vcol"};
@@ -83,18 +83,20 @@ public final class SequenceFilesFromCsvF
@Override
protected void process(FileStatus fst, Path current) throws IOException {
+ FileSystem fs = getFs();
+ ChunkedWriter writer = getWriter();
if (fst.isDir()) {
fs.listStatus(fst.getPath(),
- new SequenceFilesFromCsvFilter(conf, prefix + Path.SEPARATOR + current.getName(),
- this.options, writer, fs));
+ new SequenceFilesFromCsvFilter(getConf(), getPrefix() + Path.SEPARATOR + current.getName(),
+ this.getOptions(), writer, getFs()));
} else {
InputStream in = fs.open(fst.getPath());
- for (CharSequence aFit : new FileLineIterable(in, charset, false)) {
+ for (CharSequence aFit : new FileLineIterable(in, getCharset(), false)) {
String[] columns = TAB.split(aFit);
log.info("key : {}, value : {}", columns[keyColumn], columns[valueColumn]);
String key = columns[keyColumn];
String value = columns[valueColumn];
- writer.write(prefix + key, value);
+ writer.write(getPrefix() + key, value);
}
}
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java Mon Jul 4 22:01:20 2011
@@ -35,18 +35,16 @@ import java.util.Map;
public abstract class SequenceFilesFromDirectoryFilter extends SequenceFilesFromDirectory implements PathFilter {
private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectoryFilter.class);
- protected final String prefix;
- protected final ChunkedWriter writer;
- protected final Charset charset;
- protected final Configuration conf;
- protected final FileSystem fs;
- protected final Map<String, String> options;
+ private final String prefix;
+ private final ChunkedWriter writer;
+ private final Charset charset;
+ private final FileSystem fs;
+ private final Map<String, String> options;
protected SequenceFilesFromDirectoryFilter() {
this.prefix = null;
this.writer = null;
this.charset = null;
- this.conf = null;
this.fs = null;
this.options = null;
}
@@ -56,12 +54,28 @@ public abstract class SequenceFilesFromD
Map<String, String> options,
ChunkedWriter writer,
FileSystem fs) {
- this.conf = conf;
this.prefix = keyPrefix;
this.writer = writer;
this.charset = Charset.forName(options.get(SequenceFilesFromDirectory.CHARSET_OPTION[0]));
this.fs = fs;
this.options = options;
+ setConf(conf);
+ }
+
+ protected final String getPrefix() {
+ return prefix;
+ }
+
+ protected final ChunkedWriter getWriter() {
+ return writer;
+ }
+
+ protected final Charset getCharset() {
+ return charset;
+ }
+
+ protected final FileSystem getFs() {
+ return fs;
}
protected final Map<String, String> getOptions() {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Mon Jul 4 22:01:20 2011
@@ -224,8 +224,6 @@ public final class SequenceFilesFromMail
String key = prefix + File.separator + current.getName() + File.separator + messageId;
writer.write(key, file.toString());
file.setLength(0); // reset the buffer
- messageId = null;
- inBody = false;
}
} catch (FileNotFoundException e) {
// Skip file.
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java Mon Jul 4 22:01:20 2011
@@ -30,7 +30,7 @@ import org.apache.hadoop.util.bloom.Filt
import org.apache.hadoop.util.bloom.Key;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Emits tokens based on bloom filter membership.
@@ -38,7 +38,7 @@ import org.apache.lucene.analysis.tokena
public final class BloomTokenFilter extends TokenFilter {
private final Filter filter;
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt;
private final CharsetEncoder encoder;
private final Key key;
private final boolean keepMembers;
@@ -56,7 +56,7 @@ public final class BloomTokenFilter exte
this.filter = filter;
this.keepMembers = keepMembers;
this.key = new Key();
- this.termAtt = addAttribute(TermAttribute.class);
+ this.termAtt = addAttribute(CharTermAttribute.class);
this.encoder = Charsets.UTF_8.newEncoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -65,7 +65,7 @@ public final class BloomTokenFilter exte
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
- ByteBuffer bytes = encoder.encode(CharBuffer.wrap(termAtt.termBuffer(), 0, termAtt.termLength()));
+ ByteBuffer bytes = encoder.encode(CharBuffer.wrap(termAtt.buffer(), 0, termAtt.length()));
key.set(bytes.array(), 1.0f);
boolean member = filter.membershipTest(key);
if ((keepMembers && member) || (!keepMembers && !member)) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java Mon Jul 4 22:01:20 2011
@@ -111,7 +111,7 @@ public class ARFFVectorIterable implemen
label = ARFFType.DATE.getLabel(lower);
type = ARFFType.DATE;
//TODO: DateFormatter map
- DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
int idx = lower.indexOf(ARFFType.DATE.getIndicator());
String[] split = ARFFVectorIterable.SPACE_PATTERN.split(line);
if (split.length >= 4) { //we have a date format
@@ -119,7 +119,7 @@ public class ARFFVectorIterable implemen
if (formStr.startsWith("\"")) {
formStr = formStr.substring(1, formStr.length() - 1);
}
- format = new SimpleDateFormat(formStr);
+ format = new SimpleDateFormat(formStr, Locale.ENGLISH);
}
model.addDateFormat(labelNumInt, format);
//@attribute <name> date [<date-format>]
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Mon Jul 4 22:01:20 2011
@@ -25,6 +25,7 @@ import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
+import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
@@ -143,7 +144,7 @@ public class MapBackedARFFModel implemen
protected double processDate(String data, int idx) {
DateFormat format = dateMap.get(idx);
if (format == null) {
- format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
}
double result;
try {
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java Mon Jul 4 22:01:20 2011
@@ -93,12 +93,12 @@ public abstract class AbstractMatrix imp
@Override
public double get(String rowLabel, String columnLabel) {
if (columnLabelBindings == null || rowLabelBindings == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
Integer row = rowLabelBindings.get(rowLabel);
Integer col = columnLabelBindings.get(columnLabel);
if (row == null || col == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
return get(row, col);
@@ -117,11 +117,11 @@ public abstract class AbstractMatrix imp
@Override
public void set(String rowLabel, double[] rowData) {
if (columnLabelBindings == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
Integer row = rowLabelBindings.get(rowLabel);
if (row == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
set(row, rowData);
}
@@ -138,12 +138,12 @@ public abstract class AbstractMatrix imp
@Override
public void set(String rowLabel, String columnLabel, double value) {
if (columnLabelBindings == null || rowLabelBindings == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
Integer row = rowLabelBindings.get(rowLabel);
Integer col = columnLabelBindings.get(columnLabel);
if (row == null || col == null) {
- throw new UnboundLabelException();
+ throw new IllegalStateException("Unbound label");
}
set(row, col, value);
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Mon Jul 4 22:01:20 2011
@@ -18,6 +18,7 @@
package org.apache.mahout.math;
import com.google.common.collect.AbstractIterator;
+import com.google.common.primitives.Doubles;
import org.apache.mahout.math.function.Functions;
import java.util.Arrays;
@@ -390,13 +391,28 @@ public class SequentialAccessSparseVecto
this.index = index;
this.value = value;
}
-
+
@Override
- public int compareTo(final OrderedElement that) {
+ public int compareTo(OrderedElement that) {
// both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be
// an array somewhere with Integer.MAX_VALUE + 1 elements)
return this.index - that.index;
}
+
+ @Override
+ public int hashCode() {
+ return index ^ Doubles.hashCode(value);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof OrderedElement)) {
+ return false;
+ }
+ OrderedElement other = (OrderedElement) o;
+ return index == other.index && value == other.value;
+ }
+
}
}
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java Mon Jul 4 22:01:20 2011
@@ -9,14 +9,14 @@ import org.apache.mahout.math.VectorIter
import java.util.Map;
public class LanczosState {
- protected Matrix diagonalMatrix;
- protected VectorIterable corpus;
+ protected Matrix diagonalMatrix;
+ protected final VectorIterable corpus;
protected double scaleFactor;
protected int iterationNumber;
- protected int desiredRank;
+ protected final int desiredRank;
protected Map<Integer, Vector> basis;
- protected Map<Integer, Double> singularValues;
+ protected final Map<Integer, Double> singularValues;
protected Map<Integer, Vector> singularVectors;
public LanczosState(VectorIterable corpus, int numCols, int desiredRank, Vector initialVector) {
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java Mon Jul 4 22:01:20 2011
@@ -617,7 +617,7 @@ public abstract class MatrixTest extends
assertEquals("FeeBaz", m.get(0, 2), m.get("Fee", "Baz"), EPSILON);
}
- @Test(expected = UnboundLabelException.class)
+ @Test(expected = IllegalStateException.class)
public void testSettingLabelBindings() {
Matrix m = matrixFactory(new double[][]{{1, 3, 4}, {5, 2, 3},
{1, 4, 2}});
Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java Mon Jul 4 22:01:20 2011
@@ -465,7 +465,7 @@ public final class TestMatrixView extend
assertEquals("FeeBar", test.get(0, 1), test.get("Fee", "Bar"), EPSILON);
}
- @Test(expected = UnboundLabelException.class)
+ @Test(expected = IllegalStateException.class)
public void testSettingLabelBindings() {
assertNull("row bindings", test.getRowLabelBindings());
assertNull("col bindings", test.getColumnLabelBindings());