You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/07/05 00:01:20 UTC

svn commit: r1142821 - in /mahout/trunk: integration/src/main/java/org/apache/mahout/benchmark/ integration/src/main/java/org/apache/mahout/text/ integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/ integration/src/main/java/org/apac...

Author: srowen
Date: Mon Jul  4 22:01:20 2011
New Revision: 1142821

URL: http://svn.apache.org/viewvc?rev=1142821&view=rev
Log:
Style, warnings, remove deprecated call

Removed:
    mahout/trunk/math/src/main/java/org/apache/mahout/math/UnboundLabelException.java
Modified:
    mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
    mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
    mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Mon Jul  4 22:01:20 2011
@@ -69,7 +69,9 @@ import org.slf4j.LoggerFactory;
 public class VectorBenchmarks {
 
   private static final Logger log = LoggerFactory.getLogger(VectorBenchmarks.class);
-  private static final Pattern TAB_PATTERN = Pattern.compile("\t");
+
+  private static final Pattern TAB_NEWLINE_PATTERN = Pattern.compile("[\n\t]");
+  private static final String[] EMPTY = new String[0];
 
   private final Vector[][] vectors;
   private final Vector[] clusters;
@@ -136,8 +138,7 @@ public class VectorBenchmarks {
     float opsPerSec = loop * numVectors * 1000000000.0f / stats.getSumTime();
     log.info("{} {} \n{} {} \nSpeed: {} UnitsProcessed/sec {} MBytes/sec                                   ",
       new Object[] {benchmarkName, implName, content, stats.toString(), opsPerSec, speed});
-    String info = stats.toString().replaceAll("\n", "\t") + "\tSpeed = " + opsPerSec + " /sec\tRate = "
-                  + speed + " MB/s";
+
     if (!implType.containsKey(implName)) {
       implType.put(implName, implType.size());
     }
@@ -147,9 +148,10 @@ public class VectorBenchmarks {
     }
     List<String[]> implStats = statsMap.get(benchmarkName);
     while (implStats.size() < implId + 1) {
-      implStats.add(new String[] {});
+      implStats.add(EMPTY);
     }
-    implStats.set(implId, TAB_PATTERN.split(info));
+    implStats.set(implId,
+                  TAB_NEWLINE_PATTERN.split(stats + "\tSpeed = " + opsPerSec + " /sec\tRate = " + speed + " MB/s"));
   }
   
   public void createBenchmark() {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/PrefixAdditionFilter.java Mon Jul  4 22:01:20 2011
@@ -43,23 +43,25 @@ public final class PrefixAdditionFilter 
 
   @Override
   protected void process(FileStatus fst, Path current) throws IOException {
+    FileSystem fs = getFs();
+    ChunkedWriter writer = getWriter();
     if (fst.isDir()) {
       fs.listStatus(fst.getPath(),
-                    new PrefixAdditionFilter(conf, prefix + Path.SEPARATOR + current.getName(),
-                        options, writer, fs));
+                    new PrefixAdditionFilter(getConf(), getPrefix() + Path.SEPARATOR + current.getName(),
+                                             getOptions(), writer, fs));
     } else {
       InputStream in = null;
       try {
         in = fs.open(fst.getPath());
 
         StringBuilder file = new StringBuilder();
-        for (String aFit : new FileLineIterable(in, charset, false)) {
+        for (String aFit : new FileLineIterable(in, getCharset(), false)) {
           file.append(aFit).append('\n');
         }
         String name = current.getName().equals(fst.getPath().getName())
             ? current.getName()
             : current.getName() + Path.SEPARATOR + fst.getPath().getName();
-        writer.write(prefix + Path.SEPARATOR + name, file.toString());
+        writer.write(getPrefix() + Path.SEPARATOR + name, file.toString());
       } finally {
         Closeables.closeQuietly(in);
       }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromCsvFilter.java Mon Jul  4 22:01:20 2011
@@ -37,7 +37,7 @@ import java.util.regex.Pattern;
 public final class SequenceFilesFromCsvFilter extends SequenceFilesFromDirectoryFilter {
 
   private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromCsvFilter.class);
-  private static final Pattern TAB = Pattern.compile("\\t");
+  private static final Pattern TAB = Pattern.compile("\t");
 
   public static final String[] KEY_COLUMN_OPTION = {"keyColumn", "kcol"};
   public static final String[] VALUE_COLUMN_OPTION = {"valueColumn", "vcol"};
@@ -83,18 +83,20 @@ public final class SequenceFilesFromCsvF
 
   @Override
   protected void process(FileStatus fst, Path current) throws IOException {
+    FileSystem fs = getFs();
+    ChunkedWriter writer = getWriter();
     if (fst.isDir()) {
       fs.listStatus(fst.getPath(),
-                    new SequenceFilesFromCsvFilter(conf, prefix + Path.SEPARATOR + current.getName(),
-                        this.options, writer, fs));
+                    new SequenceFilesFromCsvFilter(getConf(), getPrefix() + Path.SEPARATOR + current.getName(),
+                                                   this.getOptions(), writer, getFs()));
     } else {
       InputStream in = fs.open(fst.getPath());
-      for (CharSequence aFit : new FileLineIterable(in, charset, false)) {
+      for (CharSequence aFit : new FileLineIterable(in, getCharset(), false)) {
         String[] columns = TAB.split(aFit);
         log.info("key : {}, value : {}", columns[keyColumn], columns[valueColumn]);
         String key = columns[keyColumn];
         String value = columns[valueColumn];
-        writer.write(prefix + key, value);
+        writer.write(getPrefix() + key, value);
       }
     }
   }

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromDirectoryFilter.java Mon Jul  4 22:01:20 2011
@@ -35,18 +35,16 @@ import java.util.Map;
 public abstract class SequenceFilesFromDirectoryFilter extends SequenceFilesFromDirectory implements PathFilter {
   private static final Logger log = LoggerFactory.getLogger(SequenceFilesFromDirectoryFilter.class);
 
-  protected final String prefix;
-  protected final ChunkedWriter writer;
-  protected final Charset charset;
-  protected final Configuration conf;
-  protected final FileSystem fs;
-  protected final Map<String, String> options;
+  private final String prefix;
+  private final ChunkedWriter writer;
+  private final Charset charset;
+  private final FileSystem fs;
+  private final Map<String, String> options;
 
   protected SequenceFilesFromDirectoryFilter() {
     this.prefix = null;
     this.writer = null;
     this.charset = null;
-    this.conf = null;
     this.fs = null;
     this.options = null;
   }
@@ -56,12 +54,28 @@ public abstract class SequenceFilesFromD
                                              Map<String, String> options,
                                              ChunkedWriter writer,
                                              FileSystem fs) {
-    this.conf = conf;
     this.prefix = keyPrefix;
     this.writer = writer;
     this.charset = Charset.forName(options.get(SequenceFilesFromDirectory.CHARSET_OPTION[0]));
     this.fs = fs;
     this.options = options;
+    setConf(conf);
+  }
+
+  protected final String getPrefix() {
+    return prefix;
+  }
+
+  protected final ChunkedWriter getWriter() {
+    return writer;
+  }
+
+  protected final Charset getCharset() {
+    return charset;
+  }
+
+  protected final FileSystem getFs() {
+    return fs;
   }
 
   protected final Map<String, String> getOptions() {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java Mon Jul  4 22:01:20 2011
@@ -224,8 +224,6 @@ public final class SequenceFilesFromMail
           String key = prefix + File.separator + current.getName() + File.separator + messageId;
           writer.write(key, file.toString());
           file.setLength(0); // reset the buffer
-          messageId = null;
-          inBody = false;
         }
       } catch (FileNotFoundException e) {
         // Skip file.

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/BloomTokenFilter.java Mon Jul  4 22:01:20 2011
@@ -30,7 +30,7 @@ import org.apache.hadoop.util.bloom.Filt
 import org.apache.hadoop.util.bloom.Key;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 
 /**
  * Emits tokens based on bloom filter membership.
@@ -38,7 +38,7 @@ import org.apache.lucene.analysis.tokena
 public final class BloomTokenFilter extends TokenFilter {
   
   private final Filter filter;
-  private final TermAttribute termAtt;
+  private final CharTermAttribute termAtt;
   private final CharsetEncoder encoder;
   private final Key key;
   private final boolean keepMembers;
@@ -56,7 +56,7 @@ public final class BloomTokenFilter exte
     this.filter = filter;
     this.keepMembers = keepMembers;
     this.key = new Key();
-    this.termAtt = addAttribute(TermAttribute.class);
+    this.termAtt = addAttribute(CharTermAttribute.class);
     this.encoder = Charsets.UTF_8.newEncoder().
       onMalformedInput(CodingErrorAction.REPORT).
       onUnmappableCharacter(CodingErrorAction.REPORT);
@@ -65,7 +65,7 @@ public final class BloomTokenFilter exte
   @Override
   public boolean incrementToken() throws IOException {
     while (input.incrementToken()) {
-      ByteBuffer bytes =  encoder.encode(CharBuffer.wrap(termAtt.termBuffer(), 0, termAtt.termLength()));
+      ByteBuffer bytes =  encoder.encode(CharBuffer.wrap(termAtt.buffer(), 0, termAtt.length()));
       key.set(bytes.array(), 1.0f);
       boolean member = filter.membershipTest(key);
       if ((keepMembers && member) || (!keepMembers && !member)) {

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/ARFFVectorIterable.java Mon Jul  4 22:01:20 2011
@@ -111,7 +111,7 @@ public class ARFFVectorIterable implemen
           label = ARFFType.DATE.getLabel(lower);
           type = ARFFType.DATE;
           //TODO: DateFormatter map
-          DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+          DateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
           int idx = lower.indexOf(ARFFType.DATE.getIndicator());
           String[] split = ARFFVectorIterable.SPACE_PATTERN.split(line);
           if (split.length >= 4) { //we have a date format
@@ -119,7 +119,7 @@ public class ARFFVectorIterable implemen
             if (formStr.startsWith("\"")) {
               formStr = formStr.substring(1, formStr.length() - 1);
             }
-            format = new SimpleDateFormat(formStr);
+            format = new SimpleDateFormat(formStr, Locale.ENGLISH);
           }
           model.addDateFormat(labelNumInt, format);
           //@attribute <name> date [<date-format>]

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Mon Jul  4 22:01:20 2011
@@ -25,6 +25,7 @@ import java.text.SimpleDateFormat;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -143,7 +144,7 @@ public class MapBackedARFFModel implemen
   protected double processDate(String data, int idx) {
     DateFormat format = dateMap.get(idx);
     if (format == null) {
-      format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+      format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ENGLISH);
     }
     double result;
     try {

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/AbstractMatrix.java Mon Jul  4 22:01:20 2011
@@ -93,12 +93,12 @@ public abstract class AbstractMatrix imp
   @Override
   public double get(String rowLabel, String columnLabel) {
     if (columnLabelBindings == null || rowLabelBindings == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
     Integer row = rowLabelBindings.get(rowLabel);
     Integer col = columnLabelBindings.get(columnLabel);
     if (row == null || col == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
 
     return get(row, col);
@@ -117,11 +117,11 @@ public abstract class AbstractMatrix imp
   @Override
   public void set(String rowLabel, double[] rowData) {
     if (columnLabelBindings == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
     Integer row = rowLabelBindings.get(rowLabel);
     if (row == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
     set(row, rowData);
   }
@@ -138,12 +138,12 @@ public abstract class AbstractMatrix imp
   @Override
   public void set(String rowLabel, String columnLabel, double value) {
     if (columnLabelBindings == null || rowLabelBindings == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
     Integer row = rowLabelBindings.get(rowLabel);
     Integer col = columnLabelBindings.get(columnLabel);
     if (row == null || col == null) {
-      throw new UnboundLabelException();
+      throw new IllegalStateException("Unbound label");
     }
     set(row, col, value);
   }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Mon Jul  4 22:01:20 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.math;
 
 import com.google.common.collect.AbstractIterator;
+import com.google.common.primitives.Doubles;
 import org.apache.mahout.math.function.Functions;
 
 import java.util.Arrays;
@@ -390,13 +391,28 @@ public class SequentialAccessSparseVecto
       this.index = index;
       this.value = value;
     }
-    
+
     @Override
-    public int compareTo(final OrderedElement that) {
+    public int compareTo(OrderedElement that) {
       // both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be
       // an array somewhere with Integer.MAX_VALUE + 1 elements)
       return this.index - that.index;
     }
+
+    @Override
+    public int hashCode() {
+      return index ^ Doubles.hashCode(value);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof OrderedElement)) {
+        return false;
+      }
+      OrderedElement other = (OrderedElement) o;
+      return index == other.index && value == other.value;
+    }
+
   }
   
 }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java Mon Jul  4 22:01:20 2011
@@ -9,14 +9,14 @@ import org.apache.mahout.math.VectorIter
 import java.util.Map;
 
 public class LanczosState {
-  protected Matrix diagonalMatrix;
-  protected VectorIterable corpus;
+  protected  Matrix diagonalMatrix;
+  protected final VectorIterable corpus;
   protected double scaleFactor;
   protected int iterationNumber;
-  protected int desiredRank;
+  protected final int desiredRank;
   protected Map<Integer, Vector> basis;
 
-  protected Map<Integer, Double> singularValues;
+  protected final Map<Integer, Double> singularValues;
   protected Map<Integer, Vector> singularVectors;
 
   public LanczosState(VectorIterable corpus, int numCols, int desiredRank, Vector initialVector) {

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java Mon Jul  4 22:01:20 2011
@@ -617,7 +617,7 @@ public abstract class MatrixTest extends
     assertEquals("FeeBaz", m.get(0, 2), m.get("Fee", "Baz"), EPSILON);
   }
 
-  @Test(expected = UnboundLabelException.class)
+  @Test(expected = IllegalStateException.class)
   public void testSettingLabelBindings() {
     Matrix m = matrixFactory(new double[][]{{1, 3, 4}, {5, 2, 3},
         {1, 4, 2}});

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java?rev=1142821&r1=1142820&r2=1142821&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java Mon Jul  4 22:01:20 2011
@@ -465,7 +465,7 @@ public final class TestMatrixView extend
     assertEquals("FeeBar", test.get(0, 1), test.get("Fee", "Bar"), EPSILON);
   }
 
-  @Test(expected = UnboundLabelException.class)
+  @Test(expected = IllegalStateException.class)
   public void testSettingLabelBindings() {
     assertNull("row bindings", test.getRowLabelBindings());
     assertNull("col bindings", test.getColumnLabelBindings());