You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/02/13 20:57:24 UTC
svn commit: r909882 [3/5] - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout: classifier/
classifier/bayes/ classifier/bayes/algorithm/ classifier/bayes/common/
classifier/bayes/datastore/ classifier/bayes/exceptions/
classifier/bayes/inter...
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java Sat Feb 13 19:57:04 2010
@@ -39,8 +39,7 @@
public class BayesWeightSummerReducer extends MapReduceBase implements
Reducer<StringTuple,DoubleWritable,StringTuple,DoubleWritable> {
- private static final Logger log = LoggerFactory
- .getLogger(BayesWeightSummerReducer.class);
+ private static final Logger log = LoggerFactory.getLogger(BayesWeightSummerReducer.class);
private HTable table;
@@ -70,20 +69,19 @@
String feature = key.stringAt(1);
Put bu = new Put(Bytes.toBytes(feature));
- bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
- .toBytes(BayesConstants.FEATURE_SUM), Bytes.toBytes(sum));
+ bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(BayesConstants.FEATURE_SUM),
+ Bytes.toBytes(sum));
table.put(bu);
} else if (key.stringAt(0).equals(BayesConstants.LABEL_SUM)) {
String label = key.stringAt(1);
Put bu = new Put(Bytes.toBytes(BayesConstants.LABEL_SUM));
- bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
- .toBytes(label), Bytes.toBytes(sum));
+ bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(label), Bytes.toBytes(sum));
table.put(bu);
} else if (key.stringAt(0).equals(BayesConstants.TOTAL_SUM)) {
Put bu = new Put(Bytes.toBytes(BayesConstants.HBASE_COUNTS_ROW));
- bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes
- .toBytes(BayesConstants.TOTAL_SUM), Bytes.toBytes(sum));
+ bu.add(Bytes.toBytes(BayesConstants.HBASE_COLUMN_FAMILY), Bytes.toBytes(BayesConstants.TOTAL_SUM),
+ Bytes.toBytes(sum));
table.put(bu);
}
}
@@ -94,15 +92,17 @@
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters
- .fromString(job.get("bayes.parameters", ""));
- if (params.get("dataSource").equals("hbase")) useHbase = true;
- else return;
+ Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ if (params.get("dataSource").equals("hbase")) {
+ useHbase = true;
+ } else {
+ return;
+ }
HBaseConfiguration hBconf = new HBaseConfiguration(job);
table = new HTable(hBconf, job.get("output.table"));
} catch (IOException e) {
- log.error("Unexpected error during configuration", e);
+ BayesWeightSummerReducer.log.error("Unexpected error during configuration", e);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/JobExecutor.java Sat Feb 13 19:57:04 2010
@@ -30,9 +30,10 @@
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+
/**
* Base class for executing the Bayes Map/Reduce Jobs
- *
+ *
*/
public final class JobExecutor {
/** Logger for this class. */
@@ -41,8 +42,7 @@
private JobExecutor() { }
/**
- * Execute a bayes classification job. Input and output path are parsed from
- * the input parameters.
+ * Execute a bayes classification job. Input and output path are parsed from the input parameters.
*
* @param args
* input parameters.
@@ -58,8 +58,8 @@
Option outputOpt = DefaultOptionCreator.outputOption().create();
Option helpOpt = DefaultOptionCreator.helpOption();
- Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(
- outputOpt).withOption(helpOpt).create();
+ Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(helpOpt)
+ .create();
try {
Parser parser = new Parser();
@@ -76,7 +76,7 @@
job.runJob(input, output, new BayesParameters(1));
} catch (OptionException e) {
- log.error(e.getMessage());
+ JobExecutor.log.error(e.getMessage());
CommandLineUtil.printHelp(group);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/model/ClassifierContext.java Sat Feb 13 19:57:04 2010
@@ -23,9 +23,10 @@
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Algorithm;
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
+
/**
* The Classifier Wrapper used for choosing the {@link Algorithm} and {@link Datastore}
- *
+ *
*/
public class ClassifierContext {
@@ -38,8 +39,7 @@
}
/**
- * Initializes the Context. Gets the necessary data and checks if the
- * Datastore is valid
+ * Initializes the Context. Gets the necessary data and checks if the Datastore is valid
*
* @throws InvalidDatastoreException
*/
@@ -54,14 +54,11 @@
* @param document
* The document to classify
* @param defaultCategory
- * The default category to assign Ties are broken by comparing the
- * category
- * @return A Collection of
- * {@link org.apache.mahout.classifier.ClassifierResult}s.
+ * The default category to assign Ties are broken by comparing the category
+ * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
* @throws InvalidDatastoreException
*/
- public ClassifierResult classifyDocument(String[] document,
- String defaultCategory) throws InvalidDatastoreException {
+ public ClassifierResult classifyDocument(String[] document, String defaultCategory) throws InvalidDatastoreException {
return algorithm.classifyDocument(document, datastore, defaultCategory);
}
@@ -73,17 +70,15 @@
* @param defaultCategory
* The default category to assign
* @param numResults
- * The maximum number of results to return, ranked by score. Ties are
- * broken by comparing the category
- * @return A Collection of
- * {@link org.apache.mahout.classifier.ClassifierResult}s.
+ * The maximum number of results to return, ranked by score. Ties are broken by comparing the
+ * category
+ * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
* @throws InvalidDatastoreException
*/
public ClassifierResult[] classifyDocument(String[] document,
String defaultCategory,
int numResults) throws InvalidDatastoreException {
- return algorithm.classifyDocument(document, datastore, defaultCategory,
- numResults);
+ return algorithm.classifyDocument(document, datastore, defaultCategory, numResults);
}
/**
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java Sat Feb 13 19:57:04 2010
@@ -21,14 +21,13 @@
import org.apache.commons.cli2.util.HelpFormatter;
public final class CommandLineUtil {
-
- private CommandLineUtil() {
- }
-
+
+ private CommandLineUtil() { }
+
public static void printHelp(Group group) {
HelpFormatter formatter = new HelpFormatter();
formatter.setGroup(group);
formatter.print();
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterable.java Sat Feb 13 19:57:04 2010
@@ -24,48 +24,49 @@
import java.util.Iterator;
/**
- * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This assumes the
- * text file's lines are delimited in a manner consistent with how {@link java.io.BufferedReader} defines lines.
- *
+ * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This
+ * assumes the text file's lines are delimited in a manner consistent with how {@link java.io.BufferedReader}
+ * defines lines.
+ *
* This class will uncompress files that end in .zip or .gz accordingly, too.
*/
public final class FileLineIterable implements Iterable<String> {
-
+
private static final Charset UTF8 = Charset.forName("UTF-8");
-
+
private final InputStream is;
private final Charset encoding;
private final boolean skipFirstLine;
-
+
/** Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding. */
public FileLineIterable(File file) throws IOException {
- this(file, UTF8, false);
+ this(file, FileLineIterable.UTF8, false);
}
-
+
/** Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding. */
public FileLineIterable(File file, boolean skipFirstLine) throws IOException {
- this(file, UTF8, skipFirstLine);
+ this(file, FileLineIterable.UTF8, skipFirstLine);
}
-
+
/** Creates a {@link FileLineIterable} over a given file, using the given encoding. */
public FileLineIterable(File file, Charset encoding, boolean skipFirstLine) throws IOException {
this(FileLineIterator.getFileInputStream(file), encoding, skipFirstLine);
}
-
+
public FileLineIterable(InputStream is) {
- this(is, UTF8, false);
+ this(is, FileLineIterable.UTF8, false);
}
-
+
public FileLineIterable(InputStream is, boolean skipFirstLine) {
- this(is, UTF8, skipFirstLine);
+ this(is, FileLineIterable.UTF8, skipFirstLine);
}
-
+
public FileLineIterable(InputStream is, Charset encoding, boolean skipFirstLine) {
this.is = is;
this.encoding = encoding;
this.skipFirstLine = skipFirstLine;
}
-
+
@Override
public Iterator<String> iterator() {
try {
@@ -74,5 +75,5 @@
throw new IllegalStateException(ioe);
}
}
-
+
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/FileLineIterator.java Sat Feb 13 19:57:04 2010
@@ -21,7 +21,6 @@
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -33,56 +32,62 @@
import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
/**
- * Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner consistent with
- * how {@link BufferedReader} defines lines.
- *
+ * Iterates over the lines of a text file. This assumes the text file's lines are delimited in a manner
+ * consistent with how {@link BufferedReader} defines lines.
+ *
* This class will uncompress files that end in .zip or .gz accordingly, too.
*/
public final class FileLineIterator implements SkippingIterator<String>, Closeable {
-
+
private static final Charset UTF8 = Charset.forName("UTF-8");
-
+
private final BufferedReader reader;
private String nextLine;
-
+
/**
* Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
- *
- * @throws FileNotFoundException if the file does not exist
- * @throws IOException if the file cannot be read
+ *
+ * @throws FileNotFoundException
+ * if the file does not exist
+ * @throws IOException
+ * if the file cannot be read
*/
public FileLineIterator(File file) throws IOException {
- this(file, UTF8, false);
+ this(file, FileLineIterator.UTF8, false);
}
-
+
/**
* Creates a {@link FileLineIterator} over a given file, assuming a UTF-8 encoding.
- *
- * @throws FileNotFoundException if the file does not exist
- * @throws IOException if the file cannot be read
+ *
+ * @throws FileNotFoundException
+ * if the file does not exist
+ * @throws IOException
+ * if the file cannot be read
*/
public FileLineIterator(File file, boolean skipFirstLine) throws IOException {
- this(file, UTF8, skipFirstLine);
+ this(file, FileLineIterator.UTF8, skipFirstLine);
}
-
+
/**
* Creates a {@link FileLineIterator} over a given file, using the given encoding.
- *
- * @throws FileNotFoundException if the file does not exist
- * @throws IOException if the file cannot be read
+ *
+ * @throws FileNotFoundException
+ * if the file does not exist
+ * @throws IOException
+ * if the file cannot be read
*/
public FileLineIterator(File file, Charset encoding, boolean skipFirstLine) throws IOException {
- this(getFileInputStream(file), encoding, skipFirstLine);
+ this(FileLineIterator.getFileInputStream(file), encoding, skipFirstLine);
}
-
+
public FileLineIterator(InputStream is) throws IOException {
- this(is, UTF8, false);
+ this(is, FileLineIterator.UTF8, false);
}
-
+
public FileLineIterator(InputStream is, boolean skipFirstLine) throws IOException {
- this(is, UTF8, skipFirstLine);
+ this(is, FileLineIterator.UTF8, skipFirstLine);
}
-
+
public FileLineIterator(InputStream is, Charset encoding, boolean skipFirstLine) throws IOException {
reader = new BufferedReader(new InputStreamReader(is, encoding));
if (skipFirstLine) {
@@ -90,7 +95,7 @@
}
nextLine = reader.readLine();
}
-
+
static InputStream getFileInputStream(File file) throws IOException {
InputStream is = new FileInputStream(file);
String name = file.getName();
@@ -102,16 +107,16 @@
return is;
}
}
-
+
public String peek() {
return nextLine;
}
-
+
@Override
public boolean hasNext() {
return nextLine != null;
}
-
+
@Override
public String next() {
if (nextLine == null) {
@@ -130,7 +135,7 @@
}
return result;
}
-
+
/**
* @throws UnsupportedOperationException
*/
@@ -138,7 +143,7 @@
public void remove() {
throw new UnsupportedOperationException();
}
-
+
@Override
public void skip(int n) {
try {
@@ -149,11 +154,11 @@
close();
}
}
-
+
@Override
public void close() {
nextLine = null;
IOUtils.quietClose(reader);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Sat Feb 13 19:57:04 2010
@@ -31,24 +31,24 @@
private static final Logger log = LoggerFactory.getLogger(HadoopUtil.class);
- private HadoopUtil() {}
+ private HadoopUtil() { }
public static void overwriteOutput(String output) throws IOException {
Configuration conf = new JobConf(KMeansDriver.class);
Path outPath = new Path(output);
FileSystem fs = FileSystem.get(outPath.toUri(), conf);
if (fs.exists(outPath)) {
- log.warn("Deleting {}", outPath);
+ HadoopUtil.log.warn("Deleting {}", outPath);
fs.delete(outPath, true);
}
- log.warn("Creating dir {}", outPath);
+ HadoopUtil.log.warn("Creating dir {}", outPath);
fs.mkdirs(outPath);
}
public static void deletePath(String output, FileSystem fs) throws IOException {
Path outPath = new Path(output);
if (fs.exists(outPath)) {
- log.warn("Deleting {}", outPath);
+ HadoopUtil.log.warn("Deleting {}", outPath);
fs.delete(outPath, true);
}
}
@@ -56,14 +56,14 @@
public static void deletePaths(Iterable<Path> paths, FileSystem fs) throws IOException {
for (Path path : paths) {
if (fs.exists(path)) {
- log.warn("Deleting {}", path);
+ HadoopUtil.log.warn("Deleting {}", path);
fs.delete(path, true);
}
}
}
public static void rename(Path from, Path to, FileSystem fs) throws IOException {
- log.warn("Renaming " + from.toUri() + " to " + to.toUri());
+ HadoopUtil.log.warn("Renaming " + from.toUri() + " to " + to.toUri());
fs.rename(from, to);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java Sat Feb 13 19:57:04 2010
@@ -17,9 +17,6 @@
package org.apache.mahout.common;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import java.io.Closeable;
import java.io.IOException;
import java.sql.Connection;
@@ -27,68 +24,77 @@
import java.sql.SQLException;
import java.sql.Statement;
-/** <p>I/O-related utility methods that don't have a better home.</p> */
-public final class IOUtils {
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+/**
+ * <p>
+ * I/O-related utility methods that don't have a better home.
+ * </p>
+ */
+public final class IOUtils {
+
private static final Logger log = LoggerFactory.getLogger(IOUtils.class);
-
- private IOUtils() {
- }
-
+
+ private IOUtils() { }
+
public static void quietClose(Closeable closeable) {
if (closeable != null) {
try {
closeable.close();
} catch (IOException ioe) {
- log.warn("Unexpected exception while closing; continuing", ioe);
+ IOUtils.log.warn("Unexpected exception while closing; continuing", ioe);
}
}
}
-
+
// Sheez, why can't ResultSet, Statement and Connection implement Closeable?
-
+
public static void quietClose(ResultSet closeable) {
if (closeable != null) {
try {
closeable.close();
} catch (SQLException sqle) {
- log.warn("Unexpected exception while closing; continuing", sqle);
+ IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
}
}
}
-
+
public static void quietClose(Statement closeable) {
if (closeable != null) {
try {
closeable.close();
} catch (SQLException sqle) {
- log.warn("Unexpected exception while closing; continuing", sqle);
+ IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
}
}
}
-
+
public static void quietClose(Connection closeable) {
if (closeable != null) {
try {
closeable.close();
} catch (SQLException sqle) {
- log.warn("Unexpected exception while closing; continuing", sqle);
+ IOUtils.log.warn("Unexpected exception while closing; continuing", sqle);
}
}
}
-
+
/**
- * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and logs (but does not rethrow)
- * any resulting {@link SQLException}. This is useful for cleaning up after a database query.
- *
- * @param resultSet {@link ResultSet} to close
- * @param statement {@link Statement} to close
- * @param connection {@link Connection} to close
+ * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and logs (but does not
+ * rethrow) any resulting {@link SQLException}. This is useful for cleaning up after a database query.
+ *
+ * @param resultSet
+ * {@link ResultSet} to close
+ * @param statement
+ * {@link Statement} to close
+ * @param connection
+ * {@link Connection} to close
*/
public static void quietClose(ResultSet resultSet, Statement statement, Connection connection) {
- quietClose(resultSet);
- quietClose(statement);
- quietClose(connection);
+ IOUtils.quietClose(resultSet);
+ IOUtils.quietClose(statement);
+ IOUtils.quietClose(connection);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntegerTuple.java Sat Feb 13 19:57:04 2010
@@ -33,26 +33,27 @@
*
*/
public class IntegerTuple implements WritableComparable<IntegerTuple> {
-
+
private List<Integer> tuple = new ArrayList<Integer>();
-
- public IntegerTuple() {
- }
-
+
+ public IntegerTuple() { }
+
public IntegerTuple(Integer firstEntry) {
add(firstEntry);
}
public IntegerTuple(Collection<Integer> entries) {
- for(Integer entry: entries)
+ for (Integer entry : entries) {
add(entry);
+ }
}
public IntegerTuple(Integer[] entries) {
- for(Integer entry: entries)
+ for (Integer entry : entries) {
add(entry);
+ }
}
-
+
/**
* add an entry to the end of the list
*
@@ -62,7 +63,7 @@
public boolean add(Integer entry) {
return tuple.add(entry);
}
-
+
/**
* Fetches the string at the given location
*
@@ -72,7 +73,7 @@
public Integer integerAt(int index) {
return tuple.get(index);
}
-
+
/**
* Replaces the string at the given index with the given newString
*
@@ -83,7 +84,7 @@
public Integer replaceAt(int index, Integer newInteger) {
return tuple.set(index, newInteger);
}
-
+
/**
* Fetch the list of entries from the tuple
*
@@ -92,7 +93,7 @@
public List<Integer> getEntries() {
return Collections.unmodifiableList(this.tuple);
}
-
+
/**
* Returns the length of the tuple
*
@@ -101,34 +102,39 @@
public int length() {
return this.tuple.size();
}
-
+
@Override
public String toString() {
return tuple.toString();
}
-
+
@Override
public int hashCode() {
return tuple.hashCode();
}
-
+
@Override
public boolean equals(Object obj) {
- if (this == obj)
+ if (this == obj) {
return true;
- if (obj == null)
+ }
+ if (obj == null) {
return false;
- if (getClass() != obj.getClass())
+ }
+ if (getClass() != obj.getClass()) {
return false;
+ }
IntegerTuple other = (IntegerTuple) obj;
if (tuple == null) {
- if (other.tuple != null)
+ if (other.tuple != null) {
return false;
- } else if (!tuple.equals(other.tuple))
+ }
+ } else if (!tuple.equals(other.tuple)) {
return false;
+ }
return true;
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
int len = in.readInt();
@@ -138,15 +144,15 @@
tuple.add(data);
}
}
-
+
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(tuple.size());
- for (Integer entry : tuple) {
- out.writeInt(entry);
+ for (Integer entry : tuple) {
+ out.writeInt(entry);
}
}
-
+
@Override
public int compareTo(IntegerTuple otherTuple) {
int thisLength = length();
@@ -154,8 +160,9 @@
int min = Math.min(thisLength, otherLength);
for (int i = 0; i < min; i++) {
int ret = this.tuple.get(i).compareTo(otherTuple.integerAt(i));
- if (ret == 0)
+ if (ret == 0) {
continue;
+ }
return ret;
}
if (thisLength < otherLength) {
@@ -166,5 +173,5 @@
return 0;
}
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/LongPair.java Sat Feb 13 19:57:04 2010
@@ -21,27 +21,27 @@
/** A simple (ordered) pair of longs. */
public final class LongPair implements Comparable<LongPair>, Serializable {
-
+
private final long first;
private final long second;
-
+
public LongPair(long first, long second) {
this.first = first;
this.second = second;
}
-
+
public long getFirst() {
return first;
}
-
+
public long getSecond() {
return second;
}
-
+
public LongPair swap() {
return new LongPair(second, first);
}
-
+
@Override
public boolean equals(Object obj) {
if (!(obj instanceof LongPair)) {
@@ -50,7 +50,7 @@
LongPair otherPair = (LongPair) obj;
return first == otherPair.getFirst() && second == otherPair.getSecond();
}
-
+
@Override
public int hashCode() {
int firstHash = RandomUtils.hashLong(first);
@@ -58,12 +58,12 @@
// for (a,b) versus (b,a)
return (firstHash >>> 16 | firstHash << 16) ^ RandomUtils.hashLong(second);
}
-
+
@Override
public String toString() {
return '(' + String.valueOf(first) + ',' + second + ')';
}
-
+
@Override
public int compareTo(LongPair o) {
if (first < o.getFirst()) {
@@ -74,5 +74,5 @@
return second < o.getSecond() ? -1 : second > o.getSecond() ? 1 : 0;
}
}
-
+
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Pair.java Sat Feb 13 19:57:04 2010
@@ -21,56 +21,56 @@
/** A simple (ordered) pair of two objects. Elements may be null. */
public final class Pair<A, B> implements Serializable {
-
+
private final A first;
private final B second;
-
+
public Pair(A first, B second) {
this.first = first;
this.second = second;
}
-
+
public A getFirst() {
return first;
}
-
+
public B getSecond() {
return second;
}
-
+
public Pair<B, A> swap() {
return new Pair<B, A>(second, first);
}
-
+
@Override
public boolean equals(Object obj) {
- if (!(obj instanceof Pair)) {
+ if (!(obj instanceof Pair<?, ?>)) {
return false;
}
Pair<?, ?> otherPair = (Pair<?, ?>) obj;
- return isEqualOrNulls(first, otherPair.getFirst()) &&
- isEqualOrNulls(second, otherPair.getSecond());
+ return Pair.isEqualOrNulls(first, otherPair.getFirst()) &&
+ Pair.isEqualOrNulls(second, otherPair.getSecond());
}
-
+
private static boolean isEqualOrNulls(Object obj1, Object obj2) {
return obj1 == null ? obj2 == null : obj1.equals(obj2);
}
-
+
@Override
public int hashCode() {
- int firstHash = hashCodeNull(first);
+ int firstHash = Pair.hashCodeNull(first);
// Flip top and bottom 16 bits; this makes the hash function probably different
// for (a,b) versus (b,a)
- return (firstHash >>> 16 | firstHash << 16) ^ hashCodeNull(second);
+ return (firstHash >>> 16 | firstHash << 16) ^ Pair.hashCodeNull(second);
}
-
+
private static int hashCodeNull(Object obj) {
return obj == null ? 0 : obj.hashCode();
}
-
+
@Override
public String toString() {
return '(' + String.valueOf(first) + ',' + second + ')';
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java Sat Feb 13 19:57:04 2010
@@ -28,60 +28,62 @@
import org.slf4j.LoggerFactory;
public class Parameters {
-
+
private static final Logger log = LoggerFactory.getLogger(Parameters.class);
-
- private Map<String, String> params = new HashMap<String, String>();
-
- //private Configuration conf = new Configuration();
-
+
+ private Map<String,String> params = new HashMap<String,String>();
+
+ // private Configuration conf = new Configuration();
+
public Parameters() {
}
-
- private Parameters(Map<String, String> params) {
+
+ private Parameters(Map<String,String> params) {
this.params = params;
}
-
+
public String get(String key) {
return params.get(key);
}
-
+
public String get(String key, String defaultValue) {
String ret = params.get(key);
return ret == null ? defaultValue : ret;
}
-
+
public void set(String key, String value) {
params.put(key, value);
}
-
+
@Override
public String toString() {
Configuration conf = new Configuration();
- conf.set("io.serializations",
- "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
- DefaultStringifier<Map<String, String>> mapStringifier = new DefaultStringifier<Map<String, String>>(conf,
+ conf
+ .set("io.serializations",
+ "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+ DefaultStringifier<Map<String,String>> mapStringifier = new DefaultStringifier<Map<String,String>>(conf,
GenericsUtil.getClass(params));
try {
return mapStringifier.toString(params);
} catch (IOException e) {
- log.info("Encountered IOException while deserializing returning empty string", e);
+ Parameters.log.info("Encountered IOException while deserializing returning empty string", e);
return "";
}
-
+
}
-
+
public String print() {
return params.toString();
}
-
+
public static Parameters fromString(String serializedString) throws IOException {
Configuration conf = new Configuration();
- conf.set("io.serializations",
- "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
- Map<String, String> params = new HashMap<String, String>();
- DefaultStringifier<Map<String, String>> mapStringifier = new DefaultStringifier<Map<String, String>>(conf,
+ conf
+ .set("io.serializations",
+ "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+ Map<String,String> params = new HashMap<String,String>();
+ DefaultStringifier<Map<String,String>> mapStringifier = new DefaultStringifier<Map<String,String>>(conf,
GenericsUtil.getClass(params));
params = mapStringifier.fromString(serializedString);
return new Parameters(params);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/RandomUtils.java Sat Feb 13 19:57:04 2010
@@ -17,41 +17,45 @@
package org.apache.mahout.common;
-import org.uncommons.maths.random.MersenneTwisterRNG;
-
-import java.util.Random;
import java.nio.charset.Charset;
+import java.util.Random;
+
+import org.uncommons.maths.random.MersenneTwisterRNG;
/**
- * <p>The source of random stuff for the whole project. This lets us make all randomness in the project predictable, if
- * desired, for when we run unit tests, which should be repeatable.</p>
- *
- * <p>This class is increasingly incorrectly named as it also includes other mathematical utility methods.</p>
+ * <p>
+ * The source of random stuff for the whole project. This lets us make all randomness in the project
+ * predictable, if desired, for when we run unit tests, which should be repeatable.
+ * </p>
+ *
+ * <p>
+ * This class is increasingly incorrectly named as it also includes other mathematical utility methods.
+ * </p>
*/
public final class RandomUtils {
-
+
private static final byte[] STANDARD_SEED = "Mahout=Hadoop+ML".getBytes(Charset.forName("US-ASCII"));
-
+
private static boolean testSeed;
-
+
/** The largest prime less than 2<sup>31</sup>-1 that is the smaller of a twin prime pair. */
public static final int MAX_INT_SMALLER_TWIN_PRIME = 2147482949;
-
- private RandomUtils() {
- }
-
+
+ private RandomUtils() { }
+
public static void useTestSeed() {
- testSeed = true;
+ RandomUtils.testSeed = true;
}
-
+
public static Random getRandom() {
- return testSeed ? new MersenneTwisterRNG(STANDARD_SEED) : new MersenneTwisterRNG();
+ return RandomUtils.testSeed ? new MersenneTwisterRNG(RandomUtils.STANDARD_SEED)
+ : new MersenneTwisterRNG();
}
-
+
public static Random getRandom(long seed) {
- return new MersenneTwisterRNG(longSeedtoBytes(seed));
+ return new MersenneTwisterRNG(RandomUtils.longSeedtoBytes(seed));
}
-
+
public static byte[] longSeedtoBytes(long seed) {
byte[] seedBytes = new byte[16];
seedBytes[0] = (byte) (seed >>> 56);
@@ -60,58 +64,58 @@
seedBytes[3] = (byte) (seed >>> 32);
seedBytes[4] = (byte) (seed >>> 24);
seedBytes[5] = (byte) (seed >>> 16);
- seedBytes[6] = (byte) (seed >>> 8);
+ seedBytes[6] = (byte) (seed >>> 8);
seedBytes[7] = (byte) seed;
System.arraycopy(seedBytes, 0, seedBytes, 8, 8);
return seedBytes;
}
-
+
public static long seedBytesToLong(byte[] seed) {
- return
- ((seed[0] & 0xFFL) << 56) |
- ((seed[1] & 0xFFL) << 48) |
- ((seed[2] & 0xFFL) << 40) |
- ((seed[3] & 0xFFL) << 32) |
- ((seed[4] & 0xFFL) << 24) |
- ((seed[5] & 0xFFL) << 16) |
- ((seed[6] & 0xFFL) << 8) |
- (seed[7] & 0xFFL);
+ return (seed[0] & 0xFFL) << 56 | (seed[1] & 0xFFL) << 48 | (seed[2] & 0xFFL) << 40
+ | (seed[3] & 0xFFL) << 32 | (seed[4] & 0xFFL) << 24 | (seed[5] & 0xFFL) << 16
+ | (seed[6] & 0xFFL) << 8 | seed[7] & 0xFFL;
}
-
+
/** @return what {@link Double#hashCode()} would return for the same value */
public static int hashDouble(double value) {
// Just copied from Double.hashCode
long bits = Double.doubleToLongBits(value);
- return (int) (bits ^ (bits >>> 32));
+ return (int) (bits ^ bits >>> 32);
}
-
+
public static int hashFloat(float value) {
return Float.floatToIntBits(value);
}
-
+
public static int hashLong(long value) {
- return (int) (value ^ (value >>> 32));
+ return (int) (value ^ value >>> 32);
}
-
+
/**
- * <p>Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p such
- * that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.</p>
+ * <p>
+ * Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p
+ * such that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.
+ * </p>
*/
public static int nextTwinPrime(int n) {
- if (n > MAX_INT_SMALLER_TWIN_PRIME) {
+ if (n > RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
throw new IllegalArgumentException();
}
if (n <= 3) {
- return 3;
+ return 5; // smallest twin pair is (3, 5); the larger twin is returned, as documented above
}
- int next = nextPrime(n);
- while (isNotPrime(next + 2)) {
- next = nextPrime(next + 4);
+ int next = RandomUtils.nextPrime(n);
+ while (RandomUtils.isNotPrime(next + 2)) {
+ next = RandomUtils.nextPrime(next + 4);
}
return next + 2;
}
-
- /** <p>Finds smallest prime p such that p is greater than or equal to n.</p> */
+
+ /**
+ * <p>
+ * Finds smallest prime p such that p is greater than or equal to n.
+ * </p>
+ */
public static int nextPrime(int n) {
if (n < 2) {
return 2;
@@ -119,18 +123,18 @@
// Make sure the number is odd. Is this too clever?
n |= 0x1;
- // There is no problem with overflow since Integer.MAX_INT is prime, as it happens
+ // There is no problem with overflow since Integer.MAX_VALUE is prime, as it happens
- while (isNotPrime(n)) {
+ while (RandomUtils.isNotPrime(n)) {
n += 2;
}
return n;
}
-
+
/** @return <code>true</code> iff n is not a prime */
public static boolean isNotPrime(int n) {
if (n < 2 || (n & 0x1) == 0) { // < 2 or even
return true;
}
- int max = 1 + (int) Math.sqrt((double) n);
+ int max = 1 + (int) Math.sqrt(n);
for (int d = 3; d <= max; d += 2) {
if (n % d == 0) {
return true;
@@ -138,5 +142,5 @@
}
return false;
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringRecordIterator.java Sat Feb 13 19:57:04 2010
@@ -23,7 +23,7 @@
import java.util.regex.Pattern;
public class StringRecordIterator implements Iterator<Pair<List<String>,Long>> {
-
+
private static final Long ONE = 1L;
private final Iterator<String> lineIterator;
@@ -43,7 +43,7 @@
public Pair<List<String>,Long> next() {
String line = lineIterator.next();
String[] items = splitter.split(line);
- return new Pair<List<String>,Long>(Arrays.asList(items), ONE);
+ return new Pair<List<String>,Long>(Arrays.asList(items), StringRecordIterator.ONE);
}
@Override
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringTuple.java Sat Feb 13 19:57:04 2010
@@ -31,12 +31,11 @@
* An Ordered List of Strings which can be used in a Hadoop Map/Reduce Job
*/
public final class StringTuple implements WritableComparable<StringTuple> {
-
+
private List<String> tuple = new ArrayList<String>();
-
- public StringTuple() {
- }
-
+
+ public StringTuple() { }
+
public StringTuple(String firstEntry) {
add(firstEntry);
}
@@ -46,13 +45,13 @@
add(entry);
}
}
-
+
public StringTuple(String[] entries) {
for (String entry : entries) {
add(entry);
}
}
-
+
/**
* add an entry to the end of the list
*
@@ -62,7 +61,7 @@
public boolean add(String entry) {
return tuple.add(entry);
}
-
+
/**
* Fetches the string at the given location
*
@@ -72,7 +71,7 @@
public String stringAt(int index) {
return tuple.get(index);
}
-
+
/**
* Replaces the string at the given index with the given newString
*
@@ -83,7 +82,7 @@
public String replaceAt(int index, String newString) {
return tuple.set(index, newString);
}
-
+
/**
* Fetch the list of entries from the tuple
*
@@ -92,7 +91,7 @@
public List<String> getEntries() {
return Collections.unmodifiableList(this.tuple);
}
-
+
/**
* Returns the length of the tuple
*
@@ -101,34 +100,39 @@
public int length() {
return this.tuple.size();
}
-
+
@Override
public String toString() {
return tuple.toString();
}
-
+
@Override
public int hashCode() {
return tuple.hashCode();
}
-
+
@Override
public boolean equals(Object obj) {
- if (this == obj)
+ if (this == obj) {
return true;
- if (obj == null)
+ }
+ if (obj == null) {
return false;
- if (getClass() != obj.getClass())
+ }
+ if (getClass() != obj.getClass()) {
return false;
+ }
StringTuple other = (StringTuple) obj;
if (tuple == null) {
- if (other.tuple != null)
+ if (other.tuple != null) {
return false;
- } else if (!tuple.equals(other.tuple))
+ }
+ } else if (!tuple.equals(other.tuple)) {
return false;
+ }
return true;
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
int len = in.readInt();
@@ -139,7 +143,7 @@
tuple.add(value.toString());
}
}
-
+
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(tuple.size());
@@ -149,7 +153,7 @@
value.write(out);
}
}
-
+
@Override
public int compareTo(StringTuple otherTuple) {
int thisLength = length();
@@ -169,5 +173,5 @@
return 0;
}
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java Sat Feb 13 19:57:04 2010
@@ -17,40 +17,42 @@
package org.apache.mahout.common;
-import com.thoughtworks.xstream.XStream;
-
import java.util.regex.Pattern;
+import com.thoughtworks.xstream.XStream;
+
/**
* Offers two methods to convert an object to a string representation and restore the object given its string
* representation. Should use Hadoop Stringifier whenever available.
*/
public final class StringUtils {
-
+
private static final XStream xstream = new XStream();
private static final Pattern NEWLINE_PATTERN = Pattern.compile("\n");
-
+
private StringUtils() {
- // do nothing
+ // do nothing
}
-
+
/**
* Converts the object to a one-line string representation
- *
- * @param obj the object to convert
+ *
+ * @param obj
+ * the object to convert
* @return the string representation of the object
*/
public static String toString(Object obj) {
- return NEWLINE_PATTERN.matcher(xstream.toXML(obj)).replaceAll("");
+ return StringUtils.NEWLINE_PATTERN.matcher(StringUtils.xstream.toXML(obj)).replaceAll("");
}
-
+
/**
* Restores the object from its string representation.
- *
- * @param str the string representation of the object
+ *
+ * @param str
+ * the string representation of the object
* @return restored object
*/
public static <T> T fromString(String str) {
- return (T) xstream.fromXML(str);
+ return (T) StringUtils.xstream.fromXML(str);
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Summarizable.java Sat Feb 13 19:57:04 2010
@@ -18,11 +18,11 @@
package org.apache.mahout.common;
/**
- * A Summarizable Interface. All Classes which implements this has to have a summarize function which generates a string
- * summary of the data contained in it
+ * A Summarizable interface. Every class that implements it must provide a summarize function which
+ * generates a string summary of the data contained in it.
*/
public interface Summarizable {
-
+
/** @return Summary of the data inside the class */
String summarize();
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/TimingStatistics.java Sat Feb 13 19:57:04 2010
@@ -20,92 +20,91 @@
import java.io.Serializable;
public final class TimingStatistics implements Serializable {
-
+
private int nCalls;
private long minTime;
private long maxTime;
private long sumTime;
private double sumSquaredTime;
-
+
- /** Creates a new instance of CallStats */
+ /** Creates a new instance of TimingStatistics */
- public TimingStatistics() {
- }
-
- public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime,
- double sumSquaredTime) {
+ public TimingStatistics() { }
+
+ public TimingStatistics(int nCalls, long minTime, long maxTime, long sumTime, double sumSquaredTime) {
this.nCalls = nCalls;
this.minTime = minTime;
this.maxTime = maxTime;
this.sumTime = sumTime;
this.sumSquaredTime = sumSquaredTime;
}
-
+
public synchronized int getNCalls() {
return nCalls;
}
-
+
public synchronized long getMinTime() {
return Math.max(0, minTime);
}
-
+
public synchronized long getMaxTime() {
return maxTime;
}
-
+
public synchronized long getSumTime() {
return sumTime;
}
-
+
public synchronized double getSumSquaredTime() {
return sumSquaredTime;
}
-
+
public synchronized long getMeanTime() {
return nCalls == 0 ? 0 : sumTime / nCalls;
}
-
+
public synchronized long getStdDevTime() {
- if (nCalls == 0)
+ if (nCalls == 0) {
return 0;
+ }
double mean = getMeanTime();
double meanSquared = mean * mean;
double meanOfSquares = sumSquaredTime / nCalls;
double variance = meanOfSquares - meanSquared;
- if (variance < 0)
- return 0; // might happen due to rounding error
+ if (variance < 0) {
+ return 0; // might happen due to rounding error
+ }
return (long) Math.sqrt(variance);
}
-
+
+ @Override
public synchronized String toString() {
- return '\n' +
- "nCalls = " + nCalls + ";\n" +
- "sumTime = " + sumTime / 1000000000.0f + "s;\n" +
- "minTime = " + minTime / 1000000.0f + "ms;\n" +
- "maxTime = " + maxTime / 1000000.0f + "ms;\n" +
- "meanTime = " + getMeanTime() / 1000000.0f + "ms;\n" +
- "stdDevTime = " + getStdDevTime() / 1000000.0f + "ms;";
+ return '\n' + "nCalls = " + nCalls + ";\n" + "sumTime = " + sumTime / 1000000000.0f + "s;\n"
+ + "minTime = " + minTime / 1000000.0f + "ms;\n" + "maxTime = " + maxTime / 1000000.0f + "ms;\n"
+ + "meanTime = " + getMeanTime() / 1000000.0f + "ms;\n" + "stdDevTime = " + getStdDevTime()
+ / 1000000.0f + "ms;";
}
-
+
public Call newCall() {
return new Call();
}
-
+
public class Call {
private final long startTime = System.nanoTime();
-
- private Call() {
- }
-
+
+ private Call() { }
+
public void end() {
long elapsed = System.nanoTime() - startTime;
synchronized (TimingStatistics.this) {
nCalls++;
- if (elapsed < minTime || nCalls == 1)
+ if (elapsed < minTime || nCalls == 1) {
minTime = elapsed;
- if (elapsed > maxTime)
+ }
+ if (elapsed > maxTime) {
maxTime = elapsed;
+ }
sumTime += elapsed;
- sumSquaredTime += (double) (elapsed * elapsed);
+ sumSquaredTime += (double) elapsed * elapsed; // widen first: a long product overflows once elapsed exceeds ~3.04 s
}
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/Cache.java Sat Feb 13 19:57:04 2010
@@ -17,38 +17,41 @@
package org.apache.mahout.common.cache;
-public interface Cache<K, V> {
+public interface Cache<K,V> {
/**
- * Gets the Value from the Cache, If the object doesnt exist default behaviour
- * is to return null.
+ * Gets the value from the cache. If the object doesn't exist, the default behaviour is to return null.
*
* @param key
* @return V
*/
V get(K key);
-
+
/**
* returns true if the Cache contains the key
+ *
* @param key
* @return boolean
*/
boolean contains(K key);
-
+
/**
* puts the key and its value into the cache
+ *
* @param key
* @param value
*/
void set(K key, V value);
-
+
/**
* returns the current size of the cache
+ *
* @return long
*/
long size();
-
+
/**
- * returns the total capacity of the cache defined at contruction time
+ * returns the total capacity of the cache defined at construction time
+ *
* @return long
*/
long capacity();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/HybridCache.java Sat Feb 13 19:57:04 2010
@@ -17,75 +17,78 @@
package org.apache.mahout.common.cache;
-public class HybridCache<K, V> implements Cache<K, V> {
-
- private int LFUCapacity = 0;
-
- private int LRUCapacity = 0;
-
- private LRUCache<K, V> lruCache = null;
-
- private LFUCache<K, V> lfuCache = null;
-
+public class HybridCache<K,V> implements Cache<K,V> {
+
+ private int lfuCapacity;
+
+ private int lruCapacity;
+
+ private LRUCache<K,V> lruCache;
+
+ private LFUCache<K,V> lfuCache;
+
public HybridCache(int lfuCapacity, int lruCapacity) {
-
- this.LFUCapacity = lfuCapacity;
- this.LRUCapacity = lruCapacity;
-
- lruCache = new LRUCache<K, V>(LRUCapacity);
- lfuCache = new LFUCache<K, V>(LFUCapacity);
-
+
+ this.lfuCapacity = lfuCapacity;
+ this.lruCapacity = lruCapacity;
+
+ lruCache = new LRUCache<K,V>(lruCapacity);
+ lfuCache = new LFUCache<K,V>(lfuCapacity);
+
}
-
+
@Override
public long capacity() {
- return LFUCapacity + LRUCapacity;
+ return lfuCapacity + lruCapacity;
}
-
+
@Override
public V get(K key) {
V LRUObject = LRUGet(key);
- if (LRUObject != null)
+ if (LRUObject != null) {
return LRUObject;
-
+ }
+
V lFUObject = LFUGet(key);
- if (lFUObject != null)
+ if (lFUObject != null) {
return lFUObject;
-
+ }
+
return null;
}
-
+
private V LFUGet(K key) {
- if (lfuCache.getEvictionCount() >= LFUCapacity)
+ if (lfuCache.getEvictionCount() >= lfuCapacity) {
return lfuCache.quickGet(key);
+ }
return lfuCache.get(key);
}
-
+
private V LRUGet(K key) {
return lruCache.get(key);
}
-
+
@Override
public void set(K key, V value) {
-
- if (lfuCache.size() < LFUCapacity)
+
+ if (lfuCache.size() < lfuCapacity) {
lfuCache.set(key, value);
- else if (lfuCache.getEvictionCount() < LFUCapacity) {
+ } else if (lfuCache.getEvictionCount() < lfuCapacity) {
lfuCache.set(key, value);
lruCache.set(key, value);
} else {
lruCache.set(key, value);
}
}
-
+
@Override
public long size() {
return lfuCache.size() + lruCache.size();
}
-
+
@Override
public boolean contains(K key) {
return lruCache.contains(key) || lfuCache.contains(key);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LFUCache.java Sat Feb 13 19:57:04 2010
@@ -29,65 +29,67 @@
import org.apache.mahout.common.Pair;
-public class LFUCache<K, V> implements Cache<K, V> {
-
- private SortedMap<Long, Set<K>> evictionMap = null;
-
- private Map<K, Pair<V, AtomicLong>> dataMap = null;
-
- private int capacity = 0;
-
- private int evictionCount = 0;
-
- public LFUCache(int capacity) {
-
+public class LFUCache<K,V> implements Cache<K,V> {
+
+ private SortedMap<Long,Set<K>> evictionMap;
+
+ private Map<K,Pair<V,AtomicLong>> dataMap;
+
+ private int capacity;
+
+ private int evictionCount;
+
+ public LFUCache(int capacity) {
this.capacity = capacity;
-
- evictionMap = new TreeMap<Long, Set<K>>();
- dataMap = new HashMap<K, Pair<V, AtomicLong>>(capacity);
-
+
+ evictionMap = new TreeMap<Long,Set<K>>();
+ dataMap = new HashMap<K,Pair<V,AtomicLong>>(capacity);
}
-
+
@Override
public long capacity() {
return capacity;
}
-
+
public int getEvictionCount() {
return this.evictionCount;
}
-
+
@Override
public V get(K key) {
- Pair<V, AtomicLong> data = dataMap.get(key);
- if (data == null)
+ Pair<V,AtomicLong> data = dataMap.get(key);
+ if (data == null) {
return null;
- else {
+ } else {
V value = data.getFirst();
AtomicLong count = data.getSecond();
long oldCount = count.getAndIncrement();
incrementHit(key, oldCount);
return value;
}
-
+
}
- public V quickGet(K key){
- Pair<V, AtomicLong> data = dataMap.get(key);
- if (data == null)
+ public V quickGet(K key) {
+ Pair<V,AtomicLong> data = dataMap.get(key);
+ if (data == null) {
return null;
- else
+ } else {
return data.getFirst();
+ }
}
-
+
private void incrementHit(K key, long count) {
Set<K> keys = evictionMap.get(count);
- if (keys == null)
+ if (keys == null) {
throw new ConcurrentModificationException();
- if (keys.remove(key) == false)
+ }
+ if (keys.remove(key) == false) {
throw new ConcurrentModificationException();
- if (keys.isEmpty())
+ }
+ if (keys.isEmpty()) {
evictionMap.remove(count);
+ }
count++;
Set<K> keysNew = evictionMap.get(count);
if (keysNew == null) {
@@ -96,19 +98,20 @@
}
keysNew.add(key);
}
-
+
@Override
public void set(K key, V value) {
- if (dataMap.containsKey(key))
+ if (dataMap.containsKey(key)) {
return;
+ }
if (capacity == dataMap.size()) // Cache Full
{
removeLeastFrequent();
}
AtomicLong count = new AtomicLong(1L);
- Pair<V, AtomicLong> data = new Pair<V, AtomicLong>(value, count);
+ Pair<V,AtomicLong> data = new Pair<V,AtomicLong>(value, count);
dataMap.put(key, data);
-
+
Long countKey = 1L;
Set<K> keys = evictionMap.get(countKey);
if (keys == null) {
@@ -116,29 +119,31 @@
evictionMap.put(countKey, keys);
}
keys.add(key);
-
+
}
+
private void removeLeastFrequent() {
Long key = evictionMap.firstKey();
Set<K> values = evictionMap.get(key);
Iterator<K> it = values.iterator();
K keyToBeRemoved = it.next();
values.remove(keyToBeRemoved);
- if (values.isEmpty())
+ if (values.isEmpty()) {
evictionMap.remove(key);
+ }
dataMap.remove(keyToBeRemoved);
evictionCount++;
-
+
}
-
+
@Override
public long size() {
return dataMap.size();
}
-
+
@Override
public boolean contains(K key) {
- return (dataMap.containsKey(key));
+ return dataMap.containsKey(key);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LRUCache.java Sat Feb 13 19:57:04 2010
@@ -20,48 +20,48 @@
import java.util.LinkedHashMap;
import java.util.Map;
-public class LRUCache<K, V> implements Cache<K, V> {
-
- private int capacity = 0;
+public class LRUCache<K,V> implements Cache<K,V> {
+
+ private int capacity;
- private Map<K, V> lruCache = null;
+ private Map<K,V> lruCache;
public LRUCache(final int capacity) {
-
+
this.capacity = capacity;
-
- lruCache = new LinkedHashMap<K,V>( (int)(capacity/0.75f + 1), 0.75f, true) {
+
+ lruCache = new LinkedHashMap<K,V>((int) (capacity / 0.75f + 1), 0.75f, true) {
@Override
- protected boolean removeEldestEntry (Map.Entry<K,V> eldest) {
+ protected boolean removeEldestEntry(Map.Entry<K,V> eldest) {
return size() > capacity;
}
};
-
+
}
-
+
@Override
public long capacity() {
return capacity;
}
-
+
@Override
public V get(K key) {
return lruCache.get(key);
}
-
+
@Override
public void set(K key, V value) {
- lruCache.put(key,value);
+ lruCache.put(key, value);
}
-
+
@Override
public long size() {
return lruCache.size();
}
-
+
@Override
public boolean contains(K key) {
- return (lruCache.containsKey(key));
+ return lruCache.containsKey(key);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/cache/LeastKCache.java Sat Feb 13 19:57:04 2010
@@ -22,41 +22,40 @@
import java.util.Map;
import java.util.PriorityQueue;
-public class LeastKCache<K extends Comparable<? super K>, V> implements
- Cache<K, V> {
-
- private int capacity = 0;
-
- private Map<K, V> cache = null;
-
- private PriorityQueue<K> queue = null;
-
+public class LeastKCache<K extends Comparable<? super K>,V> implements Cache<K,V> {
+
+ private int capacity;
+
+ private Map<K,V> cache;
+
+ private PriorityQueue<K> queue;
+
public LeastKCache(int capacity) {
-
+
this.capacity = capacity;
-
- cache = new HashMap<K, V>(capacity);
+
+ cache = new HashMap<K,V>(capacity);
queue = new PriorityQueue<K>(capacity, new Comparator<K>() {
-
+
@Override
public int compare(K o1, K o2) {
return o2.compareTo(o1);
}
-
+
});
-
+
}
-
+
@Override
public final long capacity() {
return capacity;
}
-
+
@Override
public final V get(K key) {
return cache.get(key);
}
-
+
@Override
public final void set(K key, V value) {
if (contains(key) == false) {
@@ -68,15 +67,15 @@
cache.remove(k);
}
}
-
+
@Override
public final long size() {
return cache.size();
}
-
+
@Override
public final boolean contains(K key) {
- return (cache.containsKey(key));
+ return cache.containsKey(key);
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/commandline/DefaultOptionCreator.java Sat Feb 13 19:57:04 2010
@@ -22,92 +22,74 @@
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
public final class DefaultOptionCreator {
-
- private DefaultOptionCreator() {
- }
-
+
+ private DefaultOptionCreator() { }
+
/**
* Returns a default command line option for convergence delta specification.
*/
public static DefaultOptionBuilder convergenceOption() {
- return new DefaultOptionBuilder().withLongName("convergenceDelta")
- .withRequired(true).withShortName("v").withArgument(
- new ArgumentBuilder().withName("convergenceDelta").withMinimum(1).withMaximum(1)
- .create()).withDescription("The convergence delta value.");
+ return new DefaultOptionBuilder().withLongName("convergenceDelta").withRequired(true).withShortName("v")
+ .withArgument(
+ new ArgumentBuilder().withName("convergenceDelta").withMinimum(1).withMaximum(1).create())
+ .withDescription("The convergence delta value.");
}
-
+
/**
* Returns a default command line option for output directory specification.
*/
public static DefaultOptionBuilder outputOption() {
- return new DefaultOptionBuilder().withLongName("output").withRequired(true)
- .withShortName("o").withArgument(
- new ArgumentBuilder().withName("output").withMinimum(1).withMaximum(1).create())
+ return new DefaultOptionBuilder().withLongName("output").withRequired(true).withShortName("o")
+ .withArgument(new ArgumentBuilder().withName("output").withMinimum(1).withMaximum(1).create())
.withDescription("The directory pathname for output.");
}
-
+
/**
* Returns a default command line option for input directory specification.
*/
public static DefaultOptionBuilder inputOption() {
- return new DefaultOptionBuilder()
- .withLongName("input")
- .withRequired(true)
- .withShortName("i")
- .withArgument(
- new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
- .withDescription(
- "Path to job input directory");
+ return new DefaultOptionBuilder().withLongName("input").withRequired(true).withShortName("i")
+ .withArgument(new ArgumentBuilder().withName("input").withMinimum(1).withMaximum(1).create())
+ .withDescription("Path to job input directory");
}
-
+
/**
- * Returns a default command line option for specification of numbers of
- * clusters to create.
+ * Returns a default command line option for specification of numbers of clusters to create.
*/
public static DefaultOptionBuilder kOption() {
return new DefaultOptionBuilder()
.withLongName("k")
.withRequired(true)
- .withArgument(
- new ArgumentBuilder().withName("k").withMinimum(1).withMaximum(1).create())
+ .withArgument(new ArgumentBuilder().withName("k").withMinimum(1).withMaximum(1).create())
.withDescription(
- "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
+ "The k in k-Means. k random Vectors will be chosen as the Centroid and written to the clusters output path.")
.withShortName("k");
}
-
+
/**
- * Returns a default command line option for specification of max number of
- * iterations.
+ * Returns a default command line option for specification of max number of iterations.
*/
public static DefaultOptionBuilder maxIterOption() {
- return new DefaultOptionBuilder()
- .withLongName("maxIter")
- .withRequired(true)
- .withShortName("x")
- .withArgument(
- new ArgumentBuilder().withName("maxIter").withMinimum(1).withMaximum(1).create())
+ return new DefaultOptionBuilder().withLongName("maxIter").withRequired(true).withShortName("x")
+ .withArgument(new ArgumentBuilder().withName("maxIter").withMinimum(1).withMaximum(1).create())
.withDescription("The maximum number of iterations.");
}
-
+
/**
- * Returns a default command line option for specification of distance measure
- * class to use.
+ * Returns a default command line option for specification of distance measure class to use.
*/
public static DefaultOptionBuilder distanceOption() {
- return new DefaultOptionBuilder()
- .withLongName("measure")
- .withRequired(true)
- .withShortName("d")
- .withArgument(
- new ArgumentBuilder().withName("measure").withMinimum(1).withMaximum(1).create())
+ return new DefaultOptionBuilder().withLongName("measure").withRequired(true).withShortName("d")
+ .withArgument(new ArgumentBuilder().withName("measure").withMinimum(1).withMaximum(1).create())
.withDescription("The classname of the DistanceMeasure.");
}
-
+
/**
* Returns a default command line option for help.
* */
public static Option helpOption() {
- return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help").withShortName("h").create();
+ return new DefaultOptionBuilder().withLongName("help").withDescription("Print out help").withShortName(
+ "h").create();
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/CosineDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,36 +17,36 @@
package org.apache.mahout.common.distance;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.CardinalityException;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
/**
* This class implements a cosine distance metric: one minus the dot product of two vectors divided by the
* product of their lengths
*/
public class CosineDistanceMeasure implements DistanceMeasure {
-
+
@Override
public void configure(JobConf job) {
// nothing to do
}
-
+
@Override
public Collection<Parameter<?>> getParameters() {
return Collections.emptyList();
}
-
+
@Override
public void createParameters(String prefix, JobConf jobConf) {
// nothing to do
}
-
+
public static double distance(double[] p1, double[] p2) {
double dotProduct = 0.0;
double lengthSquaredp1 = 0.0;
@@ -57,15 +57,15 @@
dotProduct += p1[i] * p2[i];
}
double denominator = Math.sqrt(lengthSquaredp1) * Math.sqrt(lengthSquaredp2);
-
+
// correct for floating-point rounding errors
if (denominator < dotProduct) {
denominator = dotProduct;
}
-
- return 1.0 - (dotProduct / denominator);
+
+ return 1.0 - dotProduct / denominator;
}
-
+
@Override
public double distance(Vector v1, Vector v2) {
if (v1.size() != v2.size()) {
@@ -83,21 +83,36 @@
Vector.Element elt = iter.next();
lengthSquaredv2 += elt.get() * elt.get();
}
-
+
double dotProduct = v1.dot(v2);
double denominator = Math.sqrt(lengthSquaredv1) * Math.sqrt(lengthSquaredv2);
-
+
// correct for floating-point rounding errors
if (denominator < dotProduct) {
denominator = dotProduct;
}
-
- return 1.0 - (dotProduct / denominator);
+
+ return 1.0 - dotProduct / denominator;
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
- return distance(centroid, v); // TODO
+ Iterator<Vector.Element> iter = v.iterateNonZero();
+ double lengthSquaredv = 0.0;
+ while (iter.hasNext()) {
+ Vector.Element elt = iter.next();
+ lengthSquaredv += elt.get() * elt.get();
+ }
+
+ double dotProduct = centroid.dot(v);
+ double denominator = Math.sqrt(centroidLengthSquare) * Math.sqrt(lengthSquaredv);
+
+ // correct for floating-point rounding errors
+ if (denominator < dotProduct) {
+ denominator = dotProduct;
+ }
+
+ return 1.0 - dotProduct / denominator;
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/DistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,28 +17,32 @@
package org.apache.mahout.common.distance;
-import org.apache.mahout.math.Vector;
import org.apache.mahout.common.parameters.Parametered;
+import org.apache.mahout.math.Vector;
/** This interface is used for objects which can determine a distance metric between two points */
public interface DistanceMeasure extends Parametered {
-
+
/**
* Returns the distance metric applied to the arguments
- *
- * @param v1 a Vector defining a multidimensional point in some feature space
- * @param v2 a Vector defining a multidimensional point in some feature space
+ *
+ * @param v1
+ * a Vector defining a multidimensional point in some feature space
+ * @param v2
+ * a Vector defining a multidimensional point in some feature space
* @return a scalar double giving the distance between the two points
*/
double distance(Vector v1, Vector v2);
-
+
/**
- * Optimized version of distance metric for sparse vectors. This distance computation requires operations proportional
- * to the number of non-zero elements in the vector instead of the cardinality of the vector.
- *
- * @param centroidLengthSquare Square of the length of centroid
- * @param centroid Centroid vector
+ * Optimized version of distance metric for sparse vectors. This distance computation requires operations
+ * proportional to the number of non-zero elements in the vector instead of the cardinality of the vector.
+ *
+ * @param centroidLengthSquare
+ * Square of the length of centroid
+ * @param centroid
+ * Centroid vector
*/
double distance(double centroidLengthSquare, Vector centroid, Vector v);
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/EuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -20,18 +20,20 @@
import org.apache.mahout.math.Vector;
/**
- * This class implements a Euclidean distance metric by summing the square root of the squared differences between each
- * coordinate. <p/> If you don't care about the true distance and only need the values for comparison, then the base
- * class, {@link SquaredEuclideanDistanceMeasure}, will be faster since it doesn't do the actual square root of the
+ * This class implements a Euclidean distance metric by taking the square root of the sum of the squared
+ * differences between each coordinate.
+ * <p/>
+ * If you don't care about the true distance and only need the values for comparison, then the base class,
+ * {@link SquaredEuclideanDistanceMeasure}, will be faster since it doesn't do the actual square root of the
* squared differences.
*/
public class EuclideanDistanceMeasure extends SquaredEuclideanDistanceMeasure {
-
+
@Override
public double distance(Vector v1, Vector v2) {
return Math.sqrt(super.distance(v1, v2));
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
return Math.sqrt(super.distance(centroidLengthSquare, centroid, v));
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/ManhattanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,21 +17,21 @@
package org.apache.mahout.common.distance;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.CardinalityException;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.CardinalityException;
+import org.apache.mahout.math.Vector;
+
/**
- * This class implements a "manhattan distance" metric by summing the absolute values of the difference between each
- * coordinate
+ * This class implements a "manhattan distance" metric by summing the absolute values of the difference
+ * between each coordinate
*/
public class ManhattanDistanceMeasure implements DistanceMeasure {
-
+
public static double distance(double[] p1, double[] p2) {
double result = 0.0;
for (int i = 0; i < p1.length; i++) {
@@ -39,22 +39,22 @@
}
return result;
}
-
+
@Override
public void configure(JobConf job) {
- // nothing to do
+ // nothing to do
}
-
+
@Override
public Collection<Parameter<?>> getParameters() {
return Collections.emptyList();
}
-
+
@Override
public void createParameters(String prefix, JobConf jobConf) {
- // nothing to do
+ // nothing to do
}
-
+
@Override
public double distance(Vector v1, Vector v2) {
if (v1.size() != v2.size()) {
@@ -62,17 +62,19 @@
}
double result = 0;
Vector vector = v1.minus(v2);
- Iterator<Vector.Element> iter = vector.iterateNonZero();//this contains all non zero elements between the two
+ Iterator<Vector.Element> iter = vector.iterateNonZero(); // this contains all non zero elements between
+ // the
+ // two
while (iter.hasNext()) {
Vector.Element e = iter.next();
result += Math.abs(v2.getQuick(e.index()) - v1.getQuick(e.index()));
}
return result;
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
return distance(centroid, v); // TODO
}
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/SquaredEuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,40 +17,41 @@
package org.apache.mahout.common.distance;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.common.parameters.Parameter;
-
import java.util.Collection;
import java.util.Collections;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.mahout.common.parameters.Parameter;
+import org.apache.mahout.math.Vector;
+
/**
- * Like {@link EuclideanDistanceMeasure} but it does not take the square root. <p/> Thus, it is
- * not actually the Euclidean Distance, but it is saves on computation when you only need the distance for comparison
- * and don't care about the actual value as a distance.
+ * Like {@link EuclideanDistanceMeasure} but it does not take the square root.
+ * <p/>
+ * Thus, it is not actually the Euclidean Distance, but it saves on computation when you only need the
+ * distance for comparison and don't care about the actual value as a distance.
*/
public class SquaredEuclideanDistanceMeasure implements DistanceMeasure {
-
+
@Override
public void configure(JobConf job) {
- // nothing to do
+ // nothing to do
}
-
+
@Override
public Collection<Parameter<?>> getParameters() {
return Collections.emptyList();
}
-
+
@Override
public void createParameters(String prefix, JobConf jobConf) {
- // nothing to do
+ // nothing to do
}
-
+
@Override
public double distance(Vector v1, Vector v2) {
return v1.getDistanceSquared(v2);
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
return centroidLengthSquare - 2 * v.dot(centroid) + v.getLengthSquared();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/TanimotoDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,37 +17,35 @@
package org.apache.mahout.common.distance;
-import org.apache.mahout.math.Vector;
-
import java.util.Iterator;
+import org.apache.mahout.math.Vector;
+
/**
* Tanimoto coefficient implementation.
- *
+ *
* http://en.wikipedia.org/wiki/Jaccard_index
*/
public class TanimotoDistanceMeasure extends WeightedDistanceMeasure {
-
-
+
/**
* Calculates the distance between two vectors.
*
- * The coefficient (a measure of similarity) is:
- * T(a, b) = a.b / (|a|^2 + |b|^2 - a.b)
- *
+ * The coefficient (a measure of similarity) is: T(a, b) = a.b / (|a|^2 + |b|^2 - a.b)
+ *
* The distance d(a,b) = 1 - T(a,b)
- *
+ *
* @return 0 for perfect match, > 0 for greater distance
*/
@Override
public double distance(Vector a, Vector b) {
double ab = dot(a, b);
double denominator = dot(a, a) + dot(b, b) - ab;
- if(denominator < ab) { // correct for fp round-off: distance >= 0
+ if (denominator < ab) { // correct for fp round-off: distance >= 0
denominator = ab;
}
- if(denominator > 0) {
- // denom == 0 only when dot(a,a) == dot(b,b) == dot(a,b) == 0
+ if (denominator > 0) {
+ // denom == 0 only when dot(a,a) == dot(b,b) == dot(a,b) == 0
return 1 - ab / denominator;
} else {
return 0;
@@ -70,11 +68,10 @@
}
return dot;
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
return distance(centroid, v); // TODO
}
-
+
}
-
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,45 +17,47 @@
package org.apache.mahout.common.distance;
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
import org.apache.mahout.common.parameters.ClassParameter;
import org.apache.mahout.common.parameters.Parameter;
import org.apache.mahout.common.parameters.PathParameter;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
-import java.io.DataInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
/** Abstract implementation of DistanceMeasure with support for weights. */
public abstract class WeightedDistanceMeasure implements DistanceMeasure {
-
+
private List<Parameter<?>> parameters;
private Parameter<Path> weightsFile;
private ClassParameter vectorClass;
private Vector weights;
-
+
@Override
public void createParameters(String prefix, JobConf jobConf) {
parameters = new ArrayList<Parameter<?>>();
- weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null, "Path on DFS to a file containing the weights.");
+ weightsFile = new PathParameter(prefix, "weightsFile", jobConf, null,
+ "Path on DFS to a file containing the weights.");
parameters.add(weightsFile);
- vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class, "Class<Vector> file specified in parameter weightsFile has been serialized with.");
+ vectorClass = new ClassParameter(prefix, "vectorClass", jobConf, DenseVector.class,
+ "Class<Vector> file specified in parameter weightsFile has been serialized with.");
parameters.add(vectorClass);
}
-
+
@Override
public Collection<Parameter<?>> getParameters() {
return parameters;
}
-
+
@Override
public void configure(JobConf jobConf) {
if (parameters == null) {
@@ -84,14 +86,13 @@
throw new IllegalStateException(e);
}
}
-
+
public Vector getWeights() {
return weights;
}
-
+
public void setWeights(Vector weights) {
this.weights = weights;
}
-
-
+
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java?rev=909882&r1=909881&r2=909882&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/WeightedEuclideanDistanceMeasure.java Sat Feb 13 19:57:04 2010
@@ -17,16 +17,16 @@
package org.apache.mahout.common.distance;
-import org.apache.mahout.math.Vector;
-
import java.util.Iterator;
+import org.apache.mahout.math.Vector;
+
/**
- * This class implements a Euclidean distance metric by summing the square root of the squared differences between each
- * coordinate, optionally adding weights.
+ * This class implements a Euclidean distance metric by taking the square root of the sum of the squared
+ * differences between each coordinate, optionally adding weights.
*/
public class WeightedEuclideanDistanceMeasure extends WeightedDistanceMeasure {
-
+
@Override
public double distance(Vector p1, Vector p2) {
double result = 0;
@@ -47,10 +47,10 @@
}
return Math.sqrt(result);
}
-
+
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
return distance(centroid, v); // TODO
}
-
+
}