You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2013/12/08 05:55:38 UTC
svn commit: r1548995 - in /hama/trunk: ./
commons/src/main/java/org/apache/hama/commons/io/
commons/src/main/java/org/apache/hama/commons/math/
examples/src/main/java/org/apache/hama/examples/
ml/src/main/java/org/apache/hama/ml/kmeans/ ml/src/test/java/org/apache/hama/ml/kmeans/
Author: edwardyoon
Date: Sun Dec 8 04:55:37 2013
New Revision: 1548995
URL: http://svn.apache.org/r1548995
Log:
HAMA-827: Add NamedVector
Added:
hama/trunk/commons/src/main/java/org/apache/hama/commons/math/NamedDoubleVector.java
Modified:
hama/trunk/CHANGES.txt
hama/trunk/commons/src/main/java/org/apache/hama/commons/io/VectorWritable.java
hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DenseDoubleVector.java
hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java
hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java
hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java
Modified: hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hama/trunk/CHANGES.txt?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/CHANGES.txt (original)
+++ hama/trunk/CHANGES.txt Sun Dec 8 04:55:37 2013
@@ -4,6 +4,7 @@ Release 0.7.0 (unreleased changes)
NEW FEATURES
+ HAMA-827: Add NamedVector (edwardyoon)
HAMA-822: Add feature transformer interface to improve the power and flexibility of existing machine learning model (Yexi Jiang)
HAMA-774: CompositeInputFormat in Hama (Martin Illecker)
HAMA-815: Hama Pipes uses C++ templates (Martin Illecker)
Modified: hama/trunk/commons/src/main/java/org/apache/hama/commons/io/VectorWritable.java
URL: http://svn.apache.org/viewvc/hama/trunk/commons/src/main/java/org/apache/hama/commons/io/VectorWritable.java?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/commons/src/main/java/org/apache/hama/commons/io/VectorWritable.java (original)
+++ hama/trunk/commons/src/main/java/org/apache/hama/commons/io/VectorWritable.java Sun Dec 8 04:55:37 2013
@@ -24,6 +24,7 @@ import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleVector;
+import org.apache.hama.commons.math.NamedDoubleVector;
/**
* Writable for dense vectors.
@@ -102,6 +103,13 @@ public class VectorWritable implements W
for (int i = 0; i < vector.getDimension(); i++) {
out.writeDouble(vector.get(i));
}
+
+ if (vector.isNamed() && vector.getName() != null) {
+ out.writeBoolean(true);
+ out.writeUTF(vector.getName());
+ } else {
+ out.writeBoolean(false);
+ }
}
public static DoubleVector readVector(DataInput in) throws IOException {
@@ -111,6 +119,10 @@ public class VectorWritable implements W
for (int i = 0; i < length; i++) {
vector.set(i, in.readDouble());
}
+
+ if (in.readBoolean()) {
+ vector = new NamedDoubleVector(in.readUTF(), vector);
+ }
return vector;
}
Modified: hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DenseDoubleVector.java
URL: http://svn.apache.org/viewvc/hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DenseDoubleVector.java?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DenseDoubleVector.java (original)
+++ hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DenseDoubleVector.java Sun Dec 8 04:55:37 2013
@@ -181,10 +181,6 @@ public final class DenseDoubleVector imp
return newv;
}
- /*
- * (non-Javadoc)
- * @see de.jungblut.math.DoubleVector#subtract(de.jungblut.math.DoubleVector)
- */
@Override
public final DoubleVector subtractUnsafe(DoubleVector v) {
DoubleVector newv = new DenseDoubleVector(v.getLength());
@@ -194,10 +190,6 @@ public final class DenseDoubleVector imp
return newv;
}
- /*
- * (non-Javadoc)
- * @see de.jungblut.math.DoubleVector#subtract(double)
- */
@Override
public final DoubleVector subtract(double v) {
DenseDoubleVector newv = new DenseDoubleVector(vector.length);
Added: hama/trunk/commons/src/main/java/org/apache/hama/commons/math/NamedDoubleVector.java
URL: http://svn.apache.org/viewvc/hama/trunk/commons/src/main/java/org/apache/hama/commons/math/NamedDoubleVector.java?rev=1548995&view=auto
==============================================================================
--- hama/trunk/commons/src/main/java/org/apache/hama/commons/math/NamedDoubleVector.java (added)
+++ hama/trunk/commons/src/main/java/org/apache/hama/commons/math/NamedDoubleVector.java Sun Dec 8 04:55:37 2013
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.commons.math;
+
+import java.util.Iterator;
+/** Wrapper that pairs a {@link DoubleVector} with a name and delegates every vector operation to the wrapped instance. */
+public final class NamedDoubleVector implements DoubleVector {
+
+ private final String name;
+ private final DoubleVector vector;
+ /** Wraps the given vector under {@code name}. Despite the parameter name, {@code deepCopy} is stored by reference; no copy is made here. */
+ public NamedDoubleVector(String name, DoubleVector deepCopy) {
+ super();
+ this.name = name;
+ this.vector = deepCopy;
+ }
+
+ @Override
+ public double get(int index) {
+ return vector.get(index);
+ }
+
+ @Override
+ public int getLength() {
+ return vector.getLength();
+ }
+
+ @Override
+ public int getDimension() {
+ return vector.getDimension();
+ }
+ /** Writes through to the wrapped vector, so the instance passed to the constructor is modified as well. */
+ @Override
+ public void set(int index, double value) {
+ vector.set(index, value);
+ }
+ // From here through sliceUnsafe(int, int), each method returns the wrapped vector's result unchanged; this class does not re-attach the name.
+ @Override
+ @Deprecated
+ public DoubleVector apply(DoubleVectorFunction func) {
+ return vector.apply(func);
+ }
+
+ @Override
+ @Deprecated
+ public DoubleVector apply(DoubleVector other, DoubleDoubleVectorFunction func) {
+ return vector.apply(other, func);
+ }
+
+ @Override
+ public DoubleVector applyToElements(DoubleFunction func) {
+ return vector.applyToElements(func);
+ }
+
+ @Override
+ public DoubleVector applyToElements(DoubleVector other,
+ DoubleDoubleFunction func) {
+ return vector.applyToElements(other, func);
+ }
+
+ @Override
+ public DoubleVector addUnsafe(DoubleVector vector2) {
+ return vector.addUnsafe(vector2);
+ }
+
+ @Override
+ public DoubleVector add(DoubleVector vector2) {
+ return vector.add(vector2);
+ }
+
+ @Override
+ public DoubleVector add(double scalar) {
+ return vector.add(scalar);
+ }
+
+ @Override
+ public DoubleVector subtractUnsafe(DoubleVector vector2) {
+ return vector.subtractUnsafe(vector2);
+ }
+
+ @Override
+ public DoubleVector subtract(DoubleVector vector2) {
+ return vector.subtract(vector2);
+ }
+
+ @Override
+ public DoubleVector subtract(double scalar) {
+ return vector.subtract(scalar);
+ }
+
+ @Override
+ public DoubleVector subtractFrom(double scalar) {
+ return vector.subtractFrom(scalar);
+ }
+
+ @Override
+ public DoubleVector multiply(double scalar) {
+ return vector.multiply(scalar);
+ }
+
+ @Override
+ public DoubleVector multiplyUnsafe(DoubleVector vector2) {
+ return vector.multiplyUnsafe(vector2);
+ }
+
+ @Override
+ public DoubleVector multiply(DoubleVector vector2) {
+ return vector.multiply(vector2);
+ }
+
+ @Override
+ public DoubleVector multiply(DoubleMatrix matrix) {
+ return vector.multiply(matrix);
+ }
+
+ @Override
+ public DoubleVector multiplyUnsafe(DoubleMatrix matrix) {
+ return vector.multiplyUnsafe(matrix);
+ }
+
+ @Override
+ public DoubleVector divide(double scalar) {
+ return vector.divide(scalar);
+ }
+
+ @Override
+ public DoubleVector divideFrom(double scalar) {
+ return vector.divideFrom(scalar);
+ }
+
+ @Override
+ public DoubleVector pow(int x) {
+ return vector.pow(x);
+ }
+
+ @Override
+ public DoubleVector abs() {
+ return vector.abs();
+ }
+
+ @Override
+ public DoubleVector sqrt() {
+ return vector.sqrt();
+ }
+
+ @Override
+ public double sum() {
+ return vector.sum();
+ }
+
+ @Override
+ public double dotUnsafe(DoubleVector vector2) {
+ return vector.dotUnsafe(vector2);
+ }
+
+ @Override
+ public double dot(DoubleVector vector2) {
+ return vector.dot(vector2);
+ }
+
+ @Override
+ public DoubleVector slice(int length) {
+ return vector.slice(length);
+ }
+
+ @Override
+ public DoubleVector sliceUnsafe(int length) {
+ return vector.sliceUnsafe(length);
+ }
+
+ @Override
+ public DoubleVector slice(int start, int end) {
+ return vector.slice(start, end);
+ }
+
+ @Override
+ public DoubleVector sliceUnsafe(int start, int end) {
+ return vector.sliceUnsafe(start, end);
+ }
+
+ @Override
+ public double max() {
+ return vector.max();
+ }
+
+ @Override
+ public double min() {
+ return vector.min();
+ }
+
+ @Override
+ public double[] toArray() {
+ return vector.toArray();
+ }
+ /** Returns a new NamedDoubleVector with the same name around {@code vector.deepCopy()}; unlike the other operations, the name is kept. */
+ @Override
+ public DoubleVector deepCopy() {
+ return new NamedDoubleVector(name, vector.deepCopy());
+ }
+
+ @Override
+ public Iterator<DoubleVectorElement> iterateNonZero() {
+ return vector.iterateNonZero();
+ }
+
+ @Override
+ public Iterator<DoubleVectorElement> iterate() {
+ return vector.iterate();
+ }
+
+ @Override
+ public boolean isSparse() {
+ return vector.isSparse();
+ }
+ /** Always {@code true} for this wrapper, regardless of the wrapped vector. */
+ @Override
+ public boolean isNamed() {
+ return true;
+ }
+ /** Returns the name given to the constructor; it may be {@code null} because the constructor does not validate it. */
+ @Override
+ public String getName() {
+ return name;
+ }
+ /** Formats as the name, then {@code ": "}, then the wrapped vector's string form. NOTE(review): overrides Object#toString but carries no @Override. */
+ public String toString() {
+ return name + ": " + vector.toString();
+ }
+
+}
Modified: hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java
URL: http://svn.apache.org/viewvc/hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java (original)
+++ hama/trunk/examples/src/main/java/org/apache/hama/examples/Kmeans.java Sun Dec 8 04:55:37 2013
@@ -86,7 +86,8 @@ public class Kmeans {
// prepare the input, like deleting old versions and creating centers
KMeansBSP.prepareInput(count, k, dimension, conf, in, center, out, fs);
} else {
- KMeansBSP.prepareInputText(k, conf, in, center, out, fs);
+ // Set the last argument to TRUE if first column is required to be the key
+ KMeansBSP.prepareInputText(k, conf, in, center, out, fs, true);
in = new Path(in.getParent(), "textinput/in.seq");
}
@@ -95,10 +96,9 @@ public class Kmeans {
// just submit the job
job.waitForCompletion(true);
- List<String> results = KMeansBSP.readOutput(conf, out, fs, 5);
+ List<String> results = KMeansBSP.readOutput(conf, out, fs, 10);
for (String line : results) {
System.out.println(line);
}
- System.out.println("...");
}
}
Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/kmeans/KMeansBSP.java Sun Dec 8 04:55:37 2013
@@ -44,6 +44,7 @@ import org.apache.hama.bsp.sync.SyncExce
import org.apache.hama.commons.io.VectorWritable;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleVector;
+import org.apache.hama.commons.math.NamedDoubleVector;
import org.apache.hama.ml.distance.DistanceMeasurer;
import org.apache.hama.ml.distance.EuclidianDistance;
import org.apache.hama.util.ReflectionUtils;
@@ -80,7 +81,6 @@ public final class KMeansBSP
public final void setup(
BSPPeer<VectorWritable, NullWritable, IntWritable, VectorWritable, CenterMessage> peer)
throws IOException, InterruptedException {
-
conf = peer.getConfiguration();
Path centroids = new Path(peer.getConfiguration().get(CENTER_IN_PATH));
@@ -195,6 +195,7 @@ public final class KMeansBSP
// needs to be broadcasted.
final DoubleVector[] newCenterArray = new DoubleVector[centers.length];
final int[] summationCount = new int[centers.length];
+
// if our cache is not enabled, iterate over the disk items
if (cache == null) {
// we have an assignment step
@@ -222,6 +223,7 @@ public final class KMeansBSP
}
}
}
+
// now send messages about the local updates to each other peer
for (int i = 0; i < newCenterArray.length; i++) {
if (newCenterArray[i] != null) {
@@ -237,6 +239,7 @@ public final class KMeansBSP
final int[] summationCount, final DoubleVector key) {
final int lowestDistantCenter = getNearestCenter(key);
final DoubleVector clusterCenter = newCenterArray[lowestDistantCenter];
+
if (clusterCenter == null) {
newCenterArray[lowestDistantCenter] = key;
} else {
@@ -250,6 +253,7 @@ public final class KMeansBSP
private int getNearestCenter(DoubleVector key) {
int lowestDistantCenter = 0;
double lowestDistance = Double.MAX_VALUE;
+
for (int i = 0; i < centers.length; i++) {
final double estimatedDistance = distanceMeasurer.measureDistance(
centers[i], key);
@@ -419,9 +423,19 @@ public final class KMeansBSP
/**
* Reads input text files and writes it to a sequencefile.
+ *
+ * @param k
+ * @param conf
+ * @param txtIn
+ * @param center
+ * @param out
+ * @param fs
+ * @param hasKey true if first column is required to be the key.
+ * @return
+ * @throws IOException
*/
public static Path prepareInputText(int k, Configuration conf, Path txtIn,
- Path center, Path out, FileSystem fs) throws IOException {
+ Path center, Path out, FileSystem fs, boolean hasKey) throws IOException {
Path in;
if (fs.isFile(txtIn)) {
@@ -429,7 +443,7 @@ public final class KMeansBSP
} else {
in = new Path(txtIn, "textinput/in.seq");
}
-
+
if (fs.exists(out))
fs.delete(out, true);
@@ -454,11 +468,26 @@ public final class KMeansBSP
String line;
while ((line = br.readLine()) != null) {
String[] split = line.split("\t");
- DenseDoubleVector vec = new DenseDoubleVector(split.length);
- for (int j = 0; j < split.length; j++) {
- vec.set(j, Double.parseDouble(split[j]));
+ int columnLength = split.length;
+ int indexPos = 0;
+ if (hasKey) {
+ columnLength = columnLength - 1;
+ indexPos++;
+ }
+
+ DenseDoubleVector vec = new DenseDoubleVector(columnLength);
+ for (int j = 0; j < columnLength; j++) {
+ vec.set(j, Double.parseDouble(split[j + indexPos]));
+ }
+
+ VectorWritable vector;
+ if (hasKey) {
+ NamedDoubleVector named = new NamedDoubleVector(split[0], vec);
+ vector = new VectorWritable(named);
+ } else {
+ vector = new VectorWritable(vec);
}
- VectorWritable vector = new VectorWritable(vec);
+
dataWriter.append(vector, value);
if (k > i) {
assert centerWriter != null;
Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java?rev=1548995&r1=1548994&r2=1548995&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/kmeans/TestKMeansBSP.java Sun Dec 8 04:55:37 2013
@@ -63,7 +63,7 @@ public class TestKMeansBSP extends TestC
bw.write(sb.toString());
bw.close();
- in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs);
+ in = KMeansBSP.prepareInputText(k, conf, in, center, out, fs, false);
BSPJob job = KMeansBSP.createJob(conf, in, out, true);