Posted to commits@hama.apache.org by yx...@apache.org on 2013/12/05 16:39:41 UTC

svn commit: r1548171 - in /hama/trunk: ./ commons/src/main/java/org/apache/hama/commons/math/ ml/src/main/java/org/apache/hama/ml/ann/ ml/src/main/java/org/apache/hama/ml/perception/ ml/src/main/java/org/apache/hama/ml/regression/ ml/src/main/java/org/...

Author: yxjiang
Date: Thu Dec  5 15:39:41 2013
New Revision: 1548171

URL: http://svn.apache.org/r1548171
Log:
HAMA-822: Add feature transformer interface to improve the power and flexibility of existing machine learning models
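
A minimal client-side sketch of the new API (illustrative only; the classes
and the setFeatureTransformer/getFeatureTransformer methods are the ones
added or touched by the diff below):

    import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
    import org.apache.hama.ml.util.DefaultFeatureTransformer;
    import org.apache.hama.ml.util.FeatureTransformer;

    public class FeatureTransformerUsageSketch {
      public static void main(String[] args) {
        SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
        // Every model defaults to DefaultFeatureTransformer, which returns
        // the features unchanged; a custom subclass can remap them.
        ann.setFeatureTransformer(new DefaultFeatureTransformer());
        FeatureTransformer t = ann.getFeatureTransformer();
        System.out.println(t.getClass().getName());
      }
    }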

Added:
    hama/trunk/ml/src/main/java/org/apache/hama/ml/util/
    hama/trunk/ml/src/main/java/org/apache/hama/ml/util/DefaultFeatureTransformer.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/util/FeatureTransformer.java
Modified:
    hama/trunk/CHANGES.txt
    hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DoubleVector.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/AutoEncoder.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetwork.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetworkTrainer.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/SmallLayeredNeuralNetwork.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/MultiLayerPerceptron.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java
    hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java
    hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java
    hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
    hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java

Modified: hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hama/trunk/CHANGES.txt?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/CHANGES.txt (original)
+++ hama/trunk/CHANGES.txt Thu Dec  5 15:39:41 2013
@@ -4,6 +4,7 @@ Release 0.7.0 (unreleased changes)
 
   NEW FEATURES
 
+   HAMA-822: Add feature transformer interface to improve the power and flexibility of existing machine learning models (Yexi Jiang)
    HAMA-774: CompositeInputFormat in Hama (Martin Illecker)
    HAMA-815: Hama Pipes uses C++ templates (Martin Illecker)  
 

Modified: hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DoubleVector.java
URL: http://svn.apache.org/viewvc/hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DoubleVector.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DoubleVector.java (original)
+++ hama/trunk/commons/src/main/java/org/apache/hama/commons/math/DoubleVector.java Thu Dec  5 15:39:41 2013
@@ -281,9 +281,9 @@ public interface DoubleVector {
    * INCLUSIVE. For example vec = [0, 1, 2, 3, 4, 5], vec.slice(2, 5) = [2, 3,
    * 4, 5].
    * 
-   * @param offset must be > 0 and smaller than the dimension of the vector
-   * @param length must be > 0 and smaller than the dimension of the vector.
-   *          This must be greater than the offset.
+   * @param start must be >= 0 and smaller than the dimension of the vector
+   * @param end must be >= 0 and smaller than the dimension of the vector.
+   *          This must be greater than or equal to the start.
    * @return a new vector that is only (length) long.
    */
   public DoubleVector slice(int start, int end);
@@ -292,9 +292,9 @@ public interface DoubleVector {
    * Slices this vector from start to end, both are INCLUSIVE. For example vec =
    * [0, 1, 2, 3, 4, 5], vec.slice(2, 5) = [2, 3, 4, 5].
    * 
-   * @param offset must be > 0 and smaller than the dimension of the vector
-   * @param length must be > 0 and smaller than the dimension of the vector.
-   *          This must be greater than the offset.
+   * @param start must be >= 0 and smaller than the dimension of the vector
+   * @param end must be >= 0 and smaller than the dimension of the vector.
+   *          This must be greater than or equal to the start.
    * @return a new vector that is only (length) long.
    */
   public DoubleVector sliceUnsafe(int start, int end);

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/AutoEncoder.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/AutoEncoder.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/AutoEncoder.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/AutoEncoder.java Thu Dec  5 15:39:41 2013
@@ -27,6 +27,7 @@ import org.apache.hama.commons.math.Doub
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.commons.math.FunctionFactory;
 import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.LearningStyle;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 import com.google.common.base.Preconditions;
 
@@ -176,8 +177,21 @@ public class AutoEncoder {
     return this.transform(inputInstance, 1);
   }
   
+  /**
+   * Get the label(s) for the given feature instance.
+   * @param inputInstance the vector of input features
+   * @return the output (label) vector of the model
+   */
   public DoubleVector getOutput(DoubleVector inputInstance) {
     return model.getOutput(inputInstance);
   }
+  
+  /**
+   * Set the feature transformer.
+   * @param featureTransformer the transformer to apply to input features
+   */
+  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
+    this.model.setFeatureTransformer(featureTransformer);
+  }
 
 }

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetwork.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetwork.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetwork.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetwork.java Thu Dec  5 15:39:41 2013
@@ -20,10 +20,14 @@ package org.apache.hama.ml.ann;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Map;
 
+import org.apache.commons.lang.SerializationUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -31,6 +35,8 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hama.ml.util.DefaultFeatureTransformer;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 import com.google.common.base.Preconditions;
 
@@ -53,9 +59,12 @@ abstract class NeuralNetwork implements 
   // the path to store the model
   protected String modelPath;
 
+  protected FeatureTransformer featureTransformer;
+
   public NeuralNetwork() {
     this.learningRate = DEFAULT_LEARNING_RATE;
     this.modelType = this.getClass().getSimpleName();
+    this.featureTransformer = new DefaultFeatureTransformer();
   }
 
   public NeuralNetwork(String modelPath) {
@@ -179,6 +188,8 @@ abstract class NeuralNetwork implements 
     return this.modelPath;
   }
 
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  @Override
   public void readFields(DataInput input) throws IOException {
     // read model type
     this.modelType = WritableUtils.readString(input);
@@ -186,11 +197,40 @@ abstract class NeuralNetwork implements 
     this.learningRate = input.readDouble();
     // read model path
     this.modelPath = WritableUtils.readString(input);
+
     if (this.modelPath.equals("null")) {
       this.modelPath = null;
     }
+
+    // read feature transformer
+    int bytesLen = input.readInt();
+    byte[] featureTransformerBytes = new byte[bytesLen];
+    for (int i = 0; i < featureTransformerBytes.length; ++i) {
+      featureTransformerBytes[i] = input.readByte();
+    }
+
+    Class<? extends FeatureTransformer> featureTransformerCls = (Class<? extends FeatureTransformer>) SerializationUtils
+        .deserialize(featureTransformerBytes);
+
+    Constructor[] constructors = featureTransformerCls
+        .getDeclaredConstructors();
+    Constructor constructor = constructors[0];
+    
+    try {
+      this.featureTransformer = (FeatureTransformer) constructor
+          .newInstance(new Object[] {});
+    } catch (InstantiationException e) {
+      e.printStackTrace();
+    } catch (IllegalAccessException e) {
+      e.printStackTrace();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+    } catch (InvocationTargetException e) {
+      e.printStackTrace();
+    }
   }
 
+  @Override
   public void write(DataOutput output) throws IOException {
     // write model type
     WritableUtils.writeString(output, modelType);
@@ -202,6 +242,22 @@ abstract class NeuralNetwork implements 
     } else {
       WritableUtils.writeString(output, "null");
     }
+
+    // serialize the class
+    Class<? extends FeatureTransformer> featureTransformerCls = this.featureTransformer
+        .getClass();
+    byte[] featureTransformerBytes = SerializationUtils
+        .serialize(featureTransformerCls);
+    output.writeInt(featureTransformerBytes.length);
+    output.write(featureTransformerBytes);
+  }
+
+  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
+    this.featureTransformer = featureTransformer;
+  }
+
+  public FeatureTransformer getFeatureTransformer() {
+    return this.featureTransformer;
   }
 
 }

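Note on the scheme above: only the transformer's Class object is serialized
(java.lang.Class is Serializable), and a fresh instance is created on read
via reflection, which is why FeatureTransformer mandates a no-argument
constructor. A hypothetical round-trip using the same commons-lang calls as
the hunk above; any per-instance state in the transformer is intentionally
lost:

    import org.apache.commons.lang.SerializationUtils;
    import org.apache.hama.ml.util.DefaultFeatureTransformer;
    import org.apache.hama.ml.util.FeatureTransformer;

    public class TransformerClassRoundTrip {
      public static void main(String[] args) throws Exception {
        FeatureTransformer original = new DefaultFeatureTransformer();
        // write side: serialize the class, not the instance
        byte[] bytes = SerializationUtils.serialize(original.getClass());
        // read side: recover the class, then call its no-arg constructor
        Class<?> cls = (Class<?>) SerializationUtils.deserialize(bytes);
        FeatureTransformer restored = (FeatureTransformer) cls.newInstance();
        System.out.println(restored.getClass().getName());
      }
    }
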
Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetworkTrainer.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetworkTrainer.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetworkTrainer.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/NeuralNetworkTrainer.java Thu Dec  5 15:39:41 2013
@@ -29,6 +29,8 @@ import org.apache.hama.bsp.BSPPeer;
 import org.apache.hama.bsp.sync.SyncException;
 import org.apache.hama.commons.io.VectorWritable;
 import org.apache.hama.ml.perception.MLPMessage;
+import org.apache.hama.ml.util.DefaultFeatureTransformer;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 /**
  * The trainer that is used to train the {@link SmallLayeredNeuralNetwork} with
@@ -46,13 +48,15 @@ public abstract class NeuralNetworkTrain
   protected int maxIteration;
   protected int batchSize;
   protected String trainingMode;
-
+  
+  protected FeatureTransformer featureTransformer;
+  
   @Override
   final public void setup(
       BSPPeer<LongWritable, VectorWritable, NullWritable, NullWritable, MLPMessage> peer)
       throws IOException, SyncException, InterruptedException {
     conf = peer.getConfiguration();
-
+    featureTransformer = new DefaultFeatureTransformer();
     this.extraSetup(peer);
   }
 

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/SmallLayeredNeuralNetwork.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/SmallLayeredNeuralNetwork.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/SmallLayeredNeuralNetwork.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/ann/SmallLayeredNeuralNetwork.java Thu Dec  5 15:39:41 2013
@@ -255,18 +255,22 @@ public class SmallLayeredNeuralNetwork e
   /**
    * Get the output of the model according to given feature instance.
    */
+  @Override
   public DoubleVector getOutput(DoubleVector instance) {
     Preconditions.checkArgument(this.layerSizeList.get(0) == instance
         .getDimension() + 1, String.format(
         "The dimension of input instance should be %d.",
         this.layerSizeList.get(0) - 1));
+    // transform the features to another space
+    DoubleVector transformedInstance = this.featureTransformer
+        .transform(instance);
     // add bias feature
     DoubleVector instanceWithBias = new DenseDoubleVector(
-        instance.getDimension() + 1);
+        transformedInstance.getDimension() + 1);
     instanceWithBias.set(0, 0.99999); // set bias to be a little bit less than
                                       // 1.0
     for (int i = 1; i < instanceWithBias.getDimension(); ++i) {
-      instanceWithBias.set(i, instance.get(i - 1));
+      instanceWithBias.set(i, transformedInstance.get(i - 1));
     }
 
     List<DoubleVector> outputCache = getOutputInternal(instanceWithBias);
@@ -280,13 +284,13 @@ public class SmallLayeredNeuralNetwork e
    * Calculate output internally, the intermediate output of each layer will be
    * stored.
    * 
-   * @param instance The instance contains the features.
+   * @param instanceWithBias The instance that contains the features, with the bias element prepended.
    * @return Cached output of each layer.
    */
-  public List<DoubleVector> getOutputInternal(DoubleVector instance) {
+  public List<DoubleVector> getOutputInternal(DoubleVector instanceWithBias) {
     List<DoubleVector> outputCache = new ArrayList<DoubleVector>();
     // fill with instance
-    DoubleVector intermediateOutput = instance;
+    DoubleVector intermediateOutput = instanceWithBias;
     outputCache.add(intermediateOutput);
 
     for (int i = 0; i < this.layerSizeList.size() - 1; ++i) {
@@ -330,6 +334,11 @@ public class SmallLayeredNeuralNetwork e
 
   @Override
   public DoubleMatrix[] trainByInstance(DoubleVector trainingInstance) {
+    DoubleVector transformedVector = this.featureTransformer
+        .transform(trainingInstance.sliceUnsafe(this.layerSizeList.get(0) - 1));
+    
+    
+
     int inputDimension = this.layerSizeList.get(0) - 1;
     int outputDimension;
     DoubleVector inputInstance = null;
@@ -347,10 +356,11 @@ public class SmallLayeredNeuralNetwork e
 
       inputInstance = new DenseDoubleVector(this.layerSizeList.get(0));
       inputInstance.set(0, 1); // add bias
+      // get the features from the transformed vector
       for (int i = 0; i < inputDimension; ++i) {
-        inputInstance.set(i + 1, trainingInstance.get(i));
+        inputInstance.set(i + 1, transformedVector.get(i));
       }
-
+      // get the labels from the original training instance
       labels = trainingInstance.sliceUnsafe(inputInstance.getDimension() - 1,
           trainingInstance.getDimension() - 1);
     } else if (this.learningStyle == LearningStyle.UNSUPERVISED) {
@@ -364,10 +374,12 @@ public class SmallLayeredNeuralNetwork e
 
       inputInstance = new DenseDoubleVector(this.layerSizeList.get(0));
       inputInstance.set(0, 1); // add bias
+      // get the features from the transformed vector
       for (int i = 0; i < inputDimension; ++i) {
-        inputInstance.set(i + 1, trainingInstance.get(i));
+        inputInstance.set(i + 1, transformedVector.get(i));
       }
-      labels = trainingInstance.deepCopy();
+      // get the labels by copying the transformed vector
+      labels = transformedVector.deepCopy();
     }
 
     List<DoubleVector> internalResults = this.getOutputInternal(inputInstance);

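For reference, the instance layout assumed by trainByInstance above: in
supervised mode an instance is the features followed by the labels, and only
the feature slice passes through the transformer. A hypothetical example with
two features and one label (the values are made up):

    import org.apache.hama.commons.math.DenseDoubleVector;
    import org.apache.hama.commons.math.DoubleVector;

    public class InstanceLayoutSketch {
      public static void main(String[] args) {
        // layout: [f0, f1, label]
        DoubleVector trainingInstance =
            new DenseDoubleVector(new double[] { 0.2, 0.7, 1.0 });
        // the first two elements are the features to be transformed ...
        DoubleVector features = trainingInstance.sliceUnsafe(2);
        // ... and the inclusive slice [2, 2] is the single label, which is
        // read from the original, untransformed instance
        DoubleVector labels = trainingInstance.sliceUnsafe(2, 2);
        System.out.println(features.getDimension() + " features, "
            + labels.getDimension() + " label");
      }
    }
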
Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/MultiLayerPerceptron.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/MultiLayerPerceptron.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/MultiLayerPerceptron.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/MultiLayerPerceptron.java Thu Dec  5 15:39:41 2013
@@ -26,6 +26,8 @@ import org.apache.hama.commons.math.Doub
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.commons.math.FunctionFactory;
 import org.apache.hama.ml.ann.NeuralNetworkTrainer;
+import org.apache.hama.ml.util.DefaultFeatureTransformer;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 /**
  * PerceptronBase defines the common behavior of all the concrete perceptrons.
@@ -50,6 +52,9 @@ public abstract class MultiLayerPerceptr
   protected DoubleDoubleFunction costFunction;
   protected DoubleFunction squashingFunction;
 
+  // transform the original features to a new space
+  protected FeatureTransformer featureTransformer;
+
   /**
    * Initialize the MLP.
    * 
@@ -91,6 +96,8 @@ public abstract class MultiLayerPerceptr
         .createDoubleDoubleFunction(this.costFunctionName);
     this.squashingFunction = FunctionFactory
         .createDoubleFunction(this.squashingFunctionName);
+
+    this.featureTransformer = new DefaultFeatureTransformer();
   }
 
   /**
@@ -118,7 +125,11 @@ public abstract class MultiLayerPerceptr
    * @param featureVector The feature of an instance to feed the perceptron.
    * @return The results.
    */
-  public abstract DoubleVector output(DoubleVector featureVector);
+  public DoubleVector output(DoubleVector featureVector) {
+    return this.outputWrapper(this.featureTransformer.transform(featureVector));
+  }
+
+  public abstract DoubleVector outputWrapper(DoubleVector featureVector);
 
   /**
    * Use the class name as the type name.
@@ -176,4 +187,17 @@ public abstract class MultiLayerPerceptr
     return layerSizeArray;
   }
 
+  /**
+   * Set the feature transformer.
+   * 
+   * @param featureTransformer the transformer to apply to input features
+   */
+  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
+    this.featureTransformer = featureTransformer;
+  }
+  
+  public FeatureTransformer getFeatureTransformer() {
+    return this.featureTransformer;
+  }
+
 }

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/perception/SmallMultiLayerPerceptron.java Thu Dec  5 15:39:41 2013
@@ -20,6 +20,8 @@ package org.apache.hama.ml.perception;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -28,6 +30,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 
+import org.apache.commons.lang.SerializationUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -46,6 +49,7 @@ import org.apache.hama.commons.math.Dens
 import org.apache.hama.commons.math.DoubleFunction;
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.commons.math.FunctionFactory;
+import org.apache.hama.ml.util.FeatureTransformer;
 import org.mortbay.log.Log;
 
 /**
@@ -152,7 +156,7 @@ public final class SmallMultiLayerPercep
    * {@inheritDoc}
    * The model meta-data is stored in memory.
    */
-  public DoubleVector output(DoubleVector featureVector) {
+  public DoubleVector outputWrapper(DoubleVector featureVector) {
     List<double[]> outputCache = this.outputInternal(featureVector);
     // the output of the last layer is the output of the MLP
     return new DenseDoubleVector(outputCache.get(outputCache.size() - 1));
@@ -175,6 +179,10 @@ public final class SmallMultiLayerPercep
 
     // fill with input features
     intermediateResults[0] = 1.0; // bias
+
+    // transform the original features to another space
+    featureVector = this.featureTransformer.transform(featureVector);
+
     for (int i = 0; i < featureVector.getDimension(); ++i) {
       intermediateResults[i + 1] = featureVector.get(i);
     }
@@ -245,9 +253,10 @@ public final class SmallMultiLayerPercep
       return weightUpdateMatrices;
     }
 
+    // transform the features (excluding the labels) to the new space
     double[] trainingVec = trainingInstance.toArray();
-    double[] trainingFeature = Arrays.copyOfRange(trainingVec, 0,
-        this.layerSizeArray[0]);
+    double[] trainingFeature = this.featureTransformer.transform(
+        trainingInstance.sliceUnsafe(0, this.layerSizeArray[0] - 1)).toArray();
     double[] trainingLabels = Arrays.copyOfRange(trainingVec,
         this.layerSizeArray[0], trainingVec.length);
 
@@ -402,6 +411,12 @@ public final class SmallMultiLayerPercep
     this.numberOfLayers = input.readInt();
     this.squashingFunctionName = WritableUtils.readString(input);
     this.costFunctionName = WritableUtils.readString(input);
+
+    this.squashingFunction = FunctionFactory
+        .createDoubleFunction(this.squashingFunctionName);
+    this.costFunction = FunctionFactory
+        .createDoubleDoubleFunction(this.costFunctionName);
+
     // read the number of neurons for each layer
     this.layerSizeArray = new int[this.numberOfLayers];
     for (int i = 0; i < numberOfLayers; ++i) {
@@ -411,10 +426,28 @@ public final class SmallMultiLayerPercep
     for (int i = 0; i < numberOfLayers - 1; ++i) {
       this.weightMatrice[i] = (DenseDoubleMatrix) MatrixWritable.read(input);
     }
-    this.squashingFunction = FunctionFactory
-        .createDoubleFunction(this.squashingFunctionName);
-    this.costFunction = FunctionFactory
-        .createDoubleDoubleFunction(this.costFunctionName);
+
+    // read feature transformer
+    int bytesLen = input.readInt();
+    byte[] featureTransformerBytes = new byte[bytesLen];
+    for (int i = 0; i < featureTransformerBytes.length; ++i) {
+      featureTransformerBytes[i] = input.readByte();
+    }
+    Class featureTransformerCls = (Class) SerializationUtils
+        .deserialize(featureTransformerBytes);
+    Constructor constructor = featureTransformerCls.getConstructors()[0];
+    try {
+      this.featureTransformer = (FeatureTransformer) constructor
+          .newInstance(new Object[] {});
+    } catch (InstantiationException e) {
+      e.printStackTrace();
+    } catch (IllegalAccessException e) {
+      e.printStackTrace();
+    } catch (IllegalArgumentException e) {
+      e.printStackTrace();
+    } catch (InvocationTargetException e) {
+      e.printStackTrace();
+    }
   }
 
   @Override
@@ -435,6 +468,14 @@ public final class SmallMultiLayerPercep
       MatrixWritable matrixWritable = new MatrixWritable(this.weightMatrice[i]);
       matrixWritable.write(output);
     }
+
+    // serialize the feature transformer
+    Class<? extends FeatureTransformer> featureTransformerCls = this.featureTransformer
+        .getClass();
+    byte[] featureTransformerBytes = SerializationUtils
+        .serialize(featureTransformerCls);
+    output.writeInt(featureTransformerBytes.length);
+    output.write(featureTransformerBytes);
   }
 
   /**

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LinearRegression.java Thu Dec  5 15:39:41 2013
@@ -26,6 +26,7 @@ import org.apache.hama.commons.math.Doub
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.commons.math.FunctionFactory;
 import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 /**
  * Linear regression model. It can be used for numeric regression or prediction.
@@ -175,5 +176,13 @@ public class LinearRegression {
   public DoubleVector getWeights() {
     return ann.getWeightsByLayer(0).getRowVector(0);
   }
+  
+  /**
+   * Set the feature transformer.
+   * @param featureTransformer the transformer to apply to input features
+   */
+  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
+    this.ann.setFeatureTransformer(featureTransformer);
+  }
 
 }

Modified: hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java (original)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/regression/LogisticRegression.java Thu Dec  5 15:39:41 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.commons.math.FunctionFactory;
 import org.apache.hama.ml.ann.SmallLayeredNeuralNetwork;
+import org.apache.hama.ml.util.FeatureTransformer;
 
 /**
  * The logistic regression model. It can be used to conduct 2-class
@@ -169,4 +170,11 @@ public class LogisticRegression {
     return ann.getWeightsByLayer(0).getRowVector(0);
   }
 
+  /**
+   * Set the feature transformer.
+   * @param featureTransformer the transformer to apply to input features
+   */
+  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
+    this.ann.setFeatureTransformer(featureTransformer);
+  }
 }

Added: hama/trunk/ml/src/main/java/org/apache/hama/ml/util/DefaultFeatureTransformer.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/util/DefaultFeatureTransformer.java?rev=1548171&view=auto
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/util/DefaultFeatureTransformer.java (added)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/util/DefaultFeatureTransformer.java Thu Dec  5 15:39:41 2013
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.ml.util;
+
+import org.apache.hama.commons.math.DoubleVector;
+
+/**
+ * The DefaultFeatureTransformer simply returns the original features unchanged.
+ */
+public class DefaultFeatureTransformer extends FeatureTransformer {
+
+  /**
+   * Directly return the original features.
+   */
+  @Override
+  public DoubleVector transform(DoubleVector originalFeatures) {
+    return originalFeatures;
+  }
+
+}

Added: hama/trunk/ml/src/main/java/org/apache/hama/ml/util/FeatureTransformer.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/main/java/org/apache/hama/ml/util/FeatureTransformer.java?rev=1548171&view=auto
==============================================================================
--- hama/trunk/ml/src/main/java/org/apache/hama/ml/util/FeatureTransformer.java (added)
+++ hama/trunk/ml/src/main/java/org/apache/hama/ml/util/FeatureTransformer.java Thu Dec  5 15:39:41 2013
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.ml.util;
+
+import org.apache.hama.commons.math.DoubleVector;
+
+/**
+ * FeatureTransformer defines the interface for transforming the original
+ * features to a new space.
+ * 
+ * NOTE: a user-defined feature transformer must have a no-argument constructor.
+ * 
+ */
+public abstract class FeatureTransformer {
+  
+  public FeatureTransformer() {
+  }
+  
+  /**
+   * Transform the original features into the transformed space.
+   * @param originalFeatures the original feature vector
+   * @return the transformed feature vector
+   */
+  public abstract DoubleVector transform(DoubleVector originalFeatures);
+  
+}

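A user-defined transformer might look like the following (hypothetical, not
part of this commit). It is deliberately stateless: since only the class is
persisted, per-instance state would not survive a write/read cycle.

    import org.apache.hama.commons.math.DenseDoubleVector;
    import org.apache.hama.commons.math.DoubleVector;
    import org.apache.hama.ml.util.FeatureTransformer;

    public class SigmoidFeatureTransformer extends FeatureTransformer {

      public SigmoidFeatureTransformer() {
        // the required no-argument constructor
      }

      @Override
      public DoubleVector transform(DoubleVector originalFeatures) {
        // squash every feature into (0, 1) with a sigmoid
        DoubleVector transformed =
            new DenseDoubleVector(originalFeatures.getDimension());
        for (int i = 0; i < originalFeatures.getDimension(); ++i) {
          transformed.set(i, 1.0 / (1.0 + Math.exp(-originalFeatures.get(i))));
        }
        return transformed;
      }
    }
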
Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestAutoEncoder.java Thu Dec  5 15:39:41 2013
@@ -40,7 +40,6 @@ import org.apache.hama.commons.io.Vector
 import org.apache.hama.commons.math.DenseDoubleVector;
 import org.apache.hama.commons.math.DoubleVector;
 import org.apache.hama.ml.MLTestBase;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.mortbay.log.Log;
 
@@ -77,7 +76,6 @@ public class TestAutoEncoder extends MLT
 
   }
   
-  @Ignore
   @Test
   public void testAutoEncoderSwissRollDataset() {
     List<double[]> instanceList = new ArrayList<double[]>();

Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/ann/TestSmallLayeredNeuralNetwork.java Thu Dec  5 15:39:41 2013
@@ -47,6 +47,8 @@ import org.apache.hama.commons.math.Func
 import org.apache.hama.ml.MLTestBase;
 import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.LearningStyle;
 import org.apache.hama.ml.ann.AbstractLayeredNeuralNetwork.TrainingMethod;
+import org.apache.hama.ml.util.DefaultFeatureTransformer;
+import org.apache.hama.ml.util.FeatureTransformer;
 import org.junit.Test;
 import org.mortbay.log.Log;
 
@@ -79,6 +81,10 @@ public class TestSmallLayeredNeuralNetwo
     matrices[1] = new DenseDoubleMatrix(1, 6, 0.8);
     ann.setWeightMatrices(matrices);
     ann.setLearningStyle(LearningStyle.UNSUPERVISED);
+    
+    FeatureTransformer defaultFeatureTransformer = new DefaultFeatureTransformer();
+    ann.setFeatureTransformer(defaultFeatureTransformer);
+    
 
     // write to file
     String modelPath = "/tmp/testSmallLayeredNeuralNetworkReadWrite";
@@ -111,6 +117,9 @@ public class TestSmallLayeredNeuralNetwo
         }
       }
     }
+    
+    FeatureTransformer copyTransformer = annCopy.getFeatureTransformer();
+    assertEquals(defaultFeatureTransformer.getClass().getName(), copyTransformer.getClass().getName());
   }
 
   @Test
@@ -408,8 +417,13 @@ public class TestSmallLayeredNeuralNetwo
 
     Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
   }
-
+  
   @Test
+  public void testLogisticRegression() {
+    this.testLogisticRegressionDistributedVersion();
+    this.testLogisticRegressionDistributedVersionWithFeatureTransformer();
+  }
+
   public void testLogisticRegressionDistributedVersion() {
     // write data into a sequence file
     String tmpStrDatasetPath = "/tmp/logistic_regression_data";
@@ -514,5 +528,116 @@ public class TestSmallLayeredNeuralNetwo
         (double) (end - start) / 1000));
     Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
   }
+  
+  public void testLogisticRegressionDistributedVersionWithFeatureTransformer() {
+    // write data into a sequence file
+    String tmpStrDatasetPath = "/tmp/logistic_regression_data_feature_transformer";
+    Path tmpDatasetPath = new Path(tmpStrDatasetPath);
+    String strDataPath = "src/test/resources/logistic_regression_data.txt";
+    String modelPath = "/tmp/logistic-regression-distributed-model-feature-transformer";
+
+    Configuration conf = new Configuration();
+    List<double[]> instanceList = new ArrayList<double[]>();
+    List<double[]> trainingInstances = null;
+    List<double[]> testInstances = null;
+
+    try {
+      FileSystem fs = FileSystem.get(new URI(tmpStrDatasetPath), conf);
+      fs.delete(tmpDatasetPath, true);
+      if (fs.exists(tmpDatasetPath)) {
+        fs.createNewFile(tmpDatasetPath);
+      }
+
+      BufferedReader br = new BufferedReader(new FileReader(strDataPath));
+      String line = null;
+      int count = 0;
+      while ((line = br.readLine()) != null) {
+        String[] tokens = line.trim().split(",");
+        double[] instance = new double[tokens.length];
+        for (int i = 0; i < tokens.length; ++i) {
+          instance[i] = Double.parseDouble(tokens[i]);
+        }
+        instanceList.add(instance);
+      }
+      br.close();
+      
+      zeroOneNormalization(instanceList, instanceList.get(0).length - 1);
+      
+      // write training data to temporal sequence file
+      SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
+          tmpDatasetPath, LongWritable.class, VectorWritable.class);
+      int testSize = 150;
+
+      Collections.shuffle(instanceList);
+      testInstances = new ArrayList<double[]>();
+      testInstances.addAll(instanceList.subList(instanceList.size() - testSize,
+          instanceList.size()));
+      trainingInstances = instanceList.subList(0, instanceList.size()
+          - testSize);
+
+      for (double[] instance : trainingInstances) {
+        DoubleVector vec = new DenseDoubleVector(instance);
+        writer.append(new LongWritable(count++), new VectorWritable(vec));
+      }
+      writer.close();
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (URISyntaxException e) {
+      e.printStackTrace();
+    }
+
+    // create model
+    int dimension = 8;
+    SmallLayeredNeuralNetwork ann = new SmallLayeredNeuralNetwork();
+    ann.setLearningRate(0.7);
+    ann.setMomemtumWeight(0.5);
+    ann.setRegularizationWeight(0.1);
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(dimension, false,
+        FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.addLayer(1, true, FunctionFactory.createDoubleFunction("Sigmoid"));
+    ann.setCostFunction(FunctionFactory
+        .createDoubleDoubleFunction("CrossEntropy"));
+    ann.setModelPath(modelPath);
+    
+    FeatureTransformer featureTransformer = new DefaultFeatureTransformer();
+    
+    ann.setFeatureTransformer(featureTransformer);
+
+    long start = new Date().getTime();
+    Map<String, String> trainingParameters = new HashMap<String, String>();
+    trainingParameters.put("tasks", "5");
+    trainingParameters.put("training.max.iterations", "2000");
+    trainingParameters.put("training.batch.size", "300");
+    trainingParameters.put("convergence.check.interval", "1000");
+    ann.train(tmpDatasetPath, trainingParameters);
+    
+
+    long end = new Date().getTime();
+
+    // validate results
+    double errorRate = 0;
+    // calculate the error on test instance
+    for (double[] testInstance : testInstances) {
+      DoubleVector instance = new DenseDoubleVector(testInstance);
+      double expected = instance.get(instance.getDimension() - 1);
+      instance = instance.slice(instance.getDimension() - 1);
+      instance = featureTransformer.transform(instance);
+      double actual = ann.getOutput(instance).get(0);
+      if (actual < 0.5 && expected >= 0.5 || actual >= 0.5 && expected < 0.5) {
+        ++errorRate;
+      }
+    }
+    errorRate /= testInstances.size();
+
+    Log.info(String.format("Training time: %fs\n",
+        (double) (end - start) / 1000));
+    Log.info(String.format("Relative error: %f%%\n", errorRate * 100));
+  }
 
 }

Modified: hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java
URL: http://svn.apache.org/viewvc/hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java?rev=1548171&r1=1548170&r2=1548171&view=diff
==============================================================================
--- hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java (original)
+++ hama/trunk/ml/src/test/java/org/apache/hama/ml/perception/TestSmallMultiLayerPerceptron.java Thu Dec  5 15:39:41 2013
@@ -26,6 +26,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 
+import org.apache.commons.lang.SerializationUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -39,6 +40,8 @@ import org.apache.hama.commons.math.Dens
 import org.apache.hama.commons.math.DenseDoubleVector;
 import org.apache.hama.commons.math.DoubleMatrix;
 import org.apache.hama.commons.math.DoubleVector;
+import org.apache.hama.ml.util.DefaultFeatureTransformer;
+import org.apache.hama.ml.util.FeatureTransformer;
 import org.junit.Test;
 import org.mortbay.log.Log;
 
@@ -59,6 +62,8 @@ public class TestSmallMultiLayerPerceptr
     MultiLayerPerceptron mlp = new SmallMultiLayerPerceptron(learningRate,
         regularization, momentum, squashingFunctionName, costFunctionName,
         layerSizeArray);
+    FeatureTransformer transformer = new DefaultFeatureTransformer();
+    mlp.setFeatureTransformer(transformer);
     try {
       mlp.writeModelToFile(modelPath);
     } catch (IOException e) {
@@ -78,6 +83,7 @@ public class TestSmallMultiLayerPerceptr
       assertEquals(squashingFunctionName, mlp.getSquashingFunctionName());
       assertEquals(costFunctionName, mlp.getCostFunctionName());
       assertArrayEquals(layerSizeArray, mlp.getLayerSizeArray());
+      assertEquals(transformer.getClass().getName(), mlp.getFeatureTransformer().getClass().getName());
       // delete test file
       fs.delete(new Path(modelPath), true);
     } catch (IOException e) {
@@ -134,6 +140,14 @@ public class TestSmallMultiLayerPerceptr
       for (DoubleMatrix mat : matrices) {
         MatrixWritable.write(mat, output);
       }
+
+      // serialize the feature transformer
+      FeatureTransformer transformer = new DefaultFeatureTransformer();
+      Class<? extends FeatureTransformer> featureTransformerCls = transformer.getClass();
+      byte[] featureTransformerBytes = SerializationUtils.serialize(featureTransformerCls);
+      output.writeInt(featureTransformerBytes.length);
+      output.write(featureTransformerBytes);
+      
       output.close();
 
     } catch (IOException e) {
@@ -352,11 +366,16 @@ public class TestSmallMultiLayerPerceptr
       e.printStackTrace();
     }
   }
+  
+  @Test
+  public void testByRunningJobs() {
+    this.testTrainingByXOR();
+    this.testFeatureTransformer();
+  }
 
   /**
    * Test the XOR problem.
    */
-  @Test
   public void testTrainingByXOR() {
     // write in some training instances
     Configuration conf = new Configuration();
@@ -429,5 +448,77 @@ public class TestSmallMultiLayerPerceptr
       }
     }
   }
+  
+  /**
+   * Use a transformer that extracts the first half of the original features.
+   */
+  public void testFeatureTransformer() {
+    // write in some training instances
+    Configuration conf = new Configuration();
+    String strDataPath = "/tmp/xor-training-by-xor";
+    Path dataPath = new Path(strDataPath);
+
+    // generate training data
+    DoubleVector[] trainingData = new DenseDoubleVector[] {
+        new DenseDoubleVector(new double[] { 0, 0, 0 }),
+        new DenseDoubleVector(new double[] { 0, 1, 1 }),
+        new DenseDoubleVector(new double[] { 1, 0, 1 }),
+        new DenseDoubleVector(new double[] { 1, 1, 0 }) };
+    
+    try {
+      URI uri = new URI(strDataPath);
+      FileSystem fs = FileSystem.get(uri, conf);
+      fs.delete(dataPath, true);
+      if (!fs.exists(dataPath)) {
+        fs.createNewFile(dataPath);
+        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
+            dataPath, LongWritable.class, VectorWritable.class);
+
+        for (int i = 0; i < 1000; ++i) {
+          VectorWritable vecWritable = new VectorWritable(trainingData[i % 4]);
+          writer.append(new LongWritable(i), vecWritable);
+        }
+        writer.close();
+      }
+
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    // begin training
+    String modelPath = "/tmp/xorModel-training-by-xor.data";
+    double learningRate = 0.6;
+    double regularization = 0.02; // regularization weight
+    double momentum = 0.3; // momentum weight
+    String squashingFunctionName = "Tanh";
+    String costFunctionName = "SquaredError";
+    int[] layerSizeArray = new int[] { 1, 5, 1 };
+    SmallMultiLayerPerceptron mlp = new SmallMultiLayerPerceptron(learningRate,
+        regularization, momentum, squashingFunctionName, costFunctionName,
+        layerSizeArray);
+    
+    mlp.setFeatureTransformer(new FeatureTransformer() {
+
+      @Override
+      public DoubleVector transform(DoubleVector originalFeatures) {
+        return originalFeatures.sliceUnsafe(originalFeatures.getDimension() / 2);
+      }
+      
+    });
+
+    Map<String, String> trainingParams = new HashMap<String, String>();
+    trainingParams.put("training.iteration", "2000");
+    trainingParams.put("training.mode", "minibatch.gradient.descent");
+    trainingParams.put("training.batch.size", "100");
+    trainingParams.put("tasks", "3");
+    trainingParams.put("modelPath", modelPath);
+
+    try {
+      mlp.train(dataPath, trainingParams);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+  }
 
 }