Posted to commits@labs.apache.org by to...@apache.org on 2015/12/17 08:04:13 UTC

svn commit: r1720488 - in /labs/yay/trunk: api/src/main/java/org/apache/yay/ core/src/main/java/org/apache/yay/core/ core/src/main/java/org/apache/yay/core/utils/ core/src/test/java/org/apache/yay/core/ core/src/test/resources/word2vec/

Author: tommaso
Date: Thu Dec 17 07:04:12 2015
New Revision: 1720488

URL: http://svn.apache.org/viewvc?rev=1720488&view=rev
Log:
performance improvements (parallel exec, simplified Feature APIs), revert derivative update function to latest working version

Removed:
    labs/yay/trunk/api/src/main/java/org/apache/yay/Feature.java
Modified:
    labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java Thu Dec 17 07:04:12 2015
@@ -18,7 +18,7 @@
  */
 package org.apache.yay;
 
-import java.util.ArrayList;
+import java.util.List;
 
 /**
  * A sample/input characterized by its features.
@@ -26,10 +26,10 @@ import java.util.ArrayList;
 public interface Input<F> {
 
   /**
-   * Get this <code>Input</code> {@link org.apache.yay.Feature}s
+   * Get this <code>Input</code>'s features
    *
-   * @return an <code>ArrayList</code> of {@link org.apache.yay.Feature}s
+   * @return a <code>List</code> of features
    */
-  ArrayList<Feature<F>> getFeatures();
+  List<F> getFeatures();
 
 }
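
For context, a minimal usage sketch of the simplified contract (illustrative values only; since getFeatures() is the interface's single method, a lambda suffices, as the updated tests below also do):

    // hypothetical Input<Double>: features are now plain values, with 1d conventionally used as the bias term
    Input<Double> sample = () -> java.util.Arrays.asList(1d, 0.5d, 0.25d);
    java.util.List<Double> features = sample.getFeatures();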

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java Thu Dec 17 07:04:12 2015
@@ -18,10 +18,11 @@
  */
 package org.apache.yay;
 
-import java.util.Collection;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.RealVector;
 
+import java.util.Collection;
+
 /**
  * A {@link PredictionStrategy} defines an algorithm for the prediction of outputs
  * of type <code>O</code> given inputs of type <code>I</code>.
@@ -38,6 +39,15 @@ public interface PredictionStrategy<I, O
   O[] predictOutput(Collection<I> inputs, RealMatrix[] weightsMatrixSet);
 
   /**
+   * Perform a prediction and return a vector containing the outputs
+   *
+   * @param inputVector      a vector of input values
+   * @param weightsMatrixSet the initial set of weights defined by an array of matrices
+   * @return the vector containing the last layer's outputs
+   */
+  RealVector predictOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet);
+
+  /**
    * Perform a prediction on the given input values and weights settings returning
    * a debug output.
    *
@@ -47,4 +57,14 @@ public interface PredictionStrategy<I, O
    */
   RealVector[] debugOutput(Collection<I> inputs, RealMatrix[] weightsMatrixSet);
 
+  /**
+   * Perform a prediction on the given input values and weights settings returning
+   * a debug output.
+   *
+   * @param inputVector      a vector of input values
+   * @param weightsMatrixSet the initial set of weights defined by an array of matrices
+   * @return the perturbed neural network state via its activation values
+   */
+  RealVector[] debugOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet);
+
 }
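
A rough caller-side sketch of the two new vector-based methods (the strategy instance and the weights array are assumed to already exist; values are illustrative):

    // predictionStrategy is any PredictionStrategy<Double, Double>; weights is a RealMatrix[]
    RealVector input = new ArrayRealVector(new double[] {1d, 0.3d, 0.7d}); // org.apache.commons.math3.linear.ArrayRealVector, leading 1d as bias term
    RealVector lastLayerOutput = predictionStrategy.predictOutput(input, weights);
    RealVector[] perLayerActivations = predictionStrategy.debugOutput(input, weights);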

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java Thu Dec 17 07:04:12 2015
@@ -30,4 +30,8 @@ public class WeightLearningException ext
   public WeightLearningException(String s, Exception e) {
     super(s, e);
   }
+
+  public WeightLearningException(String s) {
+    super(s);
+  }
 }

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Thu Dec 17 07:04:12 2015
@@ -25,12 +25,14 @@ import org.apache.yay.DerivativeUpdateFu
 import org.apache.yay.LearningStrategy;
 import org.apache.yay.NeuralNetwork;
 import org.apache.yay.PredictionStrategy;
-import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
 
-import java.util.Arrays;
-import java.util.Iterator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 /**
  * Back propagation learning algorithm for neural networks implementation (see
@@ -49,6 +51,7 @@ public class BackPropagationLearningStra
   private final double threshold;
   private final int batch;
   private final int maxIterations;
+  private final ExecutorService executorService = Executors.newCachedThreadPool();
 
   public BackPropagationLearningStrategy(double alpha, double threshold, PredictionStrategy<Double, Double> predictionStrategy,
                                          CostFunction<RealMatrix, Double, Double> costFunction) {
@@ -79,12 +82,11 @@ public class BackPropagationLearningStra
 
   @Override
   public RealMatrix[] learnWeights(RealMatrix[] weightsMatrixSet, TrainingSet<Double, Double> trainingExamples) throws WeightLearningException {
-    RealMatrix[] updatedWeights = weightsMatrixSet;
+    RealMatrix[] weights = weightsMatrixSet;
     try {
       int iterations = 0;
 
       NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(weightsMatrixSet, new VoidLearningStrategy<>(), predictionStrategy);
-      Iterator<TrainingExample<Double, Double>> iterator = trainingExamples.iterator();
 
       double cost = Double.MAX_VALUE;
       long start = System.currentTimeMillis();
@@ -100,12 +102,12 @@ public class BackPropagationLearningStra
         double newCost = costFunction.calculateCost(nextBatch, neuralNetwork);
 
         if (Double.POSITIVE_INFINITY == newCost || newCost > cost && batch == -1) {
-          throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + newCost);
+          throw new WeightLearningException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + newCost);
         } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
-          System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost + " and parameters " + Arrays.toString(neuralNetwork.getParameters()));
+          System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost);
           break;
         } else if (Double.isNaN(newCost)) {
-          throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
+          throw new WeightLearningException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
         }
 
         // update registered cost
@@ -115,52 +117,72 @@ public class BackPropagationLearningStra
         RealMatrix[] derivatives = derivativeUpdateFunction.getUpdatedDerivatives(weightsMatrixSet, nextBatch);
 
         // calculate the updated parameters
-        updatedWeights = updateWeights(updatedWeights, derivatives, alpha);
+        weights = getUpdatedWeights(weights, derivatives, alpha);
 
         // update parameters in the hypothesis
-        neuralNetwork.setParameters(updatedWeights);
+        neuralNetwork.setParameters(weights);
 
         iterations++;
       }
+    } catch (WeightLearningException e) {
+      throw e;
     } catch (Exception e) {
       throw new WeightLearningException("error during backprop learning", e);
     }
 
-    return updatedWeights;
+    return weights;
   }
 
-  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, final RealMatrix[] derivatives, final double alpha) {
-    RealMatrix[] updatedParameters = new RealMatrix[weightsMatrixSet.length];
+  private RealMatrix[] getUpdatedWeights(RealMatrix[] weightsMatrixSet, final RealMatrix[] derivatives, final double alpha) {
+    int length = weightsMatrixSet.length;
+
+    RealMatrix[] updatedParameters = new RealMatrix[length];
+
+    List<Future<RealMatrix>> futures = new ArrayList<>(length);
+
+    for (int l = 0; l < length; l++) {
 
-    for (int l = 0; l < weightsMatrixSet.length; l++) {
       RealMatrix realMatrix = weightsMatrixSet[l].copy();
       final int finalL = l;
-      RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor() {
+      futures.add(executorService.submit(() -> {
+        RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor() {
 
-        @Override
-        public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
+          @Override
+          public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
 
-        }
+          }
 
-        @Override
-        public double visit(int row, int column, double value) {
-          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
-            return value - alpha * derivatives[finalL].getEntry(row, column);
-          } else {
-            return value;
+          @Override
+          public double visit(int row, int column, double value) {
+            if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
+              return value - alpha * derivatives[finalL].getEntry(row, column);
+            } else {
+              return value;
+            }
           }
-        }
 
-        @Override
-        public double end() {
-          return 0;
+          @Override
+          public double end() {
+            return 0;
+          }
+        };
+        realMatrix.walkInOptimizedOrder(visitor);
+        return realMatrix;
+      }));
+
+    }
+    int k = 0;
+    for (Future<RealMatrix> future : futures) {
+      if (updatedParameters[k] == null) {
+        try {
+          updatedParameters[k] = future.get();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
         }
-      };
-      realMatrix.walkInOptimizedOrder(visitor);
-      if (updatedParameters[l] == null) {
-        updatedParameters[l] = realMatrix;
       }
+      k++;
     }
+
     return updatedParameters;
   }
 

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java Thu Dec 17 07:04:12 2015
@@ -27,7 +27,6 @@ import org.apache.yay.PredictionExceptio
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.neuron.BinaryThresholdNeuron;
-import org.apache.yay.core.utils.ConversionUtils;
 
 import java.util.Collection;
 
@@ -61,7 +60,7 @@ public class BasicPerceptron implements
   }
 
   public void learn(TrainingExample<Double, Double> example) {
-    Collection<Double> doubles = ConversionUtils.toValuesCollection(example.getFeatures());
+    Collection<Double> doubles = example.getFeatures();
     Double[] inputs = doubles.toArray(new Double[doubles.size()]);
     Double calculatedOutput = perceptronNeuron.elaborate(inputs);
     int diff = calculatedOutput.compareTo(example.getOutput()[0]);
@@ -93,7 +92,7 @@ public class BasicPerceptron implements
 
   @Override
   public Double[] predict(Input<Double> input) throws PredictionException {
-    Double output = perceptronNeuron.elaborate(ConversionUtils.toValuesCollection(input.getFeatures()).toArray(
+    Double output = perceptronNeuron.elaborate(input.getFeatures().toArray(
             new Double[input.getFeatures().size()]));
     return new Double[]{output};
   }

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java Thu Dec 17 07:04:12 2015
@@ -18,20 +18,15 @@
  */
 package org.apache.yay.core;
 
-import org.apache.commons.math3.linear.OpenMapRealVector;
+import org.apache.commons.math3.linear.ArrayRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.linear.RealMatrixChangingVisitor;
 import org.apache.commons.math3.linear.RealVector;
 import org.apache.yay.DerivativeUpdateFunction;
-import org.apache.yay.Feature;
 import org.apache.yay.PredictionStrategy;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ConversionUtils;
 
-import java.util.ArrayList;
-import java.util.Collection;
-
 /**
  * Default derivatives update function
  */
@@ -50,71 +45,49 @@ class DefaultDerivativeUpdateFunction im
     RealVector[] deltaVectors = new RealVector[weightsMatrixSet.length];
 
     int noOfMatrixes = weightsMatrixSet.length - 1;
+    double count = 0;
     for (TrainingExample<Double, Double> trainingExample : trainingExamples) {
       try {
         // get activations from feed forward propagation
-        ArrayList<Feature<Double>> features = trainingExample.getFeatures();
-        Collection<Double> input = ConversionUtils.toValuesCollection(features);
-
-        RealVector[] activations = predictionStrategy.debugOutput(input, weightsMatrixSet);
+        RealVector[] activations = predictionStrategy.debugOutput(trainingExample.getFeatures(), weightsMatrixSet);
 
         // calculate output error (corresponding to the last delta^l)
         RealVector nextLayerDelta = calculateOutputError(trainingExample, activations);
 
-        updateDeltaVectors(weightsMatrixSet, deltaVectors, noOfMatrixes, activations, nextLayerDelta);
+        deltaVectors[noOfMatrixes] = nextLayerDelta;
+
+        // back prop the error and update the deltas accordingly
+        for (int l = noOfMatrixes; l > 0; l--) {
+          RealVector currentActivationsVector = activations[l - 1];
+          nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], currentActivationsVector, nextLayerDelta);
+
+          // collect delta vectors for this example
+          deltaVectors[l - 1] = nextLayerDelta;
+        }
 
         RealVector[] newActivations = new RealVector[activations.length];
-        newActivations[0] = ConversionUtils.toRealVector(input);
+        newActivations[0] = ConversionUtils.toRealVector(trainingExample.getFeatures());
         System.arraycopy(activations, 0, newActivations, 1, activations.length - 1);
 
         // update triangle (big delta matrix)
-        updateTriangle(triangle, newActivations, deltaVectors);
+        updateTriangle(triangle, newActivations, deltaVectors, weightsMatrixSet);
+
       } catch (Exception e) {
         throw new RuntimeException("error during derivatives calculation", e);
       }
+      count++;
     }
 
-    for (RealMatrix aTriangle : triangle) {
-      aTriangle.walkInOptimizedOrder(new RealMatrixChangingVisitor() {
-        @Override
-        public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
-
-        }
-
-        @Override
-        public double visit(int row, int column, double value) {
-          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
-            return value / trainingExamples.size();
-          } else {
-            return value;
-          }
-        }
-
-        @Override
-        public double end() {
-          return 0;
-        }
-      });
-    }
-    return triangle;
-  }
-
-  private void updateDeltaVectors(RealMatrix[] weightsMatrixSet, RealVector[] deltaVectors, int noOfMatrixes, RealVector[] activations,
-                                  RealVector nextLayerDelta) {
-    deltaVectors[noOfMatrixes] = nextLayerDelta;
-
-    // back prop the error and update the deltas accordingly
-    for (int l = noOfMatrixes; l > 0; l--) {
-      RealVector currentActivationsVector = activations[l - 1];
-      nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], currentActivationsVector, nextLayerDelta);
-
-      // collect delta vectors for this example
-      deltaVectors[l - 1] = nextLayerDelta;
+    RealMatrix[] derivatives = new RealMatrix[triangle.length];
+    for (int i = 0; i < triangle.length; i++) {
+      // TODO : introduce regularization diversification on bias term (currently not regularized)
+      derivatives[i] = triangle[i].scalarMultiply(1d / count);
     }
+    return derivatives;
   }
 
-  private void updateTriangle(RealMatrix[] triangle, RealVector[] activations, RealVector[] deltaVectors) {
-    for (int l = triangle.length - 1; l >= 0; l--) {
+  private void updateTriangle(RealMatrix[] triangle, RealVector[] activations, RealVector[] deltaVectors, RealMatrix[] weightsMatrixSet) {
+    for (int l = weightsMatrixSet.length - 1; l >= 0; l--) {
       RealMatrix realMatrix = deltaVectors[l].outerProduct(activations[l]);
       if (triangle[l] == null) {
         triangle[l] = realMatrix;
@@ -126,7 +99,7 @@ class DefaultDerivativeUpdateFunction im
 
   private RealVector calculateDeltaVector(RealMatrix thetaL, RealVector activationsVector, RealVector nextLayerDelta) {
     // TODO : remove the bias term from the error calculations
-    RealVector identity = new OpenMapRealVector(activationsVector.getDimension(), 1d);
+    ArrayRealVector identity = new ArrayRealVector(activationsVector.getDimension(), 1d);
     RealVector gz = activationsVector.ebeMultiply(identity.subtract(activationsVector)); // = a^l .* (1-a^l)
     return thetaL.preMultiply(nextLayerDelta).ebeMultiply(gz);
   }
@@ -134,12 +107,19 @@ class DefaultDerivativeUpdateFunction im
   private RealVector calculateOutputError(TrainingExample<Double, Double> trainingExample, RealVector[] activations) {
     RealVector output = activations[activations.length - 1];
 
+//    Double[] sampleOutput = new Double[output.getDimension()];
     Double[] actualOutput = trainingExample.getOutput();
-    RealVector learnedOutputRealVector = new OpenMapRealVector(actualOutput); // turn example output to a vector
+//    int sampleOutputIntValue = actualOutput.intValue();
+//    if (sampleOutputIntValue < sampleOutput.length) {
+//      sampleOutput[sampleOutputIntValue] = 1d;
+//    } else if (sampleOutput.length == 1) {
+//      sampleOutput[0] = actualOutput;
+//    } else {
+//      throw new RuntimeException("problem with multiclass output mapping");
+//    }
+    RealVector learnedOutputRealVector = new ArrayRealVector(actualOutput); // turn example output to a vector
 
-    // error calculation -> er_a = out_a * (1 - out_a) * (tgt_a - out_a) (was: output.subtract(learnedOutputRealVector)
-    // targetOutputRealVector.subtract(output).map(x -> Math.pow(x, 2)); // squared error
-    // return output.subtract(learnedOutputRealVector);
-    return output.ebeMultiply(new OpenMapRealVector(output.getDimension(), 1d).subtract(output)).ebeMultiply(output.subtract(learnedOutputRealVector));
+    // TODO : improve error calculation -> this could be er_a = out_a * (1 - out_a) * (tgt_a - out_a)
+    return output.subtract(learnedOutputRealVector);
   }
 }
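
Read as a whole, the reverted function computes per training example (a summary of the code above, in the a^l / delta^l notation its comments already use):

    delta^L     = a^L - y                                             (output error, last layer)
    delta^(l-1) = (Theta^l)^T * delta^l .* a^(l-1) .* (1 - a^(l-1))   (back-propagated error)
    Delta^l     = Delta^l + outerProduct(delta^l, a^l)                (accumulated over the batch)
    D^l         = Delta^l / m                                         (returned derivatives, m = number of examples)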

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java Thu Dec 17 07:04:12 2015
@@ -18,7 +18,6 @@
  */
 package org.apache.yay.core;
 
-import org.apache.yay.Feature;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ConversionUtils;
@@ -57,12 +56,12 @@ public class EncodedTrainingSet extends
       @Override
       public TrainingExample<Double, Double> next() {
         TrainingExample<Double, Double> sample = EncodedTrainingSet.super.iterator().next();
-        Collection<Feature<Double>> features = sample.getFeatures();
+        Collection<Double> features = sample.getFeatures();
         int vocabularySize = vocabulary.size();
         Double[] outputs = new Double[vocabularySize * (window - 1)];
         Double[] inputs = new Double[vocabularySize];
-        for (Feature<Double> feature : features) {
-          inputs = ConversionUtils.hotEncode(feature.getValue().intValue(), vocabularySize);
+        for (Double feature : features) {
+          inputs = ConversionUtils.hotEncode(feature.intValue(), vocabularySize);
           break;
         }
         int k = 0;
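
A rough illustration of the hot-encoding call above (the exact layout of the returned array is an assumption rather than something shown in this diff):

    // assumed semantics: hotEncode(index, size) sets position `index` to 1d and every other entry to 0d,
    // e.g. hotEncode(2, 5) -> [0.0, 0.0, 1.0, 0.0, 0.0]
    Double[] inputs = ConversionUtils.hotEncode(2, 5);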

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java Thu Dec 17 07:04:12 2015
@@ -54,20 +54,31 @@ public class FeedForwardStrategy impleme
 
   @Override
   public Double[] predictOutput(Collection<Double> input, RealMatrix[] realMatrixSet) {
-    RealVector[] activations = applyFF(input, realMatrixSet);
+    RealVector[] activations = debugOutput(input, realMatrixSet);
     RealVector x = activations[activations.length - 1];
     return ConversionUtils.toDoubleArray(x.toArray());
   }
 
+  @Override
+  public RealVector predictOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet) {
+    RealVector[] activations = debugOutput(inputVector, weightsMatrixSet);
+    return activations[activations.length - 1];
+  }
+
   public RealVector[] debugOutput(Collection<Double> input, RealMatrix[] realMatrixSet) {
-    return applyFF(input, realMatrixSet);
+    Double[] doubles = input.toArray(new Double[input.size()]);
+    return applyFF(Stream.of(doubles).mapToDouble(Double::doubleValue).toArray(), realMatrixSet);
   }
 
-  private RealVector[] applyFF(Collection<Double> input, RealMatrix[] realMatrixSet) {
+  @Override
+  public RealVector[] debugOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet) {
+    return applyFF(inputVector.toArray(), weightsMatrixSet);
+  }
+
+  private RealVector[] applyFF(double[] inputs, RealMatrix[] realMatrixSet) {
     RealVector[] debugOutput = new RealVector[realMatrixSet.length];
 
-    Double[] doubles = input.toArray(new Double[input.size()]);
-    RealMatrix x = MatrixUtils.createRowRealMatrix(Stream.of(doubles).mapToDouble(Double::doubleValue).toArray());
+    RealMatrix x = MatrixUtils.createRowRealMatrix(inputs);
     for (int w = 0; w < realMatrixSet.length; w++) {
       // compute matrix multiplication
       x = x.multiply(realMatrixSet[w].transpose());
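
The remainder of applyFF (below this hunk) presumably applies each layer's activation function to that product; purely as an illustration, a single layer step with a commons-math Sigmoid (an assumption; the actual activations are whatever the strategy was configured with, and weights stands for the RealMatrix[] passed in) could look like:

    Sigmoid g = new Sigmoid(); // org.apache.commons.math3.analysis.function.Sigmoid
    RealMatrix x = MatrixUtils.createRowRealMatrix(new double[] {1d, 0.5d, 0.2d});
    x = x.multiply(weights[0].transpose());            // weighted sums for the first layer
    for (int c = 0; c < x.getColumnDimension(); c++) {
      x.setEntry(0, c, g.value(x.getEntry(0, c)));     // entry-wise activation
    }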

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java Thu Dec 17 07:04:12 2015
@@ -67,13 +67,6 @@ public class LogisticRegressionCostFunct
           return res;
         }
       });
-//      for (int i = 0; i < layerMatrix.getColumnDimension(); i++) {
-//        double[] column = layerMatrix.getColumn(i);
-//        // starting from 1 to avoid including the bias unit in regularization
-//        for (int j = 1; j < column.length; j++) {
-//          res += Math.pow(column[j], 2d);
-//        }
-//      }
     }
     return (lambda / (2d * trainingExamples.size())) * res;
   }

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java Thu Dec 17 07:04:12 2015
@@ -18,9 +18,10 @@
  */
 package org.apache.yay.core;
 
+import org.apache.yay.SelectionFunction;
+
 import java.util.Collection;
 import java.util.Collections;
-import org.apache.yay.SelectionFunction;
 
 /**
  * Selects the max value from a {@link Collection} of {@link Comparable} outputs.

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java Thu Dec 17 07:04:12 2015
@@ -18,7 +18,6 @@
  */
 package org.apache.yay.core;
 
-import java.util.Collection;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.yay.Input;
 import org.apache.yay.LearningException;
@@ -28,7 +27,8 @@ import org.apache.yay.PredictionExceptio
 import org.apache.yay.PredictionStrategy;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
-import org.apache.yay.core.utils.ConversionUtils;
+
+import java.util.Collection;
 
 /**
  * Factory class for creating {@link org.apache.yay.NeuralNetwork}s
@@ -49,7 +49,7 @@ public class NeuralNetworkFactory {
     return new NeuralNetwork() {
 
       private Double[] getOutputVector(Input<Double> input) {
-        Collection<Double> inputVector = ConversionUtils.toValuesCollection(input.getFeatures());
+        Collection<Double> inputVector = input.getFeatures();
         return predictionStrategy.predictOutput(inputVector, updatedRealMatrixSet);
       }
 

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java Thu Dec 17 07:04:12 2015
@@ -22,16 +22,13 @@ import org.apache.commons.math3.linear.M
 import org.apache.commons.math3.linear.OpenMapRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.RealVector;
-import org.apache.yay.Feature;
 import org.apache.yay.Input;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.WeakHashMap;
-import java.util.stream.Collectors;
 
 /**
  * Temporary class for conversion between model objects and commons-math matrices/vectors
@@ -69,8 +66,8 @@ public class ConversionUtils {
   private static double[] toDoubleArray(Input<Double> sample) {
     double[] ar = new double[sample.getFeatures().size()];
     int i = 0;
-    for (Feature<Double> f : sample.getFeatures()) {
-      ar[i] = f.getValue();
+    for (Double f : sample.getFeatures()) {
+      ar[i] = f;
       i++;
     }
     return ar;
@@ -87,18 +84,6 @@ public class ConversionUtils {
   }
 
   /**
-   * turns a collection of features of type <code>T</code> into a collection of
-   * <code>T</code> objects.
-   *
-   * @param featureVector the vector of features
-   * @param <T>           the type of features
-   * @return a vector of Doubles
-   */
-  public static <T> Collection<T> toValuesCollection(Collection<Feature<T>> featureVector) {
-    return featureVector.stream().map(Feature::getValue).collect(Collectors.toCollection(ArrayList::new));
-  }
-
-  /**
    * this is just nice! :-) (thanks commons-math)
    *
    * @param ar a double array

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java Thu Dec 17 07:04:12 2015
@@ -18,11 +18,11 @@
  */
 package org.apache.yay.core.utils;
 
-import org.apache.yay.Feature;
-import org.apache.yay.Input;
 import org.apache.yay.TrainingExample;
 
-import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
 
 /**
  * Factory class for {@link org.apache.yay.Input}s and {@link TrainingExample}s.
@@ -33,8 +33,11 @@ public class ExamplesFactory {
                                                                             final Double... featuresValues) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        return doublesToFeatureVector(featuresValues);
+      public List<Double> getFeatures() {
+        List<Double> doubles = new LinkedList<>();
+        doubles.add(1d);
+        doubles.addAll(Arrays.asList(featuresValues));
+        return doubles;
       }
 
       @Override
@@ -45,11 +48,14 @@ public class ExamplesFactory {
   }
 
   public static TrainingExample<Double, Double> createDoubleArrayTrainingExample(final Double[] output,
-                                                                            final Double... featuresValues) {
+                                                                                 final Double... featuresValues) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        return doublesToFeatureVector(featuresValues);
+      public List<Double> getFeatures() {
+        List<Double> doubles = new LinkedList<>();
+        doubles.add(1d);
+        doubles.addAll(Arrays.asList(featuresValues));
+        return doubles;
       }
 
       @Override
@@ -59,21 +65,4 @@ public class ExamplesFactory {
     };
   }
 
-  public static Input<Double> createDoubleInput(final Double... featuresValues) {
-    return () -> doublesToFeatureVector(featuresValues);
-  }
-
-  private static ArrayList<Feature<Double>> doublesToFeatureVector(Double[] featuresValues) {
-    ArrayList<Feature<Double>> features = new ArrayList<>();
-    Feature<Double> byasFeature = new Feature<>();
-    byasFeature.setValue(1d);
-    features.add(byasFeature);
-    for (Double d : featuresValues) {
-      Feature<Double> feature = new Feature<>();
-      feature.setValue(d);
-      features.add(feature);
-    }
-    return features;
-  }
-
 }
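
An illustrative call to the reworked array variant (arbitrary values):

    TrainingExample<Double, Double> example =
        ExamplesFactory.createDoubleArrayTrainingExample(new Double[] {1d}, 0.2d, 0.7d);
    // example.getFeatures() -> [1.0, 0.2, 0.7], with the 1d bias term prepended by the factory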

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java Thu Dec 17 07:04:12 2015
@@ -18,16 +18,17 @@
  */
 package org.apache.yay.core;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.Random;
-import org.apache.yay.Feature;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -143,16 +144,11 @@ public class BasicPerceptronTest {
                                                                 final Double... params) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<>();
-        Feature<Double> byasFeature = new Feature<>();
-        byasFeature.setValue(1d);
+      public List<Double> getFeatures() {
+        List<Double> features = new LinkedList<>();
+        Double byasFeature = 1d;
         features.add(byasFeature);
-        for (Double d : params) {
-          Feature<Double> feature = new Feature<>();
-          feature.setValue(d);
-          features.add(feature);
-        }
+        Collections.addAll(features, params);
         return features;
       }
 

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java Thu Dec 17 07:04:12 2015
@@ -22,12 +22,19 @@ import org.apache.commons.math3.linear.A
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.ml.distance.CanberraDistance;
 import org.apache.commons.math3.ml.distance.DistanceMeasure;
-import org.apache.yay.*;
+import org.apache.yay.Input;
+import org.apache.yay.LearningStrategy;
+import org.apache.yay.NeuralNetwork;
+import org.apache.yay.TrainingExample;
+import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ExamplesFactory;
 import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Random;
 
 import static org.junit.Assert.assertEquals;
@@ -105,15 +112,10 @@ public class NeuralNetworkIntegrationTes
 
   private Input<Double> createSample(final Double... params) {
     return () -> {
-      ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-      Feature<Double> byasFeature = new Feature<Double>();
-      byasFeature.setValue(1d);
+      List<Double> features = new LinkedList<>();
+      Double byasFeature = 1d;
       features.add(byasFeature);
-      for (Double d : params) {
-        Feature<Double> feature = new Feature<Double>();
-        feature.setValue(d);
-        features.add(feature);
-      }
+      Collections.addAll(features, params);
       return features;
     };
   }

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java Thu Dec 17 07:04:12 2015
@@ -23,9 +23,7 @@ import org.apache.commons.math3.linear.M
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.ml.distance.DistanceMeasure;
 import org.apache.commons.math3.ml.distance.EuclideanDistance;
-import org.apache.commons.math3.util.FastMath;
 import org.apache.yay.ActivationFunction;
-import org.apache.yay.Feature;
 import org.apache.yay.NeuralNetwork;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
@@ -78,7 +76,7 @@ public class WordVectorsTest {
     Path path = Paths.get(getClass().getResource("/word2vec/test.txt").getFile());
 
     System.out.println("reading fragments");
-    int window = 4;
+    int window = 3;
     Queue<List<byte[]>> fragments = getFragments(path, window);
     assertFalse(fragments.isEmpty());
     System.out.println("generating vocabulary");
@@ -93,7 +91,7 @@ public class WordVectorsTest {
     int inputSize = next.getFeatures().size();
     int outputSize = next.getOutput().length;
 
-    int hiddenSize = 30;
+    int hiddenSize = 10;
     System.out.println("initializing neural network");
     RealMatrix[] randomWeights = createRandomWeights(inputSize, hiddenSize, outputSize);
 
@@ -128,31 +126,31 @@ public class WordVectorsTest {
     System.out.println("measuring similarities");
     Collection<DistanceMeasure> measures = new LinkedList<>();
     measures.add(new EuclideanDistance());
-    measures.add(new DistanceMeasure() {
-      @Override
-      public double compute(double[] a, double[] b) {
-        double dp = 0.0;
-        double na = 0.0;
-        double nb = 0.0;
-        for (int i = 0; i < a.length; i++) {
-          dp += a[i] * b[i];
-          na += Math.pow(a[i], 2);
-          nb += Math.pow(b[i], 2);
-        }
-        double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
-        return 1 / cosineSimilarity;
-      }
-
-      @Override
-      public String toString() {
-        return "inverse cosine similarity distance measure";
-      }
-    });
-    measures.add((DistanceMeasure) (a, b) -> {
-      double da = FastMath.sqrt(MatrixUtils.createRealVector(a).dotProduct(MatrixUtils.createRealVector(a)));
-      double db = FastMath.sqrt(MatrixUtils.createRealVector(b).dotProduct(MatrixUtils.createRealVector(b)));
-      return Math.abs(db - da);
-    });
+//    measures.add(new DistanceMeasure() {
+//      @Override
+//      public double compute(double[] a, double[] b) {
+//        double dp = 0.0;
+//        double na = 0.0;
+//        double nb = 0.0;
+//        for (int i = 0; i < a.length; i++) {
+//          dp += a[i] * b[i];
+//          na += Math.pow(a[i], 2);
+//          nb += Math.pow(b[i], 2);
+//        }
+//        double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
+//        return 1 / cosineSimilarity;
+//      }
+//
+//      @Override
+//      public String toString() {
+//        return "inverse cosine similarity distance measure";
+//      }
+//    });
+//    measures.add((DistanceMeasure) (a, b) -> {
+//      double da = FastMath.sqrt(MatrixUtils.createRealVector(a).dotProduct(MatrixUtils.createRealVector(a)));
+//      double db = FastMath.sqrt(MatrixUtils.createRealVector(b).dotProduct(MatrixUtils.createRealVector(b)));
+//      return Math.abs(db - da);
+//    });
     for (DistanceMeasure distanceMeasure : measures) {
       System.out.println("computing similarity using " + distanceMeasure);
       computeSimilarities(vocabulary, wordVectors, distanceMeasure);
@@ -255,10 +253,9 @@ public class WordVectorsTest {
         }
 
         @Override
-        public ArrayList<Feature<Double>> getFeatures() {
-          ArrayList<Feature<Double>> features = new ArrayList<>();
-          Feature<Double> e = new Feature<>();
-          e.setValue((double) vocabulary.indexOf(new String(finalInputWord)));
+        public List<Double> getFeatures() {
+          List<Double> features = new ArrayList<>();
+          Double e = (double) vocabulary.indexOf(new String(finalInputWord));
           features.add(e);
           return features;
         }
@@ -275,7 +272,6 @@ public class WordVectorsTest {
 
 
   private List<String> getVocabulary(Path path) throws IOException {
-    long start = System.currentTimeMillis();
     Set<String> vocabulary = new HashSet<>();
     ByteBuffer buf = ByteBuffer.allocate(100);
     try (SeekableByteChannel sbc = Files.newByteChannel(path)) {
@@ -307,16 +303,17 @@ public class WordVectorsTest {
     } finally {
       buf.clear();
     }
-    long end = System.currentTimeMillis();
     List<String> list = Arrays.asList(vocabulary.toArray(new String[vocabulary.size()]));
     Collections.sort(list);
-    System.out.println("vocabulary read in " + (end - start) / 60000 + " minutes (" + (list.size()) + ")");
+//    for (String iw : vocabulary) {
+//      System.out.println(iw +"->"+Arrays.toString(ConversionUtils.hotEncode(iw.getBytes(), list)));
+//    }
     return list;
   }
 
   private String cleanString(CharBuffer charBuffer) {
     String s = charBuffer.toString();
-    return s.toLowerCase().replaceAll("\\.", " ").replaceAll("\\;", " ").replaceAll("\\,", " ").replaceAll("\\:", " ").replaceAll("\\-","").replaceAll("\\\"","");
+    return s.toLowerCase().replaceAll("\\.", " ").replaceAll("\\;", " ").replaceAll("\\,", " ").replaceAll("\\:", " ").replaceAll("\\-\\s", "").replaceAll("\\\"", "");
   }
 
   private List<String> getVocabulary(Collection<byte[]> sentences) {

Modified: labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt Thu Dec 17 07:04:12 2015
@@ -1,10 +1,10 @@
-A calculus which combined the flexible geometric structure of vector mod- els with the crisp efficiency of Boolean logic would be extremely beneficial for modelling natural language. With this goal in mind, we present a formulation for logical connectives in vector spaces based on standard linear algebra, giving ex- amples of the use of vector negation to discriminate between different senses of ambiguous words. It turns out that the operators developed in this way are pre- cisely the connectives of quantum logic (Birkhoff and von Neumann, 1936), which to our knowledge have not been exploited before in natural language processing. In quantum logic, arbitrary sets are replaced by linear subspaces of a vector space, and set unions, intersections and complements are replaced by vector sum, inter- section and orthogonal complements of subspaces. We demonstrate that these logi- cal connectives (particularly the orthogonal complement for negation) are powerful tools for exploring and anal
 ysing word meanings and show distinct advantages over Boolean operators in document retrieval experiments.
+A calculus which combined the flexible geometric structure of vector models with the crisp efficiency of Boolean logic would be extremely beneficial for modelling natural language. With this goal in mind, we present a formulation for logical connectives in vector spaces based on standard linear algebra, giving ex- amples of the use of vector negation to discriminate between different senses of ambiguous words. It turns out that the operators developed in this way are pre- cisely the connectives of quantum logic (Birkhoff and von Neumann, 1936), which to our knowledge have not been exploited before in natural language processing. In quantum logic, arbitrary sets are replaced by linear subspaces of a vector space, and set unions, intersections and complements are replaced by vector sum, inter- section and orthogonal complements of subspaces. We demonstrate that these logi- cal connectives (particularly the orthogonal complement for negation) are powerful tools for exploring and analys
 ing word meanings and show distinct advantages over Boolean operators in document retrieval experiments.
 This paper is organised as follows. In Section 1.1 we describe some of the ways vectors have been used to represent the meanings of terms and documents in natural language processing, and describe the way the WORD-SPACE used in our later experiments is built automatically from text corpora. In Section 1.2 we define the logical connectives on vector spaces, focussing particularly on negation and disjunction. This introduces the basic material needed to understand the worked examples given in Section 1.3, and the document retrieval experiments described in Section 1.3.1. Section 1.4 gives a much fuller outline of the theory of quantum logic, the natural setting for the operators of Section 1.2. Finally, in Section 1.5, we examine the similarities between quantum logic and WORD-SPACE, asking whether quantum logic is an appropriate framework for modelling word-meanings or if the
 initial successes we have obtained are mainly coincidental.
 To some extent, this paper may have been written backwards, in that the im-plementation and examples are at the beginning and most of the theory is at the end. This is for two reasons. Firstly, we hoped to make the paper as accessible as possible and were afraid that beginning with an introduction to the full machinery of quantum logic would defeat this goal before the reader has a chance to realise that the techniques and equations used in this work are really quite elementary. Secondly, the link with ‘quantum logic’ was itself only brought to our attention after the bulk of the results in this paper had been obtained, and since this research is very much ongoing, we deemed it appropriate to give an honest account of its history and current state.
-We propose two novel model architectures for computing continuous vector repre- sentations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previ- ously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art perfor- mance on our test set for measuring syntactic and semantic word similarities.
+We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previ- ously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art perfor- mance on our test set for measuring syntactic and semantic word similarities.
 Information Retrieval (IR) models need to deal with two difficult issues, vocabulary mismatch and term dependencies. Vocabulary mismatch corresponds to the difficulty of retrieving relevant documents that do not contain exact query terms but semantically related terms. Term dependencies refers to the need of considering the relationship between the words of the query when estimating the relevance of a document. A multitude of solutions has been proposed to solve each of these two problems, but no principled model solve both. In parallel, in the last few years, language models based on neural networks have been used to cope with complex natural language processing tasks like emotion and paraphrase detection. Although they present good abilities to cope with both term dependencies and vocabulary mismatch problems, thanks to the distributed representation of words they are based upon, such models could not be used readily in IR, where the estimation of one language model per document (
 or query) is required. This is both computationally unfeasible and prone to over-fitting. Based on a recent work that proposed to learn a generic language model that can be modified through a set of document-specific parameters, we explore use of new neural network models that are adapted to ad-hoc IR tasks. Within the language model IR framework, we propose and study the use of a generic language model as well as a document-specific language model. Both can be used as a smoothing component, but the latter is more adapted to the document at hand and has the potential of being used as a full document language model. We experiment with such models and analyze their results on TREC-1 to 8 datasets.
-Bidirectional Long Short-Term Mem- ory Recurrent Neural Network (BLSTM- RNN) has been shown to be very effec- tive for modeling and predicting sequen- tial data, e.g. speech utterances or hand- written documents. In this study, we propose to use BLSTM-RNN for a uni- fied tagging solution that can be applied to various tagging tasks including part- of-speech tagging, chunking and named entity recognition. Instead of exploiting specific features carefully optimized for each task, our solution only uses one set of task-independent features and internal representations learnt from unlabeled text for all tasks. Requiring no task specific knowledge or sophisticated feature engi- neering, our approach gets nearly state-of- the-art performance in all these three tag- ging tasks.
+Bidirectional Long Short-Term Memory Recurrent Neural Network (BLSTM-RNN) has been shown to be very effec- tive for modeling and predicting sequen- tial data, e.g. speech utterances or hand- written documents. In this study, we propose to use BLSTM-RNN for a uni- fied tagging solution that can be applied to various tagging tasks including part- of-speech tagging, chunking and named entity recognition. Instead of exploiting specific features carefully optimized for each task, our solution only uses one set of task-independent features and internal representations learnt from unlabeled text for all tasks. Requiring no task specific knowledge or sophisticated feature engi- neering, our approach gets nearly state-of- the-art performance in all these three tag- ging tasks.
 The recently introduced continuous Skip-gram model is an efficient method for learning high-quality distributed vector representations that capture a large num- ber of precise syntactic and semantic word relationships. In this paper we present several extensions that improve both the quality of the vectors and the training speed. By subsampling of the frequent words we obtain significant speedup and also learn more regular word representations. We also describe a simple alterna- tive to the hierarchical softmax called negative sampling.
 An inherent limitation of word representations is their indifference to word order and their inability to represent idiomatic phrases. For example, the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”. Motivated by this example, we present a simple method for finding phrases in text, and show that learning good vector representations for millions of phrases is possible.
 We extend the word2vec framework to capture meaning across languages. The input consists of a source text and a word-aligned parallel text in a second language. The joint word2vec tool then repre- sents words in both languages within a common “semantic” vector space. The result can be used to enrich lexicons of under-resourced languages, to identify ambiguities, and to perform clustering and classification. Experiments were conducted on a parallel English-Arabic corpus, as well as on English and Hebrew Biblical texts.
@@ -13,7 +13,7 @@ We report our participation in the conte
 We present a comprehensive study of eval- uation methods for unsupervised embed- ding techniques that obtain meaningful representations of words from text. Differ- ent evaluations result in different orderings of embedding methods, calling into ques- tion the common assumption that there is one single optimal vector representation. We present new evaluation techniques that directly compare embeddings with respect to specific queries. These methods re- duce bias, provide greater insight, and allow us to solicit data-driven relevance judgments rapidly and accurately through crowdsourcing.
 Continuous word and phrase vectors have proven useful in a number of NLP tasks. Here we describe our experience using them as a source of features for the SemEval-2015 task 3, consisting of two community question an- swering subtasks: Answer Selection for cate- gorizing answers as potential, good, and bad with regards to their corresponding questions; and YES/NO inference for predicting a yes, no, or unsure response to a YES/NO question us- ing all of its good answers. Our system ranked 6th and 1st in the English answer selection and YES/NO inference subtasks respectively, and 2nd in the Arabic answer selection subtask.
 The word2vec model and application by Mikolov et al. have attracted a great amount of attention in recent two years. The vector representations of words learned by word2vec models have been proven to be able to carry semantic meanings and are useful in various NLP tasks. As an increasing number of researchers would like to experiment with word2vec, I notice that there lacks a material that comprehensively explains the parameter learning process of word2vec in details, thus preventing many people with less neural network experience from understanding how exactly word2vec works.
-This note provides detailed derivations and explanations of the parameter up- date equations for the word2vec models, including the original continuous bag-of-word (CBOW) and skip-gram models, as well as advanced tricks, hierarchical soft-max and negative sampling. In the appendix a review is given on the basics of neuron network models and backpropagation.
+This note provides detailed derivations and explanations of the parameter update equations for the word2vec models, including the original continuous bag-of-word (CBOW) and skip-gram models, as well as advanced tricks, hierarchical soft-max and negative sampling. In the appendix a review is given on the basics of neuron network models and backpropagation.
 Over the past few years, neural networks have re-emerged as powerful machine-learning
 models, yielding state-of-the-art results in fields such as image recognition and speech
 processing. More recently, neural network models started to be applied also to textual
@@ -22,4 +22,13 @@ network models from the perspective of n
 to bring natural-language researchers up to speed with the neural techniques. The tutorial
 covers input encoding for natural language tasks, feed-forward networks, convolutional
 networks, recurrent networks and recursive networks, as well as the computation graph
-abstraction for automatic gradient computation
\ No newline at end of file
+abstraction for automatic gradient computation
+The development of intelligent machines is one of the biggest unsolved
+challenges in computer science. In this paper, we propose some
+fundamental properties these machines should have, focusing in particular
+on communication and learning. We discuss a simple environment
+that could be used to incrementally teach a machine the basics
+of natural-language-based communication, as a prerequisite to more
+complex interaction with human users. We also present some conjectures
+on the sort of algorithms the machine should support in order
+to profitably learn from the environment.
\ No newline at end of file


