Posted to commits@labs.apache.org by to...@apache.org on 2015/11/02 17:10:29 UTC

svn commit: r1712061 - in /labs/yay/trunk/core/src: main/java/org/apache/yay/core/ test/java/org/apache/yay/core/ test/resources/word2vec/

Author: tommaso
Date: Mon Nov  2 16:10:29 2015
New Revision: 1712061

URL: http://svn.apache.org/viewvc?rev=1712061&view=rev
Log:
performance improvements for ff, backprop and softmax
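
For readers skimming the diff below: the BackPropagationLearningStrategy change replaces nested loops over getData() copies with an in-place walk of the matrix using Commons Math's RealMatrixChangingVisitor and walkInOptimizedOrder. The following is a minimal standalone sketch of that technique, not part of this commit; class and variable names are illustrative only, assuming commons-math3 is on the classpath:

    import org.apache.commons.math3.linear.MatrixUtils;
    import org.apache.commons.math3.linear.RealMatrix;
    import org.apache.commons.math3.linear.RealMatrixChangingVisitor;

    public class VisitorUpdateSketch {
      public static void main(String[] args) {
        RealMatrix weights = MatrixUtils.createRealMatrix(new double[][]{{1d, 0.5d}, {0.2d, 0.3d}});
        final RealMatrix gradients = MatrixUtils.createRealMatrix(new double[][]{{0.1d, 0.1d}, {0.1d, 0.1d}});
        final double alpha = 0.01d;

        // walk a copy of the weights in whatever order is fastest for the
        // backing store, mutating each entry in place: w <- w - alpha * dw
        RealMatrix updated = weights.copy();
        updated.walkInOptimizedOrder(new RealMatrixChangingVisitor() {
          @Override
          public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
            // no per-walk setup needed for this sketch
          }

          @Override
          public double visit(int row, int column, double value) {
            return value - alpha * gradients.getEntry(row, column);
          }

          @Override
          public double end() {
            return 0;
          }
        });

        System.out.println(updated);
      }
    }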

Modified:
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Mon Nov  2 16:10:29 2015
@@ -18,11 +18,8 @@
  */
 package org.apache.yay.core;
 
-import java.util.Arrays;
-import java.util.Iterator;
-
-import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealMatrixChangingVisitor;
 import org.apache.yay.CostFunction;
 import org.apache.yay.DerivativeUpdateFunction;
 import org.apache.yay.LearningStrategy;
@@ -32,6 +29,9 @@ import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
 
+import java.util.Arrays;
+import java.util.Iterator;
+
 /**
  * Back propagation learning algorithm for neural networks implementation (see
  * <code>http://en.wikipedia.org/wiki/Backpropagation</code>).
@@ -110,7 +110,7 @@ public class BackPropagationLearningStra
         } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
           System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost + " and parameters " + Arrays.toString(hypothesis.getParameters()));
           break;
-        } else if (Double.isNaN(newCost)){
+        } else if (Double.isNaN(newCost)) {
           throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
         }
 
@@ -135,25 +135,39 @@ public class BackPropagationLearningStra
     return updatedWeights;
   }
 
-  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, RealMatrix[] derivatives, double alpha) {
+  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, final RealMatrix[] derivatives, final double alpha) {
     RealMatrix[] updatedParameters = new RealMatrix[weightsMatrixSet.length];
+
     for (int l = 0; l < weightsMatrixSet.length; l++) {
-      double[][] updatedWeights = weightsMatrixSet[l].getData();
-      for (int i = 0; i < updatedWeights.length; i++) {
-        for (int j = 0; j < updatedWeights[i].length; j++) {
-          double curVal = updatedWeights[i][j];
-          if (!(i == 0 && curVal == 0d) && !(j == 0 && curVal == 1d)) {
-            updatedWeights[i][j] = updatedWeights[i][j] - alpha * derivatives[l].getData()[i][j];
+      RealMatrix realMatrix = weightsMatrixSet[l].copy();
+      final double[][] data = derivatives[l].getData();
+      RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor(){
+
+        @Override
+        public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
+
+        }
+
+        @Override
+        public double visit(int row, int column, double value) {
+          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
+            return value - alpha * data[row][column];
+          } else {
+            return value;
           }
         }
-      }
-      if (updatedParameters[l] != null) {
-        updatedParameters[l].setSubMatrix(updatedWeights, 0, 0);
-      } else {
-        updatedParameters[l] = new Array2DRowRealMatrix(updatedWeights);
+
+        @Override
+        public double end() {
+          return 0;
+        }
+      };
+      realMatrix.walkInOptimizedOrder(visitor);
+      if (updatedParameters[l]== null) {
+        updatedParameters[l] = realMatrix;
       }
     }
     return updatedParameters;
   }
 
-}
\ No newline at end of file
+}

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java Mon Nov  2 16:10:29 2015
@@ -96,7 +96,7 @@ public class FeedForwardStrategy impleme
           return 0;
         }
       };
-      x.walkInRowOrder(visitor);
+      x.walkInOptimizedOrder(visitor);
       debugOutput[w] = x.getRowVector(0);
     }
     return debugOutput;

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java Mon Nov  2 16:10:29 2015
@@ -21,21 +21,36 @@ package org.apache.yay.core;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.yay.ActivationFunction;
 
+import java.util.Map;
+import java.util.WeakHashMap;
+
 /**
  * Softmax activation function
  */
 public class SoftmaxActivationFunction implements ActivationFunction<Double> {
 
-    @Override
-    public Double apply(RealMatrix weights, Double signal) {
-        double num = Math.exp(signal);
-        double den = 0d;
-        for (int i = 0; i < weights.getRowDimension(); i++) {
-            double[] row1 = weights.getRow(i);
-            for (int j = 0; j < weights.getColumnDimension(); j++) {
-                den += Math.exp(row1[j]);
-            }
+  private static final Map<RealMatrix, Double> cache = new WeakHashMap<RealMatrix, Double>();
+
+  @Override
+  public Double apply(RealMatrix weights, Double signal) {
+    double num = Math.exp(signal);
+    double den = getDen(weights);
+    return num / den;
+  }
+
+  private double getDen(RealMatrix weights) {
+    Double d = cache.get(weights);
+    if (d == null) {
+      double den = 0d;
+      for (int i = 0; i < weights.getRowDimension(); i++) {
+        double[] row1 = weights.getRow(i);
+        for (int j = 0; j < weights.getColumnDimension(); j++) {
+          den += Math.exp(row1[j]);
         }
-        return num / den;
+      }
+      d = den;
+      cache.put(weights, d);
     }
+    return d;
+  }
 }

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java Mon Nov  2 16:10:29 2015
@@ -20,9 +20,13 @@ package org.apache.yay.core;
 
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.ml.distance.*;
+import org.apache.commons.math3.ml.distance.CanberraDistance;
+import org.apache.commons.math3.ml.distance.ChebyshevDistance;
+import org.apache.commons.math3.ml.distance.DistanceMeasure;
+import org.apache.commons.math3.ml.distance.EarthMoversDistance;
+import org.apache.commons.math3.ml.distance.EuclideanDistance;
+import org.apache.commons.math3.ml.distance.ManhattanDistance;
 import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
-
 import org.apache.yay.ActivationFunction;
 import org.apache.yay.Feature;
 import org.apache.yay.NeuralNetwork;
@@ -31,6 +35,9 @@ import org.apache.yay.TrainingSet;
 import org.junit.Test;
 
 import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -67,14 +74,14 @@ public class WordVectorsTest {
 
     int inputSize = next.getFeatures().size();
     int outputSize = next.getOutput().length;
-    int hiddenSize = 50;
+    int hiddenSize = 100;
     RealMatrix[] randomWeights = createRandomWeights(inputSize, hiddenSize, outputSize);
 
     Map<Integer, ActivationFunction<Double>> activationFunctions = new HashMap<Integer, ActivationFunction<Double>>();
     activationFunctions.put(0, new IdentityActivationFunction<Double>());
     activationFunctions.put(1, new SoftmaxActivationFunction());
     FeedForwardStrategy predictionStrategy = new FeedForwardStrategy(activationFunctions);
-    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.03d, 1,
+    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.01d, 1,
             BackPropagationLearningStrategy.DEFAULT_THRESHOLD, predictionStrategy, new LogisticRegressionCostFunction(),
             100);
     NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(randomWeights, learningStrategy, predictionStrategy);
@@ -130,6 +137,20 @@ public class WordVectorsTest {
       computeSimilarities(vocabulary, wordVectors, distanceMeasure);
     }
 
+    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.csv")));
+    for (int i = 1; i < wordVectors.getColumnDimension(); i++) {
+      double[] a = wordVectors.getColumnVector(i).toArray();
+      String csq = Arrays.toString(Arrays.copyOfRange(a, 1, a.length));
+      csq = csq.substring(1, csq.length() - 1);
+      bufferedWriter.append(csq);
+      bufferedWriter.append(",");
+      bufferedWriter.append(vocabulary.get(i-1));
+      bufferedWriter.newLine();
+    }
+
+    bufferedWriter.flush();
+    bufferedWriter.close();
+
 //    RealMatrix mappingsMatrix = MatrixUtils.createRealMatrix(next.getFeatures().size(), next.getOutput().length);
 //
 //    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.txt")));
@@ -365,7 +386,7 @@ public class WordVectorsTest {
     Collection<String> sentences = new LinkedList<String>();
     String line;
     while ((line = bufferedReader.readLine()) != null) {
-      sentences.add(line);
+      sentences.add(line.toLowerCase());
     }
     return sentences;
   }

Modified: labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt Mon Nov  2 16:10:29 2015
@@ -34,4 +34,19 @@ This is both computationally unfeasible
 Based on a recent work that proposed to learn a generic language model that can be modified through a set of document-specific parameters we explore use of new neural network models that are adapted to ad-hoc IR tasks
 Within the language model IR framework we propose and study the use of a generic language model as well as a document-specific language model
 Both can be used as a smoothing component but the latter is more adapted to the document at hand and has the potential of being used as a full document language model
-We experiment with such models and analyze their results on TREC-1 to 8 datasets
\ No newline at end of file
+We experiment with such models and analyze their results on TREC-1 to 8 datasets
+The word2vec model and application by Mikolov et al have attracted a great amount of attention in recent two years
+The vector representations of words learned by word2vec models have been proven to be able to carry semantic meanings and are useful in various NLP tasks
+As an increasing number of researchers would like to experiment with word2vec I notice that there lacks a material that comprehensively explains the parameter learning process of word2vec in details thus preventing many people with less neural network experience from understanding how exactly word2vec works
+This note provides detailed derivations and explanations of the parameter update equations for the word2vec models including the original continuous bag-of-word (CBOW) and skip-gram models as well as advanced tricks hierarchical soft-max and negative sampling
+In the appendix a review is given on the basics of neuron network models and backpropagation
+To avoid the inaccuracy caused by classifying the example into several categories given by TREC manually we take the word2vec to represent all attractions and user contexts in the continuous vector space learnt by neural network language models
+The base of NNML is using neural networks for the probability function
+The model learns simultaneously a distributed representation for each word along with the probability function for word sequences expressed in terms of these representations
+Training such large models we propose continuous bag of words as our framework and soft-max as the active function
+So we use the word2vec to train wikitravel corpus and got the word vector
+To avoid the curse of dimensionality by learning a distributed representation for words as our word vector we define a test set that compare different dimensionality of vectors for our task using the same training data and using the same model architecture
+We extend the word2vec framework to capture meaning across languages
+The input consists of a source text and a word-aligned parallel text in a second language
+The joint word2vec tool then represents words in both languages within a common “semantic” vector space
+The result can be used to enrich lexicons of under-resourced languages to identify ambiguities and to perform clustering and classification
\ No newline at end of file


