Posted to commits@labs.apache.org by to...@apache.org on 2015/10/07 12:55:37 UTC

svn commit: r1707236 - in /labs/yay/trunk/core/src: main/java/org/apache/yay/core/BackPropagationLearningStrategy.java test/java/org/apache/yay/core/Word2VecTest.java test/resources/word2vec/sentences.txt

Author: tommaso
Date: Wed Oct  7 10:55:37 2015
New Revision: 1707236

URL: http://svn.apache.org/viewvc?rev=1707236&view=rev
Log:
added iterations parameter to backprop, enhanced word2vec test

Modified:
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Wed Oct  7 10:55:37 2015
@@ -40,7 +40,7 @@ public class BackPropagationLearningStra
 
   public static final double DEFAULT_THRESHOLD = 0.05;
   public static final int MAX_ITERATIONS = 100000;
-  public static final double DEFAULT_ALPHA = 0.000003;
+  public static final double DEFAULT_ALPHA = 0.0000003;
 
   private final PredictionStrategy<Double, Double> predictionStrategy;
   private final CostFunction<RealMatrix, Double, Double> costFunction;
@@ -106,11 +106,13 @@ public class BackPropagationLearningStra
         // calculate cost
         double newCost = costFunction.calculateAggregatedCost(samples, hypothesis);
 
-        if (newCost > cost && batch == -1) {
+        if (Double.isNaN(newCost)) {
+          throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
+        } else if (Double.POSITIVE_INFINITY == newCost || (newCost > cost && batch == -1)) {
           throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + newCost);
         } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
           System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost + " and parameters " + Arrays.toString(hypothesis.getParameters()));
           break;
         }
 
         // update registered cost
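
For reference, the guard added above boils down to the following loop shape. This is a minimal, self-contained sketch on a toy one-parameter quadratic cost (the class, constants, and cost here are illustrative stand-ins, not Yay's API). Note that Double.isNaN is tested before the cost comparisons: NaN compares false against every value, so it would otherwise slip past both the divergence and convergence checks.

    public class ConvergenceGuardSketch {
      public static void main(String[] args) {
        double alpha = 0.1;        // learning rate
        double threshold = 1e-9;   // cost below this counts as converged
        int maxIterations = 100000;
        double w = 5d;             // parameter to fit; the toy optimum is w = 2
        double cost = Double.MAX_VALUE;
        for (int iterations = 1; ; iterations++) {
          double newCost = (w - 2d) * (w - 2d); // toy quadratic cost
          if (Double.isNaN(newCost)) {
            // NaN would make every comparison below false, so test it first
            throw new RuntimeException("cost calculation underflow at iteration " + iterations);
          } else if (Double.POSITIVE_INFINITY == newCost || newCost > cost) {
            // diverging: alpha is too large for this cost surface
            throw new RuntimeException("cost going from " + cost + " to " + newCost);
          } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
            System.out.println("converged after " + (iterations - 1) + " iterations with cost " + newCost);
            break;
          }
          cost = newCost;               // update registered cost
          w -= alpha * 2d * (w - 2d);   // gradient step on the toy cost
        }
      }
    }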

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java Wed Oct  7 10:55:37 2015
@@ -19,9 +19,14 @@
 package org.apache.yay.core;
 
 import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.io.ObjectOutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -31,7 +36,9 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.MatrixUtils;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.SingularValueDecomposition;
 import org.apache.yay.Feature;
 import org.apache.yay.Input;
 import org.apache.yay.NeuralNetwork;
@@ -58,69 +65,88 @@ public class Word2VecTest {
     Collection<String> fragments = getFragments(sentences, 4);
     assertFalse(fragments.isEmpty());
 
+    // TODO : make it possible to define the no. of hidden units
+    //    int n = new Random().nextInt(20);
     TrainingSet<Double, Double> trainingSet = createTrainingSet(vocabulary, fragments);
 
-//    int n = new Random().nextInt(20);
-
     TrainingExample<Double, Double> next = trainingSet.iterator().next();
     int inputSize = next.getFeatures().size();
     int outputSize = next.getOutput().length;
     RealMatrix[] randomWeights = createRandomWeights(inputSize, inputSize, outputSize);
 
     FeedForwardStrategy predictionStrategy = new FeedForwardStrategy(new IdentityActivationFunction<Double>());
     BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(BackPropagationLearningStrategy.
             DEFAULT_ALPHA, -1, BackPropagationLearningStrategy.DEFAULT_THRESHOLD, predictionStrategy, new LMSCostFunction(),
-            5);
+            10);
     NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(randomWeights, learningStrategy, predictionStrategy);
 
     neuralNetwork.learn(trainingSet);
 
-    String word = vocabulary.get(new Random().nextInt(vocabulary.size()));
-//    final Double[] doubles = ConversionUtils.toValuesCollection(next.getFeatures()).toArray(new Double[next.getFeatures().size()]);
-    final Double[] doubles = hotEncode(word, vocabulary);
-//    String word = hotDecode(doubles, vocabulary);
-
-//    TrainingExample<Double, Double> input = ExamplesFactory.createDoubleArrayTrainingExample(new Double[outputSize], doubles);
-    Input<Double> input = new TrainingExample<Double, Double>() {
-      @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-        for (Double d : doubles) {
-          Feature<Double> f = new Feature<Double>();
-          f.setValue(d);
-          features.add(f);
+    RealMatrix vectorsMatrix = MatrixUtils.createRealMatrix(vocabulary.size(), next.getOutput().length);
+
+    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/vectors.txt")));
+    int m = 0;
+    for (String word : vocabulary) {
+      final Double[] doubles = hotEncode(word, vocabulary);
+      Input<Double> input = new TrainingExample<Double, Double>() {
+        @Override
+        public ArrayList<Feature<Double>> getFeatures() {
+          ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
+          Feature<Double> biasFeature = new Feature<Double>();
+          biasFeature.setValue(1d);
+          features.add(biasFeature);
+          for (Double d : doubles) {
+            Feature<Double> f = new Feature<Double>();
+            f.setValue(d);
+            features.add(f);
+          }
+          return features;
         }
-        return features;
-      }
 
-      @Override
-      public Double[] getOutput() {
-        return new Double[0];
+        @Override
+        public Double[] getOutput() {
+          return new Double[0];
+        }
+      };
+      Double[] predict = neuralNetwork.predict(input);
+      assertNotNull(predict);
+      double[] row = new double[predict.length];
+      for (int x = 0; x < row.length; x++) {
+        row[x] = predict[x];
       }
-    };
-    Double[] predict = neuralNetwork.predict(input);
-    assertNotNull(predict);
-
-    System.out.println(Arrays.toString(predict));
-
-    Double[] wordVec1 = Arrays.copyOfRange(predict, 0, vocabulary.size());
-    assertNotNull(wordVec1);
-    Double[] wordVec2 = Arrays.copyOfRange(predict, vocabulary.size(), 2 * vocabulary.size());
-    assertNotNull(wordVec2);
-    Double[] wordVec3 = Arrays.copyOfRange(predict, 2 * vocabulary.size(), 3 * vocabulary.size());
-    assertNotNull(wordVec3);
-
-    String word1 = hotDecode(wordVec1, vocabulary);
-    assertNotNull(word1);
-    assertTrue(vocabulary.contains(word1));
-    String word2 = hotDecode(wordVec2, vocabulary);
-    assertNotNull(word2);
-    assertTrue(vocabulary.contains(word2));
-    String word3 = hotDecode(wordVec3, vocabulary);
-    assertNotNull(word3);
-    assertTrue(vocabulary.contains(word3));
+      vectorsMatrix.setRow(m, row);
+      m++;
+
+      String vectorString = Arrays.toString(predict);
+      bufferedWriter.append(vectorString);
+      bufferedWriter.newLine();
+
+      Double[] wordVec1 = Arrays.copyOfRange(predict, 0, vocabulary.size());
+      assertNotNull(wordVec1);
+      Double[] wordVec2 = Arrays.copyOfRange(predict, vocabulary.size(), 2 * vocabulary.size());
+      assertNotNull(wordVec2);
+      Double[] wordVec3 = Arrays.copyOfRange(predict, 2 * vocabulary.size(), 3 * vocabulary.size());
+      assertNotNull(wordVec3);
+
+      String word1 = hotDecode(wordVec1, vocabulary);
+      assertNotNull(word1);
+      assertTrue(vocabulary.contains(word1));
+      String word2 = hotDecode(wordVec2, vocabulary);
+      assertNotNull(word2);
+      assertTrue(vocabulary.contains(word2));
+      String word3 = hotDecode(wordVec3, vocabulary);
+      assertNotNull(word3);
+      assertTrue(vocabulary.contains(word3));
+
+      System.out.println(word + " -> " + word1 + " " + word2 + " " + word3);
+    }
+    bufferedWriter.flush();
+    bufferedWriter.close();
+
+    ObjectOutputStream os = new ObjectOutputStream(new FileOutputStream(new File("target/vectors.bin")));
+    MatrixUtils.serializeRealMatrix(vectorsMatrix, os);
+    os.close();
 
-    System.out.println(word + " -> " + word1 + " " + word2 + " " + word3);
   }
 
   private String hotDecode(Double[] doubles, List<String> vocabulary) {
@@ -136,7 +161,6 @@ public class Word2VecTest {
     return vocabulary.get(index);
   }
 
-
   private TrainingSet<Double, Double> createTrainingSet(List<String> vocabulary, Collection<String> fragments) {
     Collection<TrainingExample<Double, Double>> samples = new LinkedList<TrainingExample<Double, Double>>();
     for (String fragment : fragments) {
@@ -170,6 +194,9 @@ public class Word2VecTest {
           @Override
           public ArrayList<Feature<Double>> getFeatures() {
             ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
+            Feature<Double> biasFeature = new Feature<Double>();
+            biasFeature.setValue(1d);
+            features.add(biasFeature);
             for (Double d : input) {
               Feature<Double> e = new Feature<Double>();
               e.setValue(d);
@@ -283,4 +310,4 @@ public class Word2VecTest {
     }
     return initialWeights;
   }
-}
+}
\ No newline at end of file
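
The test above round-trips each vocabulary word through hotEncode / hotDecode before and after prediction. Those helpers live in Word2VecTest; as an illustration of the encoding they implement, here is a sketch assuming one-hot encoding with a plain argmax decode, as suggested by the hotDecode fragment in the diff:

    import java.util.Arrays;
    import java.util.List;

    public class OneHotSketch {

      // one 1.0 at the word's vocabulary index, 0.0 everywhere else
      static Double[] hotEncode(String word, List<String> vocabulary) {
        Double[] vector = new Double[vocabulary.size()];
        Arrays.fill(vector, 0d);
        vector[vocabulary.indexOf(word)] = 1d;
        return vector;
      }

      // argmax: the index with the highest activation picks the word, which is
      // how the test maps each predicted slice back onto the vocabulary
      static String hotDecode(Double[] values, List<String> vocabulary) {
        int index = 0;
        for (int i = 1; i < values.length; i++) {
          if (values[i] > values[index]) {
            index = i;
          }
        }
        return vocabulary.get(index);
      }

      public static void main(String[] args) {
        List<String> vocabulary = Arrays.asList("skip", "gram", "model");
        Double[] encoded = hotEncode("gram", vocabulary);
        System.out.println(Arrays.toString(encoded) + " -> " + hotDecode(encoded, vocabulary));
      }
    }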
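
The vectors matrix written to target/vectors.bin can be read back with the matching Commons Math call. One caveat: MatrixUtils.deserializeRealMatrix restores into a named field via reflection (it is designed for use inside readObject implementations), so a holder object is needed. A hypothetical reader, not part of this commit:

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.ObjectInputStream;
    import org.apache.commons.math3.linear.MatrixUtils;
    import org.apache.commons.math3.linear.RealMatrix;

    public class VectorsReader {

      private RealMatrix vectorsMatrix; // filled in by deserializeRealMatrix

      public static void main(String[] args) throws Exception {
        VectorsReader reader = new VectorsReader();
        ObjectInputStream ois = new ObjectInputStream(new FileInputStream(new File("target/vectors.bin")));
        try {
          // reads the matrix from the stream and sets it on reader.vectorsMatrix
          MatrixUtils.deserializeRealMatrix(reader, "vectorsMatrix", ois);
        } finally {
          ois.close();
        }
        System.out.println("read " + reader.vectorsMatrix.getRowDimension() + " x "
            + reader.vectorsMatrix.getColumnDimension() + " word vectors matrix");
      }
    }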

Modified: labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt Wed Oct  7 10:55:37 2015
@@ -2,14 +2,14 @@ The word2vec software of Tomas Mikolov a
 The learning models behind the software are described in two research papers
 We found the description of the models in these papers to be somewhat cryptic and hard to follow
 While the motivations and presentation may be obvious to the neural-networks language-modeling crowd we had to struggle quite a bit to figure out the rationale behind the equations
-This note is an attempt to explain the negative sampling equation in “Distributed Representations of Words and Phrases and their Compositionality” by Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado and Jeffrey Dean
+This note is an attempt to explain the negative sampling equation in “Distributed Representations of Words and Phrases and their Compositionality” by Tomas Mikolov Ilya Sutskever Kai Chen Greg Corrado and Jeffrey Dean
 The departure point of the paper is the skip-gram model
 In this model we are given a corpus of words w and their contexts c
-We consider the conditional probabilities p(c|w) and given a corpus Text, the goal is to set the parameters θ of p(c|w;θ) so as to maximize the corpus probability
+We consider the conditional probabilities p(c|w) and given a corpus Text the goal is to set the parameters θ of p(c|w;θ) so as to maximize the corpus probability
 The recently introduced continuous Skip-gram model is an efficient method for learning high-quality distributed vector representations that capture a large number of precise syntactic and semantic word relationships
 In this paper we present several extensions that improve both the quality of the vectors and the training speed
 By subsampling of the frequent words we obtain significant speedup and also learn more regular word representations
 We also describe a simple alternative to the hierarchical softmax called negative sampling
 An inherent limitation of word representations is their indifference to word order and their inability to represent idiomatic phrases
-For example, the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”
-Motivated by this example, we present a simple method for finding phrases in text and show that learning good vector representations for millions of phrases is possible
\ No newline at end of file
+For example the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”
+Motivated by this example we present a simple method for finding phrases in text and show that learning good vector representations for millions of phrases is possible
\ No newline at end of file


