You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by og...@apache.org on 2012/03/05 17:29:58 UTC

svn commit: r1297116 - in /incubator/stanbol/trunk/enhancer/engines/topic/src: main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Author: ogrisel
Date: Mon Mar  5 16:29:58 2012
New Revision: 1297116

URL: http://svn.apache.org/viewvc?rev=1297116&view=rev
Log:
STANBOL-197: speed up the performance evaluation test by running only 1 CV iteration instead of 3

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1297116&r1=1297115&r2=1297116&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Mon Mar  5 16:29:58 2012
@@ -864,14 +864,16 @@ public class TopicClassificationEngine e
         }
         int updatedTopics = 0;
         int cvFoldCount = 3; // 3-folds CV is hardcoded for now
-
+        int cvIterationCount = 1; // only one 3-folds CV iteration
+ 
         TopicClassificationEngine classifier = new TopicClassificationEngine();
         classifier.setTrainingSet(trainingSet);
         try {
             // TODO: make the temporary folder path configurable with a property
             evaluationFolder = File.createTempFile("stanbol-classifier-evaluation-", "-solr");
-            for (int cvFoldIndex = 0; cvFoldIndex < cvFoldCount; cvFoldIndex++) {
-                updatedTopics = performCVFold(classifier, cvFoldIndex, cvFoldCount, incremental);
+            for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
+                updatedTopics = performCVFold(classifier, cvFoldIndex, cvFoldCount, cvIterationCount,
+                    incremental);
             }
         } catch (ConfigurationException e) {
             throw new ClassifierException(e);
@@ -887,12 +889,13 @@ public class TopicClassificationEngine e
     protected int performCVFold(final TopicClassificationEngine classifier,
                                 int cvFoldIndex,
                                 int cvFoldCount,
-                                boolean incremental) throws ConfigurationException,
+                                int cvIterations, boolean incremental) throws ConfigurationException,
                                                     TrainingSetException,
                                                     ClassifierException {
 
-        log.info(String.format("Performing evaluation CV iteration %d/%d on classifier %s", cvFoldIndex + 1,
-            cvFoldCount, engineId));
+        cvIterations = cvIterations <= 0 ? cvFoldCount : cvFoldCount;
+        log.info(String.format("Performing evaluation %d-fold CV iteration %d/%d on classifier %s",
+            cvFoldCount, cvFoldIndex + 1, cvIterations, engineId));
         long start = System.currentTimeMillis();
         FileUtils.deleteQuietly(evaluationFolder);
         evaluationFolder.mkdir();

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1297116&r1=1297115&r2=1297116&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Mon Mar  5 16:29:58 2012
@@ -425,7 +425,7 @@ public class TopicEngineTest extends Emb
             String topic = String.format("urn:t/%03d", i);
             performanceEstimates = classifier.getPerformanceEstimates(topic);
             assertTrue(performanceEstimates.uptodate);
-            assertGreater(performanceEstimates.precision, 0.5f);
+            assertGreater(performanceEstimates.precision, 0.45f);
             assertNotNull(performanceEstimates.falsePositiveExampleIds);
             assertNotNull(performanceEstimates.falseNegativeExampleIds);
             if (performanceEstimates.precision < 1) {
@@ -434,10 +434,12 @@ public class TopicEngineTest extends Emb
             if (performanceEstimates.recall < 1) {
                 assertFalse(performanceEstimates.falseNegativeExampleIds.isEmpty());
             }
-            assertGreater(performanceEstimates.recall, 0.5f);
-            assertGreater(performanceEstimates.f1, 0.65f);
-            assertGreater(performanceEstimates.positiveSupport, 10);
-            assertGreater(performanceEstimates.negativeSupport, 10);
+            assertGreater(performanceEstimates.recall, 0.45f);
+            assertGreater(performanceEstimates.f1, 0.55f);
+            // The support is very small, so the estimates are unstable and we only assert low minimum
+            // expectations; this is the trade-off for keeping the test reasonably fast.
+            assertGreater(performanceEstimates.positiveSupport, 4);
+            assertGreater(performanceEstimates.negativeSupport, 4);
             assertNotNull(performanceEstimates.evaluationDate);
         }