You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by og...@apache.org on 2012/03/05 17:29:58 UTC
svn commit: r1297116 - in
/incubator/stanbol/trunk/enhancer/engines/topic/src:
main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Author: ogrisel
Date: Mon Mar 5 16:29:58 2012
New Revision: 1297116
URL: http://svn.apache.org/viewvc?rev=1297116&view=rev
Log:
STANBOL-197: speed up the performance evaluation test by doing only 1 CV iteration instead of 3
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1297116&r1=1297115&r2=1297116&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Mon Mar 5 16:29:58 2012
@@ -864,14 +864,16 @@ public class TopicClassificationEngine e
}
int updatedTopics = 0;
int cvFoldCount = 3; // 3-folds CV is hardcoded for now
-
+ int cvIterationCount = 1; // only one 3-folds CV iteration
+
TopicClassificationEngine classifier = new TopicClassificationEngine();
classifier.setTrainingSet(trainingSet);
try {
// TODO: make the temporary folder path configurable with a property
evaluationFolder = File.createTempFile("stanbol-classifier-evaluation-", "-solr");
- for (int cvFoldIndex = 0; cvFoldIndex < cvFoldCount; cvFoldIndex++) {
- updatedTopics = performCVFold(classifier, cvFoldIndex, cvFoldCount, incremental);
+ for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
+ updatedTopics = performCVFold(classifier, cvFoldIndex, cvFoldCount, cvIterationCount,
+ incremental);
}
} catch (ConfigurationException e) {
throw new ClassifierException(e);
@@ -887,12 +889,13 @@ public class TopicClassificationEngine e
protected int performCVFold(final TopicClassificationEngine classifier,
int cvFoldIndex,
int cvFoldCount,
- boolean incremental) throws ConfigurationException,
+ int cvIterations, boolean incremental) throws ConfigurationException,
TrainingSetException,
ClassifierException {
- log.info(String.format("Performing evaluation CV iteration %d/%d on classifier %s", cvFoldIndex + 1,
- cvFoldCount, engineId));
+ cvIterations = cvIterations <= 0 ? cvFoldCount : cvFoldCount;
+ log.info(String.format("Performing evaluation %d-fold CV iteration %d/%d on classifier %s",
+ cvFoldCount, cvFoldIndex + 1, cvIterations, engineId));
long start = System.currentTimeMillis();
FileUtils.deleteQuietly(evaluationFolder);
evaluationFolder.mkdir();
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1297116&r1=1297115&r2=1297116&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Mon Mar 5 16:29:58 2012
@@ -425,7 +425,7 @@ public class TopicEngineTest extends Emb
String topic = String.format("urn:t/%03d", i);
performanceEstimates = classifier.getPerformanceEstimates(topic);
assertTrue(performanceEstimates.uptodate);
- assertGreater(performanceEstimates.precision, 0.5f);
+ assertGreater(performanceEstimates.precision, 0.45f);
assertNotNull(performanceEstimates.falsePositiveExampleIds);
assertNotNull(performanceEstimates.falseNegativeExampleIds);
if (performanceEstimates.precision < 1) {
@@ -434,10 +434,12 @@ public class TopicEngineTest extends Emb
if (performanceEstimates.recall < 1) {
assertFalse(performanceEstimates.falseNegativeExampleIds.isEmpty());
}
- assertGreater(performanceEstimates.recall, 0.5f);
- assertGreater(performanceEstimates.f1, 0.65f);
- assertGreater(performanceEstimates.positiveSupport, 10);
- assertGreater(performanceEstimates.negativeSupport, 10);
+ assertGreater(performanceEstimates.recall, 0.45f);
+ assertGreater(performanceEstimates.f1, 0.55f);
+ // The support is very small, so the estimates are unstable; we therefore set low minimum
+ // expectations so that this test can run reasonably fast.
+ assertGreater(performanceEstimates.positiveSupport, 4);
+ assertGreater(performanceEstimates.negativeSupport, 4);
assertNotNull(performanceEstimates.evaluationDate);
}