You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/02/27 07:31:29 UTC

svn commit: r1450615 - in /stanbol/trunk/enhancement-engines/topic/engine/src: main/java/org/apache/stanbol/enhancer/engine/topic/ test/java/org/apache/stanbol/enhancer/engine/topic/

Author: rwesten
Date: Wed Feb 27 06:31:29 2013
New Revision: 1450615

URL: http://svn.apache.org/r1450615
Log:
STANBOL-811: The TopicClassificationEngine no longer uses File#createTempFile(..) to create the folder for performing CV folds. The root folder is now configured by the unit tests. Within OSGi the property is not set, which would cause an NPE if no ManagedSolrServer is present (this should be reviewed by ogrisel). Minor: also added some logging to the unit tests.

Modified:
    stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
    stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java

Modified: stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Wed Feb 27 06:31:29 2013
@@ -306,6 +306,17 @@ public class TopicClassificationEngine e
     @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, bind = "bindManagedSolrServer", unbind = "unbindManagedSolrServer", strategy = ReferenceStrategy.EVENT, policy = ReferencePolicy.DYNAMIC)
     protected ManagedSolrServer managedSolrServerDummy; // trick to call the super class binders
 
+    /**
+     * Only used for testing outside an OSGI environment (see STANBOL-811: 
+     * the previously used {@link File#createTempFile(String, String)} does not
+     * work on some Windows versions).
+     */
+    private File embeddedSolrServerDir;
+
+    void configureEmbeddedSolrServerDir(File directory){
+        embeddedSolrServerDir = directory;
+    }
+    
     @Activate
     protected void activate(ComponentContext context) throws ConfigurationException, InvalidSyntaxException {
         @SuppressWarnings("unchecked")
@@ -1025,11 +1036,7 @@ public class TopicClassificationEngine e
             throw new ClassifierException("Another evaluation is already running");
         }
         int updatedTopics = 0;
-        File tmpfolder = null;
         try {
-            tmpfolder = File.createTempFile("stanbol-evaluation-folder-", ".tmp");
-            tmpfolder.delete();
-            tmpfolder.mkdir();
             evaluationRunning = true;
             int cvFoldCount = 3; // 3-folds CV is hardcoded for now
             int cvIterationCount = 3; // make it possible to limit the number of folds to use
@@ -1038,9 +1045,10 @@ public class TopicClassificationEngine e
             // statistics are up to date
             getTrainingSet().optimize();
 
-            // TODO: make the temporary folder path configurable with a property
+            // NOTE: The folder used to create the SolrServer used for CVFold
+            //       is now created within the #embeddedSolrServerDir
             for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
-                updatedTopics = performCVFold(tmpfolder, cvFoldIndex, cvFoldCount, cvIterationCount,
+                updatedTopics = performCVFold(cvFoldIndex, cvFoldCount, cvIterationCount,
                     incremental);
             }
             SolrServer solrServer = getActiveSolrServer();
@@ -1052,14 +1060,12 @@ public class TopicClassificationEngine e
         } catch (SolrServerException e) {
             throw new ClassifierException(e);
         } finally {
-            FileUtils.deleteQuietly(tmpfolder);
             evaluationRunning = false;
         }
         return updatedTopics;
     }
 
-    protected int performCVFold(File tmpfolder,
-                                int cvFoldIndex,
+    protected int performCVFold(int cvFoldIndex,
                                 int cvFoldCount,
                                 int cvIterations,
                                 boolean incremental) throws ConfigurationException,
@@ -1079,7 +1085,12 @@ public class TopicClassificationEngine e
                 classifier.activate(context, getCanonicalConfiguration(engineName + "-evaluation"));
             } else {
                 // non-OSGi runtime, need to do the setup manually
-                EmbeddedSolrServer evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(tmpfolder,
+                File solrServerDir = new File(embeddedSolrServerDir,engineName + "-evaluation");
+                if(solrServerDir.isDirectory()){
+                    FileUtils.forceDelete(solrServerDir);
+                }
+                FileUtils.forceMkdir(solrServerDir);
+                EmbeddedSolrServer evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(solrServerDir,
                     "evaluationclassifierserver", "default-topic-model", "default-topic-model");
                 classifier.configure(getCanonicalConfiguration(evaluationServer));
             }

Modified: stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Wed Feb 27 06:31:29 2013
@@ -122,7 +122,8 @@ public class TopicEngineTest extends Emb
         classifierSolrServer = makeEmbeddedSolrServer(solrHome, "classifierserver", "test-topic-model",
             "default-topic-model");
         classifier = TopicClassificationEngine.fromParameters(getDefaultClassifierConfigParams());
-
+        //configure the directory used to create Embedded SolrServers for CVFold
+        classifier.configureEmbeddedSolrServerDir(solrHome);
         trainingSetSolrServer = makeEmbeddedSolrServer(solrHome, "trainingsetserver",
             "test-topic-trainingset", "default-topic-trainingset");
         trainingSet = new SolrTrainingSet();
@@ -162,6 +163,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testEngineConfiguration() throws ConfigurationException {
+        log.info(" --- testEngineConfiguration --- ");
         Hashtable<String,Object> config = getDefaultClassifierConfigParams();
         TopicClassificationEngine classifier = TopicClassificationEngine.fromParameters(config);
         assertNotNull(classifier);
@@ -199,6 +201,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testImportModelFromSKOS() throws Exception {
+        log.info(" --- testImportModelFromSKOS --- ");
         Parser parser = Parser.getInstance();
         parser.bindParsingProvider(new JenaParserProvider());
         Graph graph = parser.parse(getClass().getResourceAsStream("/sample-scheme.skos.rdf.xml"),
@@ -215,6 +218,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testProgrammaticThesaurusConstruction() throws Exception {
+        log.info(" --- testProgrammaticThesaurusConstruction --- ");
         // Register the roots of the taxonomy
         classifier.addConcept("http://example.com/topics/root1", null);
         classifier.addConcept("http://example.com/topics/root2", null);
@@ -254,6 +258,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testEmptyIndexTopicClassification() throws Exception {
+        log.info(" --- testEmptyIndexTopicClassification --- ");
         TopicClassificationEngine engine = TopicClassificationEngine
                 .fromParameters(getDefaultClassifierConfigParams());
         List<TopicSuggestion> suggestedTopics = engine.suggestTopics("This is a test.");
@@ -265,6 +270,7 @@ public class TopicEngineTest extends Emb
     // to get updated to work with the new Solr schema + move the CSV import directly to the classifier or
     // training set API
     public void testTopicClassification() throws Exception {
+        log.info(" --- testTopicClassification --- ");
         loadSampleTopicsFromTSV();
         List<TopicSuggestion> suggestedTopics = classifier
                 .suggestTopics("The Man Who Shot Liberty Valance is a 1962"
@@ -279,7 +285,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testTrainClassifierFromExamples() throws Exception {
-
+        log.info(" --- testTrainClassifierFromExamples --- ");
         // mini taxonomy for news articles
         String[] business = {"urn:topics/business", "http://dbpedia.org/resource/Business"};
         String[] technology = {"urn:topics/technology", "http://dbpedia.org/resource/Technology"};
@@ -410,6 +416,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testUpdatePerformanceEstimates() throws Exception {
+        log.info(" --- testUpdatePerformanceEstimates --- ");
         ClassificationReport performanceEstimates;
         // no registered topic
         try {
@@ -453,6 +460,7 @@ public class TopicEngineTest extends Emb
 
     @Test
     public void testCrossValidation() throws Exception {
+        log.info(" --- testCrossValidation --- ");
         // seed a pseudo random number generator for reproducible tests
         Random rng = new Random(0);
         ClassificationReport performanceEstimates;

Modified: stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java Wed Feb 27 06:31:29 2013
@@ -125,6 +125,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testDateSerialization() throws Exception {
+        log.info(" --- testDateSerialization --- ");
         GregorianCalendar timeUtc = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
         timeUtc.set(2012, 23, 12, 06, 43, 00);
         timeUtc.set(Calendar.MILLISECOND, 0);
@@ -138,6 +139,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testEmptyTrainingSet() throws TrainingSetException {
+        log.info(" --- testEmptyTrainingSet --- ");
         Batch<Example> examples = trainingSet.getPositiveExamples(new ArrayList<String>(), null);
         assertEquals(examples.items.size(), 0);
         assertFalse(examples.hasMore);
@@ -157,6 +159,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testStoringExamples() throws ConfigurationException, TrainingSetException {
+        log.info(" --- testStoringExamples --- ");
         trainingSet.registerExample("example1", "Text of example1.", Arrays.asList(TOPIC_1));
         trainingSet.registerExample("example2", "Text of example2.", Arrays.asList(TOPIC_1, TOPIC_2));
         trainingSet.registerExample("example3", "Text of example3.", new ArrayList<String>());
@@ -201,6 +204,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testBatchingPositiveExamples() throws ConfigurationException, TrainingSetException {
+        log.info(" --- testBatchingPositiveExamples --- ");
         Set<String> expectedCollectedIds = new HashSet<String>();
         Set<String> expectedCollectedText = new HashSet<String>();
         Set<String> collectedIds = new HashSet<String>();
@@ -243,6 +247,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testBatchingNegativeExamplesAndAutoId() throws ConfigurationException, TrainingSetException {
+        log.info(" --- testBatchingNegativeExamplesAndAutoId --- ");
         Set<String> expectedCollectedIds = new HashSet<String>();
         Set<String> expectedCollectedText = new HashSet<String>();
         Set<String> collectedIds = new HashSet<String>();
@@ -276,6 +281,7 @@ public class TrainingSetTest extends Emb
 
     @Test
     public void testHasChangedSince() throws Exception {
+        log.info(" --- testHasChangedSince --- ");
         Date date0 = new Date();
         assertFalse(trainingSet.hasChangedSince(Arrays.asList(TOPIC_1), date0));
         assertFalse(trainingSet.hasChangedSince(Arrays.asList(TOPIC_2), date0));