You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/02/27 07:31:29 UTC
svn commit: r1450615 - in
/stanbol/trunk/enhancement-engines/topic/engine/src:
main/java/org/apache/stanbol/enhancer/engine/topic/
test/java/org/apache/stanbol/enhancer/engine/topic/
Author: rwesten
Date: Wed Feb 27 06:31:29 2013
New Revision: 1450615
URL: http://svn.apache.org/r1450615
Log:
STANBOL-811: The TopicClassificationEngine no longer uses File#createTempFile(..) to create the folder for performing CV folds. The root folder is now configured by the unit tests. Within OSGI the property is not set and would cause an NPE if no ManagedSolrServer is present (this should be reviewed by ogrisel). Minor: also added some logging to the unit tests.
Modified:
stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
Modified: stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Wed Feb 27 06:31:29 2013
@@ -306,6 +306,17 @@ public class TopicClassificationEngine e
@Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY, bind = "bindManagedSolrServer", unbind = "unbindManagedSolrServer", strategy = ReferenceStrategy.EVENT, policy = ReferencePolicy.DYNAMIC)
protected ManagedSolrServer managedSolrServerDummy; // trick to call the super class binders
+ /**
+ * Only used for testing outside an OSGI environment (see STANBOL-811:
+ * the previously used {@link File#createTempFile(String, String)} does not
+ * work on some Windows versions).
+ */
+ private File embeddedSolrServerDir;
+
+ void configureEmbeddedSolrServerDir(File directory){
+ embeddedSolrServerDir = directory;
+ }
+
@Activate
protected void activate(ComponentContext context) throws ConfigurationException, InvalidSyntaxException {
@SuppressWarnings("unchecked")
@@ -1025,11 +1036,7 @@ public class TopicClassificationEngine e
throw new ClassifierException("Another evaluation is already running");
}
int updatedTopics = 0;
- File tmpfolder = null;
try {
- tmpfolder = File.createTempFile("stanbol-evaluation-folder-", ".tmp");
- tmpfolder.delete();
- tmpfolder.mkdir();
evaluationRunning = true;
int cvFoldCount = 3; // 3-folds CV is hardcoded for now
int cvIterationCount = 3; // make it possible to limit the number of folds to use
@@ -1038,9 +1045,10 @@ public class TopicClassificationEngine e
// statistics are up to date
getTrainingSet().optimize();
- // TODO: make the temporary folder path configurable with a property
+ // NOTE: The folder used to create the SolrServer used for CVFold
+ // is now created within the #embeddedSolrServerDir
for (int cvFoldIndex = 0; cvFoldIndex < cvIterationCount; cvFoldIndex++) {
- updatedTopics = performCVFold(tmpfolder, cvFoldIndex, cvFoldCount, cvIterationCount,
+ updatedTopics = performCVFold(cvFoldIndex, cvFoldCount, cvIterationCount,
incremental);
}
SolrServer solrServer = getActiveSolrServer();
@@ -1052,14 +1060,12 @@ public class TopicClassificationEngine e
} catch (SolrServerException e) {
throw new ClassifierException(e);
} finally {
- FileUtils.deleteQuietly(tmpfolder);
evaluationRunning = false;
}
return updatedTopics;
}
- protected int performCVFold(File tmpfolder,
- int cvFoldIndex,
+ protected int performCVFold(int cvFoldIndex,
int cvFoldCount,
int cvIterations,
boolean incremental) throws ConfigurationException,
@@ -1079,7 +1085,12 @@ public class TopicClassificationEngine e
classifier.activate(context, getCanonicalConfiguration(engineName + "-evaluation"));
} else {
// non-OSGi runtime, need to do the setup manually
- EmbeddedSolrServer evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(tmpfolder,
+ File solrServerDir = new File(embeddedSolrServerDir,engineName + "-evaluation");
+ if(solrServerDir.isDirectory()){
+ FileUtils.forceDelete(solrServerDir);
+ }
+ FileUtils.forceMkdir(solrServerDir);
+ EmbeddedSolrServer evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(solrServerDir,
"evaluationclassifierserver", "default-topic-model", "default-topic-model");
classifier.configure(getCanonicalConfiguration(evaluationServer));
}
Modified: stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Wed Feb 27 06:31:29 2013
@@ -122,7 +122,8 @@ public class TopicEngineTest extends Emb
classifierSolrServer = makeEmbeddedSolrServer(solrHome, "classifierserver", "test-topic-model",
"default-topic-model");
classifier = TopicClassificationEngine.fromParameters(getDefaultClassifierConfigParams());
-
+ //configure the directory used to create Embedded SolrServers for CVFold
+ classifier.configureEmbeddedSolrServerDir(solrHome);
trainingSetSolrServer = makeEmbeddedSolrServer(solrHome, "trainingsetserver",
"test-topic-trainingset", "default-topic-trainingset");
trainingSet = new SolrTrainingSet();
@@ -162,6 +163,7 @@ public class TopicEngineTest extends Emb
@Test
public void testEngineConfiguration() throws ConfigurationException {
+ log.info(" --- testEngineConfiguration --- ");
Hashtable<String,Object> config = getDefaultClassifierConfigParams();
TopicClassificationEngine classifier = TopicClassificationEngine.fromParameters(config);
assertNotNull(classifier);
@@ -199,6 +201,7 @@ public class TopicEngineTest extends Emb
@Test
public void testImportModelFromSKOS() throws Exception {
+ log.info(" --- testImportModelFromSKOS --- ");
Parser parser = Parser.getInstance();
parser.bindParsingProvider(new JenaParserProvider());
Graph graph = parser.parse(getClass().getResourceAsStream("/sample-scheme.skos.rdf.xml"),
@@ -215,6 +218,7 @@ public class TopicEngineTest extends Emb
@Test
public void testProgrammaticThesaurusConstruction() throws Exception {
+ log.info(" --- testProgrammaticThesaurusConstruction --- ");
// Register the roots of the taxonomy
classifier.addConcept("http://example.com/topics/root1", null);
classifier.addConcept("http://example.com/topics/root2", null);
@@ -254,6 +258,7 @@ public class TopicEngineTest extends Emb
@Test
public void testEmptyIndexTopicClassification() throws Exception {
+ log.info(" --- testEmptyIndexTopicClassification --- ");
TopicClassificationEngine engine = TopicClassificationEngine
.fromParameters(getDefaultClassifierConfigParams());
List<TopicSuggestion> suggestedTopics = engine.suggestTopics("This is a test.");
@@ -265,6 +270,7 @@ public class TopicEngineTest extends Emb
// to get updated to work with the new Solr schema + move the CSV import directly to the classifier or
// training set API
public void testTopicClassification() throws Exception {
+ log.info(" --- testTopicClassification --- ");
loadSampleTopicsFromTSV();
List<TopicSuggestion> suggestedTopics = classifier
.suggestTopics("The Man Who Shot Liberty Valance is a 1962"
@@ -279,7 +285,7 @@ public class TopicEngineTest extends Emb
@Test
public void testTrainClassifierFromExamples() throws Exception {
-
+ log.info(" --- testTrainClassifierFromExamples --- ");
// mini taxonomy for news articles
String[] business = {"urn:topics/business", "http://dbpedia.org/resource/Business"};
String[] technology = {"urn:topics/technology", "http://dbpedia.org/resource/Technology"};
@@ -410,6 +416,7 @@ public class TopicEngineTest extends Emb
@Test
public void testUpdatePerformanceEstimates() throws Exception {
+ log.info(" --- testUpdatePerformanceEstimates --- ");
ClassificationReport performanceEstimates;
// no registered topic
try {
@@ -453,6 +460,7 @@ public class TopicEngineTest extends Emb
@Test
public void testCrossValidation() throws Exception {
+ log.info(" --- testCrossValidation --- ");
// seed a pseudo random number generator for reproducible tests
Random rng = new Random(0);
ClassificationReport performanceEstimates;
Modified: stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java?rev=1450615&r1=1450614&r2=1450615&view=diff
==============================================================================
--- stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java (original)
+++ stanbol/trunk/enhancement-engines/topic/engine/src/test/java/org/apache/stanbol/enhancer/engine/topic/TrainingSetTest.java Wed Feb 27 06:31:29 2013
@@ -125,6 +125,7 @@ public class TrainingSetTest extends Emb
@Test
public void testDateSerialization() throws Exception {
+ log.info(" --- testDateSerialization --- ");
GregorianCalendar timeUtc = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
timeUtc.set(2012, 23, 12, 06, 43, 00);
timeUtc.set(Calendar.MILLISECOND, 0);
@@ -138,6 +139,7 @@ public class TrainingSetTest extends Emb
@Test
public void testEmptyTrainingSet() throws TrainingSetException {
+ log.info(" --- testEmptyTrainingSet --- ");
Batch<Example> examples = trainingSet.getPositiveExamples(new ArrayList<String>(), null);
assertEquals(examples.items.size(), 0);
assertFalse(examples.hasMore);
@@ -157,6 +159,7 @@ public class TrainingSetTest extends Emb
@Test
public void testStoringExamples() throws ConfigurationException, TrainingSetException {
+ log.info(" --- testStoringExamples --- ");
trainingSet.registerExample("example1", "Text of example1.", Arrays.asList(TOPIC_1));
trainingSet.registerExample("example2", "Text of example2.", Arrays.asList(TOPIC_1, TOPIC_2));
trainingSet.registerExample("example3", "Text of example3.", new ArrayList<String>());
@@ -201,6 +204,7 @@ public class TrainingSetTest extends Emb
@Test
public void testBatchingPositiveExamples() throws ConfigurationException, TrainingSetException {
+ log.info(" --- testBatchingPositiveExamples --- ");
Set<String> expectedCollectedIds = new HashSet<String>();
Set<String> expectedCollectedText = new HashSet<String>();
Set<String> collectedIds = new HashSet<String>();
@@ -243,6 +247,7 @@ public class TrainingSetTest extends Emb
@Test
public void testBatchingNegativeExamplesAndAutoId() throws ConfigurationException, TrainingSetException {
+ log.info(" --- testBatchingNegativeExamplesAndAutoId --- ");
Set<String> expectedCollectedIds = new HashSet<String>();
Set<String> expectedCollectedText = new HashSet<String>();
Set<String> collectedIds = new HashSet<String>();
@@ -276,6 +281,7 @@ public class TrainingSetTest extends Emb
@Test
public void testHasChangedSince() throws Exception {
+ log.info(" --- testHasChangedSince --- ");
Date date0 = new Date();
assertFalse(trainingSet.hasChangedSince(Arrays.asList(TOPIC_1), date0));
assertFalse(trainingSet.hasChangedSince(Arrays.asList(TOPIC_2), date0));