Posted to commits@stanbol.apache.org by og...@apache.org on 2012/02/03 15:59:57 UTC

svn commit: r1240206 - in /incubator/stanbol/trunk/enhancer/engines/topic/src: main/java/org/apache/stanbol/enhancer/engine/topic/ main/java/org/apache/stanbol/enhancer/topic/ test/java/org/apache/stanbol/enhancer/engine/topic/

Author: ogrisel
Date: Fri Feb  3 14:59:56 2012
New Revision: 1240206

URL: http://svn.apache.org/viewvc?rev=1240206&view=rev
Log:
STANBOL-197: refactor API to use SKOS terminology

Modified:
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java
    incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Fri Feb  3 14:59:56 2012
@@ -16,7 +16,6 @@
  */
 package org.apache.stanbol.enhancer.engine.topic;
 
-
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -27,7 +26,6 @@ import java.util.Date;
 import java.util.Dictionary;
 import java.util.HashMap;
 import java.util.Hashtable;
-import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -36,11 +34,9 @@ import java.util.Set;
 import java.util.UUID;
 
 import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.Triple;
 import org.apache.clerezza.rdf.core.UriRef;
 import org.apache.clerezza.rdf.core.impl.TripleImpl;
 import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.felix.scr.annotations.Activate;
 import org.apache.felix.scr.annotations.Component;
@@ -91,11 +87,19 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Enhancement Engine that provides the ability to assign a text document to a set of topics indexed in a
+ * Enhancement Engine that provides the ability to assign a text document to a set of concepts indexed in a
  * dedicated Solr core. The assignment logic comes from terms frequencies match of the text of the document to
- * categorize with the text indexed for each topic.
+ * categorize with the text indexed for each concept.
+ * 
+ * The data model of the concept tree follows the SKOS model: concepts are organized in a hierarchical
+ * "scheme" with a "broader" relation (and the inferred "narrower" inverse relation). Concepts can also
+ * optionally be grounded in the real world by means of a foaf:primaryTopic link to an external resource
+ * such as a DBpedia entry.
  * 
- * The solr server is expected to be configured with the MoreLikeThisHandler and the matching fields from the
+ * A document is typically classified with the concept by using the dct:subject property to link the document
+ * (subject) to the concept (object).
+ * 
+ * The Solr server is expected to be configured with the MoreLikeThisHandler and the matching fields from the
  * engine configuration.
  */
 @Component(metatype = true, immediate = true, configurationFactory = true, policy = ConfigurationPolicy.REQUIRE)
@@ -106,7 +110,7 @@ import org.slf4j.LoggerFactory;
                      @Property(name = TopicClassificationEngine.SOLR_CORE),
                      @Property(name = TopicClassificationEngine.LANGUAGES),
                      @Property(name = TopicClassificationEngine.SIMILARTITY_FIELD),
-                     @Property(name = TopicClassificationEngine.TOPIC_URI_FIELD),
+                     @Property(name = TopicClassificationEngine.CONCEPT_URI_FIELD),
                      @Property(name = TopicClassificationEngine.BROADER_FIELD),
                      @Property(name = TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, value = "last_update_dt"),
                      @Property(name = TopicClassificationEngine.PRECISION_FIELD, value = "precision"),
@@ -117,7 +121,7 @@ import org.slf4j.LoggerFactory;
                      @Property(name = TopicClassificationEngine.FALSE_POSITIVES_FIELD, value = "false_positives"),
                      @Property(name = TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, value = "positive_support"),
                      @Property(name = TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, value = "negative_support"),
-                     @Property(name = Constants.SERVICE_RANKING, intValue=0)})
+                     @Property(name = Constants.SERVICE_RANKING, intValue = 0)})
 public class TopicClassificationEngine extends ConfiguredSolrCoreTracker implements EnhancementEngine,
         ServiceProperties, TopicClassifier {
 
@@ -139,10 +143,12 @@ public class TopicClassificationEngine e
 
     public static final String SIMILARTITY_FIELD = "org.apache.stanbol.enhancer.engine.topic.similarityField";
 
-    public static final String TOPIC_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.uriField";
+    public static final String CONCEPT_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.uriField";
 
     public static final String BROADER_FIELD = "org.apache.stanbol.enhancer.engine.topic.broaderField";
 
+    public static final String PRIMARY_TOPIC_FIELD = "org.apache.stanbol.enhancer.engine.topic.primaryTopicField";
+
     public static final String MODEL_UPDATE_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelUpdateDateField";
 
     public static final String MODEL_EVALUATION_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelEvaluationDateField";
@@ -171,7 +177,7 @@ public class TopicClassificationEngine e
      * Contains the only supported mime type {@link #PLAIN_TEXT_MIMETYPE}
      */
     protected static final Set<String> SUPPORTED_MIMETYPES = Collections.singleton(PLAIN_TEXT_MIMETYPE);
-    
+
     public static final String SOLR_NON_EMPTY_FIELD = "[\"\" TO *]";
 
     // TODO: make the following fields configurable
@@ -195,7 +201,7 @@ public class TopicClassificationEngine e
 
     protected String similarityField;
 
-    protected String topicUriField;
+    protected String conceptUriField;
 
     protected String broaderField;
 
@@ -254,7 +260,7 @@ public class TopicClassificationEngine e
         engineId = getRequiredStringParam(config, ENGINE_ID);
         entryIdField = getRequiredStringParam(config, ENTRY_ID_FIELD);
         modelEntryIdField = getRequiredStringParam(config, MODEL_ENTRY_ID_FIELD);
-        topicUriField = getRequiredStringParam(config, TOPIC_URI_FIELD);
+        conceptUriField = getRequiredStringParam(config, CONCEPT_URI_FIELD);
         entryTypeField = getRequiredStringParam(config, ENTRY_TYPE_FIELD);
         similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD);
         acceptedLanguages = getStringListParan(config, LANGUAGES);
@@ -278,39 +284,40 @@ public class TopicClassificationEngine e
 
     @Override
     public int canEnhance(ContentItem ci) throws EngineException {
-        if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null &&
-                getActiveSolrServer() != null){
+        if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null && getActiveSolrServer() != null) {
             return ENHANCE_SYNCHRONOUS;
         } else {
             return CANNOT_ENHANCE;
         }
-        //TODO ogrisel: validate that it is no problem that this does no longer
-        //check that the text is not empty
-//        if (text.trim().length() == 0) {
-//            return CANNOT_ENHANCE;
-//        }
+        // TODO ogrisel: validate that it is no problem that this does no longer
+        // check that the text is not empty
+        // if (text.trim().length() == 0) {
+        // return CANNOT_ENHANCE;
+        // }
     }
 
     @Override
     public void computeEnhancements(ContentItem ci) throws EngineException {
         Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
-        if(contentPart == null){
-            throw new IllegalStateException("No ContentPart with a supported Mime Type"
-                    + "found for ContentItem "+ci.getUri()+"(supported: '"
-                    + SUPPORTED_MIMETYPES+"') -> this indicates that canEnhance was" 
-                    + "NOT called and indicates a bug in the used EnhancementJobManager!");
+        if (contentPart == null) {
+            throw new IllegalStateException(
+                    "No ContentPart with a supported Mime Type" + "found for ContentItem " + ci.getUri()
+                            + "(supported: '" + SUPPORTED_MIMETYPES
+                            + "') -> this indicates that canEnhance was"
+                            + "NOT called and indicates a bug in the used EnhancementJobManager!");
         }
         String text;
         try {
             text = ContentItemHelper.getText(contentPart.getValue());
         } catch (IOException e) {
-            throw new InvalidContentException(String.format("Unable to extract "
-                +" textual content from ContentPart %s of ContentItem %s!",
-                contentPart.getKey(),ci.getUri()), e);
-        }
-        if(text.trim().isEmpty()){
-            log.warn("ContentPart {} of ContentItem {} does not contain any " +
-            		"text to extract topics from",contentPart.getKey(),ci.getUri());
+            throw new InvalidContentException(String.format(
+                "Unable to extract " + " textual content from ContentPart %s of ContentItem %s!",
+                contentPart.getKey(), ci.getUri()), e);
+        }
+        if (text.trim().isEmpty()) {
+            log.warn(
+                "ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from",
+                contentPart.getKey(), ci.getUri());
             return;
         }
         MGraph metadata = ci.getMetadata();
@@ -327,11 +334,11 @@ public class TopicClassificationEngine e
                 metadata.add(new TripleImpl(enhancement,
                         org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE,
                         TechnicalClasses.ENHANCER_TOPICANNOTATION));
-    
+
                 // add link to entity
                 metadata.add(new TripleImpl(enhancement,
                         org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE,
-                        new UriRef(topic.uri)));
+                        new UriRef(topic.conceptUri)));
                 // TODO: make it possible to dereference and the path to the root the entities according to a
                 // configuration parameter
             }
@@ -358,11 +365,12 @@ public class TopicClassificationEngine e
     public String getSchemeId() {
         return engineId;
     }
+
     @Override
     public String getName() {
         return engineId;
     }
-    
+
     @Override
     public List<String> getAcceptedLanguages() {
         return acceptedLanguages;
@@ -391,17 +399,17 @@ public class TopicClassificationEngine e
         // over query the number of suggestions to find a statistical cut based on the curve of the scores of
         // the top suggestion
         query.setRows(MAX_SUGGESTIONS * 3);
-        query.setFields(topicUriField);
+        query.setFields(conceptUriField);
         query.setIncludeScore(true);
         try {
             StreamQueryRequest request = new StreamQueryRequest(query);
             QueryResponse response = request.process(solrServer);
             SolrDocumentList results = response.getResults();
             for (SolrDocument result : results.toArray(new SolrDocument[0])) {
-                String uri = (String) result.getFirstValue(topicUriField);
+                String uri = (String) result.getFirstValue(conceptUriField);
                 if (uri == null) {
                     throw new ClassifierException(String.format(
-                        "Solr Core '%s' is missing required field '%s'.", solrCoreId, topicUriField));
+                        "Solr Core '%s' is missing required field '%s'.", solrCoreId, conceptUriField));
                 }
                 Float score = (Float) result.getFirstValue("score");
                 suggestedTopics.add(new TopicSuggestion(uri, score));
@@ -441,36 +449,36 @@ public class TopicClassificationEngine e
     }
 
     @Override
-    public Set<String> getNarrowerTopics(String broadTopicId) throws ClassifierException {
-        LinkedHashSet<String> narrowerTopics = new LinkedHashSet<String>();
+    public Set<String> getNarrowerConcepts(String broadTopicId) throws ClassifierException {
+        LinkedHashSet<String> narrowerConcepts = new LinkedHashSet<String>();
         if (broaderField == null) {
-            return narrowerTopics;
+            return narrowerConcepts;
         }
         SolrServer solrServer = getActiveSolrServer();
         SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY);
         query.addFilterQuery(broaderField + ":" + ClientUtils.escapeQueryChars(broadTopicId));
-        query.addField(topicUriField);
-        query.addSortField(topicUriField, SolrQuery.ORDER.asc);
+        query.addField(conceptUriField);
+        query.addSortField(conceptUriField, SolrQuery.ORDER.asc);
         try {
             for (SolrDocument result : solrServer.query(query).getResults()) {
-                narrowerTopics.add(result.getFirstValue(topicUriField).toString());
+                narrowerConcepts.add(result.getFirstValue(conceptUriField).toString());
             }
         } catch (SolrServerException e) {
             String msg = String.format("Error while fetching narrower topics of '%s' on Solr Core '%s'.",
                 broadTopicId, solrCoreId);
             throw new ClassifierException(msg, e);
         }
-        return narrowerTopics;
+        return narrowerConcepts;
     }
 
     @Override
-    public Set<String> getBroaderTopics(String id) throws ClassifierException {
-        LinkedHashSet<String> broaderTopics = new LinkedHashSet<String>();
+    public Set<String> getBroaderConcepts(String id) throws ClassifierException {
+        LinkedHashSet<String> broaderConcepts = new LinkedHashSet<String>();
         if (broaderField == null) {
-            return broaderTopics;
+            return broaderConcepts;
         }
         SolrServer solrServer = getActiveSolrServer();
-        SolrQuery query = new SolrQuery(topicUriField + ":" + ClientUtils.escapeQueryChars(id));
+        SolrQuery query = new SolrQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(id));
         query.addField(broaderField);
         try {
             for (SolrDocument result : solrServer.query(query).getResults()) {
@@ -480,7 +488,7 @@ public class TopicClassificationEngine e
                     continue;
                 }
                 for (Object value : broaderFieldValues) {
-                    broaderTopics.add(value.toString());
+                    broaderConcepts.add(value.toString());
                 }
             }
         } catch (SolrServerException e) {
@@ -488,18 +496,18 @@ public class TopicClassificationEngine e
                 solrCoreId);
             throw new ClassifierException(msg, e);
         }
-        return broaderTopics;
+        return broaderConcepts;
     }
 
     @Override
-    public Set<String> getTopicRoots() throws ClassifierException {
-        LinkedHashSet<String> rootTopics = new LinkedHashSet<String>();
+    public Set<String> getRootConcepts() throws ClassifierException {
+        LinkedHashSet<String> rootConcepts = new LinkedHashSet<String>();
         SolrServer solrServer = getActiveSolrServer();
         SolrQuery query = new SolrQuery();
         // TODO: this can be very big on flat thesauri: should we enable a paging API instead?
         query.setRows(MAX_ROOTS);
-        query.setFields(topicUriField);
-        query.setSortField(topicUriField, SolrQuery.ORDER.asc);
+        query.setFields(conceptUriField);
+        query.setSortField(conceptUriField, SolrQuery.ORDER.asc);
         if (broaderField != null) {
             // find any topic with an empty broaderField
             query.setParam("q", entryTypeField + ":" + METADATA_ENTRY + " AND -" + broaderField + ":"
@@ -515,36 +523,36 @@ public class TopicClassificationEngine e
                                        + " Some roots might be ignored.", engineId, MAX_ROOTS));
             }
             for (SolrDocument result : response.getResults()) {
-                rootTopics.add(result.getFirstValue(topicUriField).toString());
+                rootConcepts.add(result.getFirstValue(conceptUriField).toString());
             }
         } catch (SolrServerException e) {
             String msg = String.format("Error while fetching root topics on Solr Core '%s'.", solrCoreId);
             throw new ClassifierException(msg, e);
         }
-        return rootTopics;
+        return rootConcepts;
     }
 
     @Override
-    public void addTopic(String topicId, Collection<String> broaderTopics) throws ClassifierException {
+    public void addConcept(String conceptId, Collection<String> broaderConcepts) throws ClassifierException {
         // ensure that there is no previous topic registered with the same id
-        removeTopic(topicId);
+        removeConcept(conceptId);
 
         SolrInputDocument metadataEntry = new SolrInputDocument();
         String metadataEntryId = UUID.randomUUID().toString();
         String modelEntryId = UUID.randomUUID().toString();
-        metadataEntry.addField(topicUriField, topicId);
+        metadataEntry.addField(conceptUriField, conceptId);
         metadataEntry.addField(entryIdField, metadataEntryId);
         metadataEntry.addField(modelEntryIdField, modelEntryId);
         metadataEntry.addField(entryTypeField, METADATA_ENTRY);
-        if (broaderTopics != null && broaderField != null) {
-            metadataEntry.addField(broaderField, broaderTopics);
+        if (broaderConcepts != null && broaderField != null) {
+            metadataEntry.addField(broaderField, broaderConcepts);
         }
         SolrInputDocument modelEntry = new SolrInputDocument();
         modelEntry.addField(entryIdField, modelEntryId);
-        modelEntry.addField(topicUriField, topicId);
+        modelEntry.addField(conceptUriField, conceptId);
         modelEntry.addField(entryTypeField, MODEL_ENTRY);
-        if (broaderTopics != null) {
-            invalidateModelFields(broaderTopics, modelUpdateDateField, modelEvaluationDateField);
+        if (broaderConcepts != null) {
+            invalidateModelFields(broaderConcepts, modelUpdateDateField, modelEvaluationDateField);
         }
         SolrServer solrServer = getActiveSolrServer();
         try {
@@ -554,7 +562,7 @@ public class TopicClassificationEngine e
             solrServer.request(request);
             solrServer.commit();
         } catch (Exception e) {
-            String msg = String.format("Error adding topic with id '%s' on Solr Core '%s'", topicId,
+            String msg = String.format("Error adding topic with id '%s' on Solr Core '%s'", conceptId,
                 solrCoreId);
             throw new ClassifierException(msg, e);
         }
@@ -563,17 +571,18 @@ public class TopicClassificationEngine e
     /*
      * The commit is the responsibility of the caller.
      */
-    protected void invalidateModelFields(Collection<String> topicIds, String... fieldNames) throws ClassifierException {
-        if (topicIds.isEmpty() || fieldNames.length == 0) {
+    protected void invalidateModelFields(Collection<String> conceptIds, String... fieldNames) throws ClassifierException {
+        if (conceptIds.isEmpty() || fieldNames.length == 0) {
             return;
         }
         SolrServer solrServer = getActiveSolrServer();
         List<String> invalidatedFields = Arrays.asList(fieldNames);
         try {
             UpdateRequest request = new UpdateRequest();
-            for (String topicId : topicIds) {
+            for (String conceptId : conceptIds) {
                 SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND "
-                                                + topicUriField + ":" + ClientUtils.escapeQueryChars(topicId));
+                                                + conceptUriField + ":"
+                                                + ClientUtils.escapeQueryChars(conceptId));
                 for (SolrDocument result : solrServer.query(query).getResults()) {
                     // there should be only one (or none: tolerated)
                     SolrInputDocument newEntry = new SolrInputDocument();
@@ -588,19 +597,19 @@ public class TopicClassificationEngine e
             solrServer.request(request);
         } catch (Exception e) {
             String msg = String.format("Error invalidating topics [%s] on Solr Core '%s'",
-                StringUtils.join(topicIds, ", "), solrCoreId);
+                StringUtils.join(conceptIds, ", "), solrCoreId);
             throw new ClassifierException(msg, e);
         }
     }
 
     @Override
-    public void removeTopic(String topicId) throws ClassifierException {
+    public void removeConcept(String conceptId) throws ClassifierException {
         SolrServer solrServer = getActiveSolrServer();
         try {
-            solrServer.deleteByQuery(topicUriField + ":" + ClientUtils.escapeQueryChars(topicId));
+            solrServer.deleteByQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
             solrServer.commit();
         } catch (Exception e) {
-            String msg = String.format("Error removing topic with id '%s' on Solr Core '%s'", topicId,
+            String msg = String.format("Error removing topic with id '%s' on Solr Core '%s'", conceptId,
                 solrCoreId);
             throw new ClassifierException(msg, e);
         }
@@ -625,13 +634,13 @@ public class TopicClassificationEngine e
         String offset = null;
         boolean done = false;
         int batchSize = 1000;
-        query.addSortField(topicUriField, SolrQuery.ORDER.asc);
+        query.addSortField(conceptUriField, SolrQuery.ORDER.asc);
         query.setRows(batchSize + 1);
         try {
             while (!done) {
                 // batch over all the indexed topics
                 if (offset != null) {
-                    q += " AND " + topicUriField + ":[" + ClientUtils.escapeQueryChars(offset.toString())
+                    q += " AND " + conceptUriField + ":[" + ClientUtils.escapeQueryChars(offset.toString())
                          + " TO *]";
                 }
                 query.setQuery(q);
@@ -639,9 +648,9 @@ public class TopicClassificationEngine e
                 int count = 0;
                 List<SolrDocument> batchDocuments = new ArrayList<SolrDocument>();
                 for (SolrDocument result : response.getResults()) {
-                    String topicId = result.getFirstValue(topicUriField).toString();
+                    String conceptId = result.getFirstValue(conceptUriField).toString();
                     if (count == batchSize) {
-                        offset = topicId;
+                        offset = conceptId;
                     } else {
                         count++;
                         batchDocuments.add(result);
@@ -676,10 +685,10 @@ public class TopicClassificationEngine e
             public int process(List<SolrDocument> batch) throws ClassifierException, TrainingSetException {
                 int processed = 0;
                 for (SolrDocument result : batch) {
-                    String topicId = result.getFirstValue(topicUriField).toString();
+                    String conceptId = result.getFirstValue(conceptUriField).toString();
                     List<String> impactedTopics = new ArrayList<String>();
-                    impactedTopics.add(topicId);
-                    impactedTopics.addAll(getNarrowerTopics(topicId));
+                    impactedTopics.add(conceptId);
+                    impactedTopics.addAll(getNarrowerConcepts(conceptId));
                     if (incr) {
                         Date lastModelUpdate = (Date) result.getFirstValue(modelUpdateDateField);
                         if (lastModelUpdate != null
@@ -689,7 +698,7 @@ public class TopicClassificationEngine e
                     }
                     String metadataEntryId = result.getFirstValue(entryIdField).toString();
                     String modelEntryId = result.getFirstValue(modelEntryIdField).toString();
-                    updateTopic(topicId, metadataEntryId, modelEntryId, impactedTopics,
+                    updateTopic(conceptId, metadataEntryId, modelEntryId, impactedTopics,
                         result.getFieldValues(broaderField));
                     processed++;
                 }
@@ -702,7 +711,7 @@ public class TopicClassificationEngine e
     }
 
     /**
-     * @param topicId
+     * @param conceptId
      *            the topic model to update
      * @param metadataEntryId
      *            of the metadata entry id of the topic
@@ -710,14 +719,14 @@ public class TopicClassificationEngine e
      *            of the model entry id of the topic
      * @param impactedTopics
      *            the list of impacted topics (e.g. the topic node and direct children)
-     * @param broaderTopics
+     * @param broaderConcepts
      *            the collection of broader to re-add in the broader field
      */
-    protected void updateTopic(String topicId,
+    protected void updateTopic(String conceptId,
                                String metadataId,
                                String modelId,
                                List<String> impactedTopics,
-                               Collection<Object> broaderTopics) throws TrainingSetException,
+                               Collection<Object> broaderConcepts) throws TrainingSetException,
                                                                 ClassifierException {
         long start = System.currentTimeMillis();
         Batch<Example> examples = Batch.emtpyBatch(Example.class);
@@ -741,7 +750,7 @@ public class TopicClassificationEngine e
         // reindex the topic with the new text data collected from the examples
         SolrInputDocument modelEntry = new SolrInputDocument();
         modelEntry.addField(entryIdField, modelId);
-        modelEntry.addField(topicUriField, topicId);
+        modelEntry.addField(conceptUriField, conceptId);
         modelEntry.addField(entryTypeField, MODEL_ENTRY);
         if (sb.length() > 0) {
             modelEntry.addField(similarityField, sb);
@@ -752,9 +761,9 @@ public class TopicClassificationEngine e
         metadataEntry.addField(entryIdField, metadataId);
         metadataEntry.addField(modelEntryIdField, modelId);
         metadataEntry.addField(entryTypeField, METADATA_ENTRY);
-        metadataEntry.addField(topicUriField, topicId);
-        if (broaderTopics != null && broaderField != null) {
-            metadataEntry.addField(broaderField, broaderTopics);
+        metadataEntry.addField(conceptUriField, conceptId);
+        if (broaderConcepts != null && broaderField != null) {
+            metadataEntry.addField(broaderField, broaderConcepts);
         }
         if (modelUpdateDateField != null) {
             metadataEntry.addField(modelUpdateDateField, UTCTimeStamper.nowUtcDate());
@@ -767,12 +776,12 @@ public class TopicClassificationEngine e
             solrServer.request(request);
             // the commit is done by the caller in batch
         } catch (Exception e) {
-            String msg = String.format("Error updating topic with id '%s' on Solr Core '%s'", topicId,
+            String msg = String.format("Error updating topic with id '%s' on Solr Core '%s'", conceptId,
                 solrCoreId);
             throw new ClassifierException(msg, e);
         }
         long stop = System.currentTimeMillis();
-        log.debug("Sucessfully updated topic {} in {}s", topicId, (double) (stop - start) / 1000.);
+        log.debug("Sucessfully updated topic {} in {}s", conceptId, (double) (stop - start) / 1000.);
     }
 
     protected void checkTrainingSet() throws TrainingSetException {
@@ -800,7 +809,7 @@ public class TopicClassificationEngine e
         config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
         config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
         config.put(TopicClassificationEngine.SOLR_CORE, server);
-        config.put(TopicClassificationEngine.TOPIC_URI_FIELD, "topic");
+        config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "topic");
         config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
         config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
         config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
@@ -870,16 +879,16 @@ public class TopicClassificationEngine e
             @Override
             public int process(List<SolrDocument> batch) throws ClassifierException {
                 for (SolrDocument topicEntry : batch) {
-                    String topicId = topicEntry.getFirstValue(topicUriField).toString();
+                    String conceptId = topicEntry.getFirstValue(conceptUriField).toString();
                     Collection<Object> broader = topicEntry.getFieldValues(broaderField);
                     if (broader == null) {
-                        classifier.addTopic(topicId, null);
+                        classifier.addConcept(conceptId, null);
                     } else {
-                        List<String> broaderTopics = new ArrayList<String>();
-                        for (Object broaderTopic : broader) {
-                            broaderTopics.add(broaderTopic.toString());
+                        List<String> broaderConcepts = new ArrayList<String>();
+                        for (Object broaderConcept : broader) {
+                            broaderConcepts.add(broaderConcept.toString());
                         }
-                        classifier.addTopic(topicId, broaderTopics);
+                        classifier.addConcept(conceptId, broaderConcepts);
                     }
                 }
                 return batch.size();
@@ -900,7 +909,7 @@ public class TopicClassificationEngine e
             public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
                 int offset;
                 for (SolrDocument topicMetadata : batch) {
-                    String topic = topicMetadata.getFirstValue(topicUriField).toString();
+                    String topic = topicMetadata.getFirstValue(conceptUriField).toString();
                     List<String> topics = Arrays.asList(topic);
                     List<String> falseNegativeExamples = new ArrayList<String>();
                     int truePositives = 0;
@@ -922,7 +931,7 @@ public class TopicClassificationEngine e
                                     .suggestTopics(example.contents);
                             boolean match = false;
                             for (TopicSuggestion suggestedTopic : suggestedTopics) {
-                                if (topic.equals(suggestedTopic.uri)) {
+                                if (topic.equals(suggestedTopic.conceptUri)) {
                                     match = true;
                                     truePositives++;
                                     break;
@@ -955,7 +964,7 @@ public class TopicClassificationEngine e
                             List<TopicSuggestion> suggestedTopics = classifier
                                     .suggestTopics(example.contents);
                             for (TopicSuggestion suggestedTopic : suggestedTopics) {
-                                if (topic.equals(suggestedTopic.uri)) {
+                                if (topic.equals(suggestedTopic.conceptUri)) {
                                     falsePositives++;
                                     if (falsePositiveExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
                                         falsePositiveExamples.add(example.id);
@@ -999,7 +1008,7 @@ public class TopicClassificationEngine e
      * Update the performance statistics in a metadata entry of a topic. It is the responsibility of the
      * caller to commit.
      */
-    protected void updatePerformanceMetadata(String topicId,
+    protected void updatePerformanceMetadata(String conceptId,
                                              float precision,
                                              float recall,
                                              int positiveSupport,
@@ -1008,8 +1017,8 @@ public class TopicClassificationEngine e
                                              List<String> falseNegativeExamples) throws ClassifierException {
         SolrServer solrServer = getActiveSolrServer();
         try {
-            SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + topicUriField
-                                            + ":" + ClientUtils.escapeQueryChars(topicId));
+            SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + conceptUriField
+                                            + ":" + ClientUtils.escapeQueryChars(conceptId));
             for (SolrDocument result : solrServer.query(query).getResults()) {
                 // there should be only one (or none: tolerated)
                 // fetch any old values to update (all metadata fields are assumed to be stored)s
@@ -1032,7 +1041,7 @@ public class TopicClassificationEngine e
             }
         } catch (Exception e) {
             String msg = String.format(
-                "Error updating performance metadata for topic '%s' on Solr Core '%s'", topicId, solrCoreId);
+                "Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId);
             throw new ClassifierException(msg, e);
         }
     }
@@ -1063,15 +1072,15 @@ public class TopicClassificationEngine e
     }
 
     @Override
-    public ClassificationReport getPerformanceEstimates(String topicId) throws ClassifierException {
+    public ClassificationReport getPerformanceEstimates(String conceptId) throws ClassifierException {
 
         SolrServer solrServer = getActiveSolrServer();
-        SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + topicUriField + ":"
-                                        + ClientUtils.escapeQueryChars(topicId));
+        SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + conceptUriField
+                                        + ":" + ClientUtils.escapeQueryChars(conceptId));
         try {
             SolrDocumentList results = solrServer.query(query).getResults();
             if (results.isEmpty()) {
-                throw new ClassifierException(String.format("'%s' is not a registered topic", topicId));
+                throw new ClassifierException(String.format("'%s' is not a registered topic", conceptId));
             }
             SolrDocument metadata = results.get(0);
             Float precision = computeMeanValue(metadata, precisionField);
@@ -1097,7 +1106,7 @@ public class TopicClassificationEngine e
             return report;
         } catch (SolrServerException e) {
             throw new ClassifierException(String.format("Error fetching the performance report for topic "
-                                                        + topicId));
+                                                        + conceptId));
         }
     }
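
Before the interface changes below, a minimal usage sketch of the renamed suggestion API (not part of this commit): it assumes an already configured TopicClassifier instance, for example a TopicClassificationEngine built from the parameters shown elsewhere in this diff; the SuggestConceptsSketch class name and the printSuggestions helper are illustrative only.

    import java.util.List;

    import org.apache.stanbol.enhancer.topic.TopicClassifier;
    import org.apache.stanbol.enhancer.topic.TopicSuggestion;

    // Hypothetical helper, not part of this commit: prints the concept suggestions
    // for a piece of text using the renamed TopicSuggestion.conceptUri field.
    public class SuggestConceptsSketch {

        public static void printSuggestions(TopicClassifier classifier, String text) throws Exception {
            // suggestTopics keeps its name in this refactoring; only the fields of the
            // returned suggestions were renamed (uri -> conceptUri, paths -> broader).
            List<TopicSuggestion> suggestions = classifier.suggestTopics(text);
            for (TopicSuggestion suggestion : suggestions) {
                System.out.println(suggestion.conceptUri + " (score: " + suggestion.score + ")");
            }
        }
    }
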
 

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java Fri Feb  3 14:59:56 2012
@@ -25,8 +25,8 @@ import org.apache.stanbol.enhancer.topic
 import org.apache.stanbol.enhancer.topic.training.TrainingSetException;
 
 /**
- * Service interface for suggesting hierarchical topics from a specific scheme (a.k.a. taxonomy, thesaurus or
- * topics hierarchy) from the text content of a document or part of a document.
+ * Service interface for suggesting hierarchical concepts from a specific scheme (a.k.a. taxonomy, thesaurus or
+ * concepts hierarchy) from the text content of a document or part of a document.
  */
 public interface TopicClassifier {
 
@@ -46,30 +46,30 @@ public interface TopicClassifier {
      * 
      * @param text
      *            the text content to analyze
-     * @return the most likely topics related to the text
+     * @return the most likely concepts related to the text
      * @throws EngineException
      */
     List<TopicSuggestion> suggestTopics(String text) throws ClassifierException;
 
     /**
-     * @return the set of ids of topics directly broader than
+     * @return the set of ids of concepts directly narrower than
      * @param id
      */
-    Set<String> getNarrowerTopics(String broadTopicId) throws ClassifierException;
+    Set<String> getNarrowerConcepts(String broadConceptId) throws ClassifierException;
 
     /**
-     * @return the set of ids of topics directly narrower than
+     * @return the set of ids of concepts directly broader than
      * @param id
      */
-    Set<String> getBroaderTopics(String id) throws ClassifierException;
+    Set<String> getBroaderConcepts(String id) throws ClassifierException;
 
     /**
-     * @return the set of ids of topics without broader topics.
+     * @return the set of ids of concepts without broader concepts.
      */
-    Set<String> getTopicRoots() throws ClassifierException;
+    Set<String> getRootConcepts() throws ClassifierException;
 
     /**
-     * @return true if the classifier model can be updated with the {@code addTopic} / {@code removeTopic} /
+     * @return true if the classifier model can be updated with the {@code addConcept} / {@code removeConcept} /
      *         {@code updateModel} / methods.
      */
     boolean isUpdatable();
@@ -77,14 +77,14 @@ public interface TopicClassifier {
     /**
      * Register a topic and set it's ancestors in the taxonomy. Warning: re-adding an already existing topic
      * can delete the underlying statistical model. Calling {@code updateModel} is necessary to rebuild the
-     * statistical model based on the hierarchical structure of the topics and the registered training set.
+     * statistical model based on the hierarchical structure of the concepts and the registered training set.
      * 
      * @param id
      *            the new topic id
-     * @param broaderTopics
-     *            list of directly broader topics in the thesaurus
+     * @param broaderConcepts
+     *            list of directly broader concepts in the thesaurus
      */
-    void addTopic(String id, Collection<String> broaderTopics) throws ClassifierException;
+    void addConcept(String id, Collection<String> broaderConcepts) throws ClassifierException;
 
     /**
      * Remove a topic from the thesaurus. WARNING: it is the caller responsibility to recursively remove or
@@ -95,7 +95,7 @@ public interface TopicClassifier {
      * @param id
      *            if of the topic to remove from the model
      */
-    void removeTopic(String id) throws ClassifierException;
+    void removeConcept(String id) throws ClassifierException;
 
     /**
      * Register a training set to use to build the statistical model of the classifier.
@@ -104,10 +104,10 @@ public interface TopicClassifier {
 
     /**
      * Update (incrementally or from scratch) the statistical model of the classifier. Note: depending on the
-     * size of the dataset and the number of topics to update, this process can take a long time and should
+     * size of the dataset and the number of concepts to update, this process can take a long time and should
      * probably be wrapped in a dedicated thread if called by a the user interface layer.
      * 
-     * @return the number of updated topics
+     * @return the number of updated concepts
      */
     int updateModel(boolean incremental) throws TrainingSetException, ClassifierException;
 
@@ -115,7 +115,7 @@ public interface TopicClassifier {
      * Perform k-fold cross validation of the model to compute estimates of the precision, recall and f1
      * score.
      * 
-     * @return number of updated topics
+     * @return number of updated concepts
      */
     public int updatePerformanceEstimates(boolean incremental) throws ClassifierException,
                                                               TrainingSetException;
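
A hedged sketch of the renamed concept-management calls (not part of this commit), mirroring the programmatic thesaurus construction exercised in TopicEngineTest further down; the example URIs and the BuildSchemeSketch class are illustrative, and a classifier with a configured broader field is assumed.

    import java.util.Arrays;

    import org.apache.stanbol.enhancer.topic.TopicClassifier;

    // Hypothetical sketch, not part of this commit: builds a two-node concept
    // scheme with the renamed addConcept / get*Concepts methods.
    public class BuildSchemeSketch {

        public static void buildTinyScheme(TopicClassifier classifier) throws Exception {
            // Root concepts are registered without broader concepts (null or an empty list).
            classifier.addConcept("http://example.com/topics/root", null);
            // Narrower concepts list their directly broader concepts.
            classifier.addConcept("http://example.com/topics/child",
                Arrays.asList("http://example.com/topics/root"));

            // The "narrower" relation is inferred by the engine from the broader field.
            System.out.println(classifier.getRootConcepts());
            System.out.println(classifier.getNarrowerConcepts("http://example.com/topics/root"));
            System.out.println(classifier.getBroaderConcepts("http://example.com/topics/child"));
        }
    }
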

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java Fri Feb  3 14:59:56 2012
@@ -26,27 +26,44 @@ import org.apache.commons.lang.StringUti
  */
 public class TopicSuggestion {
 
-    public final String uri;
-
-    public final List<String> paths = new ArrayList<String>();
-
+    /**
+     * The URI of the concept in the hierarchical conceptual scheme (that holds the broader relationship)
+     */
+    public final String conceptUri;
+
+    /**
+     * Reference to the broader concepts of this suggestion.
+     */
+    public final List<String> broader = new ArrayList<String>();
+
+    /**
+     * The (optional) URI of a resource that grounds this concept in the real world. Can be null.
+     */
+    public final String primaryTopicUri;
+
+    /**
+     * The (positive) score of the suggestion: higher is better. Zero would mean unrelated. The absolute value
+     * is meaningless: suggestion scores cannot be compared across different input text documents or
+     * distinct concept schemes.
+     */
     public final float score;
 
-    public TopicSuggestion(String uri, List<String> paths, float score) {
-        this.uri = uri;
-        if (paths != null) {
-            this.paths.addAll(paths);
+    public TopicSuggestion(String conceptUri, String primaryTopicUri, List<String> broader, float score) {
+        this.conceptUri = conceptUri;
+        this.primaryTopicUri = primaryTopicUri;
+        if (broader != null) {
+            this.broader.addAll(broader);
         }
         this.score = score;
     }
 
-    public TopicSuggestion(String uri, float score) {
-        this(uri, null, score);
+    public TopicSuggestion(String conceptUri, float score) {
+        this(conceptUri, null, null, score);
     }
 
     @Override
     public String toString() {
-        return String.format("TopicSuggestion(\"%s\", [%s], %f)", uri, StringUtils.join(paths, "\", \""),
-            score);
+        return String.format("TopicSuggestion(\"%s\", [%s], %f)", conceptUri,
+            StringUtils.join(broader, "\", \""), score);
     }
 }
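
A short, hypothetical sketch (not part of this commit) exercising the reworked suggestion fields; the URIs are illustrative only.

    import java.util.Arrays;

    import org.apache.stanbol.enhancer.topic.TopicSuggestion;

    // Hypothetical sketch, not part of this commit: constructs suggestions with
    // the renamed fields (conceptUri, primaryTopicUri, broader).
    public class SuggestionFieldsSketch {

        public static void main(String[] args) {
            // Full constructor: concept URI, optional real-world grounding, broader concepts, score.
            TopicSuggestion suggestion = new TopicSuggestion(
                "http://example.com/scheme/music",
                "http://dbpedia.org/resource/Music",              // primaryTopicUri, may be null
                Arrays.asList("http://example.com/scheme/arts"),  // directly broader concepts
                0.87f);
            System.out.println(suggestion.conceptUri);
            System.out.println(suggestion.primaryTopicUri);
            System.out.println(suggestion.broader);
            System.out.println(suggestion);  // uses the updated toString()

            // Two-argument shortcut: primaryTopicUri stays null and broader stays empty.
            TopicSuggestion minimal = new TopicSuggestion("http://example.com/scheme/music", 0.87f);
            System.out.println(minimal.score);
        }
    }
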

Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Fri Feb  3 14:59:56 2012
@@ -105,7 +105,7 @@ public class TopicEngineTest extends Emb
         query.set("commit", true);
         query.set("separator", "\t");
         query.set("headers", false);
-        query.set("fieldnames", "topic,popularity,paths,text");
+        query.set("fieldnames", "topic,popularity,broader,text");
         query.set(CommonParams.STREAM_CONTENTTYPE, "text/plan;charset=utf-8");
         query.set(CommonParams.STREAM_BODY, IOUtils.toString(is, "utf-8"));
 
@@ -122,14 +122,14 @@ public class TopicEngineTest extends Emb
         assertNotNull(classifier);
         assertEquals(classifier.engineId, "test-engine");
         assertEquals(classifier.getActiveSolrServer(), classifierSolrServer);
-        assertEquals(classifier.topicUriField, "topic");
+        assertEquals(classifier.conceptUriField, "topic");
         assertEquals(classifier.similarityField, "classifier_features");
         assertEquals(classifier.acceptedLanguages, new ArrayList<String>());
 
         // check some required attributes
         Hashtable<String,Object> configWithMissingTopicField = new Hashtable<String,Object>();
         configWithMissingTopicField.putAll(config);
-        configWithMissingTopicField.remove(TopicClassificationEngine.TOPIC_URI_FIELD);
+        configWithMissingTopicField.remove(TopicClassificationEngine.CONCEPT_URI_FIELD);
         try {
             TopicClassificationEngine.fromParameters(configWithMissingTopicField);
             fail("Should have raised a ConfigurationException");
@@ -155,40 +155,40 @@ public class TopicEngineTest extends Emb
     @Test
     public void testProgrammaticThesaurusConstruction() throws Exception {
         // Register the roots of the taxonomy
-        classifier.addTopic("http://example.com/topics/root1", null);
-        classifier.addTopic("http://example.com/topics/root2", null);
-        classifier.addTopic("http://example.com/topics/root3", new ArrayList<String>());
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root1").size());
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root2").size());
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root3").size());
-        assertEquals(3, classifier.getTopicRoots().size());
+        classifier.addConcept("http://example.com/topics/root1", null);
+        classifier.addConcept("http://example.com/topics/root2", null);
+        classifier.addConcept("http://example.com/topics/root3", new ArrayList<String>());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root1").size());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root2").size());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root3").size());
+        assertEquals(3, classifier.getRootConcepts().size());
 
         // Register some non root nodes
-        classifier.addTopic("http://example.com/topics/node1",
+        classifier.addConcept("http://example.com/topics/node1",
             Arrays.asList("http://example.com/topics/root1", "http://example.com/topics/root2"));
-        classifier.addTopic("http://example.com/topics/node2",
+        classifier.addConcept("http://example.com/topics/node2",
             Arrays.asList("http://example.com/topics/root3"));
-        classifier.addTopic("http://example.com/topics/node3",
+        classifier.addConcept("http://example.com/topics/node3",
             Arrays.asList("http://example.com/topics/node1", "http://example.com/topics/node2"));
 
         // the root where not impacted
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root1").size());
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root2").size());
-        assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root3").size());
-        assertEquals(3, classifier.getTopicRoots().size());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root1").size());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root2").size());
+        assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root3").size());
+        assertEquals(3, classifier.getRootConcepts().size());
 
         // the other nodes have the same broader topics as at creation time
-        assertEquals(2, classifier.getBroaderTopics("http://example.com/topics/node1").size());
-        assertEquals(1, classifier.getBroaderTopics("http://example.com/topics/node2").size());
-        assertEquals(2, classifier.getBroaderTopics("http://example.com/topics/node3").size());
+        assertEquals(2, classifier.getBroaderConcepts("http://example.com/topics/node1").size());
+        assertEquals(1, classifier.getBroaderConcepts("http://example.com/topics/node2").size());
+        assertEquals(2, classifier.getBroaderConcepts("http://example.com/topics/node3").size());
 
         // check the induced narrower relationships
-        assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root1").size());
-        assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root2").size());
-        assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root3").size());
-        assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/node1").size());
-        assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/node2").size());
-        assertEquals(0, classifier.getNarrowerTopics("http://example.com/topics/node3").size());
+        assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root1").size());
+        assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root2").size());
+        assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root3").size());
+        assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/node1").size());
+        assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/node2").size());
+        assertEquals(0, classifier.getNarrowerConcepts("http://example.com/topics/node3").size());
     }
 
     @Test
@@ -213,7 +213,7 @@ public class TopicEngineTest extends Emb
         assertNotNull(suggestedTopics);
         assertEquals(suggestedTopics.size(), 10);
         TopicSuggestion bestSuggestion = suggestedTopics.get(0);
-        assertEquals(bestSuggestion.uri, "Category:American_films");
+        assertEquals(bestSuggestion.conceptUri, "Category:American_films");
     }
 
     @Test
@@ -229,13 +229,13 @@ public class TopicEngineTest extends Emb
         String music = "urn:topics/music";
         String law = "urn:topics/law";
 
-        classifier.addTopic(business, null);
-        classifier.addTopic(technology, null);
-        classifier.addTopic(sport, null);
-        classifier.addTopic(music, null);
-        classifier.addTopic(apple, Arrays.asList(business, technology));
-        classifier.addTopic(football, Arrays.asList(sport));
-        classifier.addTopic(worldcup, Arrays.asList(football));
+        classifier.addConcept(business, null);
+        classifier.addConcept(technology, null);
+        classifier.addConcept(sport, null);
+        classifier.addConcept(music, null);
+        classifier.addConcept(apple, Arrays.asList(business, technology));
+        classifier.addConcept(football, Arrays.asList(sport));
+        classifier.addConcept(worldcup, Arrays.asList(football));
 
         // train the classifier on an empty dataset
         classifier.setTrainingSet(trainingSet);
@@ -280,10 +280,10 @@ public class TopicEngineTest extends Emb
         // test the trained classifier
         suggestions = classifier.suggestTopics("I like the sound of vuvuzula in the morning!");
         assertTrue(suggestions.size() >= 4);
-        assertEquals(worldcup, suggestions.get(0).uri);
-        assertEquals(music, suggestions.get(1).uri);
-        assertEquals(football, suggestions.get(2).uri);
-        assertEquals(sport, suggestions.get(3).uri);
+        assertEquals(worldcup, suggestions.get(0).conceptUri);
+        assertEquals(music, suggestions.get(1).conceptUri);
+        assertEquals(football, suggestions.get(2).conceptUri);
+        assertEquals(sport, suggestions.get(3).conceptUri);
         // check that the scores are decreasing:
         assertTrue(suggestions.get(0).score >= suggestions.get(1).score);
         assertTrue(suggestions.get(1).score >= suggestions.get(2).score);
@@ -291,14 +291,14 @@ public class TopicEngineTest extends Emb
 
         suggestions = classifier.suggestTopics("Apple is no longer a startup.");
         assertTrue(suggestions.size() >= 3);
-        assertEquals(apple, suggestions.get(0).uri);
-        assertEquals(technology, suggestions.get(1).uri);
-        assertEquals(business, suggestions.get(2).uri);
+        assertEquals(apple, suggestions.get(0).conceptUri);
+        assertEquals(technology, suggestions.get(1).conceptUri);
+        assertEquals(business, suggestions.get(2).conceptUri);
 
         suggestions = classifier.suggestTopics("You can watch the worldcup on your iPad.");
         assertTrue(suggestions.size() >= 2);
-        assertEquals(apple, suggestions.get(0).uri);
-        assertEquals(worldcup, suggestions.get(1).uri);
+        assertEquals(apple, suggestions.get(0).conceptUri);
+        assertEquals(worldcup, suggestions.get(1).conceptUri);
 
         // test incremental update of a single root node
         Thread.sleep(10);
@@ -308,7 +308,7 @@ public class TopicEngineTest extends Emb
         assertEquals(0, classifier.updateModel(true));
         suggestions = classifier.suggestTopics("Glory box is best mixed as dubstep.");
         assertTrue(suggestions.size() >= 1);
-        assertEquals(music, suggestions.get(0).uri);
+        assertEquals(music, suggestions.get(0).conceptUri);
 
         // test incremental update of a leaf node (the parent topic needs re-indexing too)
         Thread.sleep(10);
@@ -331,12 +331,12 @@ public class TopicEngineTest extends Emb
                                           + " in which they intend to represent the state.",
             Arrays.asList(law));
         assertEquals(0, classifier.updateModel(true));
-        classifier.addTopic(law, null);
+        classifier.addConcept(law, null);
         assertEquals(1, classifier.updateModel(true));
         assertEquals(0, classifier.updateModel(true));
 
         // registering new subtopics invalidate the models of the parent as well
-        classifier.addTopic("urn:topics/sportsmafia", Arrays.asList(football, business));
+        classifier.addConcept("urn:topics/sportsmafia", Arrays.asList(football, business));
         assertEquals(3, classifier.updateModel(true));
         assertEquals(0, classifier.updateModel(true));
 
@@ -354,8 +354,8 @@ public class TopicEngineTest extends Emb
         }
 
         // register some topics
-        classifier.addTopic("urn:t/001", null);
-        classifier.addTopic("urn:t/002", Arrays.asList("urn:t/001"));
+        classifier.addConcept("urn:t/001", null);
+        classifier.addConcept("urn:t/002", Arrays.asList("urn:t/001"));
         performanceEstimates = classifier.getPerformanceEstimates("urn:t/002");
         assertFalse(performanceEstimates.uptodate);
 
@@ -370,7 +370,7 @@ public class TopicEngineTest extends Emb
         assertEquals(0.67f, performanceEstimates.f1, 0.01);
         assertEquals(34, performanceEstimates.positiveSupport);
         assertEquals(32, performanceEstimates.negativeSupport);
-        assertTrue(classifier.getBroaderTopics("urn:t/002").contains("urn:t/001"));
+        assertTrue(classifier.getBroaderConcepts("urn:t/002").contains("urn:t/001"));
 
         // accumulate other folds statistics and compute means of statistics
         classifier.updatePerformanceMetadata("urn:t/002", 0.79f, 0.63f, 10, 10, Arrays.asList("ex1", "ex5"),
@@ -456,7 +456,7 @@ public class TopicEngineTest extends Emb
         for (int i = 0; i < numberOfTopics; i++) {
             String topic = String.format("urn:t/%03d", i + 1);
             topics[i] = topic;
-            classifier.addTopic(topic, null);
+            classifier.addConcept(topic, null);
             String[] terms = randomVocabulary(i, vocabSizeMin, vocabSizeMax, rng);
             vocabularies.put(topic, terms);
         }
@@ -521,7 +521,7 @@ public class TopicEngineTest extends Emb
         config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
         config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
         config.put(TopicClassificationEngine.SOLR_CORE, classifierSolrServer);
-        config.put(TopicClassificationEngine.TOPIC_URI_FIELD, "topic");
+        config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "topic");
         config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
         config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
         config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
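
For orientation, a hedged configuration sketch echoing the test setup above (not part of this commit): the constants and the fromParameters call appear in this diff, but the "entry_id" value, the type of the Solr core reference, and the return type of fromParameters are assumptions rather than facts established by the commit.

    import java.util.Hashtable;

    import org.apache.stanbol.enhancer.engine.topic.TopicClassificationEngine;

    // Hypothetical sketch, not part of this commit: assembles an engine
    // configuration using the renamed CONCEPT_URI_FIELD key.
    public class EngineConfigSketch {

        public static TopicClassificationEngine configure(Object solrCoreOrServer) throws Exception {
            Hashtable<String,Object> config = new Hashtable<String,Object>();
            config.put(TopicClassificationEngine.ENGINE_ID, "test-engine");
            config.put(TopicClassificationEngine.SOLR_CORE, solrCoreOrServer);
            config.put(TopicClassificationEngine.ENTRY_ID_FIELD, "entry_id");  // assumed field name
            config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
            config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
            // Renamed in this commit: TOPIC_URI_FIELD -> CONCEPT_URI_FIELD.
            config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "topic");
            config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
            config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
            config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
            // Return type assumed from the test code's usage of the configured classifier.
            return TopicClassificationEngine.fromParameters(config);
        }
    }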