You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by og...@apache.org on 2012/02/03 15:59:57 UTC
svn commit: r1240206 - in
/incubator/stanbol/trunk/enhancer/engines/topic/src:
main/java/org/apache/stanbol/enhancer/engine/topic/
main/java/org/apache/stanbol/enhancer/topic/
test/java/org/apache/stanbol/enhancer/engine/topic/
Author: ogrisel
Date: Fri Feb 3 14:59:56 2012
New Revision: 1240206
URL: http://svn.apache.org/viewvc?rev=1240206&view=rev
Log:
STANBOL-197: refactor API to use SKOS terminology
Modified:
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java
incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.java Fri Feb 3 14:59:56 2012
@@ -16,7 +16,6 @@
*/
package org.apache.stanbol.enhancer.engine.topic;
-
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@@ -27,7 +26,6 @@ import java.util.Date;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
-import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -36,11 +34,9 @@ import java.util.Set;
import java.util.UUID;
import org.apache.clerezza.rdf.core.MGraph;
-import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
@@ -91,11 +87,19 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * Enhancement Engine that provides the ability to assign a text document to a set of topics indexed in a
+ * Enhancement Engine that provides the ability to assign a text document to a set of concepts indexed in a
* dedicated Solr core. The assignment logic comes from terms frequencies match of the text of the document to
- * categorize with the text indexed for each topic.
+ * categorize with the text indexed for each concept.
+ *
+ * The data model of the concept tree follows the SKOS model: concepts are organized in a hierarchical
+ * "scheme" with a "broader" relation (and the inferred "narrower" inverse relation). Concepts can also
+ * optionally be grounded in the real world by means of a foaf:primaryTopic link to an external resource
+ * such as a DBpedia entry.
*
- * The solr server is expected to be configured with the MoreLikeThisHandler and the matching fields from the
+ * A document is typically classified with the concept by using the dct:subject property to link the document
+ * (subject) to the concept (object).
+ *
+ * The Solr server is expected to be configured with the MoreLikeThisHandler and the matching fields from the
* engine configuration.
*/
@Component(metatype = true, immediate = true, configurationFactory = true, policy = ConfigurationPolicy.REQUIRE)
@@ -106,7 +110,7 @@ import org.slf4j.LoggerFactory;
@Property(name = TopicClassificationEngine.SOLR_CORE),
@Property(name = TopicClassificationEngine.LANGUAGES),
@Property(name = TopicClassificationEngine.SIMILARTITY_FIELD),
- @Property(name = TopicClassificationEngine.TOPIC_URI_FIELD),
+ @Property(name = TopicClassificationEngine.CONCEPT_URI_FIELD),
@Property(name = TopicClassificationEngine.BROADER_FIELD),
@Property(name = TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, value = "last_update_dt"),
@Property(name = TopicClassificationEngine.PRECISION_FIELD, value = "precision"),
@@ -117,7 +121,7 @@ import org.slf4j.LoggerFactory;
@Property(name = TopicClassificationEngine.FALSE_POSITIVES_FIELD, value = "false_positives"),
@Property(name = TopicClassificationEngine.POSITIVE_SUPPORT_FIELD, value = "positive_support"),
@Property(name = TopicClassificationEngine.NEGATIVE_SUPPORT_FIELD, value = "negative_support"),
- @Property(name = Constants.SERVICE_RANKING, intValue=0)})
+ @Property(name = Constants.SERVICE_RANKING, intValue = 0)})
public class TopicClassificationEngine extends ConfiguredSolrCoreTracker implements EnhancementEngine,
ServiceProperties, TopicClassifier {
@@ -139,10 +143,12 @@ public class TopicClassificationEngine e
public static final String SIMILARTITY_FIELD = "org.apache.stanbol.enhancer.engine.topic.similarityField";
- public static final String TOPIC_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.uriField";
+ public static final String CONCEPT_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.uriField";
public static final String BROADER_FIELD = "org.apache.stanbol.enhancer.engine.topic.broaderField";
+ public static final String PRIMARY_TOPIC_FIELD = "org.apache.stanbol.enhancer.engine.topic.primaryTopicField";
+
public static final String MODEL_UPDATE_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelUpdateDateField";
public static final String MODEL_EVALUATION_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelEvaluationDateField";
@@ -171,7 +177,7 @@ public class TopicClassificationEngine e
* Contains the only supported mime type {@link #PLAIN_TEXT_MIMETYPE}
*/
protected static final Set<String> SUPPORTED_MIMETYPES = Collections.singleton(PLAIN_TEXT_MIMETYPE);
-
+
public static final String SOLR_NON_EMPTY_FIELD = "[\"\" TO *]";
// TODO: make the following fields configurable
@@ -195,7 +201,7 @@ public class TopicClassificationEngine e
protected String similarityField;
- protected String topicUriField;
+ protected String conceptUriField;
protected String broaderField;
@@ -254,7 +260,7 @@ public class TopicClassificationEngine e
engineId = getRequiredStringParam(config, ENGINE_ID);
entryIdField = getRequiredStringParam(config, ENTRY_ID_FIELD);
modelEntryIdField = getRequiredStringParam(config, MODEL_ENTRY_ID_FIELD);
- topicUriField = getRequiredStringParam(config, TOPIC_URI_FIELD);
+ conceptUriField = getRequiredStringParam(config, CONCEPT_URI_FIELD);
entryTypeField = getRequiredStringParam(config, ENTRY_TYPE_FIELD);
similarityField = getRequiredStringParam(config, SIMILARTITY_FIELD);
acceptedLanguages = getStringListParan(config, LANGUAGES);
@@ -278,39 +284,40 @@ public class TopicClassificationEngine e
@Override
public int canEnhance(ContentItem ci) throws EngineException {
- if(ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null &&
- getActiveSolrServer() != null){
+ if (ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES) != null && getActiveSolrServer() != null) {
return ENHANCE_SYNCHRONOUS;
} else {
return CANNOT_ENHANCE;
}
- //TODO ogrisel: validate that it is no problem that this does no longer
- //check that the text is not empty
-// if (text.trim().length() == 0) {
-// return CANNOT_ENHANCE;
-// }
+ // TODO ogrisel: validate that it is no problem that this does no longer
+ // check that the text is not empty
+ // if (text.trim().length() == 0) {
+ // return CANNOT_ENHANCE;
+ // }
}
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
Entry<UriRef,Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
- if(contentPart == null){
- throw new IllegalStateException("No ContentPart with a supported Mime Type"
- + "found for ContentItem "+ci.getUri()+"(supported: '"
- + SUPPORTED_MIMETYPES+"') -> this indicates that canEnhance was"
- + "NOT called and indicates a bug in the used EnhancementJobManager!");
+ if (contentPart == null) {
+ throw new IllegalStateException(
+ "No ContentPart with a supported Mime Type" + "found for ContentItem " + ci.getUri()
+ + "(supported: '" + SUPPORTED_MIMETYPES
+ + "') -> this indicates that canEnhance was"
+ + "NOT called and indicates a bug in the used EnhancementJobManager!");
}
String text;
try {
text = ContentItemHelper.getText(contentPart.getValue());
} catch (IOException e) {
- throw new InvalidContentException(String.format("Unable to extract "
- +" textual content from ContentPart %s of ContentItem %s!",
- contentPart.getKey(),ci.getUri()), e);
- }
- if(text.trim().isEmpty()){
- log.warn("ContentPart {} of ContentItem {} does not contain any " +
- "text to extract topics from",contentPart.getKey(),ci.getUri());
+ throw new InvalidContentException(String.format(
+ "Unable to extract " + " textual content from ContentPart %s of ContentItem %s!",
+ contentPart.getKey(), ci.getUri()), e);
+ }
+ if (text.trim().isEmpty()) {
+ log.warn(
+ "ContentPart {} of ContentItem {} does not contain any " + "text to extract topics from",
+ contentPart.getKey(), ci.getUri());
return;
}
MGraph metadata = ci.getMetadata();
@@ -327,11 +334,11 @@ public class TopicClassificationEngine e
metadata.add(new TripleImpl(enhancement,
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE,
TechnicalClasses.ENHANCER_TOPICANNOTATION));
-
+
// add link to entity
metadata.add(new TripleImpl(enhancement,
org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE,
- new UriRef(topic.uri)));
+ new UriRef(topic.conceptUri)));
// TODO: make it possible to dereference and the path to the root the entities according to a
// configuration parameter
}
@@ -358,11 +365,12 @@ public class TopicClassificationEngine e
public String getSchemeId() {
return engineId;
}
+
@Override
public String getName() {
return engineId;
}
-
+
@Override
public List<String> getAcceptedLanguages() {
return acceptedLanguages;
@@ -391,17 +399,17 @@ public class TopicClassificationEngine e
// over query the number of suggestions to find a statistical cut based on the curve of the scores of
// the top suggestion
query.setRows(MAX_SUGGESTIONS * 3);
- query.setFields(topicUriField);
+ query.setFields(conceptUriField);
query.setIncludeScore(true);
try {
StreamQueryRequest request = new StreamQueryRequest(query);
QueryResponse response = request.process(solrServer);
SolrDocumentList results = response.getResults();
for (SolrDocument result : results.toArray(new SolrDocument[0])) {
- String uri = (String) result.getFirstValue(topicUriField);
+ String uri = (String) result.getFirstValue(conceptUriField);
if (uri == null) {
throw new ClassifierException(String.format(
- "Solr Core '%s' is missing required field '%s'.", solrCoreId, topicUriField));
+ "Solr Core '%s' is missing required field '%s'.", solrCoreId, conceptUriField));
}
Float score = (Float) result.getFirstValue("score");
suggestedTopics.add(new TopicSuggestion(uri, score));
@@ -441,36 +449,36 @@ public class TopicClassificationEngine e
}
@Override
- public Set<String> getNarrowerTopics(String broadTopicId) throws ClassifierException {
- LinkedHashSet<String> narrowerTopics = new LinkedHashSet<String>();
+ public Set<String> getNarrowerConcepts(String broadTopicId) throws ClassifierException {
+ LinkedHashSet<String> narrowerConcepts = new LinkedHashSet<String>();
if (broaderField == null) {
- return narrowerTopics;
+ return narrowerConcepts;
}
SolrServer solrServer = getActiveSolrServer();
SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY);
query.addFilterQuery(broaderField + ":" + ClientUtils.escapeQueryChars(broadTopicId));
- query.addField(topicUriField);
- query.addSortField(topicUriField, SolrQuery.ORDER.asc);
+ query.addField(conceptUriField);
+ query.addSortField(conceptUriField, SolrQuery.ORDER.asc);
try {
for (SolrDocument result : solrServer.query(query).getResults()) {
- narrowerTopics.add(result.getFirstValue(topicUriField).toString());
+ narrowerConcepts.add(result.getFirstValue(conceptUriField).toString());
}
} catch (SolrServerException e) {
String msg = String.format("Error while fetching narrower topics of '%s' on Solr Core '%s'.",
broadTopicId, solrCoreId);
throw new ClassifierException(msg, e);
}
- return narrowerTopics;
+ return narrowerConcepts;
}
@Override
- public Set<String> getBroaderTopics(String id) throws ClassifierException {
- LinkedHashSet<String> broaderTopics = new LinkedHashSet<String>();
+ public Set<String> getBroaderConcepts(String id) throws ClassifierException {
+ LinkedHashSet<String> broaderConcepts = new LinkedHashSet<String>();
if (broaderField == null) {
- return broaderTopics;
+ return broaderConcepts;
}
SolrServer solrServer = getActiveSolrServer();
- SolrQuery query = new SolrQuery(topicUriField + ":" + ClientUtils.escapeQueryChars(id));
+ SolrQuery query = new SolrQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(id));
query.addField(broaderField);
try {
for (SolrDocument result : solrServer.query(query).getResults()) {
@@ -480,7 +488,7 @@ public class TopicClassificationEngine e
continue;
}
for (Object value : broaderFieldValues) {
- broaderTopics.add(value.toString());
+ broaderConcepts.add(value.toString());
}
}
} catch (SolrServerException e) {
@@ -488,18 +496,18 @@ public class TopicClassificationEngine e
solrCoreId);
throw new ClassifierException(msg, e);
}
- return broaderTopics;
+ return broaderConcepts;
}
@Override
- public Set<String> getTopicRoots() throws ClassifierException {
- LinkedHashSet<String> rootTopics = new LinkedHashSet<String>();
+ public Set<String> getRootConcepts() throws ClassifierException {
+ LinkedHashSet<String> rootConcepts = new LinkedHashSet<String>();
SolrServer solrServer = getActiveSolrServer();
SolrQuery query = new SolrQuery();
// TODO: this can be very big on flat thesauri: should we enable a paging API instead?
query.setRows(MAX_ROOTS);
- query.setFields(topicUriField);
- query.setSortField(topicUriField, SolrQuery.ORDER.asc);
+ query.setFields(conceptUriField);
+ query.setSortField(conceptUriField, SolrQuery.ORDER.asc);
if (broaderField != null) {
// find any topic with an empty broaderField
query.setParam("q", entryTypeField + ":" + METADATA_ENTRY + " AND -" + broaderField + ":"
@@ -515,36 +523,36 @@ public class TopicClassificationEngine e
+ " Some roots might be ignored.", engineId, MAX_ROOTS));
}
for (SolrDocument result : response.getResults()) {
- rootTopics.add(result.getFirstValue(topicUriField).toString());
+ rootConcepts.add(result.getFirstValue(conceptUriField).toString());
}
} catch (SolrServerException e) {
String msg = String.format("Error while fetching root topics on Solr Core '%s'.", solrCoreId);
throw new ClassifierException(msg, e);
}
- return rootTopics;
+ return rootConcepts;
}
@Override
- public void addTopic(String topicId, Collection<String> broaderTopics) throws ClassifierException {
+ public void addConcept(String conceptId, Collection<String> broaderConcepts) throws ClassifierException {
// ensure that there is no previous topic registered with the same id
- removeTopic(topicId);
+ removeConcept(conceptId);
SolrInputDocument metadataEntry = new SolrInputDocument();
String metadataEntryId = UUID.randomUUID().toString();
String modelEntryId = UUID.randomUUID().toString();
- metadataEntry.addField(topicUriField, topicId);
+ metadataEntry.addField(conceptUriField, conceptId);
metadataEntry.addField(entryIdField, metadataEntryId);
metadataEntry.addField(modelEntryIdField, modelEntryId);
metadataEntry.addField(entryTypeField, METADATA_ENTRY);
- if (broaderTopics != null && broaderField != null) {
- metadataEntry.addField(broaderField, broaderTopics);
+ if (broaderConcepts != null && broaderField != null) {
+ metadataEntry.addField(broaderField, broaderConcepts);
}
SolrInputDocument modelEntry = new SolrInputDocument();
modelEntry.addField(entryIdField, modelEntryId);
- modelEntry.addField(topicUriField, topicId);
+ modelEntry.addField(conceptUriField, conceptId);
modelEntry.addField(entryTypeField, MODEL_ENTRY);
- if (broaderTopics != null) {
- invalidateModelFields(broaderTopics, modelUpdateDateField, modelEvaluationDateField);
+ if (broaderConcepts != null) {
+ invalidateModelFields(broaderConcepts, modelUpdateDateField, modelEvaluationDateField);
}
SolrServer solrServer = getActiveSolrServer();
try {
@@ -554,7 +562,7 @@ public class TopicClassificationEngine e
solrServer.request(request);
solrServer.commit();
} catch (Exception e) {
- String msg = String.format("Error adding topic with id '%s' on Solr Core '%s'", topicId,
+ String msg = String.format("Error adding topic with id '%s' on Solr Core '%s'", conceptId,
solrCoreId);
throw new ClassifierException(msg, e);
}
@@ -563,17 +571,18 @@ public class TopicClassificationEngine e
/*
* The commit is the responsibility of the caller.
*/
- protected void invalidateModelFields(Collection<String> topicIds, String... fieldNames) throws ClassifierException {
- if (topicIds.isEmpty() || fieldNames.length == 0) {
+ protected void invalidateModelFields(Collection<String> conceptIds, String... fieldNames) throws ClassifierException {
+ if (conceptIds.isEmpty() || fieldNames.length == 0) {
return;
}
SolrServer solrServer = getActiveSolrServer();
List<String> invalidatedFields = Arrays.asList(fieldNames);
try {
UpdateRequest request = new UpdateRequest();
- for (String topicId : topicIds) {
+ for (String conceptId : conceptIds) {
SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND "
- + topicUriField + ":" + ClientUtils.escapeQueryChars(topicId));
+ + conceptUriField + ":"
+ + ClientUtils.escapeQueryChars(conceptId));
for (SolrDocument result : solrServer.query(query).getResults()) {
// there should be only one (or none: tolerated)
SolrInputDocument newEntry = new SolrInputDocument();
@@ -588,19 +597,19 @@ public class TopicClassificationEngine e
solrServer.request(request);
} catch (Exception e) {
String msg = String.format("Error invalidating topics [%s] on Solr Core '%s'",
- StringUtils.join(topicIds, ", "), solrCoreId);
+ StringUtils.join(conceptIds, ", "), solrCoreId);
throw new ClassifierException(msg, e);
}
}
@Override
- public void removeTopic(String topicId) throws ClassifierException {
+ public void removeConcept(String conceptId) throws ClassifierException {
SolrServer solrServer = getActiveSolrServer();
try {
- solrServer.deleteByQuery(topicUriField + ":" + ClientUtils.escapeQueryChars(topicId));
+ solrServer.deleteByQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
solrServer.commit();
} catch (Exception e) {
- String msg = String.format("Error removing topic with id '%s' on Solr Core '%s'", topicId,
+ String msg = String.format("Error removing topic with id '%s' on Solr Core '%s'", conceptId,
solrCoreId);
throw new ClassifierException(msg, e);
}
@@ -625,13 +634,13 @@ public class TopicClassificationEngine e
String offset = null;
boolean done = false;
int batchSize = 1000;
- query.addSortField(topicUriField, SolrQuery.ORDER.asc);
+ query.addSortField(conceptUriField, SolrQuery.ORDER.asc);
query.setRows(batchSize + 1);
try {
while (!done) {
// batch over all the indexed topics
if (offset != null) {
- q += " AND " + topicUriField + ":[" + ClientUtils.escapeQueryChars(offset.toString())
+ q += " AND " + conceptUriField + ":[" + ClientUtils.escapeQueryChars(offset.toString())
+ " TO *]";
}
query.setQuery(q);
@@ -639,9 +648,9 @@ public class TopicClassificationEngine e
int count = 0;
List<SolrDocument> batchDocuments = new ArrayList<SolrDocument>();
for (SolrDocument result : response.getResults()) {
- String topicId = result.getFirstValue(topicUriField).toString();
+ String conceptId = result.getFirstValue(conceptUriField).toString();
if (count == batchSize) {
- offset = topicId;
+ offset = conceptId;
} else {
count++;
batchDocuments.add(result);
@@ -676,10 +685,10 @@ public class TopicClassificationEngine e
public int process(List<SolrDocument> batch) throws ClassifierException, TrainingSetException {
int processed = 0;
for (SolrDocument result : batch) {
- String topicId = result.getFirstValue(topicUriField).toString();
+ String conceptId = result.getFirstValue(conceptUriField).toString();
List<String> impactedTopics = new ArrayList<String>();
- impactedTopics.add(topicId);
- impactedTopics.addAll(getNarrowerTopics(topicId));
+ impactedTopics.add(conceptId);
+ impactedTopics.addAll(getNarrowerConcepts(conceptId));
if (incr) {
Date lastModelUpdate = (Date) result.getFirstValue(modelUpdateDateField);
if (lastModelUpdate != null
@@ -689,7 +698,7 @@ public class TopicClassificationEngine e
}
String metadataEntryId = result.getFirstValue(entryIdField).toString();
String modelEntryId = result.getFirstValue(modelEntryIdField).toString();
- updateTopic(topicId, metadataEntryId, modelEntryId, impactedTopics,
+ updateTopic(conceptId, metadataEntryId, modelEntryId, impactedTopics,
result.getFieldValues(broaderField));
processed++;
}
@@ -702,7 +711,7 @@ public class TopicClassificationEngine e
}
/**
- * @param topicId
+ * @param conceptId
* the topic model to update
* @param metadataEntryId
* of the metadata entry id of the topic
@@ -710,14 +719,14 @@ public class TopicClassificationEngine e
* of the model entry id of the topic
* @param impactedTopics
* the list of impacted topics (e.g. the topic node and direct children)
- * @param broaderTopics
+ * @param broaderConcepts
* the collection of broader to re-add in the broader field
*/
- protected void updateTopic(String topicId,
+ protected void updateTopic(String conceptId,
String metadataId,
String modelId,
List<String> impactedTopics,
- Collection<Object> broaderTopics) throws TrainingSetException,
+ Collection<Object> broaderConcepts) throws TrainingSetException,
ClassifierException {
long start = System.currentTimeMillis();
Batch<Example> examples = Batch.emtpyBatch(Example.class);
@@ -741,7 +750,7 @@ public class TopicClassificationEngine e
// reindex the topic with the new text data collected from the examples
SolrInputDocument modelEntry = new SolrInputDocument();
modelEntry.addField(entryIdField, modelId);
- modelEntry.addField(topicUriField, topicId);
+ modelEntry.addField(conceptUriField, conceptId);
modelEntry.addField(entryTypeField, MODEL_ENTRY);
if (sb.length() > 0) {
modelEntry.addField(similarityField, sb);
@@ -752,9 +761,9 @@ public class TopicClassificationEngine e
metadataEntry.addField(entryIdField, metadataId);
metadataEntry.addField(modelEntryIdField, modelId);
metadataEntry.addField(entryTypeField, METADATA_ENTRY);
- metadataEntry.addField(topicUriField, topicId);
- if (broaderTopics != null && broaderField != null) {
- metadataEntry.addField(broaderField, broaderTopics);
+ metadataEntry.addField(conceptUriField, conceptId);
+ if (broaderConcepts != null && broaderField != null) {
+ metadataEntry.addField(broaderField, broaderConcepts);
}
if (modelUpdateDateField != null) {
metadataEntry.addField(modelUpdateDateField, UTCTimeStamper.nowUtcDate());
@@ -767,12 +776,12 @@ public class TopicClassificationEngine e
solrServer.request(request);
// the commit is done by the caller in batch
} catch (Exception e) {
- String msg = String.format("Error updating topic with id '%s' on Solr Core '%s'", topicId,
+ String msg = String.format("Error updating topic with id '%s' on Solr Core '%s'", conceptId,
solrCoreId);
throw new ClassifierException(msg, e);
}
long stop = System.currentTimeMillis();
- log.debug("Sucessfully updated topic {} in {}s", topicId, (double) (stop - start) / 1000.);
+ log.debug("Sucessfully updated topic {} in {}s", conceptId, (double) (stop - start) / 1000.);
}
protected void checkTrainingSet() throws TrainingSetException {
@@ -800,7 +809,7 @@ public class TopicClassificationEngine e
config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
config.put(TopicClassificationEngine.SOLR_CORE, server);
- config.put(TopicClassificationEngine.TOPIC_URI_FIELD, "topic");
+ config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "topic");
config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");
@@ -870,16 +879,16 @@ public class TopicClassificationEngine e
@Override
public int process(List<SolrDocument> batch) throws ClassifierException {
for (SolrDocument topicEntry : batch) {
- String topicId = topicEntry.getFirstValue(topicUriField).toString();
+ String conceptId = topicEntry.getFirstValue(conceptUriField).toString();
Collection<Object> broader = topicEntry.getFieldValues(broaderField);
if (broader == null) {
- classifier.addTopic(topicId, null);
+ classifier.addConcept(conceptId, null);
} else {
- List<String> broaderTopics = new ArrayList<String>();
- for (Object broaderTopic : broader) {
- broaderTopics.add(broaderTopic.toString());
+ List<String> broaderConcepts = new ArrayList<String>();
+ for (Object broaderConcept : broader) {
+ broaderConcepts.add(broaderConcept.toString());
}
- classifier.addTopic(topicId, broaderTopics);
+ classifier.addConcept(conceptId, broaderConcepts);
}
}
return batch.size();
@@ -900,7 +909,7 @@ public class TopicClassificationEngine e
public int process(List<SolrDocument> batch) throws TrainingSetException, ClassifierException {
int offset;
for (SolrDocument topicMetadata : batch) {
- String topic = topicMetadata.getFirstValue(topicUriField).toString();
+ String topic = topicMetadata.getFirstValue(conceptUriField).toString();
List<String> topics = Arrays.asList(topic);
List<String> falseNegativeExamples = new ArrayList<String>();
int truePositives = 0;
@@ -922,7 +931,7 @@ public class TopicClassificationEngine e
.suggestTopics(example.contents);
boolean match = false;
for (TopicSuggestion suggestedTopic : suggestedTopics) {
- if (topic.equals(suggestedTopic.uri)) {
+ if (topic.equals(suggestedTopic.conceptUri)) {
match = true;
truePositives++;
break;
@@ -955,7 +964,7 @@ public class TopicClassificationEngine e
List<TopicSuggestion> suggestedTopics = classifier
.suggestTopics(example.contents);
for (TopicSuggestion suggestedTopic : suggestedTopics) {
- if (topic.equals(suggestedTopic.uri)) {
+ if (topic.equals(suggestedTopic.conceptUri)) {
falsePositives++;
if (falsePositiveExamples.size() < MAX_COLLECTED_EXAMPLES / foldCount) {
falsePositiveExamples.add(example.id);
@@ -999,7 +1008,7 @@ public class TopicClassificationEngine e
* Update the performance statistics in a metadata entry of a topic. It is the responsibility of the
* caller to commit.
*/
- protected void updatePerformanceMetadata(String topicId,
+ protected void updatePerformanceMetadata(String conceptId,
float precision,
float recall,
int positiveSupport,
@@ -1008,8 +1017,8 @@ public class TopicClassificationEngine e
List<String> falseNegativeExamples) throws ClassifierException {
SolrServer solrServer = getActiveSolrServer();
try {
- SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + topicUriField
- + ":" + ClientUtils.escapeQueryChars(topicId));
+ SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + conceptUriField
+ + ":" + ClientUtils.escapeQueryChars(conceptId));
for (SolrDocument result : solrServer.query(query).getResults()) {
// there should be only one (or none: tolerated)
// fetch any old values to update (all metadata fields are assumed to be stored)s
@@ -1032,7 +1041,7 @@ public class TopicClassificationEngine e
}
} catch (Exception e) {
String msg = String.format(
- "Error updating performance metadata for topic '%s' on Solr Core '%s'", topicId, solrCoreId);
+ "Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId);
throw new ClassifierException(msg, e);
}
}
@@ -1063,15 +1072,15 @@ public class TopicClassificationEngine e
}
@Override
- public ClassificationReport getPerformanceEstimates(String topicId) throws ClassifierException {
+ public ClassificationReport getPerformanceEstimates(String conceptId) throws ClassifierException {
SolrServer solrServer = getActiveSolrServer();
- SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + topicUriField + ":"
- + ClientUtils.escapeQueryChars(topicId));
+ SolrQuery query = new SolrQuery(entryTypeField + ":" + METADATA_ENTRY + " AND " + conceptUriField
+ + ":" + ClientUtils.escapeQueryChars(conceptId));
try {
SolrDocumentList results = solrServer.query(query).getResults();
if (results.isEmpty()) {
- throw new ClassifierException(String.format("'%s' is not a registered topic", topicId));
+ throw new ClassifierException(String.format("'%s' is not a registered topic", conceptId));
}
SolrDocument metadata = results.get(0);
Float precision = computeMeanValue(metadata, precisionField);
@@ -1097,7 +1106,7 @@ public class TopicClassificationEngine e
return report;
} catch (SolrServerException e) {
throw new ClassifierException(String.format("Error fetching the performance report for topic "
- + topicId));
+ + conceptId));
}
}
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicClassifier.java Fri Feb 3 14:59:56 2012
@@ -25,8 +25,8 @@ import org.apache.stanbol.enhancer.topic
import org.apache.stanbol.enhancer.topic.training.TrainingSetException;
/**
- * Service interface for suggesting hierarchical topics from a specific scheme (a.k.a. taxonomy, thesaurus or
- * topics hierarchy) from the text content of a document or part of a document.
+ * Service interface for suggesting hierarchical concepts from a specific scheme (a.k.a. taxonomy, thesaurus or
+ * concepts hierarchy) from the text content of a document or part of a document.
*/
public interface TopicClassifier {
@@ -46,30 +46,30 @@ public interface TopicClassifier {
*
* @param text
* the text content to analyze
- * @return the most likely topics related to the text
+ * @return the most likely concepts related to the text
* @throws EngineException
*/
List<TopicSuggestion> suggestTopics(String text) throws ClassifierException;
/**
- * @return the set of ids of topics directly broader than
+ * @return the set of ids of concepts directly narrower than
* @param id
*/
- Set<String> getNarrowerTopics(String broadTopicId) throws ClassifierException;
+ Set<String> getNarrowerConcepts(String broadConceptId) throws ClassifierException;
/**
+ * @return the set of ids of concepts directly broader than
+ * @return the set of ids of concepts directly narrower than
* @param id
*/
- Set<String> getBroaderTopics(String id) throws ClassifierException;
+ Set<String> getBroaderConcepts(String id) throws ClassifierException;
/**
- * @return the set of ids of topics without broader topics.
+ * @return the set of ids of concepts without broader concepts.
*/
- Set<String> getTopicRoots() throws ClassifierException;
+ Set<String> getRootConcepts() throws ClassifierException;
/**
- * @return true if the classifier model can be updated with the {@code addTopic} / {@code removeTopic} /
+ * @return true if the classifier model can be updated with the {@code addConcept} / {@code removeConcept} /
* {@code updateModel} / methods.
*/
boolean isUpdatable();
@@ -77,14 +77,14 @@ public interface TopicClassifier {
/**
* Register a topic and set it's ancestors in the taxonomy. Warning: re-adding an already existing topic
* can delete the underlying statistical model. Calling {@code updateModel} is necessary to rebuild the
- * statistical model based on the hierarchical structure of the topics and the registered training set.
+ * statistical model based on the hierarchical structure of the concepts and the registered training set.
*
* @param id
* the new topic id
- * @param broaderTopics
- * list of directly broader topics in the thesaurus
+ * @param broaderConcepts
+ * list of directly broader concepts in the thesaurus
*/
- void addTopic(String id, Collection<String> broaderTopics) throws ClassifierException;
+ void addConcept(String id, Collection<String> broaderConcepts) throws ClassifierException;
/**
* Remove a topic from the thesaurus. WARNING: it is the caller responsibility to recursively remove or
@@ -95,7 +95,7 @@ public interface TopicClassifier {
* @param id
* if of the topic to remove from the model
*/
- void removeTopic(String id) throws ClassifierException;
+ void removeConcept(String id) throws ClassifierException;
/**
* Register a training set to use to build the statistical model of the classifier.
@@ -104,10 +104,10 @@ public interface TopicClassifier {
/**
* Update (incrementally or from scratch) the statistical model of the classifier. Note: depending on the
- * size of the dataset and the number of topics to update, this process can take a long time and should
+ * size of the dataset and the number of concepts to update, this process can take a long time and should
* probably be wrapped in a dedicated thread if called by a the user interface layer.
*
- * @return the number of updated topics
+ * @return the number of updated concepts
*/
int updateModel(boolean incremental) throws TrainingSetException, ClassifierException;
@@ -115,7 +115,7 @@ public interface TopicClassifier {
* Perform k-fold cross validation of the model to compute estimates of the precision, recall and f1
* score.
*
- * @return number of updated topics
+ * @return number of updated concepts
*/
public int updatePerformanceEstimates(boolean incremental) throws ClassifierException,
TrainingSetException;
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/main/java/org/apache/stanbol/enhancer/topic/TopicSuggestion.java Fri Feb 3 14:59:56 2012
@@ -26,27 +26,44 @@ import org.apache.commons.lang.StringUti
*/
public class TopicSuggestion {
- public final String uri;
-
- public final List<String> paths = new ArrayList<String>();
-
+ /**
+ * The URI of the concept in the hierarchical conceptual scheme (that holds the broader relationship)
+ */
+ public final String conceptUri;
+
+ /**
+ * Reference to the broader concepts of this suggestion.
+ */
+ public final List<String> broader = new ArrayList<String>();
+
+ /**
+ * The (optional) URI of a resource that grounds this concept in the real world. Can be null.
+ */
+ public final String primaryTopicUri;
+
+ /**
+ * The (positive) score of the suggestion: higher is better. Zero would mean unrelated. The absolute value
+ * is meaningless: suggestion scores cannot be compared across different input text documents nor
+ * distinct concept schemes.
+ */
public final float score;
- public TopicSuggestion(String uri, List<String> paths, float score) {
- this.uri = uri;
- if (paths != null) {
- this.paths.addAll(paths);
+ public TopicSuggestion(String conceptUri, String primaryTopicUri, List<String> broader, float score) {
+ this.conceptUri = conceptUri;
+ this.primaryTopicUri = primaryTopicUri;
+ if (broader != null) {
+ this.broader.addAll(broader);
}
this.score = score;
}
- public TopicSuggestion(String uri, float score) {
- this(uri, null, score);
+ public TopicSuggestion(String conceptUri, float score) {
+ this(conceptUri, null, null, score);
}
@Override
public String toString() {
- return String.format("TopicSuggestion(\"%s\", [%s], %f)", uri, StringUtils.join(paths, "\", \""),
- score);
+ return String.format("TopicSuggestion(\"%s\", [%s], %f)", conceptUri,
+ StringUtils.join(broader, "\", \""), score);
}
}
Modified: incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java?rev=1240206&r1=1240205&r2=1240206&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/topic/src/test/java/org/apache/stanbol/enhancer/engine/topic/TopicEngineTest.java Fri Feb 3 14:59:56 2012
@@ -105,7 +105,7 @@ public class TopicEngineTest extends Emb
query.set("commit", true);
query.set("separator", "\t");
query.set("headers", false);
- query.set("fieldnames", "topic,popularity,paths,text");
+ query.set("fieldnames", "topic,popularity,broader,text");
query.set(CommonParams.STREAM_CONTENTTYPE, "text/plan;charset=utf-8");
query.set(CommonParams.STREAM_BODY, IOUtils.toString(is, "utf-8"));
@@ -122,14 +122,14 @@ public class TopicEngineTest extends Emb
assertNotNull(classifier);
assertEquals(classifier.engineId, "test-engine");
assertEquals(classifier.getActiveSolrServer(), classifierSolrServer);
- assertEquals(classifier.topicUriField, "topic");
+ assertEquals(classifier.conceptUriField, "topic");
assertEquals(classifier.similarityField, "classifier_features");
assertEquals(classifier.acceptedLanguages, new ArrayList<String>());
// check some required attributes
Hashtable<String,Object> configWithMissingTopicField = new Hashtable<String,Object>();
configWithMissingTopicField.putAll(config);
- configWithMissingTopicField.remove(TopicClassificationEngine.TOPIC_URI_FIELD);
+ configWithMissingTopicField.remove(TopicClassificationEngine.CONCEPT_URI_FIELD);
try {
TopicClassificationEngine.fromParameters(configWithMissingTopicField);
fail("Should have raised a ConfigurationException");
@@ -155,40 +155,40 @@ public class TopicEngineTest extends Emb
@Test
public void testProgrammaticThesaurusConstruction() throws Exception {
// Register the roots of the taxonomy
- classifier.addTopic("http://example.com/topics/root1", null);
- classifier.addTopic("http://example.com/topics/root2", null);
- classifier.addTopic("http://example.com/topics/root3", new ArrayList<String>());
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root1").size());
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root2").size());
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root3").size());
- assertEquals(3, classifier.getTopicRoots().size());
+ classifier.addConcept("http://example.com/topics/root1", null);
+ classifier.addConcept("http://example.com/topics/root2", null);
+ classifier.addConcept("http://example.com/topics/root3", new ArrayList<String>());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root1").size());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root2").size());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root3").size());
+ assertEquals(3, classifier.getRootConcepts().size());
// Register some non root nodes
- classifier.addTopic("http://example.com/topics/node1",
+ classifier.addConcept("http://example.com/topics/node1",
Arrays.asList("http://example.com/topics/root1", "http://example.com/topics/root2"));
- classifier.addTopic("http://example.com/topics/node2",
+ classifier.addConcept("http://example.com/topics/node2",
Arrays.asList("http://example.com/topics/root3"));
- classifier.addTopic("http://example.com/topics/node3",
+ classifier.addConcept("http://example.com/topics/node3",
Arrays.asList("http://example.com/topics/node1", "http://example.com/topics/node2"));
// the root where not impacted
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root1").size());
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root2").size());
- assertEquals(0, classifier.getBroaderTopics("http://example.com/topics/root3").size());
- assertEquals(3, classifier.getTopicRoots().size());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root1").size());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root2").size());
+ assertEquals(0, classifier.getBroaderConcepts("http://example.com/topics/root3").size());
+ assertEquals(3, classifier.getRootConcepts().size());
// the other nodes have the same broader topics as at creation time
- assertEquals(2, classifier.getBroaderTopics("http://example.com/topics/node1").size());
- assertEquals(1, classifier.getBroaderTopics("http://example.com/topics/node2").size());
- assertEquals(2, classifier.getBroaderTopics("http://example.com/topics/node3").size());
+ assertEquals(2, classifier.getBroaderConcepts("http://example.com/topics/node1").size());
+ assertEquals(1, classifier.getBroaderConcepts("http://example.com/topics/node2").size());
+ assertEquals(2, classifier.getBroaderConcepts("http://example.com/topics/node3").size());
// check the induced narrower relationships
- assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root1").size());
- assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root2").size());
- assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/root3").size());
- assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/node1").size());
- assertEquals(1, classifier.getNarrowerTopics("http://example.com/topics/node2").size());
- assertEquals(0, classifier.getNarrowerTopics("http://example.com/topics/node3").size());
+ assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root1").size());
+ assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root2").size());
+ assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/root3").size());
+ assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/node1").size());
+ assertEquals(1, classifier.getNarrowerConcepts("http://example.com/topics/node2").size());
+ assertEquals(0, classifier.getNarrowerConcepts("http://example.com/topics/node3").size());
}
@Test
@@ -213,7 +213,7 @@ public class TopicEngineTest extends Emb
assertNotNull(suggestedTopics);
assertEquals(suggestedTopics.size(), 10);
TopicSuggestion bestSuggestion = suggestedTopics.get(0);
- assertEquals(bestSuggestion.uri, "Category:American_films");
+ assertEquals(bestSuggestion.conceptUri, "Category:American_films");
}
@Test
@@ -229,13 +229,13 @@ public class TopicEngineTest extends Emb
String music = "urn:topics/music";
String law = "urn:topics/law";
- classifier.addTopic(business, null);
- classifier.addTopic(technology, null);
- classifier.addTopic(sport, null);
- classifier.addTopic(music, null);
- classifier.addTopic(apple, Arrays.asList(business, technology));
- classifier.addTopic(football, Arrays.asList(sport));
- classifier.addTopic(worldcup, Arrays.asList(football));
+ classifier.addConcept(business, null);
+ classifier.addConcept(technology, null);
+ classifier.addConcept(sport, null);
+ classifier.addConcept(music, null);
+ classifier.addConcept(apple, Arrays.asList(business, technology));
+ classifier.addConcept(football, Arrays.asList(sport));
+ classifier.addConcept(worldcup, Arrays.asList(football));
// train the classifier on an empty dataset
classifier.setTrainingSet(trainingSet);
@@ -280,10 +280,10 @@ public class TopicEngineTest extends Emb
// test the trained classifier
suggestions = classifier.suggestTopics("I like the sound of vuvuzula in the morning!");
assertTrue(suggestions.size() >= 4);
- assertEquals(worldcup, suggestions.get(0).uri);
- assertEquals(music, suggestions.get(1).uri);
- assertEquals(football, suggestions.get(2).uri);
- assertEquals(sport, suggestions.get(3).uri);
+ assertEquals(worldcup, suggestions.get(0).conceptUri);
+ assertEquals(music, suggestions.get(1).conceptUri);
+ assertEquals(football, suggestions.get(2).conceptUri);
+ assertEquals(sport, suggestions.get(3).conceptUri);
// check that the scores are decreasing:
assertTrue(suggestions.get(0).score >= suggestions.get(1).score);
assertTrue(suggestions.get(1).score >= suggestions.get(2).score);
@@ -291,14 +291,14 @@ public class TopicEngineTest extends Emb
suggestions = classifier.suggestTopics("Apple is no longer a startup.");
assertTrue(suggestions.size() >= 3);
- assertEquals(apple, suggestions.get(0).uri);
- assertEquals(technology, suggestions.get(1).uri);
- assertEquals(business, suggestions.get(2).uri);
+ assertEquals(apple, suggestions.get(0).conceptUri);
+ assertEquals(technology, suggestions.get(1).conceptUri);
+ assertEquals(business, suggestions.get(2).conceptUri);
suggestions = classifier.suggestTopics("You can watch the worldcup on your iPad.");
assertTrue(suggestions.size() >= 2);
- assertEquals(apple, suggestions.get(0).uri);
- assertEquals(worldcup, suggestions.get(1).uri);
+ assertEquals(apple, suggestions.get(0).conceptUri);
+ assertEquals(worldcup, suggestions.get(1).conceptUri);
// test incremental update of a single root node
Thread.sleep(10);
@@ -308,7 +308,7 @@ public class TopicEngineTest extends Emb
assertEquals(0, classifier.updateModel(true));
suggestions = classifier.suggestTopics("Glory box is best mixed as dubstep.");
assertTrue(suggestions.size() >= 1);
- assertEquals(music, suggestions.get(0).uri);
+ assertEquals(music, suggestions.get(0).conceptUri);
// test incremental update of a leaf node (the parent topic needs re-indexing too)
Thread.sleep(10);
@@ -331,12 +331,12 @@ public class TopicEngineTest extends Emb
+ " in which they intend to represent the state.",
Arrays.asList(law));
assertEquals(0, classifier.updateModel(true));
- classifier.addTopic(law, null);
+ classifier.addConcept(law, null);
assertEquals(1, classifier.updateModel(true));
assertEquals(0, classifier.updateModel(true));
// registering new subtopics invalidate the models of the parent as well
- classifier.addTopic("urn:topics/sportsmafia", Arrays.asList(football, business));
+ classifier.addConcept("urn:topics/sportsmafia", Arrays.asList(football, business));
assertEquals(3, classifier.updateModel(true));
assertEquals(0, classifier.updateModel(true));
@@ -354,8 +354,8 @@ public class TopicEngineTest extends Emb
}
// register some topics
- classifier.addTopic("urn:t/001", null);
- classifier.addTopic("urn:t/002", Arrays.asList("urn:t/001"));
+ classifier.addConcept("urn:t/001", null);
+ classifier.addConcept("urn:t/002", Arrays.asList("urn:t/001"));
performanceEstimates = classifier.getPerformanceEstimates("urn:t/002");
assertFalse(performanceEstimates.uptodate);
@@ -370,7 +370,7 @@ public class TopicEngineTest extends Emb
assertEquals(0.67f, performanceEstimates.f1, 0.01);
assertEquals(34, performanceEstimates.positiveSupport);
assertEquals(32, performanceEstimates.negativeSupport);
- assertTrue(classifier.getBroaderTopics("urn:t/002").contains("urn:t/001"));
+ assertTrue(classifier.getBroaderConcepts("urn:t/002").contains("urn:t/001"));
// accumulate other folds statistics and compute means of statistics
classifier.updatePerformanceMetadata("urn:t/002", 0.79f, 0.63f, 10, 10, Arrays.asList("ex1", "ex5"),
@@ -456,7 +456,7 @@ public class TopicEngineTest extends Emb
for (int i = 0; i < numberOfTopics; i++) {
String topic = String.format("urn:t/%03d", i + 1);
topics[i] = topic;
- classifier.addTopic(topic, null);
+ classifier.addConcept(topic, null);
String[] terms = randomVocabulary(i, vocabSizeMin, vocabSizeMax, rng);
vocabularies.put(topic, terms);
}
@@ -521,7 +521,7 @@ public class TopicEngineTest extends Emb
config.put(TopicClassificationEngine.ENTRY_TYPE_FIELD, "entry_type");
config.put(TopicClassificationEngine.MODEL_ENTRY_ID_FIELD, "model_entry_id");
config.put(TopicClassificationEngine.SOLR_CORE, classifierSolrServer);
- config.put(TopicClassificationEngine.TOPIC_URI_FIELD, "topic");
+ config.put(TopicClassificationEngine.CONCEPT_URI_FIELD, "topic");
config.put(TopicClassificationEngine.SIMILARTITY_FIELD, "classifier_features");
config.put(TopicClassificationEngine.BROADER_FIELD, "broader");
config.put(TopicClassificationEngine.MODEL_UPDATE_DATE_FIELD, "last_update_dt");