You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2012/12/01 15:00:12 UTC
svn commit: r1415998 [2/2] - in /stanbol/trunk/enhancer/engines:
entityhublinking/
entityhublinking/src/main/java/org/apache/stanbol/enhancer/engines/entityhublinking/
entitylinking/
entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entit...
Modified: stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngineTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngineTest.java?rev=1415998&r1=1415997&r2=1415998&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngineTest.java (original)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngineTest.java Sat Dec 1 14:00:09 2012
@@ -44,13 +44,16 @@ import opennlp.tools.tokenize.SimpleToke
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
+import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
+import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
import org.apache.stanbol.commons.opennlp.OpenNLP;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig;
@@ -73,14 +76,10 @@ import org.apache.stanbol.enhancer.servi
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
-import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
-import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
-import org.apache.stanbol.entityhub.servicesapi.model.Representation;
-import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
-import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
@@ -114,56 +113,62 @@ public class EntityLinkingEngineTest {
private static final String TEST_REFERENCED_SITE_NAME = "dummRefSiteName";
static TestSearcherImpl searcher;
- static ValueFactory factory = InMemoryValueFactory.getInstance();
-
- public static final String NAME = NamespaceEnum.rdfs+"label";
- public static final String TYPE = NamespaceEnum.rdf+"type";
- public static final String REDIRECT = NamespaceEnum.rdfs+"seeAlso";
+
+ public static final UriRef NAME = new UriRef(NamespaceEnum.rdfs+"label");
+ public static final UriRef TYPE = new UriRef(NamespaceEnum.rdf+"type");
+ public static final UriRef REDIRECT = new UriRef(NamespaceEnum.rdfs+"seeAlso");
@BeforeClass
public static void setUpServices() throws IOException {
searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME,NAME,SimpleTokenizer.INSTANCE);
//add some terms to the searcher
- Representation rep = factory.createRepresentation("urn:test:PatrickMarshall");
- rep.addNaturalText(NAME, "Patrick Marshall");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PERSON.getUnicodeString());
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:Geologist");
- rep.addNaturalText(NAME, "Geologist");
- rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
- rep.addReference(REDIRECT, "urn:test:redirect:Geologist");
- searcher.addEntity(rep);
+ MGraph graph = new IndexedMGraph();
+ UriRef uri = new UriRef("urn:test:PatrickMarshall");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Patrick Marshall")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PERSON));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:Geologist");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologist")));
+ graph.add(new TripleImpl(uri, TYPE, new UriRef(NamespaceEnum.skos+"Concept")));
+ graph.add(new TripleImpl(uri, REDIRECT, new UriRef("urn:test:redirect:Geologist")));
+ searcher.addEntity(new Entity(uri, graph));
//a redirect
- rep = factory.createRepresentation("urn:test:redirect:Geologist");
- rep.addNaturalText(NAME, "Geologe (redirect)");
- rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:NewZealand");
- rep.addNaturalText(NAME, "New Zealand");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:UniversityOfOtago");
- rep.addNaturalText(NAME, "University of Otago");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString());
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:University");
- rep.addNaturalText(NAME, "University");
- rep.addReference(TYPE, NamespaceEnum.skos+"Concept");
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:Otago");
- rep.addNaturalText(NAME, "Otago");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
- searcher.addEntity(rep);
+ uri = new UriRef("urn:test:redirect:Geologist");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Geologe (redirect)")));
+ graph.add(new TripleImpl(uri, TYPE, new UriRef(NamespaceEnum.skos+"Concept")));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:NewZealand");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("New Zealand")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:UniversityOfOtago");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:University");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University")));
+ graph.add(new TripleImpl(uri, TYPE, new UriRef(NamespaceEnum.skos+"Concept")));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:Otago");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
+ searcher.addEntity(new Entity(uri, graph));
//add a 2nd Otago (Place and University
- rep = factory.createRepresentation("urn:test:Otago_Texas");
- rep.addNaturalText(NAME, "Otago (Texas)");
- rep.addNaturalText(NAME, "Otago");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_PLACE.getUnicodeString());
- searcher.addEntity(rep);
- rep = factory.createRepresentation("urn:test:UniversityOfOtago_Texas");
- rep.addNaturalText(NAME, "University of Otago (Texas)");
- rep.addReference(TYPE, OntologicalClasses.DBPEDIA_ORGANISATION.getUnicodeString());
- searcher.addEntity(rep);
+ uri = new UriRef("urn:test:Otago_Texas");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago (Texas)")));
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("Otago")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_PLACE));
+ searcher.addEntity(new Entity(uri, graph));
+
+ uri = new UriRef("urn:test:UniversityOfOtago_Texas");
+ graph.add(new TripleImpl(uri, NAME, new PlainLiteralImpl("University of Otago (Texas)")));
+ graph.add(new TripleImpl(uri, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
+ searcher.addEntity(new Entity(uri, graph));
Value<PhraseTag> nounPhrase = Value.value(new PhraseTag("NP",LexicalCategory.Noun),1d);
TEST_ANALYSED_TEXT = AnalysedTextFactory.getDefaultInstance().createAnalysedText(
@@ -205,8 +210,7 @@ public class EntityLinkingEngineTest {
TEST_ANALYSED_TEXT.addToken(start+23,start+24).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
}
- private OpenNLP openNLP = new OpenNLP(new ClasspathDataFileProvider(
- null));
+ private OpenNLP openNLP = new OpenNLP(new ClasspathDataFileProvider(null));
private LabelTokenizer labelTokenizer = new SimpleLabelTokenizer();
@@ -302,9 +306,9 @@ public class EntityLinkingEngineTest {
Suggestion suggestion = linkedEntity.getSuggestions().get(i);
assertEquals("Expecced Suggestion at Rank "+i+" expected: "+
expectedSuggestions.get(i)+" suggestion: "+
- suggestion.getRepresentation().getId(),
+ suggestion.getEntity().getId(),
expectedSuggestions.get(i),
- suggestion.getRepresentation().getId());
+ suggestion.getEntity().getId());
assertTrue("Score of suggestion "+i+"("+suggestion.getScore()+
" > as of the previous one ("+score+")",
score >= suggestion.getScore());
@@ -378,7 +382,7 @@ public class EntityLinkingEngineTest {
// +"',entityAnnotation "+entityAnnotation+")",
// 0.0 <= confidence.doubleValue());
//Test the entityhub:site property (STANBOL-625)
- UriRef ENTITYHUB_SITE = new UriRef(RdfResourceEnum.site.getUri());
+ UriRef ENTITYHUB_SITE = new UriRef(NamespaceEnum.entityhub+"site");
Iterator<Triple> entitySiteIterator = ci.getMetadata().filter(entityAnnotation,
ENTITYHUB_SITE, null);
assertTrue("Expected entityhub:site value is missing (entityAnnotation "
Modified: stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/TestSearcherImpl.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/TestSearcherImpl.java?rev=1415998&r1=1415997&r2=1415998&view=diff
==============================================================================
--- stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/TestSearcherImpl.java (original)
+++ stanbol/trunk/enhancer/engines/entitylinking/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/TestSearcherImpl.java Sat Dec 1 14:00:09 2012
@@ -30,45 +30,44 @@ import java.util.TreeMap;
import opennlp.tools.tokenize.Tokenizer;
-import org.apache.clerezza.rdf.core.Literal;
+import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
+import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
import org.apache.stanbol.enhancer.engines.entitylinking.EntitySearcher;
-import org.apache.stanbol.entityhub.servicesapi.model.Representation;
-import org.apache.stanbol.entityhub.servicesapi.model.Text;
-import org.apache.stanbol.entityhub.servicesapi.model.rdf.RdfResourceEnum;
+import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
public class TestSearcherImpl implements EntitySearcher {
- private final String nameField;
+ private final UriRef nameField;
private final Tokenizer tokenizer;
- private SortedMap<String,Collection<Representation>> data = new TreeMap<String,Collection<Representation>>(String.CASE_INSENSITIVE_ORDER);
- private Map<String,Representation> entities = new HashMap<String,Representation>();
+ private SortedMap<String,Collection<Entity>> data = new TreeMap<String,Collection<Entity>>(String.CASE_INSENSITIVE_ORDER);
+ private Map<UriRef,Entity> entities = new HashMap<UriRef,Entity>();
private Map<UriRef,Collection<Resource>> originInfo;
- public TestSearcherImpl(String siteId,String nameField, Tokenizer tokenizer) {
+ public TestSearcherImpl(String siteId,UriRef nameField, Tokenizer tokenizer) {
this.nameField = nameField;
this.tokenizer = tokenizer;
this.originInfo = Collections.singletonMap(
- new UriRef(RdfResourceEnum.site.getUri()),
+ new UriRef(NamespaceEnum.entityhub+"site"),
(Collection<Resource>)Collections.singleton(
(Resource)new PlainLiteralImpl(siteId)));
}
- public void addEntity(Representation rep){
- entities.put(rep.getId(), rep);
- Iterator<Text> labels = rep.getText(nameField);
+ public void addEntity(Entity rep){
+ entities.put(rep.getUri(), rep);
+ Iterator<PlainLiteral> labels = rep.getText(nameField);
while(labels.hasNext()){
- Text label = labels.next();
- for(String token : tokenizer.tokenize(label.getText())){
- Collection<Representation> values = data.get(token);
+ PlainLiteral label = labels.next();
+ for(String token : tokenizer.tokenize(label.getLexicalForm())){
+ Collection<Entity> values = data.get(token);
if(values == null){
- values = new ArrayList<Representation>();
- data.put(label.getText(), values);
+ values = new ArrayList<Entity>();
+ data.put(label.getLexicalForm(), values);
}
values.add(rep);
}
@@ -77,23 +76,23 @@ public class TestSearcherImpl implements
}
@Override
- public Representation get(String id, Set<String> includeFields) throws IllegalStateException {
+ public Entity get(UriRef id, Set<UriRef> includeFields) throws IllegalStateException {
return entities.get(id);
}
@Override
- public Collection<? extends Representation> lookup(String field,
- Set<String> includeFields,
+ public Collection<? extends Entity> lookup(UriRef field,
+ Set<UriRef> includeFields,
List<String> search,
String[] languages,Integer numResults) throws IllegalStateException {
if(field.equals(nameField)){
//we do not need sorting
//Representation needs to implement equals, therefore results filters multiple matches
- Set<Representation> results = new HashSet<Representation>();
+ Set<Entity> results = new HashSet<Entity>();
for(String term : search){
//TODO: adding 'zzz' to the parsed term is no good solution for
// searching ...
- for(Collection<Representation> termResults : data.subMap(term, term+"zzz").values()){
+ for(Collection<Entity> termResults : data.subMap(term, term+"zzz").values()){
results.addAll(termResults);
}
}