You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@stanbol.apache.org by rw...@apache.org on 2013/12/18 09:02:59 UTC
svn commit: r1551879 - in /stanbol/trunk/enhancer/generic:
nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/
nlp-json/src/main/resources/META-INF/services/
nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/...
Author: rwesten
Date: Wed Dec 18 08:02:58 2013
New Revision: 1551879
URL: http://svn.apache.org/r1551879
Log:
STANBOL-1132: applied the coref_dependency_tree_vers_3_fixed.patch
Added:
stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefFeatureSupport.java
stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/
stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/CorefFeatureSupportTest.java
stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/DependencyRelationSupportTest.java
stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/ValueTypeSupportTest.java
stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefFeature.java
Removed:
stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefTagSupport.java
stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefTag.java
Modified:
stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/DependencyRelationSupport.java
stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser
stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer
stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
Added: stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefFeatureSupport.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefFeatureSupport.java?rev=1551879&view=auto
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefFeatureSupport.java (added)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/CorefFeatureSupport.java Wed Dec 18 08:02:58 2013
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */package org.apache.stanbol.enhancer.nlp.json.valuetype.impl;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.ConfigurationPolicy;
+import org.apache.felix.scr.annotations.Property;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.stanbol.enhancer.nlp.coref.CorefFeature;
+import org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser;
+import org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;
+import org.codehaus.jackson.JsonNode;
+import org.codehaus.jackson.map.ObjectMapper;
+import org.codehaus.jackson.node.ArrayNode;
+import org.codehaus.jackson.node.ObjectNode;
+
+/** JSON serializer and parser for {@link CorefFeature} values. */
+@Component(immediate=true,policy=ConfigurationPolicy.IGNORE)
+@Service(value={ValueTypeParser.class,ValueTypeSerializer.class})
+@Property(name=ValueTypeParser.PROPERTY_TYPE, value=CorefFeatureSupport.TYPE_VALUE)
+public class CorefFeatureSupport implements ValueTypeParser<CorefFeature>, ValueTypeSerializer<CorefFeature> {
+
+    /** Value of the {@link ValueTypeParser#PROPERTY_TYPE} service property. */
+    public static final String TYPE_VALUE = "org.apache.stanbol.enhancer.nlp.coref.CorefFeature";
+
+    private static final String IS_REPRESENTATIVE_TAG = "isRepresentative";
+    private static final String MENTIONS_TAG = "mentions";
+    private static final String MENTION_TYPE_TAG = "type";
+    private static final String MENTION_START_TAG = "start";
+    private static final String MENTION_END_TAG = "end";
+
+    /** Writes the feature as a JSON object; "mentions" is omitted when the mention set is empty. */
+    @Override
+    public ObjectNode serialize(ObjectMapper mapper, CorefFeature coref) {
+        ObjectNode jCoref = mapper.createObjectNode();
+        jCoref.put(IS_REPRESENTATIVE_TAG, coref.isRepresentative());
+        Set<Span> mentions = coref.getMentions();
+        if (!mentions.isEmpty()) {
+            ArrayNode jMentions = mapper.createArrayNode();
+            for (Span mention : mentions) {
+                ObjectNode jMention = mapper.createObjectNode();
+                jMention.put(MENTION_TYPE_TAG, mention.getType().toString());
+                jMention.put(MENTION_START_TAG, mention.getStart());
+                jMention.put(MENTION_END_TAG, mention.getEnd());
+                jMentions.add(jMention);
+            }
+            jCoref.put(MENTIONS_TAG, jMentions);
+        }
+        return jCoref;
+    }
+
+    @Override
+    public Class<CorefFeature> getType() {
+        return CorefFeature.class;
+    }
+
+    /**
+     * Rebuilds a {@link CorefFeature} from its JSON form. Token and Chunk
+     * mentions are obtained from <code>at</code> via addToken/addChunk.
+     * Mentions of other span types have no counterpart here and are skipped,
+     * so that the resulting set never contains a <code>null</code> element.
+     *
+     * @throws IllegalStateException if "isRepresentative" is not a boolean
+     *         or a mention lacks a textual "type"
+     */
+    @Override
+    public CorefFeature parse(ObjectNode jCoref, AnalysedText at) {
+        JsonNode jIsRepresentative = jCoref.path(IS_REPRESENTATIVE_TAG);
+        if (!jIsRepresentative.isBoolean()) {
+            throw new IllegalStateException("Field 'isRepresentative' must have a true/false format");
+        }
+
+        Set<Span> mentions = new HashSet<Span>();
+        JsonNode node = jCoref.path(MENTIONS_TAG);
+        if (node.isArray()) {
+            for (int i = 0; i < node.size(); i++) {
+                JsonNode member = node.get(i);
+                if (!member.isObject()) {
+                    continue;
+                }
+                // getTextValue() is null for a missing/non-text node; fail
+                // clearly instead of letting valueOf(null) raise a bare NPE.
+                String typeName = member.path(MENTION_TYPE_TAG).getTextValue();
+                if (typeName == null) {
+                    throw new IllegalStateException("Field 'type' of a mention must be a span type name");
+                }
+                int spanStart = member.path(MENTION_START_TAG).asInt();
+                int spanEnd = member.path(MENTION_END_TAG).asInt();
+                Span mentionedSpan = null;
+                switch (SpanTypeEnum.valueOf(typeName)) {
+                    case Chunk:
+                        mentionedSpan = at.addChunk(spanStart, spanEnd);
+                        break;
+                    case Token:
+                        mentionedSpan = at.addToken(spanStart, spanEnd);
+                        break;
+                    default: // Sentence, Text, TextSection: not supported as mentions
+                        break;
+                }
+                if (mentionedSpan != null) {
+                    mentions.add(mentionedSpan);
+                }
+            }
+        }
+        return new CorefFeature(jIsRepresentative.asBoolean(), Collections.unmodifiableSet(mentions));
+    }
+}
\ No newline at end of file
Modified: stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/DependencyRelationSupport.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/DependencyRelationSupport.java?rev=1551879&r1=1551878&r2=1551879&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/DependencyRelationSupport.java (original)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/main/java/org/apache/stanbol/enhancer/nlp/json/valuetype/impl/DependencyRelationSupport.java Wed Dec 18 08:02:58 2013
@@ -54,7 +54,7 @@ public class DependencyRelationSupport i
GrammaticalRelationTag gramRelTag = relation.getGrammaticalRelationTag();
jDependencyRelation.put(RELATION_TYPE_TAG, gramRelTag.getTag());
jDependencyRelation.put(RELATION_STANBOL_TYPE_TAG, gramRelTag.getGrammaticalRelation().ordinal());
- jDependencyRelation.put(RELATION_IS_DEPENDEE_TAG, (relation.isDependent() ? "true" : "false"));
+ jDependencyRelation.put(RELATION_IS_DEPENDEE_TAG, (relation.isDependent()));
Span partner = relation.getPartner();
if (partner != null) {
Modified: stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser?rev=1551879&r1=1551878&r2=1551879&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser (original)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeParser Wed Dec 18 08:02:58 2013
@@ -3,4 +3,4 @@ org.apache.stanbol.enhancer.nlp.json.val
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.MorphoFeaturesSupport
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.PhraseTagSupport
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.DependencyRelationSupport
-org.apache.stanbol.enhancer.nlp.json.valuetype.impl.CorefTagSupport
\ No newline at end of file
+org.apache.stanbol.enhancer.nlp.json.valuetype.impl.CorefFeatureSupport
\ No newline at end of file
Modified: stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer?rev=1551879&r1=1551878&r2=1551879&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer (original)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/main/resources/META-INF/services/org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer Wed Dec 18 08:02:58 2013
@@ -3,4 +3,4 @@ org.apache.stanbol.enhancer.nlp.json.val
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.MorphoFeaturesSupport
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.PhraseTagSupport
org.apache.stanbol.enhancer.nlp.json.valuetype.impl.DependencyRelationSupport
-org.apache.stanbol.enhancer.nlp.json.valuetype.impl.CorefTagSupport
\ No newline at end of file
+org.apache.stanbol.enhancer.nlp.json.valuetype.impl.CorefFeatureSupport
\ No newline at end of file
Added: stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/CorefFeatureSupportTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/CorefFeatureSupportTest.java?rev=1551879&view=auto
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/CorefFeatureSupportTest.java (added)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/CorefFeatureSupportTest.java Wed Dec 18 08:02:58 2013
@@ -0,0 +1,91 @@
+package org.apache.stanbol.enhancer.nlp.json.valuetype;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.coref.CorefFeature;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Sentence;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/** Round-trip test for the JSON support of {@link CorefFeature} annotations. */
+public class CorefFeatureSupportTest extends ValueTypeSupportTest {
+    private static final String sentenceText1 = "Obama visited China.";
+    private static final String sentenceText2 = " He met with the Chinese prime-minister.";
+    private static final String text = sentenceText1 + sentenceText2;
+
+    // Expected JSON, joined with LINE_SEPARATOR (as in DependencyRelationSupportTest) instead of "\n".
+    private static final String jsonCorefCheckObama = "{" + LINE_SEPARATOR
+        + " \"type\" : \"Token\"," + LINE_SEPARATOR
+        + " \"start\" : 0," + LINE_SEPARATOR
+        + " \"end\" : 5," + LINE_SEPARATOR
+        + " \"stanbol.enhancer.nlp.coref\" : {" + LINE_SEPARATOR
+        + " \"isRepresentative\" : true," + LINE_SEPARATOR
+        + " \"mentions\" : [ {" + LINE_SEPARATOR
+        + " \"type\" : \"Token\"," + LINE_SEPARATOR
+        + " \"start\" : 21," + LINE_SEPARATOR
+        + " \"end\" : 23" + LINE_SEPARATOR
+        + " } ]," + LINE_SEPARATOR
+        + " \"class\" : \"org.apache.stanbol.enhancer.nlp.coref.CorefFeature\"" + LINE_SEPARATOR
+        + " }" + LINE_SEPARATOR
+        + " }";
+
+    private static final String jsonCorefCheckHe = "{" + LINE_SEPARATOR
+        + " \"type\" : \"Token\"," + LINE_SEPARATOR
+        + " \"start\" : 21," + LINE_SEPARATOR
+        + " \"end\" : 23," + LINE_SEPARATOR
+        + " \"stanbol.enhancer.nlp.coref\" : {" + LINE_SEPARATOR
+        + " \"isRepresentative\" : false," + LINE_SEPARATOR
+        + " \"mentions\" : [ {" + LINE_SEPARATOR
+        + " \"type\" : \"Token\"," + LINE_SEPARATOR
+        + " \"start\" : 0," + LINE_SEPARATOR
+        + " \"end\" : 5" + LINE_SEPARATOR
+        + " } ]," + LINE_SEPARATOR
+        + " \"class\" : \"org.apache.stanbol.enhancer.nlp.coref.CorefFeature\"" + LINE_SEPARATOR
+        + " }" + LINE_SEPARATOR
+        + " }";
+
+    /** Creates the AnalysedText for {@link #text} and adds the coref annotations to it. */
+    @BeforeClass
+    public static void setup() throws IOException {
+        setupAnalysedText(text);
+        initCorefAnnotations();
+    }
+
+    /** Checks the serialized form of both tokens and that parsing it yields an equal AnalysedText. */
+    @Test
+    public void testSerializationAndParse() throws IOException {
+        String serialized = getSerializedString();
+        Assert.assertTrue(serialized.contains(jsonCorefCheckObama));
+        Assert.assertTrue(serialized.contains(jsonCorefCheckHe));
+        AnalysedText parsedAt = getParsedAnalysedText(serialized);
+        assertAnalysedTextEquality(parsedAt);
+    }
+
+    /** Adds the tokens "Obama" and "He" and annotates each one with the other as its mention. */
+    private static void initCorefAnnotations() {
+        Sentence sentence1 = at.addSentence(0, sentenceText1.indexOf(".") + 1);
+        Token obama = sentence1.addToken(0, "Obama".length());
+        // Sentence offsets are relative to the whole text: skip the separating space, end at the text end.
+        Sentence sentence2 = at.addSentence(sentenceText1.length() + 1, text.length());
+        int heStartIdx = sentence2.getSpan().toString().indexOf("He");
+        Token he = sentence2.addToken(heStartIdx, heStartIdx + "He".length());
+        Set<Span> obamaMentions = new HashSet<Span>();
+        obamaMentions.add(he);
+        obama.addAnnotation(NlpAnnotations.COREF_ANNOTATION,
+            Value.value(new CorefFeature(true, Collections.unmodifiableSet(obamaMentions))));
+
+        Set<Span> heMentions = new HashSet<Span>();
+        heMentions.add(obama);
+        he.addAnnotation(NlpAnnotations.COREF_ANNOTATION,
+            Value.value(new CorefFeature(false, Collections.unmodifiableSet(heMentions))));
+    }
+}
Added: stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/DependencyRelationSupportTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/DependencyRelationSupportTest.java?rev=1551879&view=auto
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/DependencyRelationSupportTest.java (added)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/DependencyRelationSupportTest.java Wed Dec 18 08:02:58 2013
@@ -0,0 +1,130 @@
+package org.apache.stanbol.enhancer.nlp.json.valuetype;
+
+import java.io.IOException;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.stanbol.enhancer.nlp.NlpAnnotations;
+import org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation;
+import org.apache.stanbol.enhancer.nlp.dependency.GrammaticalRelation;
+import org.apache.stanbol.enhancer.nlp.dependency.GrammaticalRelationTag;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.Sentence;
+import org.apache.stanbol.enhancer.nlp.model.Token;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class DependencyRelationSupportTest extends ValueTypeSupportTest {
+ // Round-trip test for DependencyRelation annotations on the three tokens of one sentence.
+ private static final String text = "Obama visited China.";
+ // Expected serialized form of each annotated token; lines are joined with LINE_SEPARATOR.
+ private static final String jsonCheckObama = "{" + LINE_SEPARATOR
+ + " \"type\" : \"Token\"," + LINE_SEPARATOR
+ + " \"start\" : 0," + LINE_SEPARATOR
+ + " \"end\" : 5," + LINE_SEPARATOR
+ + " \"stanbol.enhancer.nlp.dependency\" : {" + LINE_SEPARATOR
+ + " \"tag\" : \"nsubj\"," + LINE_SEPARATOR
+ + " \"relationType\" : 32," + LINE_SEPARATOR
+ + " \"isDependent\" : true," + LINE_SEPARATOR
+ + " \"partnerType\" : \"Token\"," + LINE_SEPARATOR
+ + " \"partnerStart\" : 6," + LINE_SEPARATOR
+ + " \"partnerEnd\" : 13," + LINE_SEPARATOR
+ + " \"class\" : \"org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation\"" + LINE_SEPARATOR
+ + " }" + LINE_SEPARATOR
+ + " }";
+
+ private static final String jsonCheckVisited = "{" + LINE_SEPARATOR
+ + " \"type\" : \"Token\"," + LINE_SEPARATOR
+ + " \"start\" : 6," + LINE_SEPARATOR
+ + " \"end\" : 13," + LINE_SEPARATOR
+ + " \"stanbol.enhancer.nlp.dependency\" : [ {" + LINE_SEPARATOR
+ + " \"tag\" : \"root\"," + LINE_SEPARATOR
+ + " \"relationType\" : 56," + LINE_SEPARATOR
+ + " \"isDependent\" : true," + LINE_SEPARATOR
+ + " \"partnerType\" : \"ROOT\"," + LINE_SEPARATOR
+ + " \"partnerStart\" : 0," + LINE_SEPARATOR
+ + " \"partnerEnd\" : 0," + LINE_SEPARATOR
+ + " \"class\" : \"org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation\"" + LINE_SEPARATOR
+ + " }, {" + LINE_SEPARATOR
+ + " \"tag\" : \"nsubj\"," + LINE_SEPARATOR
+ + " \"relationType\" : 32," + LINE_SEPARATOR
+ + " \"isDependent\" : false," + LINE_SEPARATOR
+ + " \"partnerType\" : \"Token\"," + LINE_SEPARATOR
+ + " \"partnerStart\" : 0," + LINE_SEPARATOR
+ + " \"partnerEnd\" : 5," + LINE_SEPARATOR
+ + " \"class\" : \"org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation\"" + LINE_SEPARATOR
+ + " }, {" + LINE_SEPARATOR
+ + " \"tag\" : \"dobj\"," + LINE_SEPARATOR
+ + " \"relationType\" : 24," + LINE_SEPARATOR
+ + " \"isDependent\" : false," + LINE_SEPARATOR
+ + " \"partnerType\" : \"Token\"," + LINE_SEPARATOR
+ + " \"partnerStart\" : 14," + LINE_SEPARATOR
+ + " \"partnerEnd\" : 19," + LINE_SEPARATOR
+ + " \"class\" : \"org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation\"" + LINE_SEPARATOR
+ + " } ]" + LINE_SEPARATOR
+ + " }";
+
+ private static final String jsonCheckChina = "{" + LINE_SEPARATOR
+ + " \"type\" : \"Token\"," + LINE_SEPARATOR
+ + " \"start\" : 14," + LINE_SEPARATOR
+ + " \"end\" : 19," + LINE_SEPARATOR
+ + " \"stanbol.enhancer.nlp.dependency\" : {" + LINE_SEPARATOR
+ + " \"tag\" : \"dobj\"," + LINE_SEPARATOR
+ + " \"relationType\" : 24," + LINE_SEPARATOR
+ + " \"isDependent\" : true," + LINE_SEPARATOR
+ + " \"partnerType\" : \"Token\"," + LINE_SEPARATOR
+ + " \"partnerStart\" : 6," + LINE_SEPARATOR
+ + " \"partnerEnd\" : 13," + LINE_SEPARATOR
+ + " \"class\" : \"org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation\"" + LINE_SEPARATOR
+ + " }" + LINE_SEPARATOR
+ + " }";
+ // Creates the AnalysedText for the sample text once and adds the dependency annotations to it.
+ @BeforeClass
+ public static void setup() throws IOException {
+ setupAnalysedText(text);
+
+ initDepTreeAnnotations();
+ }
+ // Checks that the output contains each expected token JSON and that parsing it yields an equal AnalysedText.
+ @Test
+ public void testSerializationAndParse() throws IOException {
+ String serialized = getSerializedString();
+ Assert.assertTrue(serialized.contains(jsonCheckObama));
+ Assert.assertTrue(serialized.contains(jsonCheckVisited));
+ Assert.assertTrue(serialized.contains(jsonCheckChina));
+
+ AnalysedText parsedAt = getParsedAnalysedText(serialized);
+ assertAnalysedTextEquality(parsedAt);
+ }
+ // Adds one sentence with the tokens "Obama", "visited" and "China" and annotates all three tokens.
+ private static void initDepTreeAnnotations() {
+ Sentence sentence = at.addSentence(0, text.indexOf(".") + 1);
+ Token obama = sentence.addToken(0, "Obama".length());
+
+ int visitedStartIdx = sentence.getSpan().toString().indexOf("visited");
+ Token visited = sentence.addToken(visitedStartIdx, visitedStartIdx + "visited".length());
+
+ int chinaStartIdx = sentence.getSpan().toString().indexOf("China");
+ Token china = sentence.addToken(chinaStartIdx, chinaStartIdx + "China".length());
+
+ GrammaticalRelationTag nSubjGrammRelTag = new GrammaticalRelationTag(
+ "nsubj", GrammaticalRelation.NominalSubject);
+ obama.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
+ Value.value(new DependencyRelation(nSubjGrammRelTag, true, visited)));
+ // "visited" gets three relations: root (null partner) plus nsubj/dobj with the dependent flag false.
+ GrammaticalRelationTag rootGrammRelTag = new GrammaticalRelationTag(
+ "root", GrammaticalRelation.Root);
+ GrammaticalRelationTag dobjGrammRelTag = new GrammaticalRelationTag(
+ "dobj", GrammaticalRelation.DirectObject);
+ visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
+ Value.value(new DependencyRelation(rootGrammRelTag, true, null)));
+ visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
+ Value.value(new DependencyRelation(nSubjGrammRelTag, false, obama)));
+ visited.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
+ Value.value(new DependencyRelation(dobjGrammRelTag, false, china)));
+
+ china.addAnnotation(NlpAnnotations.DEPENDENCY_ANNOTATION,
+ Value.value(new DependencyRelation(dobjGrammRelTag, true, visited)));
+ }
+}
Added: stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/ValueTypeSupportTest.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/ValueTypeSupportTest.java?rev=1551879&view=auto
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/ValueTypeSupportTest.java (added)
+++ stanbol/trunk/enhancer/generic/nlp-json/src/test/java/org/apache/stanbol/enhancer/nlp/json/valuetype/ValueTypeSupportTest.java Wed Dec 18 08:02:58 2013
@@ -0,0 +1,88 @@
+package org.apache.stanbol.enhancer.nlp.json.valuetype;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
+import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextParser;
+import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextSerializer;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
+import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
+import org.apache.stanbol.enhancer.nlp.model.Span;
+import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;
+import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.servicesapi.Blob;
+import org.apache.stanbol.enhancer.servicesapi.ContentItem;
+import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
+import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
+import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
+import org.junit.Assert;
+
+/** Shared fixture and JSON round-trip helpers for the value type support tests. */
+public abstract class ValueTypeSupportTest {
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+    /** The line separator used by the Environment running this test */
+    protected static final String LINE_SEPARATOR = System.lineSeparator();
+    /** AnalysedText under test; replaced on every call to setupAnalysedText */
+    protected static AnalysedText at;
+    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();
+    private static final AnalysedTextFactory atFactory = AnalysedTextFactory.getDefaultInstance();
+    private static ContentItem ci;
+    private static Entry<UriRef,Blob> textBlob;
+
+    /** Creates a content item for <code>text</code> and an AnalysedText over its "text/plain" blob. */
+    protected static void setupAnalysedText(String text) throws IOException {
+        ci = ciFactory.createContentItem(new StringSource(text));
+        textBlob = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
+        at = atFactory.createAnalysedText(textBlob.getValue());
+    }
+
+    /** Serializes {@link #at} with the default serializer and decodes the bytes as UTF-8. */
+    protected String getSerializedString() throws IOException {
+        ByteArrayOutputStream bout = new ByteArrayOutputStream();
+        AnalyzedTextSerializer serializer = AnalyzedTextSerializer.getDefaultInstance();
+        serializer.serialize(at, bout, null);
+        return new String(bout.toByteArray(), UTF8);
+    }
+
+    /** Parses the JSON into a new AnalysedText created over the same text blob. */
+    protected AnalysedText getParsedAnalysedText(String serializedData) throws IOException {
+        AnalyzedTextParser parser = AnalyzedTextParser.getDefaultInstance();
+        // encode with the charset getSerializedString() decoded with, not the platform default
+        byte[] bytes = serializedData.getBytes(UTF8);
+        return parser.parse(new ByteArrayInputStream(bytes), null,
+            atFactory.createAnalysedText(textBlob.getValue()));
+    }
+
+    /** Asserts that <code>parsedAt</code> has the same spans, annotation keys and values as {@link #at}. */
+    protected void assertAnalysedTextEquality(AnalysedText parsedAt) {
+        Assert.assertEquals(at, parsedAt);
+        Iterator<Span> origSpanIt = at.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
+        Iterator<Span> parsedSpanIt = parsedAt.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));
+        while(origSpanIt.hasNext() && parsedSpanIt.hasNext()){
+            Span orig = origSpanIt.next();
+            Span parsed = parsedSpanIt.next();
+            Assert.assertEquals(orig, parsed);
+            Set<String> origKeys = orig.getKeys();
+            Set<String> parsedKeys = parsed.getKeys();
+            Assert.assertEquals(origKeys, parsedKeys);
+            for(String key : origKeys){
+                List<Value<?>> origValues = orig.getValues(key);
+                List<Value<?>> parsedValues = parsed.getValues(key);
+                Assert.assertEquals(origValues, parsedValues);
+            }
+        }
+        // the loop ends with the shorter iterator; neither side may have spans left over
+        Assert.assertFalse("parsed text is missing spans", origSpanIt.hasNext());
+        Assert.assertFalse("parsed text has additional spans", parsedSpanIt.hasNext());
+    }
+}
Modified: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java?rev=1551879&r1=1551878&r2=1551879&view=diff
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java (original)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/NlpAnnotations.java Wed Dec 18 08:02:58 2013
@@ -16,7 +16,7 @@
*/
package org.apache.stanbol.enhancer.nlp;
-import org.apache.stanbol.enhancer.nlp.coref.CorefTag;
+import org.apache.stanbol.enhancer.nlp.coref.CorefFeature;
import org.apache.stanbol.enhancer.nlp.dependency.DependencyRelation;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Chunk;
@@ -85,8 +85,8 @@ public interface NlpAnnotations {
* mention/reference of a given word. Typically used on {@link Token}s.
* <p>
*/
- Annotation<CorefTag> COREF_ANNOTATION = new Annotation<CorefTag>(
- "stanbol.enhancer.nlp.coref", CorefTag.class);
+ Annotation<CorefFeature> COREF_ANNOTATION = new Annotation<CorefFeature>(
+ "stanbol.enhancer.nlp.coref", CorefFeature.class);
/*
* Currently only used as part of MorphoFeatures
Added: stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefFeature.java
URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefFeature.java?rev=1551879&view=auto
==============================================================================
--- stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefFeature.java (added)
+++ stanbol/trunk/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/coref/CorefFeature.java Wed Dec 18 08:02:58 2013
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.stanbol.enhancer.nlp.coref;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.stanbol.enhancer.nlp.model.Span;
+
+/**
+ * Represents a coreference resolution feature attached to a {@link Token}. It
+ * contains information about other {@link Token}s which refer to the
+ * aforementioned {@link Token}.
+ *
+ * @author Cristian Petroaca
+ *
+ */
+public class CorefFeature {
+    /** Whether the span this feature is attached to is the representative mention in the chain. */
+    private final boolean isRepresentative;
+
+    /** Spans representing mentions of the span to which this feature is attached. */
+    private final Set<Span> mentions;
+
+    /** Creates a non-representative feature without mentions. */
+    public CorefFeature() {
+        this(false);
+    }
+
+    /** Creates a feature with the given representative state and no mentions. */
+    public CorefFeature(boolean isRepresentative) {
+        this(isRepresentative, Collections.<Span> emptySet());
+    }
+
+    /** Creates a feature; <code>mentions</code> is stored as passed, without copying. */
+    public CorefFeature(boolean isRepresentative, Set<Span> mentions) {
+        this.isRepresentative = isRepresentative;
+        this.mentions = mentions;
+    }
+
+    /** @return whether the annotated span is the representative mention in the chain */
+    public boolean isRepresentative() {
+        return this.isRepresentative;
+    }
+
+    /**
+     * Getter for the set of mentions of the span to which this object is attached.
+     *
+     * @return the mention set exactly as passed to the constructor
+     */
+    public Set<Span> getMentions() {
+        return this.mentions;
+    }
+
+    /** Hash over both fields, consistent with {@link #equals(Object)}. */
+    @Override
+    public int hashCode() {
+        int hash = (this.mentions != null) ? this.mentions.hashCode() : 0;
+        return 31 * hash + (this.isRepresentative ? 1 : 0);
+    }
+
+    /** Equal when the representative flag and the mention sets both match. */
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof CorefFeature)) {
+            return false;
+        }
+        CorefFeature other = (CorefFeature) obj;
+        // null-safe, because hashCode() already tolerates a null mention set
+        boolean sameMentions = (this.mentions == null) ? other.getMentions() == null
+                : this.mentions.equals(other.getMentions());
+        return this.isRepresentative == other.isRepresentative && sameMentions;
+    }
+}