You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/03/13 11:18:38 UTC
svn commit: r1300054 - /opennlp/sandbox/wikinews-importer/samples/wikinews.xml
Author: joern
Date: Tue Mar 13 10:18:38 2012
New Revision: 1300054
URL: http://svn.apache.org/viewvc?rev=1300054&view=rev
Log:
OPENNLP-468 Added lowercase filter to better work with standard analyzer
Modified:
    opennlp/sandbox/wikinews-importer/samples/wikinews.xml
Modified: opennlp/sandbox/wikinews-importer/samples/wikinews.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/wikinews-importer/samples/wikinews.xml?rev=1300054&r1=1300053&r2=1300054&view=diff
==============================================================================
--- opennlp/sandbox/wikinews-importer/samples/wikinews.xml (original)
+++ opennlp/sandbox/wikinews-importer/samples/wikinews.xml Tue Mar 13 10:18:38 2012
@@ -24,26 +24,84 @@
<field name="headline" index="yes" stored="no">
<annotations>
- <annotation type="org.apache.opennlp.annotations.Headline" tokenizer="standard"/>
+ <annotation type="org.apache.opennlp.annotations.Headline" tokenizer="standard">
+ <filters>
+ <filter name="lowercase"/>
+ </filters>
+ </annotation>
</annotations>
</field>
<field name="text" index="yes" stored="no">
<annotations>
- <annotation type="uima.tcas.DocumentAnnotation" tokenizer="standard"/>
+ <annotation type="uima.tcas.DocumentAnnotation" tokenizer="standard">
+ <filters>
+ <filter name="lowercase"/>
+ </filters>
+ </annotation>
+
</annotations>
</field>
<field name="person" index="yes" stored="no">
<annotations>
- <annotation type="org.apache.opennlp.annotations.Person" tokenizer="standard"/>
+ <annotation type="org.apache.opennlp.annotations.Person" tokenizer="standard">
+ <filters>
+ <filter name="lowercase"/>
+ </filters>
+ </annotation>
+
</annotations>
</field>
<field name="organization" index="yes" stored="no">
<annotations>
- <annotation type="org.apache.opennlp.annotations.Organization" tokenizer="standard"/>
+ <annotation type="org.apache.opennlp.annotations.Organization" tokenizer="standard">
+ <filters>
+ <filter name="lowercase"/>
+ </filters>
+ </annotation>
+
</annotations>
</field>
+ <field name="status-sentence" index="yes" stored="no" termVector="no">
+ <annotations>
+ <annotation type="bumblebee.annotations.AnnotationStatus" tokenizer="standard">
+ <features>
+ <feature name="sentence"/>
+ </features>
+ </annotation>
+ </annotations>
+ </field>
+
+ <field name="status-token" index="yes" stored="no" termVector="no">
+ <annotations>
+ <annotation type="bumblebee.annotations.AnnotationStatus">
+ <features>
+ <feature name="token"/>
+ </features>
+ </annotation>
+ </annotations>
+ </field>
+
+ <field name="status-person" index="yes" stored="yes">
+ <annotations>
+ <annotation type="bumblebee.annotations.AnnotationStatus">
+ <features>
+ <feature name="token"/>
+ </features>
+ </annotation>
+ </annotations>
+ </field>
+
+ <field name="status-organization" index="yes">
+ <annotations>
+ <annotation type="bumblebee.annotations.AnnotationStatus">
+ <features>
+ <feature name="token"/>
+ </features>
+ </annotation>
+ </annotations>
+ </field>
</fields>