You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2018/04/18 17:42:47 UTC

svn commit: r1829466 [1/2] - in /uima/addons/trunk/ConceptMapper: ./ src/docbook/ src/main/java/org/apache/uima/conceptMapper/ src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/ src/main/java/org/apache/uima/conceptMapper/support/dictionar...

Author: schor
Date: Wed Apr 18 17:42:46 2018
New Revision: 1829466

URL: http://svn.apache.org/viewvc?rev=1829466&view=rev
Log:
[UIMA-5764] concept mapper - add buffering to IO, fix up javadocs, have pom inherit from uima-wide parent so can be independently releasable, put xml resources into the jar, modify some configs to allow reading from classpath source or file system source.

Added:
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml
    uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml
    uima/addons/trunk/ConceptMapper/src/main/resources/collection_processing_engines/
    uima/addons/trunk/ConceptMapper/src/main/resources/collection_processing_engines/TestConceptMapperCPE.xml
Modified:
    uima/addons/trunk/ConceptMapper/pom.xml
    uima/addons/trunk/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java
    uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java
    uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java
    uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java

Modified: uima/addons/trunk/ConceptMapper/pom.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/pom.xml?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/pom.xml (original)
+++ uima/addons/trunk/ConceptMapper/pom.xml Wed Apr 18 17:42:46 2018
@@ -22,13 +22,13 @@
   
   <parent>
     <groupId>org.apache.uima</groupId>
-    <artifactId>uima-addons-parent</artifactId>
-    <version>2.3.2-SNAPSHOT</version>
-    <relativePath>../uima-addons-parent</relativePath>
+    <artifactId>parent-pom</artifactId>
+    <version>11</version>
+    <relativePath></relativePath>
   </parent>
   
   <artifactId>ConceptMapper</artifactId>
-  <version>2.3.2-SNAPSHOT</version>
+  <version>2.10.2-SNAPSHOT</version>
   <name>Apache UIMA Annotator: ${project.artifactId}</name>
   <description>Extracts concepts from a CAS</description>
   <url>${uimaWebsiteUrl}</url>
@@ -53,11 +53,101 @@
     </url>
   </scm>
   
+    <!-- The repositories and pluginRepositories section is duplicated from
+       the parent pom one, and adds the Apache Snapshot Nexus repository
+       where UIMA snapshots are deployed.  This is needed if for instance,
+       a project depends on some new SNAPSHOT level of a build tool, 
+       where the user hasn't checked out the build tooling.
+       
+       This allows maven to find the snapshots when looking for the parent of
+       this pom -->
+  <repositories>
+    <repository>
+      <id>eclipsePlugins</id>
+      <name>Eclipse components</name>
+      <layout>default</layout>
+      <url>http://repo1.maven.org/eclipse</url>
+      
+      <releases>
+        <updatePolicy>never</updatePolicy>
+        <checksumPolicy>fail</checksumPolicy>
+      </releases>
+      
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+
+    <!-- modify central repository access:
+         Turn on checksum checking-->
+    <repository>
+      <id>central</id>
+      <name>Maven Repository Switchboard</name>
+      <layout>default</layout>
+      <url>http://repo1.maven.org/maven2</url>
+
+      <releases>
+        <enabled>true</enabled>
+        <checksumPolicy>fail</checksumPolicy>
+        <updatePolicy>never</updatePolicy>
+      </releases>
+
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+
+    </repository>
+    
+    <repository>
+      <id>apache.snapshots</id>
+      <name>Apache Snapshot Repository</name>
+      <url>http://repository.apache.org/snapshots</url>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+    </repository>    
+    
+  </repositories>
+  
+  <pluginRepositories>
+    <pluginRepository>
+      <id>apache.snapshots.plugins</id>
+      <name>Apache Snapshot Repository - Maven plugins</name>
+      <url>http://repository.apache.org/snapshots</url>
+      <layout>default</layout>
+      <releases>
+        <enabled>false</enabled>
+      </releases>
+      <snapshots>
+        <enabled>true</enabled>
+        <checksumPolicy>fail</checksumPolicy>
+        <updatePolicy>never</updatePolicy>        
+      </snapshots>
+    </pluginRepository>
+  </pluginRepositories>
+  
   <properties>
+    <jiraVersion>ConceptMapper-2.10.2</jiraVersion>
     <uimaScmProject>${project.artifactId}</uimaScmProject>
-    <uimaDependencyVersion>2.4.0</uimaDependencyVersion>
+    <uimaDependencyVersion>2.10.2</uimaDependencyVersion>
     <pearMainDescriptor>desc/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml</pearMainDescriptor>
     <bookNameRoot>ConceptMapperAnnotatorUserGuide</bookNameRoot>
+ 
+    <!-- 
+     Configuring settings is best done through default properties that multiple plugins pick up.
+     Local configurations within plugins should be avoided. Where plugins do not pick up default
+     properties already, they should be injected manually into the plugins. 
+    -->    
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <maven.compiler.target>1.7</maven.compiler.target>
+    <maven.compiler.source>1.7</maven.compiler.source>
+    <maven.surefire.heap>512m</maven.surefire.heap>
+    <maven.surefire.argLine />
+    <maven.surefire.java9 />
+    
+    <jacoco.argLine />
+    <api_check_oldVersion>2.3.1</api_check_oldVersion>
+    
   </properties>
       
   <dependencies>
@@ -70,7 +160,7 @@
   </dependencies>
   
   <build>
-    <finalName>uima-an-conceptMapper</finalName>
+    
     <pluginManagement>
       <plugins>
         <plugin>
@@ -95,9 +185,18 @@
               </configuration>
             </execution>
           </executions>
-        </plugin>         
-      </plugins>
+        </plugin> 
+     
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <version>2.20.1</version>
+          <configuration>
+            <argLine>@{jacoco.argLine} -Xmx@{maven.surefire.heap} -Xms@{maven.surefire.heap} @{maven.surefire.argLine} @{maven.surefire.java9}</argLine>
+          </configuration>
+        </plugin>
+       </plugins>
     </pluginManagement> 
-    
+       
   </build>
 </project>

Modified: uima/addons/trunk/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml (original)
+++ uima/addons/trunk/ConceptMapper/src/docbook/ConceptMapperAnnotatorUserGuide.xml Wed Apr 18 17:42:46 2018
@@ -98,7 +98,23 @@
 			The result of running ConceptMapper are UIMA annotations, and there are two configuration parameters that are used to map the attributes from the dictionary (see <xref linkend="ConceptMapper.param.attributelist"/>) to features of UIMA annotations (see <xref linkend="ConceptMapper.param.featurelist"/>).
 		</para>
 		<para>
-			The entire dictionary is loaded into memory, which, in conjunction with an efficient data structure, provides very fast lookups. As stated earlier, dictionaries with millions of entries have been used without any performance issues. The obvious drawback to storing the dictionary in memory is that large dictionaries require large amounts of memory; this is partially mitigated by the fact that the dictionary is implemented as a UIMA shared resource (see <xref linkend="ConceptMapper.res.dictionaryfile"/>). This means that multiple annotators, such as multiple instances of ConceptMapper that are set up using different parameters, can all access it without having to load it more than once. The dictionary loader is specified in the external resource section of the descriptor, and is expected to implement the interface <interfacename>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource</interfacename>. Two implementations are included in the distribution, <classname>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl</classname>, the standard implementation, which loads an XML version of a dictionary, and <classname>org.apache.uima.conceptMapper.support.dictionaryResource.CompiledDictionaryResource_impl</classname> which loads a pre-compiled version, for faster loading. The compiler is supplied as <classname>org.apache.uima.conceptMapper.dictionaryCompiler.CompileDictionary</classname>, which takes two arguments, a ConceptMapper analysis engine descriptor that loads the dictionary using the standard dictionary loader, and the name of the output file into which to write the compiled dictionary.
+			The entire dictionary is loaded into memory, which, in conjunction with an efficient data structure, provides very fast lookups. 
+			As stated earlier, dictionaries with millions of entries have been used without any performance issues. 
+			The obvious drawback to storing the dictionary in memory is that large dictionaries require large amounts of memory; 
+			this is partially mitigated by the fact that the dictionary is implemented as a UIMA shared resource 
+			(see <xref linkend="ConceptMapper.res.dictionaryfile"/>). 
+			This means that multiple annotators, such as multiple instances of ConceptMapper that are set up using different parameters, 
+			can all access it without having to load it more than once. 
+			The dictionary loader is specified in the external resource section of the descriptor, 
+			and is expected to implement the interface <interfacename>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource</interfacename>. 
+			Two implementations are included in the distribution, 
+			<classname>org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl</classname>, 
+			the standard implementation, which loads an XML version of a dictionary, 
+			and <classname>org.apache.uima.conceptMapper.support.dictionaryResource.CompiledDictionaryResource_impl</classname> 
+			which loads a pre-compiled version, for faster loading. 
+			The compiler is supplied as <classname>org.apache.uima.conceptMapper.dictionaryCompiler.CompileDictionary</classname>, 
+			which takes two arguments, a ConceptMapper analysis engine descriptor that loads the dictionary using the standard dictionary loader, 
+			and the name of the output file into which to write the compiled dictionary.
 		</para>
 		</section>
 		<section id="tokenizer">

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/ConceptMapper.java Wed Apr 18 17:42:46 2018
@@ -28,13 +28,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.ResultSpecification;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
-import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.Feature;
@@ -43,15 +43,14 @@ import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.cas.CAS;
 import org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource;
-import org.apache.uima.conceptMapper.support.dictionaryResource.EntryProperties;
 import org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource.DictEntry;
+import org.apache.uima.conceptMapper.support.dictionaryResource.EntryProperties;
 import org.apache.uima.conceptMapper.support.tokens.TokenFilter;
 import org.apache.uima.conceptMapper.support.tokens.TokenNormalizer;
 import org.apache.uima.conceptMapper.support.tokens.UnknownTypeException;
-import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
@@ -221,7 +220,7 @@ public class ConceptMapper extends JCasA
 
   private Type spanFeatureStructureType;
 
-  private Logger logger;
+  public  Logger logger;
 
   private JCas jcas;
 
@@ -368,6 +367,8 @@ public class ConceptMapper extends JCasA
    * 
    * @param typeSystem
    *          the current type system.
+   * @throws AnnotatorConfigurationException -
+   * @throws AnnotatorInitializationException - 
    * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#typeSystemInit(TypeSystem)
    */
   public void typeSystemInit(TypeSystem typeSystem) throws AnnotatorConfigurationException,
@@ -482,10 +483,9 @@ public class ConceptMapper extends JCasA
    * Perform the actual analysis. Iterate over the document content looking for any matching words
    * or phrases in the loaded dictionary and post an annotation for each match found.
    * 
-   * @param tcas
+   * @param jCas
    *          the current CAS to process.
-   * @param aResultSpec
-   *          a specification of the result annotation that should be created by this annotator
+   * @throws AnalysisEngineProcessException -
    * 
    * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#process(CAS,ResultSpecification)
    */
@@ -844,10 +844,11 @@ public class ConceptMapper extends JCasA
   }
 
   /**
-   * @param searchStrategy
-   * @param tcas
-   * @param tokens
-   * @param spanAnnotation
+   * @param searchStrategy -
+   * @param findAllMatches true to find all matches
+   * @param tcas the Cas
+   * @param tokens -
+   * @param spanAnnotation -
    */
   protected void processTokenList(int searchStrategy, boolean findAllMatches, CAS tcas,
           ArrayList<AnnotationFS> tokens, Annotation spanAnnotation) {
@@ -961,10 +962,15 @@ public class ConceptMapper extends JCasA
   }
 
   /**
-   * @param start
-   * @param end
-   * @param properties
-   * @param matched
+   * @param tcas -
+   * @param start -
+   * @param end -
+   * @param properties -
+   * @param spanAnnotation -
+   * @param matchedText -
+   * @param matched -
+   * @param log -
+
    */
   protected void makeAnnotation(CAS tcas, int start, int end, EntryProperties properties,
           Annotation spanAnnotation, String matchedText, Collection<AnnotationFS> matched,

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm.java Wed Apr 18 17:42:46 2018
@@ -33,19 +33,28 @@ public class DictTerm extends Annotation
   protected DictTerm() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public DictTerm(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public DictTerm(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public DictTerm(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);
@@ -65,13 +74,16 @@ public class DictTerm extends Annotation
   //* Feature: DictCanon
 
   /** getter for DictCanon - gets canonical form
-   * @generated */
+   * @generated
+   * @return - 
+   * */
   public String getDictCanon() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_DictCanon == null)
       jcasType.jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
     return jcasType.ll_cas.ll_getStringValue(addr, ((DictTerm_Type)jcasType).casFeatCode_DictCanon);}
     
   /** setter for DictCanon - sets canonical form 
+   * @param v -
    * @generated */
   public void setDictCanon(String v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_DictCanon == null)
@@ -83,13 +95,15 @@ public class DictTerm extends Annotation
   //* Feature: enclosingSpan
 
   /** getter for enclosingSpan - gets span that this NoTerm is contained within (i.e. its sentence)
+   * @return -
    * @generated */
   public Annotation getEnclosingSpan() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_enclosingSpan == null)
       jcasType.jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
     return (Annotation)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_enclosingSpan)));}
     
-  /** setter for enclosingSpan - sets span that this NoTerm is contained within (i.e. its sentence) 
+  /** setter for enclosingSpan - sets span that this NoTerm is contained within (i.e. its sentence)
+   * @param v -  
    * @generated */
   public void setEnclosingSpan(Annotation v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_enclosingSpan == null)
@@ -101,13 +115,15 @@ public class DictTerm extends Annotation
   //* Feature: matchedText
 
   /** getter for matchedText - gets 
+   * @return -
    * @generated */
   public String getMatchedText() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedText == null)
       jcasType.jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
     return jcasType.ll_cas.ll_getStringValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedText);}
     
-  /** setter for matchedText - sets  
+  /** setter for matchedText - sets
+   * @param v -  
    * @generated */
   public void setMatchedText(String v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedText == null)
@@ -118,7 +134,8 @@ public class DictTerm extends Annotation
   //*--------------*
   //* Feature: matchedTokens
 
-  /** getter for matchedTokens - gets 
+  /** getter for matchedTokens - gets
+   * @return - 
    * @generated */
   public FSArray getMatchedTokens() {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
@@ -126,13 +143,17 @@ public class DictTerm extends Annotation
     return (FSArray)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens)));}
     
   /** setter for matchedTokens - sets  
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setMatchedTokens(FSArray v) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
       jcasType.jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     jcasType.ll_cas.ll_setRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens, jcasType.ll_cas.ll_getFSRef(v));}    
     
   /** indexed getter for matchedTokens - gets an indexed value - 
+   * @param i the index 
+   * @return - 
    * @generated */
   public TOP getMatchedTokens(int i) {
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)
@@ -140,7 +161,9 @@ public class DictTerm extends Annotation
     jcasType.jcas.checkArrayBounds(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens), i);
     return (TOP)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefArrayValue(jcasType.ll_cas.ll_getRefValue(addr, ((DictTerm_Type)jcasType).casFeatCode_matchedTokens), i)));}
 
-  /** indexed setter for matchedTokens - sets an indexed value - 
+  /** indexed setter for matchedTokens - sets an indexed value -
+   * @param i the index
+   * @param v the value to set 
    * @generated */
   public void setMatchedTokens(int i, TOP v) { 
     if (DictTerm_Type.featOkTst && ((DictTerm_Type)jcasType).casFeat_matchedTokens == null)

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/DictTerm_Type.java Wed Apr 18 17:42:46 2018
@@ -46,13 +46,19 @@ public class DictTerm_Type extends Annot
   final Feature casFeat_DictCanon;
   /** @generated */
   final int     casFeatCode_DictCanon;
-  /** @generated */ 
+  /** @generated
+   * @param addr -
+   * @return -
+   */
   public String getDictCanon(int addr) {
         if (featOkTst && casFeat_DictCanon == null)
       jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getStringValue(addr, casFeatCode_DictCanon);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setDictCanon(int addr, String v) {
         if (featOkTst && casFeat_DictCanon == null)
       jcas.throwFeatMissing("DictCanon", "org.apache.uima.conceptMapper.DictTerm");
@@ -64,13 +70,19 @@ public class DictTerm_Type extends Annot
   final Feature casFeat_enclosingSpan;
   /** @generated */
   final int     casFeatCode_enclosingSpan;
-  /** @generated */ 
+  /** @generated
+   * @param addr - 
+   * @return - 
+   */  
   public int getEnclosingSpan(int addr) {
         if (featOkTst && casFeat_enclosingSpan == null)
       jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getRefValue(addr, casFeatCode_enclosingSpan);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */    
   public void setEnclosingSpan(int addr, int v) {
         if (featOkTst && casFeat_enclosingSpan == null)
       jcas.throwFeatMissing("enclosingSpan", "org.apache.uima.conceptMapper.DictTerm");
@@ -82,13 +94,19 @@ public class DictTerm_Type extends Annot
   final Feature casFeat_matchedText;
   /** @generated */
   final int     casFeatCode_matchedText;
-  /** @generated */ 
+  /** @generated
+   * @param addr -
+   * @return - 
+   */ 
   public String getMatchedText(int addr) {
         if (featOkTst && casFeat_matchedText == null)
       jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getStringValue(addr, casFeatCode_matchedText);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */    
   public void setMatchedText(int addr, String v) {
         if (featOkTst && casFeat_matchedText == null)
       jcas.throwFeatMissing("matchedText", "org.apache.uima.conceptMapper.DictTerm");
@@ -100,19 +118,29 @@ public class DictTerm_Type extends Annot
   final Feature casFeat_matchedTokens;
   /** @generated */
   final int     casFeatCode_matchedTokens;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public int getMatchedTokens(int addr) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     return ll_cas.ll_getRefValue(addr, casFeatCode_matchedTokens);
   }
-  /** @generated */    
+  /** @generated
+   * @param addr -
+   * @param v -
+   */
   public void setMatchedTokens(int addr, int v) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
     ll_cas.ll_setRefValue(addr, casFeatCode_matchedTokens, v);}
     
-   /** @generated */
+   /** @generated 
+    * @param addr -
+    * @param i  the index
+    * @return -
+    */
   public int getMatchedTokens(int addr, int i) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
@@ -122,7 +150,12 @@ public class DictTerm_Type extends Annot
   return ll_cas.ll_getRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_matchedTokens), i);
   }
    
-  /** @generated */ 
+  /** @generated 
+   *  
+   * @param addr -
+   * @param i -
+   * @param v -
+   */
   public void setMatchedTokens(int addr, int i, int v) {
         if (featOkTst && casFeat_matchedTokens == null)
       jcas.throwFeatMissing("matchedTokens", "org.apache.uima.conceptMapper.DictTerm");
@@ -136,7 +169,10 @@ public class DictTerm_Type extends Annot
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated
+   * @param jcas -
+   * @param casType -
+   */
   public DictTerm_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/Logger.java Wed Apr 18 17:42:46 2018
@@ -47,6 +47,10 @@ public class Logger {
     }
   }
 
+  public void logConfig(String message) {
+    log(Level.CONFIG, message);
+  }
+  
   public void logError(String message) {
     log(Level.SEVERE, message);
   }
@@ -66,4 +70,8 @@ public class Logger {
   public void logFinest(String message) {
     log(Level.FINEST, message);
   }
+  
+  public boolean isLoggable(Level level) {
+    return logger.isLoggable(level);
+  }
 }

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/dictionaryCompiler/CompileDictionary.java Wed Apr 18 17:42:46 2018
@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.dictionaryCompiler;
 
+import java.io.BufferedOutputStream;
 import java.io.FileOutputStream;
+import java.io.OutputStream;
 
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -45,7 +47,7 @@ public class CompileDictionary {
     DictionaryResource_impl dict = (DictionaryResource_impl) ae.getResourceManager().getResource(
     		dictionaryResourceName);
 
-    FileOutputStream output = new FileOutputStream(args[1]);
+    OutputStream output = new BufferedOutputStream(new FileOutputStream(args[1]));
     dict.serializeEntries(output);
     output.close();
     ae.destroy();

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/CompiledDictionaryResource_impl.java Wed Apr 18 17:42:46 2018
@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.ObjectInputStream;
 import java.util.Enumeration;
 import java.util.Hashtable;
@@ -33,6 +35,7 @@ import org.apache.uima.resource.SharedRe
  */
 
 public class CompiledDictionaryResource_impl implements DictionaryResource, SharedResourceObject {
+  
   /**
    * Hashtable of first words. Contains a DictEntries object keyed on word string for the first word
    * of every entry in the specified dictionary.
@@ -43,6 +46,8 @@ public class CompiledDictionaryResource_
   public DictionaryResource newDictionaryResource(int initialSize) {
     throw new UnsupportedOperationException();
   }
+  
+  
 
   public DictEntriesByLength getEntries(String key) {
     return dictImpl.get(key);
@@ -59,10 +64,12 @@ public class CompiledDictionaryResource_
   @SuppressWarnings("unchecked")
   public void load(DataResource data) throws ResourceInitializationException {
     try {
-      ObjectInputStream ois = new ObjectInputStream(data.getInputStream());
+      InputStream iStream = data.getInputStream();
+      BufferedInputStream bis = new BufferedInputStream(iStream);
+      ObjectInputStream ois = new ObjectInputStream(bis);
       entryPropertiesRoot = (EntryPropertiesRoot) ois.readObject();
       dictImpl = (Hashtable) ois.readObject();
-      ois.close();
+      ois.close();          
     } catch (IOException e) {
       throw new ResourceInitializationException(e);
     } catch (ClassNotFoundException e) {

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource.java Wed Apr 18 17:42:46 2018
@@ -53,9 +53,9 @@ public interface DictionaryResource {
   public interface DictEntries extends Serializable {
 
     /**
-     * @param elements
-     * @param unsorted
-     * @param props
+     * @param elements -
+     * @param unsorted -
+     * @param props -
      */
     void putEntry(String[] elements, String unsorted, EntryProperties props);
 
@@ -87,7 +87,7 @@ public interface DictionaryResource {
   /**
    * return data structure containing a list of dictionary entries, sorted by number of tokens
    * 
-   * @param key
+   * @param key -
    * @return data structure containing a list of dictionary entries, sorted by number of tokens
    */
   public DictEntriesByLength getEntries(String key);
@@ -113,13 +113,13 @@ public interface DictionaryResource {
   public String toString();
 
   /**
-   * @param context
-   * @param logger
-   * @param tokenAnnotationName
-   * @param tokenTypeFeatureName
-   * @param tokenClassFeatureName
-   * @param tokenizerDescriptor
-   * @throws ResourceInitializationException
+   * @param context -
+   * @param logger -
+   * @param tokenAnnotationName -
+   * @param tokenTypeFeatureName -
+   * @param tokenClassFeatureName -
+   * @param tokenizerDescriptor -
+   * @throws ResourceInitializationException -
    */
   public void loadDictionaryContents(UimaContext context, Logger logger,
           String tokenAnnotationName, String tokenTypeFeatureName, String tokenClassFeatureName,

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/DictionaryResource_impl.java Wed Apr 18 17:42:46 2018
@@ -18,11 +18,16 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource;
 
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectOutputStream;
+import java.io.OutputStream;
 import java.io.Serializable;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -60,6 +65,24 @@ import org.xml.sax.helpers.XMLReaderFact
  */
 
 public class DictionaryResource_impl implements DictionaryResource, SharedResourceObject {
+  
+  /** 
+   * support making a compiled dictionary as a side effect of loading
+   *   - trigger: the existence of a file path in the property 
+   *       uima.conceptmapper.compiled_dictionary_directory
+   *   - this is interpreted as a path to a writable directory where
+   *     the compiled version of this is written after loading
+   *     
+   */
+  
+  public final static String SAVE_COMPILED = "uima.conceptmapper.compiled_dictionary_directory";
+  
+  private final static File compDictDir;
+  static {
+    String p = System.getProperty(SAVE_COMPILED);
+    compDictDir = (p == null) ? null : new File(p);
+  }
+  
 /** Dictionary file loader. Uses an XML parser. */
   protected DictLoader dictLoader;
 
@@ -170,7 +193,7 @@ public class DictionaryResource_impl imp
    * @param unsorted
    * 		  an unsorted string representation of the entry, if the contents of 'elements' has been sorted
    * @param length
-   *          the number of words in the phrase (>=1)
+   *          the number of words in the phrase (&gt;=1)
    * @param props
    *          the EntryProperties object for the dictionary entry
    */
@@ -271,11 +294,36 @@ public class DictionaryResource_impl imp
       // System.out.print ("Loading Dictionary: '" + dictLoader.dataResource.getUri().toString() +
       // "'...");
       // System.out.print ("Loading Dictionary...");
-      logger.logInfo("Loading Dictionary...");
+      URL dictUrl = dictLoader.dataResource.getUrl();
+      String loadPath = (null != dictUrl) 
+                          ? (" from " + dictLoader.dataResource.getUrl().toString())
+                          : "";
+      logger.logInfo("Loading Dictionary" + loadPath);
       dictLoader.setDictionary(dictStream, NumOfInitialDictEntries, tokenAnnotationName,
               tokenTypeFeatureName, tokenClassFeatureName, tokenizerDescriptor, tokenFilter,
               tokenNormalizer, langID, entryPropertiesRoot);
-      logger.logInfo("...done");
+      logger.logInfo("...done loading dictionary" + loadPath);
+      
+      do { // to establish break boundary
+        if (compDictDir != null) {
+          if (!compDictDir.exists()) {
+            if (!compDictDir.mkdirs()) {
+              logger.logError("Cannot create compiled dictionary output directory: " + compDictDir.toString());
+              break;
+            }
+          }
+          
+          // get the last part of the file source name - this will be the name of the compiled dictionary
+          String n = dictUrl.getPath();
+          int i = n.lastIndexOf(File.separator);
+          n = (i >= 0) ? n.substring(i + 1) : n;
+          
+          BufferedOutputStream output = new BufferedOutputStream(new FileOutputStream(new File(compDictDir, n)));
+          serializeEntries(output);
+          output.close();
+        }
+      } while (false);  // of do establishing break boundary
+      
       // System.out.println ("done");
       // System.err.println("NEW DICT:\n" + toString());
       setLoaded(true);
@@ -458,9 +506,9 @@ public class DictionaryResource_impl imp
     EntryProperties properties;
 
     /**
-     * @param elements
-     * @param unsorted
-     * @param properties
+     * @param elements -
+     * @param unsorted -
+     * @param properties -
      */
     public DictEntryImpl(String[] elements, String unsorted,
             EntryProperties properties) {
@@ -954,7 +1002,7 @@ public class DictionaryResource_impl imp
     }
 
     public InputStream getInputStream() throws IOException {
-      return dataResource.getInputStream();
+      return new BufferedInputStream(dataResource.getInputStream());
     }
 
   }
@@ -1006,7 +1054,7 @@ public class DictionaryResource_impl imp
     return result.toString();
   }
 
-  public void serializeEntries(FileOutputStream output) throws IOException {
+  public void serializeEntries(OutputStream output) throws IOException {
     ObjectOutputStream oos = new ObjectOutputStream(output);
     oos.writeObject(this.entryPropertiesRoot);
     oos.writeObject(this.dictImpl);

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/EntryProperties.java Wed Apr 18 17:42:46 2018
@@ -31,9 +31,9 @@ public class EntryProperties implements
 
 	/**
 	 * 
-	 * @param root
-	 * @param maxNumberOfProperties
-	 * @throws NullPointerException
+	 * @param root -
+	 * @param maxNumberOfProperties -
+	 * @throws NullPointerException -
 	 * 
 	 * should only be called by factory
 	 */

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/dictionaryResource/annotatorAdaptor/AnnotatorAdaptor.java Wed Apr 18 17:42:46 2018
@@ -18,7 +18,9 @@
  */
 package org.apache.uima.conceptMapper.support.dictionaryResource.annotatorAdaptor;
 
+import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.util.Vector;
 
@@ -40,6 +42,7 @@ import org.apache.uima.resource.Resource
 import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.Level;
 import org.apache.uima.util.XMLInputSource;
 
 public class AnnotatorAdaptor {
@@ -71,15 +74,28 @@ public class AnnotatorAdaptor {
           throws DictionaryLoaderException {
     super();
     try {
-      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
-              new XMLInputSource(analysisEngineDescriptorPath));
+      this.logger = logger;
+//      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
+//              new XMLInputSource(analysisEngineDescriptorPath));
+      
+      XMLInputSource descriptorSource = null;
+      if (new File(analysisEngineDescriptorPath).exists()) {
+        logger.logConfig("Loading the analysisEngineDescriptorPath from file system path: "+ analysisEngineDescriptorPath);
+        descriptorSource = new XMLInputSource(analysisEngineDescriptorPath);
+      } else {
+        logger.logConfig("Loading the analysisEngineDescriptorPath from class path: "+ analysisEngineDescriptorPath);
+        InputStream is = this.getClass().getResourceAsStream(analysisEngineDescriptorPath);
+        descriptorSource = new XMLInputSource(is, null);
+      }
+      aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(descriptorSource);
+      
       this.tokenTypeName = tokenTypeName;
       this.tokenTypeFeature = tokenFilter.getTokenTypeFeature();
       this.tokenClassFeature = tokenFilter.getTokenClassFeature();
       this.tokenFilter = tokenFilter;
       this.langID = langID;
       this.result = result;
-      this.logger = logger;
+
     } catch (InvalidXMLException e) {
       throw new DictionaryLoaderException(e);
     } catch (IOException e) {

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/stemmer/Stemmer.java Wed Apr 18 17:42:46 2018
@@ -44,6 +44,9 @@ public interface Stemmer {
 
   /**
    * Initialize the stemmer with a dictionary
+   * @param dictionary -
+   * @throws FileNotFoundException -
+   * @throws ParseException -
    */
   public void initialize(String dictionary) throws FileNotFoundException, ParseException;
 

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/OffsetTokenizer.java Wed Apr 18 17:42:46 2018
@@ -137,6 +137,7 @@ public class OffsetTokenizer  extends JC
    * {@link #nextToken(JCas) nextToken} will return the first token from the input string
    * as a TokenAnnotation; you can get the text by using
    * {@link TokenAnnotation#getText()}
+   * @param text -
    */
   public void setText(String text) {
     this.text = text;
@@ -374,8 +375,6 @@ public class OffsetTokenizer  extends JC
    * 
    * @param jcas
    *          the current CAS to process.
-   * @param aResultSpec
-   *          a specification of the result annotation that should be created by this annotator
    * 
    * @see org.apache.uima.analysis_engine.annotator.JTextAnnotator#process(JCas, ResultSpecification)
    */
@@ -400,9 +399,9 @@ public class OffsetTokenizer  extends JC
   }
 
   /**
-   * @param jcas
-   * @param documentText
-   * @param delimiters
+   * @param jcas -
+   * @param documentText -
+   * @param delimiters -
    */
   protected void doTokenization(JCas jcas, String documentText, String delimiters) {
 
@@ -421,8 +420,8 @@ public class OffsetTokenizer  extends JC
   }
 
   /**
-   * @param configParameterName
-   * @param configParameterValue
+   * @param configParameterName -
+   * @param configParameterValue -
    */
   public void processConfigurationParameter(String configParameterName, Object configParameterValue) {
     if (configParameterName.equals(PARAM_CASE_MATCH)) {

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation.java Wed Apr 18 17:42:46 2018
@@ -30,19 +30,28 @@ public class TokenAnnotation extends uim
   protected TokenAnnotation() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public TokenAnnotation(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public TokenAnnotation(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public TokenAnnotation(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);
@@ -62,14 +71,18 @@ public class TokenAnnotation extends uim
   //* Feature: text
 
   /** getter for text - gets text of token
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public String getText() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_text == null)
       jcasType.jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getStringValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_text);}
     
   /** setter for text - sets text of token 
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setText(String v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_text == null)
       jcasType.jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -80,14 +93,18 @@ public class TokenAnnotation extends uim
   //* Feature: tokenType
 
   /** getter for tokenType - gets 
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public int getTokenType() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenType == null)
       jcasType.jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getIntValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_tokenType);}
     
   /** setter for tokenType - sets  
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setTokenType(int v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenType == null)
       jcasType.jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -98,14 +115,18 @@ public class TokenAnnotation extends uim
   //* Feature: tokenClass
 
   /** getter for tokenClass - gets semantic class, or other such classification of this token
-   * @generated */
+   * @generated 
+   * @return -
+   */
   public String getTokenClass() {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenClass == null)
       jcasType.jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return jcasType.ll_cas.ll_getStringValue(addr, ((TokenAnnotation_Type)jcasType).casFeatCode_tokenClass);}
     
   /** setter for tokenClass - sets semantic class, or other such classification of this token 
-   * @generated */
+   * @generated 
+   * @param v -
+   */
   public void setTokenClass(String v) {
     if (TokenAnnotation_Type.featOkTst && ((TokenAnnotation_Type)jcasType).casFeat_tokenClass == null)
       jcasType.jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokenizer/TokenAnnotation_Type.java Wed Apr 18 17:42:46 2018
@@ -44,13 +44,19 @@ public class TokenAnnotation_Type extend
   final Feature casFeat_text;
   /** @generated */
   final int     casFeatCode_text;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public String getText(int addr) {
         if (featOkTst && casFeat_text == null)
       jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getStringValue(addr, casFeatCode_text);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setText(int addr, String v) {
         if (featOkTst && casFeat_text == null)
       jcas.throwFeatMissing("text", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -62,13 +68,19 @@ public class TokenAnnotation_Type extend
   final Feature casFeat_tokenType;
   /** @generated */
   final int     casFeatCode_tokenType;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public int getTokenType(int addr) {
         if (featOkTst && casFeat_tokenType == null)
       jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getIntValue(addr, casFeatCode_tokenType);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setTokenType(int addr, int v) {
         if (featOkTst && casFeat_tokenType == null)
       jcas.throwFeatMissing("tokenType", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -80,13 +92,19 @@ public class TokenAnnotation_Type extend
   final Feature casFeat_tokenClass;
   /** @generated */
   final int     casFeatCode_tokenClass;
-  /** @generated */ 
+  /** @generated 
+   * @param addr -
+   * @return -
+   */
   public String getTokenClass(int addr) {
         if (featOkTst && casFeat_tokenClass == null)
       jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
     return ll_cas.ll_getStringValue(addr, casFeatCode_tokenClass);
   }
-  /** @generated */    
+  /** @generated 
+   * @param addr -
+   * @param v -
+   */
   public void setTokenClass(int addr, String v) {
         if (featOkTst && casFeat_tokenClass == null)
       jcas.throwFeatMissing("tokenClass", "org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation");
@@ -97,7 +115,10 @@ public class TokenAnnotation_Type extend
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated 
+   * @param jcas -
+   * @param casType -
+   */
   public TokenAnnotation_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenFilter.java Wed Apr 18 17:42:46 2018
@@ -270,7 +270,7 @@ public class TokenFilter {
   }
 
   /**
-   * @param token
+   * @param token -
    * @return false if tokenTypeFeature is set, and the token's tokenTypeFeature slot is set, but the value is not OK
    */
   public boolean checkTokenType(AnnotationFS token) {
@@ -318,12 +318,12 @@ public class TokenFilter {
   }
 
   /**
-   * @param typeSystem
+   * @param typeSystem -
    * @param requireFeatureExistence -
    *          if true, if the tokenType and/or tokenClass features of the tokenAnnotation are
    *          specified, they must exist. This is to allow for the situation where these features
    *          might not exist during dictionary loading, but are needed at annotator runtime
-   * @throws UnknownTypeException
+   * @throws UnknownTypeException -
    */
   public void initTypes(TypeSystem typeSystem, boolean requireFeatureExistence)
           throws UnknownTypeException {

Modified: uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/org/apache/uima/conceptMapper/support/tokens/TokenNormalizer.java Wed Apr 18 17:42:46 2018
@@ -66,9 +66,9 @@ public class TokenNormalizer {
   private boolean replaceCommaWithAND;
 
   /**
-   * @param annotatorContext
-   * @param logger
-   * @throws AnnotatorContextException
+   * @param uimaContext -
+   * @param logger -
+   * @throws AnnotatorContextException -
    */
   public TokenNormalizer(UimaContext uimaContext, Logger logger)
           throws AnnotatorContextException {

Modified: uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation.java Wed Apr 18 17:42:46 2018
@@ -31,19 +31,28 @@ public class TokenAnnotation extends Ann
   protected TokenAnnotation() {}
     
   /** Internal - constructor used by generator 
-   * @generated */
+   * @generated 
+   * @param addr -
+   * @param type -
+   */
   public TokenAnnotation(int addr, TOP_Type type) {
     super(addr, type);
     readObject();
   }
   
-  /** @generated */
+  /** @generated 
+   * @param jcas -
+   */
   public TokenAnnotation(JCas jcas) {
     super(jcas);
     readObject();   
   } 
 
-  /** @generated */  
+  /** @generated 
+   * @param jcas -
+   * @param begin -
+   * @param end -
+   */
   public TokenAnnotation(JCas jcas, int begin, int end) {
     super(jcas);
     setBegin(begin);

Modified: uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java?rev=1829466&r1=1829465&r2=1829466&view=diff
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java (original)
+++ uima/addons/trunk/ConceptMapper/src/main/java/uima/tt/TokenAnnotation_Type.java Wed Apr 18 17:42:46 2018
@@ -42,7 +42,10 @@ public class TokenAnnotation_Type extend
 
 
   /** initialize variables to correspond with Cas Type and Features
-	* @generated */
+	* @generated
+   * @param jcas -
+   * @param casType -
+   */
   public TokenAnnotation_Type(JCas jcas, Type casType) {
     super(jcas, casType);
     casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());

Added: uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml?rev=1829466&view=auto
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml (added)
+++ uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/aggregate/OffsetTokenizerMatcher.xml Wed Apr 18 17:42:46 2018
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.    
+-->  
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<primitive>false</primitive>
+	<delegateAnalysisEngineSpecifiers>
+		<delegateAnalysisEngine key="ConceptMapper">
+			<import location="../primitive/ConceptMapperOffsetTokenizer.xml" />
+		</delegateAnalysisEngine>
+		<delegateAnalysisEngine key="Tokenizer">
+			<import location="../primitive/OffsetTokenizer.xml" />
+		</delegateAnalysisEngine>
+	</delegateAnalysisEngineSpecifiers>
+	<analysisEngineMetaData>
+		<name>DictMatcher</name>
+		<configurationParameters />
+		<configurationParameterSettings />
+		<flowConstraints>
+			<fixedFlow>
+				<node>Tokenizer</node>
+				<node>ConceptMapper</node>
+			</fixedFlow>
+		</flowConstraints>
+		<fsIndexCollection />
+		<capabilities>
+			<capability>
+				<inputs />
+				<outputs>
+					<type allAnnotatorFeatures="true">uima.tt.TokenAnnotation</type>
+					<type allAnnotatorFeatures="true">uima.tt.SentenceAnnotation</type>
+					<type allAnnotatorFeatures="true">uima.tt.ParagraphAnnotation</type>
+					<type allAnnotatorFeatures="true">org.apache.uima.conceptMapper.DictTerm</type>
+				</outputs>
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+		</operationalProperties>
+	</analysisEngineMetaData>
+</taeDescription>

Added: uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml?rev=1829466&view=auto
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml (added)
+++ uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/ConceptMapperOffsetTokenizer.xml Wed Apr 18 17:42:46 2018
@@ -0,0 +1,513 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.    
+-->  
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<primitive>true</primitive>
+	<annotatorImplementationName>org.apache.uima.conceptMapper.ConceptMapper</annotatorImplementationName>
+	<analysisEngineMetaData>
+		<name>ConceptMapper</name>
+		<description></description>
+		<version>1</version>
+		<vendor></vendor>
+		<configurationParameters>
+			<configurationParameter>
+				<name>caseMatch</name>
+				<description>
+					this parameter specifies the case folding mode:
+					ignoreall - fold everything to lowercase for
+					matching; insensitive - fold only tokens with initial
+					caps to lowercase; digitfold - fold all (and only)
+					tokens with a digit; sensitive - perform no case
+					folding
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>Stemmer</name>
+				<description>
+					Name of stemmer class to use before matching. MUST
+					have a zero-parameter constructor! If not specified,
+					no stemming will be performed.
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ResultingAnnotationName</name>
+				<description>
+					Name of the annotation type created by this TAE,
+					must match the typeSystemDescription entry
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ResultingEnclosingSpanName</name>
+				<description>
+					Name of the feature in the resultingAnnotation to
+					contain the span that encloses it (i.e. its
+					sentence)
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>AttributeList</name>
+				<description>
+					List of attribute names for XML dictionary entry
+					record - must correspond to FeatureList
+				</description>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>FeatureList</name>
+				<description>
+					List of feature names for CAS annotation - must
+					correspond to AttributeList
+				</description>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenAnnotation</name>
+				<description></description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenClassFeatureName</name>
+				<description>
+					Name of feature used when doing lookups against
+					IncludedTokenClasses and ExcludedTokenClasses
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenTextFeatureName</name>
+				<description></description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>SpanFeatureStructure</name>
+				<description>
+					Type of annotation which corresponds to spans of
+					data for processing (e.g. a Sentence)
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>OrderIndependentLookup</name>
+				<description>
+					True if should ignore element order during lookup
+					(i.e., "top box" would equal "box top"). Default is
+					False.
+				</description>
+				<type>Boolean</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenTypeFeatureName</name>
+				<description>
+					Name of feature used when doing lookups against
+					IncludedTokenTypes and ExcludedTokenTypes
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>IncludedTokenTypes</name>
+				<description>
+					Type of tokens to include in lookups (if not
+					supplied, then all types are included except those
+					specifically mentioned in ExcludedTokenTypes)
+				</description>
+				<type>Integer</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ExcludedTokenTypes</name>
+				<description></description>
+				<type>Integer</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ExcludedTokenClasses</name>
+				<description>
+					Class of tokens to exclude from lookups (if not
+					supplied, then all classes are excluded except those
+					specifically mentioned in IncludedTokenClasses,
+					unless IncludedTokenClasses is not supplied, in
+					which case none are excluded)
+				</description>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>IncludedTokenClasses</name>
+				<description>
+					Class of tokens to include in lookups (if not
+					supplied, then all classes are included except those
+					specifically mentioned in ExcludedTokenClasses)
+				</description>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenClassWriteBackFeatureNames</name>
+				<description>
+					names of features that should be written back to a
+					token, such as a POS tag
+				</description>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ResultingAnnotationMatchedTextFeature</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>PrintDictionary</name>
+				<type>Boolean</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>SearchStrategy</name>
+				<description>
+					Can be either "SkipAnyMatch",
+					"SkipAnyMatchAllowOverlap" or
+					"ContiguousMatch"&#13;&#13;ContiguousMatch: longest
+					match of contiguous tokens within enclosing
+					span (taking into account included/excluded items).
+					DEFAULT strategy &#13;SkipAnyMatch: longest match of
+					not-necessarily contiguous tokens within enclosing
+					span (taking into account included/excluded items).
+					Subsequent lookups begin in span after complete
+					match. IMPLIES order-independent lookup
+					&#13;SkipAnyMatchAllowOverlap: longest match of
+					not-necessarily contiguous tokens within enclosing
+					span (taking into account included/excluded items).
+					Subsequent lookups begin in span after next token.
+					IMPLIES order-independent lookup
+				</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>StopWords</name>
+				<type>String</type>
+				<multiValued>true</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>FindAllMatches</name>
+				<type>Boolean</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>MatchedTokensFeatureName</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>ReplaceCommaWithAND</name>
+				<type>Boolean</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>TokenizerDescriptorPath</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			<configurationParameter>
+				<name>LanguageID</name>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+		</configurationParameters>
+		<configurationParameterSettings>
+			<nameValuePair>
+				<name>caseMatch</name>
+				<value>
+					<string>ignoreall</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>AttributeList</name>
+				<value>
+					<array>
+						<string>canonical</string>
+					</array>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>FeatureList</name>
+				<value>
+					<array>
+						<string>DictCanon</string>
+					</array>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>TokenAnnotation</name>
+				<value>
+					<string>uima.tt.TokenAnnotation</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>ResultingAnnotationName</name>
+				<value>
+					<string>
+						org.apache.uima.conceptMapper.DictTerm
+					</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>SpanFeatureStructure</name>
+				<value>
+					<string>uima.tcas.DocumentAnnotation</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>OrderIndependentLookup</name>
+				<value>
+					<boolean>false</boolean>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>TokenClassWriteBackFeatureNames</name>
+				<value>
+					<array />
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>IncludedTokenClasses</name>
+				<value>
+					<array />
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>PrintDictionary</name>
+				<value>
+					<boolean>false</boolean>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>FindAllMatches</name>
+				<value>
+					<boolean>false</boolean>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>StopWords</name>
+				<value>
+					<array />
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>ReplaceCommaWithAND</name>
+				<value>
+					<boolean>false</boolean>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>TokenizerDescriptorPath</name>
+				<value>
+					<string>
+						/OtherStuff/IBM/eclipse-UIMAsandbox/ConceptMapper/desc/analysis_engine/primitive/OffsetTokenizer.xml
+					</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>ResultingEnclosingSpanName</name>
+				<value>
+					<string>enclosingSpan</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>MatchedTokensFeatureName</name>
+				<value>
+					<string>matchedTokens</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>ResultingAnnotationMatchedTextFeature</name>
+				<value>
+					<string>matchedText</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>SearchStrategy</name>
+				<value>
+					<string>ContiguousMatch</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>LanguageID</name>
+				<value>
+					<string>en</string>
+				</value>
+			</nameValuePair>
+		</configurationParameterSettings>
+		<typeSystemDescription>
+			<imports>
+				<import name="org.apache.uima.conceptMapper.DictTerm" />
+				<import
+					name="org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation" />
+			</imports>
+			<types>
+				<typeDescription>
+					<name>uima.tt.TokenAnnotation</name>
+					<description></description>
+					<supertypeName>uima.tcas.Annotation</supertypeName>
+					<features>
+						<featureDescription>
+							<name>SemClass</name>
+							<description>
+								semantic class of token
+							</description>
+							<rangeTypeName>
+								uima.cas.String
+							</rangeTypeName>
+						</featureDescription>
+						<featureDescription>
+							<name>POS</name>
+							<description>
+								Part of speech of the term of which this
+								token is a part
+							</description>
+							<rangeTypeName>
+								uima.cas.String
+							</rangeTypeName>
+						</featureDescription>
+						<featureDescription>
+							<name>frost_TokenType</name>
+							<description></description>
+							<rangeTypeName>
+								uima.cas.Integer
+							</rangeTypeName>
+						</featureDescription>
+					</features>
+				</typeDescription>
+			</types>
+		</typeSystemDescription>
+		<typePriorities>
+			<priorityList>
+				<!-- <type>uima.tt.SentenceAnnotation</type> -->
+				<type>uima.tt.TokenAnnotation</type>
+			</priorityList>
+		</typePriorities>
+		<fsIndexCollection />
+		<capabilities>
+			<capability>
+				<inputs>
+					<type allAnnotatorFeatures="true">
+						uima.tt.TokenAnnotation
+					</type>
+					<!-- <type allAnnotatorFeatures="true">uima.tt.SentenceAnnotation</type>
+						<type allAnnotatorFeatures="true">uima.tt.ParagraphAnnotation</type> -->
+				</inputs>
+				<outputs>
+					<type allAnnotatorFeatures="true">
+						org.apache.uima.conceptMapper.DictTerm
+					</type>
+					<type allAnnotatorFeatures="true">
+						uima.tt.TokenAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">
+						org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation
+					</type>
+					<type allAnnotatorFeatures="true">
+						uima.tcas.DocumentAnnotation
+					</type>
+				</outputs>
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+			<outputsNewCASes>false</outputsNewCASes>
+		</operationalProperties>
+	</analysisEngineMetaData>
+	<externalResourceDependencies>
+		<externalResourceDependency>
+			<key>DictionaryFile</key>
+			<description>dictionary file loader.</description>
+			<interfaceName>
+				org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource
+			</interfaceName>
+			<optional>false</optional>
+		</externalResourceDependency>
+	</externalResourceDependencies>
+	<resourceManagerConfiguration>
+		<externalResources>
+			<externalResource>
+				<name>DictionaryFileName</name>
+				<description>
+					A file containing the dictionary. Modify this URL to
+					use a different dictionary.
+				</description>
+				<fileResourceSpecifier>
+					<fileUrl>file:dict/testDict.xml</fileUrl>
+				</fileResourceSpecifier>
+				<implementationName>
+					org.apache.uima.conceptMapper.support.dictionaryResource.DictionaryResource_impl
+				</implementationName>
+			</externalResource>
+		</externalResources>
+		<externalResourceBindings>
+			<externalResourceBinding>
+				<key>DictionaryFile</key>
+				<resourceName>DictionaryFileName</resourceName>
+			</externalResourceBinding>
+		</externalResourceBindings>
+	</resourceManagerConfiguration>
+</taeDescription>

Added: uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml?rev=1829466&view=auto
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml (added)
+++ uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/DictTerm.xml Wed Apr 18 17:42:46 2018
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.    
+-->  
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+	<name>DictTerm</name>
+	<version>1</version>
+	<vendor>IBM</vendor>
+	<types>
+		<typeDescription>
+			<name>org.apache.uima.conceptMapper.DictTerm</name>
+			<description>Annotation for dictionary lookup matches</description>
+			<supertypeName>uima.tcas.Annotation</supertypeName>
+			<features>
+				<featureDescription>
+					<name>DictCanon</name>
+					<description>canonical form</description>
+					<rangeTypeName>uima.cas.String</rangeTypeName>
+				</featureDescription>
+				<featureDescription>
+					<name>enclosingSpan</name>
+					<description>span that this DictTerm is contained within (i.e. its sentence)</description>
+					<rangeTypeName>uima.tcas.Annotation</rangeTypeName>
+				</featureDescription>
+				<featureDescription>
+					<name>matchedText</name>
+					<description></description>
+					<rangeTypeName>uima.cas.String</rangeTypeName>
+				</featureDescription>
+				<featureDescription>
+					<name>matchedTokens</name>
+					<description></description>
+					<rangeTypeName>uima.cas.FSArray</rangeTypeName>
+				</featureDescription>
+			</features>
+		</typeDescription>
+	</types>
+</typeSystemDescription>

Added: uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml
URL: http://svn.apache.org/viewvc/uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml?rev=1829466&view=auto
==============================================================================
--- uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml (added)
+++ uima/addons/trunk/ConceptMapper/src/main/resources/analysis_engine/primitive/OffsetTokenizer.xml Wed Apr 18 17:42:46 2018
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+  
+  http://www.apache.org/licenses/LICENSE-2.0
+  
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.    
+-->  
+<taeDescription xmlns="http://uima.apache.org/resourceSpecifier">
+	<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+	<primitive>true</primitive>
+	<annotatorImplementationName>org.apache.uima.conceptMapper.support.tokenizer.OffsetTokenizer</annotatorImplementationName>
+	<analysisEngineMetaData>
+		<name>OffsetTokenenizer</name>
+		<configurationParameters>
+			
+			<configurationParameter>
+				<name>caseMatch</name>
+				<description>matching case sensitive or case insensitive</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>true</mandatory>
+			</configurationParameter>
+			
+			<configurationParameter>
+				<name>tokenDelimiters</name>
+				<description>String of characters that separate tokens</description>
+				<type>String</type>
+				<multiValued>false</multiValued>
+				<mandatory>false</mandatory>
+			</configurationParameter>
+			
+		</configurationParameters>
+		
+		<configurationParameterSettings>
+			<nameValuePair>
+				<name>caseMatch</name>
+				<value>
+					<string>ignoreall</string>
+				</value>
+			</nameValuePair>
+			<nameValuePair>
+				<name>tokenDelimiters</name>
+				<value>
+					<string>
+						/-*&amp;@(){}|[]&gt;&lt;\'`":;,$%+.?!
+					</string>
+				</value>
+			</nameValuePair>
+		</configurationParameterSettings>
+		
+		<typeSystemDescription>
+			<types>
+				<typeDescription>
+					<name>org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</name>
+					<description />
+					<supertypeName>uima.tt.TokenAnnotation</supertypeName>
+					<features>
+						<featureDescription>
+							<name>text</name>
+							<description></description>
+							<rangeTypeName>uima.cas.String</rangeTypeName>
+						</featureDescription>
+					</features>
+				</typeDescription>
+				<typeDescription>
+					<name>uima.tt.TokenAnnotation</name>
+					<description />
+					<supertypeName>uima.tcas.Annotation</supertypeName>
+				</typeDescription>
+			</types>
+		</typeSystemDescription>
+		<capabilities>
+			<capability>
+				<inputs />
+				<outputs>
+					<type allAnnotatorFeatures="true">org.apache.uima.conceptMapper.support.tokenizer.TokenAnnotation</type>
+					<type allAnnotatorFeatures="true">uima.tt.TokenAnnotation</type>
+					<type allAnnotatorFeatures="true">uima.tcas.DocumentAnnotation</type>
+				</outputs>
+				<languagesSupported />
+			</capability>
+		</capabilities>
+		<operationalProperties>
+			<modifiesCas>true</modifiesCas>
+			<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+			<outputsNewCASes>false</outputsNewCASes>
+		</operationalProperties>
+	</analysisEngineMetaData>
+</taeDescription>