You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2013/02/04 14:40:47 UTC
svn commit: r1442111 - in /lucene/dev/branches/branch_4x: ./ dev-tools/
lucene/ lucene/analysis/
lucene/analysis/icu/src/java/org/apache/lucene/collation/
lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/
lucene/analysis/uima/src/test-file...
Author: tommaso
Date: Mon Feb 4 13:40:45 2013
New Revision: 1442111
URL: http://svn.apache.org/viewvc?rev=1442111&view=rev
Log:
LUCENE-4749 - merged back to branch_4x
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/BUILD.txt (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (props changed)
lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt (props changed)
lucene/dev/branches/branch_4x/lucene/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/MIGRATE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/README.txt (props changed)
lucene/dev/branches/branch_4x/lucene/SYSTEM_REQUIREMENTS.txt (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
lucene/dev/branches/branch_4x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_4x/lucene/benchmark/ (props changed)
lucene/dev/branches/branch_4x/lucene/build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/codecs/ (props changed)
lucene/dev/branches/branch_4x/lucene/common-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip (props changed)
lucene/dev/branches/branch_4x/lucene/demo/ (props changed)
lucene/dev/branches/branch_4x/lucene/facet/ (props changed)
lucene/dev/branches/branch_4x/lucene/grouping/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/ivy-settings.xml (props changed)
lucene/dev/branches/branch_4x/lucene/join/ (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_4x/lucene/memory/ (props changed)
lucene/dev/branches/branch_4x/lucene/misc/ (props changed)
lucene/dev/branches/branch_4x/lucene/module-build.xml (props changed)
lucene/dev/branches/branch_4x/lucene/queries/ (props changed)
lucene/dev/branches/branch_4x/lucene/queryparser/ (props changed)
lucene/dev/branches/branch_4x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_4x/lucene/site/ (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_4x/lucene/tools/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (props changed)
lucene/dev/branches/branch_4x/solr/LICENSE.txt (props changed)
lucene/dev/branches/branch_4x/solr/NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/README.txt (props changed)
lucene/dev/branches/branch_4x/solr/SYSTEM_REQUIREMENTS.txt (props changed)
lucene/dev/branches/branch_4x/solr/build.xml (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_4x/solr/common-build.xml (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/licenses/ (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpclient-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpclient-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpcore-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpcore-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpmime-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpmime-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/scripts/ (props changed)
lucene/dev/branches/branch_4x/solr/site/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_4x/solr/testlogging.properties (props changed)
lucene/dev/branches/branch_4x/solr/webapp/ (props changed)
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Mon Feb 4 13:40:45 2013
@@ -28,6 +28,8 @@ import org.apache.uima.resource.Resource
import java.io.IOException;
import java.io.Reader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Abstract base implementation of a {@link Tokenizer} which is able to analyze the given input with a
@@ -39,10 +41,10 @@ public abstract class BaseUIMATokenizer
protected final AnalysisEngine ae;
protected final CAS cas;
- protected BaseUIMATokenizer(Reader reader, String descriptorPath) {
+ protected BaseUIMATokenizer(Reader reader, String descriptorPath, Map<String, Object> configurationParameters) {
super(reader);
try {
- ae = AEProviderFactory.getInstance().getAEProvider(descriptorPath).getAE();
+ ae = AEProviderFactory.getInstance().getAEProvider(null, descriptorPath, configurationParameters).getAE();
cas = ae.newCAS();
} catch (ResourceInitializationException e) {
throw new RuntimeException(e);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java Mon Feb 4 13:40:45 2013
@@ -26,6 +26,7 @@ import org.apache.uima.cas.text.Annotati
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
/**
* a {@link Tokenizer} which creates tokens from UIMA Annotations
@@ -40,8 +41,8 @@ public final class UIMAAnnotationsTokeni
private int finalOffset = 0;
- public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Reader input) {
- super(input, descriptorPath);
+ public UIMAAnnotationsTokenizer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters, Reader input) {
+ super(input, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.offsetAttr = addAttribute(OffsetAttribute.class);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizerFactory.java Mon Feb 4 13:40:45 2013
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.util.T
import org.apache.lucene.analysis.uima.UIMAAnnotationsTokenizer;
import java.io.Reader;
+import java.util.HashMap;
import java.util.Map;
/**
@@ -31,19 +32,29 @@ public class UIMAAnnotationsTokenizerFac
private String descriptorPath;
private String tokenType;
+ private Map<String, Object> configurationParameters;
@Override
public void init(Map<String, String> args) {
super.init(args);
- descriptorPath = args.get("descriptorPath");
- tokenType = args.get("tokenType");
- if (descriptorPath == null || tokenType == null) {
- throw new IllegalArgumentException("Both descriptorPath and tokenType are mandatory");
+ configurationParameters = new HashMap<String, Object>();
+ for (String k : args.keySet()) {
+ if (k.equals("tokenType")) {
+ tokenType = args.get("tokenType");
+ } else if (k.equals("descriptorPath")) {
+ descriptorPath = args.get("descriptorPath");
+ } else {
+ configurationParameters.put(k, args.get(k));
+ }
}
+ if (descriptorPath == null || tokenType == null ) {
+ throw new IllegalArgumentException("descriptorPath and tokenType are mandatory");
+ }
+
}
@Override
public Tokenizer create(Reader input) {
- return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, input);
+ return new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, input);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMABaseAnalyzer.java Mon Feb 4 13:40:45 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.io.Reader;
+import java.util.Map;
/**
* An {@link Analyzer} which use the {@link UIMAAnnotationsTokenizer} for creating tokens
@@ -28,15 +29,17 @@ public final class UIMABaseAnalyzer exte
private final String descriptorPath;
private final String tokenType;
+ private final Map<String, Object> configurationParameters;
- public UIMABaseAnalyzer(String descriptorPath, String tokenType) {
+ public UIMABaseAnalyzer(String descriptorPath, String tokenType, Map<String, Object> configurationParameters) {
this.descriptorPath = descriptorPath;
this.tokenType = tokenType;
+ this.configurationParameters = configurationParameters;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, reader));
+ return new TokenStreamComponents(new UIMAAnnotationsTokenizer(descriptorPath, tokenType, configurationParameters, reader));
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzer.java Mon Feb 4 13:40:45 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.uima;
import org.apache.lucene.analysis.Analyzer;
import java.io.Reader;
+import java.util.Map;
/**
* {@link Analyzer} which uses the {@link UIMATypeAwareAnnotationsTokenizer} for the tokenization phase
@@ -28,15 +29,17 @@ public final class UIMATypeAwareAnalyzer
private final String descriptorPath;
private final String tokenType;
private final String featurePath;
+ private final Map<String, Object> configurationParameters;
- public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath) {
+ public UIMATypeAwareAnalyzer(String descriptorPath, String tokenType, String featurePath, Map<String, Object> configurationParameters) {
this.descriptorPath = descriptorPath;
this.tokenType = tokenType;
this.featurePath = featurePath;
+ this.configurationParameters = configurationParameters;
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, reader));
+ return new TokenStreamComponents(new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, reader));
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java Mon Feb 4 13:40:45 2013
@@ -29,6 +29,7 @@ import org.apache.uima.cas.text.Annotati
import java.io.IOException;
import java.io.Reader;
+import java.util.Map;
/**
* A {@link Tokenizer} which creates token from UIMA Annotations filling also their {@link TypeAttribute} according to
@@ -50,8 +51,8 @@ public final class UIMATypeAwareAnnotati
private int finalOffset = 0;
- public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Reader input) {
- super(input, descriptorPath);
+ public UIMATypeAwareAnnotationsTokenizer(String descriptorPath, String tokenType, String typeAttributeFeaturePath, Map<String, Object> configurationParameters, Reader input) {
+ super(input, descriptorPath, configurationParameters);
this.tokenTypeString = tokenType;
this.termAttr = addAttribute(CharTermAttribute.class);
this.typeAttr = addAttribute(TypeAttribute.class);
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizerFactory.java Mon Feb 4 13:40:45 2013
@@ -18,10 +18,10 @@ package org.apache.lucene.analysis.uima;
*/
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.uima.UIMATypeAwareAnnotationsTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import java.io.Reader;
+import java.util.HashMap;
import java.util.Map;
/**
@@ -32,13 +32,23 @@ public class UIMATypeAwareAnnotationsTok
private String descriptorPath;
private String tokenType;
private String featurePath;
+ private Map<String, Object> configurationParameters;
@Override
public void init(Map<String, String> args) {
super.init(args);
- descriptorPath = args.get("descriptorPath");
- tokenType = args.get("tokenType");
- featurePath = args.get("featurePath");
+ configurationParameters = new HashMap<String, Object>();
+ for (String k : args.keySet()) {
+ if (k.equals("featurePath")) {
+ featurePath = args.get("featurePath");
+ } else if (k.equals("tokenType")) {
+ tokenType = args.get("tokenType");
+ } else if (k.equals("descriptorPath")) {
+ descriptorPath = args.get("descriptorPath");
+ } else {
+ configurationParameters.put(k, args.get(k));
+ }
+ }
if (descriptorPath == null || tokenType == null || featurePath == null) {
throw new IllegalArgumentException("descriptorPath, tokenType, and featurePath are mandatory");
}
@@ -46,6 +56,6 @@ public class UIMATypeAwareAnnotationsTok
@Override
public Tokenizer create(Reader input) {
- return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, input);
+ return new UIMATypeAwareAnnotationsTokenizer(descriptorPath, tokenType, featurePath, configurationParameters, input);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestEntityAnnotatorAE.xml Mon Feb 4 13:40:45 2013
@@ -20,7 +20,7 @@
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleEntityAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
- <name>DummyPoSTagger</name>
+ <name>EntityAnnotator</name>
<description/>
<version>1.0</version>
<vendor>ASF</vendor>
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test-files/uima/TestWSTokenizerAE.xml Mon Feb 4 13:40:45 2013
@@ -20,9 +20,28 @@
<primitive>true</primitive>
<annotatorImplementationName>org.apache.lucene.analysis.uima.an.SampleWSTokenizerAnnotator</annotatorImplementationName>
<analysisEngineMetaData>
- <name>DummyPoSTagger</name>
+ <name>WSTokenizer</name>
<version>1.0</version>
<vendor>ASF</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>line-end</name>
+ <description>
+ the string used as line end
+ </description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>line-end</name>
+ <value>
+ <string>\n</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
<typeSystemDescription>
<types>
<typeDescription>
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMABaseAnalyzerTest.java Mon Feb 4 13:40:45 2013
@@ -36,6 +36,8 @@ import org.junit.Before;
import org.junit.Test;
import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
/**
* Testcase for {@link UIMABaseAnalyzer}
@@ -48,7 +50,7 @@ public class UIMABaseAnalyzerTest extend
@Before
public void setUp() throws Exception {
super.setUp();
- analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation");
+ analyzer = new UIMABaseAnalyzer("/uima/AggregateSentenceAE.xml", "org.apache.uima.TokenAnnotation", null);
}
@Override
@@ -120,7 +122,15 @@ public class UIMABaseAnalyzerTest extend
@Test
public void testRandomStrings() throws Exception {
- checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation"),
+ checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestAggregateSentenceAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", null),
+ 100 * RANDOM_MULTIPLIER);
+ }
+
+ @Test
+ public void testRandomStringsWithConfigurationParameters() throws Exception {
+ Map<String, Object> cp = new HashMap<String, Object>();
+ cp.put("line-end", "\r");
+ checkRandomData(random(), new UIMABaseAnalyzer("/uima/TestWSTokenizerAE.xml", "org.apache.lucene.uima.ts.TokenAnnotation", cp),
100 * RANDOM_MULTIPLIER);
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/UIMATypeAwareAnalyzerTest.java Mon Feb 4 13:40:45 2013
@@ -37,7 +37,7 @@ public class UIMATypeAwareAnalyzerTest e
public void setUp() throws Exception {
super.setUp();
analyzer = new UIMATypeAwareAnalyzer("/uima/AggregateSentenceAE.xml",
- "org.apache.uima.TokenAnnotation", "posTag");
+ "org.apache.uima.TokenAnnotation", "posTag", null);
}
@Override
@@ -63,7 +63,7 @@ public class UIMATypeAwareAnalyzerTest e
@Test
public void testRandomStrings() throws Exception {
checkRandomData(random(), new UIMATypeAwareAnalyzer("/uima/TestAggregateSentenceAE.xml",
- "org.apache.lucene.uima.ts.TokenAnnotation", "pos"), 100 * RANDOM_MULTIPLIER);
+ "org.apache.lucene.uima.ts.TokenAnnotation", "pos", null), 100 * RANDOM_MULTIPLIER);
}
}
Modified: lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java?rev=1442111&r1=1442110&r2=1442111&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/analysis/uima/src/test/org/apache/lucene/analysis/uima/an/SampleWSTokenizerAnnotator.java Mon Feb 4 13:40:45 2013
@@ -17,11 +17,13 @@ package org.apache.lucene.analysis.uima.
* limitations under the License.
*/
+import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
/**
* Dummy implementation of a UIMA based whitespace tokenizer
@@ -30,15 +32,21 @@ public class SampleWSTokenizerAnnotator
private final static String TOKEN_TYPE = "org.apache.lucene.uima.ts.TokenAnnotation";
private final static String SENTENCE_TYPE = "org.apache.lucene.uima.ts.SentenceAnnotation";
- private static final String CR = "\n";
+ private String lineEnd;
private static final String WHITESPACE = " ";
@Override
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+ super.initialize(aContext);
+ lineEnd = String.valueOf(aContext.getConfigParameterValue("line-end"));
+ }
+
+ @Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
Type sentenceType = jCas.getCas().getTypeSystem().getType(SENTENCE_TYPE);
Type tokenType = jCas.getCas().getTypeSystem().getType(TOKEN_TYPE);
int i = 0;
- for (String sentenceString : jCas.getDocumentText().split(CR)) {
+ for (String sentenceString : jCas.getDocumentText().split(lineEnd)) {
// add the sentence
AnnotationFS sentenceAnnotation = jCas.getCas().createAnnotation(sentenceType, i, sentenceString.length());
jCas.addFsToIndexes(sentenceAnnotation);