You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2009/09/04 11:18:00 UTC

svn commit: r811314 - in /incubator/uima/sandbox/trunk/Lucas: desc/ desc/IndexWriter.properties desc/LuceneCASIndexer.xml desc/lucas.xml desc/lucas.xsd pom.xml

Author: twgoetz
Date: Fri Sep  4 09:18:00 2009
New Revision: 811314

URL: http://svn.apache.org/viewvc?rev=811314&view=rev
Log:
UIMA-1535: apply Rico Landefeld's https://issues.apache.org/jira/secure/attachment/12418404/UIMA-1535.patch.

https://issues.apache.org/jira/browse/UIMA-1535

Added:
    incubator/uima/sandbox/trunk/Lucas/desc/
    incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties
    incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml
    incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml
    incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd
Modified:
    incubator/uima/sandbox/trunk/Lucas/pom.xml

Added: incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties Fri Sep  4 09:18:00 2009
@@ -0,0 +1,6 @@
+# only for testing purposes !
+indexPath = src/test/resources/test-index
+RAMBufferSize = 513
+useCompoundFileFormat = false
+maxFieldLength = 9999
+uniqueIndex = true
\ No newline at end of file

Added: incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml Fri Sep  4 09:18:00 2009
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.apache.uima.lucas.consumer.LuceneCASIndexer</implementationName>
+  <processingResourceMetaData>
+    <name>LuceneCASIndexer</name>
+    <description/>
+    <version>2.3.0-SNAPSHOT</version>
+    <vendor>apache</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>mappingFile</name>
+        <description>path to the mapping file</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>mappingFile</name>
+        <value>
+          <string>lucas.xml</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription/>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>false</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+  <resourceManagerConfiguration>
+    <externalResources>
+      <externalResource>
+        <name>LucasIndexWriterProvider</name>
+        <description>Provides an index writer instance.</description>
+        <fileResourceSpecifier>
+          <fileUrl>file:IndexWriter.properties</fileUrl>
+        </fileResourceSpecifier>
+        <implementationName>org.apache.uima.lucas.indexer.IndexWriterProviderImpl</implementationName>
+      </externalResource>
+    </externalResources>
+    <externalResourceBindings>
+      <externalResourceBinding>
+        <key>indexWriterProvider</key>
+        <resourceName>LucasIndexWriterProvider</resourceName>
+      </externalResourceBinding>
+    </externalResourceBindings>
+  </resourceManagerConfiguration>
+</casConsumerDescription>

Added: incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml Fri Sep  4 09:18:00 2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<fields xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:noNamespaceSchemaLocation="lucas.xsd">
+	<!-- Schema hint is relative to this file: lucas.xsd is added next to it in desc/. NOTE(review): that schema declares no filters/annotations/features elements; confirm before validating. -->
+	<field name="annotation1" index="yes" termVector="no" stored="yes"
+		merge="true">
+		<filters>
+			<filter name="testFilterField" key1="value1"/>
+		</filters>
+		<annotations>
+			<annotation type="de.julielab.jules.types.test.Annotation1">
+				<filters>
+					<filter name="testFilterAnnotation" key2="value2"/>
+				</filters>
+				<features>
+					<feature name="featureString" />
+				</features>
+			</annotation>
+			<annotation type="de.julielab.jules.types.test.Annotation1">
+				<features>
+					<feature name="featureInteger" numberFormat="##" />
+				</features>
+			</annotation>
+		</annotations>
+	</field>
+</fields>
\ No newline at end of file

Added: incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd Fri Sep  4 09:18:00 2009
@@ -0,0 +1,140 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.    
+-->
+
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+		   elementFormDefault="qualified">
+
+	<xs:simpleType name="indexType">
+  		<xs:restriction base="xs:string">
+    		<xs:enumeration value="yes"/>
+    		<xs:enumeration value="no"/>
+    		<xs:enumeration value="no_norms"/>
+    		<xs:enumeration value="no_tf"/>
+    		<xs:enumeration value="no_norms_tf"/>
+  		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:simpleType name="termVectorType">
+  		<xs:restriction base="xs:string">
+    		<xs:enumeration value="no"/>
+    		<xs:enumeration value="positions"/>
+    		<xs:enumeration value="offsets"/>
+    		<xs:enumeration value="positions_offsets"/>
+  		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:simpleType name="storedType">
+  		<xs:restriction base="xs:string">
+    		<xs:enumeration value="yes"/>
+    		<xs:enumeration value="no"/>
+    		<xs:enumeration value="compress"/>
+  		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:simpleType name="tokenizerType">
+  		<xs:restriction base="xs:string">
+    		<xs:enumeration value="cas"/>
+    		<xs:enumeration value="whitespace"/>
+    		<xs:enumeration value="standard"/>
+  		</xs:restriction>
+	</xs:simpleType>
+
+	<xs:simpleType name="positionType">
+  		<xs:restriction base="xs:string">
+    		<xs:enumeration value="first"/>
+    		<xs:enumeration value="last"/>
+  		</xs:restriction>
+	</xs:simpleType>	
+	
+	<xs:element name="feature">
+		<xs:complexType>
+			<xs:attribute name="name" type="xs:string"/>
+			<xs:attribute name="uppercase" type="xs:boolean"/>
+			<xs:attribute name="lowercase" type="xs:boolean"/>
+			<xs:attribute name="numberFormat" type="xs:string"/>
+		</xs:complexType>
+	</xs:element>
+	
+	<xs:element name="annotation">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
+			</xs:sequence>				
+			<xs:attribute name="type" type="xs:string"/>
+			<xs:attribute name="sofa" type="xs:string"/>
+			<xs:attribute name="featurePath" type="xs:string"/>
+			<xs:attribute name="concatString" type="xs:string"/>
+			<xs:attribute name="splittString" type="xs:string"/>
+			<xs:attribute name="prefix" type="xs:string"/>
+			<xs:attribute name="uppercase" type="xs:boolean"/>
+			<xs:attribute name="lowercase" type="xs:boolean"/>
+			<xs:attribute name="stopwordRemove" type="xs:boolean"/>
+			<xs:attribute name="position" type="positionType"/>
+			<xs:attribute name="addHypernyms" type="xs:boolean"/>
+			<xs:attribute name="mappingFile" type="xs:string"/>
+			<xs:attribute name="snowballFilter" type="xs:string"/>
+			<xs:attribute name="unique" type="xs:boolean"/>
+			<xs:attribute name="tokenizer" type="tokenizerType"/>			
+		</xs:complexType>
+	</xs:element>
+
+	<xs:element name="field">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="annotation" maxOccurs="unbounded"/>
+			</xs:sequence>				
+			<xs:attribute name="name" type="xs:string"/>
+			<xs:attribute name="index" type="indexType"/>
+			<xs:attribute name="termVector" type="termVectorType"/>
+			<xs:attribute name="delimiter" type="xs:string"/>
+			<xs:attribute name="stored" type="storedType"/>
+			<xs:attribute name="merge" type="xs:boolean"/>			
+		</xs:complexType>
+	</xs:element>
+	
+	<xs:element name="fields">
+		<xs:complexType>
+			<xs:sequence>
+				<xs:element ref="field" maxOccurs="unbounded"/>
+			</xs:sequence>
+		</xs:complexType>
+	</xs:element>
+
+</xs:schema>
\ No newline at end of file

Modified: incubator/uima/sandbox/trunk/Lucas/pom.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/pom.xml?rev=811314&r1=811313&r2=811314&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/pom.xml (original)
+++ incubator/uima/sandbox/trunk/Lucas/pom.xml Fri Sep  4 09:18:00 2009
@@ -29,186 +29,49 @@
 	<version>2.3.0-incubating-SNAPSHOT</version>
 	<description>Writes information contained in CAS objects to fields of a Lucene search index; mapping is done in a mapping file.</description>
   
-  <parent>
+   <parent>
     <groupId>org.apache.uima</groupId>
-    <artifactId>uimaj</artifactId>
+    <artifactId>SandboxDistr</artifactId>
     <version>2.3.0-incubating-SNAPSHOT</version> <!-- this comment is a flag for changeVersion -->
-    <relativePath>../uimaj</relativePath>
-  </parent>
-
-	<properties>
-		<componentId>LuceneCASIndexer</componentId>
-	</properties>
-  
+    <relativePath>../SandboxDistr</relativePath>
+   </parent>
 	<build>
-		<plugins>
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-source-plugin</artifactId>
-				<executions>
-					<execution>
-						<id>jar</id>
-						<goals>
-							<goal>jar</goal>
-						</goals>
-						<phase>package</phase>
-					</execution>
-					<execution>
-						<id>test-jar</id>
-						<goals>
-							<goal>test-jar</goal>
-						</goals>
-						<phase>package</phase>
-					</execution>
-				</executions>
-			</plugin>
-
-			<plugin>
-				<groupId>org.apache.maven.plugins</groupId>
-				<artifactId>maven-dependency-plugin</artifactId>
-				<executions>
-					<!-- Copy the dependencies to the lib folder for the PEAR to copy -->
-					<execution>
-						<id>copy-dependencies</id>
-						<phase>package</phase>
-						<goals>
-							<goal>copy-dependencies</goal>
-						</goals>
-						<configuration>
-							<stripVersion>true</stripVersion>
-							<outputDirectory>${basedir}/lib</outputDirectory>
-							<overWriteReleases>false</overWriteReleases>
-							<overWriteSnapshots>true</overWriteSnapshots>
-							<includeScope>runtime</includeScope>
-							<excludeScope>test</excludeScope>
-							<excludeArtifactIds>uimaj-core</excludeArtifactIds>
-						</configuration>
-					</execution>
-				</executions>
-			</plugin>
-
-			<plugin>
-				<artifactId>maven-antrun-plugin</artifactId>
-				<executions>
-					<!-- Clean the libraries after packaging -->
-					<execution>
-						<id>CleanLib</id>
-						<phase>clean</phase>
-						<configuration>
-							<tasks>
-								<delete dir="lib" deleteOnExit="true" quiet="true"
-									failOnError="false">
-									<fileset dir="lib" includes="*.jar" />
-								</delete>
-								<delete dir="doc" deleteOnExit="true" quiet="true"
-									failOnError="false">
-									<fileset dir="doc" />
-								</delete>
-								<delete dir="desc" deleteOnExit="true" quiet="true"
-									failOnError="false">
-									<fileset dir="desc" />
-								</delete>
-								<delete dir="resources" deleteOnExit="true" quiet="true"
-									failOnError="false">
-									<fileset dir="resources" />
-								</delete>
-							</tasks>
-						</configuration>
-						<goals>
-							<goal>run</goal>
-						</goals>
-					</execution>
-					<execution>
-						<id>BuildDownloadSite</id>
-						<phase>site</phase>
-						<configuration>
-							<tasks>
-								<mkdir dir="${project.build.directory}/site/downloads" />
-								<copy todir="${project.build.directory}/site/downloads">
-									<fileset dir="${basedir}/src/main/resources/" />
-									<fileset dir="${project.build.directory}/" includes="*.jar" />
-									<fileset dir="${project.build.directory}/" includes="*.pear" />
-								</copy>
-							</tasks>
-						</configuration>
-						<goals>
-							<goal>run</goal>
-						</goals>
-					</execution>
-
-					<execution>
-						<id>BuildPearDirectories</id>
-						<phase>package</phase>
-						<configuration>
-							<tasks>
-								<mkdir dir="${basedir}/desc" />
-								<copy file="${basedir}/src/main/resources/${componentId}.xml"
-									todir="${basedir}/desc/" />
-								<mkdir dir="${basedir}/doc" />
-								<!--
-									<copy todir="${basedir}/doc"> <fileset
-									dir="${project.build.directory}/site/"/> </copy>
-								-->
-								<copy todir="${basedir}/doc/downloads">
-									<fileset dir="${basedir}/src/main/resources/" />
-								</copy>
-								<mkdir dir="${basedir}/resources" />
-								<copy todir="${basedir}/resources">
-									<fileset dir="${basedir}/src/test/resources/" />
-								</copy>
-								<replace file="${basedir}/desc/${componentId}.xml"
-									token="src/test/" value="" />
-							</tasks>
-						</configuration>
-						<goals>
-							<goal>run</goal>
-						</goals>
-					</execution>
+	 <plugins>
+			<plugin>
+				<groupId>org.apache.uima</groupId>
+				<artifactId>PearPackagingMavenPlugin</artifactId>
+				<extensions>true</extensions>
+				<executions>
 					<execution>
 						<phase>package</phase>
 						<configuration>
-							<tasks>
-								<ant antfile="build_documentation.xml" target="copyDocs" />
-							</tasks>
+							<classpath>
+								<!-- PEAR file component classpath settings -->
+								$main_root/lib/google-collections-0.9.jar;$main_root/lib/log4j-1.2.14.jar;$main_root/lib/lucene-core-2.4.1.jar;$main_root/lib/lucene-snowball-2.4.1.jar
+							</classpath>
+
+							<mainComponentDesc>
+								<!-- PEAR file main component descriptor -->
+								desc/${componentId}.xml
+							</mainComponentDesc>
+
+							<componentId>
+								<!-- PEAR file component ID -->
+								${componentId}
+							</componentId>
 						</configuration>
 						<goals>
-							<goal>run</goal>
+							<goal>package</goal>
 						</goals>
-					</execution>
-				</executions>
-			</plugin>
-			<plugin>
-				<groupId>org.apache.uima</groupId>
-				<artifactId>PearPackagingMavenPlugin</artifactId>
-				<extensions>true</extensions>
-				<executions>
-					<execution>
-						<phase>package</phase>
-						<configuration>
-							<classpath>
-								<!-- PEAR file component classpath settings -->
-								$main_root/lib/cglib-nodep-2.1_3.jar;$main_root/lib/google-collect-snapshot-20080530.jar;$main_root/lib/log4j-1.2.14.jar;$main_root/lib/lucene-core-2.4.0.jar;$main_root/lib/lucene-snowball-2.4.0.jar
-							</classpath>
-
-							<mainComponentDesc>
-								<!-- PEAR file main component descriptor -->
-								desc/${componentId}.xml
-							</mainComponentDesc>
-
-							<componentId>
-								<!-- PEAR file component ID -->
-								${componentId}
-							</componentId>
-						</configuration>
-						<goals>
-							<goal>package</goal>
-						</goals>
-					</execution>
-				</executions>
+					</execution>
+				</executions>
 			</plugin>
-		</plugins>
+	 </plugins>
 	</build>
-  
+	<properties>
+		<componentId>LuceneCASIndexer</componentId>
+	</properties>
+ 
 	<dependencies>
 		<dependency>
 			<groupId>org.apache.lucene</groupId>