You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2009/09/04 11:18:00 UTC
svn commit: r811314 - in /incubator/uima/sandbox/trunk/Lucas: desc/
desc/IndexWriter.properties desc/LuceneCASIndexer.xml desc/lucas.xml
desc/lucas.xsd pom.xml
Author: twgoetz
Date: Fri Sep 4 09:18:00 2009
New Revision: 811314
URL: http://svn.apache.org/viewvc?rev=811314&view=rev
Log:
UIMA-1535: apply Rico Landefeld's https://issues.apache.org/jira/secure/attachment/12418404/UIMA-1535.patch.
https://issues.apache.org/jira/browse/UIMA-1535
Added:
incubator/uima/sandbox/trunk/Lucas/desc/
incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties
incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml
incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml
incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd
Modified:
incubator/uima/sandbox/trunk/Lucas/pom.xml
Added: incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/IndexWriter.properties Fri Sep 4 09:18:00 2009
@@ -0,0 +1,6 @@
+# For testing purposes only: indexPath below points into src/test/resources.
+indexPath = src/test/resources/test-index
+RAMBufferSize = 513
+useCompoundFileFormat = false
+maxFieldLength = 9999
+uniqueIndex = true
\ No newline at end of file
Added: incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/LuceneCASIndexer.xml Fri Sep 4 09:18:00 2009
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<casConsumerDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- CAS consumer descriptor for the LuceneCASIndexer component -->
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <implementationName>org.apache.uima.lucas.consumer.LuceneCASIndexer</implementationName> <!-- class implementing this consumer -->
+ <processingResourceMetaData>
+ <name>LuceneCASIndexer</name>
+ <description/> <!-- NOTE(review): left empty; consider describing the component here -->
+ <version>2.3.0-SNAPSHOT</version> <!-- NOTE(review): pom.xml uses 2.3.0-incubating-SNAPSHOT; confirm which is intended -->
+ <vendor>apache</vendor>
+ <configurationParameters>
+ <configurationParameter> <!-- the only declared parameter: a mandatory, single-valued String -->
+ <name>mappingFile</name>
+ <description>path to the mapping file</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair> <!-- value assigned to the mappingFile parameter declared above -->
+ <name>mappingFile</name>
+ <value>
+ <string>lucas.xml</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription/> <!-- no type system, type priorities or indexes are declared in this descriptor -->
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability> <!-- inputs, outputs and languages are all left unspecified -->
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>false</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </processingResourceMetaData>
+ <resourceManagerConfiguration>
+ <externalResources>
+ <externalResource> <!-- declares resource "LucasIndexWriterProvider": a properties file plus the class named in implementationName below -->
+ <name>LucasIndexWriterProvider</name>
+ <description>Provides a index writer instance.</description>
+ <fileResourceSpecifier>
+ <fileUrl>file:IndexWriter.properties</fileUrl> <!-- NOTE(review): relative file: URL; where it resolves depends on how the descriptor is loaded, confirm -->
+ </fileResourceSpecifier>
+ <implementationName>org.apache.uima.lucas.indexer.IndexWriterProviderImpl</implementationName>
+ </externalResource>
+ </externalResources>
+ <externalResourceBindings>
+ <externalResourceBinding> <!-- binds the key "indexWriterProvider" to the resource declared above -->
+ <key>indexWriterProvider</key>
+ <resourceName>LucasIndexWriterProvider</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+ </resourceManagerConfiguration>
+</casConsumerDescription>
Added: incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/lucas.xml Fri Sep 4 09:18:00 2009
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<fields xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:noNamespaceSchemaLocation="lucas.xsd"> <!-- schema hint: lucas.xsd sits next to this file in desc/ -->
+
+ <field name="annotation1" index="yes" termVector="no" stored="yes"
+ merge="true"> <!-- sample field definition -->
+ <filters> <!-- filters attached to the field itself -->
+ <filter name="testFilterField" key1="value1"/>
+ </filters>
+ <annotations>
+ <annotation type="de.julielab.jules.types.test.Annotation1"> <!-- first entry: featureString, with its own filter -->
+ <filters>
+ <filter name="testFilterAnnotation" key2="value2"/>
+ </filters>
+ <features>
+ <feature name="featureString" />
+ </features>
+ </annotation>
+ <annotation type="de.julielab.jules.types.test.Annotation1"> <!-- second entry for the same type: featureInteger with numberFormat "##" -->
+ <features>
+ <feature name="featureInteger" numberFormat="##" />
+ </features>
+ </annotation>
+ </annotations>
+ </field>
+</fields>
\ No newline at end of file
Added: incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd?rev=811314&view=auto
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd (added)
+++ incubator/uima/sandbox/trunk/Lucas/desc/lucas.xsd Fri Sep 4 09:18:00 2009
@@ -0,0 +1,183 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<!--
+ Schema for the mapping file (lucas.xml): a "fields" root holding "field"
+ elements, each listing the annotations and annotation features it uses.
+ Two layouts are accepted: the flat one (annotation directly under field,
+ feature directly under annotation) and the wrapped one that lucas.xml uses
+ (filters / annotations / features container elements).
+-->
+
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ elementFormDefault="qualified">
+
+ <!-- Allowed values of the "index" attribute of a field. -->
+ <xs:simpleType name="indexType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="yes"/>
+ <xs:enumeration value="no"/>
+ <xs:enumeration value="no_norms"/>
+ <xs:enumeration value="no_tf"/>
+ <xs:enumeration value="no_norms_tf"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- Allowed values of the "termVector" attribute of a field. -->
+ <xs:simpleType name="termVectorType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="no"/>
+ <xs:enumeration value="positions"/>
+ <xs:enumeration value="offsets"/>
+ <xs:enumeration value="positions_offsets"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- Allowed values of the "stored" attribute of a field. -->
+ <xs:simpleType name="storedType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="yes"/>
+ <xs:enumeration value="no"/>
+ <xs:enumeration value="compress"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- Allowed values of the "tokenizer" attribute of an annotation. -->
+ <xs:simpleType name="tokenizerType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="cas"/>
+ <xs:enumeration value="whitespace"/>
+ <xs:enumeration value="standard"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- Allowed values of the "position" attribute of an annotation. -->
+ <xs:simpleType name="positionType">
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="first"/>
+ <xs:enumeration value="last"/>
+ </xs:restriction>
+ </xs:simpleType>
+
+ <!-- A filter: a "name" plus filter-specific attributes (lucas.xml uses
+ key1="value1"), so any further attribute is allowed. -->
+ <xs:element name="filter">
+ <xs:complexType>
+ <xs:attribute name="name" type="xs:string"/>
+ <xs:anyAttribute processContents="lax"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- Container for the filters of a field or of an annotation. -->
+ <xs:element name="filters">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="filter" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- A feature of an annotation, selected by its "name" attribute. -->
+ <xs:element name="feature">
+ <xs:complexType>
+ <xs:attribute name="name" type="xs:string"/>
+ <xs:attribute name="uppercase" type="xs:boolean"/>
+ <xs:attribute name="lowercase" type="xs:boolean"/>
+ <xs:attribute name="numberFormat" type="xs:string"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- Container for the features of an annotation. -->
+ <xs:element name="features">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- An annotation entry. Optional "filters" and "features" containers
+ may precede the directly nested "feature" elements. -->
+ <xs:element name="annotation">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="filters" minOccurs="0"/>
+ <xs:element ref="features" minOccurs="0"/>
+ <xs:element ref="feature" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ <xs:attribute name="type" type="xs:string"/>
+ <xs:attribute name="sofa" type="xs:string"/>
+ <xs:attribute name="featurePath" type="xs:string"/>
+ <xs:attribute name="concatString" type="xs:string"/>
+ <!-- (sic) the attribute name is spelled "splittString" -->
+ <xs:attribute name="splittString" type="xs:string"/>
+ <xs:attribute name="prefix" type="xs:string"/>
+ <xs:attribute name="uppercase" type="xs:boolean"/>
+ <xs:attribute name="lowercase" type="xs:boolean"/>
+ <xs:attribute name="stopwordRemove" type="xs:boolean"/>
+ <xs:attribute name="position" type="positionType"/>
+ <xs:attribute name="addHypernyms" type="xs:boolean"/>
+ <xs:attribute name="mappingFile" type="xs:string"/>
+ <xs:attribute name="snowballFilter" type="xs:string"/>
+ <xs:attribute name="unique" type="xs:boolean"/>
+ <xs:attribute name="tokenizer" type="tokenizerType"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- Container for the annotation entries of a field. -->
+ <xs:element name="annotations">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="annotation" minOccurs="0" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- One field definition: optional field-level filters, then either an
+ "annotations" container or one or more direct "annotation" children. -->
+ <xs:element name="field">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="filters" minOccurs="0"/>
+ <xs:choice>
+ <xs:element ref="annotations"/>
+ <xs:element ref="annotation" maxOccurs="unbounded"/>
+ </xs:choice>
+ </xs:sequence>
+ <xs:attribute name="name" type="xs:string"/>
+ <xs:attribute name="index" type="indexType"/>
+ <xs:attribute name="termVector" type="termVectorType"/>
+ <xs:attribute name="delimiter" type="xs:string"/>
+ <xs:attribute name="stored" type="storedType"/>
+ <xs:attribute name="merge" type="xs:boolean"/>
+ </xs:complexType>
+ </xs:element>
+
+ <!-- Document root: one or more field definitions. -->
+ <xs:element name="fields">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element ref="field" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+</xs:schema>
\ No newline at end of file
Modified: incubator/uima/sandbox/trunk/Lucas/pom.xml
URL: http://svn.apache.org/viewvc/incubator/uima/sandbox/trunk/Lucas/pom.xml?rev=811314&r1=811313&r2=811314&view=diff
==============================================================================
--- incubator/uima/sandbox/trunk/Lucas/pom.xml (original)
+++ incubator/uima/sandbox/trunk/Lucas/pom.xml Fri Sep 4 09:18:00 2009
@@ -29,186 +29,49 @@
<version>2.3.0-incubating-SNAPSHOT</version>
<description>Writes information contained in CAS objects to fields of a Lucene search index; mapping is done in a mapping file.</description>
- <parent>
+ <parent>
<groupId>org.apache.uima</groupId>
- <artifactId>uimaj</artifactId>
+ <artifactId>SandboxDistr</artifactId>
<version>2.3.0-incubating-SNAPSHOT</version> <!-- this comment is a flag for changeVersion -->
- <relativePath>../uimaj</relativePath>
- </parent>
-
- <properties>
- <componentId>LuceneCASIndexer</componentId>
- </properties>
-
+ <relativePath>../SandboxDistr</relativePath>
+ </parent>
<build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-source-plugin</artifactId>
- <executions>
- <execution>
- <id>jar</id>
- <goals>
- <goal>jar</goal>
- </goals>
- <phase>package</phase>
- </execution>
- <execution>
- <id>test-jar</id>
- <goals>
- <goal>test-jar</goal>
- </goals>
- <phase>package</phase>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- <executions>
- <!-- Copy the dependencies to the lib folder for the PEAR to copy -->
- <execution>
- <id>copy-dependencies</id>
- <phase>package</phase>
- <goals>
- <goal>copy-dependencies</goal>
- </goals>
- <configuration>
- <stripVersion>true</stripVersion>
- <outputDirectory>${basedir}/lib</outputDirectory>
- <overWriteReleases>false</overWriteReleases>
- <overWriteSnapshots>true</overWriteSnapshots>
- <includeScope>runtime</includeScope>
- <excludeScope>test</excludeScope>
- <excludeArtifactIds>uimaj-core</excludeArtifactIds>
- </configuration>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <artifactId>maven-antrun-plugin</artifactId>
- <executions>
- <!-- Clean the libraries after packaging -->
- <execution>
- <id>CleanLib</id>
- <phase>clean</phase>
- <configuration>
- <tasks>
- <delete dir="lib" deleteOnExit="true" quiet="true"
- failOnError="false">
- <fileset dir="lib" includes="*.jar" />
- </delete>
- <delete dir="doc" deleteOnExit="true" quiet="true"
- failOnError="false">
- <fileset dir="doc" />
- </delete>
- <delete dir="desc" deleteOnExit="true" quiet="true"
- failOnError="false">
- <fileset dir="desc" />
- </delete>
- <delete dir="resources" deleteOnExit="true" quiet="true"
- failOnError="false">
- <fileset dir="resources" />
- </delete>
- </tasks>
- </configuration>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
- <execution>
- <id>BuildDownloadSite</id>
- <phase>site</phase>
- <configuration>
- <tasks>
- <mkdir dir="${project.build.directory}/site/downloads" />
- <copy todir="${project.build.directory}/site/downloads">
- <fileset dir="${basedir}/src/main/resources/" />
- <fileset dir="${project.build.directory}/" includes="*.jar" />
- <fileset dir="${project.build.directory}/" includes="*.pear" />
- </copy>
- </tasks>
- </configuration>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
-
- <execution>
- <id>BuildPearDirectories</id>
- <phase>package</phase>
- <configuration>
- <tasks>
- <mkdir dir="${basedir}/desc" />
- <copy file="${basedir}/src/main/resources/${componentId}.xml"
- todir="${basedir}/desc/" />
- <mkdir dir="${basedir}/doc" />
- <!--
- <copy todir="${basedir}/doc"> <fileset
- dir="${project.build.directory}/site/"/> </copy>
- -->
- <copy todir="${basedir}/doc/downloads">
- <fileset dir="${basedir}/src/main/resources/" />
- </copy>
- <mkdir dir="${basedir}/resources" />
- <copy todir="${basedir}/resources">
- <fileset dir="${basedir}/src/test/resources/" />
- </copy>
- <replace file="${basedir}/desc/${componentId}.xml"
- token="src/test/" value="" />
- </tasks>
- </configuration>
- <goals>
- <goal>run</goal>
- </goals>
- </execution>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.uima</groupId>
+ <artifactId>PearPackagingMavenPlugin</artifactId>
+ <extensions>true</extensions>
+ <executions>
<execution>
<phase>package</phase>
<configuration>
- <tasks>
- <ant antfile="build_documentation.xml" target="copyDocs" />
- </tasks>
+ <classpath>
+ <!-- PEAR file component classpath settings -->
+ $main_root/lib/google-collections-0.9.jar;$main_root/lib/log4j-1.2.14.jar;$main_root/lib/lucene-core-2.4.1.jar;$main_root/lib/lucene-snowball-2.4.1.jar
+ </classpath>
+
+ <mainComponentDesc>
+ <!-- PEAR file main component descriptor -->
+ desc/${componentId}.xml
+ </mainComponentDesc>
+
+ <componentId>
+ <!-- PEAR file component ID -->
+ ${componentId}
+ </componentId>
</configuration>
<goals>
- <goal>run</goal>
+ <goal>package</goal>
</goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.uima</groupId>
- <artifactId>PearPackagingMavenPlugin</artifactId>
- <extensions>true</extensions>
- <executions>
- <execution>
- <phase>package</phase>
- <configuration>
- <classpath>
- <!-- PEAR file component classpath settings -->
- $main_root/lib/cglib-nodep-2.1_3.jar;$main_root/lib/google-collect-snapshot-20080530.jar;$main_root/lib/log4j-1.2.14.jar;$main_root/lib/lucene-core-2.4.0.jar;$main_root/lib/lucene-snowball-2.4.0.jar
- </classpath>
-
- <mainComponentDesc>
- <!-- PEAR file main component descriptor -->
- desc/${componentId}.xml
- </mainComponentDesc>
-
- <componentId>
- <!-- PEAR file component ID -->
- ${componentId}
- </componentId>
- </configuration>
- <goals>
- <goal>package</goal>
- </goals>
- </execution>
- </executions>
+ </execution>
+ </executions>
</plugin>
- </plugins>
+ </plugins>
</build>
-
+ <properties>
+ <componentId>LuceneCASIndexer</componentId>
+ </properties>
+
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>