You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/10/21 20:58:44 UTC
svn commit: r1534320 [2/39] - in /lucene/dev/branches/lucene4956: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/lucene/expressions/
dev-tools/idea/solr/contrib/velocity/ dev-tools/maven/
dev-tools/maven/lucene/ dev-tools/maven/lucene/expressions/ ...
Modified: lucene/dev/branches/lucene4956/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/build.xml (original)
+++ lucene/dev/branches/lucene4956/build.xml Mon Oct 21 18:58:24 2013
@@ -68,6 +68,7 @@
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
+ <subant buildpath="lucene" target="check-lib-versions" inheritall="false" failonerror="true"/>
</target>
<target name="-validate-source-patterns" unless="disable.source-patterns">
@@ -125,6 +126,13 @@
</subant>
</target>
+ <target name="jar-src" description="Build Lucene and Solr Source Jar files">
+ <subant target="jar-src" inheritall="false" failonerror="true">
+ <fileset dir="lucene" includes="build.xml"/>
+ <fileset dir="solr" includes="build.xml"/>
+ </subant>
+ </target>
+
<property name="version" value="5.0-SNAPSHOT"/>
<property name="maven-build-dir" value="maven-build"/>
<property name="maven-version" value="2.2.1"/>
@@ -229,12 +237,37 @@
</target>
<target name="idea" depends="resolve" description="Setup IntelliJ IDEA configuration">
+ <!-- Look for property definition for ${idea.jdk} in various *build.properties files -->
+ <property file="lucene/build.properties"/> <!-- Look in the current project first -->
+ <property file="${user.home}/lucene.build.properties"/>
+ <property file="${user.home}/build.properties"/>
+ <condition property="idea.jdk.is.set">
+ <isset property="idea.jdk"/>
+ </condition>
+ <!-- Define ${idea.jdk} if it's not yet defined - otherwise literal "${idea.jdk}" is substituted -->
+ <property name="idea.jdk" value=""/>
<copy todir=".">
<fileset dir="dev-tools/idea"/>
+ <filterset begintoken="subst.=&quot;" endtoken="&quot;">
+ <filter token="idea.jdk" value="${idea.jdk}"/>
+ </filterset>
</copy>
+ <antcall target="-post-idea-instructions"/>
+ </target>
+
+ <target name="-post-idea-instructions" unless="idea.jdk.is.set">
<echo>
- To complete IntelliJ IDEA setup, you must manually configure
- File | Project Structure | Project | Project SDK.
+To complete IntelliJ IDEA setup, you must manually configure
+File | Project Structure | Project | Project SDK.
+
+You won't have to do this in the future if you define property
+$${idea.jdk}, e.g. in ~/lucene.build.properties, ~/build.properties
+or lucene/build.properties, with a value consisting of the
+following two XML attributes/values (adjust values according to
+JDKs you have defined locally - see
+File | Project Structure | Platform Settings | SDKs):
+
+ idea.jdk = project-jdk-name="1.7" project-jdk-type="JavaSDK"
</echo>
</target>
Modified: lucene/dev/branches/lucene4956/dev-tools/idea/.idea/ant.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/idea/.idea/ant.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/idea/.idea/ant.xml (original)
+++ lucene/dev/branches/lucene4956/dev-tools/idea/.idea/ant.xml Mon Oct 21 18:58:24 2013
@@ -19,6 +19,7 @@
<buildFile url="file://$PROJECT_DIR$/lucene/classification/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/codecs/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/demo/build.xml" />
+ <buildFile url="file://$PROJECT_DIR$/lucene/expressions/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/facet/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/grouping/build.xml" />
<buildFile url="file://$PROJECT_DIR$/lucene/highlighter/build.xml" />
Modified: lucene/dev/branches/lucene4956/dev-tools/idea/.idea/misc.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/idea/.idea/misc.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/idea/.idea/misc.xml (original)
+++ lucene/dev/branches/lucene4956/dev-tools/idea/.idea/misc.xml Mon Oct 21 18:58:24 2013
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
- <component name="ProjectRootManager" version="2" languageLevel="JDK_1_7"/>
+ <component name="ProjectRootManager" version="2" languageLevel="JDK_1_7" subst.="idea.jdk" />
</project>
Modified: lucene/dev/branches/lucene4956/dev-tools/idea/.idea/modules.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/idea/.idea/modules.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/idea/.idea/modules.xml (original)
+++ lucene/dev/branches/lucene4956/dev-tools/idea/.idea/modules.xml Mon Oct 21 18:58:24 2013
@@ -24,6 +24,7 @@
<module filepath="$PROJECT_DIR$/lucene/codecs/src/test/codecs-tests.iml" />
<module filepath="$PROJECT_DIR$/lucene/codecs/src/resources/codecs-resources.iml" />
<module filepath="$PROJECT_DIR$/lucene/demo/demo.iml" />
+ <module filepath="$PROJECT_DIR$/lucene/expressions/expressions.iml" />
<module filepath="$PROJECT_DIR$/lucene/facet/facet.iml" />
<module filepath="$PROJECT_DIR$/lucene/grouping/grouping.iml" />
<module filepath="$PROJECT_DIR$/lucene/highlighter/highlighter.iml" />
Modified: lucene/dev/branches/lucene4956/dev-tools/idea/.idea/workspace.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/idea/.idea/workspace.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/idea/.idea/workspace.xml (original)
+++ lucene/dev/branches/lucene4956/dev-tools/idea/.idea/workspace.xml Mon Oct 21 18:58:24 2013
@@ -95,6 +95,13 @@
<option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
<option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
</configuration>
+ <configuration default="false" name="Module expressions" type="JUnit" factoryName="JUnit">
+ <module name="expressions" />
+ <option name="TEST_OBJECT" value="package" />
+ <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/lucene/expressions" />
+ <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
+ <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
+ </configuration>
<configuration default="false" name="Module facet" type="JUnit" factoryName="JUnit">
<module name="facet" />
<option name="TEST_OBJECT" value="package" />
@@ -263,28 +270,29 @@
<item index="10" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
<item index="11" class="java.lang.String" itemvalue="JUnit.Module classification" />
<item index="12" class="java.lang.String" itemvalue="JUnit.Module codecs" />
- <item index="13" class="java.lang.String" itemvalue="JUnit.Module facet" />
- <item index="14" class="java.lang.String" itemvalue="JUnit.Module grouping" />
- <item index="15" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
- <item index="16" class="java.lang.String" itemvalue="JUnit.Module join" />
- <item index="17" class="java.lang.String" itemvalue="JUnit.Module memory" />
- <item index="18" class="java.lang.String" itemvalue="JUnit.Module misc" />
- <item index="19" class="java.lang.String" itemvalue="JUnit.Module queries" />
- <item index="20" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
- <item index="21" class="java.lang.String" itemvalue="JUnit.Module replicator" />
- <item index="22" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
- <item index="23" class="java.lang.String" itemvalue="JUnit.Module spatial" />
- <item index="24" class="java.lang.String" itemvalue="JUnit.Module suggest" />
- <item index="25" class="java.lang.String" itemvalue="JUnit.Solr core" />
- <item index="26" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
- <item index="27" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
- <item index="28" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
- <item index="29" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
- <item index="30" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
- <item index="31" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
- <item index="32" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
- <item index="33" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
- <item index="34" class="java.lang.String" itemvalue="JUnit.Solrj" />
+ <item index="13" class="java.lang.String" itemvalue="JUnit.Module expressions" />
+ <item index="14" class="java.lang.String" itemvalue="JUnit.Module facet" />
+ <item index="15" class="java.lang.String" itemvalue="JUnit.Module grouping" />
+ <item index="16" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
+ <item index="17" class="java.lang.String" itemvalue="JUnit.Module join" />
+ <item index="18" class="java.lang.String" itemvalue="JUnit.Module memory" />
+ <item index="19" class="java.lang.String" itemvalue="JUnit.Module misc" />
+ <item index="20" class="java.lang.String" itemvalue="JUnit.Module queries" />
+ <item index="21" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
+ <item index="22" class="java.lang.String" itemvalue="JUnit.Module replicator" />
+ <item index="23" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
+ <item index="24" class="java.lang.String" itemvalue="JUnit.Module spatial" />
+ <item index="25" class="java.lang.String" itemvalue="JUnit.Module suggest" />
+ <item index="26" class="java.lang.String" itemvalue="JUnit.Solr core" />
+ <item index="27" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
+ <item index="28" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
+ <item index="29" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
+ <item index="30" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
+ <item index="31" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
+ <item index="32" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
+ <item index="33" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
+ <item index="34" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
+ <item index="35" class="java.lang.String" itemvalue="JUnit.Solrj" />
</list>
</component>
</project>
Modified: lucene/dev/branches/lucene4956/dev-tools/idea/solr/contrib/velocity/velocity.iml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/idea/solr/contrib/velocity/velocity.iml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/idea/solr/contrib/velocity/velocity.iml (original)
+++ lucene/dev/branches/lucene4956/dev-tools/idea/solr/contrib/velocity/velocity.iml Mon Oct 21 18:58:24 2013
@@ -17,6 +17,7 @@
<orderEntry type="library" name="Solr velocity library" level="project" />
<orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
<orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
+ <orderEntry type="module" module-name="lucene-core" />
<orderEntry type="module" module-name="solr-core" />
<orderEntry type="module" module-name="solrj" />
</component>
Modified: lucene/dev/branches/lucene4956/dev-tools/maven/lucene/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/maven/lucene/pom.xml.template?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/maven/lucene/pom.xml.template (original)
+++ lucene/dev/branches/lucene4956/dev-tools/maven/lucene/pom.xml.template Mon Oct 21 18:58:24 2013
@@ -47,6 +47,7 @@
<module>benchmark</module>
<module>classification</module>
<module>demo</module>
+ <module>expressions</module>
<module>facet</module>
<module>grouping</module>
<module>highlighter</module>
Modified: lucene/dev/branches/lucene4956/dev-tools/maven/lucene/suggest/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/maven/lucene/suggest/pom.xml.template?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/maven/lucene/suggest/pom.xml.template (original)
+++ lucene/dev/branches/lucene4956/dev-tools/maven/lucene/suggest/pom.xml.template Mon Oct 21 18:58:24 2013
@@ -64,6 +64,16 @@
<artifactId>lucene-misc</artifactId>
<version>${project.version}</version>
</dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>lucene-expressions</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>lucene-queries</artifactId>
+ <version>${project.version}</version>
+ </dependency>
</dependencies>
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
Modified: lucene/dev/branches/lucene4956/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/maven/pom.xml.template?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/branches/lucene4956/dev-tools/maven/pom.xml.template Mon Oct 21 18:58:24 2013
@@ -48,7 +48,7 @@
<slf4j.version>1.6.6</slf4j.version>
<log4j.version>1.2.16</log4j.version>
<tika.version>1.4</tika.version>
- <httpcomponents.version>4.2.3</httpcomponents.version>
+ <httpcomponents.version>4.2.6</httpcomponents.version>
<commons-io.version>2.1</commons-io.version>
<restlet.version>2.1.1</restlet.version>
<hadoop.version>2.0.5-alpha</hadoop.version>
@@ -266,6 +266,38 @@
</exclusions>
</dependency>
<dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>3.5</version>
+ <!-- these are dependencies to compile antlr-runtime itself -->
+ <exclusions>
+ <exclusion>
+ <groupId>antlr</groupId>
+ <artifactId>antlr</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.antlr</groupId>
+ <artifactId>stringtemplate</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.ow2.asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>4.1</version>
+ </dependency>
+ <dependency>
+ <groupId>org.ow2.asm</groupId>
+ <artifactId>asm-commons</artifactId>
+ <version>4.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.ow2.asm</groupId>
+ <artifactId>asm-tree</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
<groupId>org.apache.ant</groupId>
<artifactId>ant</artifactId>
<version>1.8.2</version>
@@ -600,23 +632,10 @@
<version>2.4</version>
<configuration>
<archive>
- <!-- This section should be *exactly* the same under -->
- <!-- maven-jar-plugin and maven-war-plugin. -->
- <!-- If you make changes here, make the same changes -->
- <!-- in the other location as well. -->
- <manifestEntries>
- <Extension-Name>${project.groupId}</Extension-Name>
- <Implementation-Title>${project.groupId}</Implementation-Title>
- <Specification-Title>${project.name}</Specification-Title>
- <!-- spec version must match "digit+{.digit+}*" -->
- <Specification-Version>${base.specification.version}.${now.version}</Specification-Version>
- <Specification-Vendor>The Apache Software Foundation</Specification-Vendor>
- <!-- impl version can be any string -->
- <Implementation-Version>${project.version} ${svn.revision} - ${user.name} - ${now.timestamp}</Implementation-Version>
- <Implementation-Vendor>The Apache Software Foundation</Implementation-Vendor>
- <X-Compile-Source-JDK>${java.compat.version}</X-Compile-Source-JDK>
- <X-Compile-Target-JDK>${java.compat.version}</X-Compile-Target-JDK>
- </manifestEntries>
+ <manifest>
+ <addDefaultSpecificationEntries>false</addDefaultSpecificationEntries>
+ <addDefaultImplementationEntries>false</addDefaultImplementationEntries>
+ </manifest>
</archive>
</configuration>
</plugin>
@@ -681,7 +700,7 @@
<configuration>
<archive>
<!-- This section should be *exactly* the same under -->
- <!-- maven-jar-plugin and maven-war-plugin. -->
+ <!-- maven-bundle-plugin and maven-war-plugin. -->
<!-- If you make changes here, make the same changes -->
<!-- in the other location as well. -->
<manifestEntries>
@@ -694,6 +713,7 @@
<!-- impl version can be any string -->
<Implementation-Version>${project.version} ${svn.revision} - ${user.name} - ${now.timestamp}</Implementation-Version>
<Implementation-Vendor>The Apache Software Foundation</Implementation-Vendor>
+ <Implementation-Vendor-Id>${project.groupId}</Implementation-Vendor-Id>
<X-Compile-Source-JDK>${java.compat.version}</X-Compile-Source-JDK>
<X-Compile-Target-JDK>${java.compat.version}</X-Compile-Target-JDK>
</manifestEntries>
@@ -806,6 +826,23 @@
<configuration>
<instructions>
<Export-Package>*;-split-package:=merge-first</Export-Package>
+
+ <!-- This section should be *exactly* the same under -->
+ <!-- maven-bundle-plugin and maven-war-plugin. -->
+ <!-- If you make changes here, make the same changes -->
+ <!-- in the other location as well. -->
+ <Extension-Name>${project.groupId}</Extension-Name>
+ <Implementation-Title>${project.groupId}</Implementation-Title>
+ <Specification-Title>${project.name}</Specification-Title>
+ <!-- spec version must match "digit+{.digit+}*" -->
+ <Specification-Version>${base.specification.version}.${now.version}</Specification-Version>
+ <Specification-Vendor>The Apache Software Foundation</Specification-Vendor>
+ <!-- impl version can be any string -->
+ <Implementation-Version>${project.version} ${svn.revision} - ${user.name} - ${now.timestamp}</Implementation-Version>
+ <Implementation-Vendor>The Apache Software Foundation</Implementation-Vendor>
+ <Implementation-Vendor-Id>${project.groupId}</Implementation-Vendor-Id>
+ <X-Compile-Source-JDK>${java.compat.version}</X-Compile-Source-JDK>
+ <X-Compile-Target-JDK>${java.compat.version}</X-Compile-Target-JDK>
</instructions>
</configuration>
<executions>
Modified: lucene/dev/branches/lucene4956/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template (original)
+++ lucene/dev/branches/lucene4956/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template Mon Oct 21 18:58:24 2013
@@ -68,16 +68,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-smartcn</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-analyzers-stempel</artifactId>
- <version>${project.version}</version>
- </dependency>
- <dependency>
<!-- SOLR-3263: Test scope dep is required to avoid jar signing conflicts -->
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
Modified: lucene/dev/branches/lucene4956/dev-tools/maven/solr/core/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/maven/solr/core/src/java/pom.xml.template?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/maven/solr/core/src/java/pom.xml.template (original)
+++ lucene/dev/branches/lucene4956/dev-tools/maven/solr/core/src/java/pom.xml.template Mon Oct 21 18:58:24 2013
@@ -166,13 +166,21 @@
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math</artifactId>
</exclusion>
<exclusion>
<groupId>xmlenc</groupId>
<artifactId>xmlenc</artifactId>
- </exclusion>
+ </exclusion>
+ <exclusion>
+ <groupId>commons-configuration</groupId>
+ <artifactId>commons-configuration</artifactId>
+ </exclusion>
<exclusion>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
@@ -194,6 +202,14 @@
<artifactId>jersey-server</artifactId>
</exclusion>
<exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>tomcat</groupId>
<artifactId>jasper-compiler</artifactId>
</exclusion>
@@ -252,6 +268,10 @@
<artifactId>servlet-api</artifactId>
</exclusion>
<exclusion>
+ <groupId>javax.servlet.jsp</groupId>
+ <artifactId>jsp-api</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
</exclusion>
@@ -272,6 +292,14 @@
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ </exclusion>
+ <exclusion>
<groupId>tomcat</groupId>
<artifactId>jasper-runtime</artifactId>
</exclusion>
@@ -323,17 +351,6 @@
<optional>true</optional> <!-- Only used for tests and one command-line utility: JettySolrRunner -->
</dependency>
<dependency>
- <groupId>org.codehaus.woodstox</groupId>
- <artifactId>wstx-asl</artifactId>
- <scope>runtime</scope>
- <exclusions>
- <exclusion>
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
Modified: lucene/dev/branches/lucene4956/dev-tools/scripts/checkJavaDocs.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/scripts/checkJavaDocs.py?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/scripts/checkJavaDocs.py (original)
+++ lucene/dev/branches/lucene4956/dev-tools/scripts/checkJavaDocs.py Mon Oct 21 18:58:24 2013
@@ -80,6 +80,8 @@ def checkClassDetails(fullPath):
Checks for invalid HTML in the full javadocs under each field/method.
"""
+ isAttributeSource = fullPath.endswith('AttributeSource.html')
+
# TODO: only works with java7 generated javadocs now!
with open(fullPath, encoding='UTF-8') as f:
desc = None
@@ -87,6 +89,12 @@ def checkClassDetails(fullPath):
item = None
errors = []
for line in f.readlines():
+
+ if isAttributeSource:
+ # Work around Javadocs bug that fails to escape the <T> type parameter in {@link #getAttribute} and {@link #addAttribute}
+ line = line.replace('<code>getAttribute(java.lang.Class<T>)</code>', '<code>getAttribute(java.lang.Class)</code>')
+ line = line.replace('<code>addAttribute(java.lang.Class<T>)</code>', '<code>addAttribute(java.lang.Class)</code>')
+
m = reH3.search(line)
if m is not None:
if desc is not None:
Modified: lucene/dev/branches/lucene4956/dev-tools/scripts/diffSources.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/scripts/diffSources.py?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/scripts/diffSources.py (original)
+++ lucene/dev/branches/lucene4956/dev-tools/scripts/diffSources.py Mon Oct 21 18:58:24 2013
@@ -50,7 +50,7 @@ while True:
elif l.endswith('\n'):
l = l[:-1]
if l.startswith('diff ') or l.startswith('Binary files '):
- keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1))
+ keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml') or l.lower().endswith('.html') or l.lower().endswith('.template') or l.lower().endswith('.py') or l.lower().endswith('.g') or l.lower().endswith('.properties')) and l.find('/.svn/') == -1))
if keep:
print
print
Modified: lucene/dev/branches/lucene4956/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/dev-tools/scripts/smokeTestRelease.py?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/lucene4956/dev-tools/scripts/smokeTestRelease.py Mon Oct 21 18:58:24 2013
@@ -249,8 +249,12 @@ def checkJARMetaData(desc, jarFile, svnR
def normSlashes(path):
return path.replace(os.sep, '/')
-def checkAllJARs(topDir, project, svnRevision, version):
- print(' verify JAR/WAR metadata...')
+def checkAllJARs(topDir, project, svnRevision, version, tmpDir, baseURL):
+ print(' verify JAR metadata/identity/no javax.* or java.* classes...')
+ if project == 'solr':
+ luceneDistFilenames = dict()
+ for file in getBinaryDistFiles('lucene', tmpDir, version, baseURL):
+ luceneDistFilenames[os.path.basename(file)] = file
for root, dirs, files in os.walk(topDir):
normRoot = normSlashes(root)
@@ -272,29 +276,54 @@ def checkAllJARs(topDir, project, svnRev
noJavaPackageClasses('JAR file "%s"' % fullPath, fullPath)
if file.lower().find('lucene') != -1 or file.lower().find('solr') != -1:
checkJARMetaData('JAR file "%s"' % fullPath, fullPath, svnRevision, version)
-
+ if project == 'solr' and file.lower().find('lucene') != -1:
+ jarFilename = os.path.basename(file)
+ if jarFilename not in luceneDistFilenames:
+ raise RuntimeError('Artifact %s is not present in Lucene binary distribution' % fullPath)
+ identical = filecmp.cmp(fullPath, luceneDistFilenames[jarFilename], shallow=False)
+ if not identical:
+ raise RuntimeError('Artifact %s is not identical to %s in Lucene binary distribution'
+ % (fullPath, luceneDistFilenames[jarFilename]))
+
-def checkSolrWAR(warFileName, svnRevision, version):
+def checkSolrWAR(warFileName, svnRevision, version, tmpDir, baseURL):
"""
Crawls for JARs inside the WAR and ensures there are no classes
under java.* or javax.* namespace.
"""
- print(' make sure WAR file has no javax.* or java.* classes...')
+ print(' verify WAR metadata/contained JAR identity/no javax.* or java.* classes...')
checkJARMetaData(warFileName, warFileName, svnRevision, version)
+ distFilenames = dict()
+ for file in getBinaryDistFiles('lucene', tmpDir, version, baseURL):
+ distFilenames[os.path.basename(file)] = file
+
with zipfile.ZipFile(warFileName, 'r') as z:
for name in z.namelist():
if name.endswith('.jar'):
+ jarInsideWarContents = z.read(name)
noJavaPackageClasses('JAR file %s inside WAR file %s' % (name, warFileName),
- io.BytesIO(z.read(name)))
+ io.BytesIO(jarInsideWarContents))
if name.lower().find('lucene') != -1 or name.lower().find('solr') != -1:
checkJARMetaData('JAR file %s inside WAR file %s' % (name, warFileName),
- io.BytesIO(z.read(name)),
+ io.BytesIO(jarInsideWarContents),
svnRevision,
version)
+ if name.lower().find('lucene') != -1:
+ jarInsideWarFilename = os.path.basename(name)
+ if jarInsideWarFilename not in distFilenames:
+ raise RuntimeError('Artifact %s in %s is not present in Lucene binary distribution'
+ % (name, warFileName))
+ distJarName = distFilenames[jarInsideWarFilename]
+ with open(distJarName, "rb", buffering=0) as distJarFile:
+ distJarContents = distJarFile.readall()
+ if jarInsideWarContents != distJarContents:
+ raise RuntimeError('Artifact %s in %s is not identical to %s in Lucene binary distribution'
+ % (name, warFileName, distJarName))
+
def checkSigs(project, urlString, version, tmpDir, isSigned):
@@ -575,7 +604,7 @@ def getDirEntries(urlString):
if text == 'Parent Directory' or text == '..':
return links[(i+1):]
-def unpackAndVerify(project, tmpDir, artifact, svnRevision, version, testArgs):
+def unpackAndVerify(project, tmpDir, artifact, svnRevision, version, testArgs, baseURL):
destDir = '%s/unpack' % tmpDir
if os.path.exists(destDir):
shutil.rmtree(destDir)
@@ -595,14 +624,14 @@ def unpackAndVerify(project, tmpDir, art
raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))
unpackPath = '%s/%s' % (destDir, expected)
- verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs)
+ verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs, tmpDir, baseURL)
LUCENE_NOTICE = None
LUCENE_LICENSE = None
SOLR_NOTICE = None
SOLR_LICENSE = None
-def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs):
+def verifyUnpacked(project, artifact, unpackPath, svnRevision, version, testArgs, tmpDir, baseURL):
global LUCENE_NOTICE
global LUCENE_LICENSE
global SOLR_NOTICE
@@ -651,9 +680,9 @@ def verifyUnpacked(project, artifact, un
if project == 'lucene':
# TODO: clean this up to not be a list of modules that we must maintain
- extras = ('analysis', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
+ extras = ('analysis', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'expressions', 'facet', 'grouping', 'highlighter', 'join', 'memory', 'misc', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial', 'suggest', 'test-framework', 'licenses')
if isSrc:
- extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'backwards', 'tools', 'site')
+ extras += ('build.xml', 'common-build.xml', 'module-build.xml', 'ivy-settings.xml', 'ivy-versions.properties', 'backwards', 'tools', 'site')
else:
extras = ()
@@ -720,13 +749,13 @@ def verifyUnpacked(project, artifact, un
else:
- checkAllJARs(os.getcwd(), project, svnRevision, version)
+ checkAllJARs(os.getcwd(), project, svnRevision, version, tmpDir, baseURL)
if project == 'lucene':
testDemo(isSrc, version, '1.7')
else:
- checkSolrWAR('%s/example/webapps/solr.war' % unpackPath, svnRevision, version)
+ checkSolrWAR('%s/example/webapps/solr.war' % unpackPath, svnRevision, version, tmpDir, baseURL)
print(' copying unpacked distribution for Java 7 ...')
java7UnpackPath = '%s-java7' %unpackPath
@@ -913,11 +942,10 @@ def checkMaven(baseURL, tmpDir, svnRevis
if text == releaseBranchText:
releaseBranchSvnURL = subURL
- print(' get POM templates', end=' ')
POMtemplates = defaultdict()
getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL)
print()
- print(' download artifacts', end=' ')
+ print(' download artifacts')
artifacts = {'lucene': [], 'solr': []}
for project in ('lucene', 'solr'):
artifactsURL = '%s/%s/maven/org/apache/%s' % (baseURL, project, project)
@@ -926,59 +954,50 @@ def checkMaven(baseURL, tmpDir, svnRevis
os.makedirs(targetDir)
crawl(artifacts[project], artifactsURL, targetDir)
print()
- print(' verify that each binary artifact has a deployed POM...')
verifyPOMperBinaryArtifact(artifacts, version)
- print(' verify that there is an artifact for each POM template...')
verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version)
- print(" verify Maven artifacts' md5/sha1 digests...")
verifyMavenDigests(artifacts)
- print(' verify that all non-Mavenized deps are deployed...')
- nonMavenizedDeps = dict()
- checkNonMavenizedDeps(nonMavenizedDeps, POMtemplates, artifacts, tmpDir,
- version, releaseBranchSvnURL)
- print(' check for javadoc and sources artifacts...')
- checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version)
- print(" verify deployed POMs' coordinates...")
+ checkJavadocAndSourceArtifacts(artifacts, version)
verifyDeployedPOMsCoordinates(artifacts, version)
if isSigned:
- print(' verify maven artifact sigs', end=' ')
verifyMavenSigs(baseURL, tmpDir, artifacts)
- distributionFiles = getDistributionsForMavenChecks(tmpDir, version, baseURL)
+ distFiles = getBinaryDistFilesForMavenChecks(tmpDir, version, baseURL)
+ checkIdenticalMavenArtifacts(distFiles, artifacts, version)
- print(' verify that non-Mavenized deps are same as in the binary distribution...')
- checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps)
- print(' verify that Maven artifacts are same as in the binary distribution...')
- checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version)
-
- checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, 'lucene', svnRevision, version)
- checkAllJARs('%s/maven/org/apache/solr' % tmpDir, 'solr', svnRevision, version)
+ checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, 'lucene', svnRevision, version, tmpDir, baseURL)
+ checkAllJARs('%s/maven/org/apache/solr' % tmpDir, 'solr', svnRevision, version, tmpDir, baseURL)
-def getDistributionsForMavenChecks(tmpDir, version, baseURL):
- distributionFiles = defaultdict()
+def getBinaryDistFilesForMavenChecks(tmpDir, version, baseURL):
+ distFiles = defaultdict()
for project in ('lucene', 'solr'):
- distribution = '%s-%s.tgz' % (project, version)
- if not os.path.exists('%s/%s' % (tmpDir, distribution)):
- distURL = '%s/%s/%s' % (baseURL, project, distribution)
- print(' download %s...' % distribution, end=' ')
- download(distribution, distURL, tmpDir)
- destDir = '%s/unpack-%s-maven' % (tmpDir, project)
- if os.path.exists(destDir):
- shutil.rmtree(destDir)
- os.makedirs(destDir)
- os.chdir(destDir)
- print(' unpack %s...' % distribution)
- unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
- run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
- distributionFiles[project] = []
- for root, dirs, files in os.walk(destDir):
- distributionFiles[project].extend([os.path.join(root, file) for file in files])
+ distFiles[project] = getBinaryDistFiles(project, tmpDir, version, baseURL)
+ return distFiles
+
+def getBinaryDistFiles(project, tmpDir, version, baseURL):
+ distribution = '%s-%s.tgz' % (project, version)
+ if not os.path.exists('%s/%s' % (tmpDir, distribution)):
+ distURL = '%s/%s/%s' % (baseURL, project, distribution)
+ print(' download %s...' % distribution, end=' ')
+ download(distribution, distURL, tmpDir)
+ destDir = '%s/unpack-%s-getBinaryDistFiles' % (tmpDir, project)
+ if os.path.exists(destDir):
+ shutil.rmtree(destDir)
+ os.makedirs(destDir)
+ os.chdir(destDir)
+ print(' unpack %s...' % distribution)
+ unpackLogFile = '%s/unpack-%s-getBinaryDistFiles.log' % (tmpDir, distribution)
+ run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
+ distributionFiles = []
+ for root, dirs, files in os.walk(destDir):
+ distributionFiles.extend([os.path.join(root, file) for file in files])
return distributionFiles
-def checkJavadocAndSourceArtifacts(nonMavenizedDeps, artifacts, version):
+def checkJavadocAndSourceArtifacts(artifacts, version):
+ print(' check for javadoc and sources artifacts...')
for project in ('lucene', 'solr'):
for artifact in artifacts[project]:
- if artifact.endswith(version + '.jar') and artifact not in nonMavenizedDeps:
+ if artifact.endswith(version + '.jar'):
javadocJar = artifact[:-4] + '-javadoc.jar'
if javadocJar not in artifacts[project]:
raise RuntimeError('missing: %s' % javadocJar)
@@ -986,21 +1005,6 @@ def checkJavadocAndSourceArtifacts(nonMa
if sourcesJar not in artifacts[project]:
raise RuntimeError('missing: %s' % sourcesJar)
-def checkIdenticalNonMavenizedDeps(distributionFiles, nonMavenizedDeps):
- for project in ('lucene', 'solr'):
- distFilenames = dict()
- for file in distributionFiles[project]:
- distFilenames[os.path.basename(file)] = file
- for dep in nonMavenizedDeps.keys():
- if ('/%s/' % project) in dep:
- depOrigFilename = os.path.basename(nonMavenizedDeps[dep])
- if not depOrigFilename in distFilenames:
- raise RuntimeError('missing: non-mavenized dependency %s' % nonMavenizedDeps[dep])
- identical = filecmp.cmp(dep, distFilenames[depOrigFilename], shallow=False)
- if not identical:
- raise RuntimeError('Deployed non-mavenized dep %s differs from distribution dep %s'
- % (dep, distFilenames[depOrigFilename]))
-
def getZipFileEntries(fileName):
entries = []
with zipfile.ZipFile(fileName) as zf:
@@ -1010,36 +1014,28 @@ def getZipFileEntries(fileName):
entries.sort()
return entries
-def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts, version):
+def checkIdenticalMavenArtifacts(distFiles, artifacts, version):
+ print(' verify that Maven artifacts are same as in the binary distribution...')
reJarWar = re.compile(r'%s\.[wj]ar$' % version) # exclude *-javadoc.jar and *-sources.jar
for project in ('lucene', 'solr'):
distFilenames = dict()
- for file in distributionFiles[project]:
+ for file in distFiles[project]:
baseName = os.path.basename(file)
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
- entries = getZipFileEntries(artifact)
- if artifact not in nonMavenizedDeps:
- artifactFilename = os.path.basename(artifact)
- if artifactFilename not in distFilenames:
- raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
- % (artifact, project))
- else:
- binaryEntries = getZipFileEntries(distFilenames[artifactFilename])
- if binaryEntries != entries:
- raise RuntimeError('Maven artifact %s has different contents than binary distribution\n maven:\n%s\n binary:\n%s\n' % \
- (artifactFilename,
- '\n'.join(entries),
- '\n'.join(binaryEntries)))
-
- # TODO: Either fix the build to ensure that maven artifacts *are* identical, or recursively compare contents
- # identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False)
- # if not identical:
- # raise RuntimeError('Maven artifact %s is not identical to %s in %s binary distribution'
- # % (artifact, distFilenames[artifactFilename], project))
+ artifactFilename = os.path.basename(artifact)
+ if artifactFilename not in distFilenames:
+ raise RuntimeError('Maven artifact %s is not present in %s binary distribution'
+ % (artifact, project))
+ else:
+ identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False)
+ if not identical:
+ raise RuntimeError('Maven artifact %s is not identical to %s in %s binary distribution'
+ % (artifact, distFilenames[artifactFilename], project))
def verifyMavenDigests(artifacts):
+ print(" verify Maven artifacts' md5/sha1 digests...")
reJarWarPom = re.compile(r'\.(?:[wj]ar|pom)$')
for project in ('lucene', 'solr'):
for artifactFile in [a for a in artifacts[project] if reJarWarPom.search(a)]:
@@ -1070,76 +1066,6 @@ def verifyMavenDigests(artifacts):
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s'
% (artifactFile, sha1Expected, sha1Actual))
-def checkNonMavenizedDeps(nonMavenizedDependencies, POMtemplates, artifacts,
- tmpDir, version, releaseBranchSvnURL):
- """
- - check for non-mavenized dependencies listed in the grandfather POM template
- - nonMavenizedDependencies is populated with a map from non-mavenized dependency
- artifact path to the original jar path
- """
- namespace = '{http://maven.apache.org/POM/4.0.0}'
- xpathProfile = '{0}profiles/{0}profile'.format(namespace)
- xpathPlugin = '{0}build/{0}plugins/{0}plugin'.format(namespace)
- xpathExecution= '{0}executions/{0}execution'.format(namespace)
- xpathResourceDir = '{0}configuration/{0}resources/{0}resource/{0}directory'.format(namespace)
-
- treeRoot = ET.parse(POMtemplates['grandfather'][0]).getroot()
- for profile in treeRoot.findall(xpathProfile):
- pomDirs = []
- profileId = profile.find('%sid' % namespace)
- if profileId is not None and profileId.text == 'bootstrap':
- plugins = profile.findall(xpathPlugin)
- for plugin in plugins:
- artifactId = plugin.find('%sartifactId' % namespace).text.strip()
- if artifactId == 'maven-resources-plugin':
- for config in plugin.findall(xpathExecution):
- pomDirs.append(config.find(xpathResourceDir).text.strip())
- for plugin in plugins:
- artifactId = plugin.find('%sartifactId' % namespace).text.strip()
- if artifactId == 'maven-install-plugin':
- for execution in plugin.findall(xpathExecution):
- groupId, artifactId, file, pomFile = '', '', '', ''
- for child in execution.find('%sconfiguration' % namespace).getchildren():
- text = child.text.strip()
- if child.tag == '%sgroupId' % namespace:
- groupId = text if text != '${project.groupId}' else 'org.apache.lucene'
- elif child.tag == '%sartifactId' % namespace: artifactId = text
- elif child.tag == '%sfile' % namespace: file = text
- elif child.tag == '%spomFile' % namespace: pomFile = text
- if groupId in ('org.apache.lucene', 'org.apache.solr'):
- depJar = '%s/maven/%s/%s/%s/%s-%s.jar' \
- % (tmpDir, groupId.replace('.', '/'),
- artifactId, version, artifactId, version)
- if depJar not in artifacts['lucene'] \
- and depJar not in artifacts['solr']:
- raise RuntimeError('Missing non-mavenized dependency %s' % depJar)
- nonMavenizedDependencies[depJar] = file
- elif pomFile: # Find non-Mavenized deps with associated POMs
- pomFile = pomFile.split('/')[-1] # remove path
- doc2 = None
- workingCopy = os.path.abspath('%s/../..' % sys.path[0])
- for pomDir in pomDirs:
- if releaseBranchSvnURL is None:
- pomPath = '%s/%s/%s' % (workingCopy, pomDir, pomFile)
- if os.path.exists(pomPath):
- doc2 = ET.XML(open(pomPath, encoding='UTF-8').read())
- break
- else:
- entries = getDirEntries('%s/%s' % (releaseBranchSvnURL, pomDir))
- for text, subURL in entries:
- if text == pomFile:
- doc2 = ET.XML(load(subURL))
- break
- if doc2 is not None: break
-
- groupId2, artifactId2, packaging2, POMversion = getPOMcoordinate(doc2)
- depJar = '%s/maven/%s/%s/%s/%s-%s.jar' \
- % (tmpDir, groupId2.replace('.', '/'),
- artifactId2, version, artifactId2, version)
- if depJar not in artifacts['lucene'] and depJar not in artifacts['solr']:
- raise RuntimeError('Missing non-mavenized dependency %s' % depJar)
- nonMavenizedDependencies[depJar] = file
-
def getPOMcoordinate(treeRoot):
namespace = '{http://maven.apache.org/POM/4.0.0}'
groupId = treeRoot.find('%sgroupId' % namespace)
@@ -1156,7 +1082,7 @@ def getPOMcoordinate(treeRoot):
return groupId, artifactId, packaging, version
def verifyMavenSigs(baseURL, tmpDir, artifacts):
- """Verify Maven artifact signatures"""
+ print(' verify maven artifact sigs', end=' ')
for project in ('lucene', 'solr'):
keysFile = '%s/%s.KEYS' % (tmpDir, project)
if not os.path.exists(keysFile):
@@ -1206,7 +1132,7 @@ def verifyMavenSigs(baseURL, tmpDir, art
print()
def verifyPOMperBinaryArtifact(artifacts, version):
- """verify that each binary jar and war has a corresponding POM file"""
+ print(' verify that each binary artifact has a deployed POM...')
reBinaryJarWar = re.compile(r'%s\.[jw]ar$' % re.escape(version))
for project in ('lucene', 'solr'):
for artifact in [a for a in artifacts[project] if reBinaryJarWar.search(a)]:
@@ -1219,6 +1145,7 @@ def verifyDeployedPOMsCoordinates(artifa
verify that each POM's coordinate (drawn from its content) matches
its filepath, and verify that the corresponding artifact exists.
"""
+ print(" verify deployed POMs' coordinates...")
for project in ('lucene', 'solr'):
for POM in [a for a in artifacts[project] if a.endswith('.pom')]:
treeRoot = ET.parse(POM).getroot()
@@ -1234,7 +1161,7 @@ def verifyDeployedPOMsCoordinates(artifa
raise RuntimeError('Missing corresponding .%s artifact for POM %s' % (packaging, POM))
def verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version):
- """verify that each POM template's artifact is present in artifacts"""
+ print(' verify that there is an artifact for each POM template...')
namespace = '{http://maven.apache.org/POM/4.0.0}'
xpathPlugin = '{0}build/{0}plugins/{0}plugin'.format(namespace)
xpathSkipConfiguration = '{0}configuration/{0}skip'.format(namespace)
@@ -1257,6 +1184,7 @@ def verifyArtifactPerPOMtemplate(POMtemp
raise RuntimeError('Missing artifact %s' % artifact)
def getPOMtemplates(POMtemplates, tmpDir, releaseBranchSvnURL):
+ print(' get POM templates')
allPOMtemplates = []
sourceLocation = releaseBranchSvnURL
if sourceLocation is None:
@@ -1384,15 +1312,15 @@ def smokeTest(baseURL, svnRevision, vers
print('Test Lucene...')
checkSigs('lucene', lucenePath, version, tmpDir, isSigned)
for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version):
- unpackAndVerify('lucene', tmpDir, artifact, svnRevision, version, testArgs)
- unpackAndVerify('lucene', tmpDir, 'lucene-%s-src.tgz' % version, svnRevision, version, testArgs)
+ unpackAndVerify('lucene', tmpDir, artifact, svnRevision, version, testArgs, baseURL)
+ unpackAndVerify('lucene', tmpDir, 'lucene-%s-src.tgz' % version, svnRevision, version, testArgs, baseURL)
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
- unpackAndVerify('solr', tmpDir, artifact, svnRevision, version, testArgs)
- unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, svnRevision, version, testArgs)
+ unpackAndVerify('solr', tmpDir, artifact, svnRevision, version, testArgs, baseURL)
+ unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, svnRevision, version, testArgs, baseURL)
print()
print('Test Maven artifacts for Lucene and Solr...')
Modified: lucene/dev/branches/lucene4956/extra-targets.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/extra-targets.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/extra-targets.xml (original)
+++ lucene/dev/branches/lucene4956/extra-targets.xml Mon Oct 21 18:58:24 2013
@@ -79,11 +79,7 @@
<local name="svn.checkprops.failed"/>
<local name="svn.unversioned.failed"/>
<local name="svn.changed.failed"/>
- <script language="groovy" taskname="svn">
- <classpath>
- <path refid="groovy.classpath"/>
- <path refid="svnkit.classpath"/>
- </classpath><![CDATA[
+ <groovy taskname="svn" classpathref="svnkit.classpath"><![CDATA[
import org.tmatesoft.svn.core.*;
import org.tmatesoft.svn.core.wc.*;
@@ -99,7 +95,7 @@
Set missingProps = new TreeSet(), unversioned = new TreeSet(), changed = new TreeSet();
- self.log('Getting all versioned and unversioned files...');
+ task.log('Getting all versioned and unversioned files...');
statusClient.doStatus(basedir, SVNRevision.WORKING, SVNDepth.fromRecurse(true), false, true, false, false, {
status ->
SVNStatusType nodeStatus = status.getNodeStatus();
@@ -114,12 +110,12 @@
}
} as ISVNStatusHandler, null);
- self.log('Filtering files with existing svn:eol-style...');
+ task.log('Filtering files with existing svn:eol-style...');
wcClient.doGetProperty(basedir, 'svn:eol-style', SVNRevision.WORKING, SVNRevision.WORKING, true, {
file, prop -> missingProps.remove(convertRelative(file));
} as ISVNPropertyHandler);
- self.log('Filtering files with binary svn:mime-type...');
+ task.log('Filtering files with binary svn:mime-type...');
wcClient.doGetProperty(basedir, 'svn:mime-type', SVNRevision.WORKING, SVNRevision.WORKING, true, {
file, prop ->
prop = SVNPropertyValue.getPropertyAsString(prop.getValue());
@@ -134,7 +130,7 @@
project.setProperty('svn.checkprops.failed', convertSet2String(missingProps));
project.setProperty('svn.unversioned.failed', convertSet2String(unversioned));
project.setProperty('svn.changed.failed', convertSet2String(changed));
- ]]></script>
+ ]]></groovy>
<fail if="svn.checkprops.failed"
message="The following files are missing svn:eol-style (or binary svn:mime-type):${line.separator}${svn.checkprops.failed}"/>
<fail if="svn.unversioned.failed"
Modified: lucene/dev/branches/lucene4956/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/CHANGES.txt?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4956/lucene/CHANGES.txt Mon Oct 21 18:58:24 2013
@@ -38,6 +38,22 @@ New Features
* SOLR-3359: Added analyzer attribute/property to SynonymFilterFactory.
(Ryo Onodera via Koji Sekiguchi)
+* LUCENE-5123: Add a "push" option to the postings writing API, so
+ that a PostingsFormat now receives a Fields instance and it is
+ responsible for iterating through all fields, terms, documents and
+ positions. (Robert Muir, Mike McCandless)
+
+* LUCENE-3069: Add two memory resident dictionaries (FST terms dictionary and
+ FSTOrd terms dictionary) to improve primary key lookups. The PostingsBaseFormat
+ API is also changed so that term dictionaries get the ability to block
+ encode term metadata, and all dictionary implementations can now plug in any
+ PostingsBaseFormat. (Han Jiang, Mike McCandless)
+
+* LUCENE-5268: Full cutover of all postings formats to the "pull"
+ FieldsConsumer API, removing PushFieldsConsumer. Added new
+ PushPostingsWriterBase for single-pass push of docs/positions to the
+ postings format. (Mike McCandless)
+
Optimizations
* LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
@@ -45,6 +61,170 @@ Optimizations
on Windows if NIOFSDirectory is used, mmapped files are still locked.
(Michael Poindexter, Robert Muir, Uwe Schindler)
+======================= Lucene 4.6.0 =======================
+
+New Features
+
+* LUCENE-4906: PostingsHighlighter can now render to custom Object,
+ for advanced use cases where String is too restrictive (Luca
+ Cavanna, Robert Muir, Mike McCandless)
+
+* LUCENE-5133: Changed AnalyzingInfixSuggester.highlight to return
+ Object instead of String, to allow for advanced use cases where
+ String is too restrictive (Robert Muir, Shai Erera, Mike
+ McCandless)
+
+* LUCENE-5207: Added expressions module for customizing ranking
+ with script-like syntax.
+ (Jack Conradson, Ryan Ernst, Uwe Schindler via Robert Muir)
+
+* LUCENE-5180: ShingleFilter now creates shingles with trailing holes,
+ for example if a StopFilter had removed the last token. (Mike
+ McCandless)
+
+* LUCENE-5219: Add support to SynonymFilterFactory for custom
+ parsers. (Ryan Ernst via Robert Muir)
+
+* LUCENE-5235: Tokenizers now throw an IllegalStateException if the
+ consumer does not call reset() before consuming the stream. Previous
+ versions threw NullPointerException or ArrayIndexOutOfBoundsException
+ on best effort which was not user-friendly.
+ (Uwe Schindler, Robert Muir)
+
+* LUCENE-5240: Tokenizers now throw an IllegalStateException if the
+ consumer neglects to call close() on the previous stream before consuming
+ the next one. (Uwe Schindler, Robert Muir)
+
+* LUCENE-5214: Add new FreeTextSuggester, to predict the next word
+ using a simple ngram language model. This is useful for the "long
+ tail" suggestions, when a primary suggester fails to find a
+ suggestion. (Mike McCandless)
+
+* LUCENE-5251: New DocumentDictionary allows building suggesters via
+ contents of existing field, weight and optionally payload stored
+ fields in an index (Areek Zillur via Mike McCandless)
+
+* LUCENE-5261: Add QueryBuilder, a simple API to build queries from
+ the analysis chain directly, or to make it easier to implement
+ query parsers. (Robert Muir, Uwe Schindler)
+
+* LUCENE-5270: Add Terms.hasFreqs, to determine whether a given field
+ indexed per-doc term frequencies. (Mike McCandless)
+
+* LUCENE-5269: Add CodepointCountFilter. (Robert Muir)
+
+* LUCENE-5294: Suggest module: add DocumentExpressionDictionary to
+ compute each suggestion's weight using a javascript expression.
+ (Areek Zillur via Mike McCandless)
+
+* LUCENE-5274: FastVectorHighlighter now supports highlighting against several
+ indexed fields. (Nik Everett via Adrien Grand)
+
+Bug Fixes
+
+* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead
+ of IOContext.READ (Shikhar Bhushan via Mike McCandless)
+
+* LUCENE-5242: DirectoryTaxonomyWriter.replaceTaxonomy did not fully reset
+ its state, which could result in exceptions being thrown, as well as
+ incorrect ordinals returned from getParent. (Shai Erera)
+
+* LUCENE-5254: Fixed bounded memory leak, where objects like live
+ docs bitset were not freed from an starting reader after reopening
+ to a new reader and closing the original one. (Shai Erera, Mike
+ McCandless)
+
+* LUCENE-5262: Fixed file handle leaks when multiple attempts to open an
+ NRT reader hit exceptions. (Shai Erera)
+
+* LUCENE-5263: Transient IOExceptions, e.g. due to disk full or file
+ descriptor exhaustion, hit at unlucky times inside IndexWriter could
+ lead to silently losing deletions. (Shai Erera, Mike McCandless)
+
+* LUCENE-5264: CommonTermsQuery ignored minMustMatch if only high-frequent
+ terms were present in the query and the high-frequent operator was set
+ to SHOULD. (Simon Willnauer)
+
+* LUCENE-5269: Fix bug in NGramTokenFilter where it would sometimes count
+ unicode characters incorrectly. (Mike McCandless, Robert Muir)
+
+* LUCENE-5272: OpenBitSet.ensureCapacity did not modify numBits, causing
+ false assertion errors in fastSet. (Shai Erera)
+
+* LUCENE-5289: IndexWriter.hasUncommittedChanges was returning false
+ when there were buffered delete-by-Term. (Shalin Shekhar Mangar,
+ Mike McCandless)
+
+API Changes:
+
+* LUCENE-5222: Add SortField.needsScores(). Previously it was not possible
+ for a custom Sort that makes use of the relevance score to work correctly
+ with IndexSearcher when an ExecutorService is specified.
+ (Ryan Ernst, Mike McCandless, Robert Muir)
+
+* LUCENE-5275: Change AttributeSource.toString() to display the current
+ state of attributes. (Robert Muir)
+
+* LUCENE-5277: Modify FixedBitSet copy constructor to take an additional
+ numBits parameter to allow growing/shrinking the copied bitset. You can
+ use FixedBitSet.clone() if you only need to clone the bitset. (Shai Erera)
+
+* LUCENE-5260: Use TermFreqPayloadIterator for all suggesters; those
+ suggesters that can't support payloads will throw an exception if
+ hasPayloads() is true. (Areek Zillur via Mike McCandless)
+
+* LUCENE-5280: Rename TermFreqPayloadIterator -> InputIterator, along
+ with associated suggest/spell classes. (Areek Zillur via Mike
+ McCandless)
+
+Optimizations
+
+* LUCENE-5225: The ToParentBlockJoinQuery only keeps tracks of the the child
+ doc ids and child scores if the ToParentBlockJoinCollector is used.
+ (Martijn van Groningen)
+
+* LUCENE-5236: EliasFanoDocIdSet now has an index and uses broadword bit
+ selection to speed-up advance(). (Paul Elschot via Adrien Grand)
+
+* LUCENE-5266: Improved number of read calls and branches in DirectPackedReader. (Ryan Ernst)
+
+Documentation
+
+* LUCENE-5211: Better javadocs and error checking of 'format' option in
+ StopFilterFactory, as well as comments in all snowball formatted files
+ about specifying format option. (hossman)
+
+Changes in backwards compatibility policy
+
+* LUCENE-5235: Sub classes of Tokenizer have to call super.reset()
+ when implementing reset(). Otherwise the consumer will get an
+ IllegalStateException because the Reader is not correctly assigned.
+ It is important to never change the "input" field on Tokenizer
+ without using setReader(). The "input" field must not be used
+ outside reset(), incrementToken(), or end() - especially not in
+ the constructor. (Uwe Schindler, Robert Muir)
+
+* LUCENE-5204: Directory doesn't have default implementations for
+ LockFactory-related methods, which have been moved to BaseDirectory. If you
+ had a custom Directory implementation that extended Directory, you need to
+ extend BaseDirectory instead. (Adrien Grand)
+
+Build
+
+* LUCENE-5249, LUCENE-5257: All Lucene/Solr modules should use the same
+ dependency versions. (Steve Rowe)
+
+* LUCENE-5273: Binary artifacts in Lucene and Solr convenience binary
+ distributions accompanying a release, including on Maven Central,
+ should be identical across all distributions. (Steve Rowe, Uwe Schindler,
+ Shalin Shekhar Mangar)
+
+Tests
+
+* LUCENE-5278: Fix MockTokenizer to work better with more regular expression
+ patterns. Previously it could only behave like CharTokenizer (where a character
+ is either a "word" character or not), but now it gives a general longest-match
+ behavior. (Nik Everett via Robert Muir)
======================= Lucene 4.5.0 =======================
@@ -83,14 +263,39 @@ New features
FacetsAggregator.createOrdinalValueResolver. This gives better options for
resolving an ordinal's value by FacetAggregators. (Shai Erera)
+* LUCENE-5165: Add SuggestStopFilter, to be used with analyzing
+ suggesters, so that a stop word at the very end of the lookup query,
+ and without any trailing token characters, will be preserved. This
+ enables query "a" to suggest apple; see
+ http://blog.mikemccandless.com/2013/08/suggeststopfilter-carefully-removes.html
+ for details.
+
+* LUCENE-5178: Added support for missing values to DocValues fields.
+ AtomicReader.getDocsWithField returns a Bits of documents with a value,
+ and FieldCache.getDocsWithField forwards to that for DocValues fields. Things like
+ SortField.setMissingValue, FunctionValues.exists, and FieldValueFilter now
+ work with DocValues fields. (Robert Muir)
+
+* LUCENE-5124: Lucene 4.5 has a new Lucene45Codec with Lucene45DocValues,
+ supporting missing values and with most datastructures residing off-heap.
+ Added "Memory" docvalues format that works entirely in heap, and "Disk"
+ loads no datastructures into RAM. Both of these also support missing values.
+ Added DiskNormsFormat (in case you want norms entirely on disk). (Robert Muir)
+
+* LUCENE-2750: Added PForDeltaDocIdSet, an in-memory doc id set implementation
+ based on the PFOR encoding. (Adrien Grand)
+
+* LUCENE-5186: Added CachingWrapperFilter.getFilter in order to be able to get
+ the wrapped filter. (Trejkaz via Adrien Grand)
+
+* LUCENE-5197: Added SegmentReader.ramBytesUsed to return approximate heap RAM
+ used by index datastructures. (Areek Zillur via Robert Muir)
+
Bug Fixes
* LUCENE-5116: IndexWriter.addIndexes(IndexReader...) should drop empty (or all
deleted) segments. (Robert Muir, Shai Erera)
-* LUCENE-4734: Add FastVectorHighlighter support for proximity queries and
- phrase queries with gaps or overlapping terms. (Ryan Lauck, Adrien Grand)
-
* LUCENE-5132: Spatial RecursivePrefixTree Contains predicate will throw an NPE
when there's no indexed data and maybe in other circumstances too. (David Smiley)
@@ -115,6 +320,44 @@ Bug Fixes
boundary, made it into the top-N and went to the formatter.
(Manuel Amoabeng, Michael McCandless, Robert Muir)
+* LUCENE-4583: Indexing core no longer enforces a limit on maximum
+ length binary doc values fields, but individual codecs (including
+ the default one) have their own limits (David Smiley, Robert Muir,
+ Mike McCandless)
+
+* LUCENE-3849: TokenStreams now set the position increment in end(),
+ so we can handle trailing holes. If you have a custom TokenStream
+ implementing end() then be sure it calls super.end(). (Robert Muir,
+ Mike McCandless)
+
+* LUCENE-5192: IndexWriter could allow adding same field name with different
+ DocValueTypes under some circumstances. (Shai Erera)
+
+* LUCENE-5191: SimpleHTMLEncoder in Highlighter module broke Unicode
+ outside BMP because it encoded UTF-16 chars instead of codepoints.
+ The escaping of codepoints > 127 was removed (not needed for valid HTML)
+ and missing escaping for ' and / was added. (Uwe Schindler)
+
+* LUCENE-5201: Fixed compression bug in LZ4.compressHC when the input is highly
+ compressible and the start offset of the array to compress is > 0.
+ (Adrien Grand)
+
+* LUCENE-5221: SimilarityBase did not write norms the same way as DefaultSimilarity
+ if discountOverlaps == false and index-time boosts are present for the field.
+ (Yubin Kim via Robert Muir)
+
+* LUCENE-5223: Fixed IndexUpgrader command line parsing: -verbose is not required
+ and -dir-impl option now works correctly. (hossman)
+
+* LUCENE-5245: Fix MultiTermQuery's constant score rewrites to always
+ return a ConstantScoreQuery to make scoring consistent. Previously it
+ returned an empty unwrapped BooleanQuery, if no terms were available,
+ which has a different query norm. (Nik Everett, Uwe Schindler)
+
+* LUCENE-5218: In some cases, trying to retrieve or merge a 0-length
+ binary doc value would hit an ArrayIndexOutOfBoundsException.
+ (Littlestar via Mike McCandless)
+
API Changes
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
@@ -155,6 +398,35 @@ API Changes
files. FSDirectory#setReadChunkSize() is now deprecated and will be removed
in Lucene 5.0. (Uwe Schindler, Robert Muir, gsingers)
+* LUCENE-5170: Analyzer.ReuseStrategy instances are now stateless and can
+ be reused in other Analyzer instances, which was not possible before.
+ Lucene ships now with stateless singletons for per field and global reuse.
+ Legacy code can still instantiate the deprecated implementation classes,
+ but new code should use the constants. Implementors of custom strategies
+ have to take care of new method signatures. AnalyzerWrapper can now be
+ configured to use a custom strategy, too, ideally the one from the wrapped
+ Analyzer. Analyzer adds a getter to retrieve the strategy for this use-case.
+ (Uwe Schindler, Robert Muir, Shay Banon)
+
+* LUCENE-5173: Lucene never writes segments with 0 documents anymore.
+ (Shai Erera, Uwe Schindler, Robert Muir)
+
+* LUCENE-5178: SortedDocValues always returns -1 ord when a document is missing
+ a value for the field. Previously it only did this if the SortedDocValues
+ was produced by uninversion on the FieldCache. (Robert Muir)
+
+* LUCENE-5183: remove BinaryDocValues.MISSING. In order to determine a document
+ is missing a field, use getDocsWithField instead. (Robert Muir)
+
+Changes in Runtime Behavior
+
+* LUCENE-5178: DocValues codec consumer APIs (iterables) return null values
+ when the document has no value for the field. (Robert Muir)
+
+* LUCENE-5200: The HighFreqTerms command-line tool returns the true top-N
+ by totalTermFreq when using the -t option, it uses the term statistics (faster)
+ and now always shows totalTermFreq in the output. (Robert Muir)
+
Optimizations
* LUCENE-5088: Added TermFilter to filter docs by a specific term.
@@ -179,11 +451,37 @@ Optimizations
* LUCENE-5159: Prefix-code the sorted/sortedset value dictionaries in DiskDV.
(Robert Muir)
+* LUCENE-5170: Fixed several wrapper analyzers to inherit the reuse strategy
+ of the wrapped Analyzer. (Uwe Schindler, Robert Muir, Shay Banon)
+
+* LUCENE-5006: Simplified DocumentsWriter and DocumentsWriterPerThread
+ synchronization and concurrent interaction with IndexWriter. DWPT is now
+ only setup once and has no reset logic. All segment publishing and state
+ transition from DWPT into IndexWriter is now done via an Event-Queue
+ processed from within the IndexWriter in order to prevent situations
+ where DWPT or DW calling into IW could cause deadlocks. (Simon Willnauer)
+
+* LUCENE-5182: Terminate phrase searches early if max phrase window is
+ exceeded in FastVectorHighlighter to prevent very long running phrase
+ extraction if phrase terms are highly frequent. (Simon Willnauer)
+
+* LUCENE-5188: CompressingStoredFieldsFormat now slices chunks containing big
+ documents into fixed-size blocks so that requesting a single field does not
+ necessarily force to decompress the whole chunk. (Adrien Grand)
+
+* LUCENE-5101: CachingWrapper makes it easier to plug-in a custom cacheable
+ DocIdSet implementation and uses WAH8DocIdSet by default, which should be
+ more memory efficient than FixedBitSet on average as well as faster on small
+ sets. (Robert Muir)
+
Documentation
* LUCENE-4894: remove facet userguide as it was outdated. Partially absorbed into
package's documentation and classes javadocs. (Shai Erera)
+* LUCENE-5206: Clarify FuzzyQuery's unexpected behavior on short
+ terms. (Tim Allison via Mike McCandless)
+
Changes in backwards compatibility policy
* LUCENE-5141: CheckIndex.fixIndex(Status,Codec) is now
@@ -194,6 +492,25 @@ Changes in backwards compatibility polic
no longer support multiple "dictionaries" as there is only one dictionary available.
(Dawid Weiss)
+* LUCENE-5170: Changed method signatures of Analyzer.ReuseStrategy to take
+ Analyzer. Closeable interface was removed because the class was changed to
+ be stateless. (Uwe Schindler, Robert Muir, Shay Banon)
+
+* LUCENE-5187: SlowCompositeReaderWrapper constructor is now private,
+ SlowCompositeReaderWrapper.wrap should be used instead. (Adrien Grand)
+
+* LUCENE-5101: CachingWrapperFilter doesn't always return FixedBitSet instances
+ anymore. Users of the join module can use
+ oal.search.join.FixedBitSetCachingWrapperFilter instead. (Adrien Grand)
+
+Build
+
+* SOLR-5159: Manifest includes non-parsed maven variables.
+ (Artem Karpenko via Steve Rowe)
+
+* LUCENE-5193: Add jar-src as top-level target to generate all Lucene and Solr
+ *-src.jar. (Steve Rowe, Shai Erera)
+
======================= Lucene 4.4.0 =======================
Changes in backwards compatibility policy
Modified: lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/arirang/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java Mon Oct 21 18:58:24 2013
@@ -83,7 +83,7 @@ public final class KoreanTokenizer exten
*/
public KoreanTokenizer(Reader input) {
super(input);
- this.scanner = new KoreanTokenizerImpl(input);
+ this.scanner = new KoreanTokenizerImpl(this.input);
}
/**
@@ -91,7 +91,7 @@ public final class KoreanTokenizer exten
*/
public KoreanTokenizer(AttributeFactory factory, Reader input) {
super(factory, input);
- this.scanner = new KoreanTokenizerImpl(input);
+ this.scanner = new KoreanTokenizerImpl(this.input);
}
// this tokenizer generates three attributes:
@@ -146,5 +146,11 @@ public final class KoreanTokenizer exten
super.reset();
scanner.yyreset(input);
}
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ scanner.yyreset(input);
+ }
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/build.xml?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/build.xml (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/build.xml Mon Oct 21 18:58:24 2013
@@ -112,6 +112,9 @@
<target name="jar">
<forall-analyzers target="jar-core"/>
</target>
+ <target name="jar-src">
+ <forall-analyzers target="jar-src"/>
+ </target>
<target name="jar-core" depends="jar"/>
<target name="build-artifacts-and-tests" depends="default,compile-test" />
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Mon Oct 21 18:58:24 2013
@@ -80,13 +80,15 @@ public final class KeywordTokenizer exte
}
@Override
- public final void end() {
+ public final void end() throws IOException {
+ super.end();
// set final offset
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset() throws IOException {
+ super.reset();
this.done = false;
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java Mon Oct 21 18:58:24 2013
@@ -22,22 +22,57 @@ import org.apache.lucene.analysis.util.C
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
import java.util.Map;
import java.io.IOException;
/**
* Factory for {@link StopFilter}.
+ *
* <pre class="prettyprint">
* <fieldType name="text_stop" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
* <filter class="solr.StopFilterFactory" ignoreCase="true"
- * words="stopwords.txt"
+ * words="stopwords.txt" format="wordset"
* </analyzer>
* </fieldType></pre>
+ *
+ * <p>
+ * All attributes are optional:
+ * </p>
+ * <ul>
+ * <li><code>ignoreCase</code> defaults to <code>false</code></li>
+ * <li><code>words</code> should be the name of a stopwords file to parse, if not
+ * specified the factory will use {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}
+ * </li>
+ * <li><code>format</code> defines how the <code>words</code> file will be parsed,
+ * and defaults to <code>wordset</code>. If <code>words</code> is not specified,
+ * then <code>format</code> must not be specified.
+ * </li>
+ * </ul>
+ * <p>
+ * The valid values for the <code>format</code> option are:
+ * </p>
+ * <ul>
+ * <li><code>wordset</code> - This is the default format, which supports one word per
+ * line (including any intra-word whitespace) and allows whole line comments
+ * beginning with the "#" character. Blank lines are ignored. See
+ * {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
+ * </li>
+ * <li><code>snowball</code> - This format allows for multiple words specified on each
+ * line, and trailing comments may be specified using the vertical line ("|").
+ * Blank lines are ignored. See
+ * {@link WordlistLoader#getSnowballWordSet WordlistLoader.getSnowballWordSet}
+ * for details.
+ * </li>
+ * </ul>
*/
public class StopFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+ public static final String FORMAT_WORDSET = "wordset";
+ public static final String FORMAT_SNOWBALL = "snowball";
+
private CharArraySet stopWords;
private final String stopWordFiles;
private final String format;
@@ -48,7 +83,7 @@ public class StopFilterFactory extends T
super(args);
assureMatchVersion();
stopWordFiles = get(args, "words");
- format = get(args, "format");
+ format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET));
ignoreCase = getBoolean(args, "ignoreCase", false);
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
@@ -58,12 +93,17 @@ public class StopFilterFactory extends T
@Override
public void inform(ResourceLoader loader) throws IOException {
if (stopWordFiles != null) {
- if ("snowball".equalsIgnoreCase(format)) {
+ if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
+ stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
+ } else if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
} else {
- stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
+ throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
}
} else {
+ if (null != format) {
+ throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
+ }
stopWords = new CharArraySet(luceneMatchVersion, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
}
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountAnalyzer.java Mon Oct 21 18:58:24 2013
@@ -46,6 +46,7 @@ public final class LimitTokenCountAnalyz
* @param consumeAllTokens whether all tokens from the delegate should be consumed even if maxTokenCount is reached.
*/
public LimitTokenCountAnalyzer(Analyzer delegate, int maxTokenCount, boolean consumeAllTokens) {
+ super(delegate.getReuseStrategy());
this.delegate = delegate;
this.maxTokenCount = maxTokenCount;
this.consumeAllTokens = consumeAllTokens;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.java Mon Oct 21 18:58:24 2013
@@ -73,6 +73,7 @@ public final class PerFieldAnalyzerWrapp
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
Map<String, Analyzer> fieldAnalyzers) {
+ super(PER_FIELD_REUSE_STRATEGY);
this.defaultAnalyzer = defaultAnalyzer;
this.fieldAnalyzers = (fieldAnalyzers != null) ? fieldAnalyzers : Collections.<String, Analyzer>emptyMap();
}
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java Mon Oct 21 18:58:24 2013
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis.misce
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -30,36 +32,54 @@ import org.apache.lucene.util.RamUsageEs
import java.io.IOException;
/**
- * Splits words into subwords and performs optional transformations on subword groups.
- * Words are split into subwords with the following rules:
- * - split on intra-word delimiters (by default, all non alpha-numeric characters).
- * - "Wi-Fi" -> "Wi", "Fi"
- * - split on case transitions
- * - "PowerShot" -> "Power", "Shot"
- * - split on letter-number transitions
- * - "SD500" -> "SD", "500"
- * - leading and trailing intra-word delimiters on each subword are ignored
- * - "//hello---there, 'dude'" -> "hello", "there", "dude"
- * - trailing "'s" are removed for each subword
- * - "O'Neil's" -> "O", "Neil"
- * - Note: this step isn't performed in a separate filter because of possible subword combinations.
- *
+ * Splits words into subwords and performs optional transformations on subword
+ * groups. Words are split into subwords with the following rules:
+ * <ul>
+ * <li>split on intra-word delimiters (by default, all non alpha-numeric
+ * characters): <code>"Wi-Fi"</code> → <code>"Wi", "Fi"</code></li>
+ * <li>split on case transitions: <code>"PowerShot"</code> →
+ * <code>"Power", "Shot"</code></li>
+ * <li>split on letter-number transitions: <code>"SD500"</code> →
+ * <code>"SD", "500"</code></li>
+ * <li>leading and trailing intra-word delimiters on each subword are ignored:
+ * <code>"//hello---there, 'dude'"</code> →
+ * <code>"hello", "there", "dude"</code></li>
+ * <li>trailing "'s" are removed for each subword: <code>"O'Neil's"</code>
+ * → <code>"O", "Neil"</code>
+ * <ul>
+ * <li>Note: this step isn't performed in a separate filter because of possible
+ * subword combinations.</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
* The <b>combinations</b> parameter affects how subwords are combined:
- * - combinations="0" causes no subword combinations.
- * - "PowerShot" -> 0:"Power", 1:"Shot" (0 and 1 are the token positions)
- * - combinations="1" means that in addition to the subwords, maximum runs of non-numeric subwords are catenated and produced at the same position of the last subword in the run.
- * - "PowerShot" -> 0:"Power", 1:"Shot" 1:"PowerShot"
- * - "A's+B's&C's" -> 0:"A", 1:"B", 2:"C", 2:"ABC"
- * - "Super-Duper-XL500-42-AutoCoder!" -> 0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"
- *
- * One use for WordDelimiterFilter is to help match words with different subword delimiters.
- * For example, if the source text contained "wi-fi" one may want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match.
- * One way of doing so is to specify combinations="1" in the analyzer used for indexing, and combinations="0" (the default)
- * in the analyzer used for querying. Given that the current StandardTokenizer immediately removes many intra-word
- * delimiters, it is recommended that this filter be used after a tokenizer that does not do this (such as WhitespaceTokenizer).
- *
+ * <ul>
+ * <li>combinations="0" causes no subword combinations: <code>"PowerShot"</code>
+ * → <code>0:"Power", 1:"Shot"</code> (0 and 1 are the token positions)</li>
+ * <li>combinations="1" means that in addition to the subwords, maximum runs of
+ * non-numeric subwords are catenated and produced at the same position of the
+ * last subword in the run:
+ * <ul>
+ * <li><code>"PowerShot"</code> →
+ * <code>0:"Power", 1:"Shot" 1:"PowerShot"</code></li>
+ * <li><code>"A's+B's&C's"</code> → <code>0:"A", 1:"B", 2:"C", 2:"ABC"</code>
+ * </li>
+ * <li><code>"Super-Duper-XL500-42-AutoCoder!"</code> →
+ * <code>0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"</code>
+ * </li>
+ * </ul>
+ * </li>
+ * </ul>
+ * One use for {@link WordDelimiterFilter} is to help match words with different
+ * subword delimiters. For example, if the source text contained "wi-fi" one may
+ * want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. One way of doing so
+ * is to specify combinations="1" in the analyzer used for indexing, and
+ * combinations="0" (the default) in the analyzer used for querying. Given that
+ * the current {@link StandardTokenizer} immediately removes many intra-word
+ * delimiters, it is recommended that this filter be used after a tokenizer that
+ * does not do this (such as {@link WhitespaceTokenizer}).
*/
-
public final class WordDelimiterFilter extends TokenFilter {
public static final int LOWER = 0x01;
Modified: lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java?rev=1534320&r1=1534319&r2=1534320&view=diff
==============================================================================
--- lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java (original)
+++ lucene/dev/branches/lucene4956/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/Lucene43NGramTokenizer.java Mon Oct 21 18:58:24 2013
@@ -140,7 +140,8 @@ public final class Lucene43NGramTokenize
}
@Override
- public void end() {
+ public void end() throws IOException {
+ super.end();
// set final offset
final int finalOffset = correctOffset(charsRead);
this.offsetAtt.setOffset(finalOffset, finalOffset);