You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/13 13:17:06 UTC
svn commit: r1372366 [2/8] - in /lucene/dev/branches/pforcodec_3892: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/common/
dev-tools/maven/lucene/analysis/icu/ ...
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/contrib/clustering</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -106,17 +99,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/contrib/dataimporthandler-extras</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -104,17 +97,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/contrib/dataimporthandler</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -90,6 +83,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
<plugins>
<plugin>
@@ -103,15 +102,6 @@
</execution>
</executions>
</plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
</plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -38,18 +38,11 @@
<module-directory>solr/contrib/extraction</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -102,17 +95,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -39,18 +39,11 @@
<module-directory>solr/contrib/langid</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -107,17 +100,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/contrib/uima</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -121,17 +114,12 @@
<testResource>
<directory>${module-path}/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/contrib/velocity</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -142,17 +135,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- </plugins>
</build>
</project>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
<module-directory>solr/core</module-directory>
<top-level>../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
- <surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@@ -243,49 +236,15 @@
<testResource>
<directory>${top-level}/solr/solrj/src/test-files</directory>
</testResource>
+ <testResource>
+ <directory>${top-level}/dev-tools/maven/solr</directory>
+ <includes>
+ <include>maven.testlogging.properties</include>
+ </includes>
+ </testResource>
</testResources>
<plugins>
<plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <configuration>
- <systemPropertyVariables>
- <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
- </systemPropertyVariables>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>appassembler-maven-plugin</artifactId>
- <configuration>
- <extraJvmArguments>-Xmx128M</extraJvmArguments>
- <repositoryLayout>flat</repositoryLayout>
- <platforms>
- <platform>windows</platform>
- <platform>unix</platform>
- </platforms>
- <programs>
- <program>
- <mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
- <name>JettySolrRunner</name>
- </program>
- <program>
- <mainClass>org.apache.solr.util.BitSetPerf</mainClass>
- <name>BitSetPerf</name>
- <extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
- </program>
- <program>
- <mainClass>org.apache.solr.util.SimplePostTool</mainClass>
- <name>SimplePostTool</name>
- </program>
- <program>
- <mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
- <name>SuggestMissingFactories</name>
- </program>
- </programs>
- </configuration>
- </plugin>
- <plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -43,26 +43,14 @@
<module-directory>solr</module-directory>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<issueManagement>
<system>JIRA</system>
- <url>http://issues.apache.org/jira/browse/SOLR</url>
+ <url>https://issues.apache.org/jira/browse/SOLR</url>
</issueManagement>
- <ciManagement>
- <system>Hudson</system>
- <url>
- http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
- </url>
- </ciManagement>
<mailingLists>
<mailingList>
<name>Solr User List</name>
@@ -111,6 +99,15 @@
<doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <systemPropertyVariables>
+ <java.util.logging.config.file>../test-classes/maven.testlogging.properties</java.util.logging.config.file>
+ </systemPropertyVariables>
+ </configuration>
+ </plugin>
</plugins>
</pluginManagement>
</build>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<!-- These dependencies are compile scope because this is a test framework. -->
@@ -61,19 +55,28 @@
<version>${project.version}</version>
</dependency>
<dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
- <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
- you can exclude the three Jetty dependencies below. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <scope>runtime</scope>
+ <artifactId>jetty-servlet</artifactId>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
+ </dependency>
+ <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
+ you can exclude the two Jetty dependencies below. -->
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
- <connection>
- scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </connection>
- <developerConnection>
- scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
- </developerConnection>
- <url>
- http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
- </url>
+ <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+ <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+ <url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
Modified: lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py Mon Aug 13 11:16:57 2012
@@ -58,7 +58,7 @@ def javaExe(version):
def verifyJavaVersion(version):
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
- if s.find('java version "%s.' % version) == -1:
+ if s.find(' version "%s.' % version) == -1:
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
# http://s.apache.org/lusolr32rc2
@@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, t
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
+ if urlString.startswith('file:/') and not urlString.startswith('file://'):
+ # stupid bogus ant URI
+ urlString = "file:///" + urlString[6:]
+
if urlString.startswith('file://'):
path = urlString[7:]
if path.endswith('/'):
@@ -1026,7 +1030,7 @@ def crawl(downloadedFiles, urlString, ta
def main():
- if len(sys.argv) != 4:
+ if len(sys.argv) < 4:
print()
print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
print()
@@ -1035,8 +1039,11 @@ def main():
baseURL = sys.argv[1]
version = sys.argv[2]
tmpDir = os.path.abspath(sys.argv[3])
+ isSigned = True
+ if len(sys.argv) == 5:
+ isSigned = (sys.argv[4] == "True")
- smokeTest(baseURL, version, tmpDir, True)
+ smokeTest(baseURL, version, tmpDir, isSigned)
def smokeTest(baseURL, version, tmpDir, isSigned):
@@ -1090,4 +1097,5 @@ if __name__ == '__main__':
except:
import traceback
traceback.print_exc()
-
+ sys.exit(1)
+ sys.exit(0)
Modified: lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt Mon Aug 13 11:16:57 2012
@@ -6,6 +6,56 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
+======================= Lucene 4.0.0 =======================
+
+New Features
+
+* LUCENE-1888: Added the option to store payloads in the term
+ vectors (IndexableFieldType.storeTermVectorPayloads()). Note
+ that you must store term vector positions to store payloads.
+ (Robert Muir)
+
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+ Previously you had no real way to know that a term vector field
+ had positions or offsets, since this can be configured on a
+ per-field-per-document basis. (Robert Muir)
+
+* Removed DocsAndPositionsEnum.hasPayload() and simplified the
+ contract of getPayload(). It returns null if there is no payload,
+ otherwise returns the current payload. You can now call it multiple
+ times per position if you want. (Robert Muir)
+
+* Removed FieldsEnum. Fields API instead implements Iterable<String>
+ and exposes Iterator, so you can iterate over field names with
+ for (String field : fields) instead. (Robert Muir)
+
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+ twice for conjunctions: for most users this is no problem, but
+ if you had a customized Similarity that returned something other
+ than 1 when overlap == maxOverlap (always the case for conjunctions),
+ then the score would be incorrect. (Pascal Chollet, Robert Muir)
+
+* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
+ did not work at all: it would recurse infinitely.
+ (Alberto Paro via Robert Muir)
+
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+ had a custom Similarity where coord(1,1) != 1F, then the rewritten
+ query would be scored differently. (Robert Muir)
+
+* Don't allow negatives in the positions file. If you have an index
+ from 2.4.0 or earlier with such negative positions, and you already
+ upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
+ CheckIndex. If it fails, then you need to upgrade again to 4.0. (Robert Muir)
+
+Build
+
+* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
+ thread leak detection. Added support for suite timeouts. (Dawid Weiss)
======================= Lucene 4.0.0-BETA =======================
@@ -47,6 +97,11 @@ New features
int docID), to attempt deletion by docID as long as the provided
reader is an NRT reader, and the segment has not yet been merged
away (Mike McCandless).
+
+* LUCENE-4286: Added option to CJKBigramFilter to always also output
+ unigrams. This can be used for a unigram+bigram approach, or at
+ index-time only for better support of short queries.
+ (Tom Burton-West, Robert Muir)
API Changes
@@ -115,6 +170,10 @@ Optimizations
making them substantially more lightweight. Behavior is unchanged.
(Robert Muir)
+* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
+ such as StandardTokenizer from 32kb to 8kb.
+ (Raintung Li, Steven Rowe, Robert Muir)
+
Bug Fixes
* LUCENE-4109: BooleanQueries are not parsed correctly with the
@@ -164,6 +223,9 @@ Bug Fixes
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+ (Robert Muir)
+
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.
Modified: lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt Mon Aug 13 11:16:57 2012
@@ -9,7 +9,7 @@ enumeration APIs. Here are the major ch
by the BytesRef class (which provides an offset + length "slice"
into an existing byte[]).
- * Fields are separately enumerated (FieldsEnum) from the terms
+ * Fields are separately enumerated (Fields.iterator()) from the terms
within each field (TermEnum). So instead of this:
TermEnum termsEnum = ...;
@@ -20,10 +20,8 @@ enumeration APIs. Here are the major ch
Do this:
- FieldsEnum fieldsEnum = ...;
- String field;
- while((field = fieldsEnum.next()) != null) {
- TermsEnum termsEnum = fieldsEnum.terms();
+ for(String field : fields) {
+ TermsEnum termsEnum = fields.terms(field);
BytesRef text;
while((text = termsEnum.next()) != null) {
System.out.println("field=" + field + "; text=" + text.utf8ToString());
@@ -316,11 +314,12 @@ an AtomicReader. Note: using "atomicity
slowdowns due to the need to merge terms, postings, DocValues, and
FieldCache, use them with care!
-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes
Lucene's core and contrib analyzers, along with Solr's analyzers,
were consolidated into lucene/analysis. During the refactoring some
-package names have changed:
+package names have changed, and ReusableAnalyzerBase was renamed to
+Analyzer:
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@@ -345,7 +344,7 @@ package names have changed:
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
- - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+ - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.charfilter;
@@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.O
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/26/12 6:22 PM from the specification file
- * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
*/
public final class HTMLStripCharFilter extends BaseCharFilter {
@@ -31255,88 +31255,56 @@ public final class HTMLStripCharFilter e
{ yybegin(STYLE);
}
case 55: break;
- case 51:
- { // Handle paired UTF-16 surrogates.
- String surrogatePair = yytext();
- char highSurrogate = '\u0000';
- char lowSurrogate = '\u0000';
- try {
- highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
- } catch(Exception e) { // should never happen
- assert false: "Exception parsing high surrogate '"
- + surrogatePair.substring(2, 6) + "'";
- }
- try { // Low surrogates are in decimal range [56320, 57343]
- lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
- } catch(Exception e) { // should never happen
- assert false: "Exception parsing low surrogate '"
- + surrogatePair.substring(9, 14) + "'";
- }
- if (Character.isLowSurrogate(lowSurrogate)) {
- outputSegment = entitySegment;
- outputSegment.clear();
- outputSegment.unsafeWrite(lowSurrogate);
- // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - 2;
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- return highSurrogate;
- }
- yypushback(surrogatePair.length() - 1); // Consume only '#'
- inputSegment.append('#');
- yybegin(NUMERIC_CHARACTER);
+ case 27:
+ { // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - 1;
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(YYINITIAL);
+ return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 56: break;
- case 21:
- { previousRestoreState = restoreState;
- restoreState = SERVER_SIDE_INCLUDE;
- yybegin(SINGLE_QUOTED_STRING);
+ case 30:
+ { int length = yylength();
+ inputSegment.write(zzBuffer, zzStartRead, length);
+ entitySegment.clear();
+ char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
+ entitySegment.append(ch);
+ outputSegment = entitySegment;
+ yybegin(CHARACTER_REFERENCE_TAIL);
}
case 57: break;
- case 31:
- { int matchLength = yylength();
- inputSegment.write(zzBuffer, zzStartRead, matchLength);
- if (matchLength <= 6) { // 10FFFF: max 6 hex chars
- String hexCharRef
- = new String(zzBuffer, zzStartRead + 1, matchLength - 1);
- int codePoint = 0;
- try {
- codePoint = Integer.parseInt(hexCharRef, 16);
- } catch(Exception e) {
- assert false: "Exception parsing hex code point '" + hexCharRef + "'";
- }
- if (codePoint <= 0x10FFFF) {
- outputSegment = entitySegment;
- outputSegment.clear();
- if (codePoint >= Character.MIN_SURROGATE
- && codePoint <= Character.MAX_SURROGATE) {
- outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
- } else {
- outputSegment.setLength
- (Character.toChars(codePoint, outputSegment.getArray(), 0));
- }
- yybegin(CHARACTER_REFERENCE_TAIL);
- } else {
- outputSegment = inputSegment;
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
- }
- } else {
+ case 48:
+ { inputSegment.clear();
+ yybegin(YYINITIAL);
+ // add (previously matched input length) -- current match and substitution handled below
+ cumulativeDiff += yychar - inputStart;
+ // position the offset correction at (already output length) -- substitution handled below
+ int offsetCorrectionPos = outputCharCount;
+ int returnValue;
+ if (escapeSTYLE) {
+ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
+ returnValue = outputSegment.nextChar();
+ } else {
+ // add (this match length) - (substitution length)
+ cumulativeDiff += yylength() - 1;
+ // add (substitution length)
+ ++offsetCorrectionPos;
+ returnValue = STYLE_REPLACEMENT;
}
+ addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
+ return returnValue;
}
case 58: break;
- case 19:
+ case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
- yybegin(END_TAG_TAIL_INCLUDE);
+ yybegin(START_TAG_TAIL_INCLUDE);
} else {
- yybegin(END_TAG_TAIL_EXCLUDE);
+ yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 59: break;
@@ -31347,113 +31315,79 @@ public final class HTMLStripCharFilter e
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
- case 27:
- { // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - 1;
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- return BLOCK_LEVEL_START_TAG_REPLACEMENT;
- }
- case 61: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
+ case 61: break;
+ case 21:
+ { previousRestoreState = restoreState;
+ restoreState = SERVER_SIDE_INCLUDE;
+ yybegin(SINGLE_QUOTED_STRING);
+ }
case 62: break;
+ case 11:
+ { inputSegment.write(zzBuffer, zzStartRead, yylength());
+ yybegin(LEFT_ANGLE_BRACKET_SPACE);
+ }
+ case 63: break;
case 35:
{ yybegin(SCRIPT);
}
- case 63: break;
+ case 64: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
- case 64: break;
+ case 65: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
- case 65: break;
- case 33:
- { yybegin(YYINITIAL);
- if (escapeBR) {
- inputSegment.write(zzBuffer, zzStartRead, yylength());
- outputSegment = inputSegment;
- return outputSegment.nextChar();
- } else {
- // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - 1;
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
- inputSegment.reset();
- return BR_START_TAG_REPLACEMENT;
- }
- }
case 66: break;
- case 53:
+ case 51:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
- try { // High surrogates are in decimal range [55296, 56319]
- highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
+ char lowSurrogate = '\u0000';
+ try {
+ highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
- + surrogatePair.substring(1, 6) + "'";
+ + surrogatePair.substring(2, 6) + "'";
}
- if (Character.isHighSurrogate(highSurrogate)) {
- char lowSurrogate = '\u0000';
- try { // Low surrogates are in decimal range [56320, 57343]
- lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
- } catch(Exception e) { // should never happen
- assert false: "Exception parsing low surrogate '"
- + surrogatePair.substring(9, 14) + "'";
- }
- if (Character.isLowSurrogate(lowSurrogate)) {
- outputSegment = entitySegment;
- outputSegment.clear();
- outputSegment.unsafeWrite(lowSurrogate);
- // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - 2;
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- return highSurrogate;
- }
+ try { // Low surrogates are in decimal range [56320, 57343]
+ lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
+ } catch(Exception e) { // should never happen
+ assert false: "Exception parsing low surrogate '"
+ + surrogatePair.substring(9, 14) + "'";
+ }
+ if (Character.isLowSurrogate(lowSurrogate)) {
+ outputSegment = entitySegment;
+ outputSegment.clear();
+ outputSegment.unsafeWrite(lowSurrogate);
+ // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - 2;
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(YYINITIAL);
+ return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 67: break;
- case 43:
- { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+ case 4:
+ { yypushback(1);
+ outputSegment = inputSegment;
+ outputSegment.restart();
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
}
case 68: break;
- case 30:
- { int length = yylength();
- inputSegment.write(zzBuffer, zzStartRead, length);
- entitySegment.clear();
- char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
- entitySegment.append(ch);
- outputSegment = entitySegment;
- yybegin(CHARACTER_REFERENCE_TAIL);
+ case 43:
+ { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 69: break;
- case 28:
- { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
- }
- case 70: break;
- case 3:
- { inputStart = yychar;
- inputSegment.clear();
- inputSegment.append('&');
- yybegin(AMPERSAND);
- }
- case 71: break;
- case 16:
- { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
- }
- case 72: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
@@ -31486,174 +31420,11 @@ public final class HTMLStripCharFilter e
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
- case 73: break;
- case 6:
- { int matchLength = yylength();
- inputSegment.write(zzBuffer, zzStartRead, matchLength);
- if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
- String decimalCharRef = yytext();
- int codePoint = 0;
- try {
- codePoint = Integer.parseInt(decimalCharRef);
- } catch(Exception e) {
- assert false: "Exception parsing code point '" + decimalCharRef + "'";
- }
- if (codePoint <= 0x10FFFF) {
- outputSegment = entitySegment;
- outputSegment.clear();
- if (codePoint >= Character.MIN_SURROGATE
- && codePoint <= Character.MAX_SURROGATE) {
- outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
- } else {
- outputSegment.setLength
- (Character.toChars(codePoint, outputSegment.getArray(), 0));
- }
- yybegin(CHARACTER_REFERENCE_TAIL);
- } else {
- outputSegment = inputSegment;
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
- }
- } else {
- outputSegment = inputSegment;
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
- }
- }
- case 74: break;
- case 37:
- { // add (this match length) [ - (substitution length) = 0 ]
- cumulativeDiff += yylength();
- // position the correction at (already output length) [ + (substitution length) = 0 ]
- addOffCorrectMap(outputCharCount, cumulativeDiff);
- yybegin(YYINITIAL);
- }
- case 75: break;
- case 8:
- { inputSegment.write(zzBuffer, zzStartRead, yylength());
- if (null != escapedTags
- && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
- yybegin(START_TAG_TAIL_INCLUDE);
- } else {
- yybegin(START_TAG_TAIL_SUBSTITUTE);
- }
- }
- case 76: break;
- case 46:
- { yybegin(SCRIPT);
- if (escapeSCRIPT) {
- inputSegment.write(zzBuffer, zzStartRead, yylength());
- outputSegment = inputSegment;
- inputStart += 1 + yylength();
- return outputSegment.nextChar();
- }
- }
- case 77: break;
- case 11:
- { inputSegment.write(zzBuffer, zzStartRead, yylength());
- yybegin(LEFT_ANGLE_BRACKET_SPACE);
- }
- case 78: break;
- case 20:
- { inputSegment.write(zzBuffer, zzStartRead, yylength());
- }
- case 79: break;
- case 34:
- { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
- cumulativeDiff += yychar - inputStart + yylength();
- // position the correction at (already output length) [ + (substitution length) = 0]
- addOffCorrectMap(outputCharCount, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- }
- case 80: break;
- case 23:
- { yybegin(restoreState); restoreState = previousRestoreState;
- }
- case 81: break;
- case 32:
- { yybegin(COMMENT);
- }
- case 82: break;
- case 14:
- { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
- cumulativeDiff += inputSegment.length() + yylength();
- // position the correction at (already output length) [ + (substitution length) = 0 ]
- addOffCorrectMap(outputCharCount, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- }
- case 83: break;
- case 18:
- { inputSegment.write(zzBuffer, zzStartRead, yylength());
- if (null != escapedTags
- && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
- yybegin(END_TAG_TAIL_INCLUDE);
- } else {
- yybegin(END_TAG_TAIL_SUBSTITUTE);
- }
- }
- case 84: break;
- case 25:
- { // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - 1;
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
- inputSegment.clear();
- yybegin(YYINITIAL);
- return BLOCK_LEVEL_END_TAG_REPLACEMENT;
- }
- case 85: break;
- case 7:
- { // add (previously matched input length) + (this match length) - (substitution length)
- cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
- // position the correction at (already output length) + (substitution length)
- addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
- }
- case 86: break;
- case 48:
- { inputSegment.clear();
- yybegin(YYINITIAL);
- // add (previously matched input length) -- current match and substitution handled below
- cumulativeDiff += yychar - inputStart;
- // position the offset correction at (already output length) -- substitution handled below
- int offsetCorrectionPos = outputCharCount;
- int returnValue;
- if (escapeSTYLE) {
- inputSegment.write(zzBuffer, zzStartRead, yylength());
- outputSegment = inputSegment;
- returnValue = outputSegment.nextChar();
- } else {
- // add (this match length) - (substitution length)
- cumulativeDiff += yylength() - 1;
- // add (substitution length)
- ++offsetCorrectionPos;
- returnValue = STYLE_REPLACEMENT;
- }
- addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
- return returnValue;
- }
- case 87: break;
- case 5:
- { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
- }
- case 88: break;
- case 26:
- { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
- cumulativeDiff += inputSegment.length() + yylength();
- // position the correction at (already output length) [ + (substitution length) = 0 ]
- addOffCorrectMap(outputCharCount, cumulativeDiff);
- inputSegment.clear();
- outputSegment = inputSegment;
- yybegin(YYINITIAL);
- }
- case 89: break;
- case 13:
- { inputSegment.append(zzBuffer[zzStartRead]);
+ case 70: break;
+ case 28:
+ { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
- case 90: break;
+ case 71: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
@@ -31681,32 +31452,41 @@ public final class HTMLStripCharFilter e
yybegin(YYINITIAL);
return highSurrogate;
}
- case 91: break;
- case 40:
- { yybegin(SCRIPT_COMMENT);
- }
- case 92: break;
- case 45:
- { yybegin(STYLE);
- if (escapeSTYLE) {
- inputSegment.write(zzBuffer, zzStartRead, yylength());
- outputSegment = inputSegment;
- inputStart += 1 + yylength();
- return outputSegment.nextChar();
- }
+ case 72: break;
+ case 16:
+ { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
- case 93: break;
+ case 73: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(DOUBLE_QUOTED_STRING);
}
- case 94: break;
- case 12:
- { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+ case 74: break;
+ case 26:
+ { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+ cumulativeDiff += inputSegment.length() + yylength();
+ // position the correction at (already output length) [ + (substitution length) = 0 ]
+ addOffCorrectMap(outputCharCount, cumulativeDiff);
+ inputSegment.clear();
+ outputSegment = inputSegment;
+ yybegin(YYINITIAL);
}
- case 95: break;
- case 36:
+ case 75: break;
+ case 20:
+ { inputSegment.write(zzBuffer, zzStartRead, yylength());
+ }
+ case 76: break;
+ case 47:
+ { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+ cumulativeDiff += inputSegment.length() + yylength();
+ // position the correction at (already output length) [ + (substitution length) = 0 ]
+ addOffCorrectMap(outputCharCount, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(CDATA);
+ }
+ case 77: break;
+ case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
@@ -31718,34 +31498,128 @@ public final class HTMLStripCharFilter e
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
- return BR_END_TAG_REPLACEMENT;
+ return BR_START_TAG_REPLACEMENT;
}
}
- case 96: break;
+ case 78: break;
+ case 23:
+ { yybegin(restoreState); restoreState = previousRestoreState;
+ }
+ case 79: break;
+ case 32:
+ { yybegin(COMMENT);
+ }
+ case 80: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
- case 97: break;
- case 47:
+ case 81: break;
+ case 3:
+ { inputStart = yychar;
+ inputSegment.clear();
+ inputSegment.append('&');
+ yybegin(AMPERSAND);
+ }
+ case 82: break;
+ case 46:
+ { yybegin(SCRIPT);
+ if (escapeSCRIPT) {
+ inputSegment.write(zzBuffer, zzStartRead, yylength());
+ outputSegment = inputSegment;
+ inputStart += 1 + yylength();
+ return outputSegment.nextChar();
+ }
+ }
+ case 83: break;
+ case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
- yybegin(CDATA);
+ yybegin(YYINITIAL);
}
- case 98: break;
- case 29:
- { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+ case 84: break;
+ case 6:
+ { int matchLength = yylength();
+ inputSegment.write(zzBuffer, zzStartRead, matchLength);
+ if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
+ String decimalCharRef = yytext();
+ int codePoint = 0;
+ try {
+ codePoint = Integer.parseInt(decimalCharRef);
+ } catch(Exception e) {
+ assert false: "Exception parsing code point '" + decimalCharRef + "'";
+ }
+ if (codePoint <= 0x10FFFF) {
+ outputSegment = entitySegment;
+ outputSegment.clear();
+ if (codePoint >= Character.MIN_SURROGATE
+ && codePoint <= Character.MAX_SURROGATE) {
+ outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
+ } else {
+ outputSegment.setLength
+ (Character.toChars(codePoint, outputSegment.getArray(), 0));
+ }
+ yybegin(CHARACTER_REFERENCE_TAIL);
+ } else {
+ outputSegment = inputSegment;
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
+ }
+ } else {
+ outputSegment = inputSegment;
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
+ }
}
- case 99: break;
- case 17:
- { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+ case 85: break;
+ case 34:
+ { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+ cumulativeDiff += yychar - inputStart + yylength();
+ // position the correction at (already output length) [ + (substitution length) = 0]
+ addOffCorrectMap(outputCharCount, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(YYINITIAL);
}
- case 100: break;
+ case 86: break;
+ case 5:
+ { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+ }
+ case 87: break;
+ case 13:
+ { inputSegment.append(zzBuffer[zzStartRead]);
+ }
+ case 88: break;
+ case 18:
+ { inputSegment.write(zzBuffer, zzStartRead, yylength());
+ if (null != escapedTags
+ && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+ yybegin(END_TAG_TAIL_INCLUDE);
+ } else {
+ yybegin(END_TAG_TAIL_SUBSTITUTE);
+ }
+ }
+ case 89: break;
+ case 40:
+ { yybegin(SCRIPT_COMMENT);
+ }
+ case 90: break;
+ case 37:
+ { // add (this match length) [ - (substitution length) = 0 ]
+ cumulativeDiff += yylength();
+ // position the correction at (already output length) [ + (substitution length) = 0 ]
+ addOffCorrectMap(outputCharCount, cumulativeDiff);
+ yybegin(YYINITIAL);
+ }
+ case 91: break;
+ case 12:
+ { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+ }
+ case 92: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
@@ -31755,7 +31629,7 @@ public final class HTMLStripCharFilter e
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
- case 101: break;
+ case 93: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
@@ -31778,26 +31652,152 @@ public final class HTMLStripCharFilter e
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
+ case 94: break;
+ case 29:
+ { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+ }
+ case 95: break;
+ case 17:
+ { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+ }
+ case 96: break;
+ case 45:
+ { yybegin(STYLE);
+ if (escapeSTYLE) {
+ inputSegment.write(zzBuffer, zzStartRead, yylength());
+ outputSegment = inputSegment;
+ inputStart += 1 + yylength();
+ return outputSegment.nextChar();
+ }
+ }
+ case 97: break;
+ case 7:
+ { // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
+ }
+ case 98: break;
+ case 19:
+ { inputSegment.write(zzBuffer, zzStartRead, yylength());
+ if (null != escapedTags
+ && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+ yybegin(END_TAG_TAIL_INCLUDE);
+ } else {
+ yybegin(END_TAG_TAIL_EXCLUDE);
+ }
+ }
+ case 99: break;
+ case 25:
+ { // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - 1;
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(YYINITIAL);
+ return BLOCK_LEVEL_END_TAG_REPLACEMENT;
+ }
+ case 100: break;
+ case 31:
+ { int matchLength = yylength();
+ inputSegment.write(zzBuffer, zzStartRead, matchLength);
+ if (matchLength <= 6) { // 10FFFF: max 6 hex chars
+ String hexCharRef
+ = new String(zzBuffer, zzStartRead + 1, matchLength - 1);
+ int codePoint = 0;
+ try {
+ codePoint = Integer.parseInt(hexCharRef, 16);
+ } catch(Exception e) {
+ assert false: "Exception parsing hex code point '" + hexCharRef + "'";
+ }
+ if (codePoint <= 0x10FFFF) {
+ outputSegment = entitySegment;
+ outputSegment.clear();
+ if (codePoint >= Character.MIN_SURROGATE
+ && codePoint <= Character.MAX_SURROGATE) {
+ outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
+ } else {
+ outputSegment.setLength
+ (Character.toChars(codePoint, outputSegment.getArray(), 0));
+ }
+ yybegin(CHARACTER_REFERENCE_TAIL);
+ } else {
+ outputSegment = inputSegment;
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
+ }
+ } else {
+ outputSegment = inputSegment;
+ yybegin(YYINITIAL);
+ return outputSegment.nextChar();
+ }
+ }
+ case 101: break;
+ case 53:
+ { // Handle paired UTF-16 surrogates.
+ String surrogatePair = yytext();
+ char highSurrogate = '\u0000';
+ try { // High surrogates are in decimal range [55296, 56319]
+ highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
+ } catch(Exception e) { // should never happen
+ assert false: "Exception parsing high surrogate '"
+ + surrogatePair.substring(1, 6) + "'";
+ }
+ if (Character.isHighSurrogate(highSurrogate)) {
+ char lowSurrogate = '\u0000';
+ try { // Low surrogates are in decimal range [56320, 57343]
+ lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
+ } catch(Exception e) { // should never happen
+ assert false: "Exception parsing low surrogate '"
+ + surrogatePair.substring(9, 14) + "'";
+ }
+ if (Character.isLowSurrogate(lowSurrogate)) {
+ outputSegment = entitySegment;
+ outputSegment.clear();
+ outputSegment.unsafeWrite(lowSurrogate);
+ // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - 2;
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+ inputSegment.clear();
+ yybegin(YYINITIAL);
+ return highSurrogate;
+ }
+ }
+ yypushback(surrogatePair.length() - 1); // Consume only '#'
+ inputSegment.append('#');
+ yybegin(NUMERIC_CHARACTER);
+ }
case 102: break;
+ case 36:
+ { yybegin(YYINITIAL);
+ if (escapeBR) {
+ inputSegment.write(zzBuffer, zzStartRead, yylength());
+ outputSegment = inputSegment;
+ return outputSegment.nextChar();
+ } else {
+ // add (previously matched input length) + (this match length) - (substitution length)
+ cumulativeDiff += inputSegment.length() + yylength() - 1;
+ // position the correction at (already output length) + (substitution length)
+ addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+ inputSegment.reset();
+ return BR_END_TAG_REPLACEMENT;
+ }
+ }
+ case 103: break;
case 38:
{ yybegin(restoreState);
}
- case 103: break;
+ case 104: break;
case 41:
{ yybegin(STYLE_COMMENT);
}
- case 104: break;
+ case 105: break;
case 1:
{ return zzBuffer[zzStartRead];
}
- case 105: break;
- case 4:
- { yypushback(1);
- outputSegment = inputSegment;
- outputSegment.restart();
- yybegin(YYINITIAL);
- return outputSegment.nextChar();
- }
case 106: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex Mon Aug 13 11:16:57 2012
@@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU]
[vV][aA][rR] )
-%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
+%include HTMLCharacterEntities.jflex
-%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
+%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%{
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java Mon Aug 13 11:16:57 2012
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenS
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
@@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
* of the CJK scripts are turned into bigrams.
* <p>
+ * By default, when a CJK character has no adjacent characters to form
+ * a bigram, it is output in unigram form. If you want to always output
+ * both unigrams and bigrams, set the <code>outputUnigrams</code>
+ * flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
+ * This can be used for a combined unigram+bigram approach.
+ * <p>
* In all cases, all non-CJK input is passed through unmodified.
*/
public final class CJKBigramFilter extends TokenFilter {
@@ -67,10 +75,16 @@ public final class CJKBigramFilter exten
private final Object doHiragana;
private final Object doKatakana;
private final Object doHangul;
+
+ // true if we should output unigram tokens always
+ private final boolean outputUnigrams;
+ private boolean ngramState; // false = output unigram, true = output bigram
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
// buffers containing codepoint and offsets in parallel
int buffer[] = new int[8];
@@ -88,23 +102,36 @@ public final class CJKBigramFilter exten
/**
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
- * CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
+ * CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
*/
public CJKBigramFilter(TokenStream in) {
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
}
/**
- * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
+ * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
+ * CJKBigramFilter(in, flags, false)}
+ */
+ public CJKBigramFilter(TokenStream in, int flags) {
+ this(in, flags, false);
+ }
+
+ /**
+ * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+ * and whether or not unigrams should also be output.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
+ * @param outputUnigrams true if unigrams for the selected writing systems should also be output.
+ * When this is false, unigrams are only output when there are no adjacent characters to form
+ * a bigram.
*/
- public CJKBigramFilter(TokenStream in, int flags) {
+ public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
super(in);
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
+ this.outputUnigrams = outputUnigrams;
}
/*
@@ -120,7 +147,24 @@ public final class CJKBigramFilter exten
// case 1: we have multiple remaining codepoints buffered,
// so we can emit a bigram here.
- flushBigram();
+ if (outputUnigrams) {
+
+ // when also outputting unigrams, we output the unigram first,
+ // then rewind back to revisit the bigram.
+ // so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
+ // the logic in hasBufferedUnigram ensures we output the C,
+ // even though it did actually have adjacent CJK characters.
+
+ if (ngramState) {
+ flushBigram();
+ } else {
+ flushUnigram();
+ index--;
+ }
+ ngramState = !ngramState;
+ } else {
+ flushBigram();
+ }
return true;
} else if (doNext()) {
@@ -260,6 +304,11 @@ public final class CJKBigramFilter exten
termAtt.setLength(len2);
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
typeAtt.setType(DOUBLE_TYPE);
+ // when outputting unigrams, all bigrams are synonyms that span two unigrams
+ if (outputUnigrams) {
+ posIncAtt.setPositionIncrement(0);
+ posLengthAtt.setPositionLength(2);
+ }
index++;
}
@@ -292,7 +341,13 @@ public final class CJKBigramFilter exten
* inputs.
*/
private boolean hasBufferedUnigram() {
- return bufferLen == 1 && index == 0;
+ if (outputUnigrams) {
+ // when always outputting unigrams: exactly one codepoint remains buffered, so emit it as a unigram
+ return bufferLen - index == 1;
+ } else {
+ // otherwise it's only when we have a lone CJK character
+ return bufferLen == 1 && index == 0;
+ }
}
@Override
@@ -303,5 +358,6 @@ public final class CJKBigramFilter exten
lastEndOffset = 0;
loneState = null;
exhausted = false;
+ ngramState = false;
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java Mon Aug 13 11:16:57 2012
@@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.T
* <filter class="solr.LowerCaseFilterFactory"/>
* <filter class="solr.CJKBigramFilterFactory"
* han="true" hiragana="true"
- * katakana="true" hangul="true" />
+ * katakana="true" hangul="true" outputUnigrams="false" />
* </analyzer>
* </fieldType></pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags;
+ boolean outputUnigrams;
@Override
public void init(Map<String,String> args) {
@@ -56,10 +57,11 @@ public class CJKBigramFilterFactory exte
if (getBoolean("hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
+ outputUnigrams = getBoolean("outputUnigrams", false);
}
@Override
public TokenStream create(TokenStream input) {
- return new CJKBigramFilter(input, flags);
+ return new CJKBigramFilter(input, flags, outputUnigrams);
}
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {
@@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements St
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
+ private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokena
%function getNextToken
%pack
%char
+%buffer 4096
%{
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro Mon Aug 13 11:16:57 2012
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
+// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@@ -43,7 +43,7 @@ public final class StandardTokenizerImpl
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
+ private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokena
%implements StandardTokenizerInterface
%function getNextToken
%char
+%buffer 4096
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenize
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
+ private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokena
%implements StandardTokenizerInterface
%function getNextToken
%char
+%buffer 4096
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Ext
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
-%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
+%include ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
- private static final int ZZ_BUFFERSIZE = 16384;
+ private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int THREE_SINGLE_QUOTES_STATE = 10;