You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/13 13:17:06 UTC

svn commit: r1372366 [2/8] - in /lucene/dev/branches/pforcodec_3892: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/analysis/common/ dev-tools/maven/lucene/analysis/icu/ ...

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/clustering/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/contrib/clustering</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -106,17 +99,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler-extras/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/contrib/dataimporthandler-extras</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -104,17 +97,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/dataimporthandler/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/contrib/dataimporthandler</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -90,6 +83,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
     <plugins>
       <plugin>
@@ -103,15 +102,6 @@
           </execution>
         </executions>
       </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
     </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/extraction/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -38,18 +38,11 @@
     <module-directory>solr/contrib/extraction</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -102,17 +95,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/langid/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -39,18 +39,11 @@
     <module-directory>solr/contrib/langid</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -107,17 +100,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/uima/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/contrib/uima</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -121,17 +114,12 @@
       <testResource>
         <directory>${module-path}/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/contrib/velocity/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/contrib/velocity</module-directory>
     <top-level>../../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -142,17 +135,12 @@
       <testResource>
         <directory>${top-level}/solr/core/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-    </plugins>
   </build>
 </project>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/core/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -35,18 +35,11 @@
     <module-directory>solr/core</module-directory>
     <top-level>../../..</top-level>
     <module-path>${top-level}/${module-directory}</module-path>
-    <surefire-top-level>${top-level}/../..</surefire-top-level>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>
@@ -243,49 +236,15 @@
       <testResource>
         <directory>${top-level}/solr/solrj/src/test-files</directory>
       </testResource>
+      <testResource>
+        <directory>${top-level}/dev-tools/maven/solr</directory>
+        <includes>
+          <include>maven.testlogging.properties</include>
+        </includes>
+      </testResource>
     </testResources>
     <plugins>
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
-        <configuration>
-          <systemPropertyVariables>
-            <java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
-          </systemPropertyVariables>
-        </configuration>
-      </plugin>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <configuration>
-          <extraJvmArguments>-Xmx128M</extraJvmArguments>
-          <repositoryLayout>flat</repositoryLayout>
-          <platforms>
-            <platform>windows</platform>
-            <platform>unix</platform>
-          </platforms>
-          <programs>
-            <program>
-              <mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
-              <name>JettySolrRunner</name>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.BitSetPerf</mainClass>
-              <name>BitSetPerf</name>
-              <extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.SimplePostTool</mainClass>
-              <name>SimplePostTool</name>
-            </program>
-            <program>
-              <mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
-              <name>SuggestMissingFactories</name>
-            </program>
-          </programs>
-        </configuration>
-      </plugin>
-      <plugin>
         <groupId>org.codehaus.mojo</groupId>
         <artifactId>build-helper-maven-plugin</artifactId>
         <executions>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -43,26 +43,14 @@
     <module-directory>solr</module-directory>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <issueManagement>
     <system>JIRA</system>
-    <url>http://issues.apache.org/jira/browse/SOLR</url>
+    <url>https://issues.apache.org/jira/browse/SOLR</url>
   </issueManagement>
-  <ciManagement>
-    <system>Hudson</system>
-    <url>
-      http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
-    </url>
-  </ciManagement>
   <mailingLists>
     <mailingList>
       <name>Solr User List</name>
@@ -111,6 +99,15 @@
             <doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
           </configuration>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-surefire-plugin</artifactId>
+          <configuration>
+            <systemPropertyVariables>
+              <java.util.logging.config.file>../test-classes/maven.testlogging.properties</java.util.logging.config.file>
+            </systemPropertyVariables>
+          </configuration>
+        </plugin>
       </plugins>
     </pluginManagement>
   </build>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/solrj/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/test-framework/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <!-- These dependencies are compile scope because this is a test framework. -->
@@ -61,19 +55,28 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>javax.servlet</groupId>
+      <artifactId>servlet-api</artifactId>
+      <!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
     </dependency>
-    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
-         you can exclude the three Jetty dependencies below. -->
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
-      <artifactId>jetty-server</artifactId>
-      <scope>runtime</scope>
+      <artifactId>jetty-servlet</artifactId>
     </dependency>
     <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
+    </dependency>
+    <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
+         you can exclude the two Jetty dependencies below. -->
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
       <scope>runtime</scope>
     </dependency>
     <dependency>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/maven/solr/webapp/pom.xml.template Mon Aug 13 11:16:57 2012
@@ -37,15 +37,9 @@
     <module-path>${top-level}/${module-directory}</module-path>
   </properties>
   <scm>
-    <connection>
-      scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </connection>
-    <developerConnection>
-      scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
-    </developerConnection>
-    <url>
-      http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
-    </url>
+    <connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
+    <developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
+    <url>${vc-browse-base-url}/${module-directory}</url>
   </scm>
   <dependencies>
     <dependency>

Modified: lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/pforcodec_3892/dev-tools/scripts/smokeTestRelease.py Mon Aug 13 11:16:57 2012
@@ -58,7 +58,7 @@ def javaExe(version):
 
 def verifyJavaVersion(version):
   s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
-  if s.find('java version "%s.' % version) == -1:
+  if s.find(' version "%s.' % version) == -1:
     raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
 
 # http://s.apache.org/lusolr32rc2
@@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, t
     raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
 
 def getDirEntries(urlString):
+  if urlString.startswith('file:/') and not urlString.startswith('file://'):
+    # stupid bogus ant URI
+    urlString = "file:///" + urlString[6:]
+
   if urlString.startswith('file://'):
     path = urlString[7:]
     if path.endswith('/'):
@@ -1026,7 +1030,7 @@ def crawl(downloadedFiles, urlString, ta
 
 def main():
 
-  if len(sys.argv) != 4:
+  if len(sys.argv) < 4:
     print()
     print('Usage python -u %s BaseURL version tmpDir' % sys.argv[0])
     print()
@@ -1035,8 +1039,11 @@ def main():
   baseURL = sys.argv[1]
   version = sys.argv[2]
   tmpDir = os.path.abspath(sys.argv[3])
+  isSigned = True 
+  if len(sys.argv) == 5:
+    isSigned = (sys.argv[4] == "True")
 
-  smokeTest(baseURL, version, tmpDir, True)
+  smokeTest(baseURL, version, tmpDir, isSigned)
 
 def smokeTest(baseURL, version, tmpDir, isSigned):
 
@@ -1090,4 +1097,5 @@ if __name__ == '__main__':
   except:
     import traceback
     traceback.print_exc()
-  
+    sys.exit(1)
+  sys.exit(0)

Modified: lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/CHANGES.txt Mon Aug 13 11:16:57 2012
@@ -6,6 +6,56 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 5.0.0 =======================
 
+======================= Lucene 4.0.0 =======================
+
+New Features
+
+* LUCENE-1888: Added the option to store payloads in the term
+  vectors (IndexableFieldType.storeTermVectorPayloads()). Note 
+  that you must store term vector positions to store payloads.
+  (Robert Muir)
+
+API Changes
+
+* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
+  Previously you had no real way to know that a term vector field
+  had positions or offsets, since this can be configured on a 
+  per-field-per-document basis. (Robert Muir)
+
+* Removed DocsAndPositionsEnum.hasPayload() and simplified the
+  contract of getPayload(). It returns null if there is no payload,
+  otherwise returns the current payload. You can now call it multiple
+  times per position if you want. (Robert Muir)
+
+* Removed FieldsEnum. Fields API instead implements Iterable<String>
+  and exposes Iterator, so you can iterate over field names with
+  for (String field : fields) instead.  (Robert Muir)
+
+Bug Fixes
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+  twice for conjunctions: for most users this is no problem, but
+  if you had a customized Similarity that returned something other
+  than 1 when overlap == maxOverlap (always the case for conjunctions),
+  then the score would be incorrect.  (Pascal Chollet, Robert Muir)
+
+* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
+  did not work at all, it would infinitely recurse.
+  (Alberto Paro via Robert Muir)
+
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+  had a custom Similarity where coord(1,1) != 1F, then the rewritten
+  query would be scored differently.  (Robert Muir)
+
+* Don't allow negatives in the positions file. If you have an index
+  from 2.4.0 or earlier with such negative positions, and you already 
+  upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run 
+  CheckIndex. If it fails, then you need to upgrade again to 4.0  (Robert Muir)
+
+Build
+
+* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for 
+  thread leak detection. Added support for suite timeouts. (Dawid Weiss)
 
 ======================= Lucene 4.0.0-BETA =======================
 
@@ -47,6 +97,11 @@ New features
   int docID), to attempt deletion by docID as long as the provided
   reader is an NRT reader, and the segment has not yet been merged
   away (Mike McCandless).
+  
+* LUCENE-4286: Added option to CJKBigramFilter to always also output
+  unigrams. This can be used for a unigram+bigram approach, or at 
+  index-time only for better support of short queries.
+  (Tom Burton-West, Robert Muir)
 
 API Changes
 
@@ -115,6 +170,10 @@ Optimizations
   making them substantially more lightweight. Behavior is unchanged. 
   (Robert Muir)
 
+* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers 
+  such as StandardTokenizer from 32kb to 8kb.  
+  (Raintung Li, Steven Rowe, Robert Muir)
+
 Bug Fixes
 
 * LUCENE-4109: BooleanQueries are not parsed correctly with the 
@@ -164,6 +223,9 @@ Bug Fixes
 * LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
   (Johannes Christen, Uwe Schindler, Robert Muir)
 
+* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
+  (Robert Muir)
+
 Changes in Runtime Behavior
 
 * LUCENE-4109: Enable position increments in the flexible queryparser by default.

Modified: lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/MIGRATE.txt Mon Aug 13 11:16:57 2012
@@ -9,7 +9,7 @@ enumeration APIs.  Here are the major ch
     by the BytesRef class (which provides an offset + length "slice"
     into an existing byte[]).
 
-  * Fields are separately enumerated (FieldsEnum) from the terms
+  * Fields are separately enumerated (Fields.iterator()) from the terms
     within each field (TermEnum).  So instead of this:
 
         TermEnum termsEnum = ...;
@@ -20,10 +20,8 @@ enumeration APIs.  Here are the major ch
 
     Do this:
 
-        FieldsEnum fieldsEnum = ...;
-        String field;
-        while((field = fieldsEnum.next()) != null) {
-            TermsEnum termsEnum = fieldsEnum.terms();
+        for(String field : fields) {
+            TermsEnum termsEnum = fields.terms(field);
             BytesRef text;
             while((text = termsEnum.next()) != null) {
               System.out.println("field=" + field + "; text=" + text.utf8ToString());
@@ -316,11 +314,12 @@ an AtomicReader. Note: using "atomicity 
 slowdowns due to the need to merge terms, postings, DocValues, and 
 FieldCache, use them with care! 
 
-## LUCENE-2413: Analyzer package changes
+## LUCENE-2413,LUCENE-3396: Analyzer package changes
 
 Lucene's core and contrib analyzers, along with Solr's analyzers,
 were consolidated into lucene/analysis. During the refactoring some
-package names have changed:
+package names have changed, and ReusableAnalyzerBase was renamed to
+Analyzer:
 
   - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
   - o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@@ -345,7 +344,7 @@ package names have changed:
   - o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
   - o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
   - o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
-  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
+  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
   - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
   - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
   - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.charfilter;
 
@@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.O
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/26/12 6:22 PM from the specification file
- * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
  */
 public final class HTMLStripCharFilter extends BaseCharFilter {
 
@@ -31255,88 +31255,56 @@ public final class HTMLStripCharFilter e
           { yybegin(STYLE);
           }
         case 55: break;
-        case 51: 
-          { // Handle paired UTF-16 surrogates.
-    String surrogatePair = yytext();
-    char highSurrogate = '\u0000';
-    char lowSurrogate = '\u0000';
-    try {
-      highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
-    } catch(Exception e) { // should never happen
-      assert false: "Exception parsing high surrogate '"
-                  + surrogatePair.substring(2, 6) + "'";
-    }
-    try { // Low surrogates are in decimal range [56320, 57343]
-      lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
-    } catch(Exception e) { // should never happen
-      assert false: "Exception parsing low surrogate '"
-                  + surrogatePair.substring(9, 14) + "'";
-    }
-    if (Character.isLowSurrogate(lowSurrogate)) {
-      outputSegment = entitySegment;
-      outputSegment.clear();
-      outputSegment.unsafeWrite(lowSurrogate);
-      // add (previously matched input length) + (this match length) - (substitution length)
-      cumulativeDiff += inputSegment.length() + yylength() - 2;
-      // position the correction at (already output length) + (substitution length)
-      addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
-      inputSegment.clear();
-      yybegin(YYINITIAL);
-      return highSurrogate;
-    }
-    yypushback(surrogatePair.length() - 1); // Consume only '#'
-    inputSegment.append('#');
-    yybegin(NUMERIC_CHARACTER);
+        case 27: 
+          { // add (previously matched input length) + (this match length) - (substitution length)
+    cumulativeDiff += inputSegment.length() + yylength() - 1;
+    // position the correction at (already output length) + (substitution length)
+    addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+    inputSegment.clear();
+    yybegin(YYINITIAL);
+    return BLOCK_LEVEL_START_TAG_REPLACEMENT;
           }
         case 56: break;
-        case 21: 
-          { previousRestoreState = restoreState;
-    restoreState = SERVER_SIDE_INCLUDE;
-    yybegin(SINGLE_QUOTED_STRING);
+        case 30: 
+          { int length = yylength();
+    inputSegment.write(zzBuffer, zzStartRead, length);
+    entitySegment.clear();
+    char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
+    entitySegment.append(ch);
+    outputSegment = entitySegment;
+    yybegin(CHARACTER_REFERENCE_TAIL);
           }
         case 57: break;
-        case 31: 
-          { int matchLength = yylength();
-    inputSegment.write(zzBuffer, zzStartRead, matchLength);
-    if (matchLength <= 6) { // 10FFFF: max 6 hex chars
-      String hexCharRef
-          = new String(zzBuffer, zzStartRead + 1, matchLength - 1);
-      int codePoint = 0;
-      try {
-        codePoint = Integer.parseInt(hexCharRef, 16);
-      } catch(Exception e) {
-        assert false: "Exception parsing hex code point '" + hexCharRef + "'";
-      }
-      if (codePoint <= 0x10FFFF) {
-        outputSegment = entitySegment;
-        outputSegment.clear();
-        if (codePoint >= Character.MIN_SURROGATE
-            && codePoint <= Character.MAX_SURROGATE) {
-          outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
-        } else {
-          outputSegment.setLength
-              (Character.toChars(codePoint, outputSegment.getArray(), 0));
-        }
-        yybegin(CHARACTER_REFERENCE_TAIL);
-      } else {
-        outputSegment = inputSegment;
-        yybegin(YYINITIAL);
-        return outputSegment.nextChar();
-      }
-    } else {
+        case 48: 
+          { inputSegment.clear();
+    yybegin(YYINITIAL);
+    // add (previously matched input length) -- current match and substitution handled below
+    cumulativeDiff += yychar - inputStart;
+    // position the offset correction at (already output length) -- substitution handled below
+    int offsetCorrectionPos = outputCharCount;
+    int returnValue;
+    if (escapeSTYLE) {
+      inputSegment.write(zzBuffer, zzStartRead, yylength());
       outputSegment = inputSegment;
-      yybegin(YYINITIAL);
-      return outputSegment.nextChar();
+      returnValue = outputSegment.nextChar();
+    } else {
+      // add (this match length) - (substitution length)
+      cumulativeDiff += yylength() - 1;
+      // add (substitution length)
+      ++offsetCorrectionPos;
+      returnValue = STYLE_REPLACEMENT;
     }
+    addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
+    return returnValue;
           }
         case 58: break;
-        case 19: 
+        case 8: 
           { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
         && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
-      yybegin(END_TAG_TAIL_INCLUDE);
+      yybegin(START_TAG_TAIL_INCLUDE);
     } else {
-      yybegin(END_TAG_TAIL_EXCLUDE);
+      yybegin(START_TAG_TAIL_SUBSTITUTE);
     }
           }
         case 59: break;
@@ -31347,113 +31315,79 @@ public final class HTMLStripCharFilter e
   yybegin(LEFT_ANGLE_BRACKET);
           }
         case 60: break;
-        case 27: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
-    cumulativeDiff += inputSegment.length() + yylength() - 1;
-    // position the correction at (already output length) + (substitution length)
-    addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
-    inputSegment.clear();
-    yybegin(YYINITIAL);
-    return BLOCK_LEVEL_START_TAG_REPLACEMENT;
-          }
-        case 61: break;
         case 44: 
           { restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
           }
+        case 61: break;
+        case 21: 
+          { previousRestoreState = restoreState;
+    restoreState = SERVER_SIDE_INCLUDE;
+    yybegin(SINGLE_QUOTED_STRING);
+          }
         case 62: break;
+        case 11: 
+          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+    yybegin(LEFT_ANGLE_BRACKET_SPACE);
+          }
+        case 63: break;
         case 35: 
           { yybegin(SCRIPT);
           }
-        case 63: break;
+        case 64: break;
         case 42: 
           { restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
           }
-        case 64: break;
+        case 65: break;
         case 10: 
           { inputSegment.append('!'); yybegin(BANG);
           }
-        case 65: break;
-        case 33: 
-          { yybegin(YYINITIAL);
-    if (escapeBR) {
-      inputSegment.write(zzBuffer, zzStartRead, yylength());
-      outputSegment = inputSegment;
-      return outputSegment.nextChar();
-    } else {
-      // add (previously matched input length) + (this match length) - (substitution length)
-      cumulativeDiff += inputSegment.length() + yylength() - 1;
-      // position the correction at (already output length) + (substitution length)
-      addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
-      inputSegment.reset();
-      return BR_START_TAG_REPLACEMENT;
-    }
-          }
         case 66: break;
-        case 53: 
+        case 51: 
           { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
     char highSurrogate = '\u0000';
-    try { // High surrogates are in decimal range [55296, 56319]
-      highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
+    char lowSurrogate = '\u0000';
+    try {
+      highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
     } catch(Exception e) { // should never happen
       assert false: "Exception parsing high surrogate '"
-                  + surrogatePair.substring(1, 6) + "'";
+                  + surrogatePair.substring(2, 6) + "'";
     }
-    if (Character.isHighSurrogate(highSurrogate)) {
-      char lowSurrogate = '\u0000';
-      try { // Low surrogates are in decimal range [56320, 57343]
-        lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
-      } catch(Exception e) { // should never happen
-        assert false: "Exception parsing low surrogate '"
-                    + surrogatePair.substring(9, 14) + "'";
-      }
-      if (Character.isLowSurrogate(lowSurrogate)) {
-        outputSegment = entitySegment;
-        outputSegment.clear();
-        outputSegment.unsafeWrite(lowSurrogate);
-        // add (previously matched input length) + (this match length) - (substitution length)
-        cumulativeDiff += inputSegment.length() + yylength() - 2;
-        // position the correction at (already output length) + (substitution length)
-        addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
-        inputSegment.clear();
-        yybegin(YYINITIAL);
-        return highSurrogate;
-      }
+    try { // Low surrogates are in decimal range [56320, 57343]
+      lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
+    } catch(Exception e) { // should never happen
+      assert false: "Exception parsing low surrogate '"
+                  + surrogatePair.substring(9, 14) + "'";
+    }
+    if (Character.isLowSurrogate(lowSurrogate)) {
+      outputSegment = entitySegment;
+      outputSegment.clear();
+      outputSegment.unsafeWrite(lowSurrogate);
+      // add (previously matched input length) + (this match length) - (substitution length)
+      cumulativeDiff += inputSegment.length() + yylength() - 2;
+      // position the correction at (already output length) + (substitution length)
+      addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+      inputSegment.clear();
+      yybegin(YYINITIAL);
+      return highSurrogate;
     }
     yypushback(surrogatePair.length() - 1); // Consume only '#'
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
           }
         case 67: break;
-        case 43: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
+        case 4: 
+          { yypushback(1);
+    outputSegment = inputSegment;
+    outputSegment.restart();
+    yybegin(YYINITIAL);
+    return outputSegment.nextChar();
           }
         case 68: break;
-        case 30: 
-          { int length = yylength();
-    inputSegment.write(zzBuffer, zzStartRead, length);
-    entitySegment.clear();
-    char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
-    entitySegment.append(ch);
-    outputSegment = entitySegment;
-    yybegin(CHARACTER_REFERENCE_TAIL);
+        case 43: 
+          { restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
           }
         case 69: break;
-        case 28: 
-          { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 70: break;
-        case 3: 
-          { inputStart = yychar;
-  inputSegment.clear();
-  inputSegment.append('&');
-  yybegin(AMPERSAND);
-          }
-        case 71: break;
-        case 16: 
-          { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
-          }
-        case 72: break;
         case 52: 
           { // Handle paired UTF-16 surrogates.
     String surrogatePair = yytext();
@@ -31486,174 +31420,11 @@ public final class HTMLStripCharFilter e
     inputSegment.append('#');
     yybegin(NUMERIC_CHARACTER);
           }
-        case 73: break;
-        case 6: 
-          { int matchLength = yylength();
-    inputSegment.write(zzBuffer, zzStartRead, matchLength);
-    if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
-      String decimalCharRef = yytext();
-      int codePoint = 0;
-      try {
-        codePoint = Integer.parseInt(decimalCharRef);
-      } catch(Exception e) {
-        assert false: "Exception parsing code point '" + decimalCharRef + "'";
-      }
-      if (codePoint <= 0x10FFFF) {
-        outputSegment = entitySegment;
-        outputSegment.clear();
-        if (codePoint >= Character.MIN_SURROGATE
-            && codePoint <= Character.MAX_SURROGATE) {
-          outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
-        } else {
-          outputSegment.setLength
-              (Character.toChars(codePoint, outputSegment.getArray(), 0));
-        }
-        yybegin(CHARACTER_REFERENCE_TAIL);
-      } else {
-        outputSegment = inputSegment;
-        yybegin(YYINITIAL);
-        return outputSegment.nextChar();
-      }
-    } else {
-      outputSegment = inputSegment;
-      yybegin(YYINITIAL);
-      return outputSegment.nextChar();
-    }
-          }
-        case 74: break;
-        case 37: 
-          { // add (this match length) [ - (substitution length) = 0 ]
-    cumulativeDiff += yylength();
-    // position the correction at (already output length) [ + (substitution length) = 0 ]
-    addOffCorrectMap(outputCharCount, cumulativeDiff);
-    yybegin(YYINITIAL);
-          }
-        case 75: break;
-        case 8: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-    if (null != escapedTags
-        && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
-      yybegin(START_TAG_TAIL_INCLUDE);
-    } else {
-      yybegin(START_TAG_TAIL_SUBSTITUTE);
-    }
-          }
-        case 76: break;
-        case 46: 
-          { yybegin(SCRIPT);
-    if (escapeSCRIPT) {
-      inputSegment.write(zzBuffer, zzStartRead, yylength());
-      outputSegment = inputSegment;
-      inputStart += 1 + yylength();
-      return outputSegment.nextChar();
-    }
-          }
-        case 77: break;
-        case 11: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-    yybegin(LEFT_ANGLE_BRACKET_SPACE);
-          }
-        case 78: break;
-        case 20: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-          }
-        case 79: break;
-        case 34: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
-    cumulativeDiff += yychar - inputStart + yylength();
-    // position the correction at (already output length) [ + (substitution length) = 0]
-    addOffCorrectMap(outputCharCount, cumulativeDiff);
-    inputSegment.clear();
-    yybegin(YYINITIAL);
-          }
-        case 80: break;
-        case 23: 
-          { yybegin(restoreState); restoreState = previousRestoreState;
-          }
-        case 81: break;
-        case 32: 
-          { yybegin(COMMENT);
-          }
-        case 82: break;
-        case 14: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
-    cumulativeDiff += inputSegment.length() + yylength();
-    // position the correction at (already output length) [ + (substitution length) = 0 ]
-    addOffCorrectMap(outputCharCount, cumulativeDiff);
-    inputSegment.clear();
-    yybegin(YYINITIAL);
-          }
-        case 83: break;
-        case 18: 
-          { inputSegment.write(zzBuffer, zzStartRead, yylength());
-    if (null != escapedTags
-        && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
-      yybegin(END_TAG_TAIL_INCLUDE);
-    } else {
-      yybegin(END_TAG_TAIL_SUBSTITUTE);
-    }
-          }
-        case 84: break;
-        case 25: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
-    cumulativeDiff += inputSegment.length() + yylength() - 1;
-    // position the correction at (already output length) + (substitution length)
-    addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
-    inputSegment.clear();
-    yybegin(YYINITIAL);
-    return BLOCK_LEVEL_END_TAG_REPLACEMENT;
-          }
-        case 85: break;
-        case 7: 
-          { // add (previously matched input length) + (this match length) - (substitution length)
-    cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
-    // position the correction at (already output length) + (substitution length)
-    addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
-    yybegin(YYINITIAL);
-    return outputSegment.nextChar();
-          }
-        case 86: break;
-        case 48: 
-          { inputSegment.clear();
-    yybegin(YYINITIAL);
-    // add (previously matched input length) -- current match and substitution handled below
-    cumulativeDiff += yychar - inputStart;
-    // position the offset correction at (already output length) -- substitution handled below
-    int offsetCorrectionPos = outputCharCount;
-    int returnValue;
-    if (escapeSTYLE) {
-      inputSegment.write(zzBuffer, zzStartRead, yylength());
-      outputSegment = inputSegment;
-      returnValue = outputSegment.nextChar();
-    } else {
-      // add (this match length) - (substitution length)
-      cumulativeDiff += yylength() - 1;
-      // add (substitution length)
-      ++offsetCorrectionPos;
-      returnValue = STYLE_REPLACEMENT;
-    }
-    addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
-    return returnValue;
-          }
-        case 87: break;
-        case 5: 
-          { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
-          }
-        case 88: break;
-        case 26: 
-          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
-    cumulativeDiff += inputSegment.length() + yylength();
-    // position the correction at (already output length) [ + (substitution length) = 0 ]
-    addOffCorrectMap(outputCharCount, cumulativeDiff);
-    inputSegment.clear();
-    outputSegment = inputSegment;
-    yybegin(YYINITIAL);
-          }
-        case 89: break;
-        case 13: 
-          { inputSegment.append(zzBuffer[zzStartRead]);
+        case 70: break;
+        case 28: 
+          { restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
           }
-        case 90: break;
+        case 71: break;
         case 50: 
           { // Handle paired UTF-16 surrogates.
     outputSegment = entitySegment;
@@ -31681,32 +31452,41 @@ public final class HTMLStripCharFilter e
     yybegin(YYINITIAL);
     return highSurrogate;
           }
-        case 91: break;
-        case 40: 
-          { yybegin(SCRIPT_COMMENT);
-          }
-        case 92: break;
-        case 45: 
-          { yybegin(STYLE);
-    if (escapeSTYLE) {
-      inputSegment.write(zzBuffer, zzStartRead, yylength());
-      outputSegment = inputSegment;
-      inputStart += 1 + yylength();
-      return outputSegment.nextChar();
-    }
+        case 72: break;
+        case 16: 
+          { restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
           }
-        case 93: break;
+        case 73: break;
         case 22: 
           { previousRestoreState = restoreState;
     restoreState = SERVER_SIDE_INCLUDE;
     yybegin(DOUBLE_QUOTED_STRING);
           }
-        case 94: break;
-        case 12: 
-          { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+        case 74: break;
+        case 26: 
+          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+    cumulativeDiff += inputSegment.length() + yylength();
+    // position the correction at (already output length) [ + (substitution length) = 0 ]
+    addOffCorrectMap(outputCharCount, cumulativeDiff);
+    inputSegment.clear();
+    outputSegment = inputSegment;
+    yybegin(YYINITIAL);
           }
-        case 95: break;
-        case 36: 
+        case 75: break;
+        case 20: 
+          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+          }
+        case 76: break;
+        case 47: 
+          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
+    cumulativeDiff += inputSegment.length() + yylength();
+    // position the correction at (already output length) [ + (substitution length) = 0 ]
+    addOffCorrectMap(outputCharCount, cumulativeDiff);
+    inputSegment.clear();
+    yybegin(CDATA);
+          }
+        case 77: break;
+        case 33: 
           { yybegin(YYINITIAL);
     if (escapeBR) {
       inputSegment.write(zzBuffer, zzStartRead, yylength());
@@ -31718,34 +31498,128 @@ public final class HTMLStripCharFilter e
       // position the correction at (already output length) + (substitution length)
       addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
       inputSegment.reset();
-      return BR_END_TAG_REPLACEMENT;
+      return BR_START_TAG_REPLACEMENT;
     }
           }
-        case 96: break;
+        case 78: break;
+        case 23: 
+          { yybegin(restoreState); restoreState = previousRestoreState;
+          }
+        case 79: break;
+        case 32: 
+          { yybegin(COMMENT);
+          }
+        case 80: break;
         case 24: 
           { inputSegment.write(zzBuffer, zzStartRead, yylength());
      outputSegment = inputSegment;
      yybegin(YYINITIAL);
      return outputSegment.nextChar();
           }
-        case 97: break;
-        case 47: 
+        case 81: break;
+        case 3: 
+          { inputStart = yychar;
+  inputSegment.clear();
+  inputSegment.append('&');
+  yybegin(AMPERSAND);
+          }
+        case 82: break;
+        case 46: 
+          { yybegin(SCRIPT);
+    if (escapeSCRIPT) {
+      inputSegment.write(zzBuffer, zzStartRead, yylength());
+      outputSegment = inputSegment;
+      inputStart += 1 + yylength();
+      return outputSegment.nextChar();
+    }
+          }
+        case 83: break;
+        case 14: 
           { // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
     cumulativeDiff += inputSegment.length() + yylength();
     // position the correction at (already output length) [ + (substitution length) = 0 ]
     addOffCorrectMap(outputCharCount, cumulativeDiff);
     inputSegment.clear();
-    yybegin(CDATA);
+    yybegin(YYINITIAL);
           }
-        case 98: break;
-        case 29: 
-          { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+        case 84: break;
+        case 6: 
+          { int matchLength = yylength();
+    inputSegment.write(zzBuffer, zzStartRead, matchLength);
+    if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
+      String decimalCharRef = yytext();
+      int codePoint = 0;
+      try {
+        codePoint = Integer.parseInt(decimalCharRef);
+      } catch(Exception e) {
+        assert false: "Exception parsing code point '" + decimalCharRef + "'";
+      }
+      if (codePoint <= 0x10FFFF) {
+        outputSegment = entitySegment;
+        outputSegment.clear();
+        if (codePoint >= Character.MIN_SURROGATE
+            && codePoint <= Character.MAX_SURROGATE) {
+          outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
+        } else {
+          outputSegment.setLength
+              (Character.toChars(codePoint, outputSegment.getArray(), 0));
+        }
+        yybegin(CHARACTER_REFERENCE_TAIL);
+      } else {
+        outputSegment = inputSegment;
+        yybegin(YYINITIAL);
+        return outputSegment.nextChar();
+      }
+    } else {
+      outputSegment = inputSegment;
+      yybegin(YYINITIAL);
+      return outputSegment.nextChar();
+    }
           }
-        case 99: break;
-        case 17: 
-          { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+        case 85: break;
+        case 34: 
+          { // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
+    cumulativeDiff += yychar - inputStart + yylength();
+    // position the correction at (already output length) [ + (substitution length) = 0]
+    addOffCorrectMap(outputCharCount, cumulativeDiff);
+    inputSegment.clear();
+    yybegin(YYINITIAL);
           }
-        case 100: break;
+        case 86: break;
+        case 5: 
+          { inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
+          }
+        case 87: break;
+        case 13: 
+          { inputSegment.append(zzBuffer[zzStartRead]);
+          }
+        case 88: break;
+        case 18: 
+          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+    if (null != escapedTags
+        && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+      yybegin(END_TAG_TAIL_INCLUDE);
+    } else {
+      yybegin(END_TAG_TAIL_SUBSTITUTE);
+    }
+          }
+        case 89: break;
+        case 40: 
+          { yybegin(SCRIPT_COMMENT);
+          }
+        case 90: break;
+        case 37: 
+          { // add (this match length) [ - (substitution length) = 0 ]
+    cumulativeDiff += yylength();
+    // position the correction at (already output length) [ + (substitution length) = 0 ]
+    addOffCorrectMap(outputCharCount, cumulativeDiff);
+    yybegin(YYINITIAL);
+          }
+        case 91: break;
+        case 12: 
+          { inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
+          }
+        case 92: break;
         case 9: 
           { inputSegment.write(zzBuffer, zzStartRead, yylength());
     if (null != escapedTags
@@ -31755,7 +31629,7 @@ public final class HTMLStripCharFilter e
       yybegin(START_TAG_TAIL_EXCLUDE);
     }
           }
-        case 101: break;
+        case 93: break;
         case 49: 
           { inputSegment.clear();
     yybegin(YYINITIAL);
@@ -31778,26 +31652,152 @@ public final class HTMLStripCharFilter e
     addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
     return returnValue;
           }
+        case 94: break;
+        case 29: 
+          { restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+          }
+        case 95: break;
+        case 17: 
+          { restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
+          }
+        case 96: break;
+        case 45: 
+          { yybegin(STYLE);
+    if (escapeSTYLE) {
+      inputSegment.write(zzBuffer, zzStartRead, yylength());
+      outputSegment = inputSegment;
+      inputStart += 1 + yylength();
+      return outputSegment.nextChar();
+    }
+          }
+        case 97: break;
+        case 7: 
+          { // add (previously matched input length) + (this match length) - (substitution length)
+    cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
+    // position the correction at (already output length) + (substitution length)
+    addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
+    yybegin(YYINITIAL);
+    return outputSegment.nextChar();
+          }
+        case 98: break;
+        case 19: 
+          { inputSegment.write(zzBuffer, zzStartRead, yylength());
+    if (null != escapedTags
+        && escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
+      yybegin(END_TAG_TAIL_INCLUDE);
+    } else {
+      yybegin(END_TAG_TAIL_EXCLUDE);
+    }
+          }
+        case 99: break;
+        case 25: 
+          { // add (previously matched input length) + (this match length) - (substitution length)
+    cumulativeDiff += inputSegment.length() + yylength() - 1;
+    // position the correction at (already output length) + (substitution length)
+    addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+    inputSegment.clear();
+    yybegin(YYINITIAL);
+    return BLOCK_LEVEL_END_TAG_REPLACEMENT;
+          }
+        case 100: break;
+        case 31: 
+          { int matchLength = yylength();
+    inputSegment.write(zzBuffer, zzStartRead, matchLength);
+    if (matchLength <= 6) { // 10FFFF: max 6 hex chars
+      String hexCharRef
+          = new String(zzBuffer, zzStartRead + 1, matchLength - 1);
+      int codePoint = 0;
+      try {
+        codePoint = Integer.parseInt(hexCharRef, 16);
+      } catch(Exception e) {
+        assert false: "Exception parsing hex code point '" + hexCharRef + "'";
+      }
+      if (codePoint <= 0x10FFFF) {
+        outputSegment = entitySegment;
+        outputSegment.clear();
+        if (codePoint >= Character.MIN_SURROGATE
+            && codePoint <= Character.MAX_SURROGATE) {
+          outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
+        } else {
+          outputSegment.setLength
+              (Character.toChars(codePoint, outputSegment.getArray(), 0));
+        }
+        yybegin(CHARACTER_REFERENCE_TAIL);
+      } else {
+        outputSegment = inputSegment;
+        yybegin(YYINITIAL);
+        return outputSegment.nextChar();
+      }
+    } else {
+      outputSegment = inputSegment;
+      yybegin(YYINITIAL);
+      return outputSegment.nextChar();
+    }
+          }
+        case 101: break;
+        case 53: 
+          { // Handle paired UTF-16 surrogates.
+    String surrogatePair = yytext();
+    char highSurrogate = '\u0000';
+    try { // High surrogates are in decimal range [55296, 56319]
+      highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
+    } catch(Exception e) { // should never happen
+      assert false: "Exception parsing high surrogate '"
+                  + surrogatePair.substring(1, 6) + "'";
+    }
+    if (Character.isHighSurrogate(highSurrogate)) {
+      char lowSurrogate = '\u0000';
+      try { // Low surrogates are in decimal range [56320, 57343]
+        lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
+      } catch(Exception e) { // should never happen
+        assert false: "Exception parsing low surrogate '"
+                    + surrogatePair.substring(9, 14) + "'";
+      }
+      if (Character.isLowSurrogate(lowSurrogate)) {
+        outputSegment = entitySegment;
+        outputSegment.clear();
+        outputSegment.unsafeWrite(lowSurrogate);
+        // add (previously matched input length) + (this match length) - (substitution length)
+        cumulativeDiff += inputSegment.length() + yylength() - 2;
+        // position the correction at (already output length) + (substitution length)
+        addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
+        inputSegment.clear();
+        yybegin(YYINITIAL);
+        return highSurrogate;
+      }
+    }
+    yypushback(surrogatePair.length() - 1); // Consume only '#'
+    inputSegment.append('#');
+    yybegin(NUMERIC_CHARACTER);
+          }
         case 102: break;
+        case 36: 
+          { yybegin(YYINITIAL);
+    if (escapeBR) {
+      inputSegment.write(zzBuffer, zzStartRead, yylength());
+      outputSegment = inputSegment;
+      return outputSegment.nextChar();
+    } else {
+      // add (previously matched input length) + (this match length) - (substitution length)
+      cumulativeDiff += inputSegment.length() + yylength() - 1;
+      // position the correction at (already output length) + (substitution length)
+      addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
+      inputSegment.reset();
+      return BR_END_TAG_REPLACEMENT;
+    }
+          }
+        case 103: break;
         case 38: 
           { yybegin(restoreState);
           }
-        case 103: break;
+        case 104: break;
         case 41: 
           { yybegin(STYLE_COMMENT);
           }
-        case 104: break;
+        case 105: break;
         case 1: 
           { return zzBuffer[zzStartRead];
           }
-        case 105: break;
-        case 4: 
-          { yypushback(1);
-    outputSegment = inputSegment;
-    outputSegment.restart();
-    yybegin(YYINITIAL);
-    return outputSegment.nextChar();
-          }
         case 106: break;
         default: 
           if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex Mon Aug 13 11:16:57 2012
@@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU]         
                  [vV][aA][rR]                     )
 
 
-%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
+%include HTMLCharacterEntities.jflex
 
-%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
+%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
 
 %{
   private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java Mon Aug 13 11:16:57 2012
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.ArrayUtil;
 
@@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
  * {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
  * of the CJK scripts are turned into bigrams.
  * <p>
+ * By default, when a CJK character has no adjacent characters to form
+ * a bigram, it is output in unigram form. If you want to always output
+ * both unigrams and bigrams, set the <code>outputUnigrams</code>
+ * flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
+ * This can be used for a combined unigram+bigram approach.
+ * <p>
  * In all cases, all non-CJK input is passed thru unmodified.
  */
 public final class CJKBigramFilter extends TokenFilter {
@@ -67,10 +75,16 @@ public final class CJKBigramFilter exten
   private final Object doHiragana;
   private final Object doKatakana;
   private final Object doHangul;
+  
+  // true if we should output unigram tokens always
+  private final boolean outputUnigrams;
+  private boolean ngramState; // false = output unigram, true = output bigram
     
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+  private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
   
   // buffers containing codepoint and offsets in parallel
   int buffer[] = new int[8];
@@ -88,23 +102,36 @@ public final class CJKBigramFilter exten
   
   /** 
    * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
-   *       CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
+   *       CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
    */
   public CJKBigramFilter(TokenStream in) {
     this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
   }
   
   /** 
-   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
+   * Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
+   *       CJKBigramFilter(in, flags, false)}
+   */
+  public CJKBigramFilter(TokenStream in, int flags) {
+    this(in, flags, false);
+  }
+  
+  /**
+   * Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
+   * and whether or not unigrams should also be output.
    * @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA}, 
    *        {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
+   * @param outputUnigrams true if unigrams for the selected writing systems should also be output.
+   *        when this is false, this is only done when there are no adjacent characters to form
+   *        a bigram.
    */
-  public CJKBigramFilter(TokenStream in, int flags) {
+  public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
     super(in);
     doHan =      (flags & HAN) == 0      ? NO : HAN_TYPE;
     doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
     doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
     doHangul =   (flags & HANGUL) == 0   ? NO : HANGUL_TYPE;
+    this.outputUnigrams = outputUnigrams;
   }
   
   /*
@@ -120,7 +147,24 @@ public final class CJKBigramFilter exten
         // case 1: we have multiple remaining codepoints buffered,
         // so we can emit a bigram here.
         
-        flushBigram();
+        if (outputUnigrams) {
+
+          // when also outputting unigrams, we output the unigram first,
+          // then rewind back to revisit the bigram.
+          // so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
+          // the logic in hasBufferedUnigram ensures we output the C, 
+          // even though it did actually have adjacent CJK characters.
+
+          if (ngramState) {
+            flushBigram();
+          } else {
+            flushUnigram();
+            index--;
+          }
+          ngramState = !ngramState;
+        } else {
+          flushBigram();
+        }
         return true;
       } else if (doNext()) {
         
@@ -260,6 +304,11 @@ public final class CJKBigramFilter exten
     termAtt.setLength(len2);
     offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
     typeAtt.setType(DOUBLE_TYPE);
+    // when outputting unigrams, all bigrams are synonyms that span two unigrams
+    if (outputUnigrams) {
+      posIncAtt.setPositionIncrement(0);
+      posLengthAtt.setPositionLength(2);
+    }
     index++;
   }
   
@@ -292,7 +341,13 @@ public final class CJKBigramFilter exten
    * inputs.
    */
   private boolean hasBufferedUnigram() {
-    return bufferLen == 1 && index == 0;
+    if (outputUnigrams) {
+      // when outputting unigrams always
+      return bufferLen - index == 1;
+    } else {
+      // otherwise its only when we have a lone CJK character
+      return bufferLen == 1 && index == 0;
+    }
   }
 
   @Override
@@ -303,5 +358,6 @@ public final class CJKBigramFilter exten
     lastEndOffset = 0;
     loneState = null;
     exhausted = false;
+    ngramState = false;
   }
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilterFactory.java Mon Aug 13 11:16:57 2012
@@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.T
  *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
  *     &lt;filter class="solr.CJKBigramFilterFactory" 
  *       han="true" hiragana="true" 
- *       katakana="true" hangul="true" /&gt;
+ *       katakana="true" hangul="true" outputUnigrams="false" /&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  */
 public class CJKBigramFilterFactory extends TokenFilterFactory {
   int flags;
+  boolean outputUnigrams;
 
   @Override
   public void init(Map<String,String> args) {
@@ -56,10 +57,11 @@ public class CJKBigramFilterFactory exte
     if (getBoolean("hangul", true)) {
       flags |= CJKBigramFilter.HANGUL;
     }
+    outputUnigrams = getBoolean("outputUnigrams", false);
   }
   
   @Override
   public TokenStream create(TokenStream input) {
-    return new CJKBigramFilter(input, flags);
+    return new CJKBigramFilter(input, flags, outputUnigrams);
   }
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokena
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
  */
 class ClassicTokenizerImpl implements StandardTokenizerInterface {
 
@@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements St
   public static final int YYEOF = -1;
 
   /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;
 
   /** lexical states */
   public static final int YYINITIAL = 0;

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokena
 %function getNextToken
 %pack
 %char
+%buffer 4096
 
 %{
 

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro Mon Aug 13 11:16:57 2012
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
+// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
 
 

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -43,7 +43,7 @@ public final class StandardTokenizerImpl
   public static final int YYEOF = -1;
 
   /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;
 
   /** lexical states */
   public static final int YYINITIAL = 0;

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokena
 %implements StandardTokenizerInterface
 %function getNextToken
 %char
+%buffer 4096
 
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
 ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
 Format =  ([\p{WB:Format}] | {FormatSupp})
 Numeric = ([\p{WB:Numeric}] | {NumericSupp})

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenize
   public static final int YYEOF = -1;
 
   /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;
 
   /** lexical states */
   public static final int YYINITIAL = 0;

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Mon Aug 13 11:16:57 2012
@@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokena
 %implements StandardTokenizerInterface
 %function getNextToken
 %char
+%buffer 4096
 
-%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+%include SUPPLEMENTARY.jflex-macro
 ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
 Format =  ([\p{WB:Format}] | {FormatSupp})
 Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Ext
 //     RFC-5321: Simple Mail Transfer Protocol
 //     RFC-5322: Internet Message Format
 
-%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
+%include ASCIITLD.jflex-macro
 
 DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
 DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1372366&r1=1372365&r2=1372366&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Mon Aug 13 11:16:57 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
 
 package org.apache.lucene.analysis.wikipedia;
 
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 7/15/12 1:57 AM from the specification file
- * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 8/6/12 11:57 AM from the specification file
+ * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
  */
 class WikipediaTokenizerImpl {
 
@@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
   public static final int YYEOF = -1;
 
   /** initial size of the lookahead buffer */
-  private static final int ZZ_BUFFERSIZE = 16384;
+  private static final int ZZ_BUFFERSIZE = 4096;
 
   /** lexical states */
   public static final int THREE_SINGLE_QUOTES_STATE = 10;