Posted to commits@lucene.apache.org by ja...@apache.org on 2013/07/04 01:26:45 UTC

svn commit: r1499601 [2/20] - in /lucene/dev/branches/security: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/idea/solr/core/src/test/ dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/analysis/stempel/ dev-to...

Modified: lucene/dev/branches/security/.gitignore
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/.gitignore?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/.gitignore (original)
+++ lucene/dev/branches/security/.gitignore Wed Jul  3 23:26:32 2013
@@ -1,4 +1,5 @@
-
+# hdfs
+/solr/example/hdfs
 *.jar
 
 # .

Modified: lucene/dev/branches/security/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/build.xml (original)
+++ lucene/dev/branches/security/build.xml Wed Jul  3 23:26:32 2013
@@ -90,6 +90,7 @@
 	
   <target name="rat-sources" description="Runs rat across all sources and tests">
     <subant target="rat-sources" inheritall="false" failonerror="true">
+      <fileset dir="." includes="extra-targets.xml" /><!-- run rat-sources also for root directory -->
       <fileset dir="lucene" includes="build.xml" />
       <fileset dir="solr" includes="build.xml" />
     </subant>
@@ -130,7 +131,7 @@
   
   <target name="get-maven-poms"
           description="Copy Maven POMs from dev-tools/maven/ to maven-build/">
-    <copy todir="${maven-build-dir}" overwrite="true">
+    <copy todir="${maven-build-dir}" overwrite="true" encoding="UTF-8">
       <fileset dir="${basedir}/dev-tools/maven"/>
       <filterset begintoken="@" endtoken="@">
         <filter token="version" value="${version}"/>
@@ -185,7 +186,12 @@
   </target>
 
   <target name="eclipse" depends="resolve" description="Setup Eclipse configuration">
-    <copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false"/>
+    <basename file="${basedir}" property="eclipseprojectname"/>
+      <copy file="dev-tools/eclipse/dot.project" tofile=".project" overwrite="false" encoding="UTF-8">
+      <filterset>
+        <filter token="ECLIPSEPROJECTNAME" value="${eclipseprojectname}"/>
+      </filterset>
+    </copy>
     <mkdir dir=".settings"/>
     <copy todir=".settings/" overwrite="true">
       <fileset dir="dev-tools/eclipse/dot.settings" includes="*.prefs" />
@@ -199,7 +205,7 @@
     <!-- TODO: find a better way to exclude duplicate JAR files & fix the servlet-api mess! -->
     <pathconvert property="eclipse.fileset.libs" pathsep="|" dirsep="/">
       <fileset dir="${basedir}/lucene" includes="**/lib/*.jar" excludes="**/*servlet-api*.jar, analysis/uima/**, tools/**, build/**"/>
-      <fileset dir="${basedir}/solr" includes="**/lib/*.jar" excludes="core/lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/**, build/**, dist/**, package/**" />
+      <fileset dir="${basedir}/solr" includes="**/lib/*.jar" excludes="core/lib/*servlet-api*.jar, contrib/analysis-extras/**, test-framework/lib/junit*, test-framework/lib/ant*, test-framework/lib/randomizedtesting*, build/**, dist/**, package/**" />
       <map from="${basedir}/" to=""/>
     </pathconvert>
     <xslt in="${ant.file}" out=".classpath" style="dev-tools/eclipse/dot.classpath.xsl" force="true">

Modified: lucene/dev/branches/security/dev-tools/eclipse/dot.project
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/eclipse/dot.project?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/eclipse/dot.project (original)
+++ lucene/dev/branches/security/dev-tools/eclipse/dot.project Wed Jul  3 23:26:32 2013
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <projectDescription>
-	<name>lucene_solr_trunk</name>
+	<name>@ECLIPSEPROJECTNAME@</name>
 	<comment></comment>
 	<projects>
 	</projects>

Modified: lucene/dev/branches/security/dev-tools/idea/solr/core/src/test/solr-core-tests.iml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/idea/solr/core/src/test/solr-core-tests.iml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/idea/solr/core/src/test/solr-core-tests.iml (original)
+++ lucene/dev/branches/security/dev-tools/idea/solr/core/src/test/solr-core-tests.iml Wed Jul  3 23:26:32 2013
@@ -13,6 +13,7 @@
     <orderEntry type="library" scope="TEST" name="Solr core library" level="project" />
     <orderEntry type="library" scope="TEST" name="Solrj library" level="project" />
     <orderEntry type="library" scope="TEST" name="Solr example library" level="project" />
+    <orderEntry type="library" scope="TEST" name="Solr test framework library" level="project" />
     <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
     <orderEntry type="module" scope="TEST" module-name="solr-test-framework" />
     <orderEntry type="module" scope="TEST" module-name="solr-core-test-files" />

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/analysis/stempel/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/analysis/stempel/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/analysis/stempel/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/analysis/stempel/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -96,6 +96,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/benchmark/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/benchmark/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/benchmark/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/benchmark/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -142,6 +142,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/core/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/core/src/java/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/core/src/java/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/core/src/java/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -87,6 +87,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/demo/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/demo/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/demo/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/demo/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -113,6 +113,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/facet/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/facet/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/facet/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/facet/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -98,6 +98,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/misc/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/misc/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/misc/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/misc/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -86,6 +86,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -78,6 +78,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>
@@ -96,6 +97,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/tests.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/queryparser/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/queryparser/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/queryparser/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/queryparser/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -101,6 +101,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/lucene/test-framework/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/lucene/test-framework/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/lucene/test-framework/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/lucene/test-framework/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -106,6 +106,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/tests.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -47,10 +47,11 @@
     <jetty.version>8.1.10.v20130312</jetty.version>
     <slf4j.version>1.6.6</slf4j.version>
     <log4j.version>1.2.16</log4j.version>
-    <tika.version>1.3</tika.version>
+    <tika.version>1.4</tika.version>
     <httpcomponents.version>4.2.3</httpcomponents.version>
     <commons-io.version>2.1</commons-io.version>
     <restlet.version>2.1.1</restlet.version>
+    <hadoop.version>2.0.5-alpha</hadoop.version>
 
     <!-- RandomizedTesting library system properties -->
     <tests.iters>1</tests.iters>
@@ -183,6 +184,11 @@
         <version>${commons-io.version}</version>
       </dependency>
       <dependency>
+        <groupId>joda-time</groupId>
+        <artifactId>joda-time</artifactId>
+        <version>2.2</version>
+      </dependency>
+      <dependency>
         <groupId>org.apache.httpcomponents</groupId>
         <artifactId>httpclient</artifactId>
         <version>${httpcomponents.version}</version>
@@ -204,6 +210,16 @@
         <version>0.5</version>
       </dependency>
       <dependency>
+        <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
+        <artifactId>concurrentlinkedhashmap-lru</artifactId>
+        <version>1.2</version>
+      </dependency>
+      <dependency>
+        <groupId>com.sun.jersey</groupId>
+        <artifactId>jersey-core</artifactId>
+        <version>1.16</version>
+      </dependency>
+      <dependency>
         <groupId>commons-lang</groupId>
         <artifactId>commons-lang</artifactId>
         <version>2.6</version>
@@ -281,6 +297,38 @@
         <version>10.9.1.0</version>
       </dependency>
       <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-annotations</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-auth</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+        <classifier>tests</classifier>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+        <classifier>tests</classifier>
+      </dependency>
+      <dependency>
         <groupId>org.apache.tika</groupId>
         <artifactId>tika-core</artifactId>
         <version>${tika.version}</version>
@@ -307,8 +355,8 @@
             <artifactId>vorbis-java-core</artifactId>
           </exclusion>
           <exclusion>
-            <groupId>asm</groupId>
-            <artifactId>asm</artifactId>
+            <groupId>org.ow2.asm</groupId>
+            <artifactId>asm-debug-all</artifactId>
           </exclusion>
           <exclusion>
             <groupId>org.aspectj</groupId>
@@ -364,7 +412,7 @@
       <dependency>
         <groupId>org.carrot2</groupId>
         <artifactId>morfologik-polish</artifactId>
-        <version>1.5.5</version>
+        <version>1.6.0</version>
       </dependency>
       <dependency>
         <groupId>org.codehaus.woodstox</groupId>
@@ -398,6 +446,16 @@
         <version>${jetty.version}</version>
       </dependency>
       <dependency>
+        <groupId>org.mortbay.jetty</groupId>
+        <artifactId>jetty</artifactId>
+        <version>6.1.26</version>
+      </dependency>
+      <dependency>
+        <groupId>org.mortbay.jetty</groupId>
+        <artifactId>jetty-util</artifactId>
+        <version>6.1.26</version>
+      </dependency>
+      <dependency>
         <groupId>org.restlet.jee</groupId>
         <artifactId>org.restlet</artifactId>
         <version>${restlet.version}</version>
@@ -510,7 +568,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-compiler-plugin</artifactId>
-          <version>3.0</version>
+          <version>3.1</version>
           <configuration>
             <source>${java.compat.version}</source>
             <target>${java.compat.version}</target>
@@ -519,7 +577,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-dependency-plugin</artifactId>
-          <version>2.6</version>
+          <version>2.8</version>
         </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
@@ -529,7 +587,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-enforcer-plugin</artifactId>
-          <version>1.2</version>
+          <version>1.3</version>
         </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
@@ -565,7 +623,7 @@
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-javadoc-plugin</artifactId>
-          <version>2.9</version>
+          <version>2.9.1</version>
         </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
@@ -574,8 +632,13 @@
         </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-site-plugin</artifactId>
+          <version>3.3</version>
+        </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-surefire-plugin</artifactId>
-          <version>2.13</version>
+          <version>2.15</version>
           <configuration>
             <runOrder>random</runOrder>
             <reportFormat>plain</reportFormat>
@@ -640,7 +703,7 @@
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>build-helper-maven-plugin</artifactId>
-          <version>1.7</version>
+          <version>1.8</version>
         </plugin>
         <plugin>
           <groupId>org.codehaus.mojo</groupId>
@@ -739,7 +802,7 @@
       <plugin>
         <groupId>org.apache.felix</groupId>
         <artifactId>maven-bundle-plugin</artifactId>
-        <version>2.3.7</version>
+        <version>2.4.0</version>
         <configuration>
           <instructions>
             <Export-Package>*;-split-package:=merge-first</Export-Package>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/core/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/core/src/java/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/core/src/java/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/core/src/java/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -125,6 +125,10 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
+      <groupId>com.googlecode.concurrentlinkedhashmap</groupId>
+      <artifactId>concurrentlinkedhashmap-lru</artifactId>
+    </dependency>
+    <dependency>
       <groupId>commons-codec</groupId>
       <artifactId>commons-codec</artifactId>
     </dependency>
@@ -137,6 +141,146 @@
       <artifactId>commons-fileupload</artifactId>
     </dependency>
     <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-annotations</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-auth</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-math</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xmlenc</groupId>
+          <artifactId>xmlenc</artifactId>                                  
+        </exclusion>                                                          
+        <exclusion>
+          <groupId>commons-httpclient</groupId>
+          <artifactId>commons-httpclient</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-net</groupId>
+          <artifactId>commons-net</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-json</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>net.java.dev.jets3t</groupId>
+          <artifactId>jets3t</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-digester</groupId>
+          <artifactId>commons-digester</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-beanutils</groupId>
+          <artifactId>commons-beanutils-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>net.sf.kosmosfs</groupId>
+          <artifactId>kfs</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.jcraft</groupId>
+          <artifactId>jsch</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xmlenc</groupId>
+          <artifactId>xmlenc</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-daemon</groupId>
+          <artifactId>commons-daemon</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
       <groupId>org.restlet.jee</groupId>
       <artifactId>org.restlet</artifactId>
       <version>${restlet.version}</version>
@@ -249,6 +393,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/servlet-api.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/core/src/test/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/core/src/test/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/core/src/test/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/core/src/test/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -138,6 +138,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/servlet-api.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -34,7 +34,6 @@
   <modules>
     <module>core</module>
     <module>solrj</module>
-    <module>webapp</module>
     <module>test-framework</module>
     <module>contrib</module>
   </modules>
@@ -149,6 +148,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>
@@ -169,6 +169,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/tests.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/java/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/java/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/java/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/java/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -155,6 +155,7 @@
               </bundledSignatures>
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
                 <!-- Solr-J does NOT depend on servlet-api -->
                 <!-- <signaturesFile>${top-level}/lucene/tools/forbiddenApis/servlet-api.txt</signaturesFile> -->
               </signaturesFiles>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/test/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/test/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/test/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/solrj/src/test/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -142,6 +142,7 @@
               <signaturesFiles>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/tests.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/maven/solr/test-framework/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/maven/solr/test-framework/pom.xml.template?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/maven/solr/test-framework/pom.xml.template (original)
+++ lucene/dev/branches/security/dev-tools/maven/solr/test-framework/pom.xml.template Wed Jul  3 23:26:32 2013
@@ -65,6 +65,128 @@
       <artifactId>junit</artifactId>
     </dependency>
     <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <classifier>tests</classifier>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-math</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xmlenc</groupId>
+          <artifactId>xmlenc</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-httpclient</groupId>
+          <artifactId>commons-httpclient</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-net</groupId>
+          <artifactId>commons-net</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-json</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-compiler</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-el</groupId>
+          <artifactId>commons-el</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>net.java.dev.jets3t</groupId>
+          <artifactId>jets3t</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-digester</groupId>
+          <artifactId>commons-digester</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.avro</groupId>
+          <artifactId>avro</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>net.sf.kosmosfs</groupId>
+          <artifactId>kfs</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.jcraft</groupId>
+          <artifactId>jsch</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-beanutils</groupId>
+          <artifactId>commons-beanutils-core</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <classifier>tests</classifier>
+      <exclusions>
+        <exclusion>
+          <groupId>javax.servlet</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-server</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-logging</groupId>
+          <artifactId>commons-logging</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-core-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.codehaus.jackson</groupId>
+          <artifactId>jackson-mapper-asl</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>tomcat</groupId>
+          <artifactId>jasper-runtime</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>xmlenc</groupId>
+          <artifactId>xmlenc</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>commons-daemon</groupId>
+          <artifactId>commons-daemon</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-servlet</artifactId>
     </dependency>
@@ -72,6 +194,23 @@
       <groupId>org.eclipse.jetty</groupId>
       <artifactId>jetty-util</artifactId>
     </dependency>
+
+    <!-- Jetty 6 required for Hadoop DfsMiniCluster -->
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.mortbay.jetty</groupId>
+          <artifactId>servlet-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+    </dependency>
+
     <!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
          you can exclude the two Jetty dependencies below. -->
     <dependency>
@@ -130,6 +269,7 @@
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/servlet-api.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/tests.txt</signaturesFile>
                 <signaturesFile>${top-level}/lucene/tools/forbiddenApis/executors.txt</signaturesFile>
+                <signaturesFile>${top-level}/lucene/tools/forbiddenApis/chars.txt</signaturesFile>
               </signaturesFiles>
             </configuration>
             <goals>

Modified: lucene/dev/branches/security/dev-tools/scripts/buildAndPushRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/scripts/buildAndPushRelease.py?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/scripts/buildAndPushRelease.py (original)
+++ lucene/dev/branches/security/dev-tools/scripts/buildAndPushRelease.py Wed Jul  3 23:26:32 2013
@@ -46,9 +46,10 @@ def run(command):
     raise RuntimeError(msg)
 
 def runAndSendGPGPassword(command, password):
-  p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
+  p = subprocess.Popen(command, shell=True, bufsize=0, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, stdin=subprocess.PIPE)
   f = open(LOG, 'ab')
   while True:
+    p.stdout.flush()
     line = p.stdout.readline()
     if len(line) == 0:
       break

Modified: lucene/dev/branches/security/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/dev-tools/scripts/smokeTestRelease.py?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/security/dev-tools/scripts/smokeTestRelease.py Wed Jul  3 23:26:32 2013
@@ -970,10 +970,6 @@ def getDistributionsForMavenChecks(tmpDi
     print('    unpack %s...' % distribution)
     unpackLogFile = '%s/unpack-%s-maven-checks.log' % (tmpDir, distribution)
     run('tar xzf %s/%s' % (tmpDir, distribution), unpackLogFile)
-    if project == 'solr': # unpack the Solr war
-      unpackLogFile = '%s/unpack-solr-war-maven-checks.log' % tmpDir
-      print('        unpack Solr war...')
-      run('jar xvf */dist/*.war', unpackLogFile)
     distributionFiles[project] = []
     for root, dirs, files in os.walk(destDir):
       distributionFiles[project].extend([os.path.join(root, file) for file in files])
@@ -1309,7 +1305,7 @@ def main():
 
   if len(sys.argv) < 5:
     print()
-    print('Usage python -u %s BaseURL SvnRevision version tmpDir [ isSigned ] [ -testArgs "-Dwhat=ever [ ... ]" ]'
+    print('Usage python -u %s BaseURL SvnRevision version tmpDir [ isSigned(True|False) ] [ -testArgs "-Dwhat=ever [ ... ]" ]'
           % sys.argv[0])
     print()
     print('  example: python3.2 -u dev-tools/scripts/smokeTestRelease.py http://people.apache.org/~whoever/staging_area/lucene-solr-4.3.0-RC1-rev1469340 1469340 4.3.0 /path/to/a/tmp/dir')

Modified: lucene/dev/branches/security/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/CHANGES.txt?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/security/lucene/CHANGES.txt Wed Jul  3 23:26:32 2013
@@ -23,11 +23,19 @@ Changes in backwards compatibility polic
   not positioned. This change affects all classes that inherit from
   DocIdSetIterator, including DocsEnum and DocsAndPositionsEnum. (Adrien Grand)
 
+* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
+  no longer support multiple "dictionaries" as there is only one dictionary available.
+  (Dawid Weiss)
+
 New Features
 
 * LUCENE-4747: Move to Java 7 as minimum Java version.
   (Robert Muir, Uwe Schindler)
 
+* LUCENE-5089: Update to Morfologik 1.6.0. MorfologikAnalyzer and MorfologikFilter 
+  no longer support multiple "dictionaries" as there is only one dictionary available.
+  (Dawid Weiss)
+
 Optimizations
 
 * LUCENE-4848: Use Java 7 NIO2-FileChannel instead of RandomAccessFile
@@ -39,6 +47,9 @@ Optimizations
 
 Changes in backwards compatibility policy
 
+* LUCENE-5085: MorfologikFilter will no longer stem words marked as keywords
+  (Dawid Weiss, Grzegorz Sobczyk)
+
 * LUCENE-4955: NGramTokenFilter now emits all n-grams for the same token at the
   same position and preserves the position length and the offsets of the
   original token. (Simon Willnauer, Adrien Grand)
@@ -47,6 +58,10 @@ Changes in backwards compatibility polic
   (a, ab, b, bc, c) instead of (a, b, c, ab, bc) and doesn't trim trailing
   whitespaces. (Adrien Grand)
 
+* LUCENE-5042: The n-gram and edge n-gram tokenizers and filters now correctly
+  handle supplementary characters, and the tokenizers have the ability to
+  pre-tokenize the input stream similarly to CharTokenizer. (Adrien Grand)
+
 * LUCENE-4967: NRTManager is replaced by
   ControlledRealTimeReopenThread, for controlling which requests must
   see which indexing changes, so that it can work with any
@@ -90,8 +105,31 @@ Changes in backwards compatibility polic
   categories. You should set TakmiSampleFixer on SamplingParams if required (but 
   notice that this means slower search). (Rob Audenaerde, Gilad Barkai, Shai Erera)
 
+* LUCENE-4933: Replace ExactSimScorer/SloppySimScorer with just SimScorer. Previously
+  there were 2 implementations as a performance hack to support tableization of
+  sqrt(), but this caching is removed, as sqrt is implemented in hardware with modern 
+  JVMs and it's faster not to cache.  (Robert Muir)
+
+* LUCENE-5038: MergePolicy now has a default implementation for useCompoundFile based
+  on segment size and noCFSRatio. The default implementation was pulled up from
+  TieredMergePolicy. (Simon Willnauer)
+
+* LUCENE-5063: FieldCache.get(Bytes|Shorts), SortField.Type.(BYTE|SHORT) and
+  FieldCache.DEFAULT_(BYTE|SHORT|INT|LONG|FLOAT|DOUBLE)_PARSER are now
+  deprecated. These methods/types assume that data is stored as strings although
+  Lucene has much better support for numeric data through (Int|Long)Field,
+  NumericRangeQuery and FieldCache.get(Int|Long)s. (Adrien Grand)
+
+* LUCENE-5078: TfIDFSimilarity lets you encode the norm value as any arbitrary long.
+  As a result, encode/decodeNormValue were made abstract with their signatures changed.
+  The default implementation was moved to DefaultSimilarity, which encodes the norm as
+  a single-byte value. (Shai Erera)
+
 Bug Fixes
 
+* LUCENE-4890: QueryTreeBuilder.getBuilder() only finds interfaces on the 
+  most derived class. (Adriano Crestani)
+
 * LUCENE-4997: Internal test framework's tests are sensitive to previous 
   test failures and tests.failfast. (Dawid Weiss, Shai Erera)
 
@@ -123,6 +161,48 @@ Bug Fixes
   some readers did not have the requested numeric DV field.
   (Rob Audenaerde, Shai Erera)
 
+* LUCENE-5028: Remove pointless and confusing doShare option in FST's
+  PositiveIntOutputs (Han Jiang via Mike McCandless)
+
+* LUCENE-5032: Fix IndexOutOfBoundsExc in PostingsHighlighter when
+  multi-valued fields exceed maxLength (Tomás Fernández Löbbe
+  via Mike McCandless)
+
+* LUCENE-4933: SweetSpotSimilarity didn't apply its tf function to some
+  queries (SloppyPhraseQuery, SpanQueries).  (Robert Muir)
+
+* LUCENE-5033: SlowFuzzyQuery was accepting too many terms (documents) when
+  provided minSimilarity is an int > 1 (Tim Allison via Mike McCandless)
+
+* LUCENE-5045: DrillSideways.search did not work on an empty index. (Shai Erera)
+
+* LUCENE-4995: CompressingStoredFieldsReader now only reuses an internal buffer
+  when there is no more than 32kb to decompress. This prevents running
+  into out-of-memory errors when working with large stored fields.
+  (Adrien Grand)
+
+* LUCENE-5048: CategoryPath with a long path could result in hitting 
+  NegativeArraySizeException, categories being added multiple times to the 
+  taxonomy or drill-down terms silently discarded by the indexer. CategoryPath 
+  is now limited to MAX_CATEGORY_PATH_LENGTH characters.
+  (Colton Jamieson, Mike McCandless, Shai Erera)
+
+* LUCENE-5062: If the spatial data for a document was comprised of multiple
+  overlapping or adjacent parts then a CONTAINS predicate query might not match
+  when the sum of those shapes contain the query shape but none do individually.
+  A flag was added to use the original faster algorithm. (David Smiley)
+
+* LUCENE-4971: Fixed NPE in AnalyzingSuggester when there are too many
+  graph expansions.  (Alexey Kudinov via Mike McCandless)
+
+* LUCENE-5080: Combined setMaxMergeCount and setMaxThreadCount into one
+  setter in ConcurrentMergePolicy: setMaxMergesAndThreads.  Previously these
+  setters would not work unless you invoked them very carefully.
+  (Robert Muir, Shai Erera)
+  
+* LUCENE-5068: QueryParserUtil.escape() does not escape forward slash.
+  (Matias Holte via Steve Rowe)
+
 Optimizations
 
 * LUCENE-4936: Improve numeric doc values compression in case all values share
@@ -137,8 +217,23 @@ Optimizations
   single snapshots_N file, and no longer requires closing (Mike
   McCandless, Shai Erera)
 
+* LUCENE-5035: Compress addresses in FieldCacheImpl.SortedDocValuesImpl more
+  efficiently. (Adrien Grand, Robert Muir)
+
+* LUCENE-4941: Sort "from" terms only once when using JoinUtil.
+  (Martijn van Groningen)
+
+* LUCENE-5050: Close the stored fields and term vectors index files as soon as
+  the index has been loaded into memory to save file descriptors. (Adrien Grand)
+
 New Features
 
+* LUCENE-5085: MorfologikFilter will no longer stem words marked as keywords
+  (Dawid Weiss, Grzegorz Sobczyk)
+
+* LUCENE-5064: Added PagedMutable (internal), a paged extension of
+  PackedInts.Mutable which allows for storing more than 2B values. (Adrien Grand)
+
 * LUCENE-4766: Added a PatternCaptureGroupTokenFilter that uses Java regexes to 
   emit multiple tokens one for each capture group in one or more patterns.
   (Simon Willnauer, Clinton Gormley)
@@ -169,6 +264,37 @@ New Features
 * LUCENE-5022: Added FacetResult.mergeHierarchies to merge multiple
   FacetResult of the same dimension into a single one with the reconstructed
   hierarchy. (Shai Erera)
+
+* LUCENE-5026: Added PagedGrowableWriter, a new internal packed-ints structure
+  that grows the number of bits per value on demand, can store more than 2B
+  values and supports random write and read access. (Adrien Grand)
+
+* LUCENE-5025: FST's Builder can now handle more than 2.1 billion
+  "tail nodes" while building a minimal FST.  (Aaron Binns, Adrien
+  Grand, Mike McCandless)
+
+* LUCENE-5063: FieldCache.DEFAULT.get(Ints|Longs) now uses bit-packing to save
+  memory. (Adrien Grand)
+
+* LUCENE-5079: IndexWriter.hasUncommittedChanges() returns true if there are
+  changes that have not been committed. (yonik, Mike McCandless, Uwe Schindler)
+
+* SOLR-4565: Extend NorwegianLightStemFilter and NorwegianMinimalStemFilter 
+  to handle "nynorsk" (Erlend Garåsen, janhoy via Robert Muir)
+
+* LUCENE-5087: Add getMultiValuedSeparator to PostingsHighlighter, for cases
+  where you want a different logical separator between field values. This can
+  be set to e.g. U+2029 PARAGRAPH SEPARATOR if you never want passages to span
+  values. (Mike McCandless, Robert Muir)
+
+* LUCENE-5013: Added ScandinavianFoldingFilterFactory and
+  ScandinavianNormalizationFilterFactory (Karl Wettin via janhoy)
+
+API Changes
+
+* LUCENE-5077: Make it easier to use compressed norms. Lucene42NormsFormat takes
+  an overhead parameter, so you can easily pass a different value other than
+  PackedInts.FASTEST from your own codec.  (Robert Muir)
   
 Build
 
@@ -176,12 +302,31 @@ Build
   Test framework may fail internally due to overly aggressive J9 optimizations. 
   (Dawid Weiss, Shai Erera)
 
+* LUCENE-5043: The eclipse target now uses the containing directory for the
+  project name.  This also enforces UTF-8 encoding when files are copied with
+  filtering.
+
+* LUCENE-5055: "rat-sources" target now checks also build.xml, ivy.xml,
+  forbidden-api signatures, and parts of resources folders.  (Ryan Ernst,
+  Uwe Schindler)
+
+* LUCENE-5072: Automatically patch javadocs generated by JDK versions
+  before 7u25 to work around the frame injection vulnerability (CVE-2013-1571,
+  VU#225657).  (Uwe Schindler)
+
 Tests
 
 * LUCENE-4901: TestIndexWriterOnJRECrash should work on any 
   JRE vendor via Runtime.halt().
   (Mike McCandless, Robert Muir, Uwe Schindler, Rodrigo Trujillo, Dawid Weiss)
 
+Changes in runtime behavior
+
+* LUCENE-5038: New segments written by IndexWriter are now wrapped into CFS
+  by default. DocumentsWriterPerThread doesn't consult MergePolicy anymore 
+  to decide if a CFS must be written, instead IndexWriterConfig now has a
+  property to enable / disable CFS for newly created segments. (Simon Willnauer)
+
 ======================= Lucene 4.3.1 =======================
 
 Bug Fixes
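
A minimal usage sketch for the getMultiValuedSeparator hook mentioned in the LUCENE-5087 entry above, assuming the hook is the protected char getMultiValuedSeparator(String field) method on PostingsHighlighter; the subclass name is invented and nothing below is part of this commit:

    // Illustrative sketch only: make highlight passages stop at field-value
    // boundaries by overriding the separator, as suggested in the CHANGES entry.
    import org.apache.lucene.search.postingshighlight.PostingsHighlighter;

    public class ParagraphSeparatorHighlighter extends PostingsHighlighter {
      @Override
      protected char getMultiValuedSeparator(String field) {
        return '\u2029'; // U+2029 PARAGRAPH SEPARATOR
      }
    }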

Modified: lucene/dev/branches/security/lucene/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/build.xml (original)
+++ lucene/dev/branches/security/lucene/analysis/common/build.xml Wed Jul  3 23:26:32 2013
@@ -25,6 +25,7 @@
 
   <!-- some files for testing that do not have license headers -->
   <property name="rat.excludes" value="**/*.aff,**/*.dic,**/*.txt,**/charfilter/*.htm*,**/*LuceneResourcesWikiPage.html"/>
+  <property name="rat.additional-includes" value="src/tools/**"/>
 
   <import file="../analysis-module-build.xml"/>
 	

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java Wed Jul  3 23:26:32 2013
@@ -57,7 +57,7 @@ public final class GreekLowerCaseFilter 
       int chLen = termAtt.length();
       for (int i = 0; i < chLen;) {
         i += Character.toChars(
-            lowerCase(charUtils.codePointAt(chArray, i)), chArray, i);
+            lowerCase(charUtils.codePointAt(chArray, i, chLen)), chArray, i);
        }
       return true;
     } else {
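
The GreekLowerCaseFilter change above bounds codePointAt by the term length so a trailing high surrogate is never paired with stale characters sitting in the buffer beyond the term. A self-contained sketch of the same idea using the plain java.lang.Character overloads (the buffer contents are invented for illustration):

    // Illustrative sketch only: a term buffer is usually longer than the term,
    // so reading a code point without a limit can pick up a stale low surrogate.
    public class BoundedCodePointDemo {
      public static void main(String[] args) {
        char[] buf = new char[8];
        buf[0] = (char) 0xD801; // the one-char term ends with a high surrogate
        buf[1] = (char) 0xDC00; // stale low surrogate left over from an earlier, longer token
        int len = 1;
        int unbounded = Character.codePointAt(buf, 0);      // pairs with the stale char: 0x10400
        int bounded   = Character.codePointAt(buf, 0, len); // stops at the term length: 0xD801
        System.out.println(Integer.toHexString(unbounded) + " vs " + Integer.toHexString(bounded));
      }
    }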

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java Wed Jul  3 23:26:32 2013
@@ -378,17 +378,14 @@ public class HunspellDictionary {
         wordForm = new HunspellWord(flagParsingStrategy.parseFlags(flagPart));
         Arrays.sort(wordForm.getFlags());
         entry = line.substring(0, flagSep);
-        if(ignoreCase) {
-          entry = entry.toLowerCase(Locale.ROOT);
-        }
       }
-      
-      List<HunspellWord> entries = words.get(entry);
-      if (entries == null) {
-        entries = new ArrayList<HunspellWord>();
-        words.put(entry, entries);
+      if(ignoreCase) {
+        entry = entry.toLowerCase(Locale.ROOT);
       }
+
+      List<HunspellWord> entries = new ArrayList<HunspellWord>();
       entries.add(wordForm);
+      words.put(entry, entries);
     }
   }
 

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java Wed Jul  3 23:26:32 2013
@@ -25,21 +25,26 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 /**
  * Tokenizes the given token into n-grams of given size(s).
  * <p>
  * This {@link TokenFilter} create n-grams from the beginning edge of a input token.
+ * <p><a name="match_version" />As of Lucene 4.4, this filter correctly handles
+ * supplementary characters.
  */
 public final class EdgeNGramTokenFilter extends TokenFilter {
   public static final int DEFAULT_MAX_GRAM_SIZE = 1;
   public static final int DEFAULT_MIN_GRAM_SIZE = 1;
 
+  private final CharacterUtils charUtils;
   private final int minGram;
   private final int maxGram;
   private char[] curTermBuffer;
   private int curTermLength;
+  private int curCodePointCount;
   private int curGramSize;
   private int tokStart;
   private int tokEnd; // only used if the length changed before this filter
@@ -74,6 +79,9 @@ public final class EdgeNGramTokenFilter 
       throw new IllegalArgumentException("minGram must not be greater than maxGram");
     }
 
+    this.charUtils = version.onOrAfter(Version.LUCENE_44)
+        ? CharacterUtils.getInstance(version)
+        : CharacterUtils.getJava4Instance();
     this.minGram = minGram;
     this.maxGram = maxGram;
   }
@@ -87,6 +95,7 @@ public final class EdgeNGramTokenFilter 
         } else {
           curTermBuffer = termAtt.buffer().clone();
           curTermLength = termAtt.length();
+          curCodePointCount = charUtils.codePointCount(termAtt);
           curGramSize = minGram;
           tokStart = offsetAtt.startOffset();
           tokEnd = offsetAtt.endOffset();
@@ -95,7 +104,7 @@ public final class EdgeNGramTokenFilter 
         }
       }
       if (curGramSize <= maxGram) {         // if we have hit the end of our n-gram size range, quit
-        if (curGramSize <= curTermLength) { // if the remaining input is too short, we can't generate any n-grams
+        if (curGramSize <= curCodePointCount) { // if the remaining input is too short, we can't generate any n-grams
           // grab gramSize chars from front or back
           clearAttributes();
           offsetAtt.setOffset(tokStart, tokEnd);
@@ -107,7 +116,8 @@ public final class EdgeNGramTokenFilter 
             posIncrAtt.setPositionIncrement(0);
           }
           posLenAtt.setPositionLength(savePosLen);
-          termAtt.copyBuffer(curTermBuffer, 0, curGramSize);
+          final int charLength = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curGramSize);
+          termAtt.copyBuffer(curTermBuffer, 0, charLength);
           curGramSize++;
           return true;
         }
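
The filter now measures gram sizes in Unicode code points (curCodePointCount) and converts a gram size back to a char length with offsetByCodePoints before copying into the term attribute, so supplementary characters are never cut in half. A minimal sketch of the distinction using only java.lang (the CharacterUtils calls above behave analogously):

    // Why code-point counting matters: a supplementary character occupies two Java chars.
    public class CodePointDemo {
      public static void main(String[] args) {
        String s = "a" + new String(Character.toChars(0x1D50A)) + "b"; // 'a', U+1D50A, 'b'
        System.out.println(s.length());                      // 4 chars (the surrogate pair counts twice)
        System.out.println(s.codePointCount(0, s.length())); // 3 code points
        // Mapping a gram size of 2 code points back to a char length, as the filter now does:
        System.out.println(s.offsetByCodePoints(0, 2));      // 3 chars cover the first 2 code points
      }
    }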

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java Wed Jul  3 23:26:32 2013
@@ -17,37 +17,23 @@ package org.apache.lucene.analysis.ngram
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Version;
 
 /**
  * Tokenizes the input from an edge into n-grams of given size(s).
  * <p>
  * This {@link Tokenizer} creates n-grams from the beginning edge of an input token.
+ * <p><a name="match_version" />As of Lucene 4.4, this class supports
+ * {@link #isTokenChar(int) pre-tokenization} and correctly handles
+ * supplementary characters.
  */
-public final class EdgeNGramTokenizer extends Tokenizer {
+public class EdgeNGramTokenizer extends NGramTokenizer {
   public static final int DEFAULT_MAX_GRAM_SIZE = 1;
   public static final int DEFAULT_MIN_GRAM_SIZE = 1;
 
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
-  private int minGram;
-  private int maxGram;
-  private int gramSize;
-  private boolean started;
-  private int inLen; // length of the input AFTER trim()
-  private int charsRead; // length of the input
-  private String inStr;
-
   /**
    * Creates EdgeNGramTokenizer that can generate n-grams in the sizes of the given range
    *
@@ -57,8 +43,7 @@ public final class EdgeNGramTokenizer ex
    * @param maxGram the largest n-gram to generate
    */
   public EdgeNGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
-    super(input);
-    init(version, minGram, maxGram);
+    super(version, input, minGram, maxGram, true);
   }
 
   /**
@@ -71,102 +56,7 @@ public final class EdgeNGramTokenizer ex
    * @param maxGram the largest n-gram to generate
    */
   public EdgeNGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
-    super(factory, input);
-    init(version, minGram, maxGram);
-  }
-
-  private void init(Version version, int minGram, int maxGram) {
-    if (version == null) {
-      throw new IllegalArgumentException("version must not be null");
-    }
-
-    if (minGram < 1) {
-      throw new IllegalArgumentException("minGram must be greater than zero");
-    }
-
-    if (minGram > maxGram) {
-      throw new IllegalArgumentException("minGram must not be greater than maxGram");
-    }
-
-    this.minGram = minGram;
-    this.maxGram = maxGram;
+    super(version, factory, input, minGram, maxGram, true);
   }
 
-  /** Returns the next token in the stream, or null at EOS. */
-  @Override
-  public boolean incrementToken() throws IOException {
-    clearAttributes();
-    // if we are just starting, read the whole input
-    if (!started) {
-      started = true;
-      gramSize = minGram;
-      char[] chars = new char[Math.min(1024, maxGram)];
-      charsRead = 0;
-      // TODO: refactor to a shared readFully somewhere:
-      boolean exhausted = false;
-      while (charsRead < maxGram) {
-        final int inc = input.read(chars, charsRead, chars.length-charsRead);
-        if (inc == -1) {
-          exhausted = true;
-          break;
-        }
-        charsRead += inc;
-        if (charsRead == chars.length && charsRead < maxGram) {
-          chars = ArrayUtil.grow(chars);
-        }
-      }
-
-      inStr = new String(chars, 0, charsRead);
-
-      if (!exhausted) {
-        // Read extra throwaway chars so that on end() we
-        // report the correct offset:
-        char[] throwaway = new char[1024];
-        while(true) {
-          final int inc = input.read(throwaway, 0, throwaway.length);
-          if (inc == -1) {
-            break;
-          }
-          charsRead += inc;
-        }
-      }
-
-      inLen = inStr.length();
-      if (inLen == 0) {
-        return false;
-      }
-      posIncrAtt.setPositionIncrement(1);
-    } else {
-      posIncrAtt.setPositionIncrement(1);
-    }
-
-    // if the remaining input is too short, we can't generate any n-grams
-    if (gramSize > inLen) {
-      return false;
-    }
-
-    // if we have hit the end of our n-gram size range, quit
-    if (gramSize > maxGram || gramSize > inLen) {
-      return false;
-    }
-
-    // grab gramSize chars from front or back
-    termAtt.setEmpty().append(inStr, 0, gramSize);
-    offsetAtt.setOffset(correctOffset(0), correctOffset(gramSize));
-    gramSize++;
-    return true;
-  }
-  
-  @Override
-  public void end() {
-    // set final offset
-    final int finalOffset = correctOffset(charsRead);
-    this.offsetAtt.setOffset(finalOffset, finalOffset);
-  }    
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    started = false;
-  }
 }
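
EdgeNGramTokenizer is now a thin subclass of NGramTokenizer constructed with edgesOnly=true, so buffering, code-point handling and offset bookkeeping all live in one place. A minimal usage sketch, assuming lucene-core and lucene-analyzers-common 4.4 on the classpath:

    import java.io.StringReader;
    import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class EdgeNGramDemo {
      public static void main(String[] args) throws Exception {
        // Emit leading-edge grams of length 1..3 for the input "lucene".
        EdgeNGramTokenizer tok =
            new EdgeNGramTokenizer(Version.LUCENE_44, new StringReader("lucene"), 1, 3);
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        tok.reset();
        while (tok.incrementToken()) {
          System.out.println(term.toString()); // expected: l, lu, luc
        }
        tok.end();
        tok.close();
      }
    }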

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java Wed Jul  3 23:26:32 2013
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 /**
@@ -33,6 +34,7 @@ import org.apache.lucene.util.Version;
  * <a name="version"/>
  * <p>You must specify the required {@link Version} compatibility when
  * creating a {@link NGramTokenFilter}. As of Lucene 4.4, this token filter:<ul>
+ * <li>handles supplementary characters correctly,</li>
  * <li>emits all n-grams for the same token at the same position,</li>
  * <li>does not modify offsets,</li>
  * <li>sorts n-grams by their offset in the original token first, then
@@ -42,6 +44,10 @@ import org.apache.lucene.util.Version;
  * {@link Version#LUCENE_44} in the constructor but this is not recommended as
  * it will lead to broken {@link TokenStream}s that will cause highlighting
  * bugs.
+ * <p>If you were using this {@link TokenFilter} to perform partial highlighting,
+ * this won't work anymore since this filter doesn't update offsets. You should
+ * modify your analysis chain to use {@link NGramTokenizer}, and potentially
+ * override {@link NGramTokenizer#isTokenChar(int)} to perform pre-tokenization.
  */
 public final class NGramTokenFilter extends TokenFilter {
   public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
@@ -51,6 +57,7 @@ public final class NGramTokenFilter exte
 
   private char[] curTermBuffer;
   private int curTermLength;
+  private int curCodePointCount;
   private int curGramSize;
   private int curPos;
   private int curPosInc, curPosLen;
@@ -59,6 +66,7 @@ public final class NGramTokenFilter exte
   private boolean hasIllegalOffsets; // only if the length changed before this filter
 
   private final Version version;
+  private final CharacterUtils charUtils;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt;
   private final PositionLengthAttribute posLenAtt;
@@ -75,6 +83,9 @@ public final class NGramTokenFilter exte
   public NGramTokenFilter(Version version, TokenStream input, int minGram, int maxGram) {
     super(new LengthFilter(version, input, minGram, Integer.MAX_VALUE));
     this.version = version;
+    this.charUtils = version.onOrAfter(Version.LUCENE_44)
+        ? CharacterUtils.getInstance(version)
+        : CharacterUtils.getJava4Instance();
     if (minGram < 1) {
       throw new IllegalArgumentException("minGram must be greater than zero");
     }
@@ -126,6 +137,7 @@ public final class NGramTokenFilter exte
         } else {
           curTermBuffer = termAtt.buffer().clone();
           curTermLength = termAtt.length();
+          curCodePointCount = charUtils.codePointCount(termAtt);
           curGramSize = minGram;
           curPos = 0;
           curPosInc = posIncAtt.getPositionIncrement();
@@ -138,13 +150,15 @@ public final class NGramTokenFilter exte
         }
       }
       if (version.onOrAfter(Version.LUCENE_44)) {
-        if (curGramSize > maxGram || curPos + curGramSize > curTermLength) {
+        if (curGramSize > maxGram || (curPos + curGramSize) > curCodePointCount) {
           ++curPos;
           curGramSize = minGram;
         }
-        if (curPos + curGramSize <= curTermLength) {
+        if ((curPos + curGramSize) <= curCodePointCount) {
           clearAttributes();
-          termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
+          final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos);
+          final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
+          termAtt.copyBuffer(curTermBuffer, start, end - start);
           posIncAtt.setPositionIncrement(curPosInc);
           curPosInc = 0;
           posLenAtt.setPositionLength(curPosLen);
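
With LUCENE_44 the filter now advances positions and gram sizes in code points and only converts to char offsets when copying the term, via the two offsetByCodePoints calls above. A minimal usage sketch (WhitespaceTokenizer is used here purely as an illustrative token source):

    import java.io.StringReader;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.ngram.NGramTokenFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class NGramFilterDemo {
      public static void main(String[] args) throws Exception {
        WhitespaceTokenizer source =
            new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("abc"));
        NGramTokenFilter grams = new NGramTokenFilter(Version.LUCENE_44, source, 1, 2);
        CharTermAttribute term = grams.addAttribute(CharTermAttribute.class);
        grams.reset();
        while (grams.incrementToken()) {
          // 4.4 order: by start offset first, then by length -> a, ab, b, bc, c
          System.out.println(term.toString());
        }
        grams.end();
        grams.close();
      }
    }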

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java Wed Jul  3 23:26:32 2013
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.util.CharacterUtils;
 import org.apache.lucene.util.Version;
 
 /**
@@ -40,29 +41,47 @@ import org.apache.lucene.util.Version;
  * <tr><th>Offsets</th><td>[0,2[</td><td>[0,3[</td><td>[1,3[</td><td>[1,4[</td><td>[2,4[</td><td>[2,5[</td><td>[3,5[</td></tr>
  * </table>
  * <a name="version"/>
- * <p>Before Lucene 4.4, this class had a different behavior:<ul>
- * <li>It didn't support more than 1024 chars of input, the rest was trashed.</li>
- * <li>The last whitespaces of the 1024 chars block were trimmed.</li>
- * <li>Tokens were emitted in a different order (by increasing lengths).</li></ul>
- * <p>Although highly discouraged, it is still possible to use the old behavior
- * through {@link Lucene43NGramTokenizer}.
+ * <p>This tokenizer changed a lot in Lucene 4.4 in order to:<ul>
+ * <li>tokenize in a streaming fashion to support streams which are larger
+ * than 1024 chars (the limit of the previous version),
+ * <li>count grams based on Unicode code points instead of Java chars (and
+ * never split in the middle of surrogate pairs),
+ * <li>give the ability to {@link #isTokenChar(int) pre-tokenize} the stream
+ * before computing n-grams.</ul>
+ * <p>Additionally, this class doesn't trim trailing whitespace and emits
+ * tokens in a different order: tokens are now emitted by increasing start
+ * offsets, while they used to be emitted by increasing length (which prevented
+ * support for large input streams).
+ * <p>Although <b style="color:red">highly</b> discouraged, it is still possible
+ * to use the old behavior through {@link Lucene43NGramTokenizer}.
  */
-public final class NGramTokenizer extends Tokenizer {
+// non-final to allow for overriding isTokenChar, but all other methods should be final
+public class NGramTokenizer extends Tokenizer {
   public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
   public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
 
-  private char[] buffer;
-  private int bufferStart, bufferEnd; // remaining slice of the buffer
+  private CharacterUtils charUtils;
+  private CharacterUtils.CharacterBuffer charBuffer;
+  private int[] buffer; // like charBuffer, but converted to code points
+  private int bufferStart, bufferEnd; // remaining slice in buffer
   private int offset;
   private int gramSize;
   private int minGram, maxGram;
   private boolean exhausted;
+  private int lastCheckedChar; // last offset in the buffer that we checked
+  private int lastNonTokenChar; // last offset that we found to not be a token char
+  private boolean edgesOnly; // leading edges n-grams only
 
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
   private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
+  NGramTokenizer(Version version, Reader input, int minGram, int maxGram, boolean edgesOnly) {
+    super(input);
+    init(version, minGram, maxGram, edgesOnly);
+  }
+
   /**
    * Creates NGramTokenizer with given min and max n-grams.
    * @param version the lucene compatibility <a href="#version">version</a>
@@ -71,8 +90,12 @@ public final class NGramTokenizer extend
    * @param maxGram the largest n-gram to generate
    */
   public NGramTokenizer(Version version, Reader input, int minGram, int maxGram) {
-    super(input);
-    init(version, minGram, maxGram);
+    this(version, input, minGram, maxGram, false);
+  }
+
+  NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram, boolean edgesOnly) {
+    super(factory, input);
+    init(version, minGram, maxGram, edgesOnly);
   }
 
   /**
@@ -84,8 +107,7 @@ public final class NGramTokenizer extend
    * @param maxGram the largest n-gram to generate
    */
   public NGramTokenizer(Version version, AttributeFactory factory, Reader input, int minGram, int maxGram) {
-    super(factory, input);
-    init(version, minGram, maxGram);
+    this(version, factory, input, minGram, maxGram, false);
   }
 
   /**
@@ -97,10 +119,13 @@ public final class NGramTokenizer extend
     this(version, input, DEFAULT_MIN_NGRAM_SIZE, DEFAULT_MAX_NGRAM_SIZE);
   }
 
-  private void init(Version version, int minGram, int maxGram) {
-    if (!version.onOrAfter(Version.LUCENE_44)) {
+  private void init(Version version, int minGram, int maxGram, boolean edgesOnly) {
+    if (!edgesOnly && !version.onOrAfter(Version.LUCENE_44)) {
       throw new IllegalArgumentException("This class only works with Lucene 4.4+. To emulate the old (broken) behavior of NGramTokenizer, use Lucene43NGramTokenizer");
     }
+    charUtils = version.onOrAfter(Version.LUCENE_44)
+        ? CharacterUtils.getInstance(version)
+        : CharacterUtils.getJava4Instance();
     if (minGram < 1) {
       throw new IllegalArgumentException("minGram must be greater than zero");
     }
@@ -109,66 +134,107 @@ public final class NGramTokenizer extend
     }
     this.minGram = minGram;
     this.maxGram = maxGram;
-    buffer = new char[maxGram + 1024];
+    this.edgesOnly = edgesOnly;
+    charBuffer = CharacterUtils.newCharacterBuffer(2 * maxGram + 1024); // 2 * maxGram in case all code points require 2 chars, plus 1024 of buffering so we don't keep polling the Reader
+    buffer = new int[charBuffer.getBuffer().length];
+    // Make the term att large enough
+    termAtt.resizeBuffer(2 * maxGram);
   }
 
-  /** Returns the next token in the stream, or null at EOS. */
   @Override
-  public boolean incrementToken() throws IOException {
+  public final boolean incrementToken() throws IOException {
     clearAttributes();
 
-    // compact
-    if (bufferStart >= buffer.length - maxGram) {
-      System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
-      bufferEnd -= bufferStart;
-      bufferStart = 0;
-
-      // fill in remaining space
-      if (!exhausted) {
-        // TODO: refactor to a shared readFully
-        while (bufferEnd < buffer.length) {
-          final int read = input.read(buffer, bufferEnd, buffer.length - bufferEnd);
-          if (read == -1) {
-            exhausted = true;
-            break;
-          }
-          bufferEnd += read;
+    // termination of this loop is guaranteed by the fact that every iteration
+    // either advances the buffer (calls consume()) or increases gramSize
+    while (true) {
+      // compact
+      if (bufferStart >= bufferEnd - maxGram - 1 && !exhausted) {
+        System.arraycopy(buffer, bufferStart, buffer, 0, bufferEnd - bufferStart);
+        bufferEnd -= bufferStart;
+        lastCheckedChar -= bufferStart;
+        lastNonTokenChar -= bufferStart;
+        bufferStart = 0;
+
+        // fill in remaining space
+        exhausted = !charUtils.fill(charBuffer, input, buffer.length - bufferEnd);
+        // convert to code points
+        bufferEnd += charUtils.toCodePoints(charBuffer.getBuffer(), 0, charBuffer.getLength(), buffer, bufferEnd);
+      }
+
+      // should we go to the next offset?
+      if (gramSize > maxGram || (bufferStart + gramSize) > bufferEnd) {
+        if (bufferStart + 1 + minGram > bufferEnd) {
+          assert exhausted;
+          return false;
         }
+        consume();
+        gramSize = minGram;
       }
+
+      updateLastNonTokenChar();
+
+      // retry if the gram would contain a non-token char or, in edgesOnly mode, doesn't start right after one
+      final boolean termContainsNonTokenChar = lastNonTokenChar >= bufferStart && lastNonTokenChar < (bufferStart + gramSize);
+      final boolean isEdgeAndPreviousCharIsTokenChar = edgesOnly && lastNonTokenChar != bufferStart - 1;
+      if (termContainsNonTokenChar || isEdgeAndPreviousCharIsTokenChar) {
+        consume();
+        gramSize = minGram;
+        continue;
+      }
+
+      final int length = charUtils.toChars(buffer, bufferStart, gramSize, termAtt.buffer(), 0);
+      termAtt.setLength(length);
+      posIncAtt.setPositionIncrement(1);
+      posLenAtt.setPositionLength(1);
+      offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + length));
+      ++gramSize;
+      return true;
     }
+  }
 
-    // should we go to the next offset?
-    if (gramSize > maxGram || bufferStart + gramSize > bufferEnd) {
-      bufferStart++;
-      offset++;
-      gramSize = minGram;
-    }
-
-    // are there enough chars remaining?
-    if (bufferStart + gramSize > bufferEnd) {
-      return false;
-    }
-
-    termAtt.copyBuffer(buffer, bufferStart, gramSize);
-    posIncAtt.setPositionIncrement(1);
-    posLenAtt.setPositionLength(1);
-    offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + gramSize));
-    ++gramSize;
+  private void updateLastNonTokenChar() {
+    final int termEnd = bufferStart + gramSize - 1;
+    if (termEnd > lastCheckedChar) {
+      for (int i = termEnd; i > lastCheckedChar; --i) {
+        if (!isTokenChar(buffer[i])) {
+          lastNonTokenChar = i;
+          break;
+        }
+      }
+      lastCheckedChar = termEnd;
+    }
+  }
+
+  /** Consume one code point. */
+  private void consume() {
+    offset += Character.charCount(buffer[bufferStart++]);
+  }
+
+  /** Only collect characters which satisfy this condition. */
+  protected boolean isTokenChar(int chr) {
     return true;
   }
 
   @Override
-  public void end() {
-    final int endOffset = correctOffset(offset + bufferEnd - bufferStart);
+  public final void end() {
+    assert bufferStart <= bufferEnd;
+    int endOffset = offset;
+    for (int i = bufferStart; i < bufferEnd; ++i) {
+      endOffset += Character.charCount(buffer[i]);
+    }
+    endOffset = correctOffset(endOffset);
     offsetAtt.setOffset(endOffset, endOffset);
   }
 
   @Override
-  public void reset() throws IOException {
+  public final void reset() throws IOException {
     super.reset();
     bufferStart = bufferEnd = buffer.length;
+    lastNonTokenChar = lastCheckedChar = bufferStart - 1;
     offset = 0;
     gramSize = minGram;
     exhausted = false;
+    charBuffer.reset();
   }
 }
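
Since the class is no longer final, pre-tokenization can be done by overriding isTokenChar(int): grams never span a rejected code point, and with edgesOnly they only start at the beginning of the stream or right after a rejected code point. A minimal sketch of such a subclass (LetterNGramTokenizer is an illustrative name, not part of this patch):

    import java.io.Reader;
    import java.io.StringReader;
    import org.apache.lucene.analysis.ngram.NGramTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class LetterNGramTokenizer extends NGramTokenizer {
      public LetterNGramTokenizer(Reader input, int minGram, int maxGram) {
        super(Version.LUCENE_44, input, minGram, maxGram);
      }

      @Override
      protected boolean isTokenChar(int chr) {
        return Character.isLetter(chr); // grams never cross non-letter characters
      }

      public static void main(String[] args) throws Exception {
        LetterNGramTokenizer tok = new LetterNGramTokenizer(new StringReader("ab1cd"), 2, 2);
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        tok.reset();
        while (tok.incrementToken()) {
          System.out.println(term.toString()); // expected: ab, cd (no bigram spans the digit)
        }
        tok.end();
        tok.close();
      }
    }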

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java Wed Jul  3 23:26:32 2013
@@ -35,12 +35,26 @@ import org.apache.lucene.analysis.tokena
  * </p>
  */
 public final class NorwegianLightStemFilter extends TokenFilter {
-  private final NorwegianLightStemmer stemmer = new NorwegianLightStemmer();
+  private final NorwegianLightStemmer stemmer;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
-
+  
+  /** 
+   * Calls {@link #NorwegianLightStemFilter(TokenStream, int) 
+   * NorwegianLightStemFilter(input, BOKMAAL)}
+   */
   public NorwegianLightStemFilter(TokenStream input) {
+    this(input, NorwegianLightStemmer.BOKMAAL);
+  }
+  
+  /** 
+   * Creates a new NorwegianLightStemFilter
+   * @param flags set to {@link NorwegianLightStemmer#BOKMAAL}, 
+   *                     {@link NorwegianLightStemmer#NYNORSK}, or both.
+   */
+  public NorwegianLightStemFilter(TokenStream input, int flags) {
     super(input);
+    stemmer = new NorwegianLightStemmer(flags);
   }
   
   @Override
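
The filter now delegates dialect selection to the stemmer via a flags argument. A minimal usage sketch (the WhitespaceTokenizer source is illustrative; any TokenStream works):

    import java.io.StringReader;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
    import org.apache.lucene.analysis.no.NorwegianLightStemmer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class NorwegianStemDemo {
      public static void main(String[] args) throws Exception {
        WhitespaceTokenizer source =
            new WhitespaceTokenizer(Version.LUCENE_44, new StringReader("gutane"));
        NorwegianLightStemFilter stems = new NorwegianLightStemFilter(
            source, NorwegianLightStemmer.BOKMAAL | NorwegianLightStemmer.NYNORSK);
        CharTermAttribute term = stems.addAttribute(CharTermAttribute.class);
        stems.reset();
        while (stems.incrementToken()) {
          System.out.println(term.toString()); // "gutane" -> "gut" when NYNORSK is enabled
        }
        stems.end();
        stems.close();
      }
    }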

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilterFactory.java Wed Jul  3 23:26:32 2013
@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.no.NorwegianLightStemFilter;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 
+import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
+import static org.apache.lucene.analysis.no.NorwegianLightStemmer.NYNORSK;
+
 /** 
  * Factory for {@link NorwegianLightStemFilter}.
  * <pre class="prettyprint">
@@ -30,15 +33,27 @@ import org.apache.lucene.analysis.util.T
  *   &lt;analyzer&gt;
  *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
  *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
- *     &lt;filter class="solr.NorwegianLightStemFilterFactory"/&gt;
+ *     &lt;filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/&gt;
  *   &lt;/analyzer&gt;
  * &lt;/fieldType&gt;</pre>
  */
 public class NorwegianLightStemFilterFactory extends TokenFilterFactory {
   
+  private final int flags;
+  
   /** Creates a new NorwegianLightStemFilterFactory */
   public NorwegianLightStemFilterFactory(Map<String,String> args) {
     super(args);
+    String variant = get(args, "variant");
+    if (variant == null || "nb".equals(variant)) {
+      flags = BOKMAAL;
+    } else if ("nn".equals(variant)) {
+      flags = NYNORSK;
+    } else if ("no".equals(variant)) {
+      flags = BOKMAAL | NYNORSK;
+    } else {
+      throw new IllegalArgumentException("invalid variant: " + variant);
+    }
     if (!args.isEmpty()) {
       throw new IllegalArgumentException("Unknown parameters: " + args);
     }
@@ -46,6 +61,6 @@ public class NorwegianLightStemFilterFac
   
   @Override
   public TokenStream create(TokenStream input) {
-    return new NorwegianLightStemFilter(input);
+    return new NorwegianLightStemFilter(input, flags);
   }
 }
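
The factory maps the new variant argument onto these flags: "nb" (or no argument) selects BOKMAAL, "nn" selects NYNORSK, and "no" selects both. A minimal programmatic sketch (in Solr the argument normally comes from the field type definition instead):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory;

    public class NorwegianFactoryDemo {
      public static void main(String[] args) {
        Map<String,String> factoryArgs = new HashMap<String,String>();
        factoryArgs.put("variant", "no"); // "nb" = Bokmål, "nn" = Nynorsk, "no" = both
        NorwegianLightStemFilterFactory factory = new NorwegianLightStemFilterFactory(factoryArgs);
        // factory.create(tokenStream) now wraps the stream in a
        // NorwegianLightStemFilter built with BOKMAAL | NYNORSK.
      }
    }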

Modified: lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemmer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemmer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemmer.java Wed Jul  3 23:26:32 2013
@@ -62,50 +62,106 @@ import static org.apache.lucene.analysis
  * corpus to validate against whereas the Norwegian one is hand crafted.
  */
 public class NorwegianLightStemmer {
+  /** Constant to remove Bokmål-specific endings */
+  public static final int BOKMAAL = 1;
+  /** Constant to remove Nynorsk-specific endings */
+  public static final int NYNORSK = 2;
   
+  final boolean useBokmaal;
+  final boolean useNynorsk;
+  
+  /** 
+   * Creates a new NorwegianLightStemmer
+   * @param flags set to {@link #BOKMAAL}, {@link #NYNORSK}, or both.
+   */
+  public NorwegianLightStemmer(int flags) {
+    if (flags <= 0 || flags > BOKMAAL + NYNORSK) {
+      throw new IllegalArgumentException("invalid flags");
+    }
+    useBokmaal = (flags & BOKMAAL) != 0;
+    useNynorsk = (flags & NYNORSK) != 0;
+  }
+      
   public int stem(char s[], int len) {   
     // Remove possessive -s (bilens -> bilen) and continue checking
     if (len > 4 && s[len-1] == 's')
       len--;
 
     // Remove common endings, single-pass
-    if (len > 7 && 
-        (endsWith(s, len, "heter") ||  // general ending (hemmelig-heter -> hemmelig)
-         endsWith(s, len, "heten")))   // general ending (hemmelig-heten -> hemmelig)
+    if (len > 7 &&
+        ((endsWith(s, len, "heter") &&
+          useBokmaal) ||  // general ending (hemmelig-heter -> hemmelig)
+         (endsWith(s, len, "heten") &&
+          useBokmaal) ||  // general ending (hemmelig-heten -> hemmelig)
+         (endsWith(s, len, "heita") &&
+          useNynorsk)))   // general ending (hemmeleg-heita -> hemmeleg)
       return len - 5;
+    
+    // Remove Nynorsk common endings, single-pass
+    if (len > 8 && useNynorsk &&
+        (endsWith(s, len, "heiter") ||  // general ending (hemmeleg-heiter -> hemmeleg)
+         endsWith(s, len, "leiken") ||  // general ending (trygg-leiken -> trygg)
+         endsWith(s, len, "leikar")))   // general ending (trygg-leikar -> trygg)
+      return len - 6;
 
     if (len > 5 &&
-        (endsWith(s, len, "dom") || // general ending (kristen-dom -> kristen)
-         endsWith(s, len, "het")))  // general ending (hemmelig-het -> hemmelig)
+        (endsWith(s, len, "dom") ||  // general ending (kristen-dom -> kristen)
+         (endsWith(s, len, "het") &&
+          useBokmaal)))              // general ending (hemmelig-het -> hemmelig)
       return len - 3;
     
+    if (len > 6 && useNynorsk &&
+        (endsWith(s, len, "heit") ||  // general ending (hemmeleg-heit -> hemmeleg)
+         endsWith(s, len, "semd") ||  // general ending (verk-semd -> verk)
+         endsWith(s, len, "leik")))   // general ending (trygg-leik -> trygg)
+      return len - 4;
+    
     if (len > 7 && 
         (endsWith(s, len, "elser") ||   // general ending (føl-elser -> føl)
          endsWith(s, len, "elsen")))    // general ending (føl-elsen -> føl)
       return len - 5;
     
     if (len > 6 &&
-        (endsWith(s, len, "ende") ||  // (sov-ende -> sov)
+        ((endsWith(s, len, "ende") &&
+          useBokmaal) ||      // (sov-ende -> sov)
+         (endsWith(s, len, "ande") &&
+          useNynorsk) ||      // (sov-ande -> sov)
          endsWith(s, len, "else") ||  // general ending (føl-else -> føl)
-         endsWith(s, len, "este") ||  // adj (fin-este -> fin)
-         endsWith(s, len, "eren")))   // masc
+         (endsWith(s, len, "este") &&
+          useBokmaal) ||      // adj (fin-este -> fin)
+         (endsWith(s, len, "aste") &&
+          useNynorsk) ||      // adj (fin-aste -> fin)
+         (endsWith(s, len, "eren") &&
+          useBokmaal) ||      // masc
+         (endsWith(s, len, "aren") &&
+          useNynorsk)))       // masc 
       return len - 4;
     
     if (len > 5 &&
-        (endsWith(s, len, "ere") || // adj (fin-ere -> fin)
-         endsWith(s, len, "est") || // adj (fin-est -> fin)
-         endsWith(s, len, "ene")    // masc/fem/neutr pl definite (hus-ene)
-         )) 
+        ((endsWith(s, len, "ere") &&
+         useBokmaal) ||     // adj (fin-ere -> fin)
+         (endsWith(s, len, "are") &&
+          useNynorsk) ||    // adj (fin-are -> fin)
+         (endsWith(s, len, "est") &&
+          useBokmaal) ||    // adj (fin-est -> fin)
+         (endsWith(s, len, "ast") &&
+          useNynorsk) ||    // adj (fin-ast -> fin)
+         endsWith(s, len, "ene") || // masc/fem/neutr pl definite (hus-ene)
+         (endsWith(s, len, "ane") &&
+          useNynorsk)))     // masc pl definite (gut-ane)
       return len - 3;
     
     if (len > 4 &&
         (endsWith(s, len, "er") ||  // masc/fem indefinite
          endsWith(s, len, "en") ||  // masc/fem definite
          endsWith(s, len, "et") ||  // neutr definite
-         endsWith(s, len, "st") ||  // adj (billig-st -> billig)
+         (endsWith(s, len, "ar") &&
+          useNynorsk) ||    // masc pl indefinite
+         (endsWith(s, len, "st") &&
+          useBokmaal) ||    // adj (billig-st -> billig)
          endsWith(s, len, "te")))
       return len - 2;
-    
+
     if (len > 3)
       switch(s[len-1]) {
         case 'a':     // fem definite