You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/07/04 01:26:45 UTC
svn commit: r1499601 [4/20] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/solr/core/src/test/ dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/stempel/ dev-to...
Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Wed Jul 3 23:26:32 2013
@@ -18,11 +18,20 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
import java.util.TreeSet;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* TODO: The tests below rely on the order of returned lemmas, which is probably not good.
@@ -56,10 +65,22 @@ public class TestMorfologikAnalyzer exte
assertAnalyzesToReuse(
a,
"T. Gl\u00FCcksberg",
- new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
- new int[] { 0, 0, 0, 3 },
- new int[] { 1, 1, 1, 13 },
- new int[] { 1, 0, 0, 1 });
+ new String[] { "tom", "tona", "Gl\u00FCcksberg" },
+ new int[] { 0, 0, 3 },
+ new int[] { 1, 1, 13 },
+ new int[] { 1, 0, 1 });
+ }
+
+ @SuppressWarnings("unused")
+ private void dumpTokens(String input) throws IOException {
+ TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
+ ts.reset();
+
+ MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
+ CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
+ while (ts.incrementToken()) {
+ System.out.println(charTerm.toString() + " => " + attribute.getTags());
+ }
}
/** Test reuse of MorfologikFilter with leftover stems. */
@@ -144,6 +165,34 @@ public class TestMorfologikAnalyzer exte
ts.close();
}
+ /** */
+ public final void testKeywordAttrTokens() throws IOException {
+ final Version version = TEST_VERSION_CURRENT;
+
+ Analyzer a = new MorfologikAnalyzer(version) {
+ @Override
+ protected TokenStreamComponents createComponents(String field, Reader reader) {
+ final CharArraySet keywords = new CharArraySet(version, 1, false);
+ keywords.add("liście");
+
+ final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
+ result = new SetKeywordMarkerFilter(result, keywords);
+ result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
+
+ return new TokenStreamComponents(src, result);
+ }
+ };
+
+ assertAnalyzesToReuse(
+ a,
+ "liście danych",
+ new String[] { "liście", "dany", "dana", "dane", "dać" },
+ new int[] { 0, 7, 7, 7, 7 },
+ new int[] { 6, 13, 13, 13, 13 },
+ new int[] { 1, 1, 0, 0, 0 });
+ }
+
/** blast some random strings through the analyzer */
public void testRandom() throws Exception {
checkRandomData(random(), getTestAnalyzer(), 1000 * RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Wed Jul 3 23:26:32 2013
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.StringReader;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenS
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
- Map<String,String> initParams = new HashMap<String,String>();
- initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
- "morfologik");
- MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
+ MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
Modified: lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Wed Jul 3 23:26:32 2013
@@ -119,14 +119,9 @@ public class CreateIndexTask extends Per
if (mergeScheduler.equals("org.apache.lucene.index.ConcurrentMergeScheduler")) {
ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwConf.getMergeScheduler();
- int v = config.get("concurrent.merge.scheduler.max.thread.count", -1);
- if (v != -1) {
- cms.setMaxThreadCount(v);
- }
- v = config.get("concurrent.merge.scheduler.max.merge.count", -1);
- if (v != -1) {
- cms.setMaxMergeCount(v);
- }
+ int maxThreadCount = config.get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
+ int maxMergeCount = config.get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
+ cms.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
}
}
@@ -151,13 +146,10 @@ public class CreateIndexTask extends Per
} catch (Exception e) {
throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
}
+ iwConf.getMergePolicy().setNoCFSRatio(isCompound ? 1.0 : 0.0);
if(iwConf.getMergePolicy() instanceof LogMergePolicy) {
LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
- logMergePolicy.setUseCompoundFile(isCompound);
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
- } else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) {
- TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy();
- tieredMergePolicy.setUseCompoundFile(isCompound);
}
}
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
Modified: lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Wed Jul 3 23:26:32 2013
@@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
@@ -754,7 +755,7 @@ public class TestPerfTasksLogic extends
assertEquals(2, writer.getConfig().getMaxBufferedDocs());
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
- assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
+ assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
writer.close();
Directory dir = benchmark.getRunData().getDirectory();
IndexReader reader = DirectoryReader.open(dir);
Modified: lucene/dev/branches/security/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/build.xml (original)
+++ lucene/dev/branches/security/lucene/build.xml Wed Jul 3 23:26:32 2013
@@ -183,7 +183,10 @@
<forbidden-apis internalRuntimeForbidden="true" classpathref="forbidden-apis.classpath">
<bundledSignatures name="jdk-unsafe-${javac.target}"/>
<bundledSignatures name="jdk-deprecated-${javac.target}"/>
- <signaturesFileSet file="${common.dir}/tools/forbiddenApis/executors.txt"/>
+ <signaturesFileSet dir="${common.dir}/tools/forbiddenApis">
+ <include name="executors.txt" />
+ <include name="chars.txt" />
+ </signaturesFileSet>
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
@@ -345,7 +348,7 @@
</target>
<!-- rat-sources-typedef is *not* a useless dependency. do not remove -->
- <target name="rat-sources" depends="rat-sources-typedef">
+ <target name="rat-sources" depends="rat-sources-typedef,common.rat-sources">
<subant target="rat-sources" failonerror="true" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
<fileset dir="core" includes="build.xml"/>
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java Wed Jul 3 23:26:32 2013
@@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Util;
* @lucene.experimental */
public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
- private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(true);
+ private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
private int indexDivisor;
// Closed if indexLoaded is true:
@@ -199,7 +199,7 @@ public class VariableGapTermsIndexReader
if (indexDivisor > 1) {
// subsample
final IntsRef scratchIntsRef = new IntsRef();
- final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java Wed Jul 3 23:26:32 2013
@@ -235,7 +235,7 @@ public class VariableGapTermsIndexWriter
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
this.fieldInfo = fieldInfo;
- fstOutputs = PositiveIntOutputs.getSingleton(true);
+ fstOutputs = PositiveIntOutputs.getSingleton();
fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
indexStart = out.getFilePointer();
////System.out.println("VGW: field=" + fieldInfo.name);
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Wed Jul 3 23:26:32 2013
@@ -513,7 +513,7 @@ class SimpleTextFieldsReader extends Fie
}
private void loadTerms() throws IOException {
- PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
+ PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
Modified: lucene/dev/branches/security/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/common-build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/common-build.xml (original)
+++ lucene/dev/branches/security/lucene/common-build.xml Wed Jul 3 23:26:32 2013
@@ -186,6 +186,7 @@
<property name="build.encoding" value="utf-8"/>
<property name="src.dir" location="src/java"/>
+ <property name="resources.dir" location="${src.dir}/../resources"/>
<property name="tests.src.dir" location="src/test"/>
<available property="module.has.tests" type="dir" file="${tests.src.dir}"/>
<property name="build.dir" location="build"/>
@@ -255,6 +256,10 @@
<!-- a reasonable default exclusion set, can be overridden for special cases -->
<property name="rat.excludes" value="**/TODO,**/*.txt,**/*.iml"/>
+
+ <!-- These patterns can be defined to add additional files for checks, relative to module's home dir -->
+ <property name="rat.additional-includes" value=""/>
+ <property name="rat.additional-excludes" value=""/>
<propertyset id="uptodate.and.compiled.properties" dynamic="true">
<propertyref regex=".*\.uptodate$$"/>
@@ -351,7 +356,7 @@
<target name="resolve" depends="ivy-availability-check,ivy-configure">
<!-- todo, make this a property or something.
only special cases need bundles -->
- <ivy:retrieve type="jar,bundle" log="download-only"
+ <ivy:retrieve type="jar,bundle,tests" log="download-only"
conf="${ivy.default.configuration}" sync="${ivy.sync}"/>
</target>
@@ -489,7 +494,7 @@
<!-- Copy the resources folder (if existent) -->
<copy todir="${build.dir}/classes/java">
- <fileset dir="${src.dir}/../resources" erroronmissingdir="no"/>
+ <fileset dir="${resources.dir}" erroronmissingdir="no"/>
</copy>
</target>
@@ -864,6 +869,7 @@
<mkdir dir="${tests.cachedir}/${name}" />
<junit4:junit4
+ taskName="junit4"
dir="@{workDir}"
tempdir="@{workDir}/temp"
maxmemory="${tests.heapsize}"
@@ -1339,7 +1345,7 @@ ${tests-output}/junit4-*.suites - pe
<sequential>
<mkdir dir="${build.dir}" />
<jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
- <fileset dir="${src.dir}/../resources" erroronmissingdir="no"/>
+ <fileset dir="${resources.dir}" erroronmissingdir="no"/>
</jarify>
</sequential>
</target>
@@ -1454,7 +1460,7 @@ ${tests-output}/junit4-*.suites - pe
<target name="filter-pom-templates" unless="filtered.pom.templates.uptodate">
<mkdir dir="${filtered.pom.templates.dir}"/>
- <copy todir="${common.dir}/build/poms" overwrite="true">
+ <copy todir="${common.dir}/build/poms" overwrite="true" encoding="UTF-8">
<fileset dir="${common.dir}/../dev-tools/maven"/>
<filterset begintoken="@" endtoken="@">
<filter token="version" value="${version}"/>
@@ -1508,28 +1514,34 @@ ${tests-output}/junit4-*.suites - pe
</target>
<target name="rat-sources-typedef" unless="rat.loaded">
- <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.8" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
+ <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.9" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
<typedef resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks" classpathref="rat.classpath"/>
<property name="rat.loaded" value="true"/>
</target>
<target name="rat-sources" depends="rat-sources-typedef"
description="runs the tasks over source and test files">
- <sequential>
<!-- create a temp file for the log to go to -->
<tempfile property="rat.sources.logfile"
prefix="rat"
destdir="${java.io.tmpdir}"/>
<!-- run rat, going to the file -->
<rat:report xmlns:rat="antlib:org.apache.rat.anttasks"
- reportFile="${rat.sources.logfile}">
- <fileset dir="${src.dir}" excludes="${rat.excludes}"/>
+ reportFile="${rat.sources.logfile}" addDefaultLicenseMatchers="true">
+ <fileset dir="." includes="*.xml ${rat.additional-includes}" excludes="${rat.additional-excludes}"/>
+ <fileset dir="${src.dir}" excludes="${rat.excludes}" erroronmissingdir="false"/>
<fileset dir="${tests.src.dir}" excludes="${rat.excludes}" erroronmissingdir="false"/>
- <!-- some modules have a src/tools/[java,test] -->
- <fileset dir="src/tools/java" excludes="${rat.excludes}" erroronmissingdir="false"/>
- <fileset dir="src/tools/test" excludes="${rat.excludes}" erroronmissingdir="false"/>
+
+ <!-- TODO: Check all resource files. Currently not all stopword and similar files have no header! -->
+ <fileset dir="${resources.dir}" includes="META-INF/**" erroronmissingdir="false"/>
- <!-- bsd-like stuff -->
+ <!-- BSD 4-clause stuff (is disallowed below) -->
+ <rat:substringMatcher licenseFamilyCategory="BSD4 "
+ licenseFamilyName="Original BSD License (with advertising clause)">
+ <pattern substring="All advertising materials"/>
+ </rat:substringMatcher>
+
+ <!-- BSD-like stuff -->
<rat:substringMatcher licenseFamilyCategory="BSD "
licenseFamilyName="Modified BSD License">
<!-- brics automaton -->
@@ -1542,16 +1554,20 @@ ${tests-output}/junit4-*.suites - pe
<pattern substring="Egothor Software License version 1.00"/>
<!-- JaSpell -->
<pattern substring="Copyright (c) 2005 Bruno Martins"/>
+ <!-- d3.js -->
+ <pattern substring="THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS"/>
+ <!-- highlight.js -->
+ <pattern substring="THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS"/>
</rat:substringMatcher>
- <!-- mit-like -->
+ <!-- MIT-like -->
<rat:substringMatcher licenseFamilyCategory="MIT "
licenseFamilyName="The MIT License">
<!-- ICU license -->
<pattern substring="Permission is hereby granted, free of charge, to any person obtaining a copy"/>
</rat:substringMatcher>
- <!-- apache -->
+ <!-- Apache -->
<rat:substringMatcher licenseFamilyCategory="AL "
licenseFamilyName="Apache">
<pattern substring="Licensed to the Apache Software Foundation (ASF) under"/>
@@ -1577,13 +1593,13 @@ ${tests-output}/junit4-*.suites - pe
</rat:report>
<!-- now print the output, for review -->
<loadfile property="rat.output" srcFile="${rat.sources.logfile}"/>
- <echo>${rat.output}</echo>
+ <echo taskname="rat">${rat.output}</echo>
<delete>
<fileset file="${rat.sources.logfile}">
<and>
<containsregexp expression="^0 Unknown Licenses"/>
<not>
- <containsregexp expression="^\s+!AL"/>
+ <containsregexp expression="^\s+!"/>
</not>
</and>
</fileset>
@@ -1594,7 +1610,6 @@ ${tests-output}/junit4-*.suites - pe
<available file="${rat.sources.logfile}"/>
</condition>
</fail>
- </sequential>
</target>
<!--+
@@ -1814,10 +1829,71 @@ ${tests-output}/junit4-*.suites - pe
</condition>
</fail>
-
+ <patch-javadoc dir="@{destdir}" docencoding="${javadoc.charset}"/>
</sequential>
</macrodef>
+ <!--
+ Patch frame injection bugs in javadoc generated files - see CVE-2013-1571, http://www.kb.cert.org/vuls/id/225657
+
+ Feel free to use this macro in your own Ant build file. This macro works together with the javadoc task on Ant
+ and should be invoked directly after its execution to patch broken javadocs, e.g.:
+ <patch-javadoc dir="..." docencoding="UTF-8"/>
+ Please make sure that the docencoding parameter uses the same charset like javadoc's docencoding. Default
+ is the platform default encoding (like the javadoc task).
+ The specified dir is the destination directory of the javadoc task.
+ -->
+ <macrodef name="patch-javadoc">
+ <attribute name="dir"/>
+ <attribute name="docencoding" default="${file.encoding}"/>
+ <sequential>
+ <replace encoding="@{docencoding}" summary="true" taskname="patch-javadoc">
+ <fileset dir="@{dir}" casesensitive="false" includes="**/index.html,**/index.htm,**/toc.html,**/toc.htm">
+ <!-- TODO: add encoding="@{docencoding}" to contains check, when we are on ANT 1.9.0: -->
+ <not><contains text="function validURL(url) {" casesensitive="true" /></not>
+ </fileset>
+ <replacetoken><![CDATA[function loadFrames() {]]></replacetoken>
+ <replacevalue expandProperties="false"><![CDATA[if (targetPage != "" && !validURL(targetPage))
+ targetPage = "undefined";
+ function validURL(url) {
+ var pos = url.indexOf(".html");
+ if (pos == -1 || pos != url.length - 5)
+ return false;
+ var allowNumber = false;
+ var allowSep = false;
+ var seenDot = false;
+ for (var i = 0; i < url.length - 5; i++) {
+ var ch = url.charAt(i);
+ if ('a' <= ch && ch <= 'z' ||
+ 'A' <= ch && ch <= 'Z' ||
+ ch == '$' ||
+ ch == '_') {
+ allowNumber = true;
+ allowSep = true;
+ } else if ('0' <= ch && ch <= '9'
+ || ch == '-') {
+ if (!allowNumber)
+ return false;
+ } else if (ch == '/' || ch == '.') {
+ if (!allowSep)
+ return false;
+ allowNumber = false;
+ allowSep = false;
+ if (ch == '.')
+ seenDot = true;
+ if (ch == '/' && seenDot)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+ function loadFrames() {]]></replacevalue>
+ </replace>
+ </sequential>
+ </macrodef>
+
<macrodef name="modules-crawl">
<attribute name="target" default=""/>
<attribute name="failonerror" default="true"/>
@@ -2030,7 +2106,7 @@ ${tests-output}/junit4-*.suites - pe
<element name="nested" optional="false" implicit="true"/>
<sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
- preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8" taskname="pegdown"
+ preservelastmodified="false" encoding="UTF-8" taskname="pegdown"
>
<filterchain>
<tokenfilter>
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Wed Jul 3 23:26:32 2013
@@ -50,10 +50,6 @@ public class BlockTermState extends OrdT
totalTermFreq = other.totalTermFreq;
termBlockOrd = other.termBlockOrd;
blockFilePointer = other.blockFilePointer;
-
- // NOTE: don't copy blockTermCount;
- // it's "transient": used only by the "primary"
- // termState, and regenerated on seek by TermState
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java Wed Jul 3 23:26:32 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.codecs.compres
* limitations under the License.
*/
-import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
@@ -25,16 +24,13 @@ import org.apache.lucene.index.CorruptIn
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
/**
* Random-access reader for {@link CompressingStoredFieldsIndexWriter}.
* @lucene.internal
*/
-public final class CompressingStoredFieldsIndexReader implements Closeable, Cloneable {
-
- final IndexInput fieldsIndexIn;
+public final class CompressingStoredFieldsIndexReader implements Cloneable {
static long moveLowOrderBitToSign(long n) {
return ((n >>> 1) ^ -(n & 1));
@@ -48,8 +44,9 @@ public final class CompressingStoredFiel
final PackedInts.Reader[] docBasesDeltas; // delta from the avg
final PackedInts.Reader[] startPointersDeltas; // delta from the avg
+ // It is the responsibility of the caller to close fieldsIndexIn after this constructor
+ // has been called
CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
- this.fieldsIndexIn = fieldsIndexIn;
maxDoc = si.getDocCount();
int[] docBases = new int[16];
long[] startPointers = new long[16];
@@ -106,17 +103,6 @@ public final class CompressingStoredFiel
this.startPointersDeltas = Arrays.copyOf(startPointersDeltas, blockCount);
}
- private CompressingStoredFieldsIndexReader(CompressingStoredFieldsIndexReader other) {
- this.fieldsIndexIn = null;
- this.maxDoc = other.maxDoc;
- this.docBases = other.docBases;
- this.startPointers = other.startPointers;
- this.avgChunkDocs = other.avgChunkDocs;
- this.avgChunkSizes = other.avgChunkSizes;
- this.docBasesDeltas = other.docBasesDeltas;
- this.startPointersDeltas = other.startPointersDeltas;
- }
-
private int block(int docID) {
int lo = 0, hi = docBases.length - 1;
while (lo <= hi) {
@@ -172,16 +158,7 @@ public final class CompressingStoredFiel
@Override
public CompressingStoredFieldsIndexReader clone() {
- if (fieldsIndexIn == null) {
- return this;
- } else {
- return new CompressingStoredFieldsIndexReader(this);
- }
- }
-
- @Override
- public void close() throws IOException {
- IOUtils.close(fieldsIndexIn);
+ return this;
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Jul 3 23:26:32 2013
@@ -60,6 +60,9 @@ import org.apache.lucene.util.packed.Pac
*/
public final class CompressingStoredFieldsReader extends StoredFieldsReader {
+ // Do not reuse the decompression buffer when there is more than 32kb to decompress
+ private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
+
private final FieldInfos fieldInfos;
private final CompressingStoredFieldsIndexReader indexReader;
private final IndexInput fieldsStream;
@@ -93,20 +96,23 @@ public final class CompressingStoredFiel
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
- fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+ // Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
-
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
- CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
- assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+ indexStream.close();
indexStream = null;
+ // Open the data file and read metadata
+ final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
+ fieldsStream = d.openInput(fieldsStreamFN, context);
+ final String codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+ assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
+
packedIntsVersion = fieldsStream.readVInt();
decompressor = compressionMode.newDecompressor();
this.bytes = new BytesRef();
@@ -134,7 +140,7 @@ public final class CompressingStoredFiel
@Override
public void close() throws IOException {
if (!closed) {
- IOUtils.close(fieldsStream, indexReader);
+ IOUtils.close(fieldsStream);
closed = true;
}
}
@@ -255,6 +261,7 @@ public final class CompressingStoredFiel
return;
}
+ final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
decompressor.decompress(fieldsStream, totalLength, offset, length, bytes);
assert bytes.length == length;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Wed Jul 3 23:26:32 2013
@@ -102,20 +102,23 @@ public final class CompressingTermVector
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
- vectorsStream = d.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
+ // Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
-
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
- CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
- assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+ indexStream.close();
indexStream = null;
+ // Open the data file and read metadata
+ final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
+ vectorsStream = d.openInput(vectorsStreamFN, context);
+ final String codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+ assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
decompressor = compressionMode.newDecompressor();
@@ -161,7 +164,7 @@ public final class CompressingTermVector
@Override
public void close() throws IOException {
if (!closed) {
- IOUtils.close(vectorsStream, indexReader);
+ IOUtils.close(vectorsStream);
closed = true;
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java Wed Jul 3 23:26:32 2013
@@ -242,8 +242,8 @@ public final class Lucene40TermVectorsWr
if (payloads) {
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
}
- for (int i = 0; i < bufferedIndex; i++) {
- if (offsets) {
+ if (offsets) {
+ for (int i = 0; i < bufferedIndex; i++) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html Wed Jul 3 23:26:32 2013
@@ -372,13 +372,7 @@ term vectors.</li>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Wed Jul 3 23:26:32 2013
@@ -161,7 +161,7 @@ import org.apache.lucene.util.packed.Pac
* <li>SkipFPDelta determines the position of this term's SkipData within the .doc
* file. In particular, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- * (i.e. 8 in Lucene41PostingsFormat).</li>
+ * (i.e. 128 in Lucene41PostingsFormat).</li>
* <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
* of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
* single document ID is written to the term dictionary.</li>
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html Wed Jul 3 23:26:32 2013
@@ -381,13 +381,7 @@ the term dictionary. Stored fields are c
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Wed Jul 3 23:26:32 2013
@@ -245,7 +245,7 @@ class Lucene42DocValuesConsumer extends
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java Wed Jul 3 23:26:32 2013
@@ -120,16 +120,33 @@ import org.apache.lucene.util.packed.Blo
* </ol>
*/
public final class Lucene42DocValuesFormat extends DocValuesFormat {
-
- /** Sole constructor */
+ final float acceptableOverheadRatio;
+
+ /**
+ * Calls {@link #Lucene42DocValuesFormat(float)
+ * Lucene42DocValuesFormat(PackedInts.DEFAULT)}
+ */
public Lucene42DocValuesFormat() {
+ this(PackedInts.DEFAULT);
+ }
+
+ /**
+ * Creates a new Lucene42DocValuesFormat with the specified
+ * <code>acceptableOverheadRatio</code> for NumericDocValues.
+ * @param acceptableOverheadRatio compression parameter for numerics.
+ * Currently this is only used when the number of unique values is small.
+ *
+ * @lucene.experimental
+ */
+ public Lucene42DocValuesFormat(float acceptableOverheadRatio) {
super("Lucene42");
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
// note: we choose DEFAULT here (its reasonably fast, and for small bpv has tiny waste)
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, PackedInts.DEFAULT);
+ return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Wed Jul 3 23:26:32 2013
@@ -278,7 +278,7 @@ class Lucene42DocValuesProducer extends
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
@@ -352,7 +352,7 @@ class Lucene42DocValuesProducer extends
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java Wed Jul 3 23:26:32 2013
@@ -42,14 +42,32 @@ import org.apache.lucene.util.packed.Pac
* @see Lucene42DocValuesFormat
*/
public final class Lucene42NormsFormat extends NormsFormat {
+ final float acceptableOverheadRatio;
- /** Sole constructor */
- public Lucene42NormsFormat() {}
+ /**
+ * Calls {@link #Lucene42NormsFormat(float)
+ * Lucene42NormsFormat(PackedInts.FASTEST)}
+ */
+ public Lucene42NormsFormat() {
+ // note: we choose FASTEST here (otherwise our norms are half as big but 15% slower than previous lucene)
+ this(PackedInts.FASTEST);
+ }
+
+ /**
+ * Creates a new Lucene42NormsFormat with the specified
+ * <code>acceptableOverheadRatio</code> for NumericDocValues.
+ * @param acceptableOverheadRatio compression parameter for numerics.
+ * Currently this is only used when the number of unique values is small.
+ *
+ * @lucene.experimental
+ */
+ public Lucene42NormsFormat(float acceptableOverheadRatio) {
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ }
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
- // note: we choose FASTEST here (otherwise our norms are half as big but 15% slower than previous lucene)
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, PackedInts.FASTEST);
+ return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html Wed Jul 3 23:26:32 2013
@@ -384,13 +384,7 @@ on multi-valued fields.</li>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Wed Jul 3 23:26:32 2013
@@ -463,11 +463,11 @@ public class CheckIndex {
if (onlySegments != null) {
result.partial = true;
- if (infoStream != null)
+ if (infoStream != null) {
infoStream.print("\nChecking only these segments:");
- for (String s : onlySegments) {
- if (infoStream != null)
+ for (String s : onlySegments) {
infoStream.print(" " + s);
+ }
}
result.segmentsChecked.addAll(onlySegments);
msg(infoStream, ":");
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Wed Jul 3 23:26:32 2013
@@ -30,12 +30,11 @@ import java.util.Comparator;
* separate thread.
*
* <p>Specify the max number of threads that may run at
- * once with {@link #setMaxThreadCount}.</p>
+ * once, and the maximum number of simultaneous merges
+ * with {@link #setMaxMergesAndThreads}.</p>
*
- * <p>Separately specify the maximum number of simultaneous
- * merges with {@link #setMaxMergeCount}. If the number of
- * merges exceeds the max number of threads then the
- * largest merges are paused until one of the smaller
+ * <p>If the number of merges exceeds the max number of threads
+ * then the largest merges are paused until one of the smaller
* merges completes.</p>
*
* <p>If more than {@link #getMaxMergeCount} merges are
@@ -49,21 +48,29 @@ public class ConcurrentMergeScheduler ex
/** List of currently active {@link MergeThread}s. */
protected List<MergeThread> mergeThreads = new ArrayList<MergeThread>();
+
+ /**
+ * Default {@code maxThreadCount}.
+ * We default to 1: tests on spinning-magnet drives showed slower
+ * indexing performance if more than one merge thread runs at
+ * once (though on an SSD it was faster)
+ */
+ public static final int DEFAULT_MAX_THREAD_COUNT = 1;
+
+ /** Default {@code maxMergeCount}. */
+ public static final int DEFAULT_MAX_MERGE_COUNT = 2;
// Max number of merge threads allowed to be running at
// once. When there are more merges then this, we
// forcefully pause the larger ones, letting the smaller
// ones run, up until maxMergeCount merges at which point
// we forcefully pause incoming threads (that presumably
- // are the ones causing so much merging). We default to 1
- // here: tests on spinning-magnet drives showed slower
- // indexing perf if more than one merge thread runs at
- // once (though on an SSD it was faster):
- private int maxThreadCount = 1;
+ // are the ones causing so much merging).
+ private int maxThreadCount = DEFAULT_MAX_THREAD_COUNT;
// Max number of merges we accept before forcefully
// throttling the incoming threads
- private int maxMergeCount = 2;
+ private int maxMergeCount = DEFAULT_MAX_MERGE_COUNT;
/** {@link Directory} that holds the index. */
protected Directory dir;
@@ -80,43 +87,40 @@ public class ConcurrentMergeScheduler ex
public ConcurrentMergeScheduler() {
}
- /** Sets the max # simultaneous merge threads that should
- * be running at once. This must be <= {@link
- * #setMaxMergeCount}. */
- public void setMaxThreadCount(int count) {
- if (count < 1) {
- throw new IllegalArgumentException("count should be at least 1");
+ /**
+ * Sets the maximum number of merge threads and simultaneous merges allowed.
+ *
+ * @param maxMergeCount the max # simultaneous merges that are allowed.
+ * If a merge is necessary yet we already have this many
+ * threads running, the incoming thread (that is calling
+ * add/updateDocument) will block until a merge thread
+ * has completed. Note that we will only run the
+ * smallest <code>maxThreadCount</code> merges at a time.
+ * @param maxThreadCount the max # simultaneous merge threads that should
+ * be running at once. This must be &lt;= <code>maxMergeCount</code>
+ */
+ public void setMaxMergesAndThreads(int maxMergeCount, int maxThreadCount) {
+ if (maxThreadCount < 1) {
+ throw new IllegalArgumentException("maxThreadCount should be at least 1");
}
- if (count > maxMergeCount) {
- throw new IllegalArgumentException("count should be <= maxMergeCount (= " + maxMergeCount + ")");
+ if (maxMergeCount < 1) {
+ throw new IllegalArgumentException("maxMergeCount should be at least 1");
}
- maxThreadCount = count;
+ if (maxThreadCount > maxMergeCount) {
+ throw new IllegalArgumentException("maxThreadCount should be <= maxMergeCount (= " + maxMergeCount + ")");
+ }
+ this.maxThreadCount = maxThreadCount;
+ this.maxMergeCount = maxMergeCount;
}
/** Returns {@code maxThreadCount}.
*
- * @see #setMaxThreadCount(int) */
+ * @see #setMaxMergesAndThreads(int, int) */
public int getMaxThreadCount() {
return maxThreadCount;
}
- /** Sets the max # simultaneous merges that are allowed.
- * If a merge is necessary yet we already have this many
- * threads running, the incoming thread (that is calling
- * add/updateDocument) will block until a merge thread
- * has completed. Note that we will only run the
- * smallest {@link #setMaxThreadCount} merges at a time. */
- public void setMaxMergeCount(int count) {
- if (count < 1) {
- throw new IllegalArgumentException("count should be at least 1");
- }
- if (count < maxThreadCount) {
- throw new IllegalArgumentException("count should be >= maxThreadCount (= " + maxThreadCount + ")");
- }
- maxMergeCount = count;
- }
-
- /** See {@link #setMaxMergeCount}. */
+ /** See {@link #setMaxMergesAndThreads}. */
public int getMaxMergeCount() {
return maxMergeCount;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Wed Jul 3 23:26:32 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -291,7 +292,7 @@ public abstract class DirectoryReader ex
// IOException allowed to throw there, in case
// segments_N is corrupt
sis.read(dir, fileName);
- } catch (FileNotFoundException fnfe) {
+ } catch (FileNotFoundException | NoSuchFileException fnfe) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java Wed Jul 3 23:26:32 2013
@@ -24,5 +24,4 @@ abstract class DocConsumer {
abstract void finishDocument() throws IOException;
abstract void flush(final SegmentWriteState state) throws IOException;
abstract void abort();
- abstract void doAfterFlush();
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java Wed Jul 3 23:26:32 2013
@@ -144,15 +144,6 @@ final class DocFieldProcessor extends Do
return fields;
}
- /** In flush we reset the fieldHash to not maintain per-field state
- * across segments */
- @Override
- void doAfterFlush() {
- fieldHash = new DocFieldProcessorPerField[2];
- hashMask = 1;
- totalFieldCount = 0;
- }
-
private void rehash() {
final int newHashSize = (fieldHash.length*2);
assert newHashSize > fieldHash.length;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java Wed Jul 3 23:26:32 2013
@@ -17,12 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.util.HashMap;
-import java.util.Map;
-
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
/**
@@ -33,19 +28,16 @@ final class DocFieldProcessorPerField {
final DocFieldConsumerPerField consumer;
final FieldInfo fieldInfo;
- private final Counter bytesUsed;
DocFieldProcessorPerField next;
int lastGen = -1;
int fieldCount;
IndexableField[] fields = new IndexableField[1];
- private final Map<FieldInfo,String> dvFields = new HashMap<FieldInfo,String>();
public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
this.fieldInfo = fieldInfo;
- this.bytesUsed = docFieldProcessor.bytesUsed;
}
public void addField(IndexableField field) {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Jul 3 23:26:32 2013
@@ -29,15 +29,11 @@ import org.apache.lucene.index.Documents
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
-import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FlushInfo;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.MutableBits;
/**
* This class accepts multiple added documents and directly
@@ -114,6 +110,7 @@ final class DocumentsWriter {
List<String> newFiles;
final IndexWriter indexWriter;
+ final LiveIndexWriterConfig indexWriterConfig;
private AtomicInteger numDocsInRAM = new AtomicInteger(0);
@@ -144,6 +141,7 @@ final class DocumentsWriter {
this.indexWriter = writer;
this.infoStream = config.getInfoStream();
this.similarity = config.getSimilarity();
+ this.indexWriterConfig = writer.getConfig();
this.perThreadPool = config.getIndexerThreadPool();
this.chain = config.getIndexingChain();
this.perThreadPool.initialize(this, globalFieldNumbers, config);
@@ -517,7 +515,7 @@ final class DocumentsWriter {
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
- final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB();
+ final double ramBufferSizeMB = indexWriterConfig.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Wed Jul 3 23:26:32 2013
@@ -112,7 +112,7 @@ class DocumentsWriterPerThread {
// Only called by asserts
public boolean testPoint(String name) {
- return docWriter.writer.testPoint(name);
+ return docWriter.testPoint(name);
}
public void clear() {
@@ -194,6 +194,7 @@ class DocumentsWriterPerThread {
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
+ private final LiveIndexWriterConfig indexWriterConfig;
public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
@@ -203,6 +204,7 @@ class DocumentsWriterPerThread {
this.parent = parent;
this.fieldInfos = fieldInfos;
this.writer = parent.indexWriter;
+ this.indexWriterConfig = parent.indexWriterConfig;
this.infoStream = parent.infoStream;
this.codec = parent.codec;
this.docState = new DocState(this, infoStream);
@@ -232,6 +234,13 @@ class DocumentsWriterPerThread {
aborting = true;
}
+ final boolean testPoint(String message) {
+ if (infoStream.isEnabled("TP")) {
+ infoStream.message("TP", message);
+ }
+ return true;
+ }
+
boolean checkAndResetHasAborted() {
final boolean retval = hasAborted;
hasAborted = false;
@@ -239,7 +248,7 @@ class DocumentsWriterPerThread {
}
public void updateDocument(IndexDocument doc, Analyzer analyzer, Term delTerm) throws IOException {
- assert writer.testPoint("DocumentsWriterPerThread addDocument start");
+ assert testPoint("DocumentsWriterPerThread addDocument start");
assert deleteQueue != null;
docState.doc = doc;
docState.analyzer = analyzer;
@@ -292,7 +301,7 @@ class DocumentsWriterPerThread {
}
public int updateDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer, Term delTerm) throws IOException {
- assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
+ assert testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
if (segmentInfo == null) {
@@ -428,7 +437,6 @@ class DocumentsWriterPerThread {
/** Reset after a flush */
private void doAfterFlush() {
segmentInfo = null;
- consumer.doAfterFlush();
directory.getCreatedFiles().clear();
fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers);
parent.subtractFlushedNumDocs(numDocsInRAM);
@@ -561,7 +569,7 @@ class DocumentsWriterPerThread {
boolean success = false;
try {
- if (writer.useCompoundFile(newSegment)) {
+ if (indexWriterConfig.getUseCompoundFile()) {
// Now build compound file
Collection<String> oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context);
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java Wed Jul 3 23:26:32 2013
@@ -20,6 +20,8 @@ package org.apache.lucene.index;
import java.util.List;
import java.io.IOException;
+import org.apache.lucene.store.Directory;
+
/**
* <p>Expert: policy for deletion of stale {@link IndexCommit index commits}.
*
@@ -46,6 +48,10 @@ import java.io.IOException;
* target="top"
* href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
* for details.</p>
+ *
+ * <p>Implementers of sub-classes should make sure that {@link #clone()}
+ * returns an independent instance able to work with any other {@link IndexWriter}
+ * or {@link Directory} instance.</p>
*/
public abstract class IndexDeletionPolicy implements Cloneable {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Wed Jul 3 23:26:32 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -170,7 +171,7 @@ final class IndexFileDeleter implements
SegmentInfos sis = new SegmentInfos();
try {
sis.read(directory, fileName);
- } catch (FileNotFoundException e) {
+ } catch (FileNotFoundException | NoSuchFileException e) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Wed Jul 3 23:26:32 2013
@@ -214,7 +214,7 @@ public class IndexWriter implements Clos
private final Analyzer analyzer; // how to analyze text
private volatile long changeCount; // increments every time a change is completed
- private long lastCommitChangeCount; // last changeCount that was committed
+ private volatile long lastCommitChangeCount; // last changeCount that was committed
private List<SegmentInfoPerCommit> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
@@ -631,7 +631,12 @@ public class IndexWriter implements Clos
/**
* Constructs a new IndexWriter per the settings given in <code>conf</code>.
* Note that the passed in {@link IndexWriterConfig} is
- * privately cloned; if you need to make subsequent "live"
+ * privately cloned, which, in-turn, clones the
+ * {@link IndexWriterConfig#getFlushPolicy() flush policy},
+ * {@link IndexWriterConfig#getIndexDeletionPolicy() deletion policy},
+ * {@link IndexWriterConfig#getMergePolicy() merge policy},
+ * and {@link IndexWriterConfig#getMergeScheduler() merge scheduler}.
+ * If you need to make subsequent "live"
* changes to the configuration use {@link #getConfig}.
* <p>
*
@@ -2269,10 +2274,6 @@ public class IndexWriter implements Clos
}
}
- synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) throws IOException {
- return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
- }
-
private synchronized void resetMergeExceptions() {
mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
mergeGen++;
@@ -2826,6 +2827,11 @@ public class IndexWriter implements Clos
commitInternal();
}
+ /** Returns true if there are changes that have not been committed */
+ public final boolean hasUncommittedChanges() {
+ return changeCount != lastCommitChangeCount;
+ }
+
private final void commitInternal() throws IOException {
if (infoStream.isEnabled("IW")) {
@@ -2865,8 +2871,8 @@ public class IndexWriter implements Clos
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: wrote segments file \"" + pendingCommit.getSegmentsFileName() + "\"");
}
- lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
+ lastCommitChangeCount = pendingCommitChangeCount;
rollbackSegments = pendingCommit.createBackupSegmentInfos();
deleter.checkpoint(pendingCommit, true);
} finally {
@@ -4162,7 +4168,10 @@ public class IndexWriter implements Clos
// startCommitMergeDeletes
// startMergeInit
// DocumentsWriter.ThreadState.init start
- boolean testPoint(String name) {
+ private final boolean testPoint(String message) {
+ if (infoStream.isEnabled("TP")) {
+ infoStream.message("TP", message);
+ }
return true;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Jul 3 23:26:32 2013
@@ -110,6 +110,10 @@ public final class IndexWriterConfig ext
* others to finish. Default value is 8. */
public final static int DEFAULT_MAX_THREAD_STATES = 8;
+ /** Default value for compound file system for newly written segments
+ * (set to <code>true</code>). For batch indexing with very large
+ * ram buffers use <code>false</code> */
+ public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
@@ -540,5 +544,9 @@ public final class IndexWriterConfig ext
public IndexWriterConfig setTermIndexInterval(int interval) {
return (IndexWriterConfig) super.setTermIndexInterval(interval);
}
+
+ public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
+ return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
+ }
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java Wed Jul 3 23:26:32 2013
@@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
/** {@link Version} that {@link IndexWriter} should emulate. */
protected final Version matchVersion;
+ /** True if segment flushes should use compound file format */
+ protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
+
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
this.analyzer = analyzer;
@@ -110,6 +113,7 @@ public class LiveIndexWriterConfig {
termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
+ useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
openMode = OpenMode.CREATE_OR_APPEND;
similarity = IndexSearcher.getDefaultSimilarity();
mergeScheduler = new ConcurrentMergeScheduler();
@@ -154,6 +158,7 @@ public class LiveIndexWriterConfig {
readerPooling = config.getReaderPooling();
flushPolicy = config.getFlushPolicy();
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
+ useCompoundFile = config.getUseCompoundFile();
}
/** Returns the default analyzer to use for indexing documents. */
@@ -542,6 +547,33 @@ public class LiveIndexWriterConfig {
return infoStream;
}
+ /**
+ * Sets if the {@link IndexWriter} should pack newly written segments in a
+ * compound file. Default is <code>true</code>.
+ * <p>
+ * Use <code>false</code> for batch indexing with very large ram buffer
+ * settings.
+ * </p>
+ * <p>
+ * <b>Note: To control compound file usage during segment merges see
+ * {@link MergePolicy#setNoCFSRatio(double)} and
+ * {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This setting only
+ * applies to newly created segments.</b>
+ * </p>
+ */
+ public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
+ this.useCompoundFile = useCompoundFile;
+ return this;
+ }
+
+ /**
+ * Returns <code>true</code> iff the {@link IndexWriter} packs
+ * newly written segments in a compound file. Default is <code>true</code>.
+ */
+ public boolean getUseCompoundFile() {
+ return useCompoundFile ;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -567,7 +599,10 @@ public class LiveIndexWriterConfig {
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
+ sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
return sb.toString();
}
+
+
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Jul 3 23:26:32 2013
@@ -64,16 +64,9 @@ public abstract class LogMergePolicy ext
/** Default noCFSRatio. If a merge's size is >= 10% of
* the index, then we disable compound file for it.
- * @see #setNoCFSRatio */
+ * @see MergePolicy#setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
- /** Default maxCFSSegmentSize value allows compound file
- * for a segment of any size. The actual file format is
- * still subject to noCFSRatio.
- * @see #setMaxCFSSegmentSizeMB(double)
- */
- public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
-
/** How many segments to merge at a time. */
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
@@ -96,30 +89,14 @@ public abstract class LogMergePolicy ext
* will never be merged. */
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- /** If the size of the merge segment exceeds this ratio of
- * the total index size then it will remain in
- * non-compound format even if {@link
- * #setUseCompoundFile} is {@code true}. */
- protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
-
- /** If the size of the merged segment exceeds
- * this value then it will not use compound file format. */
- protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
-
/** If true, we pro-rate a segment's size by the
* percentage of non-deleted documents. */
protected boolean calibrateSizeByDeletes = true;
- /** True if new segments (flushed or merged) should use
- * the compound file format. Note that large segments
- * may sometimes still use non-compound format (see
- * {@link #setNoCFSRatio}. */
- protected boolean useCompoundFile = true;
-
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
public LogMergePolicy() {
- super();
+ super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
/** Returns true if {@code LMP} is enabled in {@link
@@ -129,25 +106,6 @@ public abstract class LogMergePolicy ext
return w != null && w.infoStream.isEnabled("LMP");
}
- /** Returns current {@code noCFSRatio}.
- *
- * @see #setNoCFSRatio */
- public double getNoCFSRatio() {
- return noCFSRatio;
- }
-
- /** If a merged segment will be more than this percentage
- * of the total size of the index, leave the segment as
- * non-compound file even if compound file is enabled.
- * Set to 1.0 to always use CFS regardless of merge
- * size. */
- public void setNoCFSRatio(double noCFSRatio) {
- if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
- throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
- }
- this.noCFSRatio = noCFSRatio;
- }
-
/** Print a debug message to {@link IndexWriter}'s {@code
* infoStream}. */
protected void message(String message) {
@@ -178,39 +136,6 @@ public abstract class LogMergePolicy ext
this.mergeFactor = mergeFactor;
}
- // Javadoc inherited
- @Override
- public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
- if (!getUseCompoundFile()) {
- return false;
- }
- long mergedInfoSize = size(mergedInfo);
- if (mergedInfoSize > maxCFSSegmentSize) {
- return false;
- }
- if (getNoCFSRatio() >= 1.0) {
- return true;
- }
- long totalSize = 0;
- for (SegmentInfoPerCommit info : infos) {
- totalSize += size(info);
- }
- return mergedInfoSize <= getNoCFSRatio() * totalSize;
- }
-
- /** Sets whether compound file format should be used for
- * newly flushed and newly merged segments. */
- public void setUseCompoundFile(boolean useCompoundFile) {
- this.useCompoundFile = useCompoundFile;
- }
-
- /** Returns true if newly flushed and newly merge segments
- * are written in compound file format. @see
- * #setUseCompoundFile */
- public boolean getUseCompoundFile() {
- return useCompoundFile;
- }
-
/** Sets whether the segment size should be calibrated by
* the number of deletes when choosing segments for merge. */
public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
@@ -226,9 +151,6 @@ public abstract class LogMergePolicy ext
@Override
public void close() {}
- /** Return the size of the provided {@link
- * SegmentInfoPerCommit}. */
- abstract protected long size(SegmentInfoPerCommit info) throws IOException;
/** Return the number of documents in the provided {@link
* SegmentInfoPerCommit}, pro-rated by percentage of
@@ -249,15 +171,10 @@ public abstract class LogMergePolicy ext
* non-deleted documents if {@link
* #setCalibrateSizeByDeletes} is set. */
protected long sizeBytes(SegmentInfoPerCommit info) throws IOException {
- long byteSize = info.sizeInBytes();
if (calibrateSizeByDeletes) {
- int delCount = writer.get().numDeletedDocs(info);
- double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
- assert delRatio <= 1.0;
- return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
- } else {
- return byteSize;
+ return super.size(info);
}
+ return info.sizeInBytes();
}
/** Returns true if the number of segments eligible for
@@ -282,19 +199,6 @@ public abstract class LogMergePolicy ext
(numToMerge != 1 || !segmentIsOriginal || isMerged(mergeInfo));
}
- /** Returns true if this single info is already fully merged (has no
- * pending norms or deletes, is in the same dir as the
- * writer, and matches the current compound file setting */
- protected boolean isMerged(SegmentInfoPerCommit info)
- throws IOException {
- IndexWriter w = writer.get();
- assert w != null;
- boolean hasDeletions = w.numDeletedDocs(info) > 0;
- return !hasDeletions &&
- info.info.dir == w.getDirectory() &&
- (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
- }
-
/**
* Returns the merges necessary to merge the index, taking the max merge
* size or max merge docs into consideration. This method attempts to respect
@@ -726,29 +630,10 @@ public abstract class LogMergePolicy ext
sb.append("maxMergeSizeForForcedMerge=").append(maxMergeSizeForForcedMerge).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
- sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
- /** Returns the largest size allowed for a compound file segment */
- public final double getMaxCFSSegmentSizeMB() {
- return maxCFSSegmentSize/1024/1024.;
- }
-
- /** If a merged segment will be more than this value,
- * leave the segment as
- * non-compound file even if compound file is enabled.
- * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
- * to always use CFS regardless of merge size. */
- public final void setMaxCFSSegmentSizeMB(double v) {
- if (v < 0.0) {
- throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
- }
- v *= 1024 * 1024;
- this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
- }
-
}