You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2013/07/04 01:26:45 UTC
svn commit: r1499601 [4/20] - in /lucene/dev/branches/security: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/solr/core/src/test/ dev-tools/maven/ dev-tools/maven/lucene/
dev-tools/maven/lucene/analysis/stempel/ dev-to...
Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java Wed Jul 3 23:26:32 2013
@@ -18,11 +18,20 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
import java.util.TreeSet;
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.Version;
/**
* TODO: The tests below rely on the order of returned lemmas, which is probably not good.
@@ -56,10 +65,22 @@ public class TestMorfologikAnalyzer exte
assertAnalyzesToReuse(
a,
"T. Gl\u00FCcksberg",
- new String[] { "to", "tom", "tona", "Gl\u00FCcksberg" },
- new int[] { 0, 0, 0, 3 },
- new int[] { 1, 1, 1, 13 },
- new int[] { 1, 0, 0, 1 });
+ new String[] { "tom", "tona", "Gl\u00FCcksberg" },
+ new int[] { 0, 0, 3 },
+ new int[] { 1, 1, 13 },
+ new int[] { 1, 0, 1 });
+ }
+
+ @SuppressWarnings("unused")
+ private void dumpTokens(String input) throws IOException {
+ TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader(input));
+ ts.reset();
+
+ MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
+ CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class);
+ while (ts.incrementToken()) {
+ System.out.println(charTerm.toString() + " => " + attribute.getTags());
+ }
}
/** Test reuse of MorfologikFilter with leftover stems. */
@@ -144,6 +165,34 @@ public class TestMorfologikAnalyzer exte
ts.close();
}
+ /** */
+ public final void testKeywordAttrTokens() throws IOException {
+ final Version version = TEST_VERSION_CURRENT;
+
+ Analyzer a = new MorfologikAnalyzer(version) {
+ @Override
+ protected TokenStreamComponents createComponents(String field, Reader reader) {
+ final CharArraySet keywords = new CharArraySet(version, 1, false);
+ keywords.add("liście");
+
+ final Tokenizer src = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+ TokenStream result = new StandardFilter(TEST_VERSION_CURRENT, src);
+ result = new SetKeywordMarkerFilter(result, keywords);
+ result = new MorfologikFilter(result, TEST_VERSION_CURRENT);
+
+ return new TokenStreamComponents(src, result);
+ }
+ };
+
+ assertAnalyzesToReuse(
+ a,
+ "liście danych",
+ new String[] { "liście", "dany", "dana", "dane", "dać" },
+ new int[] { 0, 7, 7, 7, 7 },
+ new int[] { 6, 13, 13, 13, 13 },
+ new int[] { 1, 1, 0, 0, 0 });
+ }
+
/** blast some random strings through the analyzer */
public void testRandom() throws Exception {
checkRandomData(random(), getTestAnalyzer(), 1000 * RANDOM_MULTIPLIER);
Modified: lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java (original)
+++ lucene/dev/branches/security/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikFilterFactory.java Wed Jul 3 23:26:32 2013
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.morfo
*/
import java.io.StringReader;
+import java.util.Collections;
import java.util.HashMap;
-import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -31,10 +31,7 @@ import org.apache.lucene.analysis.TokenS
public class TestMorfologikFilterFactory extends BaseTokenStreamTestCase {
public void testCreateDictionary() throws Exception {
StringReader reader = new StringReader("rowery bilety");
- Map<String,String> initParams = new HashMap<String,String>();
- initParams.put(MorfologikFilterFactory.DICTIONARY_SCHEMA_ATTRIBUTE,
- "morfologik");
- MorfologikFilterFactory factory = new MorfologikFilterFactory(initParams);
+ MorfologikFilterFactory factory = new MorfologikFilterFactory(Collections.<String,String>emptyMap());
TokenStream stream = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
stream = factory.create(stream);
assertTokenStreamContents(stream, new String[] {"rower", "bilet"});
Modified: lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Wed Jul 3 23:26:32 2013
@@ -119,14 +119,9 @@ public class CreateIndexTask extends Per
if (mergeScheduler.equals("org.apache.lucene.index.ConcurrentMergeScheduler")) {
ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwConf.getMergeScheduler();
- int v = config.get("concurrent.merge.scheduler.max.thread.count", -1);
- if (v != -1) {
- cms.setMaxThreadCount(v);
- }
- v = config.get("concurrent.merge.scheduler.max.merge.count", -1);
- if (v != -1) {
- cms.setMaxMergeCount(v);
- }
+ int maxThreadCount = config.get("concurrent.merge.scheduler.max.thread.count", ConcurrentMergeScheduler.DEFAULT_MAX_THREAD_COUNT);
+ int maxMergeCount = config.get("concurrent.merge.scheduler.max.merge.count", ConcurrentMergeScheduler.DEFAULT_MAX_MERGE_COUNT);
+ cms.setMaxMergesAndThreads(maxMergeCount, maxThreadCount);
}
}
@@ -151,13 +146,10 @@ public class CreateIndexTask extends Per
} catch (Exception e) {
throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
}
+ iwConf.getMergePolicy().setNoCFSRatio(isCompound ? 1.0 : 0.0);
if(iwConf.getMergePolicy() instanceof LogMergePolicy) {
LogMergePolicy logMergePolicy = (LogMergePolicy) iwConf.getMergePolicy();
- logMergePolicy.setUseCompoundFile(isCompound);
logMergePolicy.setMergeFactor(config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR));
- } else if(iwConf.getMergePolicy() instanceof TieredMergePolicy) {
- TieredMergePolicy tieredMergePolicy = (TieredMergePolicy) iwConf.getMergePolicy();
- tieredMergePolicy.setUseCompoundFile(isCompound);
}
}
final double ramBuffer = config.get("ram.flush.mb",OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
Modified: lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/dev/branches/security/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Wed Jul 3 23:26:32 2013
@@ -49,6 +49,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.SerialMergeScheduler;
@@ -754,7 +755,7 @@ public class TestPerfTasksLogic extends
assertEquals(2, writer.getConfig().getMaxBufferedDocs());
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
- assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile());
+ assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
writer.close();
Directory dir = benchmark.getRunData().getDirectory();
IndexReader reader = DirectoryReader.open(dir);
Modified: lucene/dev/branches/security/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/build.xml (original)
+++ lucene/dev/branches/security/lucene/build.xml Wed Jul 3 23:26:32 2013
@@ -183,7 +183,10 @@
<forbidden-apis internalRuntimeForbidden="true" classpathref="forbidden-apis.classpath">
<bundledSignatures name="jdk-unsafe-${javac.target}"/>
<bundledSignatures name="jdk-deprecated-${javac.target}"/>
- <signaturesFileSet file="${common.dir}/tools/forbiddenApis/executors.txt"/>
+ <signaturesFileSet dir="${common.dir}/tools/forbiddenApis">
+ <include name="executors.txt" />
+ <include name="chars.txt" />
+ </signaturesFileSet>
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
@@ -345,7 +348,7 @@
</target>
<!-- rat-sources-typedef is *not* a useless dependency. do not remove -->
- <target name="rat-sources" depends="rat-sources-typedef">
+ <target name="rat-sources" depends="rat-sources-typedef,common.rat-sources">
<subant target="rat-sources" failonerror="true" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
<fileset dir="core" includes="build.xml"/>
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java Wed Jul 3 23:26:32 2013
@@ -44,7 +44,7 @@ import org.apache.lucene.util.fst.Util;
* @lucene.experimental */
public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
- private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton(true);
+ private final PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
private int indexDivisor;
// Closed if indexLoaded is true:
@@ -199,7 +199,7 @@ public class VariableGapTermsIndexReader
if (indexDivisor > 1) {
// subsample
final IntsRef scratchIntsRef = new IntsRef();
- final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexWriter.java Wed Jul 3 23:26:32 2013
@@ -235,7 +235,7 @@ public class VariableGapTermsIndexWriter
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
this.fieldInfo = fieldInfo;
- fstOutputs = PositiveIntOutputs.getSingleton(true);
+ fstOutputs = PositiveIntOutputs.getSingleton();
fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, fstOutputs);
indexStart = out.getFilePointer();
////System.out.println("VGW: field=" + fieldInfo.name);
Modified: lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/security/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Wed Jul 3 23:26:32 2013
@@ -513,7 +513,7 @@ class SimpleTextFieldsReader extends Fie
}
private void loadTerms() throws IOException {
- PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
+ PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
Modified: lucene/dev/branches/security/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/common-build.xml?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/common-build.xml (original)
+++ lucene/dev/branches/security/lucene/common-build.xml Wed Jul 3 23:26:32 2013
@@ -186,6 +186,7 @@
<property name="build.encoding" value="utf-8"/>
<property name="src.dir" location="src/java"/>
+ <property name="resources.dir" location="${src.dir}/../resources"/>
<property name="tests.src.dir" location="src/test"/>
<available property="module.has.tests" type="dir" file="${tests.src.dir}"/>
<property name="build.dir" location="build"/>
@@ -255,6 +256,10 @@
<!-- a reasonable default exclusion set, can be overridden for special cases -->
<property name="rat.excludes" value="**/TODO,**/*.txt,**/*.iml"/>
+
+ <!-- These patterns can be defined to add additional files for checks, relative to module's home dir -->
+ <property name="rat.additional-includes" value=""/>
+ <property name="rat.additional-excludes" value=""/>
<propertyset id="uptodate.and.compiled.properties" dynamic="true">
<propertyref regex=".*\.uptodate$$"/>
@@ -351,7 +356,7 @@
<target name="resolve" depends="ivy-availability-check,ivy-configure">
<!-- todo, make this a property or something.
only special cases need bundles -->
- <ivy:retrieve type="jar,bundle" log="download-only"
+ <ivy:retrieve type="jar,bundle,tests" log="download-only"
conf="${ivy.default.configuration}" sync="${ivy.sync}"/>
</target>
@@ -489,7 +494,7 @@
<!-- Copy the resources folder (if existent) -->
<copy todir="${build.dir}/classes/java">
- <fileset dir="${src.dir}/../resources" erroronmissingdir="no"/>
+ <fileset dir="${resources.dir}" erroronmissingdir="no"/>
</copy>
</target>
@@ -864,6 +869,7 @@
<mkdir dir="${tests.cachedir}/${name}" />
<junit4:junit4
+ taskName="junit4"
dir="@{workDir}"
tempdir="@{workDir}/temp"
maxmemory="${tests.heapsize}"
@@ -1339,7 +1345,7 @@ ${tests-output}/junit4-*.suites - pe
<sequential>
<mkdir dir="${build.dir}" />
<jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
- <fileset dir="${src.dir}/../resources" erroronmissingdir="no"/>
+ <fileset dir="${resources.dir}" erroronmissingdir="no"/>
</jarify>
</sequential>
</target>
@@ -1454,7 +1460,7 @@ ${tests-output}/junit4-*.suites - pe
<target name="filter-pom-templates" unless="filtered.pom.templates.uptodate">
<mkdir dir="${filtered.pom.templates.dir}"/>
- <copy todir="${common.dir}/build/poms" overwrite="true">
+ <copy todir="${common.dir}/build/poms" overwrite="true" encoding="UTF-8">
<fileset dir="${common.dir}/../dev-tools/maven"/>
<filterset begintoken="@" endtoken="@">
<filter token="version" value="${version}"/>
@@ -1508,28 +1514,34 @@ ${tests-output}/junit4-*.suites - pe
</target>
<target name="rat-sources-typedef" unless="rat.loaded">
- <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.8" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
+ <ivy:cachepath organisation="org.apache.rat" module="apache-rat" revision="0.9" transitive="false" inline="true" conf="master" type="jar" pathid="rat.classpath"/>
<typedef resource="org/apache/rat/anttasks/antlib.xml" uri="antlib:org.apache.rat.anttasks" classpathref="rat.classpath"/>
<property name="rat.loaded" value="true"/>
</target>
<target name="rat-sources" depends="rat-sources-typedef"
description="runs the tasks over source and test files">
- <sequential>
<!-- create a temp file for the log to go to -->
<tempfile property="rat.sources.logfile"
prefix="rat"
destdir="${java.io.tmpdir}"/>
<!-- run rat, going to the file -->
<rat:report xmlns:rat="antlib:org.apache.rat.anttasks"
- reportFile="${rat.sources.logfile}">
- <fileset dir="${src.dir}" excludes="${rat.excludes}"/>
+ reportFile="${rat.sources.logfile}" addDefaultLicenseMatchers="true">
+ <fileset dir="." includes="*.xml ${rat.additional-includes}" excludes="${rat.additional-excludes}"/>
+ <fileset dir="${src.dir}" excludes="${rat.excludes}" erroronmissingdir="false"/>
<fileset dir="${tests.src.dir}" excludes="${rat.excludes}" erroronmissingdir="false"/>
- <!-- some modules have a src/tools/[java,test] -->
- <fileset dir="src/tools/java" excludes="${rat.excludes}" erroronmissingdir="false"/>
- <fileset dir="src/tools/test" excludes="${rat.excludes}" erroronmissingdir="false"/>
+
+ <!-- TODO: Check all resource files. Currently not all stopword and similar files have no header! -->
+ <fileset dir="${resources.dir}" includes="META-INF/**" erroronmissingdir="false"/>
- <!-- bsd-like stuff -->
+ <!-- BSD 4-clause stuff (is disallowed below) -->
+ <rat:substringMatcher licenseFamilyCategory="BSD4 "
+ licenseFamilyName="Original BSD License (with advertising clause)">
+ <pattern substring="All advertising materials"/>
+ </rat:substringMatcher>
+
+ <!-- BSD-like stuff -->
<rat:substringMatcher licenseFamilyCategory="BSD "
licenseFamilyName="Modified BSD License">
<!-- brics automaton -->
@@ -1542,16 +1554,20 @@ ${tests-output}/junit4-*.suites - pe
<pattern substring="Egothor Software License version 1.00"/>
<!-- JaSpell -->
<pattern substring="Copyright (c) 2005 Bruno Martins"/>
+ <!-- d3.js -->
+ <pattern substring="THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS"/>
+ <!-- highlight.js -->
+ <pattern substring="THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS"/>
</rat:substringMatcher>
- <!-- mit-like -->
+ <!-- MIT-like -->
<rat:substringMatcher licenseFamilyCategory="MIT "
licenseFamilyName="The MIT License">
<!-- ICU license -->
<pattern substring="Permission is hereby granted, free of charge, to any person obtaining a copy"/>
</rat:substringMatcher>
- <!-- apache -->
+ <!-- Apache -->
<rat:substringMatcher licenseFamilyCategory="AL "
licenseFamilyName="Apache">
<pattern substring="Licensed to the Apache Software Foundation (ASF) under"/>
@@ -1577,13 +1593,13 @@ ${tests-output}/junit4-*.suites - pe
</rat:report>
<!-- now print the output, for review -->
<loadfile property="rat.output" srcFile="${rat.sources.logfile}"/>
- <echo>${rat.output}</echo>
+ <echo taskname="rat">${rat.output}</echo>
<delete>
<fileset file="${rat.sources.logfile}">
<and>
<containsregexp expression="^0 Unknown Licenses"/>
<not>
- <containsregexp expression="^\s+!AL"/>
+ <containsregexp expression="^\s+!"/>
</not>
</and>
</fileset>
@@ -1594,7 +1610,6 @@ ${tests-output}/junit4-*.suites - pe
<available file="${rat.sources.logfile}"/>
</condition>
</fail>
- </sequential>
</target>
<!--+
@@ -1814,10 +1829,71 @@ ${tests-output}/junit4-*.suites - pe
</condition>
</fail>
-
+ <patch-javadoc dir="@{destdir}" docencoding="${javadoc.charset}"/>
</sequential>
</macrodef>
+ <!--
+ Patch frame injection bugs in javadoc generated files - see CVE-2013-1571, http://www.kb.cert.org/vuls/id/225657
+
+ Feel free to use this macro in your own Ant build file. This macro works together with the javadoc task on Ant
+ and should be invoked directly after its execution to patch broken javadocs, e.g.:
+ <patch-javadoc dir="..." docencoding="UTF-8"/>
+ Please make sure that the docencoding parameter uses the same charset like javadoc's docencoding. Default
+ is the platform default encoding (like the javadoc task).
+ The specified dir is the destination directory of the javadoc task.
+ -->
+ <macrodef name="patch-javadoc">
+ <attribute name="dir"/>
+ <attribute name="docencoding" default="${file.encoding}"/>
+ <sequential>
+ <replace encoding="@{docencoding}" summary="true" taskname="patch-javadoc">
+ <fileset dir="@{dir}" casesensitive="false" includes="**/index.html,**/index.htm,**/toc.html,**/toc.htm">
+ <!-- TODO: add encoding="@{docencoding}" to contains check, when we are on ANT 1.9.0: -->
+ <not><contains text="function validURL(url) {" casesensitive="true" /></not>
+ </fileset>
+ <replacetoken><![CDATA[function loadFrames() {]]></replacetoken>
+ <replacevalue expandProperties="false"><![CDATA[if (targetPage != "" && !validURL(targetPage))
+ targetPage = "undefined";
+ function validURL(url) {
+ var pos = url.indexOf(".html");
+ if (pos == -1 || pos != url.length - 5)
+ return false;
+ var allowNumber = false;
+ var allowSep = false;
+ var seenDot = false;
+ for (var i = 0; i < url.length - 5; i++) {
+ var ch = url.charAt(i);
+ if ('a' <= ch && ch <= 'z' ||
+ 'A' <= ch && ch <= 'Z' ||
+ ch == '$' ||
+ ch == '_') {
+ allowNumber = true;
+ allowSep = true;
+ } else if ('0' <= ch && ch <= '9'
+ || ch == '-') {
+ if (!allowNumber)
+ return false;
+ } else if (ch == '/' || ch == '.') {
+ if (!allowSep)
+ return false;
+ allowNumber = false;
+ allowSep = false;
+ if (ch == '.')
+ seenDot = true;
+ if (ch == '/' && seenDot)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ return true;
+ }
+ function loadFrames() {]]></replacevalue>
+ </replace>
+ </sequential>
+ </macrodef>
+
<macrodef name="modules-crawl">
<attribute name="target" default=""/>
<attribute name="failonerror" default="true"/>
@@ -2030,7 +2106,7 @@ ${tests-output}/junit4-*.suites - pe
<element name="nested" optional="false" implicit="true"/>
<sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
- preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8" taskname="pegdown"
+ preservelastmodified="false" encoding="UTF-8" taskname="pegdown"
>
<filterchain>
<tokenfilter>
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Wed Jul 3 23:26:32 2013
@@ -50,10 +50,6 @@ public class BlockTermState extends OrdT
totalTermFreq = other.totalTermFreq;
termBlockOrd = other.termBlockOrd;
blockFilePointer = other.blockFilePointer;
-
- // NOTE: don't copy blockTermCount;
- // it's "transient": used only by the "primary"
- // termState, and regenerated on seek by TermState
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java Wed Jul 3 23:26:32 2013
@@ -17,7 +17,6 @@ package org.apache.lucene.codecs.compres
* limitations under the License.
*/
-import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
@@ -25,16 +24,13 @@ import org.apache.lucene.index.CorruptIn
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.packed.PackedInts;
/**
* Random-access reader for {@link CompressingStoredFieldsIndexWriter}.
* @lucene.internal
*/
-public final class CompressingStoredFieldsIndexReader implements Closeable, Cloneable {
-
- final IndexInput fieldsIndexIn;
+public final class CompressingStoredFieldsIndexReader implements Cloneable {
static long moveLowOrderBitToSign(long n) {
return ((n >>> 1) ^ -(n & 1));
@@ -48,8 +44,9 @@ public final class CompressingStoredFiel
final PackedInts.Reader[] docBasesDeltas; // delta from the avg
final PackedInts.Reader[] startPointersDeltas; // delta from the avg
+ // It is the responsibility of the caller to close fieldsIndexIn after this constructor
+ // has been called
CompressingStoredFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
- this.fieldsIndexIn = fieldsIndexIn;
maxDoc = si.getDocCount();
int[] docBases = new int[16];
long[] startPointers = new long[16];
@@ -106,17 +103,6 @@ public final class CompressingStoredFiel
this.startPointersDeltas = Arrays.copyOf(startPointersDeltas, blockCount);
}
- private CompressingStoredFieldsIndexReader(CompressingStoredFieldsIndexReader other) {
- this.fieldsIndexIn = null;
- this.maxDoc = other.maxDoc;
- this.docBases = other.docBases;
- this.startPointers = other.startPointers;
- this.avgChunkDocs = other.avgChunkDocs;
- this.avgChunkSizes = other.avgChunkSizes;
- this.docBasesDeltas = other.docBasesDeltas;
- this.startPointersDeltas = other.startPointersDeltas;
- }
-
private int block(int docID) {
int lo = 0, hi = docBases.length - 1;
while (lo <= hi) {
@@ -172,16 +158,7 @@ public final class CompressingStoredFiel
@Override
public CompressingStoredFieldsIndexReader clone() {
- if (fieldsIndexIn == null) {
- return this;
- } else {
- return new CompressingStoredFieldsIndexReader(this);
- }
- }
-
- @Override
- public void close() throws IOException {
- IOUtils.close(fieldsIndexIn);
+ return this;
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Wed Jul 3 23:26:32 2013
@@ -60,6 +60,9 @@ import org.apache.lucene.util.packed.Pac
*/
public final class CompressingStoredFieldsReader extends StoredFieldsReader {
+ // Do not reuse the decompression buffer when there is more than 32kb to decompress
+ private static final int BUFFER_REUSE_THRESHOLD = 1 << 15;
+
private final FieldInfos fieldInfos;
private final CompressingStoredFieldsIndexReader indexReader;
private final IndexInput fieldsStream;
@@ -93,20 +96,23 @@ public final class CompressingStoredFiel
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
- fieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION), context);
+ // Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
-
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
- CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
- assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+ indexStream.close();
indexStream = null;
+ // Open the data file and read metadata
+ final String fieldsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, FIELDS_EXTENSION);
+ fieldsStream = d.openInput(fieldsStreamFN, context);
+ final String codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.checkHeader(fieldsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+ assert CodecUtil.headerLength(codecNameDat) == fieldsStream.getFilePointer();
+
packedIntsVersion = fieldsStream.readVInt();
decompressor = compressionMode.newDecompressor();
this.bytes = new BytesRef();
@@ -134,7 +140,7 @@ public final class CompressingStoredFiel
@Override
public void close() throws IOException {
if (!closed) {
- IOUtils.close(fieldsStream, indexReader);
+ IOUtils.close(fieldsStream);
closed = true;
}
}
@@ -255,6 +261,7 @@ public final class CompressingStoredFiel
return;
}
+ final BytesRef bytes = totalLength <= BUFFER_REUSE_THRESHOLD ? this.bytes : new BytesRef();
decompressor.decompress(fieldsStream, totalLength, offset, length, bytes);
assert bytes.length == length;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Wed Jul 3 23:26:32 2013
@@ -102,20 +102,23 @@ public final class CompressingTermVector
numDocs = si.getDocCount();
IndexInput indexStream = null;
try {
- vectorsStream = d.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION), context);
+ // Load the index into memory
final String indexStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION);
indexStream = d.openInput(indexStreamFN, context);
-
final String codecNameIdx = formatName + CODEC_SFX_IDX;
- final String codecNameDat = formatName + CODEC_SFX_DAT;
CodecUtil.checkHeader(indexStream, codecNameIdx, VERSION_START, VERSION_CURRENT);
- CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
- assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
assert CodecUtil.headerLength(codecNameIdx) == indexStream.getFilePointer();
-
indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);
+ indexStream.close();
indexStream = null;
+ // Open the data file and read metadata
+ final String vectorsStreamFN = IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
+ vectorsStream = d.openInput(vectorsStreamFN, context);
+ final String codecNameDat = formatName + CODEC_SFX_DAT;
+ CodecUtil.checkHeader(vectorsStream, codecNameDat, VERSION_START, VERSION_CURRENT);
+ assert CodecUtil.headerLength(codecNameDat) == vectorsStream.getFilePointer();
+
packedIntsVersion = vectorsStream.readVInt();
chunkSize = vectorsStream.readVInt();
decompressor = compressionMode.newDecompressor();
@@ -161,7 +164,7 @@ public final class CompressingTermVector
@Override
public void close() throws IOException {
if (!closed) {
- IOUtils.close(vectorsStream, indexReader);
+ IOUtils.close(vectorsStream);
closed = true;
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java Wed Jul 3 23:26:32 2013
@@ -242,8 +242,8 @@ public final class Lucene40TermVectorsWr
if (payloads) {
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
}
- for (int i = 0; i < bufferedIndex; i++) {
- if (offsets) {
+ if (offsets) {
+ for (int i = 0; i < bufferedIndex; i++) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene40/package.html Wed Jul 3 23:26:32 2013
@@ -372,13 +372,7 @@ term vectors.</li>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/Lucene41PostingsFormat.java Wed Jul 3 23:26:32 2013
@@ -161,7 +161,7 @@ import org.apache.lucene.util.packed.Pac
* <li>SkipFPDelta determines the position of this term's SkipData within the .doc
* file. In particular, it is the length of the TermFreq data.
* SkipDelta is only stored if DocFreq is not smaller than SkipMinimum
- * (i.e. 8 in Lucene41PostingsFormat).</li>
+ * (i.e. 128 in Lucene41PostingsFormat).</li>
* <li>SingletonDocID is an optimization when a term only appears in one document. In this case, instead
* of writing a file pointer to the .doc file (DocFPDelta), and then a VIntBlock at that location, the
* single document ID is written to the term dictionary.</li>
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene41/package.html Wed Jul 3 23:26:32 2013
@@ -381,13 +381,7 @@ the term dictionary. Stored fields are c
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Wed Jul 3 23:26:32 2013
@@ -245,7 +245,7 @@ class Lucene42DocValuesConsumer extends
meta.writeVInt(field.number);
meta.writeByte(FST);
meta.writeLong(data.getFilePointer());
- PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
+ PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(INPUT_TYPE.BYTE1, outputs);
IntsRef scratch = new IntsRef();
long ord = 0;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java Wed Jul 3 23:26:32 2013
@@ -120,16 +120,33 @@ import org.apache.lucene.util.packed.Blo
* </ol>
*/
public final class Lucene42DocValuesFormat extends DocValuesFormat {
-
- /** Sole constructor */
+ final float acceptableOverheadRatio;
+
+ /**
+ * Calls {@link #Lucene42DocValuesFormat(float)
+ * Lucene42DocValuesFormat(PackedInts.DEFAULT)}
+ */
public Lucene42DocValuesFormat() {
+ this(PackedInts.DEFAULT);
+ }
+
+ /**
+ * Creates a new Lucene42DocValuesFormat with the specified
+ * <code>acceptableOverheadRatio</code> for NumericDocValues.
+ * @param acceptableOverheadRatio compression parameter for numerics.
+ * Currently this is only used when the number of unique values is small.
+ *
+ * @lucene.experimental
+ */
+ public Lucene42DocValuesFormat(float acceptableOverheadRatio) {
super("Lucene42");
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
// note: we choose DEFAULT here (its reasonably fast, and for small bpv has tiny waste)
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, PackedInts.DEFAULT);
+ return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Wed Jul 3 23:26:32 2013
@@ -278,7 +278,7 @@ class Lucene42DocValuesProducer extends
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
@@ -352,7 +352,7 @@ class Lucene42DocValuesProducer extends
instance = fstInstances.get(field.number);
if (instance == null) {
data.seek(entry.offset);
- instance = new FST<Long>(data, PositiveIntOutputs.getSingleton(true));
+ instance = new FST<Long>(data, PositiveIntOutputs.getSingleton());
fstInstances.put(field.number, instance);
}
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42NormsFormat.java Wed Jul 3 23:26:32 2013
@@ -42,14 +42,32 @@ import org.apache.lucene.util.packed.Pac
* @see Lucene42DocValuesFormat
*/
public final class Lucene42NormsFormat extends NormsFormat {
+ final float acceptableOverheadRatio;
- /** Sole constructor */
- public Lucene42NormsFormat() {}
+ /**
+ * Calls {@link #Lucene42NormsFormat(float)
+ * Lucene42NormsFormat(PackedInts.FASTEST)}
+ */
+ public Lucene42NormsFormat() {
+ // note: we choose FASTEST here (otherwise our norms are half as big but 15% slower than previous lucene)
+ this(PackedInts.FASTEST);
+ }
+
+ /**
+ * Creates a new Lucene42NormsFormat with the specified
+ * <code>acceptableOverheadRatio</code> for NumericDocValues.
+ * @param acceptableOverheadRatio compression parameter for numerics.
+ * Currently this is only used when the number of unique values is small.
+ *
+ * @lucene.experimental
+ */
+ public Lucene42NormsFormat(float acceptableOverheadRatio) {
+ this.acceptableOverheadRatio = acceptableOverheadRatio;
+ }
@Override
public DocValuesConsumer normsConsumer(SegmentWriteState state) throws IOException {
- // note: we choose FASTEST here (otherwise our norms are half as big but 15% slower than previous lucene)
- return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, PackedInts.FASTEST);
+ return new Lucene42DocValuesConsumer(state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION, acceptableOverheadRatio);
}
@Override
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/codecs/lucene42/package.html Wed Jul 3 23:26:32 2013
@@ -384,13 +384,7 @@ on multi-valued fields.</li>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
<div>
-<p>When referring to term numbers, Lucene's current implementation uses a Java
-<code>int</code> to hold the term index, which means the
-maximum number of unique terms in any single index segment is ~2.1 billion
-times the term index interval (default 128) = ~274 billion. This is technically
-not a limitation of the index file format, just of Lucene's current
-implementation.</p>
-<p>Similarly, Lucene uses a Java <code>int</code> to refer to
+<p>Lucene uses a Java <code>int</code> to refer to
document numbers, and the index file format uses an <code>Int32</code>
on-disk to store document numbers. This is a limitation
of both the index file format and the current implementation. Eventually these
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Wed Jul 3 23:26:32 2013
@@ -463,11 +463,11 @@ public class CheckIndex {
if (onlySegments != null) {
result.partial = true;
- if (infoStream != null)
+ if (infoStream != null) {
infoStream.print("\nChecking only these segments:");
- for (String s : onlySegments) {
- if (infoStream != null)
+ for (String s : onlySegments) {
infoStream.print(" " + s);
+ }
}
result.segmentsChecked.addAll(onlySegments);
msg(infoStream, ":");
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Wed Jul 3 23:26:32 2013
@@ -30,12 +30,11 @@ import java.util.Comparator;
* separate thread.
*
* <p>Specify the max number of threads that may run at
- * once with {@link #setMaxThreadCount}.</p>
+ * once, and the maximum number of simultaneous merges
+ * with {@link #setMaxMergesAndThreads}.</p>
*
- * <p>Separately specify the maximum number of simultaneous
- * merges with {@link #setMaxMergeCount}. If the number of
- * merges exceeds the max number of threads then the
- * largest merges are paused until one of the smaller
+ * <p>If the number of merges exceeds the max number of threads
+ * then the largest merges are paused until one of the smaller
* merges completes.</p>
*
* <p>If more than {@link #getMaxMergeCount} merges are
@@ -49,21 +48,29 @@ public class ConcurrentMergeScheduler ex
/** List of currently active {@link MergeThread}s. */
protected List<MergeThread> mergeThreads = new ArrayList<MergeThread>();
+
+ /**
+ * Default {@code maxThreadCount}.
+ * We default to 1: tests on spinning-magnet drives showed slower
+ * indexing performance if more than one merge thread runs at
+ * once (though on an SSD it was faster)
+ */
+ public static final int DEFAULT_MAX_THREAD_COUNT = 1;
+
+ /** Default {@code maxMergeCount}. */
+ public static final int DEFAULT_MAX_MERGE_COUNT = 2;
// Max number of merge threads allowed to be running at
// once. When there are more merges then this, we
// forcefully pause the larger ones, letting the smaller
// ones run, up until maxMergeCount merges at which point
// we forcefully pause incoming threads (that presumably
- // are the ones causing so much merging). We default to 1
- // here: tests on spinning-magnet drives showed slower
- // indexing perf if more than one merge thread runs at
- // once (though on an SSD it was faster):
- private int maxThreadCount = 1;
+ // are the ones causing so much merging).
+ private int maxThreadCount = DEFAULT_MAX_THREAD_COUNT;
// Max number of merges we accept before forcefully
// throttling the incoming threads
- private int maxMergeCount = 2;
+ private int maxMergeCount = DEFAULT_MAX_MERGE_COUNT;
/** {@link Directory} that holds the index. */
protected Directory dir;
@@ -80,43 +87,40 @@ public class ConcurrentMergeScheduler ex
public ConcurrentMergeScheduler() {
}
- /** Sets the max # simultaneous merge threads that should
- * be running at once. This must be <= {@link
- * #setMaxMergeCount}. */
- public void setMaxThreadCount(int count) {
- if (count < 1) {
- throw new IllegalArgumentException("count should be at least 1");
+ /**
+ * Sets the maximum number of merge threads and simultaneous merges allowed.
+ *
+ * @param maxMergeCount the max # simultaneous merges that are allowed.
+ * If a merge is necessary yet we already have this many
+ * threads running, the incoming thread (that is calling
+ * add/updateDocument) will block until a merge thread
+ * has completed. Note that we will only run the
+ * smallest <code>maxThreadCount</code> merges at a time.
+ * @param maxThreadCount the max # simultaneous merge threads that should
+ * be running at once. This must be &lt;= <code>maxMergeCount</code>
+ */
+ public void setMaxMergesAndThreads(int maxMergeCount, int maxThreadCount) {
+ if (maxThreadCount < 1) {
+ throw new IllegalArgumentException("maxThreadCount should be at least 1");
}
- if (count > maxMergeCount) {
- throw new IllegalArgumentException("count should be <= maxMergeCount (= " + maxMergeCount + ")");
+ if (maxMergeCount < 1) {
+ throw new IllegalArgumentException("maxMergeCount should be at least 1");
}
- maxThreadCount = count;
+ if (maxThreadCount > maxMergeCount) {
+ throw new IllegalArgumentException("maxThreadCount should be <= maxMergeCount (= " + maxMergeCount + ")");
+ }
+ this.maxThreadCount = maxThreadCount;
+ this.maxMergeCount = maxMergeCount;
}
/** Returns {@code maxThreadCount}.
*
- * @see #setMaxThreadCount(int) */
+ * @see #setMaxMergesAndThreads(int, int) */
public int getMaxThreadCount() {
return maxThreadCount;
}
- /** Sets the max # simultaneous merges that are allowed.
- * If a merge is necessary yet we already have this many
- * threads running, the incoming thread (that is calling
- * add/updateDocument) will block until a merge thread
- * has completed. Note that we will only run the
- * smallest {@link #setMaxThreadCount} merges at a time. */
- public void setMaxMergeCount(int count) {
- if (count < 1) {
- throw new IllegalArgumentException("count should be at least 1");
- }
- if (count < maxThreadCount) {
- throw new IllegalArgumentException("count should be >= maxThreadCount (= " + maxThreadCount + ")");
- }
- maxMergeCount = count;
- }
-
- /** See {@link #setMaxMergeCount}. */
+ /** See {@link #setMaxMergesAndThreads}. */
public int getMaxMergeCount() {
return maxMergeCount;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Wed Jul 3 23:26:32 2013
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -291,7 +292,7 @@ public abstract class DirectoryReader ex
// IOException allowed to throw there, in case
// segments_N is corrupt
sis.read(dir, fileName);
- } catch (FileNotFoundException fnfe) {
+ } catch (FileNotFoundException | NoSuchFileException fnfe) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocConsumer.java Wed Jul 3 23:26:32 2013
@@ -24,5 +24,4 @@ abstract class DocConsumer {
abstract void finishDocument() throws IOException;
abstract void flush(final SegmentWriteState state) throws IOException;
abstract void abort();
- abstract void doAfterFlush();
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java Wed Jul 3 23:26:32 2013
@@ -144,15 +144,6 @@ final class DocFieldProcessor extends Do
return fields;
}
- /** In flush we reset the fieldHash to not maintain per-field state
- * across segments */
- @Override
- void doAfterFlush() {
- fieldHash = new DocFieldProcessorPerField[2];
- hashMask = 1;
- totalFieldCount = 0;
- }
-
private void rehash() {
final int newHashSize = (fieldHash.length*2);
assert newHashSize > fieldHash.length;
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java Wed Jul 3 23:26:32 2013
@@ -17,12 +17,7 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import java.util.HashMap;
-import java.util.Map;
-
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Counter;
import org.apache.lucene.util.RamUsageEstimator;
/**
@@ -33,19 +28,16 @@ final class DocFieldProcessorPerField {
final DocFieldConsumerPerField consumer;
final FieldInfo fieldInfo;
- private final Counter bytesUsed;
DocFieldProcessorPerField next;
int lastGen = -1;
int fieldCount;
IndexableField[] fields = new IndexableField[1];
- private final Map<FieldInfo,String> dvFields = new HashMap<FieldInfo,String>();
public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
this.fieldInfo = fieldInfo;
- this.bytesUsed = docFieldProcessor.bytesUsed;
}
public void addField(IndexableField field) {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Wed Jul 3 23:26:32 2013
@@ -29,15 +29,11 @@ import org.apache.lucene.index.Documents
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.index.FieldInfos.FieldNumbers;
-import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FlushInfo;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;
-import org.apache.lucene.util.MutableBits;
/**
* This class accepts multiple added documents and directly
@@ -114,6 +110,7 @@ final class DocumentsWriter {
List<String> newFiles;
final IndexWriter indexWriter;
+ final LiveIndexWriterConfig indexWriterConfig;
private AtomicInteger numDocsInRAM = new AtomicInteger(0);
@@ -144,6 +141,7 @@ final class DocumentsWriter {
this.indexWriter = writer;
this.infoStream = config.getInfoStream();
this.similarity = config.getSimilarity();
+ this.indexWriterConfig = writer.getConfig();
this.perThreadPool = config.getIndexerThreadPool();
this.chain = config.getIndexingChain();
this.perThreadPool.initialize(this, globalFieldNumbers, config);
@@ -517,7 +515,7 @@ final class DocumentsWriter {
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
- final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB();
+ final double ramBufferSizeMB = indexWriterConfig.getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Wed Jul 3 23:26:32 2013
@@ -112,7 +112,7 @@ class DocumentsWriterPerThread {
// Only called by asserts
public boolean testPoint(String name) {
- return docWriter.writer.testPoint(name);
+ return docWriter.testPoint(name);
}
public void clear() {
@@ -194,6 +194,7 @@ class DocumentsWriterPerThread {
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;
final IntBlockPool.Allocator intBlockAllocator;
+ private final LiveIndexWriterConfig indexWriterConfig;
public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
@@ -203,6 +204,7 @@ class DocumentsWriterPerThread {
this.parent = parent;
this.fieldInfos = fieldInfos;
this.writer = parent.indexWriter;
+ this.indexWriterConfig = parent.indexWriterConfig;
this.infoStream = parent.infoStream;
this.codec = parent.codec;
this.docState = new DocState(this, infoStream);
@@ -232,6 +234,13 @@ class DocumentsWriterPerThread {
aborting = true;
}
+ final boolean testPoint(String message) {
+ if (infoStream.isEnabled("TP")) {
+ infoStream.message("TP", message);
+ }
+ return true;
+ }
+
boolean checkAndResetHasAborted() {
final boolean retval = hasAborted;
hasAborted = false;
@@ -239,7 +248,7 @@ class DocumentsWriterPerThread {
}
public void updateDocument(IndexDocument doc, Analyzer analyzer, Term delTerm) throws IOException {
- assert writer.testPoint("DocumentsWriterPerThread addDocument start");
+ assert testPoint("DocumentsWriterPerThread addDocument start");
assert deleteQueue != null;
docState.doc = doc;
docState.analyzer = analyzer;
@@ -292,7 +301,7 @@ class DocumentsWriterPerThread {
}
public int updateDocuments(Iterable<? extends IndexDocument> docs, Analyzer analyzer, Term delTerm) throws IOException {
- assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
+ assert testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
if (segmentInfo == null) {
@@ -428,7 +437,6 @@ class DocumentsWriterPerThread {
/** Reset after a flush */
private void doAfterFlush() {
segmentInfo = null;
- consumer.doAfterFlush();
directory.getCreatedFiles().clear();
fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers);
parent.subtractFlushedNumDocs(numDocsInRAM);
@@ -561,7 +569,7 @@ class DocumentsWriterPerThread {
boolean success = false;
try {
- if (writer.useCompoundFile(newSegment)) {
+ if (indexWriterConfig.getUseCompoundFile()) {
// Now build compound file
Collection<String> oldFiles = IndexWriter.createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context);
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java Wed Jul 3 23:26:32 2013
@@ -20,6 +20,8 @@ package org.apache.lucene.index;
import java.util.List;
import java.io.IOException;
+import org.apache.lucene.store.Directory;
+
/**
* <p>Expert: policy for deletion of stale {@link IndexCommit index commits}.
*
@@ -46,6 +48,10 @@ import java.io.IOException;
* target="top"
* href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710</a>
* for details.</p>
+ *
+ * <p>Implementers of sub-classes should make sure that {@link #clone()}
+ * returns an independent instance able to work with any other {@link IndexWriter}
+ * or {@link Directory} instance.</p>
*/
public abstract class IndexDeletionPolicy implements Cloneable {
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Wed Jul 3 23:26:32 2013
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -170,7 +171,7 @@ final class IndexFileDeleter implements
SegmentInfos sis = new SegmentInfos();
try {
sis.read(directory, fileName);
- } catch (FileNotFoundException e) {
+ } catch (FileNotFoundException | NoSuchFileException e) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Wed Jul 3 23:26:32 2013
@@ -214,7 +214,7 @@ public class IndexWriter implements Clos
private final Analyzer analyzer; // how to analyze text
private volatile long changeCount; // increments every time a change is completed
- private long lastCommitChangeCount; // last changeCount that was committed
+ private volatile long lastCommitChangeCount; // last changeCount that was committed
private List<SegmentInfoPerCommit> rollbackSegments; // list of segmentInfo we will fallback to if the commit fails
@@ -631,7 +631,12 @@ public class IndexWriter implements Clos
/**
* Constructs a new IndexWriter per the settings given in <code>conf</code>.
* Note that the passed in {@link IndexWriterConfig} is
- * privately cloned; if you need to make subsequent "live"
+ * privately cloned, which, in-turn, clones the
+ * {@link IndexWriterConfig#getFlushPolicy() flush policy},
+ * {@link IndexWriterConfig#getIndexDeletionPolicy() deletion policy},
+ * {@link IndexWriterConfig#getMergePolicy() merge policy},
+ * and {@link IndexWriterConfig#getMergeScheduler() merge scheduler}.
+ * If you need to make subsequent "live"
* changes to the configuration use {@link #getConfig}.
* <p>
*
@@ -2269,10 +2274,6 @@ public class IndexWriter implements Clos
}
}
- synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) throws IOException {
- return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
- }
-
private synchronized void resetMergeExceptions() {
mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
mergeGen++;
@@ -2826,6 +2827,11 @@ public class IndexWriter implements Clos
commitInternal();
}
+ /** Returns true if there are changes that have not been committed */
+ public final boolean hasUncommittedChanges() {
+ return changeCount != lastCommitChangeCount;
+ }
+
private final void commitInternal() throws IOException {
if (infoStream.isEnabled("IW")) {
@@ -2865,8 +2871,8 @@ public class IndexWriter implements Clos
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: wrote segments file \"" + pendingCommit.getSegmentsFileName() + "\"");
}
- lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
+ lastCommitChangeCount = pendingCommitChangeCount;
rollbackSegments = pendingCommit.createBackupSegmentInfos();
deleter.checkpoint(pendingCommit, true);
} finally {
@@ -4162,7 +4168,10 @@ public class IndexWriter implements Clos
// startCommitMergeDeletes
// startMergeInit
// DocumentsWriter.ThreadState.init start
- boolean testPoint(String name) {
+ private final boolean testPoint(String message) {
+ if (infoStream.isEnabled("TP")) {
+ infoStream.message("TP", message);
+ }
return true;
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java Wed Jul 3 23:26:32 2013
@@ -110,6 +110,10 @@ public final class IndexWriterConfig ext
* others to finish. Default value is 8. */
public final static int DEFAULT_MAX_THREAD_STATES = 8;
+ /** Default value for compound file system for newly written segments
+ * (set to <code>true</code>). For batch indexing with very large
+ * ram buffers use <code>false</code> */
+ public final static boolean DEFAULT_USE_COMPOUND_FILE_SYSTEM = true;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
@@ -540,5 +544,9 @@ public final class IndexWriterConfig ext
public IndexWriterConfig setTermIndexInterval(int interval) {
return (IndexWriterConfig) super.setTermIndexInterval(interval);
}
+
+ public IndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
+ return (IndexWriterConfig) super.setUseCompoundFile(useCompoundFile);
+ }
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java Wed Jul 3 23:26:32 2013
@@ -98,6 +98,9 @@ public class LiveIndexWriterConfig {
/** {@link Version} that {@link IndexWriter} should emulate. */
protected final Version matchVersion;
+ /** True if segment flushes should use compound file format */
+ protected volatile boolean useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
+
// used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
this.analyzer = analyzer;
@@ -110,6 +113,7 @@ public class LiveIndexWriterConfig {
termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
delPolicy = new KeepOnlyLastCommitDeletionPolicy();
commit = null;
+ useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
openMode = OpenMode.CREATE_OR_APPEND;
similarity = IndexSearcher.getDefaultSimilarity();
mergeScheduler = new ConcurrentMergeScheduler();
@@ -154,6 +158,7 @@ public class LiveIndexWriterConfig {
readerPooling = config.getReaderPooling();
flushPolicy = config.getFlushPolicy();
perThreadHardLimitMB = config.getRAMPerThreadHardLimitMB();
+ useCompoundFile = config.getUseCompoundFile();
}
/** Returns the default analyzer to use for indexing documents. */
@@ -542,6 +547,33 @@ public class LiveIndexWriterConfig {
return infoStream;
}
+ /**
+ * Sets if the {@link IndexWriter} should pack newly written segments in a
+ * compound file. Default is <code>true</code>.
+ * <p>
+ * Use <code>false</code> for batch indexing with very large ram buffer
+ * settings.
+ * </p>
+ * <p>
+ * <b>Note: To control compound file usage during segment merges see
+ * {@link MergePolicy#setNoCFSRatio(double)} and
+ * {@link MergePolicy#setMaxCFSSegmentSizeMB(double)}. This setting only
+ * applies to newly created segments.</b>
+ * </p>
+ */
+ public LiveIndexWriterConfig setUseCompoundFile(boolean useCompoundFile) {
+ this.useCompoundFile = useCompoundFile;
+ return this;
+ }
+
+ /**
+ * Returns <code>true</code> iff the {@link IndexWriter} packs
+ * newly written segments in a compound file. Default is <code>true</code>.
+ */
+ public boolean getUseCompoundFile() {
+ return useCompoundFile ;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@@ -567,7 +599,10 @@ public class LiveIndexWriterConfig {
sb.append("indexerThreadPool=").append(getIndexerThreadPool()).append("\n");
sb.append("readerPooling=").append(getReaderPooling()).append("\n");
sb.append("perThreadHardLimitMB=").append(getRAMPerThreadHardLimitMB()).append("\n");
+ sb.append("useCompoundFile=").append(getUseCompoundFile()).append("\n");
return sb.toString();
}
+
+
}
Modified: lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1499601&r1=1499600&r2=1499601&view=diff
==============================================================================
--- lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/security/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java Wed Jul 3 23:26:32 2013
@@ -64,16 +64,9 @@ public abstract class LogMergePolicy ext
/** Default noCFSRatio. If a merge's size is >= 10% of
* the index, then we disable compound file for it.
- * @see #setNoCFSRatio */
+ * @see MergePolicy#setNoCFSRatio */
public static final double DEFAULT_NO_CFS_RATIO = 0.1;
- /** Default maxCFSSegmentSize value allows compound file
- * for a segment of any size. The actual file format is
- * still subject to noCFSRatio.
- * @see #setMaxCFSSegmentSizeMB(double)
- */
- public static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE;
-
/** How many segments to merge at a time. */
protected int mergeFactor = DEFAULT_MERGE_FACTOR;
@@ -96,30 +89,14 @@ public abstract class LogMergePolicy ext
* will never be merged. */
protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;
- /** If the size of the merge segment exceeds this ratio of
- * the total index size then it will remain in
- * non-compound format even if {@link
- * #setUseCompoundFile} is {@code true}. */
- protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
-
- /** If the size of the merged segment exceeds
- * this value then it will not use compound file format. */
- protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
-
/** If true, we pro-rate a segment's size by the
* percentage of non-deleted documents. */
protected boolean calibrateSizeByDeletes = true;
- /** True if new segments (flushed or merged) should use
- * the compound file format. Note that large segments
- * may sometimes still use non-compound format (see
- * {@link #setNoCFSRatio}. */
- protected boolean useCompoundFile = true;
-
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
public LogMergePolicy() {
- super();
+ super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE);
}
/** Returns true if {@code LMP} is enabled in {@link
@@ -129,25 +106,6 @@ public abstract class LogMergePolicy ext
return w != null && w.infoStream.isEnabled("LMP");
}
- /** Returns current {@code noCFSRatio}.
- *
- * @see #setNoCFSRatio */
- public double getNoCFSRatio() {
- return noCFSRatio;
- }
-
- /** If a merged segment will be more than this percentage
- * of the total size of the index, leave the segment as
- * non-compound file even if compound file is enabled.
- * Set to 1.0 to always use CFS regardless of merge
- * size. */
- public void setNoCFSRatio(double noCFSRatio) {
- if (noCFSRatio < 0.0 || noCFSRatio > 1.0) {
- throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio);
- }
- this.noCFSRatio = noCFSRatio;
- }
-
/** Print a debug message to {@link IndexWriter}'s {@code
* infoStream}. */
protected void message(String message) {
@@ -178,39 +136,6 @@ public abstract class LogMergePolicy ext
this.mergeFactor = mergeFactor;
}
- // Javadoc inherited
- @Override
- public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
- if (!getUseCompoundFile()) {
- return false;
- }
- long mergedInfoSize = size(mergedInfo);
- if (mergedInfoSize > maxCFSSegmentSize) {
- return false;
- }
- if (getNoCFSRatio() >= 1.0) {
- return true;
- }
- long totalSize = 0;
- for (SegmentInfoPerCommit info : infos) {
- totalSize += size(info);
- }
- return mergedInfoSize <= getNoCFSRatio() * totalSize;
- }
-
- /** Sets whether compound file format should be used for
- * newly flushed and newly merged segments. */
- public void setUseCompoundFile(boolean useCompoundFile) {
- this.useCompoundFile = useCompoundFile;
- }
-
- /** Returns true if newly flushed and newly merge segments
- * are written in compound file format. @see
- * #setUseCompoundFile */
- public boolean getUseCompoundFile() {
- return useCompoundFile;
- }
-
/** Sets whether the segment size should be calibrated by
* the number of deletes when choosing segments for merge. */
public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) {
@@ -226,9 +151,6 @@ public abstract class LogMergePolicy ext
@Override
public void close() {}
- /** Return the size of the provided {@link
- * SegmentInfoPerCommit}. */
- abstract protected long size(SegmentInfoPerCommit info) throws IOException;
/** Return the number of documents in the provided {@link
* SegmentInfoPerCommit}, pro-rated by percentage of
@@ -249,15 +171,10 @@ public abstract class LogMergePolicy ext
* non-deleted documents if {@link
* #setCalibrateSizeByDeletes} is set. */
protected long sizeBytes(SegmentInfoPerCommit info) throws IOException {
- long byteSize = info.sizeInBytes();
if (calibrateSizeByDeletes) {
- int delCount = writer.get().numDeletedDocs(info);
- double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
- assert delRatio <= 1.0;
- return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
- } else {
- return byteSize;
+ return super.size(info);
}
+ return info.sizeInBytes();
}
/** Returns true if the number of segments eligible for
@@ -282,19 +199,6 @@ public abstract class LogMergePolicy ext
(numToMerge != 1 || !segmentIsOriginal || isMerged(mergeInfo));
}
- /** Returns true if this single info is already fully merged (has no
- * pending norms or deletes, is in the same dir as the
- * writer, and matches the current compound file setting */
- protected boolean isMerged(SegmentInfoPerCommit info)
- throws IOException {
- IndexWriter w = writer.get();
- assert w != null;
- boolean hasDeletions = w.numDeletedDocs(info) > 0;
- return !hasDeletions &&
- info.info.dir == w.getDirectory() &&
- (info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
- }
-
/**
* Returns the merges necessary to merge the index, taking the max merge
* size or max merge docs into consideration. This method attempts to respect
@@ -726,29 +630,10 @@ public abstract class LogMergePolicy ext
sb.append("maxMergeSizeForForcedMerge=").append(maxMergeSizeForForcedMerge).append(", ");
sb.append("calibrateSizeByDeletes=").append(calibrateSizeByDeletes).append(", ");
sb.append("maxMergeDocs=").append(maxMergeDocs).append(", ");
- sb.append("useCompoundFile=").append(useCompoundFile).append(", ");
sb.append("maxCFSSegmentSizeMB=").append(getMaxCFSSegmentSizeMB()).append(", ");
sb.append("noCFSRatio=").append(noCFSRatio);
sb.append("]");
return sb.toString();
}
- /** Returns the largest size allowed for a compound file segment */
- public final double getMaxCFSSegmentSizeMB() {
- return maxCFSSegmentSize/1024/1024.;
- }
-
- /** If a merged segment will be more than this value,
- * leave the segment as
- * non-compound file even if compound file is enabled.
- * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0
- * to always use CFS regardless of merge size. */
- public final void setMaxCFSSegmentSizeMB(double v) {
- if (v < 0.0) {
- throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")");
- }
- v *= 1024 * 1024;
- this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v;
- }
-
}