You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/12/05 16:24:13 UTC
svn commit: r1643326 [1/4] - in /lucene/dev/branches/lucene2878: ./ lucene/
lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/
lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/compressing/
lucene/core/src/java/org/apac...
Author: romseygeek
Date: Fri Dec 5 15:24:11 2014
New Revision: 1643326
URL: http://svn.apache.org/r1643326
Log:
Merge trunk
Added:
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestFilterCachingPolicy.java
- copied unchanged from r1643324, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestFilterCachingPolicy.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
- copied unchanged from r1643324, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
- copied unchanged from r1643324, lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Zipper.java
- copied unchanged from r1643324, lucene/dev/trunk/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/Zipper.java
Removed:
lucene/dev/branches/lucene2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
Modified:
lucene/dev/branches/lucene2878/ (props changed)
lucene/dev/branches/lucene2878/lucene/ (props changed)
lucene/dev/branches/lucene2878/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene2878/lucene/codecs/ (props changed)
lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
lucene/dev/branches/lucene2878/lucene/core/ (props changed)
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RateLimitedIndexOutput.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/mockfile/TestMockFilesystems.java
lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java
lucene/dev/branches/lucene2878/lucene/facet/ (props changed)
lucene/dev/branches/lucene2878/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java
lucene/dev/branches/lucene2878/lucene/highlighter/ (props changed)
lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
lucene/dev/branches/lucene2878/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
lucene/dev/branches/lucene2878/lucene/memory/ (props changed)
lucene/dev/branches/lucene2878/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/lucene2878/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
lucene/dev/branches/lucene2878/lucene/misc/ (props changed)
lucene/dev/branches/lucene2878/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
lucene/dev/branches/lucene2878/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
lucene/dev/branches/lucene2878/lucene/queries/ (props changed)
lucene/dev/branches/lucene2878/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java
lucene/dev/branches/lucene2878/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
lucene/dev/branches/lucene2878/lucene/test-framework/ (props changed)
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/HighCompressionCompressingCodec.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/DummyCompressingCodec.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/index/BaseFieldInfoFormatTestCase.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/index/BaseSegmentInfoFormatTestCase.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/mockfile/VerboseFS.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryTestCase.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
lucene/dev/branches/lucene2878/lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
lucene/dev/branches/lucene2878/solr/ (props changed)
lucene/dev/branches/lucene2878/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene2878/solr/bin/ (props changed)
lucene/dev/branches/lucene2878/solr/bin/solr
lucene/dev/branches/lucene2878/solr/bin/solr.cmd
lucene/dev/branches/lucene2878/solr/bin/solr.in.cmd
lucene/dev/branches/lucene2878/solr/bin/solr.in.sh
lucene/dev/branches/lucene2878/solr/build.xml (contents, props changed)
lucene/dev/branches/lucene2878/solr/cloud-dev/ (props changed)
lucene/dev/branches/lucene2878/solr/cloud-dev/clean.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/cli-test-solrcloud-start.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/functions.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/solrcloud-extzk-start.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/solrcloud-multi-start.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/solrcloud-start-existing.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/solrcloud-start.sh
lucene/dev/branches/lucene2878/solr/cloud-dev/stop.sh
lucene/dev/branches/lucene2878/solr/contrib/ (props changed)
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/DIHCacheSupport.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/EntityProcessorBase.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/AbstractSqlEntityProcessorTestCase.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/MockSolrEntityProcessor.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSolrEntityProcessorUnit.java
lucene/dev/branches/lucene2878/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestSqlEntityProcessor.java
lucene/dev/branches/lucene2878/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/solrconfig.xml
lucene/dev/branches/lucene2878/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
lucene/dev/branches/lucene2878/solr/core/ (props changed)
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/store/blockcache/CachedIndexOutput.java
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/store/blockcache/ReusedBufferedIndexOutput.java
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/store/hdfs/HdfsFileWriter.java
lucene/dev/branches/lucene2878/solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
lucene/dev/branches/lucene2878/solr/core/src/test-files/solr/collection1/conf/solrconfig.xml
lucene/dev/branches/lucene2878/solr/core/src/test/org/apache/solr/cloud/HttpPartitionTest.java
lucene/dev/branches/lucene2878/solr/core/src/test/org/apache/solr/handler/component/DistributedQueryComponentOptimizationTest.java
lucene/dev/branches/lucene2878/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
lucene/dev/branches/lucene2878/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
lucene/dev/branches/lucene2878/solr/solrj/ (props changed)
lucene/dev/branches/lucene2878/solr/solrj/src/java/org/apache/solr/common/params/DefaultSolrParams.java
lucene/dev/branches/lucene2878/solr/solrj/src/java/org/apache/solr/common/util/IteratorChain.java
lucene/dev/branches/lucene2878/solr/solrj/src/test/org/apache/solr/common/params/SolrParamTest.java
lucene/dev/branches/lucene2878/solr/webapp/ (props changed)
lucene/dev/branches/lucene2878/solr/webapp/build.xml
Modified: lucene/dev/branches/lucene2878/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/CHANGES.txt?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene2878/lucene/CHANGES.txt Fri Dec 5 15:24:11 2014
@@ -117,6 +117,58 @@ New Features
* LUCENE-6077: Added a filter cache. (Adrien Grand, Robert Muir)
+* LUCENE-6088: TermsFilter implements Accountable. (Adrien Grand)
+
+* LUCENE-6034: The default highlighter when used with QueryScorer will highlight payload-sensitive
+ queries provided that term vectors with positions, offsets, and payloads are present. This is the
+ only highlighter that can highlight such queries accurately. (David Smiley)
+
+Optimizations
+
+* LUCENE-5960: Use a more efficient bitset, not a Set<Integer>, to
+ track visited states. (Markus Heiden via Mike McCandless)
+
+* LUCENE-5959: Don't allocate excess memory when building automaton in
+ finish. (Markus Heiden via Mike McCandless)
+
+* LUCENE-5963: Reduce memory allocations in
+ AnalyzingSuggester. (Markus Heiden via Mike McCandless)
+
+* LUCENE-5938: MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is now faster on
+ queries that match few documents by using a sparse bit set implementation.
+ (Adrien Grand)
+
+* LUCENE-5969: Refactor merging to be more efficient, checksum calculation is
+ per-segment/per-producer, and norms and doc values merging no longer cause
+ RAM spikes for latent fields. (Mike McCandless, Robert Muir)
+
+* LUCENE-5983: CachingWrapperFilter now uses a new DocIdSet implementation
+ called RoaringDocIdSet instead of WAH8DocIdSet. (Adrien Grand)
+
+* LUCENE-6022: DocValuesDocIdSet checks live docs before doc values.
+ (Adrien Grand)
+
+* LUCENE-6030: Add norms patched compression for a small number of common values
+ (Ryan Ernst)
+
+* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
+ (Paul Elschot)
+
+* LUCENE-6033: CachingTokenFilter now uses ArrayList not LinkedList, and has new
+ isCached() method. (David Smiley)
+
+* LUCENE-6031: TokenSources (in the default highlighter) converts term vectors into a
+ TokenStream much faster, in linear time (not N*log(N)), using less memory, and with reset()
+ implemented. Only one of offsets or positions is required of the term vector.
+ (David Smiley)
+
+* LUCENE-6089, LUCENE-6090: Tune CompressionMode.HIGH_COMPRESSION for
+ better compression and less cpu usage. (Adrien Grand, Robert Muir)
+
+* LUCENE-6034: QueryScorer, used by the default highlighter, needn't re-index the provided
+ TokenStream with MemoryIndex when it comes from TokenSources (term vectors) with offsets and
+ positions. (David Smiley)
+
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
@@ -229,6 +281,17 @@ API Changes
* LUCENE-6082: Remove abort() from codec apis. (Robert Muir)
+* LUCENE-6084: IndexOutput's constructor now requires a String
+ resourceDescription so its toString is sane (Robert Muir, Mike
+ McCandless)
+
+* LUCENE-6087: Allow passing custom DirectoryReader to SearcherManager
+ (Mike McCandless)
+
+* LUCENE-6085: Undeprecate SegmentInfo attributes, but add safety so they
+ won't be trappy if codec tries to use them during docvalues updates.
+ (Robert Muir)
+
Bug Fixes
* LUCENE-5650: Enforce read-only access to any path outside the temporary
@@ -318,45 +381,6 @@ Tests
* LUCENE-5968: Improve error message when 'ant beast' is run on top-level
modules. (Ramkumar Aiyengar, Uwe Schindler)
-Optimizations
-
-* LUCENE-5960: Use a more efficient bitset, not a Set<Integer>, to
- track visited states. (Markus Heiden via Mike McCandless)
-
-* LUCENE-5959: Don't allocate excess memory when building automaton in
- finish. (Markus Heiden via Mike McCandless)
-
-* LUCENE-5963: Reduce memory allocations in
- AnalyzingSuggester. (Markus Heiden via Mike McCandless)
-
-* LUCENE-5938: MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is now faster on
- queries that match few documents by using a sparse bit set implementation.
- (Adrien Grand)
-
-* LUCENE-5969: Refactor merging to be more efficient, checksum calculation is
- per-segment/per-producer, and norms and doc values merging no longer cause
- RAM spikes for latent fields. (Mike McCandless, Robert Muir)
-
-* LUCENE-5983: CachingWrapperFilter now uses a new DocIdSet implementation
- called RoaringDocIdSet instead of WAH8DocIdSet. (Adrien Grand)
-
-* LUCENE-6022: DocValuesDocIdSet checks live docs before doc values.
- (Adrien Grand)
-
-* LUCENE-6030: Add norms patched compression for a small number of common values
- (Ryan Ernst)
-
-* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
- (Paul Elschot)
-
-* LUCENE-6033: CachingTokenFilter now uses ArrayList not LinkedList, and has new
- isCached() method. (David Smiley)
-
-* LUCENE-6031: TokenSources (in the default highlighter) converts term vectors into a
- TokenStream much faster in linear time (not N*log(N) using less memory, and with reset()
- implemented. Only one of offsets or positions are required of the term vector.
- (David Smiley)
-
Build
* LUCENE-5909: Smoke tester now has better command line parsing and
Modified: lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java (original)
+++ lucene/dev/branches/lucene2878/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoFormat.java Fri Dec 5 15:24:11 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
@@ -52,6 +53,9 @@ public class SimpleTextSegmentInfoFormat
final static BytesRef SI_NUM_DIAG = new BytesRef(" diagnostics ");
final static BytesRef SI_DIAG_KEY = new BytesRef(" key ");
final static BytesRef SI_DIAG_VALUE = new BytesRef(" value ");
+ final static BytesRef SI_NUM_ATT = new BytesRef(" attributes ");
+ final static BytesRef SI_ATT_KEY = new BytesRef(" key ");
+ final static BytesRef SI_ATT_VALUE = new BytesRef(" value ");
final static BytesRef SI_NUM_FILES = new BytesRef(" files ");
final static BytesRef SI_FILE = new BytesRef(" file ");
final static BytesRef SI_ID = new BytesRef(" id ");
@@ -97,6 +101,22 @@ public class SimpleTextSegmentInfoFormat
}
SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_NUM_ATT);
+ int numAtt = Integer.parseInt(readString(SI_NUM_ATT.length, scratch));
+ Map<String,String> attributes = new HashMap<>(numAtt);
+
+ for (int i = 0; i < numAtt; i++) {
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_ATT_KEY);
+ String key = readString(SI_ATT_KEY.length, scratch);
+
+ SimpleTextUtil.readLine(input, scratch);
+ assert StringHelper.startsWith(scratch.get(), SI_ATT_VALUE);
+ String value = readString(SI_ATT_VALUE.length, scratch);
+ attributes.put(key, value);
+ }
+
+ SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch.get(), SI_NUM_FILES);
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
Set<String> files = new HashSet<>();
@@ -120,7 +140,7 @@ public class SimpleTextSegmentInfoFormat
SimpleTextUtil.checkFooter(input);
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
- isCompoundFile, null, diagnostics, id);
+ isCompoundFile, null, diagnostics, id, Collections.unmodifiableMap(attributes));
info.setFiles(files);
return info;
}
@@ -169,6 +189,21 @@ public class SimpleTextSegmentInfoFormat
}
}
+ Map<String,String> attributes = si.getAttributes();
+ SimpleTextUtil.write(output, SI_NUM_ATT);
+ SimpleTextUtil.write(output, Integer.toString(attributes.size()), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ for (Map.Entry<String,String> attEntry : attributes.entrySet()) {
+ SimpleTextUtil.write(output, SI_ATT_KEY);
+ SimpleTextUtil.write(output, attEntry.getKey(), scratch);
+ SimpleTextUtil.writeNewline(output);
+
+ SimpleTextUtil.write(output, SI_ATT_VALUE);
+ SimpleTextUtil.write(output, attEntry.getValue(), scratch);
+ SimpleTextUtil.writeNewline(output);
+ }
+
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsFormat.java Fri Dec 5 15:24:11 2014
@@ -48,15 +48,16 @@ public class CompressingStoredFieldsForm
private final String segmentSuffix;
private final CompressionMode compressionMode;
private final int chunkSize;
+ private final int maxDocsPerChunk;
/**
* Create a new {@link CompressingStoredFieldsFormat} with an empty segment
* suffix.
*
- * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int)
+ * @see CompressingStoredFieldsFormat#CompressingStoredFieldsFormat(String, String, CompressionMode, int, int)
*/
- public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize) {
- this(formatName, "", compressionMode, chunkSize);
+ public CompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
+ this(formatName, "", compressionMode, chunkSize, maxDocsPerChunk);
}
/**
@@ -79,6 +80,8 @@ public class CompressingStoredFieldsForm
* <code>chunkSize</code> is the minimum byte size of a chunk of documents.
* A value of <code>1</code> can make sense if there is redundancy across
* fields.
+ * <code>maxDocsPerChunk</code> is an upperbound on how many docs may be stored
+ * in a single chunk. This is to bound the cpu costs for highly compressible data.
* <p>
* Higher values of <code>chunkSize</code> should improve the compression
* ratio but will require more memory at indexing time and might make document
@@ -88,10 +91,11 @@ public class CompressingStoredFieldsForm
* @param formatName the name of the {@link StoredFieldsFormat}
* @param compressionMode the {@link CompressionMode} to use
* @param chunkSize the minimum number of bytes of a single chunk of stored documents
+ * @param maxDocsPerChunk the maximum number of documents in a single chunk
* @see CompressionMode
*/
public CompressingStoredFieldsFormat(String formatName, String segmentSuffix,
- CompressionMode compressionMode, int chunkSize) {
+ CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
this.formatName = formatName;
this.segmentSuffix = segmentSuffix;
this.compressionMode = compressionMode;
@@ -99,7 +103,10 @@ public class CompressingStoredFieldsForm
throw new IllegalArgumentException("chunkSize must be >= 1");
}
this.chunkSize = chunkSize;
-
+ if (maxDocsPerChunk < 1) {
+ throw new IllegalArgumentException("maxDocsPerChunk must be >= 1");
+ }
+ this.maxDocsPerChunk = maxDocsPerChunk;
}
@Override
@@ -113,13 +120,13 @@ public class CompressingStoredFieldsForm
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
IOContext context) throws IOException {
return new CompressingStoredFieldsWriter(directory, si, segmentSuffix, context,
- formatName, compressionMode, chunkSize);
+ formatName, compressionMode, chunkSize, maxDocsPerChunk);
}
@Override
public String toString() {
return getClass().getSimpleName() + "(compressionMode=" + compressionMode
- + ", chunkSize=" + chunkSize + ")";
+ + ", chunkSize=" + chunkSize + ", maxDocsPerChunk=" + maxDocsPerChunk + ")";
}
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Fri Dec 5 15:24:11 2014
@@ -54,9 +54,6 @@ public final class CompressingStoredFiel
/** Extension of stored fields index file */
public static final String FIELDS_INDEX_EXTENSION = "fdx";
-
- // hard limit on the maximum number of documents per chunk
- static final int MAX_DOCUMENTS_PER_CHUNK = 128;
static final int STRING = 0x00;
static final int BYTE_ARR = 0x01;
@@ -82,6 +79,7 @@ public final class CompressingStoredFiel
private final CompressionMode compressionMode;
private final Compressor compressor;
private final int chunkSize;
+ private final int maxDocsPerChunk;
private final GrowableByteArrayDataOutput bufferedDocs;
private int[] numStoredFields; // number of stored fields
@@ -91,7 +89,7 @@ public final class CompressingStoredFiel
/** Sole constructor. */
public CompressingStoredFieldsWriter(Directory directory, SegmentInfo si, String segmentSuffix, IOContext context,
- String formatName, CompressionMode compressionMode, int chunkSize) throws IOException {
+ String formatName, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) throws IOException {
assert directory != null;
this.directory = directory;
this.segment = si.name;
@@ -99,6 +97,7 @@ public final class CompressingStoredFiel
this.compressionMode = compressionMode;
this.compressor = compressionMode.newCompressor();
this.chunkSize = chunkSize;
+ this.maxDocsPerChunk = maxDocsPerChunk;
this.docBase = 0;
this.bufferedDocs = new GrowableByteArrayDataOutput(chunkSize);
this.numStoredFields = new int[16];
@@ -210,7 +209,7 @@ public final class CompressingStoredFiel
private boolean triggerFlush() {
return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes
- numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
+ numBufferedDocs >= maxDocsPerChunk;
}
private void flush() throws IOException {
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java Fri Dec 5 15:24:11 2014
@@ -70,7 +70,8 @@ public abstract class CompressionMode {
@Override
public Compressor newCompressor() {
- return new DeflateCompressor(Deflater.BEST_COMPRESSION);
+ // 3 is the highest level that doesn't have lazy match evaluation
+ return new DeflateCompressor(3);
}
@Override
@@ -185,7 +186,7 @@ public abstract class CompressionMode {
byte[] compressed;
DeflateDecompressor() {
- decompressor = new Inflater();
+ decompressor = new Inflater(true);
compressed = new byte[0];
}
@@ -197,13 +198,18 @@ public abstract class CompressionMode {
return;
}
final int compressedLength = in.readVInt();
- if (compressedLength > compressed.length) {
- compressed = new byte[ArrayUtil.oversize(compressedLength, 1)];
+ // pad with extra "dummy byte": see javadocs for using Inflater(true)
+ // we do it for compliance, but it has been unnecessary for years in zlib.
+ final int paddedLength = compressedLength + 1;
+ if (paddedLength > compressed.length) {
+ compressed = new byte[ArrayUtil.oversize(paddedLength, 1)];
}
in.readBytes(compressed, 0, compressedLength);
+ compressed[compressedLength] = 0; // explicitly set dummy byte to 0
decompressor.reset();
- decompressor.setInput(compressed, 0, compressedLength);
+ // extra "dummy byte"
+ decompressor.setInput(compressed, 0, paddedLength);
bytes.offset = bytes.length = 0;
while (true) {
@@ -241,7 +247,7 @@ public abstract class CompressionMode {
byte[] compressed;
DeflateCompressor(int level) {
- compressor = new Deflater(level);
+ compressor = new Deflater(level, true);
compressed = new byte[64];
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java Fri Dec 5 15:24:11 2014
@@ -221,7 +221,7 @@ public final class Lucene50PostingsWrite
final int docDelta = docID - lastDocID;
if (docID < 0 || (docCount > 0 && docDelta <= 0)) {
- throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )", docOut.toString());
+ throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " )", docOut);
}
docDeltaBuffer[docBufferUpto] = docDelta;
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50SegmentInfoFormat.java Fri Dec 5 15:24:11 2014
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene5
*/
import java.io.IOException;
+import java.util.Collections;
import java.util.Map;
import java.util.Set;
@@ -33,7 +34,6 @@ import org.apache.lucene.store.DataOutpu
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
/**
@@ -41,7 +41,7 @@ import org.apache.lucene.util.Version;
* <p>
* Files:
* <ul>
- * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Footer
+ * <li><tt>.si</tt>: Header, SegVersion, SegSize, IsCompoundFile, Diagnostics, Files, Attributes, Footer
* </ul>
* </p>
* Data types:
@@ -51,7 +51,7 @@ import org.apache.lucene.util.Version;
* <li>SegSize --> {@link DataOutput#writeInt Int32}</li>
* <li>SegVersion --> {@link DataOutput#writeString String}</li>
* <li>Files --> {@link DataOutput#writeStringSet Set<String>}</li>
- * <li>Diagnostics --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
+ * <li>Diagnostics,Attributes --> {@link DataOutput#writeStringStringMap Map<String,String>}</li>
* <li>IsCompoundFile --> {@link DataOutput#writeByte Int8}</li>
* <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li>
* </ul>
@@ -101,8 +101,9 @@ public class Lucene50SegmentInfoFormat e
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Set<String> files = input.readStringSet();
+ final Map<String,String> attributes = input.readStringStringMap();
- si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID);
+ si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile, null, diagnostics, segmentID, Collections.unmodifiableMap(attributes));
si.setFiles(files);
} catch (Throwable exception) {
priorE = exception;
@@ -144,6 +145,7 @@ public class Lucene50SegmentInfoFormat e
}
}
output.writeStringSet(files);
+ output.writeStringStringMap(si.getAttributes());
CodecUtil.writeFooter(output);
}
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50StoredFieldsFormat.java Fri Dec 5 15:24:11 2014
@@ -118,7 +118,7 @@ public final class Lucene50StoredFieldsF
/** Sole constructor. */
public Lucene50StoredFieldsFormat() {
- super("Lucene50StoredFields", CompressionMode.FAST, 1 << 14);
+ super("Lucene50StoredFields", CompressionMode.FAST, 1 << 14, 128);
}
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java Fri Dec 5 15:24:11 2014
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.DataOutput;
/**
* This exception is thrown when Lucene detects
@@ -31,11 +32,21 @@ public class CorruptIndexException exten
public CorruptIndexException(String message, DataInput input) {
this(message, input, null);
}
+
+ /** Create exception with a message only */
+ public CorruptIndexException(String message, DataOutput output) {
+ this(message, output, null);
+ }
/** Create exception with message and root cause. */
public CorruptIndexException(String message, DataInput input, Throwable cause) {
this(message, Objects.toString(input), cause);
}
+
+ /** Create exception with message and root cause. */
+ public CorruptIndexException(String message, DataOutput output, Throwable cause) {
+ this(message, Objects.toString(output), cause);
+ }
/** Create exception with a message only */
public CorruptIndexException(String message, String resourceDescription) {
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Fri Dec 5 15:24:11 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.text.NumberFormat;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
@@ -178,7 +179,7 @@ class DocumentsWriterPerThread {
pendingUpdates.clear();
deleteSlice = deleteQueue.newSlice();
- segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null, StringHelper.randomId());
+ segmentInfo = new SegmentInfo(directoryOrig, Version.LATEST, segmentName, -1, false, codec, null, StringHelper.randomId(), new HashMap<>());
assert numDocsInRAM == 0;
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " init seg=" + segmentName + " delQueue=" + deleteQueue);
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Fri Dec 5 15:24:11 2014
@@ -2535,7 +2535,7 @@ public class IndexWriter implements Clos
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1,
- false, codec, null, StringHelper.randomId());
+ false, codec, null, StringHelper.randomId(), new HashMap<>());
SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
MergeState.CheckAbort.NONE, globalFieldNumberMap,
@@ -2631,7 +2631,7 @@ public class IndexWriter implements Clos
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(), info.info.getCodec(),
- info.info.getDiagnostics(), info.info.getId());
+ info.info.getDiagnostics(), info.info.getId(), info.info.getAttributes());
SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, info.getDelCount(), info.getDelGen(),
info.getFieldInfosGen(), info.getDocValuesGen());
@@ -3742,7 +3742,7 @@ public class IndexWriter implements Clos
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
- SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null, StringHelper.randomId());
+ SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null, StringHelper.randomId(), new HashMap<>());
Map<String,String> details = new HashMap<>();
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java Fri Dec 5 15:24:11 2014
@@ -21,8 +21,10 @@ package org.apache.lucene.index;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
@@ -65,6 +67,8 @@ public final class SegmentInfo {
private Codec codec;
private Map<String,String> diagnostics;
+
+ private Map<String,String> attributes;
// Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an older than 3.0 index, and it's used to detect a too old index.
@@ -90,7 +94,7 @@ public final class SegmentInfo {
*/
public SegmentInfo(Directory dir, Version version, String name, int docCount,
boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
- byte[] id) {
+ byte[] id, Map<String,String> attributes) {
assert !(dir instanceof TrackingDirectoryWrapper);
this.dir = dir;
this.version = version;
@@ -103,6 +107,7 @@ public final class SegmentInfo {
if (id.length != StringHelper.ID_LENGTH) {
throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
}
+ this.attributes = Objects.requireNonNull(attributes);
}
/**
@@ -267,4 +272,34 @@ public final class SegmentInfo {
String namedForThisSegment(String file) {
return name + IndexFileNames.stripSegmentName(file);
}
+
+ /**
+ * Get a codec attribute value, or null if it does not exist
+ */
+ public String getAttribute(String key) {
+ return attributes.get(key);
+ }
+
+ /**
+ * Puts a codec attribute value.
+ * <p>
+ * This is a key-value mapping for the segment that the codec can use to store
+ * additional metadata, and will be available to the codec when reading the
+ * segment via {@link #getAttribute(String)}.
+ * <p>
+ * If a value already exists for the key, it will be replaced with the new
+ * value.
+ */
+ public String putAttribute(String key, String value) {
+ return attributes.put(key, value);
+ }
+
+ /**
+ * Returns the internal codec attributes map.
+ * @return internal codec attributes map.
+ */
+ public Map<String,String> getAttributes() {
+ return attributes;
+ }
}
+
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java Fri Dec 5 15:24:11 2014
@@ -106,6 +106,25 @@ public final class SearcherManager exten
current = getSearcher(searcherFactory, DirectoryReader.open(dir));
}
+ /**
+ * Creates and returns a new SearcherManager from an existing {@link DirectoryReader}. Note that
+ * this steals the incoming reference.
+ *
+ * @param reader the DirectoryReader.
+ * @param searcherFactory An optional {@link SearcherFactory}. Pass
+ * <code>null</code> if you don't require the searcher to be warmed
+ * before going live or other custom behavior.
+ *
+ * @throws IOException if there is a low-level I/O error
+ */
+ public SearcherManager(DirectoryReader reader, SearcherFactory searcherFactory) throws IOException {
+ if (searcherFactory == null) {
+ searcherFactory = new SearcherFactory();
+ }
+ this.searcherFactory = searcherFactory;
+ this.current = getSearcher(searcherFactory, reader);
+ }
+
@Override
protected void decRef(IndexSearcher reference) throws IOException {
reference.getIndexReader().decRef();
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/search/UsageTrackingFilterCachingPolicy.java Fri Dec 5 15:24:11 2014
@@ -36,18 +36,18 @@ import org.apache.lucene.util.FrequencyT
*/
public final class UsageTrackingFilterCachingPolicy implements FilterCachingPolicy {
- private static boolean isCostly(Filter filter) {
+ static boolean isCostly(Filter filter) {
// This does not measure the cost of iterating over the filter (for this we
// already have the DocIdSetIterator#cost API) but the cost to build the
// DocIdSet in the first place
return filter instanceof MultiTermQueryWrapperFilter;
}
- private static boolean isCheapToCache(DocIdSet set) {
+ static boolean isCheapToCache(DocIdSet set) {
// the produced doc set is already cacheable, so caching has no
// overhead at all. TODO: extend this to sets whose iterators have a low
// cost?
- return set.isCacheable();
+ return set == null || set.isCacheable();
}
private final FilterCachingPolicy.CacheOnLargeSegments segmentPolicy;
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/FSDirectory.java Fri Dec 5 15:24:11 2014
@@ -282,7 +282,7 @@ public abstract class FSDirectory extend
private final String name;
public FSIndexOutput(String name) throws IOException {
- super(new FilterOutputStream(Files.newOutputStream(directory.resolve(name))) {
+ super("FSIndexOutput(path=\"" + directory.resolve(name) + "\")", new FilterOutputStream(Files.newOutputStream(directory.resolve(name))) {
// This implementation ensures, that we never write more than CHUNK_SIZE bytes:
@Override
public void write(byte[] b, int offset, int length) throws IOException {
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/IndexOutput.java Fri Dec 5 15:24:11 2014
@@ -31,6 +31,17 @@ import java.io.IOException;
*/
public abstract class IndexOutput extends DataOutput implements Closeable {
+ private final String resourceDescription;
+
+ /** Sole constructor. resourceDescription must be a non-null, opaque string
+ * describing this resource; it's returned from {@link #toString}. */
+ protected IndexOutput(String resourceDescription) {
+ if (resourceDescription == null) {
+ throw new IllegalArgumentException("resourceDescription must not be null");
+ }
+ this.resourceDescription = resourceDescription;
+ }
+
/** Closes this stream to further operations. */
@Override
public abstract void close() throws IOException;
@@ -42,4 +53,9 @@ public abstract class IndexOutput extend
/** Returns the current checksum of bytes written so far */
public abstract long getChecksum() throws IOException;
+
+ @Override
+ public String toString() {
+ return resourceDescription;
+ }
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/OutputStreamIndexOutput.java Fri Dec 5 15:24:11 2014
@@ -36,7 +36,8 @@ public class OutputStreamIndexOutput ext
* @param bufferSize the buffer size in bytes used to buffer writes internally.
* @throws IllegalArgumentException if the given buffer size is less or equal to <tt>0</tt>
*/
- public OutputStreamIndexOutput(OutputStream out, int bufferSize) {
+ public OutputStreamIndexOutput(String resourceDescription, OutputStream out, int bufferSize) {
+ super(resourceDescription);
this.os = new BufferedOutputStream(new CheckedOutputStream(out, crc), bufferSize);
}
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java Fri Dec 5 15:24:11 2014
@@ -173,7 +173,7 @@ public class RAMDirectory extends BaseDi
existing.directory = null;
}
fileMap.put(name, file);
- return new RAMOutputStream(file, true);
+ return new RAMOutputStream(name, file, true);
}
/**
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java Fri Dec 5 15:24:11 2014
@@ -46,10 +46,17 @@ public class RAMOutputStream extends Ind
/** Construct an empty output buffer. */
public RAMOutputStream() {
- this(new RAMFile(), false);
+ this("noname", new RAMFile(), false);
}
+ /** Creates this, with no name. */
public RAMOutputStream(RAMFile f, boolean checksum) {
+ this("noname", f, checksum);
+ }
+
+ /** Creates this, with specified name. */
+ public RAMOutputStream(String name, RAMFile f, boolean checksum) {
+ super("RAMOutputStream(name=\"" + name + "\")");
file = f;
// make sure that we switch to the
Modified: lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RateLimitedIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RateLimitedIndexOutput.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RateLimitedIndexOutput.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/java/org/apache/lucene/store/RateLimitedIndexOutput.java Fri Dec 5 15:24:11 2014
@@ -37,6 +37,7 @@ final class RateLimitedIndexOutput exten
private long currentMinPauseCheckBytes;
RateLimitedIndexOutput(final RateLimiter rateLimiter, final IndexOutput delegate) {
+ super("RateLimitedIndexOutput(" + delegate + ")");
this.delegate = delegate;
this.rateLimiter = rateLimiter;
this.currentMinPauseCheckBytes = rateLimiter.getMinPauseCheckBytes();
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java Fri Dec 5 15:24:11 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
@@ -220,7 +221,7 @@ public class TestCodecs extends LuceneTe
final FieldInfos fieldInfos = builder.finish();
final Directory dir = newDirectory();
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId(), new HashMap<>());
this.write(si, fieldInfos, dir, fields);
final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random())));
@@ -277,7 +278,7 @@ public class TestCodecs extends LuceneTe
}
Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId());
+ final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId(), new HashMap<>());
this.write(si, fieldInfos, dir, fields);
if (VERBOSE) {
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDemoParallelLeafReader.java Fri Dec 5 15:24:11 2014
@@ -120,6 +120,9 @@ public class TestDemoParallelLeafReader
IndexWriterConfig iwc = getIndexWriterConfig();
iwc.setMergePolicy(new ReindexingMergePolicy(iwc.getMergePolicy()));
+ if (DEBUG) {
+ System.out.println("TEST: use IWC:\n" + iwc);
+ }
w = new IndexWriter(indexDir, iwc);
w.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
@@ -644,7 +647,12 @@ public class TestDemoParallelLeafReader
return new ReindexingReader(root) {
@Override
protected IndexWriterConfig getIndexWriterConfig() throws IOException {
- return newIndexWriterConfig();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ // We write tiny docs, so we need tiny floor to avoid O(N^2) merging:
+ tmp.setFloorSegmentMB(.01);
+ iwc.setMergePolicy(tmp);
+ return iwc;
}
@Override
@@ -694,7 +702,12 @@ public class TestDemoParallelLeafReader
return new ReindexingReader(root) {
@Override
protected IndexWriterConfig getIndexWriterConfig() throws IOException {
- return newIndexWriterConfig();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ // We write tiny docs, so we need tiny floor to avoid O(N^2) merging:
+ tmp.setFloorSegmentMB(.01);
+ iwc.setMergePolicy(tmp);
+ return iwc;
}
@Override
@@ -780,7 +793,12 @@ public class TestDemoParallelLeafReader
return new ReindexingReader(root) {
@Override
protected IndexWriterConfig getIndexWriterConfig() throws IOException {
- return newIndexWriterConfig();
+ IndexWriterConfig iwc = newIndexWriterConfig();
+ TieredMergePolicy tmp = new TieredMergePolicy();
+ // We write tiny docs, so we need tiny floor to avoid O(N^2) merging:
+ tmp.setFloorSegmentMB(.01);
+ iwc.setMergePolicy(tmp);
+ return iwc;
}
@Override
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDoc.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDoc.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestDoc.java Fri Dec 5 15:24:11 2014
@@ -28,6 +28,7 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
@@ -220,7 +221,7 @@ public class TestDoc extends LuceneTestC
final Codec codec = Codec.getDefault();
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
- final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, null, StringHelper.randomId());
+ final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, merged, -1, false, codec, null, StringHelper.randomId(), new HashMap<>());
SegmentMerger merger = new SegmentMerger(Arrays.<LeafReader>asList(r1, r2),
si, InfoStream.getDefault(), trackingDir,
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java Fri Dec 5 15:24:11 2014
@@ -2582,7 +2582,7 @@ public class TestIndexWriter extends Luc
MockDirectoryWrapper dir = newMockDirectory();
if (TestUtil.isWindowsFS(dir)) {
dir.close();
- assumeFalse("this test can't run on Windows", true);
+ assumeFalse("this test can't run on simulated windows (WindowsFS)", true);
}
// don't act like windows either, or the test won't simulate the condition
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/index/TestSegmentMerger.java Fri Dec 5 15:24:11 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Arrays;
+import java.util.HashMap;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@@ -79,7 +80,7 @@ public class TestSegmentMerger extends L
public void testMerge() throws IOException {
final Codec codec = Codec.getDefault();
- final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, null, StringHelper.randomId());
+ final SegmentInfo si = new SegmentInfo(mergedDir, Version.LATEST, mergedSegment, -1, false, codec, null, StringHelper.randomId(), new HashMap<>());
SegmentMerger merger = new SegmentMerger(Arrays.<LeafReader>asList(reader1, reader2),
si, InfoStream.getDefault(), mergedDir,
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/mockfile/TestMockFilesystems.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/mockfile/TestMockFilesystems.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/mockfile/TestMockFilesystems.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/mockfile/TestMockFilesystems.java Fri Dec 5 15:24:11 2014
@@ -27,6 +27,7 @@ import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.FileSystem;
import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
@@ -211,6 +212,30 @@ public class TestMockFilesystems extends
file.close();
}
+ public void testVerboseFSNoSuchFileException() throws IOException {
+ Path dir = FilterPath.unwrap(createTempDir());
+ FileSystem fs = new VerboseFS(dir.getFileSystem(), InfoStream.NO_OUTPUT).getFileSystem(URI.create("file:///"));
+ Path wrapped = new FilterPath(dir, fs);
+ try {
+ AsynchronousFileChannel.open(wrapped.resolve("doesNotExist.rip"));
+ fail("did not hit exception");
+ } catch (NoSuchFileException nsfe) {
+ // expected
+ }
+ try {
+ FileChannel.open(wrapped.resolve("doesNotExist.rip"));
+ fail("did not hit exception");
+ } catch (NoSuchFileException nsfe) {
+ // expected
+ }
+ try {
+ Files.newByteChannel(wrapped.resolve("stillopen"));
+ fail("did not hit exception");
+ } catch (NoSuchFileException nsfe) {
+ // expected
+ }
+ }
+
public void testTooManyOpenFiles() throws IOException {
int n = 60;
Modified: lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java (original)
+++ lucene/dev/branches/lucene2878/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java Fri Dec 5 15:24:11 2014
@@ -31,16 +31,20 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.ThreadedIndexingAndSearchingTestCase;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil;
@@ -445,5 +449,51 @@ public class TestSearcherManager extends
sm.close();
dir.close();
}
-
+
+ private static class MyFilterLeafReader extends FilterLeafReader {
+ public MyFilterLeafReader(LeafReader in) {
+ super(in);
+ }
+ }
+
+ private static class MyFilterDirectoryReader extends FilterDirectoryReader {
+ public MyFilterDirectoryReader(DirectoryReader in) {
+ super(in,
+ new FilterDirectoryReader.SubReaderWrapper() {
+ @Override
+ public LeafReader wrap(LeafReader reader) {
+ return new MyFilterLeafReader(reader);
+ }
+ });
+ }
+
+ @Override
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
+ return new MyFilterDirectoryReader(in);
+ }
+ }
+
+ // LUCENE-6087
+ public void testCustomDirectoryReader() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ DirectoryReader reader = new MyFilterDirectoryReader(w.getReader());
+ SearcherManager mgr = new SearcherManager(reader, null);
+ for(int i=0;i<10;i++) {
+ w.addDocument(new Document());
+ mgr.maybeRefresh();
+ IndexSearcher s = mgr.acquire();
+ try {
+ assertTrue(s.getIndexReader() instanceof MyFilterDirectoryReader);
+ for (LeafReaderContext ctx : s.getIndexReader().leaves()) {
+ assertTrue(ctx.reader() instanceof MyFilterLeafReader);
+ }
+ } finally {
+ mgr.release(s);
+ }
+ }
+ mgr.close();
+ w.close();
+ dir.close();
+ }
}
Modified: lucene/dev/branches/lucene2878/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (original)
+++ lucene/dev/branches/lucene2878/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java Fri Dec 5 15:24:11 2014
@@ -143,6 +143,7 @@ public class SlowRAMDirectory extends RA
private final Random rand;
public SlowIndexOutput(IndexOutput io) {
+ super("SlowIndexOutput(" + io + ")"); // NOTE(review): presumably a description string required by the superclass constructor; superclass not visible here - confirm
this.io = io;
this.rand = forkRandom();
}
Modified: lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Fri Dec 5 15:24:11 2014
@@ -265,7 +265,8 @@ public class QueryScorer implements Scor
* {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
* ensure an efficient reset - if you are already using a different caching
* {@link TokenStream} impl and you don't want it to be wrapped, set this to
- * false.
+ * false. Note that term-vector based tokenstreams are detected and won't be
+ * wrapped either.
*/
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
this.wrapToCaching = wrap;
Modified: lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Fri Dec 5 15:24:11 2014
@@ -36,7 +36,7 @@ import org.apache.lucene.index.Terms;
*/
public class TokenSources {
/**
- * A convenience method that tries to first get a TermPositionVector for the
+ * A convenience method that tries to first get a {@link TokenStreamFromTermVector} for the
* specified docId, then, falls back to using the passed in
* {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
* This is useful when you already have the document, but would prefer to use
Modified: lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1643326&r1=1643325&r2=1643326&view=diff
==============================================================================
--- lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene2878/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Fri Dec 5 15:24:11 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.search.highlig
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@@ -29,13 +30,13 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FilterLeafReader;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
@@ -43,7 +44,18 @@ import org.apache.lucene.index.TermConte
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.CommonTermsQuery;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
@@ -65,7 +77,7 @@ import org.apache.lucene.util.IOUtils;
public class WeightedSpanTermExtractor {
private String fieldName;
- private TokenStream tokenStream;
+ private TokenStream tokenStream;// set by the getWeightedSpanTerms* methods
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
@@ -209,6 +221,8 @@ public class WeightedSpanTermExtractor {
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
+ } else if (query instanceof MatchAllDocsQuery) {
+ // nothing to extract for a MatchAllDocsQuery
} else {
Query origQuery = query;
if (query instanceof MultiTermQuery) {
@@ -357,18 +371,39 @@ public class WeightedSpanTermExtractor {
protected LeafReaderContext getLeafContext() throws IOException {
if (internalReader == null) {
- if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
- assert !cachedTokenStream;
- tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
- cachedTokenStream = true;
- }
- final MemoryIndex indexer = new MemoryIndex(true);
- indexer.addField(DelegatingLeafReader.FIELD_NAME, tokenStream);
- tokenStream.reset();
- final IndexSearcher searcher = indexer.createSearcher();
- // MEM index has only atomic ctx
- internalReader = new DelegatingLeafReader(((LeafReaderContext)searcher.getTopReaderContext()).reader());
+ boolean cacheIt = wrapToCaching && !(tokenStream instanceof CachingTokenFilter);
+
+ // If it's from term vectors, never cache it; if the vector has both positions and offsets, wrap its Terms directly in a reader
+ if (tokenStream instanceof TokenStreamFromTermVector) {
+ cacheIt = false;
+ Terms termVectorTerms = ((TokenStreamFromTermVector) tokenStream).getTermVectorTerms();
+ if (termVectorTerms.hasPositions() && termVectorTerms.hasOffsets()) {
+ internalReader = new TermVectorLeafReader(DelegatingLeafReader.FIELD_NAME, termVectorTerms);
+ }
+ }
+
+ // No reader built from term vectors above: use MemoryIndex (index/invert this tokenStream now)
+ if (internalReader == null) {
+ final MemoryIndex indexer = new MemoryIndex(true);
+ if (cacheIt) {
+ assert !cachedTokenStream;
+ tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ cachedTokenStream = true;
+ indexer.addField(DelegatingLeafReader.FIELD_NAME, tokenStream);
+ } else {
+ indexer.addField(DelegatingLeafReader.FIELD_NAME,
+ new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ }
+ tokenStream.reset();//reset so the stream is back at its beginning when we return
+ final IndexSearcher searcher = indexer.createSearcher();
+ // MEM index has only atomic ctx
+ internalReader = ((LeafReaderContext) searcher.getTopReaderContext()).reader();
+ }
+
+ // Now wrap whichever reader was built above so we always use a common field.
+ this.internalReader = new DelegatingLeafReader(internalReader);
}
+
return internalReader.getContext();
}
@@ -532,7 +567,7 @@ public class WeightedSpanTermExtractor {
return terms;
}
-
+
protected void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
if (spanQuery instanceof FieldMaskingSpanQuery) {
collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
@@ -622,8 +657,11 @@ public class WeightedSpanTermExtractor {
public boolean isCachedTokenStream() {
return cachedTokenStream;
}
-
+
+ /** Returns the tokenStream, which may have been wrapped in a {@link CachingTokenFilter}.
+ * The getWeightedSpanTerms* methods set the tokenStream, so don't call this before one of them. */
public TokenStream getTokenStream() {
+ assert tokenStream != null;
return tokenStream;
}
@@ -632,12 +670,16 @@ public class WeightedSpanTermExtractor {
* {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
* ensure an efficient reset - if you are already using a different caching
* {@link TokenStream} impl and you don't want it to be wrapped, set this to
- * false.
+ * false. This setting is ignored when a term vector based TokenStream is supplied,
+ * since it can be reset efficiently.
*/
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
this.wrapToCaching = wrap;
}
+ /** A threshold on the number of characters to analyze. When a TokenStream based on
+ * term vectors with offsets and positions is supplied, this setting
+ * does not apply. */
protected final void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
}