You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/11 15:39:47 UTC
svn commit: r1432065 [1/5] - in /lucene/dev/branches/lucene4547: ./ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/test/org/apache/lucene/analysis/core/
lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/...
Author: rmuir
Date: Fri Jan 11 14:39:45 2013
New Revision: 1432065
URL: http://svn.apache.org/viewvc?rev=1432065&view=rev
Log:
Merged /lucene/dev/trunk:r1430124-1432061
Added:
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedReader.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/packed/BlockPackedWriter.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsIndexer.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsIndexer.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsMain.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsMain.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsSearcher.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsSearcher.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsUtils.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/examples/org/apache/lucene/facet/example/association/CategoryAssociationsUtils.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsListBuilder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/util/encoding/Vint8Test.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/util/encoding/Vint8Test.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
- copied unchanged from r1432061, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/
- copied from r1432061, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/dummy/
lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestZKPropertiesWriter.java
- copied unchanged from r1432061, lucene/dev/trunk/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestZKPropertiesWriter.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
- copied unchanged from r1432061, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
lucene/dev/branches/lucene4547/solr/webapp/web/img/ico/cross-button.png
- copied unchanged from r1432061, lucene/dev/trunk/solr/webapp/web/img/ico/cross-button.png
lucene/dev/branches/lucene4547/solr/webapp/web/img/ico/hammer.png
- copied unchanged from r1432061, lucene/dev/trunk/solr/webapp/web/img/ico/hammer.png
lucene/dev/branches/lucene4547/solr/webapp/web/js/lib/jquery.autogrow.js
- copied unchanged from r1432061, lucene/dev/trunk/solr/webapp/web/js/lib/jquery.autogrow.js
Removed:
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/AssociationIndexer.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/AssociationMain.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/AssociationSearcher.java
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/association/AssociationUtils.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsCategoryListBuilder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIntDecodingIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/Vint8.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiIteratorsPerCLParamsTest.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/util/Vint8Test.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/DummyCompressingCodec.java
Modified:
lucene/dev/branches/lucene4547/ (props changed)
lucene/dev/branches/lucene4547/build.xml
lucene/dev/branches/lucene4547/lucene/ (props changed)
lucene/dev/branches/lucene4547/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene4547/lucene/analysis/ (props changed)
lucene/dev/branches/lucene4547/lucene/analysis/common/ (props changed)
lucene/dev/branches/lucene4547/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
lucene/dev/branches/lucene4547/lucene/build.xml (contents, props changed)
lucene/dev/branches/lucene4547/lucene/classification/ (props changed)
lucene/dev/branches/lucene4547/lucene/classification/build.xml
lucene/dev/branches/lucene4547/lucene/codecs/ (props changed)
lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
lucene/dev/branches/lucene4547/lucene/core/ (props changed)
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Compressor.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/CharsRef.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
lucene/dev/branches/lucene4547/lucene/facet/ (props changed)
lucene/dev/branches/lucene4547/lucene/facet/build.xml
lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleUtils.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsFacetFields.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryAssociationsContainer.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryFloatAssociation.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/CategoryIntAssociation.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CategoryListBuilder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/FacetFields.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/OrdinalMappingAtomicReader.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionIndexingParams.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CategoryListIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ComplementCountingAggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/IntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java
lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/util/encoding/package.html
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/example/TestAssociationExample.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingSpeed.java
lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/util/encoding/EncodingTest.java
lucene/dev/branches/lucene4547/lucene/misc/ (props changed)
lucene/dev/branches/lucene4547/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
lucene/dev/branches/lucene4547/lucene/suggest/ (props changed)
lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
lucene/dev/branches/lucene4547/lucene/test-framework/ (props changed)
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingCodec.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
lucene/dev/branches/lucene4547/lucene/test-framework/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
lucene/dev/branches/lucene4547/lucene/tools/ (props changed)
lucene/dev/branches/lucene4547/lucene/tools/forbiddenApis/servlet-api.txt
lucene/dev/branches/lucene4547/solr/ (props changed)
lucene/dev/branches/lucene4547/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene4547/solr/build.xml (contents, props changed)
lucene/dev/branches/lucene4547/solr/contrib/ (props changed)
lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SimplePropertiesWriter.java
lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/ZKPropertiesWriter.java
lucene/dev/branches/lucene4547/solr/contrib/dataimporthandler/src/test-files/dih/solr/collection1/conf/dataimport-schema.xml
lucene/dev/branches/lucene4547/solr/core/ (props changed)
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/ZkController.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/schema12.xml
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/servlet/DirectSolrConnectionTest.java
lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java
lucene/dev/branches/lucene4547/solr/example/ (props changed)
lucene/dev/branches/lucene4547/solr/example/solr/collection1/conf/solrconfig.xml
lucene/dev/branches/lucene4547/solr/test-framework/ (props changed)
lucene/dev/branches/lucene4547/solr/test-framework/src/java/org/apache/solr/cloud/AbstractZkTestCase.java
lucene/dev/branches/lucene4547/solr/webapp/ (props changed)
lucene/dev/branches/lucene4547/solr/webapp/web/admin.html
lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/dataimport.css
lucene/dev/branches/lucene4547/solr/webapp/web/js/lib/console.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/main.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/app.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/cloud.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/dataimport.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/index.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/plugins.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/query.js
lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/replication.js
lucene/dev/branches/lucene4547/solr/webapp/web/tpl/dataimport.html
Modified: lucene/dev/branches/lucene4547/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/build.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/build.xml (original)
+++ lucene/dev/branches/lucene4547/build.xml Fri Jan 11 14:39:45 2013
@@ -281,18 +281,9 @@
<copy todir="${fakeRelease}/lucene">
<fileset dir="lucene/dist"/>
</copy>
- <copy todir="${fakeRelease}/lucene/changes">
- <fileset dir="lucene/build/docs/changes"/>
- </copy>
- <get src="http://people.apache.org/keys/group/lucene.asc"
- dest="${fakeRelease}/lucene/KEYS"/>
<copy todir="${fakeRelease}/solr">
<fileset dir="solr/package"/>
</copy>
- <copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
- <copy todir="${fakeRelease}/solr/changes">
- <fileset dir="solr/build/docs/changes"/>
- </copy>
<makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
<exec executable="${python32.exe}" failonerror="true">
<arg value="-u"/>
Modified: lucene/dev/branches/lucene4547/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/CHANGES.txt?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/CHANGES.txt Fri Jan 11 14:39:45 2013
@@ -106,7 +106,15 @@ Changes in backwards compatibility polic
* LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is
now immutable, so you don't need to clone() it. (Shai Erera)
-
+
+* LUCENE-4670: StoredFieldsWriter and TermVectorsWriter have new finish* callbacks
+ which are called after a doc/field/term has been completely added.
+ (Adrien Grand, Robert Muir)
+
+* LUCENE-4620: IntEncoder/Decoder were changed to do bulk encoding/decoding. As a
+ result, a few other classes such as Aggregator and CategoryListIterator were
+ changed to handle bulk category ordinals. (Shai Erera)
+
New Features
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
@@ -324,6 +332,8 @@ Bug Fixes
* LUCENE-4662: Add missing elided articles and prepositions to FrenchAnalyzer's
DEFAULT_ARTICLES list passed to ElisionFilter. (David Leunen via Steve Rowe)
+
+* LUCENE-4671: Fix CharsRef.subSequence method. (Tim Smith via Robert Muir)
Changes in Runtime Behavior
Modified: lucene/dev/branches/lucene4547/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Fri Jan 11 14:39:45 2013
@@ -34,6 +34,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
@@ -66,6 +67,8 @@ import org.apache.lucene.analysis.compou
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellDictionaryTest;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
+import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
+import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
@@ -103,67 +106,145 @@ public class TestRandomChains extends Ba
static List<Constructor<? extends TokenFilter>> tokenfilters;
static List<Constructor<? extends CharFilter>> charfilters;
- // TODO: fix those and remove
- private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+ private static interface Predicate<T> {
+ boolean apply(T o);
+ }
+
+ private static final Predicate<Object[]> ALWAYS = new Predicate<Object[]>() {
+ public boolean apply(Object[] args) {
+ return true;
+ };
+ };
+
+ private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<Constructor<?>, Predicate<Object[]>>();
static {
- // TODO: can we promote some of these to be only
- // offsets offenders?
- Collections.<Class<?>>addAll(brokenComponents,
- // doesn't actual reset itself!
- CachingTokenFilter.class,
- // doesn't consume whole stream!
- LimitTokenCountFilter.class,
- // Not broken: we forcefully add this, so we shouldn't
- // also randomly pick it:
- ValidatingTokenFilter.class,
- // NOTE: these by themselves won't cause any 'basic assertions' to fail.
- // but see https://issues.apache.org/jira/browse/LUCENE-3920, if any
- // tokenfilter that combines words (e.g. shingles) comes after them,
- // this will create bogus offsets because their 'offsets go backwards',
- // causing shingle or whatever to make a single token with a
- // startOffset thats > its endOffset
- // (see LUCENE-3738 for a list of other offenders here)
- // broken!
- NGramTokenizer.class,
- // broken!
- NGramTokenFilter.class,
- // broken!
- EdgeNGramTokenizer.class,
- // broken!
- EdgeNGramTokenFilter.class,
- // broken!
- WordDelimiterFilter.class,
- // broken!
- TrimFilter.class
- );
+ try {
+ brokenConstructors.put(
+ LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class),
+ ALWAYS);
+ brokenConstructors.put(
+ LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
+ }
+ });
+ for (Class<?> c : Arrays.<Class<?>>asList(
+ // TODO: can we promote some of these to be only
+ // offsets offenders?
+ // doesn't actually reset itself!
+ CachingTokenFilter.class,
+ // Not broken: we forcefully add this, so we shouldn't
+ // also randomly pick it:
+ ValidatingTokenFilter.class,
+ // NOTE: these by themselves won't cause any 'basic assertions' to fail.
+ // but see https://issues.apache.org/jira/browse/LUCENE-3920, if any
+ // tokenfilter that combines words (e.g. shingles) comes after them,
+ // this will create bogus offsets because their 'offsets go backwards',
+ // causing shingle or whatever to make a single token with a
+ // startOffset thats > its endOffset
+ // (see LUCENE-3738 for a list of other offenders here)
+ // broken!
+ NGramTokenizer.class,
+ // broken!
+ NGramTokenFilter.class,
+ // broken!
+ EdgeNGramTokenizer.class,
+ // broken!
+ EdgeNGramTokenFilter.class,
+ // broken!
+ WordDelimiterFilter.class)) {
+ for (Constructor<?> ctor : c.getConstructors()) {
+ brokenConstructors.put(ctor, ALWAYS);
+ }
+ }
+ } catch (Exception e) {
+ throw new Error(e);
+ }
}
// TODO: also fix these and remove (maybe):
- // Classes that don't produce consistent graph offsets:
- private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+ // Classes/options that don't produce consistent graph offsets:
+ private static final Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new HashMap<Constructor<?>, Predicate<Object[]>>();
static {
- Collections.<Class<?>>addAll(brokenOffsetsComponents,
- ReversePathHierarchyTokenizer.class,
- PathHierarchyTokenizer.class,
- HyphenationCompoundWordTokenFilter.class,
- DictionaryCompoundWordTokenFilter.class,
- // TODO: corrumpts graphs (offset consistency check):
- PositionFilter.class,
- // TODO: it seems to mess up offsets!?
- WikipediaTokenizer.class,
- // TODO: doesn't handle graph inputs
- ThaiWordFilter.class,
- // TODO: doesn't handle graph inputs
- CJKBigramFilter.class,
- // TODO: doesn't handle graph inputs (or even look at positionIncrement)
- HyphenatedWordsFilter.class,
- // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
- TypeTokenFilter.class,
- // TODO: doesn't handle graph inputs
- CommonGramsQueryFilter.class
- );
+ try {
+ brokenOffsetsConstructors.put(
+ TrimFilter.class.getConstructor(TokenStream.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 2;
+ return (Boolean) args[1]; // args are broken if updateOffsets is true
+ }
+ });
+ brokenOffsetsConstructors.put(
+ TypeTokenFilter.class.getConstructor(boolean.class, TokenStream.class, Set.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 4;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ TypeTokenFilter.class.getConstructor(boolean.class, TokenStream.class, Set.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ LengthFilter.class.getConstructor(boolean.class, TokenStream.class, int.class, int.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 4;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ KeepWordFilter.class.getConstructor(boolean.class, TokenStream.class, CharArraySet.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ for (Class<?> c : Arrays.<Class<?>>asList(
+ ReversePathHierarchyTokenizer.class,
+ PathHierarchyTokenizer.class,
+ HyphenationCompoundWordTokenFilter.class,
+ DictionaryCompoundWordTokenFilter.class,
+ // TODO: corrupts graphs (offset consistency check):
+ PositionFilter.class,
+ // TODO: it seems to mess up offsets!?
+ WikipediaTokenizer.class,
+ // TODO: doesn't handle graph inputs
+ ThaiWordFilter.class,
+ // TODO: doesn't handle graph inputs
+ CJKBigramFilter.class,
+ // TODO: doesn't handle graph inputs (or even look at positionIncrement)
+ HyphenatedWordsFilter.class,
+ // TODO: doesn't handle graph inputs
+ CommonGramsQueryFilter.class)) {
+ for (Constructor<?> ctor : c.getConstructors()) {
+ brokenOffsetsConstructors.put(ctor, ALWAYS);
+ }
+ }
+ } catch (Exception e) {
+ throw new Error(e);
+ }
}
-
+
@BeforeClass
public static void beforeClass() throws Exception {
List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
@@ -176,7 +257,6 @@ public class TestRandomChains extends Ba
// don't waste time with abstract classes or deprecated known-buggy ones
Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
|| c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
- || brokenComponents.contains(c)
|| c.isAnnotationPresent(Deprecated.class)
|| !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
) {
@@ -185,7 +265,7 @@ public class TestRandomChains extends Ba
for (final Constructor<?> ctor : c.getConstructors()) {
// don't test synthetic or deprecated ctors, they likely have known bugs:
- if (ctor.isSynthetic() || ctor.isAnnotationPresent(Deprecated.class)) {
+ if (ctor.isSynthetic() || ctor.isAnnotationPresent(Deprecated.class) || brokenConstructors.get(ctor) == ALWAYS) {
continue;
}
if (Tokenizer.class.isAssignableFrom(c)) {
@@ -679,7 +759,17 @@ public class TestRandomChains extends Ba
}
return null; // no success
}
-
+
+ private boolean broken(Constructor<?> ctor, Object[] args) {
+ final Predicate<Object[]> pred = brokenConstructors.get(ctor);
+ return pred != null && pred.apply(args);
+ }
+
+ private boolean brokenOffsets(Constructor<?> ctor, Object[] args) {
+ final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
+ return pred != null && pred.apply(args);
+ }
+
// create a new random tokenizer from classpath
private TokenizerSpec newTokenizer(Random random, Reader reader) {
TokenizerSpec spec = new TokenizerSpec();
@@ -688,11 +778,12 @@ public class TestRandomChains extends Ba
final StringBuilder descr = new StringBuilder();
final CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
final Object args[] = newTokenizerArgs(random, wrapper, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
spec.tokenizer = createComponent(ctor, args, descr);
if (spec.tokenizer != null) {
- if (brokenOffsetsComponents.contains(ctor.getDeclaringClass())) {
- spec.offsetsAreCorrect = false;
- }
+ spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
spec.toString = descr.toString();
} else {
assertFalse(ctor.getDeclaringClass().getName() + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
@@ -710,6 +801,9 @@ public class TestRandomChains extends Ba
while (true) {
final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
reader = createComponent(ctor, args, descr);
if (reader != null) {
spec.reader = reader;
@@ -746,11 +840,12 @@ public class TestRandomChains extends Ba
}
final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
final TokenFilter flt = createComponent(ctor, args, descr);
if (flt != null) {
- if (brokenOffsetsComponents.contains(ctor.getDeclaringClass())) {
- spec.offsetsAreCorrect = false;
- }
+ spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
spec.stream = flt;
break;
}
Modified: lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Fri Jan 11 14:39:45 2013
@@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
- Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true);
+ Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;
Modified: lucene/dev/branches/lucene4547/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/build.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/build.xml Fri Jan 11 14:39:45 2013
@@ -458,7 +458,20 @@
<!-- ================================================================== -->
<target name="dist-src" depends="package-tgz-src"/>
- <target name="dist-all" depends="dist, dist-src"/>
+ <target name="dist-all" depends="dist, dist-src, -dist-changes, -dist-keys"/>
+
+ <!-- copy changes/ to the release folder -->
+ <target name="-dist-changes">
+ <copy todir="${dist.dir}/changes">
+ <fileset dir="${build.dir}/docs/changes"/>
+ </copy>
+ </target>
+
+ <!-- copy KEYS to the release folder -->
+ <target name="-dist-keys">
+ <get src="http://people.apache.org/keys/group/lucene.asc"
+ dest="${dist.dir}/KEYS"/>
+ </target>
<target name="copy-to-stage">
<copy-to-stage-macro artifacts.dir="${dist.dir}"/>
Modified: lucene/dev/branches/lucene4547/lucene/classification/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/classification/build.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/classification/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/classification/build.xml Fri Jan 11 14:39:45 2013
@@ -24,23 +24,25 @@
<import file="../module-build.xml"/>
- <path id="base.classpath">
- <pathelement location="${common.dir}/build/core/classes/java"/>
+ <path id="classpath">
+ <path refid="base.classpath"/>
+ <pathelement path="${lucene-core.jar}"/>
<pathelement path="${queries.jar}"/>
<pathelement path="${project.classpath}"/>
+ <pathelement location="${build.dir}/classes/java" />
</path>
<path id="test.classpath">
<pathelement path="${analyzers-common.jar}"/>
- <pathelement location="${common.dir}/build/test-framework/classes/java"/>
- <pathelement location="${common.dir}/build/codecs/classes/java"/>
- <path refid="classpath"/>
- <path refid="junit-path"/>
- <pathelement location="${build.dir}/classes/java"/>
+ <pathelement location="${test-framework.jar}"/>
+ <pathelement location="${codecs.jar}"/>
+ <path refid="test.base.classpath"/>
</path>
<target name="compile-core" depends="jar-queries,jar-analyzers-common,common.compile-core" />
+ <target name="jar-core" depends="common.jar-core" />
+
<target name="javadocs" depends="javadocs-queries,compile-core">
<invoke-module-javadoc>
<links>
Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Fri Jan 11 14:39:45 2013
@@ -113,7 +113,7 @@ public final class MemoryPostingsFormat
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
- builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio);
+ builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
}
private class PostingsWriter extends PostingsConsumer {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Fri Jan 11 14:39:45 2013
@@ -419,7 +419,7 @@ public class BlockTreeTermsWriter extend
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- outputs, null, false);
+ outputs, null, false, true);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
@@ -962,7 +962,7 @@ public class BlockTreeTermsWriter extend
0, 0, true,
true, Integer.MAX_VALUE,
noOutputs,
- new FindBlocks(), false);
+ new FindBlocks(), false, true);
postingsWriter.setField(fieldInfo);
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java Fri Jan 11 14:39:45 2013
@@ -55,7 +55,10 @@ public abstract class StoredFieldsWriter
* called even if the document has no stored fields, in
* this case <code>numStoredFields</code> will be zero. */
public abstract void startDocument(int numStoredFields) throws IOException;
-
+
+ /** Called when a document and all its fields have been added. */
+ public void finishDocument() throws IOException {}
+
/** Writes a single stored field. */
public abstract void writeField(FieldInfo info, StorableField field) throws IOException;
@@ -116,6 +119,8 @@ public abstract class StoredFieldsWriter
for (StorableField field : doc) {
writeField(fieldInfos.fieldInfo(field.name()), field);
}
+
+ finishDocument();
}
@Override
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java Fri Jan 11 14:39:45 2013
@@ -71,18 +71,27 @@ public abstract class TermVectorsWriter
* has no vector fields, in this case <code>numVectorFields</code>
* will be zero. */
public abstract void startDocument(int numVectorFields) throws IOException;
-
+
+ /** Called after a doc and all its fields have been added. */
+ public void finishDocument() throws IOException {};
+
/** Called before writing the terms of the field.
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
-
+
+ /** Called after a field and all its terms have been added. */
+ public void finishField() throws IOException {};
+
/** Adds a term and its term frequency <code>freq</code>.
* If this field has positions and/or offsets enabled, then
* {@link #addPosition(int, int, int, BytesRef)} will be called
* <code>freq</code> times respectively.
*/
public abstract void startTerm(BytesRef term, int freq) throws IOException;
-
+
+ /** Called after a term and all its positions have been added. */
+ public void finishTerm() throws IOException {}
+
/** Adds a term position and offsets */
public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
@@ -97,7 +106,7 @@ public abstract class TermVectorsWriter
* check that this is the case to detect the JRE bug described
* in LUCENE-1282. */
public abstract void finish(FieldInfos fis, int numDocs) throws IOException;
-
+
/**
* Called by IndexWriter when writing new segments.
* <p>
@@ -197,6 +206,7 @@ public abstract class TermVectorsWriter
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
if (vectors == null) {
startDocument(0);
+ finishDocument();
return;
}
@@ -275,10 +285,13 @@ public abstract class TermVectorsWriter
addPosition(pos, startOffset, endOffset, payload);
}
}
+ finishTerm();
}
assert termCount == numTerms;
+ finishField();
}
assert fieldCount == numFields;
+ finishDocument();
}
/** Return the BytesRef Comparator used to sort terms
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Fri Jan 11 14:39:45 2013
@@ -395,8 +395,10 @@ public final class CompressingStoredFiel
* Copy compressed data.
*/
void copyCompressedData(DataOutput out) throws IOException {
- final int chunkSize = chunkSize();
- decompressor.copyCompressedData(fieldsStream, chunkSize, out);
+ final long chunkEnd = docBase + chunkDocs == numDocs
+ ? fieldsStream.length()
+ : indexReader.getStartPointer(docBase + chunkDocs);
+ out.copyBytes(fieldsStream, chunkEnd - fieldsStream.getFilePointer());
}
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Fri Jan 11 14:39:45 2013
@@ -136,19 +136,8 @@ public final class CompressingStoredFiel
}
}
- private void endWithPreviousDocument() throws IOException {
- if (numBufferedDocs > 0) {
- endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
- }
- }
-
@Override
public void startDocument(int numStoredFields) throws IOException {
- endWithPreviousDocument();
- if (triggerFlush()) {
- flush();
- }
-
if (numBufferedDocs == this.numStoredFields.length) {
final int newLength = ArrayUtil.oversize(numBufferedDocs + 1, 4);
this.numStoredFields = Arrays.copyOf(this.numStoredFields, newLength);
@@ -158,6 +147,14 @@ public final class CompressingStoredFiel
++numBufferedDocs;
}
+ @Override
+ public void finishDocument() throws IOException {
+ endOffsets[numBufferedDocs - 1] = bufferedDocs.length;
+ if (triggerFlush()) {
+ flush();
+ }
+ }
+
private static void saveInts(int[] values, int length, DataOutput out) throws IOException {
assert length > 0;
if (length == 1) {
@@ -295,9 +292,10 @@ public final class CompressingStoredFiel
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
- endWithPreviousDocument();
if (numBufferedDocs > 0) {
flush();
+ } else {
+ assert bufferedDocs.length == 0;
}
if (docBase != numDocs) {
throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
@@ -351,17 +349,13 @@ public final class CompressingStoredFiel
}
if (compressionMode == matchingFieldsReader.getCompressionMode() // same compression mode
- && (numBufferedDocs == 0 || triggerFlush()) // starting a new chunk
+ && numBufferedDocs == 0 // starting a new chunk
&& startOffsets[it.chunkDocs - 1] < chunkSize // chunk is small enough
&& startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] >= chunkSize // chunk is large enough
&& nextDeletedDoc(it.docBase, liveDocs, it.docBase + it.chunkDocs) == it.docBase + it.chunkDocs) { // no deletion in the chunk
assert docID == it.docBase;
// no need to decompress, just copy data
- endWithPreviousDocument();
- if (triggerFlush()) {
- flush();
- }
indexWriter.writeIndex(it.chunkDocs, fieldsStream.getFilePointer());
writeHeader(this.docBase, it.chunkDocs, it.numStoredFields, it.lengths);
it.copyCompressedData(fieldsStream);
@@ -380,6 +374,7 @@ public final class CompressingStoredFiel
final int diff = docID - it.docBase;
startDocument(it.numStoredFields[diff]);
bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
+ finishDocument();
++docCount;
mergeState.checkAbort.work(300);
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java Fri Jan 11 14:39:45 2013
@@ -141,14 +141,6 @@ public abstract class CompressionMode {
}
@Override
- public void copyCompressedData(DataInput in, int originalLength, DataOutput out) throws IOException {
- final int copied = LZ4.copyCompressedData(in, originalLength, out);
- if (copied != originalLength) {
- throw new CorruptIndexException("Currupted compressed stream: expected " + originalLength + " bytes, but got at least" + copied);
- }
- }
-
- @Override
public Decompressor clone() {
return this;
}
@@ -225,13 +217,6 @@ public abstract class CompressionMode {
}
@Override
- public void copyCompressedData(DataInput in, int originalLength, DataOutput out) throws IOException {
- final int compressedLength = in.readVInt();
- out.writeVInt(compressedLength);
- out.copyBytes(in, compressedLength);
- }
-
- @Override
public Decompressor clone() {
return new DeflateDecompressor();
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Compressor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Compressor.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Compressor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Compressor.java Fri Jan 11 14:39:45 2013
@@ -24,7 +24,10 @@ import org.apache.lucene.store.DataOutpu
/**
* A data compressor.
*/
-abstract class Compressor {
+public abstract class Compressor {
+
+ /** Sole constructor, typically called from sub-classes. */
+ protected Compressor() {}
/**
* Compress bytes into <code>out</code>. It it the responsibility of the
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/Decompressor.java Fri Jan 11 14:39:45 2013
@@ -20,13 +20,15 @@ package org.apache.lucene.codecs.compres
import java.io.IOException;
import org.apache.lucene.store.DataInput;
-import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;
/**
- * An decompressor.
+ * A decompressor.
*/
-abstract class Decompressor implements Cloneable {
+public abstract class Decompressor implements Cloneable {
+
+ /** Sole constructor, typically called from sub-classes. */
+ protected Decompressor() {}
/**
* Decompress bytes that were stored between offsets <code>offset</code> and
@@ -44,10 +46,6 @@ abstract class Decompressor implements C
*/
public abstract void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException;
- /** Copy a compressed stream whose original length is
- * <code>originalLength</code> from <code>in</code> to <code>out</code>. */
- public abstract void copyCompressedData(DataInput in, int originalLength, DataOutput out) throws IOException;
-
@Override
public abstract Decompressor clone();
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/LZ4.java Fri Jan 11 14:39:45 2013
@@ -506,51 +506,4 @@ class LZ4 {
encodeLastLiterals(src, anchor, srcEnd - anchor, out);
}
- /** Copy bytes from <code>in</code> to <code>out</code> where
- * <code>in</code> is a LZ4-encoded stream. This method copies enough bytes
- * so that <code>out</code> can be used later on to restore the first
- * <code>length</code> bytes of the stream. This method always reads at
- * least one byte from <code>in</code> so make sure not to call this method
- * if <code>in</code> reached the end of the stream, even if
- * <code>length=0</code>. */
- public static int copyCompressedData(DataInput in, int length, DataOutput out) throws IOException {
- int n = 0;
- do {
- // literals
- final byte token = in.readByte();
- out.writeByte(token);
- int literalLen = (token & 0xFF) >>> 4;
- if (literalLen == 0x0F) {
- byte len;
- while ((len = in.readByte()) == (byte) 0xFF) {
- literalLen += 0xFF;
- out.writeByte(len);
- }
- literalLen += len & 0xFF;
- out.writeByte(len);
- }
- out.copyBytes(in, literalLen);
- n += literalLen;
- if (n >= length) {
- break;
- }
-
- // matchs
- out.copyBytes(in, 2); // match dec
- int matchLen = token & 0x0F;
- if (matchLen == 0x0F) {
- byte len;
- while ((len = in.readByte()) == (byte) 0xFF) {
- matchLen += 0xFF;
- out.writeByte(len);
- }
- matchLen += len & 0xFF;
- out.writeByte(len);
- }
- matchLen += MIN_MATCH;
- n += matchLen;
- } while (n < length);
- return n;
- }
-
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java Fri Jan 11 14:39:45 2013
@@ -124,17 +124,16 @@ public final class Lucene40TermVectorsWr
if (payloads)
bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
tvf.writeByte(bits);
-
- assert fieldCount <= numVectorFields;
- if (fieldCount == numVectorFields) {
- // last field of the document
- // this is crazy because the file format is crazy!
- for (int i = 1; i < fieldCount; i++) {
- tvd.writeVLong(fps[i] - fps[i-1]);
- }
- }
}
+ @Override
+ public void finishDocument() throws IOException {
+ assert fieldCount == numVectorFields;
+ for (int i = 1; i < fieldCount; i++) {
+ tvd.writeVLong(fps[i] - fps[i-1]);
+ }
+ }
+
private final BytesRef lastTerm = new BytesRef(10);
// NOTE: we override addProx, so we don't need to buffer when indexing.
@@ -222,20 +221,6 @@ public final class Lucene40TermVectorsWr
}
bufferedIndex++;
-
- // dump buffer if we are done
- if (bufferedIndex == bufferedFreq) {
- if (payloads) {
- tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
- }
- for (int i = 0; i < bufferedIndex; i++) {
- if (offsets) {
- tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
- tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
- lastOffset = offsetEndBuffer[i];
- }
- }
- }
} else if (positions) {
// write position delta
writePosition(position - lastPosition, payload);
@@ -248,6 +233,25 @@ public final class Lucene40TermVectorsWr
}
}
+ @Override
+ public void finishTerm() throws IOException {
+ if (bufferedIndex > 0) {
+ // dump buffer
+ assert positions && (offsets || payloads);
+ assert bufferedIndex == bufferedFreq;
+ if (payloads) {
+ tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
+ }
+ for (int i = 0; i < bufferedIndex; i++) {
+ if (offsets) {
+ tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
+ tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
+ lastOffset = offsetEndBuffer[i];
+ }
+ }
+ }
+ }
+
private void writePosition(int delta, BytesRef payload) throws IOException {
if (payloads) {
int payloadLength = payload == null ? 0 : payload.length;
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/StoredFieldsProcessor.java Fri Jan 11 14:39:45 2013
@@ -108,6 +108,7 @@ final class StoredFieldsProcessor extend
while(lastDocID < docID) {
fieldsWriter.startDocument(0);
lastDocID++;
+ fieldsWriter.finishDocument();
}
}
@@ -123,6 +124,7 @@ final class StoredFieldsProcessor extend
for (int i = 0; i < numStoredFields; i++) {
fieldsWriter.writeField(fieldInfos[i], storedFields[i]);
}
+ fieldsWriter.finishDocument();
lastDocID++;
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java Fri Jan 11 14:39:45 2013
@@ -78,6 +78,7 @@ final class TermVectorsConsumer extends
void fill(int docID) throws IOException {
while(lastDocID < docID) {
writer.startDocument(0);
+ writer.finishDocument();
lastDocID++;
}
}
@@ -108,6 +109,7 @@ final class TermVectorsConsumer extends
for (int i = 0; i < numVectorFields; i++) {
perFields[i].finishDocument();
}
+ writer.finishDocument();
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java Fri Jan 11 14:39:45 2013
@@ -182,7 +182,9 @@ final class TermVectorsConsumerPerField
}
tv.addProx(freq, posReader, offReader);
}
+ tv.finishTerm();
}
+ tv.finishField();
termsHashPerField.reset();
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/CharsRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/CharsRef.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/CharsRef.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/CharsRef.java Fri Jan 11 14:39:45 2013
@@ -218,7 +218,7 @@ public final class CharsRef implements C
if (start < 0 || end > length || start > end) {
throw new IndexOutOfBoundsException();
}
- return new CharsRef(chars, offset + start, offset + end);
+ return new CharsRef(chars, offset + start, end - start);
}
/** @deprecated This comparator is only a transition mechanism */
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java Fri Jan 11 14:39:45 2013
@@ -84,11 +84,11 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
- * boolean, int, Outputs, FreezeTail, boolean)} with
+ * boolean, int, Outputs, FreezeTail, boolean, boolean)} with
* pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
- this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT);
+ this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
}
/**
@@ -97,9 +97,9 @@ public class Builder<T> {
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
- FreezeTail<T> freezeTail, boolean willPackFST) {
+ FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
- shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT);
+ shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
}
/**
@@ -143,10 +143,14 @@ public class Builder<T> {
*
* @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
* is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
+ *
+ * @param allowArrayArcs Pass false to disable the array arc optimization
+ * while building the FST; this will make the resulting
+ * FST smaller but slower to traverse.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
- FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio) {
+ FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
@@ -154,7 +158,7 @@ public class Builder<T> {
this.shareMaxTailLength = shareMaxTailLength;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
- fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio);
+ fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
if (doShareSuffix) {
dedupHash = new NodeHash<T>(fst);
} else {
@@ -182,13 +186,6 @@ public class Builder<T> {
return dedupHash == null ? 0 : fst.nodeCount;
}
- /** Pass false to disable the array arc optimization
- * while building the FST; this will make the resulting
- * FST smaller but slower to traverse. */
- public void setAllowArrayArcs(boolean b) {
- fst.setAllowArrayArcs(b);
- }
-
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java Fri Jan 11 14:39:45 2013
@@ -33,6 +33,7 @@ import org.apache.lucene.store.DataInput
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
@@ -137,16 +138,18 @@ public final class FST<T> {
// if non-null, this FST accepts the empty string and
// produces this output
T emptyOutput;
- private byte[] emptyOutputBytes;
// Not private to avoid synthetic access$NNN methods:
byte[] bytes;
- int byteUpto = 0;
private int startNode = -1;
public final Outputs<T> outputs;
+ // Used for the BIT_TARGET_NEXT optimization (whereby
+ // instead of storing the address of the target node for
+ // a given arc, we mark a single bit noting that the next
+ // node in the byte[] is the target node):
private int lastFrozenNode;
private final T NO_OUTPUT;
@@ -161,7 +164,7 @@ public final class FST<T> {
/** If arc has this label then that arc is final/accepted */
public static final int END_LABEL = -1;
- private boolean allowArrayArcs = true;
+ private final boolean allowArrayArcs;
private Arc<T> cachedRootArcs[];
@@ -262,9 +265,10 @@ public final class FST<T> {
// make a new empty FST, for building; Builder invokes
// this ctor
- FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio) {
+ FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
this.inputType = inputType;
this.outputs = outputs;
+ this.allowArrayArcs = allowArrayArcs;
bytes = new byte[128];
NO_OUTPUT = outputs.getNoOutput();
if (willPackFST) {
@@ -293,14 +297,15 @@ public final class FST<T> {
if (in.readByte() == 1) {
// accepts empty string
int numBytes = in.readVInt();
- // messy
bytes = new byte[numBytes];
in.readBytes(bytes, 0, numBytes);
+
+ // De-serialize empty-string output:
BytesReader reader;
if (packed) {
- reader = getBytesReader(0);
+ reader = new ForwardBytesReader(bytes, 0);
} else {
- reader = getBytesReader(numBytes-1);
+ reader = new ReverseBytesReader(bytes, bytes.length-1);
}
emptyOutput = outputs.readFinalOutput(reader);
} else {
@@ -335,6 +340,11 @@ public final class FST<T> {
NO_OUTPUT = outputs.getNoOutput();
cacheRootArcs();
+
+ // NOTE: bogus because this is only used during
+ // building; we need to break out mutable FST from
+ // immutable
+ allowArrayArcs = false;
}
public INPUT_TYPE getInputType() {
@@ -412,26 +422,6 @@ public final class FST<T> {
} else {
emptyOutput = v;
}
-
- // TODO: this is messy -- replace with sillyBytesWriter; maybe make
- // bytes private
- final int posSave = writer.getPosition();
- outputs.writeFinalOutput(emptyOutput, writer);
- emptyOutputBytes = new byte[writer.getPosition()-posSave];
-
- if (!packed) {
- // reverse
- final int stopAt = (writer.getPosition() - posSave)/2;
- int upto = 0;
- while(upto < stopAt) {
- final byte b = bytes[posSave + upto];
- bytes[posSave+upto] = bytes[writer.getPosition()-upto-1];
- bytes[writer.getPosition()-upto-1] = b;
- upto++;
- }
- }
- System.arraycopy(bytes, posSave, emptyOutputBytes, 0, writer.getPosition()-posSave);
- writer.setPosition(posSave);
}
public void save(DataOutput out) throws IOException {
@@ -453,7 +443,27 @@ public final class FST<T> {
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (emptyOutput != null) {
+ // Accepts empty string
out.writeByte((byte) 1);
+
+ // Serialize empty-string output:
+ RAMOutputStream ros = new RAMOutputStream();
+ outputs.writeFinalOutput(emptyOutput, ros);
+
+ byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()];
+ ros.writeTo(emptyOutputBytes, 0);
+
+ if (!packed) {
+ // reverse
+ final int stopAt = emptyOutputBytes.length/2;
+ int upto = 0;
+ while(upto < stopAt) {
+ final byte b = emptyOutputBytes[upto];
+ emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
+ emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
+ upto++;
+ }
+ }
out.writeVInt(emptyOutputBytes.length);
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
} else {
@@ -1160,10 +1170,6 @@ public final class FST<T> {
return arcWithOutputCount;
}
- public void setAllowArrayArcs(boolean v) {
- allowArrayArcs = v;
- }
-
/**
* Nodes will be expanded if their depth (distance from the root node) is
* <= this value and their number of arcs is >=
@@ -1453,6 +1459,11 @@ public final class FST<T> {
this.outputs = outputs;
NO_OUTPUT = outputs.getNoOutput();
writer = new DefaultBytesWriter();
+
+ // NOTE: bogus because this is only used during
+ // building; we need to break out mutable FST from
+ // immutable
+ allowArrayArcs = false;
}
/** Expert: creates an FST by packing this one. This
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/codecs/compressing/AbstractTestCompressionMode.java Fri Jan 11 14:39:45 2013
@@ -80,16 +80,6 @@ public abstract class AbstractTestCompre
return Arrays.copyOfRange(bytes.bytes, bytes.offset, bytes.offset + bytes.length);
}
- static byte[] copyCompressedData(Decompressor decompressor, byte[] compressed, int originalLength) throws IOException {
- GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(compressed.length);
- decompressor.copyCompressedData(new ByteArrayDataInput(compressed), originalLength, out);
- return Arrays.copyOf(out.bytes, out.length);
- }
-
- byte[] copyCompressedData(byte[] compressed, int originalLength) throws IOException {
- return copyCompressedData(mode.newDecompressor(), compressed, originalLength);
- }
-
public void testDecompress() throws IOException {
final int iterations = atLeast(10);
for (int i = 0; i < iterations; ++i) {
@@ -117,17 +107,10 @@ public abstract class AbstractTestCompre
}
}
- public void testCopyCompressedData() throws IOException {
- final byte[] decompressed = randomArray();
- final byte[] compressed = compress(decompressed);
- assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
- }
-
public byte[] test(byte[] decompressed) throws IOException {
final byte[] compressed = compress(decompressed);
final byte[] restored = decompress(compressed, decompressed.length);
assertEquals(decompressed.length, restored.length);
- assertArrayEquals(compressed, copyCompressedData(compressed, decompressed.length));
return compressed;
}
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/TestCharsRef.java Fri Jan 11 14:39:45 2013
@@ -116,11 +116,28 @@ public class TestCharsRef extends Lucene
}
// LUCENE-3590: fix off-by-one in subsequence, and fully obey interface
+ // LUCENE-4671: fix subSequence
public void testCharSequenceSubSequence() {
- CharSequence c = new CharsRef("abc");
+ CharSequence sequences[] = {
+ new CharsRef("abc"),
+ new CharsRef("0abc".toCharArray(), 1, 3),
+ new CharsRef("abc0".toCharArray(), 0, 3),
+ new CharsRef("0abc0".toCharArray(), 1, 3)
+ };
+
+ for (CharSequence c : sequences) {
+ doTestSequence(c);
+ }
+ }
+
+ private void doTestSequence(CharSequence c) {
// slice
assertEquals("a", c.subSequence(0, 1).toString());
+ // mid subsequence
+ assertEquals("b", c.subSequence(1, 2).toString());
+ // end subsequence
+ assertEquals("bc", c.subSequence(1, 3).toString());
// empty subsequence
assertEquals("", c.subSequence(0, 0).toString());
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java Fri Jan 11 14:39:45 2013
@@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTest
final boolean doRewrite = random().nextBoolean();
- Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite);
+ Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, true);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@@ -453,8 +453,7 @@ public class TestFSTs extends LuceneTest
this.outputs = outputs;
this.doPack = doPack;
- builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack);
- builder.setAllowArrayArcs(!noArcArrays);
+ builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, !noArcArrays);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@@ -1063,7 +1062,7 @@ public class TestFSTs extends LuceneTest
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
- final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean());
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), true);
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
final FST<Long> fst = builder.finish();
@@ -1078,7 +1077,7 @@ public class TestFSTs extends LuceneTest
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final boolean willRewrite = random().nextBoolean();
- final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite);
+ final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, true);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
@@ -1101,7 +1100,7 @@ public class TestFSTs extends LuceneTest
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
- final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT);
+ final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true);
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/packed/TestPackedInts.java Fri Jan 11 14:39:45 2013
@@ -27,6 +27,8 @@ import java.util.Locale;
import java.util.Random;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@@ -875,4 +877,102 @@ public class TestPackedInts extends Luce
in.close();
dir.close();
}
+ public void testBlockPackedReaderWriter() throws IOException {
+ final int iters = atLeast(2);
+ for (int iter = 0; iter < iters; ++iter) {
+ final int blockSize = 64 * _TestUtil.nextInt(random(), 1, 1 << 12);
+ final int valueCount = random().nextInt(1 << 18);
+ final long[] values = new long[valueCount];
+ long minValue = 0;
+ int bpv = 0;
+ for (int i = 0; i < valueCount; ++i) {
+ if (i % blockSize == 0) {
+ minValue = rarely() ? random().nextInt(256) : rarely() ? -5 : random().nextLong();
+ bpv = random().nextInt(65);
+ }
+ if (bpv == 0) {
+ values[i] = minValue;
+ } else if (bpv == 64) {
+ values[i] = random().nextLong();
+ } else {
+ values[i] = minValue + _TestUtil.nextLong(random(), 0, (1L << bpv) - 1);
+ }
+ }
+
+ final Directory dir = newDirectory();
+ final IndexOutput out = dir.createOutput("out.bin", IOContext.DEFAULT);
+ final BlockPackedWriter writer = new BlockPackedWriter(out, blockSize);
+ for (int i = 0; i < valueCount; ++i) {
+ assertEquals(i, writer.ord());
+ writer.add(values[i]);
+ }
+ assertEquals(valueCount, writer.ord());
+ writer.finish();
+ assertEquals(valueCount, writer.ord());
+ final long fp = out.getFilePointer();
+ out.close();
+
+ DataInput in = dir.openInput("out.bin", IOContext.DEFAULT);
+ if (random().nextBoolean()) {
+ byte[] buf = new byte[(int) fp];
+ in.readBytes(buf, 0, (int) fp);
+ ((IndexInput) in).close();
+ in = new ByteArrayDataInput(buf);
+ }
+ final BlockPackedReader reader = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+ for (int i = 0; i < valueCount; ) {
+ if (random().nextBoolean()) {
+ assertEquals("" + i, values[i], reader.next());
+ ++i;
+ } else {
+ final LongsRef nextValues = reader.next(_TestUtil.nextInt(random(), 1, 1024));
+ for (int j = 0; j < nextValues.length; ++j) {
+ assertEquals("" + (i + j), values[i + j], nextValues.longs[nextValues.offset + j]);
+ }
+ i += nextValues.length;
+ }
+ assertEquals(i, reader.ord());
+ }
+ assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
+ try {
+ reader.next();
+ assertTrue(false);
+ } catch (IOException e) {
+ // OK
+ }
+
+ if (in instanceof ByteArrayDataInput) {
+ ((ByteArrayDataInput) in).setPosition(0);
+ } else {
+ ((IndexInput) in).seek(0L);
+ }
+ final BlockPackedReader reader2 = new BlockPackedReader(in, PackedInts.VERSION_CURRENT, blockSize, valueCount);
+ int i = 0;
+ while (true) {
+ final int skip = _TestUtil.nextInt(random(), 0, valueCount - i);
+ reader2.skip(skip);
+ i += skip;
+ assertEquals(i, reader2.ord());
+ if (i == valueCount) {
+ break;
+ } else {
+ assertEquals(values[i], reader2.next());
+ ++i;
+ }
+ }
+ assertEquals(fp, in instanceof ByteArrayDataInput ? ((ByteArrayDataInput) in).getPosition() : ((IndexInput) in).getFilePointer());
+ try {
+ reader2.skip(1);
+ assertTrue(false);
+ } catch (IOException e) {
+ // OK
+ }
+
+ if (in instanceof IndexInput) {
+ ((IndexInput) in).close();
+ }
+ dir.close();
+ }
+ }
+
}
Modified: lucene/dev/branches/lucene4547/lucene/facet/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/build.xml?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/build.xml Fri Jan 11 14:39:45 2013
@@ -81,5 +81,12 @@
</links>
</invoke-module-javadoc>
</target>
-
+
+ <target name="run-encoding-benchmark" depends="compile-test">
+ <java classname="org.apache.lucene.util.encoding.EncodingSpeed" fork="true" failonerror="true">
+ <classpath refid="test.classpath" />
+ <classpath path="${build.dir}/classes/test" />
+ </java>
+ </target>
+
</project>
Modified: lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleUtils.java?rev=1432065&r1=1432064&r2=1432065&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleUtils.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleUtils.java Fri Jan 11 14:39:45 2013
@@ -1,11 +1,7 @@
package org.apache.lucene.facet.example.simple;
-import java.util.ArrayList;
-import java.util.List;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-
import org.apache.lucene.facet.example.ExampleUtils;
import org.apache.lucene.facet.taxonomy.CategoryPath;