You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/08/27 16:59:04 UTC
svn commit: r1377705 [1/2] - in /lucene/dev/branches/branch_4x: ./
dev-tools/ dev-tools/idea/.idea/libraries/ lucene/ lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/util/
lucene/analysis/common/src/resources/META-INF/servic...
Author: rmuir
Date: Mon Aug 27 14:58:55 2012
New Revision: 1377705
URL: http://svn.apache.org/viewvc?rev=1377705&view=rev
Log:
LUCENE-3923: fail the build on wrong svn:eol-style
Added:
lucene/dev/branches/branch_4x/lucene/tools/src/java/org/apache/lucene/validation/SVNEolCheckTask.java
- copied unchanged from r1377702, lucene/dev/trunk/lucene/tools/src/java/org/apache/lucene/validation/SVNEolCheckTask.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/build.xml
lucene/dev/branches/branch_4x/dev-tools/ (props changed)
lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/Ant.xml (props changed)
lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/HSQLDB.xml (contents, props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/MultiTermAwareComponent.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.aff (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.dic (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testWrongAffixRule.aff (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/icu/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/icu/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/META-INF/services/org.apache.lucene.analysis.util.CharFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/kuromoji/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorphosyntacticTagsAttributeImpl.java (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/morfologik/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/phonetic/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/smartcn/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/stempel/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory (props changed)
lucene/dev/branches/branch_4x/lucene/analysis/uima/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory (props changed)
lucene/dev/branches/branch_4x/lucene/benchmark/ (props changed)
lucene/dev/branches/branch_4x/lucene/benchmark/conf/addIndexes.alg (props changed)
lucene/dev/branches/branch_4x/lucene/build.xml (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsFormat.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsReader.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/BlockPostingsWriter.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipReader.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/BlockSkipWriter.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/ForUtil.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/block/package.html (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/DefaultBloomFilterFactory.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/package.html (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/ByteDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/DerefBytesDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/DoubleDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/FloatDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/IntDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/LongDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/PackedLongDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/ShortDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/SortedBytesDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StraightBytesDocValuesField.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FuzzySet.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/LongsRef.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/HashFunction.java (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/MurmurHash2.java (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/package.html (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/resources/META-INF/services/org.apache.lucene.util.hash.HashFunction (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/TestWorstCaseTestBehavior.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/block/TestForUtil.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterStallControl.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestMaxFailuresRule.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestBeforeAfterOverrides.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestCodecReported.java (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/junitcompat/TestSameRandomnessLocalePassedOrNot.java (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragListBuilder.java (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SimpleFieldFragList.java (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFieldFragList.java (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilder.java (props changed)
lucene/dev/branches/branch_4x/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/WeightedFragListBuilderTest.java (props changed)
lucene/dev/branches/branch_4x/lucene/join/ (props changed)
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ScoreMode.java (props changed)
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (props changed)
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/Tagger-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/WhitespaceTokenizer-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ant-1.8.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ant-junit-1.8.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/asm-debug-all-4.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/commons-codec-1.6.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/commons-compress-1.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/icu4j-49.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/jakarta-regexp-1.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/junit-4.10.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/junit4-ant-2.0.0.rc5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/junit4-ant-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/junit4-ant-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/morfologik-fsa-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/morfologik-polish-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/morfologik-stemming-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/nekohtml-1.9.15.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/randomizedtesting-runner-2.0.0.rc5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/randomizedtesting-runner-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/randomizedtesting-runner-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/servlet-api-2.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/spatial4j-0.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/uimaj-core-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/licenses/xercesImpl-2.9.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/lucene/queries/ (props changed)
lucene/dev/branches/branch_4x/lucene/queries/src/test/org/apache/lucene/queries/function/TestBoostedQuery.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgsParser.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialOperation.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/query/UnsupportedSpatialOperation.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/java/org/apache/lucene/spatial/query/package-info.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/test/org/apache/lucene/spatial/PortedSolr3Test.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialExample.java (props changed)
lucene/dev/branches/branch_4x/lucene/spatial/src/test/org/apache/lucene/spatial/query/SpatialArgsParserTest.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/TestBloomFilteredLucene40Postings.java (contents, props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/bloom/package.html (contents, props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/CloseableDirectory.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/LuceneJUnit3MethodProvider.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/QuickPatchThreadsFilter.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/RunListenerPrintReproduceInfo.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleAssertionsRequired.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreAfterMaxFailures.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleIgnoreTestSuites.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleMarkFailure.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupTeardownChained.java (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/TimeUnits.java (props changed)
lucene/dev/branches/branch_4x/lucene/tools/ (props changed)
lucene/dev/branches/branch_4x/lucene/tools/forbiddenApis/executors.txt (props changed)
lucene/dev/branches/branch_4x/lucene/tools/junit4/cached-timehints.txt (props changed)
lucene/dev/branches/branch_4x/lucene/tools/junit4/logging.properties (props changed)
lucene/dev/branches/branch_4x/lucene/tools/src/java/lucene-solr.antlib.xml
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/cli-test-solrcloud-start.sh (props changed)
lucene/dev/branches/branch_4x/solr/cloud-dev/solrcloud-multi-start.sh (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingStemmerFactory.java (props changed)
lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/DuplicatingTokenizerFactory.java (props changed)
lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoStemsClusteringAlgorithm.java (props changed)
lucene/dev/branches/branch_4x/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/EchoTokensClusteringAlgorithm.java (props changed)
lucene/dev/branches/branch_4x/solr/core/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/cloud/ZkCLI.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/common/ResourceLoader.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/highlight/WeightedFragListBuilder.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/CircularList.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/LogWatcher.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/LoggerInfo.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/jul/JulInfo.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/jul/JulWatcher.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/logging/jul/RecordHandler.java (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/grouping/distributed/command/Pair.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/update/MemOutputStream.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/util/AdjustableSemaphore.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/util/plugin/ResourceLoaderAware.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/AnalysisAfterCoreReloadTest.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/TestDocumentBuilder.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZk2Test.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/SyncSliceTest.java (props changed)
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/cloud/ZkCLITest.java (props changed)
lucene/dev/branches/branch_4x/solr/example/ (props changed)
lucene/dev/branches/branch_4x/solr/example/cloud-scripts/zkcli.bat (contents, props changed)
lucene/dev/branches/branch_4x/solr/example/cloud-scripts/zkcli.sh (props changed)
lucene/dev/branches/branch_4x/solr/example/contexts/solr.xml (props changed)
lucene/dev/branches/branch_4x/solr/example/etc/logging.properties (props changed)
lucene/dev/branches/branch_4x/solr/example/example-DIH/hsqldb/ex.log (props changed)
lucene/dev/branches/branch_4x/solr/example/example-DIH/hsqldb/ex.script (props changed)
lucene/dev/branches/branch_4x/solr/example/solr/collection1/conf/update-script.js (props changed)
lucene/dev/branches/branch_4x/solr/licenses/ (props changed)
lucene/dev/branches/branch_4x/solr/licenses/AlchemyAPIAnnotator-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/OpenCalaisAnnotator-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/Tagger-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/WhitespaceTokenizer-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/activation-1.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/ant-1.8.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/ant-junit-1.8.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/apache-mime4j-core-0.7.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/apache-mime4j-dom-0.7.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/bcmail-jdk15-1.45.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/bcprov-jdk15-1.45.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/boilerpipe-1.1.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/carrot2-core-3.5.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-beanutils-1.7.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-cli-1.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-cli-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-cli-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-codec-1.6.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-collections-3.2.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-compress-1.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-digester-2.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-fileupload-1.2.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-io-2.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/commons-lang-2.6.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/dom4j-1.6.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/easymock-2.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/fontbox-1.6.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/guava-r05.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/hppc-0.3.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/hsqldb-1.8.0.10.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpclient-4.1.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpcore-4.1.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/httpmime-4.1.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/icu4j-49.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/isoparser-1.0-beta-5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jackson-core-asl-1.7.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jackson-mapper-asl-1.7.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/javassist-3.6.0.GA.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/javax.servlet-api-3.0.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jcl-over-slf4j-1.6.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jdom-1.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jempbox-1.6.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-continuation-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-deploy-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-http-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-io-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-jmx-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-security-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-server-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-servlet-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-util-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-webapp-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jetty-xml-8.1.2.v20120308.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/jsonic-1.2.7.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/junit-4.10.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/junit4-ant-2.0.0.rc5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/junit4-ant-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/junit4-ant-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/langdetect-1.1-20120112.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/log4j-over-slf4j-1.6.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/mahout-collections-0.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/mahout-math-0.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/mail-1.4.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/metadata-extractor-2.4.0-beta-1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/morfologik-fsa-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/morfologik-polish-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/morfologik-stemming-1.5.3.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/netcdf-4.2-min.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/pdfbox-1.6.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/poi-3.8-beta5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-3.8-beta5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-schemas-3.8-beta5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/poi-scratchpad-3.8-beta5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/randomizedtesting-runner-2.0.0.rc5.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/randomizedtesting-runner-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/randomizedtesting-runner-NOTICE.txt (props changed)
lucene/dev/branches/branch_4x/solr/licenses/rome-0.9.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/scannotation-1.0.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/servlet-api-3.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/simple-xml-2.4.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/slf4j-api-1.6.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/slf4j-jdk14-1.6.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/spatial4j-0.2.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/start.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/tagsoup-1.2.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/tika-core-1.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/tika-parsers-1.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/uimaj-core-2.3.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/velocity-1.6.4.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/velocity-tools-2.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-core-0.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-tika-0.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/wstx-asl-3.2.7.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/xercesImpl-2.9.1.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/xmlbeans-2.3.0.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/licenses/zookeeper-3.3.6.jar.sha1 (props changed)
lucene/dev/branches/branch_4x/solr/solrj/ (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/client/solrj/request/IsUpdateRequest.java (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/params/CollectionParams.java (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/java/org/apache/solr/common/util/SolrjNamedThreadFactory.java (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/test-files/solrj/solr/collection1/conf/solrconfig.xml (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/test-files/solrj/solr/solr.xml (props changed)
lucene/dev/branches/branch_4x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrServerTest.java (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/SolrIgnoredThreadsFilter.java (props changed)
lucene/dev/branches/branch_4x/solr/test-framework/src/java/org/apache/solr/util/RevertDefaultThreadHandlerRule.java (props changed)
Modified: lucene/dev/branches/branch_4x/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/build.xml?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/build.xml (original)
+++ lucene/dev/branches/branch_4x/build.xml Mon Aug 27 14:58:55 2012
@@ -74,6 +74,12 @@
</pathconvert>
<fail if="validate.patternsFound">The following files contain @author tags or nocommits:${line.separator}${validate.patternsFound}</fail>
</target>
+
+ <target name="check-svn-properties">
+ <subant target="-check-svn-properties" inheritall="false" failonerror="true">
+ <fileset dir="lucene" includes="build.xml" />
+ </subant>
+ </target>
<target name="rat-sources" description="Runs rat across all sources and tests">
<sequential><subant target="rat-sources" inheritall="false" failonerror="true">
@@ -256,7 +262,7 @@
</target>
<!-- Jenkins tasks -->
- <target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status"/>
+ <target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status,check-svn-properties"/>
<target name="jenkins-clover">
<antcall target="-jenkins-clover">
Modified: lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/HSQLDB.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/HSQLDB.xml?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/HSQLDB.xml (original)
+++ lucene/dev/branches/branch_4x/dev-tools/idea/.idea/libraries/HSQLDB.xml Mon Aug 27 14:58:55 2012
@@ -1,9 +1,9 @@
-<component name="libraryTable">
- <library name="HSQLDB">
- <CLASSES>
- <root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-1.8.0.10.jar!/" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- </library>
+<component name="libraryTable">
+ <library name="HSQLDB">
+ <CLASSES>
+ <root url="jar://$PROJECT_DIR$/solr/example/example-DIH/solr/db/lib/hsqldb-1.8.0.10.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
</component>
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/build.xml?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/build.xml Mon Aug 27 14:58:55 2012
@@ -198,6 +198,16 @@
</forbidden-apis>
</target>
+ <!-- note: we don't include this in validate because we want to check from releases -->
+ <target name="-check-svn-properties" depends="compile-tools,resolve,load-custom-tasks">
+ <svn-eol-style svnExecutable="${svn.exe}">
+ <fileset dir="${basedir}/..">
+ <exclude name="**/build/**"/>
+ <exclude name="**/*.jar"/>
+ </fileset>
+ </svn-eol-style>
+ </target>
+
<target name="resolve">
<sequential>
<ant dir="test-framework" target="resolve" inheritall="false">
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Mon Aug 27 14:58:55 2012
@@ -1,489 +1,489 @@
-package org.apache.lucene.codecs.bloom;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
-import org.apache.lucene.codecs.CodecUtil;
-import org.apache.lucene.codecs.FieldsConsumer;
-import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.codecs.PostingsConsumer;
-import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.TermStats;
-import org.apache.lucene.codecs.TermsConsumer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.IndexFileNames;
-import org.apache.lucene.index.SegmentReadState;
-import org.apache.lucene.index.SegmentWriteState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FuzzySet;
-import org.apache.lucene.util.FuzzySet.ContainsResult;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-import org.apache.lucene.util.hash.MurmurHash2;
-
-/**
- * <p>
- * A {@link PostingsFormat} useful for low doc-frequency fields such as primary
- * keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
- * for reads in segments known to have no record of the key. A choice of
- * delegate PostingsFormat is used to record all other Postings data.
- * </p>
- * <p>
- * A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
- * settings on a per-field basis. The default configuration is
- * {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
- * values using {@link MurmurHash2}. This should be suitable for most purposes.
- * </p>
- * <p>
- * The format of the blm file is as follows:
- * </p>
- * <ul>
- * <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
- * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
- * <li>Filter --> FieldNumber, FuzzySet</li>
- * <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
- * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
- * <li>DelegatePostingsFormatName --> {@link DataOutput#writeString(String)
- * String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
- * <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
- * <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
- * field in this segment</li>
- * </ul>
- * @lucene.experimental
- */
-public class BloomFilteringPostingsFormat extends PostingsFormat {
-
- public static final String BLOOM_CODEC_NAME = "BloomFilter";
- public static final int BLOOM_CODEC_VERSION = 1;
-
- /** Extension of Bloom Filters file */
- static final String BLOOM_EXTENSION = "blm";
-
- BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
- private PostingsFormat delegatePostingsFormat;
-
- /**
- * Creates Bloom filters for a selection of fields created in the index. This
- * is recorded as a set of Bitsets held as a segment summary in an additional
- * "blm" file. This PostingsFormat delegates to a choice of delegate
- * PostingsFormat for encoding all other postings data.
- *
- * @param delegatePostingsFormat
- * The PostingsFormat that records all the non-bloom filter data i.e.
- * postings info.
- * @param bloomFilterFactory
- * The {@link BloomFilterFactory} responsible for sizing BloomFilters
- * appropriately
- */
- public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
- BloomFilterFactory bloomFilterFactory) {
- super(BLOOM_CODEC_NAME);
- this.delegatePostingsFormat = delegatePostingsFormat;
- this.bloomFilterFactory = bloomFilterFactory;
- }
-
- /**
- * Creates Bloom filters for a selection of fields created in the index. This
- * is recorded as a set of Bitsets held as a segment summary in an additional
- * "blm" file. This PostingsFormat delegates to a choice of delegate
- * PostingsFormat for encoding all other postings data. This choice of
- * constructor defaults to the {@link DefaultBloomFilterFactory} for
- * configuring per-field BloomFilters.
- *
- * @param delegatePostingsFormat
- * The PostingsFormat that records all the non-bloom filter data i.e.
- * postings info.
- */
- public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
- this(delegatePostingsFormat, new DefaultBloomFilterFactory());
- }
-
- // Used only by core Lucene at read-time via Service Provider instantiation -
- // do not use at Write-time in application code.
- public BloomFilteringPostingsFormat() {
- super(BLOOM_CODEC_NAME);
- }
-
- public FieldsConsumer fieldsConsumer(SegmentWriteState state)
- throws IOException {
- if (delegatePostingsFormat == null) {
- throw new UnsupportedOperationException("Error - " + getClass().getName()
- + " has been constructed without a choice of PostingsFormat");
- }
- return new BloomFilteredFieldsConsumer(
- delegatePostingsFormat.fieldsConsumer(state), state,
- delegatePostingsFormat);
- }
-
- public FieldsProducer fieldsProducer(SegmentReadState state)
- throws IOException {
- return new BloomFilteredFieldsProducer(state);
- }
-
- public class BloomFilteredFieldsProducer extends FieldsProducer {
- private FieldsProducer delegateFieldsProducer;
- HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
-
- public BloomFilteredFieldsProducer(SegmentReadState state)
- throws IOException {
-
- String bloomFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
- IndexInput bloomIn = null;
- try {
- bloomIn = state.dir.openInput(bloomFileName, state.context);
- CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
- BLOOM_CODEC_VERSION);
- // // Load the hash function used in the BloomFilter
- // hashFunction = HashFunction.forName(bloomIn.readString());
- // Load the delegate postings format
- PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
- .readString());
-
- this.delegateFieldsProducer = delegatePostingsFormat
- .fieldsProducer(state);
- int numBlooms = bloomIn.readInt();
- for (int i = 0; i < numBlooms; i++) {
- int fieldNum = bloomIn.readInt();
- FuzzySet bloom = FuzzySet.deserialize(bloomIn);
- FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
- bloomsByFieldName.put(fieldInfo.name, bloom);
- }
- } finally {
- IOUtils.close(bloomIn);
- }
-
- }
-
- public Iterator<String> iterator() {
- return delegateFieldsProducer.iterator();
- }
-
- public void close() throws IOException {
- delegateFieldsProducer.close();
- }
-
- public Terms terms(String field) throws IOException {
- FuzzySet filter = bloomsByFieldName.get(field);
- if (filter == null) {
- return delegateFieldsProducer.terms(field);
- } else {
- Terms result = delegateFieldsProducer.terms(field);
- if (result == null) {
- return null;
- }
- return new BloomFilteredTerms(result, filter);
- }
- }
-
- public int size() {
- return delegateFieldsProducer.size();
- }
-
- public long getUniqueTermCount() throws IOException {
- return delegateFieldsProducer.getUniqueTermCount();
- }
-
- class BloomFilteredTerms extends Terms {
- private Terms delegateTerms;
- private FuzzySet filter;
-
- public BloomFilteredTerms(Terms terms, FuzzySet filter) {
- this.delegateTerms = terms;
- this.filter = filter;
- }
-
- @Override
- public TermsEnum intersect(CompiledAutomaton compiled,
- final BytesRef startTerm) throws IOException {
- return delegateTerms.intersect(compiled, startTerm);
- }
-
- @Override
- public TermsEnum iterator(TermsEnum reuse) throws IOException {
- TermsEnum result;
- if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
- // recycle the existing BloomFilteredTermsEnum by asking the delegate
- // to recycle its contained TermsEnum
- BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
- if (bfte.filter == filter) {
- bfte.delegateTermsEnum = delegateTerms
- .iterator(bfte.delegateTermsEnum);
- return bfte;
- }
- }
- // We have been handed something we cannot reuse (either null, wrong
- // class or wrong filter) so allocate a new object
- result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
- filter);
- return result;
- }
-
- @Override
- public Comparator<BytesRef> getComparator() throws IOException {
- return delegateTerms.getComparator();
- }
-
- @Override
- public long size() throws IOException {
- return delegateTerms.size();
- }
-
- @Override
- public long getSumTotalTermFreq() throws IOException {
- return delegateTerms.getSumTotalTermFreq();
- }
-
- @Override
- public long getSumDocFreq() throws IOException {
- return delegateTerms.getSumDocFreq();
- }
-
- @Override
- public int getDocCount() throws IOException {
- return delegateTerms.getDocCount();
- }
-
- @Override
- public boolean hasOffsets() {
- return delegateTerms.hasOffsets();
- }
-
- @Override
- public boolean hasPositions() {
- return delegateTerms.hasPositions();
- }
-
- @Override
- public boolean hasPayloads() {
- return delegateTerms.hasPayloads();
- }
- }
-
- class BloomFilteredTermsEnum extends TermsEnum {
-
- TermsEnum delegateTermsEnum;
- private FuzzySet filter;
-
- public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
- this.delegateTermsEnum = iterator;
- this.filter = filter;
- }
-
- @Override
- public final BytesRef next() throws IOException {
- return delegateTermsEnum.next();
- }
-
- @Override
- public final Comparator<BytesRef> getComparator() {
- return delegateTermsEnum.getComparator();
- }
-
- @Override
- public final boolean seekExact(BytesRef text, boolean useCache)
- throws IOException {
- // The magical fail-fast speed up that is the entire point of all of
- // this code - save a disk seek if there is a match on an in-memory
- // structure
- // that may occasionally give a false positive but guaranteed no false
- // negatives
- if (filter.contains(text) == ContainsResult.NO) {
- return false;
- }
- return delegateTermsEnum.seekExact(text, useCache);
- }
-
- @Override
- public final SeekStatus seekCeil(BytesRef text, boolean useCache)
- throws IOException {
- return delegateTermsEnum.seekCeil(text, useCache);
- }
-
- @Override
- public final void seekExact(long ord) throws IOException {
- delegateTermsEnum.seekExact(ord);
- }
-
- @Override
- public final BytesRef term() throws IOException {
- return delegateTermsEnum.term();
- }
-
- @Override
- public final long ord() throws IOException {
- return delegateTermsEnum.ord();
- }
-
- @Override
- public final int docFreq() throws IOException {
- return delegateTermsEnum.docFreq();
- }
-
- @Override
- public final long totalTermFreq() throws IOException {
- return delegateTermsEnum.totalTermFreq();
- }
-
-
- @Override
- public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
- DocsAndPositionsEnum reuse, int flags) throws IOException {
- return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
- }
-
- @Override
- public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
- throws IOException {
- return delegateTermsEnum.docs(liveDocs, reuse, flags);
- }
-
-
- }
-
- }
-
- class BloomFilteredFieldsConsumer extends FieldsConsumer {
- private FieldsConsumer delegateFieldsConsumer;
- private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
- private SegmentWriteState state;
-
- // private PostingsFormat delegatePostingsFormat;
-
- public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
- SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
- this.delegateFieldsConsumer = fieldsConsumer;
- // this.delegatePostingsFormat=delegatePostingsFormat;
- this.state = state;
- }
-
- @Override
- public TermsConsumer addField(FieldInfo field) throws IOException {
- FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
- if (bloomFilter != null) {
- assert bloomFilters.containsKey(field) == false;
- bloomFilters.put(field, bloomFilter);
- return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
- } else {
- // No, use the unfiltered fieldsConsumer - we are not interested in
- // recording any term Bitsets.
- return delegateFieldsConsumer.addField(field);
- }
- }
-
- @Override
- public void close() throws IOException {
- delegateFieldsConsumer.close();
- // Now we are done accumulating values for these fields
- List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
-
- for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
- FuzzySet bloomFilter = entry.getValue();
- if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
- nonSaturatedBlooms.add(entry);
- }
- }
- String bloomFileName = IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
- IndexOutput bloomOutput = null;
- try {
- bloomOutput = state.directory
- .createOutput(bloomFileName, state.context);
- CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
- BLOOM_CODEC_VERSION);
- // remember the name of the postings format we will delegate to
- bloomOutput.writeString(delegatePostingsFormat.getName());
-
- // First field in the output file is the number of fields+blooms saved
- bloomOutput.writeInt(nonSaturatedBlooms.size());
- for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
- FieldInfo fieldInfo = entry.getKey();
- FuzzySet bloomFilter = entry.getValue();
- bloomOutput.writeInt(fieldInfo.number);
- saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
- }
- } finally {
- IOUtils.close(bloomOutput);
- }
- //We are done with large bitsets so no need to keep them hanging around
- bloomFilters.clear();
- }
-
- private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
- FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
-
- FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
- bloomFilter);
- if (rightSizedSet == null) {
- rightSizedSet = bloomFilter;
- }
- rightSizedSet.serialize(bloomOutput);
- }
-
- }
-
- class WrappedTermsConsumer extends TermsConsumer {
- private TermsConsumer delegateTermsConsumer;
- private FuzzySet bloomFilter;
-
- public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
- this.delegateTermsConsumer = termsConsumer;
- this.bloomFilter = bloomFilter;
- }
-
- public PostingsConsumer startTerm(BytesRef text) throws IOException {
- return delegateTermsConsumer.startTerm(text);
- }
-
- public void finishTerm(BytesRef text, TermStats stats) throws IOException {
-
- // Record this term in our BloomFilter
- if (stats.docFreq > 0) {
- bloomFilter.addValue(text);
- }
- delegateTermsConsumer.finishTerm(text, stats);
- }
-
- public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
- throws IOException {
- delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
- }
-
- public Comparator<BytesRef> getComparator() throws IOException {
- return delegateTermsConsumer.getComparator();
- }
-
- }
-
-}
+package org.apache.lucene.codecs.bloom;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.FieldsConsumer;
+import org.apache.lucene.codecs.FieldsProducer;
+import org.apache.lucene.codecs.PostingsConsumer;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.TermStats;
+import org.apache.lucene.codecs.TermsConsumer;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FuzzySet;
+import org.apache.lucene.util.FuzzySet.ContainsResult;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.hash.MurmurHash2;
+
+/**
+ * <p>
+ * A {@link PostingsFormat} useful for low doc-frequency fields such as primary
+ * keys. Bloom filters are maintained in a ".blm" file which offers "fast-fail"
+ * for reads in segments known to have no record of the key. A choice of
+ * delegate PostingsFormat is used to record all other Postings data.
+ * </p>
+ * <p>
+ * A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter
+ * settings on a per-field basis. The default configuration is
+ * {@link DefaultBloomFilterFactory} which allocates a ~8mb bitset and hashes
+ * values using {@link MurmurHash2}. This should be suitable for most purposes.
+ * </p>
+ * <p>
+ * The format of the blm file is as follows:
+ * </p>
+ * <ul>
+ * <li>BloomFilter (.blm) --> Header, DelegatePostingsFormatName,
+ * NumFilteredFields, Filter<sup>NumFilteredFields</sup></li>
+ * <li>Filter --> FieldNumber, FuzzySet</li>
+ * <li>FuzzySet -->See {@link FuzzySet#serialize(DataOutput)}</li>
+ * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li>
+ * <li>DelegatePostingsFormatName --> {@link DataOutput#writeString(String)
+ * String} The name of a ServiceProvider registered {@link PostingsFormat}</li>
+ * <li>NumFilteredFields --> {@link DataOutput#writeInt Uint32}</li>
+ * <li>FieldNumber --> {@link DataOutput#writeInt Uint32} The number of the
+ * field in this segment</li>
+ * </ul>
+ * @lucene.experimental
+ */
+public class BloomFilteringPostingsFormat extends PostingsFormat {
+
+ public static final String BLOOM_CODEC_NAME = "BloomFilter";
+ public static final int BLOOM_CODEC_VERSION = 1;
+
+ /** Extension of Bloom Filters file */
+ static final String BLOOM_EXTENSION = "blm";
+
+ BloomFilterFactory bloomFilterFactory = new DefaultBloomFilterFactory();
+ private PostingsFormat delegatePostingsFormat;
+
+ /**
+ * Creates Bloom filters for a selection of fields created in the index. This
+ * is recorded as a set of Bitsets held as a segment summary in an additional
+ * "blm" file. This PostingsFormat delegates to a choice of delegate
+ * PostingsFormat for encoding all other postings data.
+ *
+ * @param delegatePostingsFormat
+ * The PostingsFormat that records all the non-bloom filter data i.e.
+ * postings info.
+ * @param bloomFilterFactory
+ * The {@link BloomFilterFactory} responsible for sizing BloomFilters
+ * appropriately
+ */
+ public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat,
+ BloomFilterFactory bloomFilterFactory) {
+ super(BLOOM_CODEC_NAME);
+ this.delegatePostingsFormat = delegatePostingsFormat;
+ this.bloomFilterFactory = bloomFilterFactory;
+ }
+
+ /**
+ * Creates Bloom filters for a selection of fields created in the index. This
+ * is recorded as a set of Bitsets held as a segment summary in an additional
+ * "blm" file. This PostingsFormat delegates to a choice of delegate
+ * PostingsFormat for encoding all other postings data. This choice of
+ * constructor defaults to the {@link DefaultBloomFilterFactory} for
+ * configuring per-field BloomFilters.
+ *
+ * @param delegatePostingsFormat
+ * The PostingsFormat that records all the non-bloom filter data i.e.
+ * postings info.
+ */
+ public BloomFilteringPostingsFormat(PostingsFormat delegatePostingsFormat) {
+ this(delegatePostingsFormat, new DefaultBloomFilterFactory());
+ }
+
+ // Used only by core Lucene at read-time via Service Provider instantiation -
+ // do not use at Write-time in application code.
+ public BloomFilteringPostingsFormat() {
+ super(BLOOM_CODEC_NAME);
+ }
+
+ public FieldsConsumer fieldsConsumer(SegmentWriteState state)
+ throws IOException {
+ if (delegatePostingsFormat == null) {
+ throw new UnsupportedOperationException("Error - " + getClass().getName()
+ + " has been constructed without a choice of PostingsFormat");
+ }
+ return new BloomFilteredFieldsConsumer(
+ delegatePostingsFormat.fieldsConsumer(state), state,
+ delegatePostingsFormat);
+ }
+
+ public FieldsProducer fieldsProducer(SegmentReadState state)
+ throws IOException {
+ return new BloomFilteredFieldsProducer(state);
+ }
+
+ public class BloomFilteredFieldsProducer extends FieldsProducer {
+ private FieldsProducer delegateFieldsProducer;
+ HashMap<String,FuzzySet> bloomsByFieldName = new HashMap<String,FuzzySet>();
+
+ public BloomFilteredFieldsProducer(SegmentReadState state)
+ throws IOException {
+
+ String bloomFileName = IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
+ IndexInput bloomIn = null;
+ try {
+ bloomIn = state.dir.openInput(bloomFileName, state.context);
+ CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
+ BLOOM_CODEC_VERSION);
+ // // Load the hash function used in the BloomFilter
+ // hashFunction = HashFunction.forName(bloomIn.readString());
+ // Load the delegate postings format
+ PostingsFormat delegatePostingsFormat = PostingsFormat.forName(bloomIn
+ .readString());
+
+ this.delegateFieldsProducer = delegatePostingsFormat
+ .fieldsProducer(state);
+ int numBlooms = bloomIn.readInt();
+ for (int i = 0; i < numBlooms; i++) {
+ int fieldNum = bloomIn.readInt();
+ FuzzySet bloom = FuzzySet.deserialize(bloomIn);
+ FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
+ bloomsByFieldName.put(fieldInfo.name, bloom);
+ }
+ } finally {
+ IOUtils.close(bloomIn);
+ }
+
+ }
+
+ public Iterator<String> iterator() {
+ return delegateFieldsProducer.iterator();
+ }
+
+ public void close() throws IOException {
+ delegateFieldsProducer.close();
+ }
+
+ public Terms terms(String field) throws IOException {
+ FuzzySet filter = bloomsByFieldName.get(field);
+ if (filter == null) {
+ return delegateFieldsProducer.terms(field);
+ } else {
+ Terms result = delegateFieldsProducer.terms(field);
+ if (result == null) {
+ return null;
+ }
+ return new BloomFilteredTerms(result, filter);
+ }
+ }
+
+ public int size() {
+ return delegateFieldsProducer.size();
+ }
+
+ public long getUniqueTermCount() throws IOException {
+ return delegateFieldsProducer.getUniqueTermCount();
+ }
+
+ class BloomFilteredTerms extends Terms {
+ private Terms delegateTerms;
+ private FuzzySet filter;
+
+ public BloomFilteredTerms(Terms terms, FuzzySet filter) {
+ this.delegateTerms = terms;
+ this.filter = filter;
+ }
+
+ @Override
+ public TermsEnum intersect(CompiledAutomaton compiled,
+ final BytesRef startTerm) throws IOException {
+ return delegateTerms.intersect(compiled, startTerm);
+ }
+
+ @Override
+ public TermsEnum iterator(TermsEnum reuse) throws IOException {
+ TermsEnum result;
+ if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
+ // recycle the existing BloomFilteredTermsEnum by asking the delegate
+ // to recycle its contained TermsEnum
+ BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
+ if (bfte.filter == filter) {
+ bfte.delegateTermsEnum = delegateTerms
+ .iterator(bfte.delegateTermsEnum);
+ return bfte;
+ }
+ }
+ // We have been handed something we cannot reuse (either null, wrong
+ // class or wrong filter) so allocate a new object
+ result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
+ filter);
+ return result;
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() throws IOException {
+ return delegateTerms.getComparator();
+ }
+
+ @Override
+ public long size() throws IOException {
+ return delegateTerms.size();
+ }
+
+ @Override
+ public long getSumTotalTermFreq() throws IOException {
+ return delegateTerms.getSumTotalTermFreq();
+ }
+
+ @Override
+ public long getSumDocFreq() throws IOException {
+ return delegateTerms.getSumDocFreq();
+ }
+
+ @Override
+ public int getDocCount() throws IOException {
+ return delegateTerms.getDocCount();
+ }
+
+ @Override
+ public boolean hasOffsets() {
+ return delegateTerms.hasOffsets();
+ }
+
+ @Override
+ public boolean hasPositions() {
+ return delegateTerms.hasPositions();
+ }
+
+ @Override
+ public boolean hasPayloads() {
+ return delegateTerms.hasPayloads();
+ }
+ }
+
+ class BloomFilteredTermsEnum extends TermsEnum {
+
+ TermsEnum delegateTermsEnum;
+ private FuzzySet filter;
+
+ public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
+ this.delegateTermsEnum = iterator;
+ this.filter = filter;
+ }
+
+ @Override
+ public final BytesRef next() throws IOException {
+ return delegateTermsEnum.next();
+ }
+
+ @Override
+ public final Comparator<BytesRef> getComparator() {
+ return delegateTermsEnum.getComparator();
+ }
+
+ @Override
+ public final boolean seekExact(BytesRef text, boolean useCache)
+ throws IOException {
+ // The magical fail-fast speed up that is the entire point of all of
+ // this code - save a disk seek if there is a match on an in-memory
+ // structure
+ // that may occasionally give a false positive but guaranteed no false
+ // negatives
+ if (filter.contains(text) == ContainsResult.NO) {
+ return false;
+ }
+ return delegateTermsEnum.seekExact(text, useCache);
+ }
+
+ @Override
+ public final SeekStatus seekCeil(BytesRef text, boolean useCache)
+ throws IOException {
+ return delegateTermsEnum.seekCeil(text, useCache);
+ }
+
+ @Override
+ public final void seekExact(long ord) throws IOException {
+ delegateTermsEnum.seekExact(ord);
+ }
+
+ @Override
+ public final BytesRef term() throws IOException {
+ return delegateTermsEnum.term();
+ }
+
+ @Override
+ public final long ord() throws IOException {
+ return delegateTermsEnum.ord();
+ }
+
+ @Override
+ public final int docFreq() throws IOException {
+ return delegateTermsEnum.docFreq();
+ }
+
+ @Override
+ public final long totalTermFreq() throws IOException {
+ return delegateTermsEnum.totalTermFreq();
+ }
+
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
+ DocsAndPositionsEnum reuse, int flags) throws IOException {
+ return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
+ }
+
+ @Override
+ public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+ throws IOException {
+ return delegateTermsEnum.docs(liveDocs, reuse, flags);
+ }
+
+
+ }
+
+ }
+
+ class BloomFilteredFieldsConsumer extends FieldsConsumer {
+ private FieldsConsumer delegateFieldsConsumer;
+ private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
+ private SegmentWriteState state;
+
+ // private PostingsFormat delegatePostingsFormat;
+
+ public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
+ SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
+ this.delegateFieldsConsumer = fieldsConsumer;
+ // this.delegatePostingsFormat=delegatePostingsFormat;
+ this.state = state;
+ }
+
+ @Override
+ public TermsConsumer addField(FieldInfo field) throws IOException {
+ FuzzySet bloomFilter = bloomFilterFactory.getSetForField(state,field);
+ if (bloomFilter != null) {
+ assert bloomFilters.containsKey(field) == false;
+ bloomFilters.put(field, bloomFilter);
+ return new WrappedTermsConsumer(delegateFieldsConsumer.addField(field),bloomFilter);
+ } else {
+ // No, use the unfiltered fieldsConsumer - we are not interested in
+ // recording any term Bitsets.
+ return delegateFieldsConsumer.addField(field);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegateFieldsConsumer.close();
+ // Now we are done accumulating values for these fields
+ List<Entry<FieldInfo,FuzzySet>> nonSaturatedBlooms = new ArrayList<Map.Entry<FieldInfo,FuzzySet>>();
+
+ for (Entry<FieldInfo,FuzzySet> entry : bloomFilters.entrySet()) {
+ FuzzySet bloomFilter = entry.getValue();
+ if(!bloomFilterFactory.isSaturated(bloomFilter,entry.getKey())){
+ nonSaturatedBlooms.add(entry);
+ }
+ }
+ String bloomFileName = IndexFileNames.segmentFileName(
+ state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
+ IndexOutput bloomOutput = null;
+ try {
+ bloomOutput = state.directory
+ .createOutput(bloomFileName, state.context);
+ CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
+ BLOOM_CODEC_VERSION);
+ // remember the name of the postings format we will delegate to
+ bloomOutput.writeString(delegatePostingsFormat.getName());
+
+ // First field in the output file is the number of fields+blooms saved
+ bloomOutput.writeInt(nonSaturatedBlooms.size());
+ for (Entry<FieldInfo,FuzzySet> entry : nonSaturatedBlooms) {
+ FieldInfo fieldInfo = entry.getKey();
+ FuzzySet bloomFilter = entry.getValue();
+ bloomOutput.writeInt(fieldInfo.number);
+ saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
+ }
+ } finally {
+ IOUtils.close(bloomOutput);
+ }
+ //We are done with large bitsets so no need to keep them hanging around
+ bloomFilters.clear();
+ }
+
+ private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
+ FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
+
+ FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo,
+ bloomFilter);
+ if (rightSizedSet == null) {
+ rightSizedSet = bloomFilter;
+ }
+ rightSizedSet.serialize(bloomOutput);
+ }
+
+ }
+
+ class WrappedTermsConsumer extends TermsConsumer {
+ private TermsConsumer delegateTermsConsumer;
+ private FuzzySet bloomFilter;
+
+ public WrappedTermsConsumer(TermsConsumer termsConsumer,FuzzySet bloomFilter) {
+ this.delegateTermsConsumer = termsConsumer;
+ this.bloomFilter = bloomFilter;
+ }
+
+ public PostingsConsumer startTerm(BytesRef text) throws IOException {
+ return delegateTermsConsumer.startTerm(text);
+ }
+
+ public void finishTerm(BytesRef text, TermStats stats) throws IOException {
+
+ // Record this term in our BloomFilter
+ if (stats.docFreq > 0) {
+ bloomFilter.addValue(text);
+ }
+ delegateTermsConsumer.finishTerm(text, stats);
+ }
+
+ public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
+ throws IOException {
+ delegateTermsConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
+ }
+
+ public Comparator<BytesRef> getComparator() throws IOException {
+ return delegateTermsConsumer.getComparator();
+ }
+
+ }
+
+}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/package.html?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/package.html (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/bloom/package.html Mon Aug 27 14:58:55 2012
@@ -1,25 +1,25 @@
-<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
-</head>
-<body>
-Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
-</body>
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Codec PostingsFormat for fast access to low-frequency terms such as primary key fields.
+</body>
</html>
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/HashFunction.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/HashFunction.java?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/HashFunction.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/HashFunction.java Mon Aug 27 14:58:55 2012
@@ -1,84 +1,84 @@
-package org.apache.lucene.util.hash;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-import java.util.Set;
-
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NamedSPILoader;
-
-
-/**
- * Base class for hashing functions that can be referred to by name.
- * Subclasses are expected to provide threadsafe implementations of the hash function
- * on the range of bytes referenced in the provided {@link BytesRef}
- * @lucene.experimental
- */
-public abstract class HashFunction implements NamedSPILoader.NamedSPI {
-
- /**
- * Hashes the contents of the referenced bytes
- * @param bytes the data to be hashed
- * @return the hash of the bytes referenced by bytes.offset and length bytes.length
- */
- public abstract int hash(BytesRef bytes);
-
- private static final NamedSPILoader<HashFunction> loader =
- new NamedSPILoader<HashFunction>(HashFunction.class);
-
- private final String name;
-
- public HashFunction(String name) {
- NamedSPILoader.checkServiceName(name);
- this.name = name;
- }
-
- /** Returns this codec's name */
- @Override
- public final String getName() {
- return name;
- }
-
- /** looks up a hash function by name */
- public static HashFunction forName(String name) {
- return loader.lookup(name);
- }
-
- /** returns a list of all available hash function names */
- public static Set<String> availableHashFunctionNames() {
- return loader.availableServices();
- }
-
- /**
- * Reloads the hash function list from the given {@link ClassLoader}.
- * Changes to the function list are visible after the method ends, all
- * iterators ({@link #availableHashFunctionNames()},...) stay consistent.
- *
- * <p><b>NOTE:</b> Only new functions are added, existing ones are
- * never removed or replaced.
- *
- * <p><em>This method is expensive and should only be called for discovery
- * of new functions on the given classpath/classloader!</em>
- */
- public static void reloadHashFunctions(ClassLoader classloader) {
- loader.reload(classloader);
- }
-
- @Override
- public String toString() {
- return name;
- }
-}
+package org.apache.lucene.util.hash;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Set;
+
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NamedSPILoader;
+
+
+/**
+ * Base class for hashing functions that can be referred to by name.
+ * Subclasses are expected to provide threadsafe implementations of the hash function
+ * on the range of bytes referenced in the provided {@link BytesRef}
+ * @lucene.experimental
+ */
+public abstract class HashFunction implements NamedSPILoader.NamedSPI {
+
+ /**
+ * Hashes the contents of the referenced bytes
+ * @param bytes the data to be hashed
+ * @return the hash of the bytes referenced by bytes.offset and length bytes.length
+ */
+ public abstract int hash(BytesRef bytes);
+
+ private static final NamedSPILoader<HashFunction> loader =
+ new NamedSPILoader<HashFunction>(HashFunction.class);
+
+ private final String name;
+
+ public HashFunction(String name) {
+ NamedSPILoader.checkServiceName(name);
+ this.name = name;
+ }
+
+ /** Returns this codec's name */
+ @Override
+ public final String getName() {
+ return name;
+ }
+
+ /** looks up a hash function by name */
+ public static HashFunction forName(String name) {
+ return loader.lookup(name);
+ }
+
+ /** returns a list of all available hash function names */
+ public static Set<String> availableHashFunctionNames() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the hash function list from the given {@link ClassLoader}.
+ * Changes to the function list are visible after the method ends, all
+ * iterators ({@link #availableHashFunctionNames()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new functions are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new functions on the given classpath/classloader!</em>
+ */
+ public static void reloadHashFunctions(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
+ @Override
+ public String toString() {
+ return name;
+ }
+}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/MurmurHash2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/MurmurHash2.java?rev=1377705&r1=1377704&r2=1377705&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/MurmurHash2.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/hash/MurmurHash2.java Mon Aug 27 14:58:55 2012
@@ -1,105 +1,105 @@
-package org.apache.lucene.util.hash;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.BytesRef;
-
-/**
- * This is a very fast, non-cryptographic hash suitable for general hash-based
- * lookup. See http://murmurhash.googlepages.com/ for more details.
- * <p>
- * The C version of MurmurHash 2.0 found at that site was ported to Java by
- * Andrzej Bialecki (ab at getopt org).
- * </p>
- * <p>
- * The code from getopt.org was adapted by Mark Harwood in the form here as one of a pluggable choice of
- * hashing functions as the core function had to be adapted to work with BytesRefs with offsets and lengths
- * rather than raw byte arrays.
- * </p>
- * @lucene.experimental
- */
-public class MurmurHash2 extends HashFunction{
-
-
- public static final String HASH_NAME="MurmurHash2";
-
- public MurmurHash2() {
- super(HASH_NAME);
- }
-
- public static int hash(byte[] data, int seed, int offset, int len) {
- int m = 0x5bd1e995;
- int r = 24;
- int h = seed ^ len;
- int len_4 = len >> 2;
- for (int i = 0; i < len_4; i++) {
- int i_4 = offset + (i << 2);
- int k = data[i_4 + 3];
- k = k << 8;
- k = k | (data[i_4 + 2] & 0xff);
- k = k << 8;
- k = k | (data[i_4 + 1] & 0xff);
- k = k << 8;
- k = k | (data[i_4 + 0] & 0xff);
- k *= m;
- k ^= k >>> r;
- k *= m;
- h *= m;
- h ^= k;
- }
- int len_m = len_4 << 2;
- int left = len - len_m;
- if (left != 0) {
- if (left >= 3) {
- h ^= data[offset + len - 3] << 16;
- }
- if (left >= 2) {
- h ^= data[offset + len - 2] << 8;
- }
- if (left >= 1) {
- h ^= data[offset + len - 1];
- }
- h *= m;
- }
- h ^= h >>> 13;
- h *= m;
- h ^= h >>> 15;
- return h;
- }
-
- /**
- * Generates 32 bit hash from byte array with default seed value.
- *
- * @param data
- * byte array to hash
- * @param offset
- * the start position in the array to hash
- * @param len
- * length of the array elements to hash
- * @return 32 bit hash of the given array
- */
- public static final int hash32(final byte[] data, int offset, int len) {
- return MurmurHash2.hash(data, 0x9747b28c, offset, len);
- }
-
-
- @Override
- public final int hash(BytesRef br) {
- return hash32(br.bytes, br.offset, br.length);
- }
-
-}
+package org.apache.lucene.util.hash;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ * <p>
+ * The C version of MurmurHash 2.0 found at that site was ported to Java by
+ * Andrzej Bialecki (ab at getopt org).
+ * </p>
+ * <p>
+ * The code from getopt.org was adapted by Mark Harwood in the form here as one of a pluggable choice of
+ * hashing functions as the core function had to be adapted to work with BytesRefs with offsets and lengths
+ * rather than raw byte arrays.
+ * </p>
+ * @lucene.experimental
+ */
+public class MurmurHash2 extends HashFunction{
+
+
+ public static final String HASH_NAME="MurmurHash2";
+
+ public MurmurHash2() {
+ super(HASH_NAME);
+ }
+
+ public static int hash(byte[] data, int seed, int offset, int len) {
+ int m = 0x5bd1e995;
+ int r = 24;
+ int h = seed ^ len;
+ int len_4 = len >> 2;
+ for (int i = 0; i < len_4; i++) {
+ int i_4 = offset + (i << 2);
+ int k = data[i_4 + 3];
+ k = k << 8;
+ k = k | (data[i_4 + 2] & 0xff);
+ k = k << 8;
+ k = k | (data[i_4 + 1] & 0xff);
+ k = k << 8;
+ k = k | (data[i_4 + 0] & 0xff);
+ k *= m;
+ k ^= k >>> r;
+ k *= m;
+ h *= m;
+ h ^= k;
+ }
+ int len_m = len_4 << 2;
+ int left = len - len_m;
+ if (left != 0) {
+ if (left >= 3) {
+ h ^= data[offset + len - 3] << 16;
+ }
+ if (left >= 2) {
+ h ^= data[offset + len - 2] << 8;
+ }
+ if (left >= 1) {
+ h ^= data[offset + len - 1];
+ }
+ h *= m;
+ }
+ h ^= h >>> 13;
+ h *= m;
+ h ^= h >>> 15;
+ return h;
+ }
+
+ /**
+ * Generates 32 bit hash from byte array with default seed value.
+ *
+ * @param data
+ * byte array to hash
+ * @param offset
+ * the start position in the array to hash
+ * @param len
+ * length of the array elements to hash
+ * @return 32 bit hash of the given array
+ */
+ public static final int hash32(final byte[] data, int offset, int len) {
+ return MurmurHash2.hash(data, 0x9747b28c, offset, len);
+ }
+
+
+ @Override
+ public final int hash(BytesRef br) {
+ return hash32(br.bytes, br.offset, br.length);
+ }
+
+}