You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/20 20:52:08 UTC
svn commit: r1061480 [1/5] - in /lucene/dev/branches/bulkpostings: ./ dev-tools/idea/.idea/copyright/ lucene/ lucene/contrib/ lucene/contrib/demo/src/java/org/apache/lucene/demo/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ l...

Author: rmuir
Date: Thu Jan 20 19:52:03 2011
New Revision: 1061480

URL: http://svn.apache.org/viewvc?rev=1061480&view=rev
Log:
merge trunk (1059198:1061424)

Added:
    lucene/dev/branches/bulkpostings/dev-tools/idea/.idea/copyright/
      - copied from r1061424, lucene/dev/trunk/dev-tools/idea/.idea/copyright/
    lucene/dev/branches/bulkpostings/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml
      - copied unchanged from r1061424, lucene/dev/trunk/dev-tools/idea/.idea/copyright/Apache_Software_Foundation.xml
    lucene/dev/branches/bulkpostings/dev-tools/idea/.idea/copyright/profiles_settings.xml
      - copied unchanged from r1061424, lucene/dev/trunk/dev-tools/idea/.idea/copyright/profiles_settings.xml
    lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
      - copied unchanged from r1061424, lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
    lucene/dev/branches/bulkpostings/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/
      - copied from r1061424, lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/
    lucene/dev/branches/bulkpostings/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java
      - copied unchanged from r1061424, lucene/dev/trunk/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/config/TestAttributes.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java
      - copied unchanged from r1061424, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/TermStats.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/AttributeReflector.java
      - copied unchanged from r1061424, lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/AttributeReflector.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java
      - copied unchanged from r1061424, lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestSimpleAttributeImpl.java
    lucene/dev/branches/bulkpostings/solr/contrib/analysis-extras/CHANGES.txt
      - copied unchanged from r1061424, lucene/dev/trunk/solr/contrib/analysis-extras/CHANGES.txt
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrSortField.java
      - copied unchanged from r1061424, lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrSortField.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java
      - copied unchanged from r1061424, lucene/dev/trunk/solr/src/test/org/apache/solr/response/TestPHPSerializedResponseWriter.java
Removed:
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/DISABLED-README.txt
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/build.xml.disabled
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/EnglishPorterFilterFactoryTest.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
Modified:
    lucene/dev/branches/bulkpostings/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/   (props changed)
    lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
    lucene/dev/branches/bulkpostings/lucene/MIGRATE.txt
    lucene/dev/branches/bulkpostings/lucene/contrib/CHANGES.txt
    lucene/dev/branches/bulkpostings/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java
    lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
    lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
    lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java
    lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
    lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java
    lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/Token.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogDocMergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTerms.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsConsumer.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsConsumer.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexFields.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/simpletext/SimpleTextFieldsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BooleanScorer2.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/BoostAttributeImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/ConstantScoreQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldComparator.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FieldValueHitQueue.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Filter.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Query.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/TermQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/Weight.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/function/CustomScoreQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/function/ValueSource.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/function/ValueSourceQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/AttributeImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/AttributeSource.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/PerReaderTermState.java
    lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestNumericTokenStream.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/TestToken.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestCodecs.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/preflexrw/PreFlexFieldsWriter.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestBooleanScorer.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestAttributeSource.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/_TestUtil.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
    lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLimitTokenCountAnalyzer.java
    lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
    lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
    lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java
    lucene/dev/branches/bulkpostings/solr/   (props changed)
    lucene/dev/branches/bulkpostings/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/bulkpostings/solr/KEYS   (props changed)
    lucene/dev/branches/bulkpostings/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/README.txt   (props changed)
    lucene/dev/branches/bulkpostings/solr/build.xml   (props changed)
    lucene/dev/branches/bulkpostings/solr/client/   (props changed)
    lucene/dev/branches/bulkpostings/solr/common-build.xml   (props changed)
    lucene/dev/branches/bulkpostings/solr/contrib/   (props changed)
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/CHANGES.txt
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/LuceneLanguageModelFactory.java
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
    lucene/dev/branches/bulkpostings/solr/contrib/clustering/src/test/resources/solr-clustering/conf/solrconfig.xml
    lucene/dev/branches/bulkpostings/solr/contrib/dataimporthandler/CHANGES.txt
    lucene/dev/branches/bulkpostings/solr/contrib/extraction/CHANGES.txt
    lucene/dev/branches/bulkpostings/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml
    lucene/dev/branches/bulkpostings/solr/example/   (props changed)
    lucene/dev/branches/bulkpostings/solr/example/solr/conf/synonyms.txt
    lucene/dev/branches/bulkpostings/solr/lib/   (props changed)
    lucene/dev/branches/bulkpostings/solr/lib/apache-solr-noggit-r944541.jar
    lucene/dev/branches/bulkpostings/solr/site/   (props changed)
    lucene/dev/branches/bulkpostings/solr/src/   (props changed)
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/ArabicLetterTokenizerFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/BaseTokenStreamFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/ChineseFilterFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/schema/LatLonType.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/Grouping.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/BoostedQuery.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/FunctionQuery.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/search/function/ValueSource.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
    lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/update/SolrIndexConfig.java
    lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-copyfield-test.xml
    lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema-required-fields.xml
    lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema.xml
    lucene/dev/branches/bulkpostings/solr/src/test-files/solr/conf/schema12.xml
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/TestDistributedSearch.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/JSONWriterTest.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/request/SimpleFacetsTest.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/TestSort.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/search/function/TestFunctionQuery.java
    lucene/dev/branches/bulkpostings/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
    lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/analysis.jsp
    lucene/dev/branches/bulkpostings/solr/src/webapp/web/admin/replication/header.jsp
    lucene/dev/branches/bulkpostings/solr/testlogging.properties   (props changed)

Modified: lucene/dev/branches/bulkpostings/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/CHANGES.txt?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/CHANGES.txt Thu Jan 20 19:52:03 2011
@@ -89,19 +89,9 @@ Changes in backwards compatibility polic
 * LUCENE-2484: Removed deprecated TermAttribute. Use CharTermAttribute
   and TermToBytesRefAttribute instead.  (Uwe Schindler)
 
-* LUCENE-2602: The default (LogByteSizeMergePolicy) merge policy now
-  takes deletions into account by default.  You can disable this by
-  calling setCalibrateSizeByDeletes(false) on the merge policy.  (Mike
-  McCandless)
-
 * LUCENE-2600: Remove IndexReader.isDeleted in favor of
   IndexReader.getDeletedDocs().  (Mike McCandless)
 
-* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
-  values in multi-valued field has been changed for some cases in index.
-  If you index empty fields and uses positions/offsets information on that
-  fields, reindex is recommended. (David Smiley, Koji Sekiguchi)
-
 * LUCENE-2667: FuzzyQuery's defaults have changed for more performant 
   behavior: the minimum similarity is 2 edit distances from the word,
   and the priority queue size is 50. To support this, FuzzyQuery now allows
@@ -140,21 +130,6 @@ Changes in backwards compatibility polic
 
 Changes in Runtime Behavior
 
-* LUCENE-2650, LUCENE-2825: The behavior of FSDirectory.open has changed. On 64-bit
-  Windows and Solaris systems that support unmapping, FSDirectory.open returns
-  MMapDirectory. Additionally the behavior of MMapDirectory has been
-  changed to enable unmapping by default if supported by the JRE.
-  (Mike McCandless, Uwe Schindler, Robert Muir)
-
-* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio 
-  to determine whether the passed in segment should be compound. 
-  (Shai Erera, Earwin Burrfoot)
-
-* LUCENE-2805: IndexWriter now increments the index version on every change to
-  the index instead of for every commit. Committing or closing the IndexWriter
-  without any changes to the index will not cause any index version increment.
-  (Simon Willnauer, Mike McCandless)
-
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
   omitNorms(true) for field "a" for 1000 documents, but then add a document with
   omitNorms(false) for field "a", all documents for field "a" will have no norms.
@@ -181,17 +156,6 @@ API Changes
   deleted docs (getDeletedDocs), providing a new Bits interface to
   directly query by doc ID.
 
-* LUCENE-2402: IndexWriter.deleteUnusedFiles now deletes unreferenced commit
-  points too. If you use an IndexDeletionPolicy which holds onto index commits
-  (such as SnapshotDeletionPolicy), you can call this method to remove those
-  commit points when they are not needed anymore (instead of waiting for the 
-  next commit). (Shai Erera)
-
-* LUCENE-2674: A new idfExplain method was added to Similarity, that
-  accepts an incoming docFreq.  If you subclass Similarity, make sure
-  you also override this method on upgrade.  (Robert Muir, Mike
-  McCandless)
-
 * LUCENE-2691: IndexWriter.getReader() has been made package local and is now
   exposed via open and reopen methods on IndexReader.  The semantics of the
   call is the same as it was prior to the API change.
@@ -199,9 +163,6 @@ API Changes
 
 * LUCENE-2566: QueryParser: Unary operators +,-,! will not be treated as
   operators if they are followed by whitespace. (yonik)
-
-* LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override
-  and return a different RAMFile implementation. (Shai Erera)
   
 * LUCENE-2831: Weight#scorer, Weight#explain, Filter#getDocIdSet,
   Collector#setNextReader & FieldComparator#setNextReader now expect an
@@ -253,10 +214,6 @@ New features
   data and payloads in 5 separate files instead of the 2 used by
   standard codec), and int block (really a "base" for using
   block-based compressors like PForDelta for storing postings data).
-
-* LUCENE-2385: Moved NoDeletionPolicy from benchmark to core. NoDeletionPolicy
-  can be used to prevent commits from ever getting deleted from the index.
-  (Shai Erera)
   
 * LUCENE-1458, LUCENE-2111: The in-memory terms index used by standard
   codec is more RAM efficient: terms data is stored as block byte
@@ -271,16 +228,6 @@ New features
   applications that have many unique terms, since it reduces how often
   a new segment must be flushed given a fixed RAM buffer size.
 
-* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can 
-  return a DirPayloadProcessor for a given Directory, which returns a 
-  PayloadProcessor for a given Term. The PayloadProcessor will be used to 
-  process the payloads of the segments as they are merged (e.g. if one wants to
-  rewrite payloads of external indexes as they are added, or of local ones). 
-  (Shai Erera, Michael Busch, Mike McCandless)
-
-* LUCENE-2440: Add support for custom ExecutorService in
-  ParallelMultiSearcher (Edward Drapkin via Mike McCandless)
-
 * LUCENE-2489: Added PerFieldCodecWrapper (in oal.index.codecs) which
   lets you set the Codec per field (Mike McCandless)
 
@@ -291,17 +238,6 @@ New features
   SegmentInfosReader to allow customization of SegmentInfos data.
   (Andrzej Bialecki)
 
-* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike
-  McCandless)
-
-* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq.  Along
-  with a custom Collector these experimental methods make it possible
-  to gather the hit-count per sub-clause and per document while a
-  search is running.  (Simon Willnauer, Mike McCandless)
-
-* LUCENE-2636: Added MultiCollector which allows running the search with several
-  Collectors. (Shai Erera)
-
 * LUCENE-2504: FieldComparator.setNextReader now returns a
   FieldComparator instance.  You can "return this", to just reuse the
   same instance, or you can return a comparator optimized to the new
@@ -359,18 +295,10 @@ New features
   terms dict.  This impl stores the indexed terms in an FST, which is
   much more RAM efficient than FixedGapTermsIndex.  (Mike McCandless)
 
-Optimizations
-
-* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
-  (Mike McCandless)
-
-* LUCENE-2531: Fix issue when sorting by a String field that was
-  causing too many fallbacks to compare-by-value (instead of by-ord).
-  (Mike McCandless)
+* LUCENE-2862: Added TermsEnum.totalTermFreq() and
+  Terms.getSumTotalTermFreq().  (Mike McCandless, Robert Muir)
 
-* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for 
-  efficient copying by sub-classes. Optimized copy is implemented for RAM and FS
-  streams. (Shai Erera)
+Optimizations
 
 * LUCENE-2588: Don't store unnecessary suffixes when writing the terms
   index, saving RAM in IndexReader; change default terms index
@@ -386,11 +314,6 @@ Optimizations
   MultiTermQuery now stores TermState per leaf reader during rewrite to re-
   seek the term dictionary in TermQuery / TermWeight.
   (Simon Willnauer, Mike McCandless, Robert Muir)
-
-Documentation
-
-* LUCENE-2579: Fix oal.search's package.html description of abstract
-  methods.  (Santiago M. Mola via Mike McCandless)
   
 Bug fixes
 
@@ -401,14 +324,6 @@ Bug fixes
   with more document deletions is requested before a reader with fewer
   deletions, provided they share some segments. (yonik)
 
-* LUCENE-2802: NRT DirectoryReader returned incorrect values from
-  getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due
-  to a mutable reference to the IndexWriters SegmentInfos. 
-  (Simon Willnauer, Earwin Burrfoot)
-
-* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it 
-  decides whether to return the cached computed size or not. (Shai Erera)
-
 ======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy
@@ -447,9 +362,9 @@ Changes in backwards compatibility polic
 * LUCENE-2302: The new interface for term attributes, CharTermAttribute,
   now implements CharSequence. This requires the toString() methods of
   CharTermAttribute, deprecated TermAttribute, and Token to return only
-  the term text and no other attribute contents.
-  TODO: Point to new attribute inspection API coming with LUCENE-2374.
-  (Uwe Schindler, Robert Muir)
+  the term text and no other attribute contents. LUCENE-2374 implements
+  an attribute reflection API to no longer rely on toString() for attribute
+  inspection. (Uwe Schindler, Robert Muir)
 
 * LUCENE-2372, LUCENE-2389: StandardAnalyzer, KeywordAnalyzer,
   PerFieldAnalyzerWrapper, WhitespaceTokenizer are now final.  Also removed
@@ -473,10 +388,37 @@ Changes in backwards compatibility polic
 * LUCENE-2733: Removed public constructors of utility classes with only static
   methods to prevent instantiation.  (Uwe Schindler)
 
-* LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List
-  instead of a Collection, guaranteeing the commits are sorted from oldest to 
-  latest. (Shai Erera)
+* LUCENE-2602: The default (LogByteSizeMergePolicy) merge policy now
+  takes deletions into account by default.  You can disable this by
+  calling setCalibrateSizeByDeletes(false) on the merge policy.  (Mike
+  McCandless)
+
+* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
+  values in multi-valued field have been changed for some cases in index.
+  If you index empty fields and use positions/offsets information on those
+  fields, reindex is recommended. (David Smiley, Koji Sekiguchi)
   
+* LUCENE-2804: Directory.setLockFactory now declares throwing an IOException.
+  (Shai Erera, Robert Muir)
+  
+* LUCENE-2837: Added deprecations noting that in 4.0, Searcher and
+  Searchable are collapsed into IndexSearcher; contrib/remote and
+  MultiSearcher have been removed.  (Mike McCandless)
+
+* LUCENE-2854: Deprecated SimilarityDelegator and
+  Similarity.lengthNorm; the latter is now final, forcing any custom
+  Similarity impls to cutover to the more general computeNorm (Robert
+  Muir, Mike McCandless)
+
+* LUCENE-2869: Deprecated Query.getSimilarity: instead of using
+  "runtime" subclassing/delegation, subclass the Weight instead.
+  (Robert Muir)
+
+* LUCENE-2674: A new idfExplain method was added to Similarity, that
+  accepts an incoming docFreq.  If you subclass Similarity, make sure
+  you also override this method on upgrade.  (Robert Muir, Mike
+  McCandless)
+
 Changes in runtime behavior
 
 * LUCENE-1923: Made IndexReader.toString() produce something
@@ -492,7 +434,7 @@ Changes in runtime behavior
   invokes a merge on the incoming and target segments, but instead copies the
   segments to the target index. You can call maybeMerge or optimize after this
   method completes, if you need to.
-  
+
   In addition, Directory.copyTo* were removed in favor of copy which takes the
   target Directory, source and target files as arguments, and copies the source
   file to the target Directory under the target file name. (Shai Erera)
@@ -504,10 +446,38 @@ Changes in runtime behavior
   usage, allowing applications to accidentally open two writers on the
   same directory.  (Mike McCandless)
 
-* LUCENE-2701: maxMergeMB and maxMergeDocs constraints set on LogMergePolicy now
-  affect optimize() as well (as opposed to only regular merges). This means that
-  you can run optimize() and too large segments won't be merged. (Shai Erera)
+* LUCENE-2701: maxMergeMBForOptimize and maxMergeDocs constraints set on 
+  LogMergePolicy now affect optimize() as well (as opposed to only regular 
+  merges). This means that you can run optimize() and too large segments won't 
+  be merged. (Shai Erera)
+
+* LUCENE-2753: IndexReader and DirectoryReader .listCommits() now return a List,
+  guaranteeing the commits are sorted from oldest to latest. (Shai Erera)
+
+* LUCENE-2785: TopScoreDocCollector, TopFieldCollector and
+  the IndexSearcher search methods that take an int nDocs will now
+  throw IllegalArgumentException if nDocs is 0.  Instead, you should
+  use the newly added TotalHitCountCollector.  (Mike McCandless)
+  
+* LUCENE-2790: LogMergePolicy.useCompoundFile's logic now factors in noCFSRatio 
+  to determine whether the passed in segment should be compound. 
+  (Shai Erera, Earwin Burrfoot)
+
+* LUCENE-2805: IndexWriter now increments the index version on every change to
+  the index instead of for every commit. Committing or closing the IndexWriter
+  without any changes to the index will not cause any index version increment.
+  (Simon Willnauer, Mike McCandless)
+
+* LUCENE-2650, LUCENE-2825: The behavior of FSDirectory.open has changed. On 64-bit
+  Windows and Solaris systems that support unmapping, FSDirectory.open returns
+  MMapDirectory. Additionally the behavior of MMapDirectory has been
+  changed to enable unmapping by default if supported by the JRE.
+  (Mike McCandless, Uwe Schindler, Robert Muir)
 
+* LUCENE-2829: Improve the performance of "primary key" lookup use
+  case (running a TermQuery that matches one document) on a
+  multi-segment index.  (Robert Muir, Mike McCandless)
+  
 API Changes
 
 * LUCENE-2076: Rename FSDirectory.getFile -> getDirectory.  (George
@@ -518,7 +488,7 @@ API Changes
   custom Similarity can alter how norms are encoded, though they must
   still be encoded as a single byte (Johan Kindgren via Mike
   McCandless)
-  
+
 * LUCENE-2103: NoLockFactory should have a private constructor;
   until Lucene 4.0 the default one will be deprecated.
   (Shai Erera via Uwe Schindler) 
@@ -590,17 +560,59 @@ API Changes
   (such as SnapshotDeletionPolicy), you can call this method to remove those
   commit points when they are not needed anymore (instead of waiting for the 
   next commit). (Shai Erera)
-
-* LUCENE-2455: IndexWriter.addIndexesNoOptimize was renamed to addIndexes.
-  IndexFileNames.segmentFileName now takes another parameter to accommodate
-  custom file names. You should use this method to name all your files.
-  (Shai Erera)
   
 * LUCENE-2481: SnapshotDeletionPolicy.snapshot() and release() were replaced
   with equivalent ones that take a String (id) as argument. You can pass
   whatever ID you want, as long as you use the same one when calling both. 
   (Shai Erera)
   
+* LUCENE-2356: Add IndexWriterConfig.set/getReaderTermIndexDivisor, to
+  set what IndexWriter passes for termsIndexDivisor to the readers it
+  opens internally when applying deletions or creating a near-real-time
+  reader.  (Earwin Burrfoot via Mike McCandless)
+
+* LUCENE-2167,LUCENE-2699,LUCENE-2763,LUCENE-2847: StandardTokenizer/Analyzer
+  in common/standard/ now implement the Word Break rules from the Unicode 6.0.0
+  Text Segmentation algorithm (UAX#29), covering the full range of Unicode code
+  points, including values from U+FFFF to U+10FFFF
+   
+  ClassicTokenizer/Analyzer retains the old (pre-Lucene 3.1) StandardTokenizer/
+  Analyzer implementation and behavior.  Only the Unicode Basic Multilingual
+  Plane (code points from U+0000 to U+FFFF) is covered.
+
+  UAX29URLEmailTokenizer tokenizes URLs and E-mail addresses according to the
+  relevant RFCs, in addition to implementing the UAX#29 Word Break rules.
+  (Steven Rowe, Robert Muir, Uwe Schindler)
+   
+* LUCENE-2778: RAMDirectory now exposes newRAMFile() which allows to override
+  and return a different RAMFile implementation. (Shai Erera)
+  
+* LUCENE-2785: Added TotalHitCountCollector whose sole purpose is to
+  count the number of hits matching the query.  (Mike McCandless)
+
+* LUCENE-2846: Deprecated IndexReader.setNorm(int, String, float). This method 
+  is only syntactic sugar for setNorm(int, String, byte), but  using the global 
+  Similarity.getDefault().encodeNormValue().  Use the byte-based method instead 
+  to ensure that the norm is encoded with your Similarity.
+  (Robert Muir, Mike McCandless)
+
+* LUCENE-2374: Added Attribute reflection API: It's now possible to inspect the
+  contents of AttributeImpl and AttributeSource using a well-defined API.
+  This is e.g. used by Solr's AnalysisRequestHandlers to display all attributes
+  in a structured way.
+  There are also some backwards incompatible changes in toString() output,
+  as LUCENE-2302 introduced the CharSequence interface to CharTermAttribute
+  leading to changed toString() return values. The new API allows to get a
+  string representation in a well-defined way using a new method
+  reflectAsString(). For backwards compatibility reasons, when toString()
+  was implemented by implementation subclasses, the default implementation of
+  AttributeImpl.reflectWith() uses toString()s output instead to report the
+  Attribute's properties. Otherwise, reflectWith() uses Java's reflection
+  (like toString() did before) to get the attribute properties.
+  In addition, the mandatory equals() and hashCode() are no longer required
+  for AttributeImpls, but can still be provided (if needed).
+  (Uwe Schindler)
+
 Bug fixes
 
 * LUCENE-2249: ParallelMultiSearcher should shut down thread pool on
@@ -621,10 +633,6 @@ Bug fixes
   a prior (corrupt) index missing its segments_N file.  (Mike
   McCandless)
 
-* LUCENE-2534: fix over-sharing bug in
-  MultiTermsEnum.docs/AndPositionsEnum.  (Robert Muir, Mike
-  McCandless)
-
 * LUCENE-2458: QueryParser no longer automatically forms phrase queries,
   assuming whitespace tokenization. Previously all CJK queries, for example,
   would be turned into phrase queries. The old behavior is preserved with
@@ -643,7 +651,22 @@ Bug fixes
   can cause the same document to score differently depending on
   what segment it resides in. (yonik)
 
-* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)  
+* LUCENE-2272: Fix explain in PayloadNearQuery and also fix scoring issue (Peter Keegan via Grant Ingersoll)
+
+* LUCENE-2732: Fix charset problems in XML loading in
+  HyphenationCompoundWordTokenFilter.  (Uwe Schindler)
+
+* LUCENE-2802: NRT DirectoryReader returned incorrect values from
+  getVersion, isOptimized, getCommitUserData, getIndexCommit and isCurrent due
+  to a mutable reference to the IndexWriter's SegmentInfos.
+  (Simon Willnauer, Earwin Burrfoot)
+
+* LUCENE-2852: Fixed corner case in RAMInputStream that would hit a
+  false EOF after seeking to EOF then seeking back to same block you
+  were just in and then calling readBytes (Robert Muir, Mike McCandless)
+
+* LUCENE-2860: Fixed SegmentInfo.sizeInBytes to factor includeDocStores when it 
+  decides whether to return the cached computed size or not. (Shai Erera)
 
 New features
 
@@ -716,6 +739,16 @@ New features
   can be used to prevent commits from ever getting deleted from the index.
   (Shai Erera)
   
+* LUCENE-1585: IndexWriter now accepts a PayloadProcessorProvider which can 
+  return a DirPayloadProcessor for a given Directory, which returns a 
+  PayloadProcessor for a given Term. The PayloadProcessor will be used to 
+  process the payloads of the segments as they are merged (e.g. if one wants to
+  rewrite payloads of external indexes as they are added, or of local ones). 
+  (Shai Erera, Michael Busch, Mike McCandless)
+
+* LUCENE-2440: Add support for custom ExecutorService in
+  ParallelMultiSearcher (Edward Drapkin via Mike McCandless)
+
 * LUCENE-2295: Added a LimitTokenCountAnalyzer / LimitTokenCountFilter
   to wrap any other Analyzer and provide the same functionality as
   MaxFieldLength provided on IndexWriter.  This patch also fixes a bug
@@ -723,9 +756,17 @@ New features
 
 * LUCENE-2526: Don't throw NPE from MultiPhraseQuery.toString when
   it's empty.  (Ross Woolf via Mike McCandless)
+  
+* LUCENE-2559: Added SegmentReader.reopen methods (John Wang via Mike
+  McCandless)
 
-* LUCENE-2671: Add SortField.setMissingValue( v ) to enable sorting
-  behavior for documents that do not include the given field. (ryan)
+* LUCENE-2590: Added Scorer.visitSubScorers, and Scorer.freq.  Along
+  with a custom Collector these experimental methods make it possible
+  to gather the hit-count per sub-clause and per document while a
+  search is running.  (Simon Willnauer, Mike McCandless)
+
+* LUCENE-2636: Added MultiCollector which allows running the search with several
+  Collectors. (Shai Erera)
 
 * LUCENE-2754, LUCENE-2757: Added a wrapper around MultiTermQueries
   to add span support: SpanMultiTermQueryWrapper<Q extends MultiTermQuery>.
@@ -744,6 +785,9 @@ New features
   
 Optimizations
 
+* LUCENE-2494: Use CompletionService in ParallelMultiSearcher instead of
+  simple polling for results. (Edward Drapkin, Simon Willnauer)
+
 * LUCENE-2075: Terms dict cache is now shared across threads instead
   of being stored separately in thread local storage.  Also fixed
   terms dict so that the cache is used when seeking the thread local
@@ -806,6 +850,17 @@ Optimizations
   (getStrings, getStringIndex), consume quite a bit less RAM in most
   cases.  (Mike McCandless)
 
+* LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
+  (Mike McCandless)
+
+* LUCENE-2531: Fix issue when sorting by a String field that was
+  causing too many fallbacks to compare-by-value (instead of by-ord).
+  (Mike McCandless)
+
+* LUCENE-2574: IndexInput exposes copyBytes(IndexOutput, long) to allow for 
+  efficient copying by sub-classes. Optimized copy is implemented for RAM and FS
+  streams. (Shai Erera)
+
 * LUCENE-2719: Improved TermsHashPerField's sorting to use a better
   quick sort algorithm that dereferences the pivot element not on
   every compare call. Also replaced lots of sorting code in Lucene
@@ -885,6 +940,18 @@ Test Cases
   as Eclipse and IntelliJ.
   (Paolo Castagna, Steven Rowe via Robert Muir)
 
+* LUCENE-2804: add newFSDirectory to LuceneTestCase to create a FSDirectory at
+  random. (Shai Erera, Robert Muir)
+  
+Documentation
+
+* LUCENE-2579: Fix oal.search's package.html description of abstract
+  methods.  (Santiago M. Mola via Mike McCandless)
+   
+* LUCENE-2625: Add a note to IndexReader.termDocs() with additional verbiage
+  that the TermEnum must be seeked since it is unpositioned.
+  (Adriano Crestani via Robert Muir)
+  
 ================== Release 2.9.4 / 3.0.3 2010-12-03 ====================
 
 Changes in runtime behavior

Modified: lucene/dev/branches/bulkpostings/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/MIGRATE.txt?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/MIGRATE.txt Thu Jan 20 19:52:03 2011
@@ -328,3 +328,10 @@ LUCENE-1458, LUCENE-2111: Flexible Index
 * LUCENE-2761: DataInput.readVInt/readVLong and DataOutput.writeVInt/writeVLong
   are final. If you subclassed this code before to encode variable-length
   integers in some specialized way, use the Codec API instead.
+
+* LUCENE-2374: The backwards layer in AttributeImpl was removed. To support correct
+  reflection of AttributeImpl instances, where the reflection was done using deprecated
+  toString() parsing, you have to now override reflectWith() to customize output.
+  toString() is no longer implemented by AttributeImpl, so if you have overridden
+  toString(), port your customization over to reflectWith(). reflectAsString() would
+  then return what toString() did before.

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/CHANGES.txt?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/CHANGES.txt Thu Jan 20 19:52:03 2011
@@ -4,53 +4,39 @@ Lucene contrib change Log
   
 Build
 
- * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo.
-   (Robert Muir)
-
  * LUCENE-2845: Moved contrib/benchmark to modules.
 
 New Features
 
-  * LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
-    (Simon Willnauer, Robert Muir)
+ * LUCENE-2604: Added RegexpQuery support to contrib/queryparser.
+   (Simon Willnauer, Robert Muir)
 
-  * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific
-    Directory impl that uses the O_DIRECT flag to bypass the buffer
-    cache.  This is useful to prevent segment merging from evicting
-    pages from the buffer cache, since fadvise/madvise do not seem.
-    (Michael McCandless)
-
-  * LUCENE-2373: Added a Codec implementation that works with append-only
-    filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading
-    code is refactored to support append-only FS, and to allow for future
-    customization of per-segment information. (Andrzej Bialecki)
+ * LUCENE-2373: Added a Codec implementation that works with append-only
+   filesystems (such as e.g. Hadoop DFS). SegmentInfos writing/reading
+   code is refactored to support append-only FS, and to allow for future
+   customization of per-segment information. (Andrzej Bialecki)
 
-  * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
-    with two implementations.  The existing comparator (score, then frequency) is the default (Grant Ingersoll)
+ * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
+   with two implementations.  The existing comparator (score, then frequency) is the default (Grant Ingersoll)
 
-  * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker.  The per class
-    method is also still available.  (Grant Ingersoll)
+ * LUCENE-2608: Added the ability to specify the accuracy at method time in the SpellChecker.  The per class
+   method is also still available.  (Grant Ingersoll)
     
-  * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly 
-    from the term dictionary using levenshtein automata.  (Robert Muir)
+ * LUCENE-2507: Added DirectSpellChecker, which retrieves correction candidates directly 
+   from the term dictionary using levenshtein automata.  (Robert Muir)
 
-  * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl
-    that doesn't synchronize on the file handle. This can be useful to 
-    avoid the performance problems of SimpleFSDirectory and NIOFSDirectory.
-    (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless)
-
-  * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries
-    using the FieldCache's TermsEnum.  (Robert Muir)
+ * LUCENE-2836: Add FieldCacheRewriteMethod, which rewrites MultiTermQueries
+   using the FieldCache's TermsEnum.  (Robert Muir)
   
 API Changes
 
-  * LUCENE-2606: Changed RegexCapabilities interface to fix thread 
-    safety, serialization, and performance problems. If you have
-    written a custom RegexCapabilities it will need to be updated
-    to the new API.  (Robert Muir, Uwe Schindler)
+ * LUCENE-2606: Changed RegexCapabilities interface to fix thread 
+   safety, serialization, and performance problems. If you have
+   written a custom RegexCapabilities it will need to be updated
+   to the new API.  (Robert Muir, Uwe Schindler)
 
-  * LUCENE-2638 MakeHighFreqTerms.TermStats public to make it more useful
-    for API use. (Andrzej Bialecki)
+ * LUCENE-2638: Made HighFreqTerms.TermStats public to make it more useful
+   for API use. (Andrzej Bialecki)
 
 ======================= Lucene 3.x (not yet released) =======================
 
@@ -100,6 +86,11 @@ Changes in runtime behavior
 
 Bug fixes
 
+ * LUCENE-2855: contrib queryparser was using CharSequence as key in some internal 
+   Map instances, which was leading to incorrect behaviour, since some CharSequence
+   implementors do not override hashcode and equals methods. Now the internal Maps 
+   are using String instead. (Adriano Crestani)
+
  * LUCENE-2068: Fixed ReverseStringFilter which was not aware of supplementary
    characters. During reverse the filter created unpaired surrogates, which
    will be replaced by U+FFFD by the indexer, but not at query time. The filter
@@ -117,41 +108,48 @@ Bug fixes
    default.  (Robert Muir, Uwe Schindler, Simon Willnauer)
 
  * LUCENE-2184: Fixed bug with handling best fit value when the proper best fit value is
-		not an indexed field.  Note, this change affects the APIs. (Grant Ingersoll)
+   not an indexed field.  Note, this change affects the APIs. (Grant Ingersoll)
 		
  * LUCENE-2359: Fix bug in CartesianPolyFilterBuilder related to handling of behavior around
-		the 180th meridian (Grant Ingersoll)
+   the 180th meridian (Grant Ingersoll)
 
  * LUCENE-2404: Fix bugs with position increment and empty tokens in ThaiWordFilter.
    For matchVersion >= 3.1 the filter also no longer lowercases. ThaiAnalyzer
    will use a separate LowerCaseFilter instead. (Uwe Schindler, Robert Muir)
 
-* LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus
-  permissions to newly created files, and to not silently hardwire
-  buffer size to 1 MB.  (Mark Miller, Robert Muir, Mike McCandless)
-
-* LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows
-  you to customize its normalization/folding, by editing the source data files in src/data
-  and regenerating a new .nrm with 'ant gennorm2'.  (David Bowen via Robert Muir)
-
-* LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not
-  always the case. If the dictionary is unavailable, the filter will now throw 
-  UnsupportedOperationException in the constructor.  (Robert Muir)
+ * LUCENE-2615: Fix DirectIOLinuxDirectory to not assign bogus
+   permissions to newly created files, and to not silently hardwire
+   buffer size to 1 MB.  (Mark Miller, Robert Muir, Mike McCandless)
+
+ * LUCENE-2629: Fix gennorm2 task for generating ICUFoldingFilter's .nrm file. This allows
+   you to customize its normalization/folding, by editing the source data files in src/data
+   and regenerating a new .nrm with 'ant gennorm2'.  (David Bowen via Robert Muir)
+
+ * LUCENE-2653: ThaiWordFilter depends on the JRE having a Thai dictionary, which is not
+   always the case. If the dictionary is unavailable, the filter will now throw 
+   UnsupportedOperationException in the constructor.  (Robert Muir)
 
-* LUCENE-589: Fix contrib/demo for international documents. 
-  (Curtis d'Entremont via Robert Muir)
+ * LUCENE-589: Fix contrib/demo for international documents. 
+   (Curtis d'Entremont via Robert Muir)
   
-* LUCENE-2246: Fix contrib/demo for Turkish html documents.
-  (Selim Nadi via Robert Muir)  
+ * LUCENE-2246: Fix contrib/demo for Turkish html documents.
+   (Selim Nadi via Robert Muir)  
   
-* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
-  (Curtis d'Entremont via Robert Muir)
+ * LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
+   (Curtis d'Entremont via Robert Muir)
 
-* LUCENE-591: The demo indexer now indexes meta keywords.
-  (Curtis d'Entremont via Robert Muir)
+ * LUCENE-591: The demo indexer now indexes meta keywords.
+   (Curtis d'Entremont via Robert Muir)
+
+ * LUCENE-2874: Highlighting overlapping tokens outputted doubled words.
+   (Pierre Gossé via Robert Muir)
    
 API Changes
 
+ * LUCENE-2867: Some contrib queryparser methods that receive CharSequence as
+   identifier, such as QueryNode#unsetTag(CharSequence), were deprecated and
+   will be removed in version 4.0. (Adriano Crestani)
+
  * LUCENE-2147: Spatial GeoHashUtils now always decode GeoHash strings
    with full precision. GeoHash#decode_exactly(String) was merged into
    GeoHash#decode(String). (Chris Male, Simon Willnauer)
@@ -192,6 +190,12 @@ API Changes
    
 New features
 
+ * LUCENE-2500: Added DirectIOLinuxDirectory, a Linux-specific
+   Directory impl that uses the O_DIRECT flag to bypass the buffer
+   cache.  This is useful to prevent segment merging from evicting
+   pages from the buffer cache, since fadvise/madvise do not seem to help.
+   (Michael McCandless)
+    
  * LUCENE-2306: Add NumericRangeFilter and NumericRangeQuery support to XMLQueryParser.
    (Jingkei Ly, via Mark Harwood)
 
@@ -281,6 +285,11 @@ New features
    BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor.
    (Adriano Crestani via Robert Muir)
 
+ * LUCENE-2791: Added WindowsDirectory, a Windows-specific Directory impl
+   that doesn't synchronize on the file handle. This can be useful to 
+   avoid the performance problems of SimpleFSDirectory and NIOFSDirectory.
+   (Robert Muir, Simon Willnauer, Uwe Schindler, Michael McCandless)
+
  * LUCENE-2842: Add analyzer for Galician. Also adds the RSLP (Orengo) stemmer
    for Portuguese.  (Robert Muir)
 
@@ -305,7 +314,10 @@ Build
    (Robert Muir)
 
  * LUCENE-2833: Upgrade contrib/ant's jtidy jar file to r938 (Robert Muir)
-   
+
+ * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo.
+   (Robert Muir)
+
 Optimizations
 
  * LUCENE-2157: DelimitedPayloadTokenFilter no longer copies the buffer

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexHTML.java Thu Jan 20 19:52:03 2011
@@ -84,8 +84,7 @@ public class IndexHTML {
       }
       writer = new IndexWriter(FSDirectory.open(index), new IndexWriterConfig(
           Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT))
-          .setMaxFieldLength(1000000).setOpenMode(
-              create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
+          .setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND));
       indexDocs(root, index, create);		  // add new docs
 
       System.out.println("Optimizing index...");

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Thu Jan 20 19:52:03 2011
@@ -233,7 +233,10 @@ public class TokenSources {
           .size()]);
       ArrayUtil.quickSort(tokensInOriginalOrder, new Comparator<Token>() {
         public int compare(Token t1, Token t2) {
-          return t1.startOffset() - t2.endOffset();
+          if (t1.startOffset() == t2.startOffset())
+            return t1.endOffset() - t2.endOffset();
+          else
+            return t1.startOffset() - t2.startOffset();
         }
       });
     }

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java Thu Jan 20 19:52:03 2011
@@ -238,6 +238,10 @@ public class InstantiatedIndex
           while((text = termsEnum.next()) != null) {
             String termText = text.utf8ToString();
             InstantiatedTerm instantiatedTerm = new InstantiatedTerm(field, termText);
+            final long totalTermFreq = termsEnum.totalTermFreq();
+            if (totalTermFreq != -1) {
+              instantiatedTerm.addPositionsCount(totalTermFreq);
+            }
             getTermsByFieldAndText().get(field).put(termText, instantiatedTerm);
             instantiatedTerm.setTermIndex(terms.size());
             terms.add(instantiatedTerm);

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java Thu Jan 20 19:52:03 2011
@@ -398,12 +398,22 @@ public class InstantiatedIndexReader ext
         if (i < 0) {
           i = -i - 1;
         }
-        if (i >= orderedTerms.length || !orderedTerms[i].field().equals(field)) {
+        if (i >= orderedTerms.length || orderedTerms[i].field() != field) {
           // field does not exist
           return null;
         }
         final int startLoc = i;
 
+        // Sum totalTermFreq over every term of this field. TODO: heavy to
+        // do this here; would be better to do it up front & cache
+        long sum = 0;
+        int upto = i;
+        while(upto < orderedTerms.length && orderedTerms[upto].field() == field) {
+          sum += orderedTerms[upto].getTotalTermFreq();
+          upto++;
+        }
+        final long sumTotalTermFreq = sum;
+
         return new Terms() {
           @Override 
           public TermsEnum iterator() {
@@ -411,6 +421,11 @@ public class InstantiatedIndexReader ext
           }
 
           @Override
+          public long getSumTotalTermFreq() {
+            return sumTotalTermFreq;
+          }
+
+          @Override
           public Comparator<BytesRef> getComparator() {
             return BytesRef.getUTF8SortedAsUnicodeComparator();
           }

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java Thu Jan 20 19:52:03 2011
@@ -315,6 +315,7 @@ public class InstantiatedIndexWriter imp
           }
           associatedDocuments[associatedDocuments.length - 1] = info;          
           term.setAssociatedDocuments(associatedDocuments);
+          term.addPositionsCount(positions.length);
 
           // todo optimize, only if term vector?
           informationByTermOfCurrentDocument.put(term, info);

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTerm.java Thu Jan 20 19:52:03 2011
@@ -45,6 +45,8 @@ public class InstantiatedTerm
   
   private Term term;
 
+  private long totalTermFreq;
+
   /**
    * index of term in InstantiatedIndex
    * @see org.apache.lucene.store.instantiated.InstantiatedIndex#getOrderedTerms() */
@@ -92,6 +94,14 @@ public class InstantiatedTerm
     this.associatedDocuments = associatedDocuments;
   }
 
+  void addPositionsCount(long count) {
+    totalTermFreq += count;
+  }
+
+  public long getTotalTermFreq() {
+    return totalTermFreq;
+  }
+
   /**
    * Finds index to the first beyond the current whose document number is
    * greater than or equal to <i>target</i>, -1 if there is no such element.

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Thu Jan 20 19:52:03 2011
@@ -111,6 +111,12 @@ public class InstantiatedTermsEnum exten
   }
 
   @Override
+  public long totalTermFreq() {
+    final long v = terms[upto].getTotalTermFreq();
+    return v == 0 ? -1 : v;
+  }
+
+  @Override
   public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
     if (reuse == null || !(reuse instanceof InstantiatedDocsEnum)) {
       reuse = new InstantiatedDocsEnum();

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java Thu Jan 20 19:52:03 2011
@@ -66,6 +66,7 @@ public class TestIndicesEquals extends L
     // create dir data
     IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer()));
+    
     for (int i = 0; i < 20; i++) {
       Document document = new Document();
       assembleDocument(document, i);
@@ -395,6 +396,10 @@ public class TestIndicesEquals extends L
         }
 
         assertTrue(aprioriTermEnum.docFreq() == testTermEnum.docFreq());
+        final long totalTermFreq = aprioriTermEnum.totalTermFreq();
+        if (totalTermFreq != -1) {
+          assertEquals(totalTermFreq, testTermEnum.totalTermFreq());
+        }
 
         // compare termDocs seeking
 

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Jan 20 19:52:03 2011
@@ -611,6 +611,8 @@ public class MemoryIndex implements Seri
     /** Term for this field's fieldName, lazily computed on demand */
     public transient Term template;
 
+    private final long sumTotalTermFreq;
+
     private static final long serialVersionUID = 2882195016849084649L;  
 
     public Info(HashMap<BytesRef,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
@@ -618,6 +620,15 @@ public class MemoryIndex implements Seri
       this.numTokens = numTokens;
       this.numOverlapTokens = numOverlapTokens;
       this.boost = boost;
+      long sum = 0;
+      for(Map.Entry<BytesRef,ArrayIntList> ent : terms.entrySet()) {
+        sum += ent.getValue().size();
+      }
+      sumTotalTermFreq = sum;
+    }
+
+    public long getSumTotalTermFreq() {
+      return sumTotalTermFreq;
     }
     
     /**
@@ -827,6 +838,11 @@ public class MemoryIndex implements Seri
               public long getUniqueTermCount() {
                 return info.sortedTerms.length;
               }
+
+              @Override
+              public long getSumTotalTermFreq() {
+                return info.getSumTotalTermFreq();
+              }
             };
           }
         }
@@ -897,6 +913,11 @@ public class MemoryIndex implements Seri
       }
 
       @Override
+      public long totalTermFreq() {
+        return info.sortedTerms[termUpto].getValue().size();
+      }
+
+      @Override
       public DocsEnum docs(Bits skipDocs, DocsEnum reuse) {
         if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
           reuse = new MemoryDocsEnum();

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java Thu Jan 20 19:52:03 2011
@@ -189,6 +189,17 @@ public class HighFreqTerms {
       @Override
       protected void add(int base, IndexReader r) throws IOException {
         Bits skipDocs = r.getDeletedDocs();
+        if (skipDocs == null) {
+          // TODO: we could do this up front, during the scan
+          // (next()), instead of after-the-fact here w/ seek,
+          // if the codec supports it and there are no del
+          // docs...
+          final long totTF = r.totalTermFreq(field, termtext);
+          if (totTF != -1) {
+            totalTF[0] += totTF;
+            return;
+          }
+        }
         DocsEnum de = r.termDocsEnum(skipDocs, field, termtext);
         if (de != null) {
           while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/misc/TermStats.java Thu Jan 20 19:52:03 2011
@@ -41,4 +41,9 @@ public final class TermStats {
   String getTermText() {
     return termtext.utf8ToString();
   }
+
+  @Override
+  public String toString() {
+    return("TermStats: term=" + termtext.utf8ToString() + " docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq);
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java Thu Jan 20 19:52:03 2011
@@ -17,15 +17,16 @@ package org.apache.lucene.misc;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -41,8 +42,10 @@ public class TestHighFreqTerms extends L
     writer = new IndexWriter(dir, newIndexWriterConfig(random,
        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.WHITESPACE, false))
        .setMaxBufferedDocs(2));
+    writer.setInfoStream(VERBOSE ? System.out : null);
     indexDocs(writer);
     reader = IndexReader.open(dir, true);
+    _TestUtil.checkIndex(dir);
   }
   
   @AfterClass
@@ -75,8 +78,8 @@ public class TestHighFreqTerms extends L
     String field="FIELD_1";
     TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
     for (int i = 0; i < terms.length; i++) {
-      if (i >0){
-       assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
+      if (i > 0) {
+        assertTrue ("out of order " + terms[i-1].docFreq + "should be >= " + terms[i].docFreq,terms[i-1].docFreq >= terms[i].docFreq);
       }
     }    
   }
@@ -134,11 +137,12 @@ public class TestHighFreqTerms extends L
     TermStats[] terms = HighFreqTerms.getHighFreqTerms(reader, numTerms, field);
     TermStats[] termsWithTF = HighFreqTerms.sortByTotalTermFreq(reader, terms);
  
-  for (int i = 0; i < termsWithTF.length; i++) {
-    // check that they are sorted by descending termfreq order
-    if (i >0){
-      assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq > termsWithTF[i].totalTermFreq);
-     }
+    for (int i = 0; i < termsWithTF.length; i++) {
+      // check that they are sorted by descending termfreq
+      // order
+      if (i > 0) {
+        assertTrue ("out of order" +termsWithTF[i-1]+ " > " +termsWithTF[i],termsWithTF[i-1].totalTermFreq >= termsWithTF[i].totalTermFreq);
+      }
     } 
   }
   

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/BoostingQuery.java Thu Jan 20 19:52:03 2011
@@ -21,10 +21,9 @@ import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Similarity;
+
 /**
  * The BoostingQuery class can be used to effectively demote results that match a given query. 
  * Unlike the "NOT" clause, this still selects documents that contain undesirable terms, 
@@ -56,10 +55,9 @@ public class BoostingQuery extends Query
     @Override
     public Query rewrite(IndexReader reader) throws IOException {
       BooleanQuery result = new BooleanQuery() {
-
         @Override
-        public Similarity getSimilarity(IndexSearcher searcher) {
-          return new DefaultSimilarity() {
+        public Weight createWeight(IndexSearcher searcher) throws IOException {
+          return new BooleanWeight(searcher, false) {
 
             @Override
             public float coord(int overlap, int max) {

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/queries/src/java/org/apache/lucene/search/FieldCacheRewriteMethod.java Thu Jan 20 19:52:03 2011
@@ -123,7 +123,11 @@ public final class FieldCacheRewriteMeth
         public TermsEnum iterator() throws IOException {
           return fcsi.getTermsEnum();
         }
-        
+
+        @Override
+        public long getSumTotalTermFreq() {
+          return -1;
+        }
       });
       
       assert termsEnum != null;

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Thu Jan 20 19:52:03 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
 
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.document.NumericField; // for javadocs
@@ -95,22 +96,34 @@ public final class NumericTokenStream ex
   /** The lower precision tokens gets this token type assigned. */
   public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
   
-  /** <b>Expert:</b> Use this attribute to get the details of the currently generated token
+  /** <b>Expert:</b> Use this attribute to get the details of the currently generated token.
    * @lucene.experimental
    * @since 4.0
    */
   public interface NumericTermAttribute extends Attribute {
     /** Returns current shift value, undefined before first token */
     int getShift();
-    /** Returns {@link NumericTokenStream}'s raw value as {@code long} */
+    /** Returns current token's raw value as {@code long} with the lowest {@link #getShift} bits cleared, undefined before first token */
     long getRawValue();
     /** Returns value size in bits (32 for {@code float}, {@code int}; 64 for {@code double}, {@code long}) */
     int getValueSize();
+    
+    /** <em>Don't call this method!</em>
+      * @lucene.internal */
+    void init(long value, int valSize, int precisionStep, int shift);
+
+    /** <em>Don't call this method!</em>
+      * @lucene.internal */
+    void setShift(int shift);
+
+    /** <em>Don't call this method!</em>
+      * @lucene.internal */
+    int incShift();
   }
   
+  // just a wrapper to prevent adding CharTermAttribute (CTA)
   private static final class NumericAttributeFactory extends AttributeFactory {
     private final AttributeFactory delegate;
-    private NumericTokenStream ts = null;
 
     NumericAttributeFactory(AttributeFactory delegate) {
       this.delegate = delegate;
@@ -118,72 +131,79 @@ public final class NumericTokenStream ex
   
     @Override
     public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
-      if (attClass == NumericTermAttribute.class)
-        return new NumericTermAttributeImpl(ts);
       if (CharTermAttribute.class.isAssignableFrom(attClass))
         throw new IllegalArgumentException("NumericTokenStream does not support CharTermAttribute.");
       return delegate.createAttributeInstance(attClass);
     }
   }
 
-  private static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
-    private final NumericTokenStream ts;
+  /** Implementation of {@link NumericTermAttribute}.
+   * @lucene.internal
+   * @since 4.0
+   */
+  public static final class NumericTermAttributeImpl extends AttributeImpl implements NumericTermAttribute,TermToBytesRefAttribute {
+    private long value = 0L;
+    private int valueSize = 0, shift = 0, precisionStep = 0;
     
-    public NumericTermAttributeImpl(NumericTokenStream ts) {
-      this.ts = ts;
-    }
-  
     public int toBytesRef(BytesRef bytes) {
       try {
-        assert ts.valSize == 64 || ts.valSize == 32;
-        return (ts.valSize == 64) ? 
-          NumericUtils.longToPrefixCoded(ts.value, ts.shift, bytes) :
-          NumericUtils.intToPrefixCoded((int) ts.value, ts.shift, bytes);
+        assert valueSize == 64 || valueSize == 32;
+        return (valueSize == 64) ? 
+          NumericUtils.longToPrefixCoded(value, shift, bytes) :
+          NumericUtils.intToPrefixCoded((int) value, shift, bytes);
       } catch (IllegalArgumentException iae) {
-        // return empty token before first
+        // return empty token before first or after last
         bytes.length = 0;
         return 0;
       }
     }
 
-    public int getShift() { return ts.shift; }
-    public long getRawValue() { return ts.value; }
-    public int getValueSize() { return ts.valSize; }
-
-    @Override
-    public void clear() {
-      // this attribute has no contents to clear
+    public int getShift() { return shift; }
+    public void setShift(int shift) { this.shift = shift; }
+    public int incShift() {
+      return (shift += precisionStep);
     }
 
-    @Override
-    public boolean equals(Object other) {
-      return other == this;
+    public long getRawValue() { return value  & ~((1L << shift) - 1L); }
+    public int getValueSize() { return valueSize; }
+
+    public void init(long value, int valueSize, int precisionStep, int shift) {
+      this.value = value;
+      this.valueSize = valueSize;
+      this.precisionStep = precisionStep;
+      this.shift = shift;
     }
 
     @Override
-    public int hashCode() {
-      return System.identityHashCode(this);
+    public void clear() {
+      // this attribute has no contents to clear!
+      // we keep it untouched as it's fully controlled by outer class.
     }
     
     @Override
-    public void copyTo(AttributeImpl target) {
-      // this attribute has no contents to copy
+    public void reflectWith(AttributeReflector reflector) {
+      final BytesRef bytes = new BytesRef();
+      toBytesRef(bytes);
+      reflector.reflect(TermToBytesRefAttribute.class, "bytes", bytes);
+      reflector.reflect(NumericTermAttribute.class, "shift", shift);
+      reflector.reflect(NumericTermAttribute.class, "rawValue", getRawValue());
+      reflector.reflect(NumericTermAttribute.class, "valueSize", valueSize);
     }
-    
+  
     @Override
-    public Object clone() {
-      // cannot throw CloneNotSupportedException (checked)
-      throw new UnsupportedOperationException();
+    public void copyTo(AttributeImpl target) {
+      final NumericTermAttribute a = (NumericTermAttribute) target;
+      a.init(value, valueSize, precisionStep, shift);
     }
   }
-
+  
   /**
    * Creates a token stream for numeric values using the default <code>precisionStep</code>
    * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized,
    * before using set a value using the various set<em>???</em>Value() methods.
    */
   public NumericTokenStream() {
-    this(NumericUtils.PRECISION_STEP_DEFAULT);
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, NumericUtils.PRECISION_STEP_DEFAULT);
   }
   
   /**
@@ -192,15 +212,7 @@ public final class NumericTokenStream ex
    * before using set a value using the various set<em>???</em>Value() methods.
    */
   public NumericTokenStream(final int precisionStep) {
-    super(new NumericAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY));
-    // we must do this after the super call :(
-    ((NumericAttributeFactory) getAttributeFactory()).ts = this;
-    addAttribute(NumericTermAttribute.class);
-
-    this.precisionStep = precisionStep;
-    if (precisionStep < 1)
-      throw new IllegalArgumentException("precisionStep must be >=1");
-    shift = -precisionStep;
+    this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, precisionStep);
   }
 
   /**
@@ -212,14 +224,10 @@ public final class NumericTokenStream ex
    */
   public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
     super(new NumericAttributeFactory(factory));
-    // we must do this after the super call :(
-    ((NumericAttributeFactory) getAttributeFactory()).ts = this;
-    addAttribute(NumericTermAttribute.class);
-
-    this.precisionStep = precisionStep;
     if (precisionStep < 1)
       throw new IllegalArgumentException("precisionStep must be >=1");
-    shift = -precisionStep;
+    this.precisionStep = precisionStep;
+    numericAtt.setShift(-precisionStep);
   }
 
   /**
@@ -229,9 +237,7 @@ public final class NumericTokenStream ex
    * <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
    */
   public NumericTokenStream setLongValue(final long value) {
-    this.value = value;
-    valSize = 64;
-    shift = -precisionStep;
+    numericAtt.init(value, valSize = 64, precisionStep, -precisionStep);
     return this;
   }
   
@@ -242,9 +248,7 @@ public final class NumericTokenStream ex
    * <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
    */
   public NumericTokenStream setIntValue(final int value) {
-    this.value = value;
-    valSize = 32;
-    shift = -precisionStep;
+    numericAtt.init(value, valSize = 32, precisionStep, -precisionStep);
     return this;
   }
   
@@ -255,9 +259,7 @@ public final class NumericTokenStream ex
    * <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
    */
   public NumericTokenStream setDoubleValue(final double value) {
-    this.value = NumericUtils.doubleToSortableLong(value);
-    valSize = 64;
-    shift = -precisionStep;
+    numericAtt.init(NumericUtils.doubleToSortableLong(value), valSize = 64, precisionStep, -precisionStep);
     return this;
   }
   
@@ -268,9 +270,7 @@ public final class NumericTokenStream ex
    * <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
    */
   public NumericTokenStream setFloatValue(final float value) {
-    this.value = NumericUtils.floatToSortableInt(value);
-    valSize = 32;
-    shift = -precisionStep;
+    numericAtt.init(NumericUtils.floatToSortableInt(value), valSize = 32, precisionStep, -precisionStep);
     return this;
   }
   
@@ -278,40 +278,28 @@ public final class NumericTokenStream ex
   public void reset() {
     if (valSize == 0)
       throw new IllegalStateException("call set???Value() before usage");
-    shift = -precisionStep;
+    numericAtt.setShift(-precisionStep);
   }
 
   @Override
   public boolean incrementToken() {
     if (valSize == 0)
       throw new IllegalStateException("call set???Value() before usage");
-    shift += precisionStep;
-    if (shift >= valSize) {
-      // reset so the attribute still works after exhausted stream
-      shift -= precisionStep;
-      return false;
-    }
-
+    
+    // this will only clear all other attributes in this TokenStream
     clearAttributes();
-    // the TermToBytesRefAttribute is directly accessing shift & value.
+
+    final int shift = numericAtt.incShift();
     typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
     posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
-    return true;
-  }
-  
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize);
-    sb.append(",precisionStep=").append(precisionStep).append(')');
-    return sb.toString();
+    return (shift < valSize);
   }
 
   // members
+  private final NumericTermAttribute numericAtt = addAttribute(NumericTermAttribute.class);
   private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
   private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
   
-  int shift, valSize = 0; // valSize==0 means not initialized
+  private int valSize = 0; // valSize==0 means not initialized
   private final int precisionStep;
-  
-  long value = 0L;
 }

Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/Token.java?rev=1061480&r1=1061479&r2=1061480&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/analysis/Token.java Thu Jan 20 19:52:03 2011
@@ -28,6 +28,7 @@ import org.apache.lucene.index.DocsAndPo
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
 
 /** 
   A Token is an occurrence of a term from the text of a field.  It consists of
@@ -588,6 +589,17 @@ public class Token extends CharTermAttri
     }
   }
 
+  @Override
+  public void reflectWith(AttributeReflector reflector) {
+    super.reflectWith(reflector);
+    reflector.reflect(OffsetAttribute.class, "startOffset", startOffset);
+    reflector.reflect(OffsetAttribute.class, "endOffset", endOffset);
+    reflector.reflect(PositionIncrementAttribute.class, "positionIncrement", positionIncrement);
+    reflector.reflect(PayloadAttribute.class, "payload", payload);
+    reflector.reflect(FlagsAttribute.class, "flags", flags);
+    reflector.reflect(TypeAttribute.class, "type", type);
+  }
+
   /** Convenience factory that returns <code>Token</code> as implementation for the basic
    * attributes and return the default impl (with &quot;Impl&quot; appended) for all other
    * attributes.