You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/14 19:54:24 UTC

svn commit: r1433035 [1/4] - in /lucene/dev/branches/lucene4547: ./ dev-tools/ dev-tools/scripts/ lucene/ lucene/analysis/ lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ lucene/analysis/kuromoji/src/resources/org/apache/lucene/an...

Author: rmuir
Date: Mon Jan 14 18:54:22 2013
New Revision: 1433035

URL: http://svn.apache.org/viewvc?rev=1433035&view=rev
Log:
Merged /lucene/dev/trunk:r1432062-1433030

Added:
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/BytesStore.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/ForwardBytesReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/fst/ReverseBytesReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/moreterms.40.zip
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/fst/TestBytesStore.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/AssertingCategoryListIterator.java
      - copied unchanged from r1433030, lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/util/AssertingCategoryListIterator.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/update/processor/TestPartialUpdateDeduplication.java
      - copied unchanged from r1433030, lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/processor/TestPartialUpdateDeduplication.java
Modified:
    lucene/dev/branches/lucene4547/   (props changed)
    lucene/dev/branches/lucene4547/dev-tools/   (props changed)
    lucene/dev/branches/lucene4547/dev-tools/scripts/checkJavadocLinks.py
    lucene/dev/branches/lucene4547/dev-tools/scripts/smokeTestRelease.py
    lucene/dev/branches/lucene4547/lucene/   (props changed)
    lucene/dev/branches/lucene4547/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java
    lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
    lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
    lucene/dev/branches/lucene4547/lucene/codecs/   (props changed)
    lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
    lucene/dev/branches/lucene4547/lucene/core/   (props changed)
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/analysis/package.html
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/Norm.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/lucene4547/lucene/demo/   (props changed)
    lucene/dev/branches/lucene4547/lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java
    lucene/dev/branches/lucene4547/lucene/facet/   (props changed)
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationsPayloadIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/FloatAssociationsPayloadIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/associations/IntAssociationsPayloadIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CategoryListIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadCategoryListIteraor.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/Aggregator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/CountingAggregator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/ScoringAggregator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationFloatSumAggregator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/associations/AssociationIntSumAggregator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/CountFacetRequest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/ScoreFacetRequest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationFloatSumFacetRequest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/params/associations/AssociationIntSumFacetRequest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/AdaptiveAccumulatorTest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestCategoryListCache.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/params/MultiCategoryListIteratorTest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java
    lucene/dev/branches/lucene4547/lucene/suggest/   (props changed)
    lucene/dev/branches/lucene4547/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
    lucene/dev/branches/lucene4547/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
    lucene/dev/branches/lucene4547/solr/   (props changed)
    lucene/dev/branches/lucene4547/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene4547/solr/README.txt
    lucene/dev/branches/lucene4547/solr/common-build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/solr/contrib/   (props changed)
    lucene/dev/branches/lucene4547/solr/contrib/uima/README.txt
    lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/solr/collection1/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/contrib/uima/src/test-files/uima/uima-tokenizers-solrconfig.xml
    lucene/dev/branches/lucene4547/solr/contrib/velocity/src/test-files/velocity/solr/collection1/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/core/   (props changed)
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/cloud/Overseer.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/CoreContainer.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/MMapDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/NIOFSDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/NRTCachingDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/RAMDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/SimpleFSDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/SolrCore.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/SnapShooter.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/update/processor/SignatureUpdateProcessorFactory.java
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/util/SimplePostTool.java
    lucene/dev/branches/lucene4547/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/core/AlternateDirectoryTest.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/core/RAMDirectoryFactoryTest.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/update/processor/SignatureUpdateProcessorFactoryTest.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/util/SimplePostToolTest.java
    lucene/dev/branches/lucene4547/solr/example/   (props changed)
    lucene/dev/branches/lucene4547/solr/example/example-DIH/solr/db/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/example/example-DIH/solr/mail/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/example/example-DIH/solr/rss/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/example/example-DIH/solr/solr/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/example/example-DIH/solr/tika/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/example/solr/collection1/conf/solrconfig.xml
    lucene/dev/branches/lucene4547/solr/solrj/   (props changed)
    lucene/dev/branches/lucene4547/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
    lucene/dev/branches/lucene4547/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java
    lucene/dev/branches/lucene4547/solr/test-framework/   (props changed)
    lucene/dev/branches/lucene4547/solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java
    lucene/dev/branches/lucene4547/solr/webapp/   (props changed)
    lucene/dev/branches/lucene4547/solr/webapp/build.xml
    lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/dataimport.css
    lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/query.css
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/dataimport.js
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/plugins.js
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/query.js
    lucene/dev/branches/lucene4547/solr/webapp/web/tpl/dataimport.html
    lucene/dev/branches/lucene4547/solr/webapp/web/tpl/query.html

Modified: lucene/dev/branches/lucene4547/dev-tools/scripts/checkJavadocLinks.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/dev-tools/scripts/checkJavadocLinks.py?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/dev-tools/scripts/checkJavadocLinks.py (original)
+++ lucene/dev/branches/lucene4547/dev-tools/scripts/checkJavadocLinks.py Mon Jan 14 18:54:22 2013
@@ -197,6 +197,9 @@ def checkAll(dirName):
         elif link.find('lucene.apache.org/java/docs/discussion.html') != -1:
           # OK
           pass
+        elif link.find('lucene.apache.org/core/discussion.html') != -1:
+          # OK
+          pass
         elif link.find('lucene.apache.org/solr/mirrors-solr-latest-redir.html') != -1:
           # OK
           pass

Modified: lucene/dev/branches/lucene4547/dev-tools/scripts/smokeTestRelease.py
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/dev-tools/scripts/smokeTestRelease.py?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/dev-tools/scripts/smokeTestRelease.py (original)
+++ lucene/dev/branches/lucene4547/dev-tools/scripts/smokeTestRelease.py Mon Jan 14 18:54:22 2013
@@ -308,7 +308,7 @@ def checkSigs(project, urlString, versio
       artifact = text
       artifactURL = subURL
       if project == 'solr':
-        expected = 'apache-solr-%s' % version
+        expected = 'solr-%s' % version
       else:
         expected = 'lucene-%s' % version
       if not artifact.startswith(expected):
@@ -334,9 +334,9 @@ def checkSigs(project, urlString, versio
                 'lucene-%s.tgz' % version,
                 'lucene-%s.zip' % version]
   else:
-    expected = ['apache-solr-%s-src.tgz' % version,
-                'apache-solr-%s.tgz' % version,
-                'apache-solr-%s.zip' % version]
+    expected = ['solr-%s-src.tgz' % version,
+                'solr-%s.tgz' % version,
+                'solr-%s.zip' % version]
 
   actual = [x[0] for x in artifacts]
   if expected != actual:
@@ -556,10 +556,7 @@ def unpackAndVerify(project, tmpDir, art
 
   # make sure it unpacks to proper subdir
   l = os.listdir(destDir)
-  if project == 'solr':
-    expected = 'apache-%s-%s' % (project, version)
-  else:
-    expected = '%s-%s' % (project, version)
+  expected = '%s-%s' % (project, version)
   if l != [expected]:
     raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))
 
@@ -956,7 +953,6 @@ def getDistributionsForMavenChecks(tmpDi
   distributionFiles = defaultdict()
   for project in ('lucene', 'solr'):
     distribution = '%s-%s.tgz' % (project, version)
-    if project == 'solr': distribution = 'apache-' + distribution
     if not os.path.exists('%s/%s' % (tmpDir, distribution)):
       distURL = '%s/%s/%s' % (baseURL, project, distribution)
       print('    download %s...' % distribution, end=' ')
@@ -1010,8 +1006,6 @@ def checkIdenticalMavenArtifacts(distrib
     distFilenames = dict()
     for file in distributionFiles[project]:
       baseName = os.path.basename(file)
-      if project == 'solr': # Remove 'apache-' prefix to allow comparison to Maven artifacts
-        baseName = baseName.replace('apache-', '')
       distFilenames[baseName] = file
     for artifact in artifacts[project]:
       if reJarWar.search(artifact):
@@ -1348,9 +1342,9 @@ def smokeTest(baseURL, version, tmpDir, 
   print()
   print('Test Solr...')
   checkSigs('solr', solrPath, version, tmpDir, isSigned)
-  for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
+  for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
     unpackAndVerify('solr', tmpDir, artifact, version)
-  unpackAndVerify('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
+  unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, version)
 
   print()
   print('Test Maven artifacts for Lucene and Solr...')

Modified: lucene/dev/branches/lucene4547/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/CHANGES.txt?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/CHANGES.txt Mon Jan 14 18:54:22 2013
@@ -19,6 +19,16 @@ Changes in backwards compatibility polic
   (Nikola Tanković, Uwe Schindler, Chris Male, Mike McCandless,
   Robert Muir)
 
+* LUCENE-4677, LUCENE-4682: unpacked FSTs now use vInt to encode the node target,
+  to reduce their size (Mike McCandless)
+
+* LUCENE-4678: FST now uses a paged byte[] structure instead of a
+  single byte[] internally, to avoid large memory spikes during
+  building (James Dyer, Mike McCandless)
+
+* LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes.
+  (James Dyer, Mike McCandless)
+
 ======================= Lucene 4.1.0 =======================
 
 Changes in backwards compatibility policy
@@ -45,7 +55,7 @@ Changes in backwards compatibility polic
     Instead of calling refresh(), you should write similar code to how you reopen
     a regular DirectoryReader.
   - TaxonomyReader.openIfChanged (previously refresh()) no longer throws
-    IncosistentTaxonomyException, and supports recreate. InconsistentTaxoEx
+    InconsistentTaxonomyException, and supports recreate. InconsistentTaxoEx
     was removed.
   - ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
   - TaxonomyReader was made an abstract class (instead of an interface), with
@@ -94,7 +104,7 @@ Changes in backwards compatibility polic
   Also, the entire IndexingParams chain is now immutable. If you need to override
   a setting, you should extend the relevant class.
   Additionally, FacetSearchParams is now immutable, and requires all FacetRequests
-  to speified at initialization time. (Shai Erera)
+  to be specified at initialization time. (Shai Erera)
 
 * LUCENE-4647: CategoryDocumentBuilder and EnhancementsDocumentBuilder are replaced
   by FacetFields and AssociationsFacetFields respectively. CategoryEnhancement and
@@ -115,6 +125,10 @@ Changes in backwards compatibility polic
   result, few other classes such as Aggregator and CategoryListIterator were
   changed to handle bulk category ordinals. (Shai Erera)
 
+* LUCENE-4683: CategoryListIterator and Aggregator are now per-segment. As such
+  their implementations no longer take a top-level IndexReader in the constructor
+  but rather implement a setNextReader. (Shai Erera)
+  
 New Features
 
 * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
@@ -152,11 +166,6 @@ New Features
 * LUCENE-4515: MemoryIndex now supports adding the same field multiple
   times. (Simon Willnauer)
 
-* LUCENE-4540: Added an experimental Norm.setPackedLong, which allows
-  the use of VAR_INTS-encoded norms. This can be useful for cases where
-  you only need a few bits per-document, or where you might want exact
-  document length, and so on.  (Robert Muir)
-
 * LUCENE-4489: Added consumeAllTokens option to LimitTokenCountFilter
   (hossman, Robert Muir)
 
@@ -267,7 +276,7 @@ Bug Fixes
   allow 1+maxMergeCount merges threads to be created, instead of just
   maxMergeCount (Radim Kolar, Mike McCandless)
 
-* LUCENE-4567: Fixed NullPointerException in analzying, fuzzy, and
+* LUCENE-4567: Fixed NullPointerException in analyzing, fuzzy, and
   WFST suggesters when no suggestions were added (selckin via Mike
   McCandless)
 
@@ -527,7 +536,7 @@ API Changes
   StoredFieldVisitor API.  (Mike McCandless)
 
 * LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
-  not be overriden by subclasses: per-stream initialization should happen
+  not be overridden by subclasses: per-stream initialization should happen
   in reset().  (Robert Muir)
 
 * LUCENE-4377: Remove IndexInput.copyBytes(IndexOutput, long). 
@@ -753,7 +762,7 @@ API Changes
 
 * LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
   instead of the previous boolean needsFlags; consistent with the changes
-  for DocsAndPositionsEnum in LUCENE-4230. Currently othe only flag
+  for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
   is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
   
 * LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
@@ -825,7 +834,7 @@ Bug Fixes
   instance are already checked out and queued up but not yet flushed. 
   (Simon Willnauer)
 
-* LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
+* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
   (Johannes Christen, Uwe Schindler, Robert Muir)
 
 * LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
@@ -1055,7 +1064,7 @@ Changes in backwards compatibility polic
   Query/Weight/Scorer. If you extended Similarity directly before, you should 
   extend TFIDFSimilarity instead.  Similarity is now a lower-level API to 
   implement other scoring algorithms.  See MIGRATE.txt for more details.
-  (David Nemeskey, Simon Willnauer, Mike Mccandless, Robert Muir)
+  (David Nemeskey, Simon Willnauer, Mike McCandless, Robert Muir)
 
 * LUCENE-3330: The expert visitor API in Scorer has been simplified and
   extended to support arbitrary relationships. To navigate to a scorer's 
@@ -1163,12 +1172,12 @@ Changes in Runtime Behavior
   omitNorms(true) for field "a" for 1000 documents, but then add a document with
   omitNorms(false) for field "a", all documents for field "a" will have no 
   norms.  Previously, Lucene would fill the first 1000 documents with 
-  "fake norms" from Similarity.getDefault(). (Robert Muir, Mike Mccandless)
+  "fake norms" from Similarity.getDefault(). (Robert Muir, Mike McCandless)
 
 * LUCENE-2846: When some documents contain field "a", and others do not, the
   documents that don't have the field get a norm byte value of 0. Previously, 
   Lucene would populate "fake norms" with Similarity.getDefault() for these 
-  documents.  (Robert Muir, Mike Mccandless)
+  documents.  (Robert Muir, Mike McCandless)
   
 * LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather 
   than later when e.g. a merge starts. 
@@ -1201,13 +1210,13 @@ Changes in Runtime Behavior
     update or delete on IndexWriter. By default DWPTs are flushed either on
     maxBufferedDocs per DWPT or the global active used memory. Once the active
     memory exceeds ramBufferSizeMB only the largest DWPT is selected for
-    flushing and the memory used by this DWPT is substracted from the active
+    flushing and the memory used by this DWPT is subtracted from the active
     memory and added to a flushing memory pool, which can lead to temporarily
     higher memory usage due to ongoing indexing.
     
   - IndexWriter now can utilize ramBufferSize > 2048 MB. Each DWPT can address
     up to 2048 MB memory such that the ramBufferSize is now bounded by the max
-    number of DWPT avaliable in the used DocumentsWriterPerThreadPool.
+    number of DWPT available in the used DocumentsWriterPerThreadPool.
     IndexWriters net memory consumption can grow far beyond the 2048 MB limit if
     the application can use all available DWPTs. To prevent a DWPT from
     exhausting its address space IndexWriter will forcefully flush a DWPT if its
@@ -1215,7 +1224,7 @@ Changes in Runtime Behavior
     via IndexWriterConfig and defaults to 1945 MB. 
     Since IndexWriter flushes DWPT concurrently not all memory is released
     immediately. Applications should still use a ramBufferSize significantly
-    lower than the JVMs avaliable heap memory since under high load multiple
+    lower than the JVMs available heap memory since under high load multiple
     flushing DWPT can consume substantial transient memory when IO performance
     is slow relative to indexing rate.
     
@@ -1223,7 +1232,7 @@ Changes in Runtime Behavior
     'currently' RAM resident documents to disk. Yet, flushes that occur while a
     a full flush is running are queued and will happen after all DWPT involved
     in the full flush are done flushing. Applications using multiple threads
-    during indexing and trigger a full flush (eg call commmit() or open a new
+    during indexing and trigger a full flush (eg call commit() or open a new
     NRT reader) can use significantly more transient memory.
     
   - IndexWriter#addDocument and IndexWriter.updateDocument can block indexing
@@ -1266,7 +1275,7 @@ Changes in Runtime Behavior
 
 * LUCENE-3455: QueryParserBase.newFieldQuery() will throw a ParseException if
   any of the calls to the Analyzer throw an IOException.  QueryParseBase.analyzeRangePart()
-  will throw a RuntimException if an IOException is thrown by the Analyzer.
+  will throw a RuntimeException if an IOException is thrown by the Analyzer.
 
 * LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
   the first token of an indexed field has 0 positionIncrement
@@ -1356,7 +1365,7 @@ API Changes
   customized on a per-field basis.  (Robert Muir)
 
 * LUCENE-3308: DuplicateFilter keepMode and processingMode have been converted to
-  enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode repsectively.
+  enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode respectively.
 
 * LUCENE-3483: Move Function grouping collectors from Solr to grouping module.
   (Martijn van Groningen)
@@ -1514,7 +1523,7 @@ New features
 
 * LUCENE-2742: Add native per-field postings format support. Codec lets you now
   register a postings format for each field and which is in turn recorded 
-  into the index. Postings formtas are maintained on a per-segment basis and be
+  into the index. Postings formats are maintained on a per-segment basis and can be
   resolved without knowing the actual postings format used for writing the segment.
   (Simon Willnauer)
 
@@ -1722,7 +1731,7 @@ New features
    - o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
      capitalization rules to tokens.
    - o.a.l.analysis.pattern: Package for pattern-based analysis, containing a 
-     CharFilter, Tokenizer, and Tokenfilter for transforming text with regexes.
+     CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
    - o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
      synonyms.
    - o.a.l.analysis.phonetic: Package for phonetic search, containing various
@@ -1894,7 +1903,7 @@ Bug fixes
   DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
   Robert Muir, Simon Willnauer, Mike McCandless)
 
-* LUCENE-3589: BytesRef copy(short) didnt set length.
+* LUCENE-3589: BytesRef copy(short) didn't set length.
   (Peter Chang via Robert Muir)
 
 * LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
@@ -1997,6 +2006,51 @@ Build
   XSL.  (Greg Bowyer, Uwe Schindler)
 
 
+======================= Lucene 3.6.2 =======================
+
+Bug Fixes
+
+* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
+  and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
+
+* LUCENE-2686, LUCENE-3505, LUCENE-4401: Fix BooleanQuery scorers to
+  return correct freq().
+  (Koji Sekiguchi, Mike McCandless, Liu Chao, Robert Muir)
+
+* LUCENE-2501: Fixed rare thread-safety issue that could cause
+  ArrayIndexOutOfBoundsException inside ByteBlockPool (Robert Muir,
+  Mike McCandless)
+
+* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
+  twice for conjunctions: for most users this is no problem, but
+  if you had a customized Similarity that returned something other
+  than 1 when overlap == maxOverlap (always the case for conjunctions),
+  then the score would be incorrect.  (Pascal Chollet, Robert Muir)
+
+* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
+  had a custom Similarity where coord(1,1) != 1F, then the rewritten
+  query would be scored differently.  (Robert Muir)
+
+* LUCENE-4398: If you index many different field names in your
+  documents then due to a bug in how it measures its RAM
+  usage, IndexWriter would flush each segment too early eventually
+  reaching the point where it flushes after every doc.  (Tim Smith via
+  Mike McCandless)
+
+* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
+  parameter is reset to the default (1), even if set otherwise.
+  (Gilad Barkai via Shai Erera)
+
+* LUCENE-4635: Fixed ArrayIndexOutOfBoundsException when in-memory
+  terms index requires more than 2.1 GB RAM (indices with billions of
+  terms).  (Tom Burton-West via Mike McCandless)
+
+Documentation
+
+* LUCENE-4302: Fix facet userguide to have HTML loose doctype like
+  all other javadocs.  (Karl Nicholas via Uwe Schindler)
+
+
 ======================= Lucene 3.6.1 =======================
 More information about this release, including any errata related to the 
 release notes, upgrade instructions, or other changes may be found online at:
@@ -2043,7 +2097,7 @@ Tests
   random graph tokens.  (Mike McCandless)
 
 * LUCENE-3968: factor out LookaheadTokenFilter from 
-  MockGraphTokenFilter (Mike Mccandless)
+  MockGraphTokenFilter (Mike McCandless)
 
 
 ======================= Lucene 3.6.0 =======================
@@ -2323,7 +2377,7 @@ Bug fixes
 
 * LUCENE-3876: Fix bug where positions for a document exceeding
   Integer.MAX_VALUE/2 would produce a corrupt index.  
-  (Simon Willnauer, Mike Mccandless, Robert Muir)
+  (Simon Willnauer, Mike McCandless, Robert Muir)
 
 * LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
   scheme is prepended. (Kai Gülzau, Steve Rowe)

Modified: lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java Mon Jan 14 18:54:22 2013
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.ja.di
 
 import java.io.IOException;
 
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.FST;
 
 /**
  * Thin wrapper around an FST with root-arc caching for Japanese.
@@ -48,7 +48,7 @@ public final class TokenInfoFST {
     rootCache = cacheRootArcs();
   }
   
-  @SuppressWarnings("unchecked")
+  @SuppressWarnings({"rawtypes","unchecked"})
   private FST.Arc<Long>[] cacheRootArcs() throws IOException {
     FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0x3040)];
     FST.Arc<Long> firstArc = new FST.Arc<Long>();

Modified: lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary%24fst.dat?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java Mon Jan 14 18:54:22 2013
@@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder 
     System.out.println("  encode...");
 
     PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
-    Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
+    Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
     IntsRef scratch = new IntsRef();
     long ord = -1; // first ord will be 0
     String lastValue = null;

Modified: lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (original)
+++ lucene/dev/branches/lucene4547/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java Mon Jan 14 18:54:22 2013
@@ -113,7 +113,7 @@ public final class MemoryPostingsFormat 
       this.field = field;
       this.doPackFST = doPackFST;
       this.acceptableOverheadRatio = acceptableOverheadRatio;
-      builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
+      builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
     }
 
     private class PostingsWriter extends PostingsConsumer {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/analysis/package.html Mon Jan 14 18:54:22 2013
@@ -230,7 +230,7 @@ and proximity searches (though sentence 
   create, or a combination of existing and newly created components.  Before
   pursuing this approach, you may find it worthwhile to explore the
   <a href="{@docRoot}/../analyzers-common/overview-summary.html">analyzers-common</a> library and/or ask on the 
-  <a href="http://lucene.apache.org/java/docs/mailinglists.html"
+  <a href="http://lucene.apache.org/core/discussion.html"
       >java-user@lucene.apache.org mailing list</a> first to see if what you
   need already exists. If you are still committed to creating your own
   Analyzer, have a look at the source code of any one of the many samples

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Mon Jan 14 18:54:22 2013
@@ -276,13 +276,13 @@ public class BlockTreeTermsReader extend
    */
   public static class Stats {
     /** How many nodes in the index FST. */
-    public int indexNodeCount;
+    public long indexNodeCount;
 
     /** How many arcs in the index FST. */
-    public int indexArcCount;
+    public long indexArcCount;
 
     /** Byte size of the index. */
-    public int indexNumBytes;
+    public long indexNumBytes;
 
     /** Total number of terms in the field. */
     public long totalTermCount;

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Mon Jan 14 18:54:22 2013
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
@@ -41,6 +40,7 @@ import org.apache.lucene.util.fst.BytesR
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.NoOutputs;
 import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.packed.PackedInts;
 
 /*
   TODO:
@@ -187,7 +187,7 @@ public class BlockTreeTermsWriter extend
   public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
 
   //public final static boolean DEBUG = false;
-  private final static boolean SAVE_DOT_FILES = false;
+  //private final static boolean SAVE_DOT_FILES = false;
 
   static final int OUTPUT_FLAGS_NUM_BITS = 2;
   static final int OUTPUT_FLAGS_MASK = 0x3;
@@ -419,7 +419,8 @@ public class BlockTreeTermsWriter extend
       final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
       final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
                                                                    0, 0, true, false, Integer.MAX_VALUE,
-                                                                   outputs, null, false, true);
+                                                                   outputs, null, false,
+                                                                   PackedInts.COMPACT, true, 15);
       //if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);
       //}
@@ -962,7 +963,9 @@ public class BlockTreeTermsWriter extend
                                          0, 0, true,
                                          true, Integer.MAX_VALUE,
                                          noOutputs,
-                                         new FindBlocks(), false, true);
+                                         new FindBlocks(), false,
+                                         PackedInts.COMPACT,
+                                         true, 15);
 
       postingsWriter.setField(fieldInfo);
     }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Mon Jan 14 18:54:22 2013
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -3475,6 +3476,7 @@ public class IndexWriter implements Clos
     diagnostics.put("os.version", Constants.OS_VERSION);
     diagnostics.put("java.version", Constants.JAVA_VERSION);
     diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
+    diagnostics.put("timestamp", Long.toString(new Date().getTime()));
     if (details != null) {
       diagnostics.putAll(details);
     }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/Norm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/Norm.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/Norm.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/index/Norm.java Mon Jan 14 18:54:22 2013
@@ -115,15 +115,6 @@ public final class Norm  {
     setType(Type.FIXED_INTS_64);
     this.field.setLongValue(norm);
   }
-  
-  /**
-   * Sets a packed long norm value.
-   * @lucene.experimental
-   */
-  public void setPackedLong(long norm) {
-    setType(Type.VAR_INTS);
-    this.field.setLongValue(norm);
-  }
 
   /**
    * Sets a byte norm value

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java Mon Jan 14 18:54:22 2013
@@ -38,7 +38,7 @@ import org.apache.lucene.search.DocIdSet
 
 public final class FixedBitSet extends DocIdSet implements Bits {
   private final long[] bits;
-  private int numBits;
+  private final int numBits;
 
   /** returns the number of 64 bit words it would take to hold numBits */
   public static int bits2words(int numBits) {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java?rev=1433035&r1=1433034&r2=1433035&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java Mon Jan 14 18:54:22 2013
@@ -36,9 +36,13 @@ import org.apache.lucene.util.packed.Pac
  * <p>NOTE: The algorithm is described at
  * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
  *
- * The parameterized type T is the output type.  See the
+ * <p>The parameterized type T is the output type.  See the
  * subclasses of {@link Outputs}.
  *
+ * <p>FSTs larger than 2.1GB are now possible (as of Lucene
+ * 4.2).  FSTs containing more than 2.1B nodes are also now
+ * possible; however, they cannot be packed.
+ *
  * @lucene.experimental
  */
 
@@ -84,22 +88,11 @@ public class Builder<T> {
   /**
    * Instantiates an FST/FSA builder without any pruning. A shortcut
    * to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
-   * boolean, int, Outputs, FreezeTail, boolean, boolean)} with
-   * pruning options turned off.
+   * boolean, int, Outputs, FreezeTail, boolean, float,
+   * boolean, int)} with pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
-    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
-  }
-
-  /**
-   * Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
-   * <code>acceptableOverheadRatio</code>.
-   */
-  public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
-      boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
-      FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
-    this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
-         shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);
   }
 
   /**
@@ -147,10 +140,16 @@ public class Builder<T> {
    * @param allowArrayArcs Pass false to disable the array arc optimization
    *    while building the FST; this will make the resulting
    *    FST smaller but slower to traverse.
+   *
+   * @param bytesPageBits How many bits wide to make each
+   *    byte[] block in the BytesStore; if you know the FST
+   *    will be large then make this larger.  For example, 15
+   *    bits = 32768-byte pages.
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
                  boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
-                 FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
+                 FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
+                 int bytesPageBits) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
     this.freezeTail = freezeTail;
@@ -158,9 +157,9 @@ public class Builder<T> {
     this.shareMaxTailLength = shareMaxTailLength;
     this.doPackFST = doPackFST;
     this.acceptableOverheadRatio = acceptableOverheadRatio;
-    fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
+    fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);
     if (doShareSuffix) {
-      dedupHash = new NodeHash<T>(fst);
+      dedupHash = new NodeHash<T>(fst, fst.bytes.getReverseReader(false));
     } else {
       dedupHash = null;
     }
@@ -174,7 +173,7 @@ public class Builder<T> {
     }
   }
 
-  public int getTotStateCount() {
+  public long getTotStateCount() {
     return fst.nodeCount;
   }
 
@@ -182,12 +181,12 @@ public class Builder<T> {
     return frontier[0].inputCount;
   }
 
-  public int getMappedStateCount() {
+  public long getMappedStateCount() {
     return dedupHash == null ? 0 : fst.nodeCount;
   }
 
   private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
-    final int node;
+    final long node;
     if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
       if (nodeIn.numArcs == 0) {
         node = fst.addNode(nodeIn);
@@ -475,7 +474,7 @@ public class Builder<T> {
     fst.finish(compileNode(root, lastInput.length).node);
 
     if (doPackFST) {
-      return fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
+      return fst.pack(3, Math.max(10, (int) (fst.getNodeCount()/4)), acceptableOverheadRatio);
     } else {
       return fst;
     }
@@ -513,8 +512,12 @@ public class Builder<T> {
     boolean isCompiled();
   }
 
+  public long fstSizeInBytes() {
+    return fst.sizeInBytes();
+  }
+
   static final class CompiledNode implements Node {
-    int node;
+    long node;
     @Override
     public boolean isCompiled() {
       return true;