You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/01/24 13:43:36 UTC

svn commit: r1437982 [1/2] - in /lucene/dev/branches/lucene4547: ./ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/compressing/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/...

Author: rmuir
Date: Thu Jan 24 12:43:34 2013
New Revision: 1437982

URL: http://svn.apache.org/viewvc?rev=1437982&view=rev
Log:
Merged /lucene/dev/trunk:r1436566-1437977

Added:
    lucene/dev/branches/lucene4547/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
      - copied unchanged from r1437977, lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java
    lucene/dev/branches/lucene4547/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialArgsTest.java
      - copied unchanged from r1437977, lucene/dev/trunk/lucene/spatial/src/test/org/apache/lucene/spatial/SpatialArgsTest.java
Removed:
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/
Modified:
    lucene/dev/branches/lucene4547/   (props changed)
    lucene/dev/branches/lucene4547/build.xml
    lucene/dev/branches/lucene4547/lucene/   (props changed)
    lucene/dev/branches/lucene4547/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/common/   (props changed)
    lucene/dev/branches/lucene4547/lucene/analysis/common/build.xml
    lucene/dev/branches/lucene4547/lucene/common-build.xml   (contents, props changed)
    lucene/dev/branches/lucene4547/lucene/core/   (props changed)
    lucene/dev/branches/lucene4547/lucene/core/build.xml
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
    lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java
    lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java
    lucene/dev/branches/lucene4547/lucene/facet/   (props changed)
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java
    lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java
    lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java
    lucene/dev/branches/lucene4547/lucene/highlighter/   (props changed)
    lucene/dev/branches/lucene4547/lucene/memory/   (props changed)
    lucene/dev/branches/lucene4547/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/lucene4547/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    lucene/dev/branches/lucene4547/lucene/spatial/   (props changed)
    lucene/dev/branches/lucene4547/lucene/spatial/src/java/org/apache/lucene/spatial/query/SpatialArgs.java
    lucene/dev/branches/lucene4547/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene4547/lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
    lucene/dev/branches/lucene4547/solr/   (props changed)
    lucene/dev/branches/lucene4547/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene4547/solr/core/   (props changed)
    lucene/dev/branches/lucene4547/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/branches/lucene4547/solr/core/src/test/org/apache/solr/TestGroupingSearch.java
    lucene/dev/branches/lucene4547/solr/webapp/   (props changed)
    lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/analysis.css
    lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/cloud.css
    lucene/dev/branches/lucene4547/solr/webapp/web/css/styles/schema-browser.css
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/analysis.js
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/plugins.js
    lucene/dev/branches/lucene4547/solr/webapp/web/js/scripts/schema-browser.js
    lucene/dev/branches/lucene4547/solr/webapp/web/tpl/analysis.html
    lucene/dev/branches/lucene4547/solr/webapp/web/tpl/cloud.html
    lucene/dev/branches/lucene4547/solr/webapp/web/tpl/schema-browser.html

Modified: lucene/dev/branches/lucene4547/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/build.xml?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/build.xml (original)
+++ lucene/dev/branches/lucene4547/build.xml Thu Jan 24 12:43:34 2013
@@ -287,6 +287,8 @@
    <makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
    <exec executable="${python32.exe}" failonerror="true">
      <arg value="-u"/>
+     <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+     <arg value="-B"/>
      <arg file="dev-tools/scripts/smokeTestRelease.py"/>
      <arg value="${fakeRelease.uri}"/>
      <arg value="${fakeReleaseVersion}"/>

Modified: lucene/dev/branches/lucene4547/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/CHANGES.txt?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene4547/lucene/CHANGES.txt Thu Jan 24 12:43:34 2013
@@ -37,6 +37,11 @@ Changes in backwards compatibility polic
   FacetsCollector) and CountingFacetsCollector. FacetsCollector.create()
   returns the most optimized collector for the given parameters.
   (Shai Erera, Michael McCandless)
+  
+* LUCENE-4700: OrdinalPolicy is now per CategoryListParams, and is no longer
+  an interface, but rather an enum with values NO_PARENTS and ALL_PARENTS.
+  PathPolicy was removed; you should extend FacetFields and DrillDownStream
+  to control which categories are added as drill-down terms. (Shai Erera)
 
 Optimizations
 
@@ -65,6 +70,23 @@ New Features
   compresses term vectors into chunks of documents similarly to
   CompressingStoredFieldsFormat. (Adrien Grand)
 
+API Changes
+
+* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
+  
+Bug Fixes
+
+* LUCENE-4705: Pass on FilterStrategy in FilteredQuery if the filtered query is 
+  rewritten. (Simon Willnauer)
+
+* LUCENE-4712: MemoryIndex#normValues() throws NPE if field doesn't exist. 
+  (Simon Willnauer, Ricky Pritchett)
+
+* LUCENE-4550: Shapes wider than 180 degrees would use too much accuracy for the
+  PrefixTree based SpatialStrategy. For a pathological case of nearly 360
+  degrees and barely any height, it would generate so many indexed terms
+  (> 500k) that it could even cause an OutOfMemoryError. Fixed. (David Smiley)
+
 ======================= Lucene 4.1.0 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/dev/branches/lucene4547/lucene/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/analysis/common/build.xml?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/analysis/common/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/analysis/common/build.xml Thu Jan 24 12:43:34 2013
@@ -59,6 +59,8 @@
     <exec dir="src/java/org/apache/lucene/analysis/charfilter"
           output="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex"
           executable="${python.exe}" failonerror="true" logerror="true">
+      <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+      <arg value="-B"/>
       <arg value="htmlentity.py"/>
     </exec>
     <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>

Modified: lucene/dev/branches/lucene4547/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/common-build.xml?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/common-build.xml Thu Jan 24 12:43:34 2013
@@ -1962,6 +1962,8 @@ ${tests-output}/junit4-*.suites     - pe
        <attribute name="dir"/>
      <sequential>
        <exec dir="." executable="${python32.exe}" failonerror="true">
+	 <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+	 <arg value="-B"/>
          <arg value="${dev-tools.dir}/scripts/checkJavadocLinks.py"/>
          <arg value="@{dir}"/>
        </exec>
@@ -1973,6 +1975,8 @@ ${tests-output}/junit4-*.suites     - pe
        <attribute name="level" default="class"/>
      <sequential>
        <exec dir="." executable="${python32.exe}" failonerror="true">
+	 <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+	 <arg value="-B"/>
          <arg value="${dev-tools.dir}/scripts/checkJavaDocs.py"/>
          <arg value="@{dir}"/>
          <arg value="@{level}"/>

Modified: lucene/dev/branches/lucene4547/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/build.xml?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/build.xml (original)
+++ lucene/dev/branches/lucene4547/lucene/core/build.xml Thu Jan 24 12:43:34 2013
@@ -63,12 +63,16 @@
   	<sequential>
       <exec dir="src/java/org/apache/lucene/util/automaton"
             executable="${python.exe}" failonerror="true">
+	 <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+	 <arg value="-B"/>
         <arg value="createLevAutomata.py"/>
         <arg value="@{n}"/>
         <arg value="True"/>
       </exec>
       <exec dir="src/java/org/apache/lucene/util/automaton"
             executable="${python.exe}" failonerror="true">
+	 <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+	 <arg value="-B"/>
         <arg value="createLevAutomata.py"/>
         <arg value="@{n}"/>
         <arg value="False"/>
@@ -80,18 +84,26 @@
   <target name="createPackedIntSources">
     <exec dir="src/java/org/apache/lucene/util/packed"
           executable="${python.exe}" failonerror="true">
+      <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+      <arg value="-B"/>
       <arg value="gen_BulkOperation.py"/>
     </exec>
     <exec dir="src/java/org/apache/lucene/util/packed"
           executable="${python.exe}" failonerror="true">
+      <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+      <arg value="-B"/>
       <arg value="gen_Direct.py"/>
     </exec>
     <exec dir="src/java/org/apache/lucene/util/packed"
           executable="${python.exe}" failonerror="true">
+      <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+      <arg value="-B"/>
       <arg value="gen_Packed64SingleBlock.py"/>
     </exec>
     <exec dir="src/java/org/apache/lucene/util/packed"
           executable="${python.exe}" failonerror="true">
+      <!-- Tell Python not to write any bytecode cache into the filesystem: -->
+      <arg value="-B"/>
       <arg value="gen_PackedThreeBlocks.py"/>
     </exec>
     <fixcrlf srcdir="src/java/org/apache/lucene/util/packed" includes="BulkOperation*.java,Direct*.java,Packed64SingleBlock.java,Packed*ThreeBlocks.py" encoding="UTF-8"/>

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Thu Jan 24 12:43:34 2013
@@ -873,22 +873,7 @@ public final class CompressingTermVector
     }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
-        throws IOException {
-      return docsAndPositions(liveDocs, reuse, flags);
-    }
-
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
-        DocsAndPositionsEnum reuse, int flags) throws IOException {
-      if ((flags & POSITIONS) == 0 && (flags & OFFSETS) == 0) {
-        return null;
-      }
-      return docsAndPositions(liveDocs, (DocsEnum) reuse, flags);
-    }
-
-    private DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
-        DocsEnum reuse, int ignoredFlags) throws IOException {
+    public final DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
       final TVDocsEnum docsEnum;
       if (reuse != null && reuse instanceof TVDocsEnum) {
         docsEnum = (TVDocsEnum) reuse;
@@ -900,6 +885,15 @@ public final class CompressingTermVector
       return docsEnum;
     }
 
+    @Override
+    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+      if (positions == null && startOffsets == null) {
+        return null;
+      }
+      // TODO: slightly sheisty
+      return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags);
+    }
+
   }
 
   private static class TVDocsEnum extends DocsAndPositionsEnum {

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java Thu Jan 24 12:43:34 2013
@@ -344,7 +344,7 @@ public class FilteredQuery extends Query
     
     if (queryRewritten != query) {
       // rewrite to a new FilteredQuery wrapping the rewritten query
-      final Query rewritten = new FilteredQuery(queryRewritten, filter);
+      final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy);
       rewritten.setBoost(this.getBoost());
       return rewritten;
     } else {
@@ -362,6 +362,11 @@ public class FilteredQuery extends Query
   public final Filter getFilter() {
     return filter;
   }
+  
+  /** Returns this FilteredQuery's {@link FilterStrategy} */
+  public FilterStrategy getFilterStrategy() {
+    return this.strategy;
+  }
 
   // inherit javadoc
   @Override

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java Thu Jan 24 12:43:34 2013
@@ -21,33 +21,56 @@ import java.io.IOException;
 
 /** Base implementation class for buffered {@link IndexOutput}. */
 public abstract class BufferedIndexOutput extends IndexOutput {
-  static final int BUFFER_SIZE = 16384;
+  /** The default buffer size in bytes ({@value #DEFAULT_BUFFER_SIZE}). */
+  public static final int DEFAULT_BUFFER_SIZE = 16384;
 
-  private final byte[] buffer = new byte[BUFFER_SIZE];
+  private final int bufferSize;
+  private final byte[] buffer;
   private long bufferStart = 0;           // position in file of buffer
   private int bufferPosition = 0;         // position in buffer
 
+  /**
+   * Creates a new {@link BufferedIndexOutput} with the default buffer size
+   * ({@value #DEFAULT_BUFFER_SIZE} bytes; see {@link #DEFAULT_BUFFER_SIZE}).
+   */
+  public BufferedIndexOutput() {
+    this(DEFAULT_BUFFER_SIZE);
+  }
+  
+  /**
+   * Creates a new {@link BufferedIndexOutput} with the given buffer size. 
+   * @param bufferSize the buffer size in bytes used to buffer writes internally.
+   * @throws IllegalArgumentException if the given buffer size is less than or equal to <tt>0</tt>
+   */
+  public BufferedIndexOutput(int bufferSize) {
+    if (bufferSize <= 0) {
+      throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
+    }
+    this.bufferSize = bufferSize;
+    buffer = new byte[bufferSize];
+  }
+
   @Override
   public void writeByte(byte b) throws IOException {
-    if (bufferPosition >= BUFFER_SIZE)
+    if (bufferPosition >= bufferSize)
       flush();
     buffer[bufferPosition++] = b;
   }
 
   @Override
   public void writeBytes(byte[] b, int offset, int length) throws IOException {
-    int bytesLeft = BUFFER_SIZE - bufferPosition;
+    int bytesLeft = bufferSize - bufferPosition;
     // is there enough space in the buffer?
     if (bytesLeft >= length) {
       // we add the data to the end of the buffer
       System.arraycopy(b, offset, buffer, bufferPosition, length);
       bufferPosition += length;
       // if the buffer is full, flush it
-      if (BUFFER_SIZE - bufferPosition == 0)
+      if (bufferSize - bufferPosition == 0)
         flush();
     } else {
       // is data larger then buffer?
-      if (length > BUFFER_SIZE) {
+      if (length > bufferSize) {
         // we flush the buffer
         if (bufferPosition > 0)
           flush();
@@ -64,10 +87,10 @@ public abstract class BufferedIndexOutpu
           pos += pieceLength;
           bufferPosition += pieceLength;
           // if the buffer is full, flush it
-          bytesLeft = BUFFER_SIZE - bufferPosition;
+          bytesLeft = bufferSize - bufferPosition;
           if (bytesLeft == 0) {
             flush();
-            bytesLeft = BUFFER_SIZE;
+            bytesLeft = bufferSize;
           }
         }
       }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java Thu Jan 24 12:43:34 2013
@@ -272,6 +272,7 @@ abstract class ByteBufferIndexInput exte
       // make local copy, then un-set early
       final ByteBuffer[] bufs = buffers;
       unsetBuffers();
+      clones.remove(this);
       
       if (isClone) return;
       

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java Thu Jan 24 12:43:34 2013
@@ -289,6 +289,8 @@ public class TestTermVectorsReader exten
       String term = text.utf8ToString();
       //System.out.println("Term: " + term);
       assertEquals(testTerms[i], term);
+      assertNotNull(termsEnum.docs(null, null));
+      assertNull(termsEnum.docsAndPositions(null, null)); // no pos
     }
     reader.close();
   }

Modified: lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java (original)
+++ lucene/dev/branches/lucene4547/lucene/core/src/test/org/apache/lucene/search/TestFilteredQuery.java Thu Jan 24 12:43:34 2013
@@ -31,6 +31,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.FilteredQuery.FilterStrategy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.DocIdBitSet;
@@ -341,11 +342,16 @@ public class TestFilteredQuery extends L
     }
   }
   
+  private FilterStrategy randomFilterStrategy() {
+    return randomFilterStrategy(random(), true);
+  }
+  
   private void assertRewrite(FilteredQuery fq, Class<? extends Query> clazz) throws Exception {
     // assign crazy boost to FQ
     final float boost = random().nextFloat() * 100.f;
     fq.setBoost(boost);
     
+    
     // assign crazy boost to inner
     final float innerBoost = random().nextFloat() * 100.f;
     fq.getQuery().setBoost(innerBoost);
@@ -356,6 +362,7 @@ public class TestFilteredQuery extends L
     if (rewritten instanceof FilteredQuery) {
       assertEquals(boost, rewritten.getBoost(), 1.E-5f);
       assertEquals(innerBoost, ((FilteredQuery) rewritten).getQuery().getBoost(), 1.E-5f);
+      assertEquals(fq.getFilterStrategy(), ((FilteredQuery) rewritten).getFilterStrategy());
     } else {
       assertEquals(boost * innerBoost, rewritten.getBoost(), 1.E-5f);
     }
@@ -366,8 +373,15 @@ public class TestFilteredQuery extends L
   }
 
   public void testRewrite() throws Exception {
-    assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o"))), FilteredQuery.class);
-    assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o"))), ConstantScoreQuery.class);
+    assertRewrite(new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
+    assertRewrite(new FilteredQuery(new PrefixQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), FilteredQuery.class);
+    assertRewrite(new FilteredQuery(new MatchAllDocsQuery(), new PrefixFilter(new Term("field", "o")), randomFilterStrategy()), ConstantScoreQuery.class);
+  }
+  
+  public void testGetFilterStrategy() {
+    FilterStrategy randomFilterStrategy = randomFilterStrategy();
+    FilteredQuery filteredQuery = new FilteredQuery(new TermQuery(new Term("field", "one")), new PrefixFilter(new Term("field", "o")), randomFilterStrategy);
+    assertSame(randomFilterStrategy, filteredQuery.getFilterStrategy());
   }
   
   private static FilteredQuery.FilterStrategy randomFilterStrategy(Random random, final boolean useRandomAccess) {

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java Thu Jan 24 12:43:34 2013
@@ -6,8 +6,8 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
@@ -120,7 +120,7 @@ public class CountingListBuilder impleme
   public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, 
       TaxonomyWriter taxoWriter) {
     this.taxoWriter = taxoWriter;
-    this.ordinalPolicy = indexingParams.getOrdinalPolicy();
+    this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
     if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
       ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
     } else {
@@ -143,14 +143,14 @@ public class CountingListBuilder impleme
   public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
     int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
     
-    for (int i = 0; i < upto; i++) {
-      int ordinal = ordinals.ints[i];
-      int parent = taxoWriter.getParent(ordinal);
-      while (parent > 0) {
-        if (ordinalPolicy.shouldAdd(parent)) {
+    if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
+      for (int i = 0; i < upto; i++) {
+        int ordinal = ordinals.ints[i];
+        int parent = taxoWriter.getParent(ordinal);
+        while (parent > 0) {
           ordinals.ints[ordinals.length++] = parent;
+          parent = taxoWriter.getParent(parent);
         }
-        parent = taxoWriter.getParent(parent);
       }
     }
     return ordinalsEncoder.encode(ordinals);

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java Thu Jan 24 12:43:34 2013
@@ -5,7 +5,6 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 
@@ -36,7 +35,6 @@ public class DrillDownStream extends Tok
   private final FacetIndexingParams indexingParams;
   private final Iterator<CategoryPath> categories;
   private final CharTermAttribute termAttribute;
-  private final PathPolicy pathPolicy;
   
   private CategoryPath current;
   private boolean isParent;
@@ -45,7 +43,6 @@ public class DrillDownStream extends Tok
     termAttribute = addAttribute(CharTermAttribute.class);
     this.categories = categories.iterator();
     this.indexingParams = indexingParams;
-    this.pathPolicy = indexingParams.getPathPolicy();
   }
 
   protected void addAdditionalAttributes(CategoryPath category, boolean isParent) {
@@ -71,10 +68,7 @@ public class DrillDownStream extends Tok
     addAdditionalAttributes(current, isParent);
     
     // prepare current for next call by trimming the last component (parents)
-    do {
-      // skip all parent categories which are not accepted by PathPolicy
-      current = current.subpath(current.length - 1);
-    } while (!pathPolicy.shouldAdd(current) && current.length > 0);
+    current = current.subpath(current.length - 1);
     isParent = true;
     return true;
   }

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java Thu Jan 24 12:43:34 2013
@@ -36,9 +36,36 @@ import org.apache.lucene.util.encoding.U
  */
 public class CategoryListParams implements Serializable {
 
+  /** OrdinalPolicy defines which ordinals are encoded for every document. */
+  public static enum OrdinalPolicy {
+    /**
+     * Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
+     * not encode the ordinals of A and A/B.
+     * 
+     * <p>
+     * <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
+     * accumulator, which will fix the parents' counts, unless you are not
+     * interested in the parents' counts.
+     */
+    NO_PARENTS,
+    
+    /**
+     * Encodes the ordinals of all path components. That is, the category A/B/C
+     * will encode the ordinals of A and A/B as well. This is the default
+     * {@link OrdinalPolicy}.
+     */
+    ALL_PARENTS
+  }
+  
   /** The default field used to store the facets information. */
   public static final String DEFAULT_FIELD = "$facets";
 
+  /**
+   * The default {@link OrdinalPolicy} that's used when encoding a document's
+   * category ordinals.
+   */
+  public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
+  
   public final String field;
 
   private final int hashCode;
@@ -92,6 +119,7 @@ public class CategoryListParams implemen
     if (this.hashCode != other.hashCode) {
       return false;
     }
+    
     // The above hashcodes might equal each other in the case of a collision,
     // so at this point only directly term equality testing will settle
     // the equality test.
@@ -110,4 +138,9 @@ public class CategoryListParams implemen
     return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
   }
   
+  /** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
+  public OrdinalPolicy getOrdinalPolicy() {
+    return DEFAULT_ORDINAL_POLICY;
+  }
+  
 }
\ No newline at end of file

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java Thu Jan 24 12:43:34 2013
@@ -3,8 +3,7 @@ package org.apache.lucene.facet.index.pa
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
 import org.apache.lucene.facet.search.FacetArrays;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 
@@ -47,9 +46,8 @@ public class FacetIndexingParams {
   protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();
 
   /**
-   * A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
-   * {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
-   * {@link #FacetIndexingParams()}.
+   * A {@link FacetIndexingParams} which fixes a single
+   * {@link CategoryListParams} with {@link OrdinalPolicy#ALL_PARENTS}.
    */
   public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
   
@@ -62,8 +60,6 @@ public class FacetIndexingParams {
    */
   public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
   
-  private final OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
-  private final PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
   private final int partitionSize = Integer.MAX_VALUE;
 
   protected final CategoryListParams clParams;
@@ -130,32 +126,12 @@ public class FacetIndexingParams {
     return Collections.singletonList(clParams);
   }
 
-  /**
-   * Returns the {@link OrdinalPolicy} that is used during indexing. By default
-   * returns {@link OrdinalPolicy#ALL_PARENTS} which means that the full
-   * hierarchy will be stored for every document.
-   */
-  public OrdinalPolicy getOrdinalPolicy() {
-    return ordinalPolicy;
-  }
-
-  /**
-   * Returns the {@link PathPolicy} that is used during indexing. By default
-   * returns {@link PathPolicy#ALL_CATEGORIES} which means that the full
-   * hierarchy is added as drill-down terms for every document.
-   */
-  public PathPolicy getPathPolicy() {
-    return pathPolicy;
-  }
-
   @Override
   public int hashCode() {
     final int prime = 31;
     int result = 1;
     result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
-    result = prime * result + ((ordinalPolicy == null) ? 0 : ordinalPolicy.hashCode());
     result = prime * result + partitionSize;
-    result = prime * result + ((pathPolicy == null) ? 0 : pathPolicy.hashCode());
     
     for (CategoryListParams clp : getAllCategoryListParams()) {
       result ^= clp.hashCode();
@@ -183,23 +159,9 @@ public class FacetIndexingParams {
     } else if (!clParams.equals(other.clParams)) {
       return false;
     }
-    if (ordinalPolicy == null) {
-      if (other.ordinalPolicy != null) {
-        return false;
-      }
-    } else if (!ordinalPolicy.equals(other.ordinalPolicy)) {
-      return false;
-    }
     if (partitionSize != other.partitionSize) {
       return false;
     }
-    if (pathPolicy == null) {
-      if (other.pathPolicy != null) {
-        return false;
-      }
-    } else if (!pathPolicy.equals(other.pathPolicy)) {
-      return false;
-    }
     
     Iterable<CategoryListParams> cLs = getAllCategoryListParams();
     Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java Thu Jan 24 12:43:34 2013
@@ -5,7 +5,6 @@ import java.util.List;
 
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
-import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.sampling.RandomSampler;
 import org.apache.lucene.facet.search.sampling.Sampler;
 import org.apache.lucene.facet.search.sampling.SamplingAccumulator;
@@ -37,8 +36,7 @@ import org.apache.lucene.index.IndexRead
  * <p>
  * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
  * does not guarantee accurate values for
- * {@link FacetResult#getNumValidDescendants()} and
- * {@link FacetResultNode#residue}.
+ * {@link FacetResult#getNumValidDescendants()}.
  * 
  * @lucene.experimental
  */

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java Thu Jan 24 12:43:34 2013
@@ -3,12 +3,14 @@ package org.apache.lucene.facet.search;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map.Entry;
 
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
 import org.apache.lucene.facet.search.params.CountFacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
@@ -79,6 +81,7 @@ import org.apache.lucene.util.encoding.D
 public class CountingFacetsCollector extends FacetsCollector {
   
   private final FacetSearchParams fsp;
+  private final OrdinalPolicy ordinalPolicy;
   private final TaxonomyReader taxoReader;
   private final BytesRef buf = new BytesRef(32);
   private final FacetArrays facetArrays;
@@ -98,10 +101,12 @@ public class CountingFacetsCollector ext
     assert assertParams(fsp) == null : assertParams(fsp);
     
     this.fsp = fsp;
+    CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
+    this.ordinalPolicy = clp.getOrdinalPolicy();
+    this.facetsField = clp.field;
     this.taxoReader = taxoReader;
     this.facetArrays = facetArrays;
     this.counts = facetArrays.getIntArray();
-    this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
   }
   
   /**
@@ -129,14 +134,21 @@ public class CountingFacetsCollector ext
       }
     }
     
-    // verify that there's only one CategoryListParams
-    List<CategoryListParams> clps = fsp.indexingParams.getAllCategoryListParams();
-    if (clps.size() != 1) {
-      return "this Collector supports only one CategoryListParams";
+    // verify that there's only one CategoryListParams for all FacetRequests
+    CategoryListParams clp = null;
+    for (FacetRequest fr : fsp.facetRequests) {
+      CategoryListParams cpclp = fsp.indexingParams.getCategoryListParams(fr.categoryPath);
+      if (clp == null) {
+        clp = cpclp;
+      } else if (clp != cpclp) {
+        return "all FacetRequests must belong to the same CategoryListParams";
+      }
+    }
+    if (clp == null) {
+      return "at least one FacetRequest must be defined";
     }
     
     // verify DGapVInt decoder
-    CategoryListParams clp = clps.get(0);
     if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) {
       return "this Collector supports only DGap + VInt encoding";
     }
@@ -222,7 +234,7 @@ public class CountingFacetsCollector ext
       
       ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
 
-      if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
+      if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
         // need to count parents
         countParents(arrays.parents());
       }
@@ -254,7 +266,17 @@ public class CountingFacetsCollector ext
             }
             child = siblings[child];
           }
-          root.residue = 0;
+          Collections.sort(nodes, new Comparator<FacetResultNode>() {
+            @Override
+            public int compare(FacetResultNode o1, FacetResultNode o2) {
+              int value = (int) (o2.value - o1.value);
+              if (value == 0) {
+                value = o2.ordinal - o1.ordinal;
+              }
+              return value;
+            }
+          });
+          
           root.subResults = nodes;
           res.add(new FacetResult(fr, root, nodes.size()));
           continue;
@@ -265,17 +287,13 @@ public class CountingFacetsCollector ext
         FacetResultNode top = pq.top();
         int child = children[rootOrd];
         int numResults = 0; // count the number of results
-        int residue = 0;
         while (child != TaxonomyReader.INVALID_ORDINAL) {
           int count = counts[child];
           if (count > top.value) {
-            residue += top.value;
             top.value = count;
             top.ordinal = child;
             top = pq.updateTop();
             ++numResults;
-          } else {
-            residue += count;
           }
           child = siblings[child];
         }
@@ -292,7 +310,6 @@ public class CountingFacetsCollector ext
           node.label = taxoReader.getPath(node.ordinal);
           subResults[i] = node;
         }
-        root.residue = residue;
         root.subResults = Arrays.asList(subResults);
         res.add(new FacetResult(fr, root, size));
       }

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java Thu Jan 24 12:43:34 2013
@@ -6,7 +6,6 @@ import java.util.List;
 
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
-import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.sampling.Sampler;
 import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
 
@@ -32,8 +31,7 @@ import org.apache.lucene.facet.search.sa
  * <p>
  * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
  * does not guarantee accurate values for
- * {@link FacetResult#getNumValidDescendants()} and
- * {@link FacetResultNode#residue}.
+ * {@link FacetResult#getNumValidDescendants()}.
  * 
  * @lucene.experimental
  */

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java Thu Jan 24 12:43:34 2013
@@ -3,8 +3,8 @@ package org.apache.lucene.facet.search;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
 import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java Thu Jan 24 12:43:34 2013
@@ -62,7 +62,6 @@ public class TopKFacetResultsHandler ext
         value = facetRequest.getValueOf(facetArrays, ordinal % partitionSize);
       }
       
-      // TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
       FacetResultNode parentResultNode = new FacetResultNode(ordinal, value);
       
       Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
@@ -97,11 +96,7 @@ public class TopKFacetResultsHandler ext
       }
       // bring sub results from heap of tmp res into result heap
       for (int i = tmpHeap.size(); i > 0; i--) {
-        
-        FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
-        if (a != null) {
-          resNode.residue += a.residue;
-        }
+        heap.insertWithOverflow(tmpHeap.pop());
       }
     }
     
@@ -177,14 +172,9 @@ public class TopKFacetResultsHandler ext
             reusable.value = value;
             reusable.subResults.clear();
             reusable.label = null;
-            reusable.residue = 0;
           }
           ++childrenCounter;
           reusable = pq.insertWithOverflow(reusable);
-          if (reusable != null) {
-            // TODO (Facet): is other logic (not add) needed, per aggregator?
-            parentResultNode.residue += reusable.value;
-          }
         }
       }
       if (localDepth < depth) {

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java Thu Jan 24 12:43:34 2013
@@ -266,9 +266,7 @@ public class TopKInEachNodeHandler exten
           tosOrdinal = siblings[tosOrdinal];
         }
         // now it is inside. Run it and all its siblings inside the partition through a heap
-        // and in doing so, count them, find best K, and sum into residue
-        double residue = 0f;  // the sum of all the siblings from this partition that do not make 
-        // it to top K
+        // and in doing so, count them, find best K
         pq.clear();
 
         //reusables are consumed as from a stack. The stack starts full and returns full.
@@ -286,10 +284,6 @@ public class TopKInEachNodeHandler exten
             ac.value = value; 
             ac = pq.insertWithOverflow(ac);
             if (null != ac) {
-              residue += ac.value;
-              // TODO (Facet): could it be that we need to do something
-              // else, not add, depending on the aggregator?
-
               /* when a facet is excluded from top K, because already in this partition it has
                * K better siblings, it is only recursed for count only.
                */ 
@@ -320,7 +314,7 @@ public class TopKInEachNodeHandler exten
         // and add ords to sibling stack, and make a note in siblingExplored that these are to 
         // be visited now
         if (ords.length > 0) {
-          AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
+          AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals));
           bestSignlingsStack[localDepth] = ords;
           siblingExplored[localDepth] = ords.length-1;
           ordinalStack[localDepth] = ords[ords.length-1];
@@ -449,8 +443,7 @@ public class TopKInEachNodeHandler exten
       IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
       IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
       IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
-      //iterate over all ordinals in tfr that are maps to their children (and the residue over 
-      // non included chilren)
+      //iterate over all ordinals in tfr that are mapped to their children
       while (tfrIntIterator.hasNext()) {
         int tfrkey = tfrIntIterator.next();
         AACO tmpToReturnAACO = null;
@@ -467,7 +460,6 @@ public class TopKInEachNodeHandler exten
           }
           int[] resOrds = new int [resLength];
           double[] resVals = new double [resLength];
-          double resResidue = tmpToReturnAACO.residue + tfrAACO.residue;
           int indexIntoTmpToReturn = 0;
           int indexIntoTFR = 0;
           ACComparator merger = getSuitableACComparator(); // by facet Request
@@ -504,15 +496,9 @@ public class TopKInEachNodeHandler exten
           // altogether yielding no more that best K kids for tfrkey, not to appear in the new shape of 
           // tmpToReturn
 
-          while (indexIntoTmpToReturn < tmpToReturnAACO.ordinals.length) {
-            resResidue += tmpToReturnAACO.values[indexIntoTmpToReturn++];
-          }
-          while (indexIntoTFR < tfrAACO.ordinals.length) {
-            resResidue += tfrAACO.values[indexIntoTFR++];
-          }
           //update the list of best kids of tfrkey as appear in tmpToReturn
-          tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals, resResidue));
-        } // endof need to merge both AACO -- children and residue for same ordinal
+          tmpToReturnMapToACCOs.put(tfrkey, new AACO(resOrds, resVals));
+        } // endof need to merge both AACO -- children for same ordinal
 
       } // endof loop over all ordinals in tfr 
     } // endof loop over all temporary facet results to merge
@@ -682,19 +668,15 @@ public class TopKInEachNodeHandler exten
    * potential nodes of the {@link FacetResult} tree  
    * (i.e., the descendants of the root node, no deeper than the specified depth).
    * No more than K ( = {@link FacetRequest#getNumResults()}) 
-   * siblings are enumerated, and  
-   * <i>residue</i> holds the sum of values of the siblings rejected from the 
-   * enumerated top K.
+   * siblings are enumerated.
    * @lucene.internal
    */
   protected static final class AACO {
     int [] ordinals; // ordinals of the best K children, sorted from best to least
     double [] values; // the respective values for these children
-    double residue; // sum of values of all other children, that did not get into top K
-    AACO (int[] ords, double[] vals, double r) {
+    AACO (int[] ords, double[] vals) {
       this.ordinals = ords;
       this.values = vals;
-      this.residue = r;
     }
   }
 
@@ -787,7 +769,6 @@ public class TopKInEachNodeHandler exten
       list.add(generateNode(aaco.ordinals[i], aaco.values[i], mapToAACOs));
     }
     node.subResults = list;
-    node.residue = aaco.residue;
     return node;  
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/results/FacetResultNode.java Thu Jan 24 12:43:34 2013
@@ -59,14 +59,6 @@ public class FacetResultNode {
   public double value;
 
   /**
-   * The total value of screened out sub results. If only part of the results
-   * were returned (usually because only the top-K categories are requested),
-   * then this provides information on "what else is there under this result 
-   * node".
-   */
-  public double residue;
-  
-  /**
    * The sub-results of this result. If {@link FacetRequest#getResultMode()} is
    * {@link ResultMode#PER_NODE_IN_TREE}, every sub result denotes an immediate
    * child of this node. Otherwise, it is a descendant of any level.
@@ -100,9 +92,6 @@ public class FacetResultNode {
       sb.append(label.toString());
     }
     sb.append(" (").append(Double.toString(value)).append(")");
-    if (residue > 0) {
-      sb.append(" (residue=").append(residue).append(")");
-    }
     for (FacetResultNode sub : subResults) {
       sb.append("\n").append(prefix).append(sub.toString(prefix + "  "));
     }

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java Thu Jan 24 12:43:34 2013
@@ -279,8 +279,13 @@ public class RepeatableSampler extends S
      * into a bounded PQ (retains only sampleSize highest weights).
      */
     ScoredDocIDsIterator it = collection.iterator();
+    MI mi = null;
     while (it.next()) {
-      pq.insertWithReuse((int)(it.getDocID() * PHI_32) & 0x7FFFFFFF);
+      if (mi == null) {
+        mi = new MI();
+      }
+      mi.value = (int) (it.getDocID() * PHI_32) & 0x7FFFFFFF;
+      mi = pq.insertWithOverflow(mi);
     }
     if (returnTimings) {
       times[1] = System.currentTimeMillis();
@@ -290,18 +295,26 @@ public class RepeatableSampler extends S
      */
     Object[] heap = pq.getHeap();
     for (int si = 0; si < sampleSize; si++) {
-      sample[si] = (int)(((IntPriorityQueue.MI)(heap[si+1])).value * PHI_32I) & 0x7FFFFFFF;
+      sample[si] = (int)(((MI) heap[si+1]).value * PHI_32I) & 0x7FFFFFFF;
     }
     if (returnTimings) {
       times[2] = System.currentTimeMillis();
     }
   }
+  
+  /**
+   * A mutable integer that lets queue objects be reused once they start overflowing.
+   */
+  private static class MI {
+    MI() { }
+    public int value;
+  }
 
   /**
    * A bounded priority queue for Integers, to retain a specified number of
    * the highest-weighted values for return as a random sample.
    */
-  private static class IntPriorityQueue extends PriorityQueue<Object> {
+  private static class IntPriorityQueue extends PriorityQueue<MI> {
 
     /**
      * Creates a bounded PQ of size <code>size</code>.
@@ -312,17 +325,6 @@ public class RepeatableSampler extends S
     }
 
     /**
-     * Inserts an integer with overflow and object reuse.
-     */
-    public void insertWithReuse(int intval) {
-      if (this.mi == null) {
-        this.mi = new MI();
-      }
-      this.mi.value = intval;
-      this.mi = (MI)this.insertWithOverflow(this.mi);
-    }
-
-    /**
      * Returns the underlying data structure for faster access. Extracting elements
      * one at a time would require N logN time, and since we want the elements sorted
      * in ascending order by value (not weight), the array is useful as-is.
@@ -338,23 +340,10 @@ public class RepeatableSampler extends S
      * @return True if <code>o1</code> weighs less than <code>o2</code>.
      */
     @Override
-    public boolean lessThan(Object o1, Object o2) {
-      return ((MI)o1).value < ((MI)o2).value;
-    }
-
-    /**
-     * A mutable integer that lets queue objects be reused once they start overflowing.
-     */
-    private static class MI {
-      MI() { }
-      public int value;
+    public boolean lessThan(MI o1, MI o2) {
+      return o1.value < o2.value;
     }
 
-    /**
-     * The mutable integer instance for reuse after first overflow.
-     */
-    private MI mi;
-
   }
 
   /**

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java Thu Jan 24 12:43:34 2013
@@ -39,8 +39,7 @@ import org.apache.lucene.index.IndexRead
  * <p>
  * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
  * does not guarantee accurate values for
- * {@link FacetResult#getNumValidDescendants()} &
- * {@link FacetResultNode#residue}.
+ * {@link FacetResult#getNumValidDescendants()}.
  * 
  * @lucene.experimental
  */
@@ -187,17 +186,6 @@ public abstract class Sampler {
       trimmed.add(trimmedNode);
     }
     
-    /*
-     * If we are trimming, it means Sampling is in effect and the extra
-     * (over-sampled) results are being trimmed. Although the residue is not
-     * guaranteed to be accurate for Sampling, we try our best to fix it.
-     * The node's residue now will take under account the sub-nodes we're
-     * trimming.
-     */
-    for (int i = size; i < node.subResults.size(); i++) {
-      node.residue += node.subResults.get(i).value;
-    }
-    
     node.subResults = trimmed;
   }
 

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java Thu Jan 24 12:43:34 2013
@@ -12,7 +12,6 @@ import org.apache.lucene.facet.search.Sc
 import org.apache.lucene.facet.search.StandardFacetsAccumulator;
 import org.apache.lucene.facet.search.params.FacetSearchParams;
 import org.apache.lucene.facet.search.results.FacetResult;
-import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.IndexReader;
@@ -48,8 +47,7 @@ import org.apache.lucene.index.IndexRead
  * <p>
  * Note: Sampling accumulation (Accumulation over a sampled-set of the results),
  * does not guarantee accurate values for
- * {@link FacetResult#getNumValidDescendants()} &
- * {@link FacetResultNode#residue}.
+ * {@link FacetResult#getNumValidDescendants()}.
  * 
  * @see Sampler
  * @lucene.experimental

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java Thu Jan 24 12:43:34 2013
@@ -1,5 +1,7 @@
 package org.apache.lucene.facet.taxonomy;
 
+import org.apache.lucene.util.Constants;
+
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -26,6 +28,10 @@ package org.apache.lucene.facet.taxonomy
  */
 public class CategoryPath implements Comparable<CategoryPath> {
 
+  // TODO: revisit when IBM releases Java 7 newer than SR3 (with a fix)
+  // to validate, run e.g. TestAssociationExample with -Dtests.iters=1000
+  private static final boolean IS_J9_JAVA7 = Constants.JRE_IS_MINIMUM_JAVA7 && Constants.JVM_VENDOR.contains("IBM");
+
   /** An empty {@link CategoryPath} */
   public static final CategoryPath EMPTY = new CategoryPath();
 
@@ -47,7 +53,7 @@ public class CategoryPath implements Com
   }
 
   // Used by subpath
-  private CategoryPath(CategoryPath copyFrom, int prefixLen) {
+  private CategoryPath(final CategoryPath copyFrom, final int prefixLen) {
     // while the code which calls this method is safe, at some point a test
     // tripped on AIOOBE in toString, but we failed to reproduce. adding the
     // assert as a safety check.
@@ -59,14 +65,23 @@ public class CategoryPath implements Com
   }
   
   /** Construct from the given path components. */
-  public CategoryPath(String... components) {
+  public CategoryPath(final String... components) {
     assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
-    this.components = components;
+    if (IS_J9_JAVA7) {
+      // On IBM J9 Java 1.7.0, if we do 'this.components = components', then
+      // at some point its length becomes 0 ... quite unexpectedly. If JIT is
+      // disabled, it doesn't happen. This bypasses the bug by copying the 
+      // array (note, Arrays.copyOf did not help either!).
+      this.components = new String[components.length];
+      System.arraycopy(components, 0, this.components, 0, components.length);
+    } else {
+      this.components = components;
+    }
     length = components.length;
   }
 
   /** Construct from a given path, separating path components with {@code delimiter}. */
-  public CategoryPath(String pathString, char delimiter) {
+  public CategoryPath(final String pathString, final char delimiter) {
     String[] comps = pathString.split(Character.toString(delimiter));
     if (comps.length == 1 && comps[0].isEmpty()) {
       components = EMPTY.components;
@@ -186,7 +201,7 @@ public class CategoryPath implements Com
   }
 
   /** Returns a sub-path of this path up to {@code length} components. */
-  public CategoryPath subpath(int length) {
+  public CategoryPath subpath(final int length) {
     if (length >= this.length || length < 0) {
       return this;
     } else if (length == 0) {

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java Thu Jan 24 12:43:34 2013
@@ -1,10 +1,7 @@
 package org.apache.lucene.facet.index.params;
 
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
 import org.apache.lucene.facet.search.DrillDown;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.util.PartitionsUtils;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.LuceneTestCase;
@@ -66,35 +63,4 @@ public class FacetIndexingParamsTest ext
     assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field);
   }
 
-  @Test
-  public void testCategoryPolicies() {
-    FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
-    // check path policy
-    CategoryPath cp = CategoryPath.EMPTY;
-    PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
-    assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
-    for (int i = 0; i < 30; i++) {
-      int nComponents = random().nextInt(10) + 1;
-      String[] components = new String[nComponents];
-      for (int j = 0; j < components.length; j++) {
-        components[j] = (Integer.valueOf(random().nextInt(30))).toString();
-      }
-      cp = new CategoryPath(components);
-      assertEquals("path policy does not match default for " + cp.toString('/'), 
-          pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
-    }
-
-    // check ordinal policy
-    OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
-    assertEquals("ordinal policy does not match default for root", 
-        ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL), 
-        dfip.getOrdinalPolicy().shouldAdd(TaxonomyReader.ROOT_ORDINAL));
-    for (int i = 0; i < 30; i++) {
-      int ordinal = random().nextInt();
-      assertEquals("ordinal policy does not match default for " + ordinal, 
-          ordinalPolicy.shouldAdd(ordinal),
-          dfip.getOrdinalPolicy().shouldAdd(ordinal));
-    }
-  }
-
 }
\ No newline at end of file

Modified: lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java?rev=1437982&r1=1437981&r2=1437982&view=diff
==============================================================================
--- lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java (original)
+++ lucene/dev/branches/lucene4547/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java Thu Jan 24 12:43:34 2013
@@ -4,7 +4,9 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -12,9 +14,9 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.facet.index.FacetFields;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
 import org.apache.lucene.facet.index.params.CategoryListParams;
 import org.apache.lucene.facet.index.params.FacetIndexingParams;
+import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
 import org.apache.lucene.facet.search.params.CountFacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest;
 import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
@@ -67,8 +69,11 @@ public class CountingFacetsCollectorTest
   
   private static final Term A = new Term("f", "a");
   private static final CategoryPath CP_A = new CategoryPath("A"), CP_B = new CategoryPath("B");
+  private static final CategoryPath CP_C = new CategoryPath("C"), CP_D = new CategoryPath("D"); // indexed w/ NO_PARENTS
   private static final int NUM_CHILDREN_CP_A = 5, NUM_CHILDREN_CP_B = 3;
+  private static final int NUM_CHILDREN_CP_C = 5, NUM_CHILDREN_CP_D = 5;
   private static final CategoryPath[] CATEGORIES_A, CATEGORIES_B;
+  private static final CategoryPath[] CATEGORIES_C, CATEGORIES_D;
   static {
     CATEGORIES_A = new CategoryPath[NUM_CHILDREN_CP_A];
     for (int i = 0; i < NUM_CHILDREN_CP_A; i++) {
@@ -78,11 +83,24 @@ public class CountingFacetsCollectorTest
     for (int i = 0; i < NUM_CHILDREN_CP_B; i++) {
       CATEGORIES_B[i] = new CategoryPath(CP_B.components[0], Integer.toString(i));
     }
+    
+    // NO_PARENTS categories
+    CATEGORIES_C = new CategoryPath[NUM_CHILDREN_CP_C];
+    for (int i = 0; i < NUM_CHILDREN_CP_C; i++) {
+      CATEGORIES_C[i] = new CategoryPath(CP_C.components[0], Integer.toString(i));
+    }
+    
+    // Multi-level categories
+    CATEGORIES_D = new CategoryPath[NUM_CHILDREN_CP_D];
+    for (int i = 0; i < NUM_CHILDREN_CP_D; i++) {
+      String val = Integer.toString(i);
+      CATEGORIES_D[i] = new CategoryPath(CP_D.components[0], val, val + val); // e.g. D/1/11, D/2/22...
+    }
   }
   
-  protected static Directory indexDir, taxoDir;
-  protected static ObjectToIntMap<CategoryPath> allExpectedCounts, termExpectedCounts;
-  protected static int numChildrenIndexedA, numChildrenIndexedB;
+  private static Directory indexDir, taxoDir;
+  private static ObjectToIntMap<CategoryPath> allExpectedCounts, termExpectedCounts;
+  private static FacetIndexingParams fip;
 
   @AfterClass
   public static void afterClassCountingFacetsCollectorTest() throws Exception {
@@ -104,6 +122,11 @@ public class CountingFacetsCollectorTest
     ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
     categories.addAll(categories_a.subList(0, numFacetsA));
     categories.addAll(categories_b.subList(0, numFacetsB));
+    
+    // add the NO_PARENTS categories
+    categories.add(CATEGORIES_C[random().nextInt(NUM_CHILDREN_CP_C)]);
+    categories.add(CATEGORIES_D[random().nextInt(NUM_CHILDREN_CP_D)]);
+
     return categories;
   }
 
@@ -115,6 +138,9 @@ public class CountingFacetsCollectorTest
       throws IOException {
     List<CategoryPath> docCategories = randomCategories(random());
     for (CategoryPath cp : docCategories) {
+      if (cp.components[0].equals(CP_D.components[0])) {
+        cp = cp.subpath(2); // we'll get counts for the 2nd level only
+      }
       allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
       if (updateTermExpectedCounts) {
         termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
@@ -123,9 +149,13 @@ public class CountingFacetsCollectorTest
     // add 1 to each dimension
     allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
     allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
+    allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
+    allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
     if (updateTermExpectedCounts) {
       termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
       termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
+      termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
+      termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
     }
     
     facetFields.addFields(doc, docCategories);
@@ -145,7 +175,7 @@ public class CountingFacetsCollectorTest
       ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
     Random random = random();
     int numDocs = atLeast(random, 2);
-    FacetFields facetFields = new FacetFields(taxoWriter);
+    FacetFields facetFields = new FacetFields(taxoWriter, fip);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       addFacets(doc, facetFields, false);
@@ -158,7 +188,7 @@ public class CountingFacetsCollectorTest
       ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
     Random random = random();
     int numDocs = atLeast(random, 2);
-    FacetFields facetFields = new FacetFields(taxoWriter);
+    FacetFields facetFields = new FacetFields(taxoWriter, fip);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       addFacets(doc, facetFields, true);
@@ -172,7 +202,7 @@ public class CountingFacetsCollectorTest
       ObjectToIntMap<CategoryPath> expectedCounts) throws IOException {
     Random random = random();
     int numDocs = atLeast(random, 2);
-    FacetFields facetFields = new FacetFields(taxoWriter);
+    FacetFields facetFields = new FacetFields(taxoWriter, fip);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       boolean hasContent = random.nextBoolean();
@@ -190,12 +220,20 @@ public class CountingFacetsCollectorTest
     ObjectToIntMap<CategoryPath> counts = new ObjectToIntMap<CategoryPath>();
     counts.put(CP_A, 0);
     counts.put(CP_B, 0);
+    counts.put(CP_C, 0);
+    counts.put(CP_D, 0);
     for (CategoryPath cp : CATEGORIES_A) {
       counts.put(cp, 0);
     }
     for (CategoryPath cp : CATEGORIES_B) {
       counts.put(cp, 0);
     }
+    for (CategoryPath cp : CATEGORIES_C) {
+      counts.put(cp, 0);
+    }
+    for (CategoryPath cp : CATEGORIES_D) {
+      counts.put(cp.subpath(2), 0);
+    }
     return counts;
   }
   
@@ -214,6 +252,19 @@ public class CountingFacetsCollectorTest
     conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
     IndexWriter indexWriter = new IndexWriter(indexDir, conf);
     TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
+    CategoryListParams allParents = new CategoryListParams();
+    CategoryListParams noParents = new CategoryListParams("no_parents") {
+      @Override
+      public OrdinalPolicy getOrdinalPolicy() {
+        return OrdinalPolicy.NO_PARENTS;
+      }
+    };
+    Map<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
+    params.put(CP_A, allParents);
+    params.put(CP_B, allParents);
+    params.put(CP_C, noParents);
+    params.put(CP_D, noParents);
+    fip = new PerDimensionIndexingParams(params);
     
     allExpectedCounts = newCounts();
     termExpectedCounts = newCounts();
@@ -230,23 +281,11 @@ public class CountingFacetsCollectorTest
     // segment w/ categories and some content
     indexDocsWithFacetsAndSomeTerms(indexWriter, taxoWriter, allExpectedCounts);
     
-    // set num children indexed from each dimension
-    for (CategoryPath cp : CATEGORIES_A) {
-      if (termExpectedCounts.get(cp) > 0) {
-        ++numChildrenIndexedA;
-      }
-    }
-    for (CategoryPath cp : CATEGORIES_B) {
-      if (termExpectedCounts.get(cp) > 0) {
-        ++numChildrenIndexedB;
-      }
-    }
-    
     IOUtils.close(indexWriter, taxoWriter);
   }
   
   @Test
-  public void testInvalidValidParams() throws Exception {
+  public void testInvalidParams() throws Exception {
     final CategoryPath dummyCP = new CategoryPath("a");
     final FacetRequest dummyFR = new CountFacetRequest(dummyCP, 10);
 
@@ -274,13 +313,14 @@ public class CountingFacetsCollectorTest
     cfr.setNumLabel(2);
     assertNotNull("numToLabel should not be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(cfr)));
     
-    FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams("moo")) {
+    FacetIndexingParams fip = new FacetIndexingParams() {
       @Override
-      public List<CategoryListParams> getAllCategoryListParams() {
-        return Arrays.asList(new CategoryListParams[] { clParams, clParams });
+      public CategoryListParams getCategoryListParams(CategoryPath category) {
+        return new CategoryListParams();
       }
     };
-    assertNotNull("only one CLP should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR)));
+    assertNotNull("only one CLP should be allowed", CountingFacetsCollector.assertParams(new FacetSearchParams(fip, dummyFR, 
+        new CountFacetRequest(new CategoryPath("moo"), 10))));
     
     fip = new FacetIndexingParams(new CategoryListParams("moo")) {
       final CategoryListParams clp = new CategoryListParams() {
@@ -328,39 +368,6 @@ public class CountingFacetsCollectorTest
     for (FacetResult res : facetResults) {
       FacetResultNode root = res.getFacetResultNode();
       assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
-      assertEquals("invalid residue", 0, (int) root.residue);
-      for (FacetResultNode child : root.subResults) {
-        assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
-      }
-    }
-    
-    IOUtils.close(indexReader, taxoReader);
-  }
-  
-  @Test
-  public void testResidue() throws Exception {
-    // test the collector's handling of residue
-    DirectoryReader indexReader = DirectoryReader.open(indexDir);
-    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
-    IndexSearcher searcher = new IndexSearcher(indexReader);
-    
-    // asking for top 1 is the only way to guarantee there will be a residue
-    // provided that enough children were indexed (see below)
-    FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CP_A, 1), new CountFacetRequest(CP_B, 1));
-    FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
-    TermQuery q = new TermQuery(A);
-    searcher.search(q, fc);
-    
-    List<FacetResult> facetResults = fc.getFacetResults();
-    assertEquals("invalid number of facet results", 2, facetResults.size());
-    for (FacetResult res : facetResults) {
-      FacetResultNode root = res.getFacetResultNode();
-      assertEquals("wrong count for " + root.label, termExpectedCounts.get(root.label), (int) root.value);
-      // make sure randomness didn't pick only one child of root (otherwise there's no residue)
-      int numChildrenIndexed = res.getFacetRequest().categoryPath == CP_A ? numChildrenIndexedA : numChildrenIndexedB;
-      if (numChildrenIndexed > 1) {
-        assertTrue("expected residue", root.residue > 0);
-      }
       for (FacetResultNode child : root.subResults) {
         assertEquals("wrong count for " + child.label, termExpectedCounts.get(child.label), (int) child.value);
       }
@@ -385,9 +392,16 @@ public class CountingFacetsCollectorTest
     for (FacetResult res : facetResults) {
       FacetResultNode root = res.getFacetResultNode();
       assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
-      assertEquals("invalid residue", 0, (int) root.residue);
+      int prevValue = Integer.MAX_VALUE;
+      int prevOrdinal = Integer.MAX_VALUE;
       for (FacetResultNode child : root.subResults) {
         assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
+        assertTrue("wrong sort order of sub results: child.value=" + child.value + " prevValue=" + prevValue, child.value <= prevValue);
+        if (child.value == prevValue) {
+          assertTrue("wrong sort order of sub results", child.ordinal < prevOrdinal);
+        }
+        prevValue = (int) child.value;
+        prevOrdinal = child.ordinal;
       }
     }
     
@@ -410,7 +424,6 @@ public class CountingFacetsCollectorTest
     for (FacetResult res : facetResults) {
       FacetResultNode root = res.getFacetResultNode();
       assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
-      assertEquals("invalid residue", 0, (int) root.residue);
       for (FacetResultNode child : root.subResults) {
         assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
       }
@@ -435,7 +448,6 @@ public class CountingFacetsCollectorTest
     for (FacetResult res : facetResults) {
       FacetResultNode root = res.getFacetResultNode();
       assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
-      assertEquals("invalid residue", 0, (int) root.residue);
       for (FacetResultNode child : root.subResults) {
         assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
       }
@@ -446,70 +458,25 @@ public class CountingFacetsCollectorTest
 
   @Test
   public void testNoParents() throws Exception {
-    // TODO: when OrdinalPolicy is on CLP, index the NO_PARENTS categories into
-    // their own dimension, and avoid this index creation
-    Directory indexDir = newDirectory();
-    Directory taxoDir = newDirectory();
-    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
-    conf.setMaxBufferedDocs(2);
-    conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
-    IndexWriter indexWriter = new IndexWriter(indexDir, conf);
-    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
-    FacetIndexingParams fip = new FacetIndexingParams() {
-      @Override
-      public OrdinalPolicy getOrdinalPolicy() {
-        return OrdinalPolicy.NO_PARENTS;
-      }
-    };
-    FacetFields facetFields = new FacetFields(taxoWriter, fip);
-    ObjectToIntMap<CategoryPath> expCounts = newCounts();
-
-    // index few docs with categories, not sharing parents.
-    int numDocs = atLeast(10);
-    final CategoryPath cpc = new CategoryPath("L1", "L2", "L3");
-    for (int i = 0; i < numDocs; i++) {
-      Document doc = new Document();
-      ArrayList<CategoryPath> categories = new ArrayList<CategoryPath>();
-      CategoryPath cpa = CATEGORIES_A[random().nextInt(NUM_CHILDREN_CP_A)];
-      CategoryPath cpb = CATEGORIES_B[random().nextInt(NUM_CHILDREN_CP_B)];
-      categories.add(cpa);
-      categories.add(cpb);
-      categories.add(cpc);
-      expCounts.put(cpa, expCounts.get(cpa) + 1);
-      expCounts.put(cpb, expCounts.get(cpb) + 1);
-      facetFields.addFields(doc, categories);
-      indexWriter.addDocument(doc);
-    }
-    expCounts.put(CP_A, numDocs);
-    expCounts.put(CP_B, numDocs);
-    for (int i = 0; i < cpc.length; i++) {
-      expCounts.put(cpc.subpath(i+1), numDocs);
-    }
-    
-    IOUtils.close(indexWriter, taxoWriter);
-
     DirectoryReader indexReader = DirectoryReader.open(indexDir);
     TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
     IndexSearcher searcher = new IndexSearcher(indexReader);
-    FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_A, NUM_CHILDREN_CP_A), 
-        new CountFacetRequest(CP_B, NUM_CHILDREN_CP_B), new CountFacetRequest(cpc.subpath(1), 10));
+    FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(CP_C, NUM_CHILDREN_CP_C), 
+        new CountFacetRequest(CP_D, NUM_CHILDREN_CP_D));
     FacetsCollector fc = new CountingFacetsCollector(fsp , taxoReader);
     searcher.search(new MatchAllDocsQuery(), fc);
     
     List<FacetResult> facetResults = fc.getFacetResults();
-    assertEquals("invalid number of facet results", 3, facetResults.size());
+    assertEquals("invalid number of facet results", fsp.facetRequests.size(), facetResults.size());
     for (FacetResult res : facetResults) {
       FacetResultNode root = res.getFacetResultNode();
-      assertEquals("wrong count for " + root.label, expCounts.get(root.label), (int) root.value);
-      assertEquals("invalid residue", 0, (int) root.residue);
+      assertEquals("wrong count for " + root.label, allExpectedCounts.get(root.label), (int) root.value);
       for (FacetResultNode child : root.subResults) {
-        assertEquals("wrong count for " + child.label, expCounts.get(child.label), (int) child.value);
+        assertEquals("wrong count for " + child.label, allExpectedCounts.get(child.label), (int) child.value);
       }
     }
     
     IOUtils.close(indexReader, taxoReader);
-    
-    IOUtils.close(indexDir, taxoDir);
   }
   
 }