You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/01/23 21:01:07 UTC
svn commit: r1437678 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/facet/ lucene/facet/src/java/org/apache/lucene/facet/index/
lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/
lucene/facet/src/java/org/apache/lucene/facet/in...
Author: shaie
Date: Wed Jan 23 20:01:06 2013
New Revision: 1437678
URL: http://svn.apache.org/viewvc?rev=1437678&view=rev
Log:
LUCENE-4700: move OrdinalPolicy to CategoryListParams
Removed:
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/categorypolicy/
lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/categorypolicy/
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/facet/ (props changed)
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java
lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java
lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Jan 23 20:01:06 2013
@@ -22,6 +22,11 @@ Changes in backwards compatibility polic
FacetsCollector) and CountingFacetsCollector. FacetsCollector.create()
returns the most optimized collector for the given parameters.
(Shai Erera, Michael McCandless)
+
+* LUCENE-4700: OrdinalPolicy is now per CategoryListParams, and is no longer
+ an interface, but rather an enum with values NO_PARENTS and ALL_PARENTS.
+ PathPolicy was removed; you should extend FacetFields and DrillDownStream
+ to control which categories are added as drill-down terms. (Shai Erera)
Optimizations
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java Wed Jan 23 20:01:06 2013
@@ -6,8 +6,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
@@ -120,7 +120,7 @@ public class CountingListBuilder impleme
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
TaxonomyWriter taxoWriter) {
this.taxoWriter = taxoWriter;
- this.ordinalPolicy = indexingParams.getOrdinalPolicy();
+ this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
} else {
@@ -143,14 +143,14 @@ public class CountingListBuilder impleme
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
- for (int i = 0; i < upto; i++) {
- int ordinal = ordinals.ints[i];
- int parent = taxoWriter.getParent(ordinal);
- while (parent > 0) {
- if (ordinalPolicy.shouldAdd(parent)) {
+ if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
+ for (int i = 0; i < upto; i++) {
+ int ordinal = ordinals.ints[i];
+ int parent = taxoWriter.getParent(ordinal);
+ while (parent > 0) {
ordinals.ints[ordinals.length++] = parent;
+ parent = taxoWriter.getParent(parent);
}
- parent = taxoWriter.getParent(parent);
}
}
return ordinalsEncoder.encode(ordinals);
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/DrillDownStream.java Wed Jan 23 20:01:06 2013
@@ -5,7 +5,6 @@ import java.util.Iterator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
@@ -36,7 +35,6 @@ public class DrillDownStream extends Tok
private final FacetIndexingParams indexingParams;
private final Iterator<CategoryPath> categories;
private final CharTermAttribute termAttribute;
- private final PathPolicy pathPolicy;
private CategoryPath current;
private boolean isParent;
@@ -45,7 +43,6 @@ public class DrillDownStream extends Tok
termAttribute = addAttribute(CharTermAttribute.class);
this.categories = categories.iterator();
this.indexingParams = indexingParams;
- this.pathPolicy = indexingParams.getPathPolicy();
}
protected void addAdditionalAttributes(CategoryPath category, boolean isParent) {
@@ -71,10 +68,7 @@ public class DrillDownStream extends Tok
addAdditionalAttributes(current, isParent);
// prepare current for next call by trimming the last component (parents)
- do {
- // skip all parent categories which are not accepted by PathPolicy
- current = current.subpath(current.length - 1);
- } while (!pathPolicy.shouldAdd(current) && current.length > 0);
+ current = current.subpath(current.length - 1);
isParent = true;
return true;
}
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java Wed Jan 23 20:01:06 2013
@@ -36,9 +36,36 @@ import org.apache.lucene.util.encoding.U
*/
public class CategoryListParams implements Serializable {
+ /** OrdinalPolicy defines which ordinals are encoded for every document. */
+ public static enum OrdinalPolicy {
+ /**
+ * Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
+ * not encode the ordinals of A and A/B.
+ *
+ * <p>
+ * <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
+ * accumulator, which will fix the parents' counts, unless you are not
+ * interested in the parents' counts.
+ */
+ NO_PARENTS,
+
+ /**
+ * Encodes the ordinals of all path components. That is, the category A/B/C
+ * will encode the ordinals of A and A/B as well. This is the default
+ * {@link OrdinalPolicy}.
+ */
+ ALL_PARENTS
+ }
+
/** The default field used to store the facets information. */
public static final String DEFAULT_FIELD = "$facets";
+ /**
+ * The default {@link OrdinalPolicy} that's used when encoding a document's
+ * category ordinals.
+ */
+ public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
+
public final String field;
private final int hashCode;
@@ -92,6 +119,7 @@ public class CategoryListParams implemen
if (this.hashCode != other.hashCode) {
return false;
}
+
// The above hashcodes might equal each other in the case of a collision,
// so at this point only directly term equality testing will settle
// the equality test.
@@ -110,4 +138,9 @@ public class CategoryListParams implemen
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
}
+ /** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
+ public OrdinalPolicy getOrdinalPolicy() {
+ return DEFAULT_ORDINAL_POLICY;
+ }
+
}
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/index/params/FacetIndexingParams.java Wed Jan 23 20:01:06 2013
@@ -3,8 +3,7 @@ package org.apache.lucene.facet.index.pa
import java.util.Collections;
import java.util.List;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.taxonomy.CategoryPath;
@@ -47,9 +46,8 @@ public class FacetIndexingParams {
protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();
/**
- * A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
- * {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
- * {@link #FacetIndexingParams()}.
+ * A {@link FacetIndexingParams} which fixes a single
+ * {@link CategoryListParams} with {@link OrdinalPolicy#ALL_PARENTS}.
*/
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
@@ -62,8 +60,6 @@ public class FacetIndexingParams {
*/
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
- private final OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
- private final PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
private final int partitionSize = Integer.MAX_VALUE;
protected final CategoryListParams clParams;
@@ -130,32 +126,12 @@ public class FacetIndexingParams {
return Collections.singletonList(clParams);
}
- /**
- * Returns the {@link OrdinalPolicy} that is used during indexing. By default
- * returns {@link OrdinalPolicy#ALL_PARENTS} which means that the full
- * hierarchy will be stored for every document.
- */
- public OrdinalPolicy getOrdinalPolicy() {
- return ordinalPolicy;
- }
-
- /**
- * Returns the {@link PathPolicy} that is used during indexing. By default
- * returns {@link PathPolicy#ALL_CATEGORIES} which means that the full
- * hierarchy is added as drill-down terms for every document.
- */
- public PathPolicy getPathPolicy() {
- return pathPolicy;
- }
-
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
- result = prime * result + ((ordinalPolicy == null) ? 0 : ordinalPolicy.hashCode());
result = prime * result + partitionSize;
- result = prime * result + ((pathPolicy == null) ? 0 : pathPolicy.hashCode());
for (CategoryListParams clp : getAllCategoryListParams()) {
result ^= clp.hashCode();
@@ -183,23 +159,9 @@ public class FacetIndexingParams {
} else if (!clParams.equals(other.clParams)) {
return false;
}
- if (ordinalPolicy == null) {
- if (other.ordinalPolicy != null) {
- return false;
- }
- } else if (!ordinalPolicy.equals(other.ordinalPolicy)) {
- return false;
- }
if (partitionSize != other.partitionSize) {
return false;
}
- if (pathPolicy == null) {
- if (other.pathPolicy != null) {
- return false;
- }
- } else if (!pathPolicy.equals(other.pathPolicy)) {
- return false;
- }
Iterable<CategoryListParams> cLs = getAllCategoryListParams();
Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java Wed Jan 23 20:01:06 2013
@@ -7,8 +7,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
@@ -83,6 +83,7 @@ import org.apache.lucene.util.encoding.D
public class CountingFacetsCollector extends FacetsCollector {
private final FacetSearchParams fsp;
+ private final OrdinalPolicy ordinalPolicy;
private final TaxonomyReader taxoReader;
private final BytesRef buf = new BytesRef(32);
private final FacetArrays facetArrays;
@@ -104,10 +105,12 @@ public class CountingFacetsCollector ext
assert assertParams(fsp) == null : assertParams(fsp);
this.fsp = fsp;
+ CategoryListParams clp = fsp.indexingParams.getCategoryListParams(null);
+ this.ordinalPolicy = clp.getOrdinalPolicy();
this.taxoReader = taxoReader;
this.facetArrays = facetArrays;
this.counts = facetArrays.getIntArray();
- this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
+ this.facetsField = clp.field;
this.useDirectSource = useDirectSource;
}
@@ -230,7 +233,7 @@ public class CountingFacetsCollector ext
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
- if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
+ if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
// need to count parents
countParents(arrays.parents());
}
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java Wed Jan 23 20:01:06 2013
@@ -3,8 +3,8 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.List;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
+import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/index/params/FacetIndexingParamsTest.java Wed Jan 23 20:01:06 2013
@@ -1,10 +1,7 @@
package org.apache.lucene.facet.index.params;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
-import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.search.DrillDown;
import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase;
@@ -66,35 +63,4 @@ public class FacetIndexingParamsTest ext
assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field);
}
- @Test
- public void testCategoryPolicies() {
- FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
- // check path policy
- CategoryPath cp = CategoryPath.EMPTY;
- PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
- assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
- for (int i = 0; i < 30; i++) {
- int nComponents = random().nextInt(10) + 1;
- String[] components = new String[nComponents];
- for (int j = 0; j < components.length; j++) {
- components[j] = (Integer.valueOf(random().nextInt(30))).toString();
- }
- cp = new CategoryPath(components);
- assertEquals("path policy does not match default for " + cp.toString('/'),
- pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
- }
-
- // check ordinal policy
- OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
- assertEquals("ordinal policy does not match default for root",
- ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL),
- dfip.getOrdinalPolicy().shouldAdd(TaxonomyReader.ROOT_ORDINAL));
- for (int i = 0; i < 30; i++) {
- int ordinal = random().nextInt();
- assertEquals("ordinal policy does not match default for " + ordinal,
- ordinalPolicy.shouldAdd(ordinal),
- dfip.getOrdinalPolicy().shouldAdd(ordinal));
- }
- }
-
}
\ No newline at end of file
Modified: lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java?rev=1437678&r1=1437677&r2=1437678&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java Wed Jan 23 20:01:06 2013
@@ -12,7 +12,6 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.index.FacetFields;
-import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
@@ -423,8 +422,13 @@ public class CountingFacetsCollectorTest
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetIndexingParams fip = new FacetIndexingParams() {
@Override
- public OrdinalPolicy getOrdinalPolicy() {
- return OrdinalPolicy.NO_PARENTS;
+ public CategoryListParams getCategoryListParams(CategoryPath category) {
+ return new CategoryListParams() {
+ @Override
+ public OrdinalPolicy getOrdinalPolicy() {
+ return OrdinalPolicy.NO_PARENTS;
+ }
+ };
}
};
FacetFields facetFields = new FacetFields(taxoWriter, fip);