You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/01/04 12:19:11 UTC
svn commit: r1555338 [2/3] - in /lucene/dev/trunk: ./ dev-tools/ lucene/
lucene/analysis/ lucene/analysis/common/ lucene/backwards/ lucene/benchmark/
lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
lucene/benchmark/src/java/org/apac...
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -1,12 +1,5 @@
package org.apache.lucene.demo.facet;
-import java.util.List;
-
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Test;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -24,32 +17,19 @@ import org.junit.Test;
* limitations under the License.
*/
+import java.util.List;
+
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
public class TestAssociationsFacetsExample extends LuceneTestCase {
- private static final double[] EXPECTED_INT_SUM_RESULTS = { 4, 2};
- private static final double[] EXPECTED_FLOAT_SUM_RESULTS = { 1.62, 0.34};
-
@Test
public void testExamples() throws Exception {
List<FacetResult> res = new AssociationsFacetsExample().runSumAssociations();
-
assertEquals("Wrong number of results", 2, res.size());
-
- for (FacetResult fres : res) {
- assertEquals("Wrong number of facets", 2, fres.getNumValidDescendants());
- }
-
- Iterable<? extends FacetResultNode> it = res.get(0).getFacetResultNode().subResults;
- int i = 0;
- for (FacetResultNode fResNode : it) {
- assertEquals("Wrong result for facet " + fResNode.label, EXPECTED_INT_SUM_RESULTS[i++], fResNode.value, 1E-5);
- }
-
- it = res.get(1).getFacetResultNode().subResults;
- i = 0;
- for (FacetResultNode fResNode : it) {
- assertEquals("Wrong result for facet " + fResNode.label, EXPECTED_FLOAT_SUM_RESULTS[i++], fResNode.value, 1E-5);
- }
- }
-
+ assertEquals("dim=tags path=[] value=-1 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString());
+ assertEquals("dim=genre path=[] value=-1.0 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString());
+ }
}
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestExpressionAggregationFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestExpressionAggregationFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestExpressionAggregationFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestExpressionAggregationFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -1,13 +1,5 @@
package org.apache.lucene.demo.facet;
-import java.util.List;
-import java.util.Locale;
-
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Test;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -25,25 +17,18 @@ import org.junit.Test;
* limitations under the License.
*/
-public class TestExpressionAggregationFacetsExample extends LuceneTestCase {
+import java.util.List;
+import java.util.Locale;
- private static String toSimpleString(FacetResult fr) {
- StringBuilder sb = new StringBuilder();
- toSimpleString(fr.getFacetRequest().categoryPath.length, 0, sb, fr.getFacetResultNode(), "");
- return sb.toString();
- }
-
- private static void toSimpleString(int startLength, int depth, StringBuilder sb, FacetResultNode node, String indent) {
- sb.append(String.format(Locale.ROOT, "%s%s (%.3f)\n", indent, node.label.components[startLength + depth - 1], node.value));
- for (FacetResultNode childNode : node.subResults) {
- toSimpleString(startLength, depth + 1, sb, childNode, indent + " ");
- }
- }
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+public class TestExpressionAggregationFacetsExample extends LuceneTestCase {
@Test
public void testSimple() throws Exception {
- List<FacetResult> facetResults = new ExpressionAggregationFacetsExample().runSearch();
- assertEquals("A (0.000)\n B (2.236)\n C (1.732)\n", toSimpleString(facetResults.get(0)));
+ FacetResult result = new ExpressionAggregationFacetsExample().runSearch();
+ assertEquals("dim=A path=[] value=3.9681187 childCount=2\n B (2.236068)\n C (1.7320508)\n", result.toString());
}
-
}
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestMultiCategoryListsFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestMultiCategoryListsFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestMultiCategoryListsFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestMultiCategoryListsFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -1,14 +1,5 @@
package org.apache.lucene.demo.facet;
-import java.util.List;
-
-import org.apache.lucene.facet.collections.ObjectToIntMap;
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Test;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -26,33 +17,19 @@ import org.junit.Test;
* limitations under the License.
*/
-public class TestMultiCategoryListsFacetsExample extends LuceneTestCase {
+import java.util.List;
- private static final ObjectToIntMap<CategoryPath> expectedCounts = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCounts.put(new CategoryPath("Publish Date", "2012"), 2);
- expectedCounts.put(new CategoryPath("Publish Date", "2010"), 2);
- expectedCounts.put(new CategoryPath("Publish Date", "1999"), 1);
- expectedCounts.put(new CategoryPath("Author", "Lisa"), 2);
- expectedCounts.put(new CategoryPath("Author", "Frank"), 1);
- expectedCounts.put(new CategoryPath("Author", "Susan"), 1);
- expectedCounts.put(new CategoryPath("Author", "Bob"), 1);
- }
-
- private void assertExpectedCounts(List<FacetResult> facetResults, ObjectToIntMap<CategoryPath> expCounts) {
- for (FacetResult res : facetResults) {
- FacetResultNode root = res.getFacetResultNode();
- for (FacetResultNode node : root.subResults) {
- assertEquals("incorrect count for " + node.label, expCounts.get(node.label), (int) node.value);
- }
- }
- }
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+public class TestMultiCategoryListsFacetsExample extends LuceneTestCase {
@Test
public void testExample() throws Exception {
- List<FacetResult> facetResults = new MultiCategoryListsFacetsExample().runSearch();
- assertEquals(2, facetResults.size());
- assertExpectedCounts(facetResults, expectedCounts);
+ List<FacetResult> results = new MultiCategoryListsFacetsExample().runSearch();
+ assertEquals(2, results.size());
+ assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", results.get(0).toString());
+ assertEquals("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", results.get(1).toString());
}
-
}
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -19,12 +19,7 @@ package org.apache.lucene.demo.facet;
import java.util.List;
-import org.apache.lucene.facet.collections.ObjectToIntMap;
-import org.apache.lucene.facet.range.LongRange;
-import org.apache.lucene.facet.range.RangeFacetRequest;
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase;
@@ -33,27 +28,12 @@ import org.junit.Test;
@SuppressCodecs("Lucene3x")
public class TestRangeFacetsExample extends LuceneTestCase {
- private static final ObjectToIntMap<CategoryPath> expectedCounts = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCounts.put(new CategoryPath("timestamp", "Past hour"), 4);
- expectedCounts.put(new CategoryPath("timestamp", "Past six hours"), 22);
- expectedCounts.put(new CategoryPath("timestamp", "Past day"), 87);
- }
-
- private void assertExpectedCounts(FacetResult res, ObjectToIntMap<CategoryPath> expCounts) {
- FacetResultNode root = res.getFacetResultNode();
- for (FacetResultNode node : root.subResults) {
- assertEquals("incorrect count for " + node.label, expCounts.get(node.label), (int) node.value);
- }
- }
-
@Test
public void testSimple() throws Exception {
RangeFacetsExample example = new RangeFacetsExample();
example.index();
- List<FacetResult> facetResults = example.search();
- assertEquals(1, facetResults.size());
- assertExpectedCounts(facetResults.get(0), expectedCounts);
+ FacetResult result = example.search();
+ assertEquals("dim=timestamp path=[] value=87 childCount=3\n Past hour (4)\n Past six hours (22)\n Past day (87)\n", result.toString());
example.close();
}
@@ -62,8 +42,7 @@ public class TestRangeFacetsExample exte
public void testDrillDown() throws Exception {
RangeFacetsExample example = new RangeFacetsExample();
example.index();
- List<FacetResult> facetResults = example.search();
- TopDocs hits = example.drillDown((LongRange) ((RangeFacetRequest<LongRange>) facetResults.get(0).getFacetRequest()).ranges[1]);
+ TopDocs hits = example.drillDown(example.PAST_SIX_HOURS);
assertEquals(22, hits.totalHits);
example.close();
}
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -1,14 +1,5 @@
package org.apache.lucene.demo.facet;
-import java.util.List;
-
-import org.apache.lucene.facet.collections.ObjectToIntMap;
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Test;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -26,46 +17,26 @@ import org.junit.Test;
* limitations under the License.
*/
+import java.util.List;
+
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
public class TestSimpleFacetsExample extends LuceneTestCase {
- private static final ObjectToIntMap<CategoryPath> expectedCounts = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCounts.put(new CategoryPath("Publish Date", "2012"), 2);
- expectedCounts.put(new CategoryPath("Publish Date", "2010"), 2);
- expectedCounts.put(new CategoryPath("Publish Date", "1999"), 1);
- expectedCounts.put(new CategoryPath("Author", "Lisa"), 2);
- expectedCounts.put(new CategoryPath("Author", "Frank"), 1);
- expectedCounts.put(new CategoryPath("Author", "Susan"), 1);
- expectedCounts.put(new CategoryPath("Author", "Bob"), 1);
- }
-
- private static final ObjectToIntMap<CategoryPath> expectedCountsDrillDown = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCountsDrillDown.put(new CategoryPath("Author", "Lisa"), 1);
- expectedCountsDrillDown.put(new CategoryPath("Author", "Bob"), 1);
- }
-
- private void assertExpectedCounts(List<FacetResult> facetResults, ObjectToIntMap<CategoryPath> expCounts) {
- for (FacetResult res : facetResults) {
- FacetResultNode root = res.getFacetResultNode();
- for (FacetResultNode node : root.subResults) {
- assertEquals("incorrect count for " + node.label, expCounts.get(node.label), (int) node.value);
- }
- }
- }
-
@Test
public void testSimple() throws Exception {
- List<FacetResult> facetResults = new SimpleFacetsExample().runSearch();
- assertEquals(2, facetResults.size());
- assertExpectedCounts(facetResults, expectedCounts);
+ List<FacetResult> results = new SimpleFacetsExample().runSearch();
+ assertEquals(2, results.size());
+ assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n", results.get(0).toString());
+ assertEquals("dim=Publish Date path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", results.get(1).toString());
}
@Test
public void testDrillDown() throws Exception {
- List<FacetResult> facetResults = new SimpleFacetsExample().runDrillDown();
- assertEquals(1, facetResults.size());
- assertExpectedCounts(facetResults, expectedCountsDrillDown);
+ FacetResult result = new SimpleFacetsExample().runDrillDown();
+ assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", result.toString());
}
-
}
Modified: lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java (original)
+++ lucene/dev/trunk/lucene/demo/src/test/org/apache/lucene/demo/facet/TestSimpleSortedSetFacetsExample.java Sat Jan 4 11:19:08 2014
@@ -1,15 +1,5 @@
package org.apache.lucene.demo.facet;
-import java.util.List;
-
-import org.apache.lucene.facet.collections.ObjectToIntMap;
-import org.apache.lucene.facet.search.FacetResult;
-import org.apache.lucene.facet.search.FacetResultNode;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
-import org.apache.lucene.util.LuceneTestCase;
-import org.junit.Test;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -27,48 +17,30 @@ import org.junit.Test;
* limitations under the License.
*/
+import java.util.List;
+
+import org.apache.lucene.facet.FacetResult;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+
// We require sorted set DVs:
@SuppressCodecs({"Lucene40", "Lucene41"})
public class TestSimpleSortedSetFacetsExample extends LuceneTestCase {
- private static final ObjectToIntMap<CategoryPath> expectedCounts = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCounts.put(new CategoryPath("Publish Year", "2012"), 2);
- expectedCounts.put(new CategoryPath("Publish Year", "2010"), 2);
- expectedCounts.put(new CategoryPath("Publish Year", "1999"), 1);
- expectedCounts.put(new CategoryPath("Author", "Lisa"), 2);
- expectedCounts.put(new CategoryPath("Author", "Frank"), 1);
- expectedCounts.put(new CategoryPath("Author", "Susan"), 1);
- expectedCounts.put(new CategoryPath("Author", "Bob"), 1);
- }
-
- private static final ObjectToIntMap<CategoryPath> expectedCountsDrillDown = new ObjectToIntMap<CategoryPath>();
- static {
- expectedCountsDrillDown.put(new CategoryPath("Author", "Lisa"), 1);
- expectedCountsDrillDown.put(new CategoryPath("Author", "Bob"), 1);
- }
-
- private void assertExpectedCounts(List<FacetResult> facetResults, ObjectToIntMap<CategoryPath> expCounts) {
- for (FacetResult res : facetResults) {
- FacetResultNode root = res.getFacetResultNode();
- for (FacetResultNode node : root.subResults) {
- assertEquals("incorrect count for " + node.label, expCounts.get(node.label), (int) node.value);
- }
- }
- }
-
@Test
public void testSimple() throws Exception {
- List<FacetResult> facetResults = new SimpleSortedSetFacetsExample().runSearch();
- assertEquals(2, facetResults.size());
- assertExpectedCounts(facetResults, expectedCounts);
+ List<FacetResult> results = new SimpleSortedSetFacetsExample().runSearch();
+ assertEquals(2, results.size());
+ assertEquals("dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n Frank (1)\n Susan (1)\n", results.get(0).toString());
+ assertEquals("dim=Publish Year path=[] value=5 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", results.get(1).toString());
}
@Test
public void testDrillDown() throws Exception {
- List<FacetResult> facetResults = new SimpleSortedSetFacetsExample().runDrillDown();
- assertEquals(1, facetResults.size());
- assertExpectedCounts(facetResults, expectedCountsDrillDown);
+ FacetResult result = new SimpleSortedSetFacetsExample().runDrillDown();
+ assertEquals("dim=Author path=[] value=2 childCount=2\n Bob (1)\n Lisa (1)\n", result.toString());
}
-
}
Modified: lucene/dev/trunk/lucene/facet/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/build.xml?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/build.xml (original)
+++ lucene/dev/trunk/lucene/facet/build.xml Sat Jan 4 11:19:08 2014
@@ -24,7 +24,7 @@
</description>
<property name="forbidden-sysout-excludes" value="
- org/apache/lucene/facet/util/PrintTaxonomyStats.class
+ org/apache/lucene/facet/taxonomy/PrintTaxonomyStats.class
"/>
<import file="../module-build.xml"/>
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/package.html?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/package.html (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/package.html Sat Jan 4 11:19:08 2014
@@ -20,5 +20,46 @@
</head>
<body>
<h1>faceted search</h1>
+ <p>
+ This module provides multiple methods for computing facet counts and
+ value aggregations:
+ <ul>
+ <li> Taxonomy-based methods rely on a separate taxonomy index to
+ map hierarchical facet paths to global int ordinals for fast
+ counting at search time; these methods can compute counts
+ ({@link org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts}, {@link
+ org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts}) or aggregate long or double values ({@link
+ org.apache.lucene.facet.taxonomy.TaxonomyFacetSumIntAssociations}, {@link
+ org.apache.lucene.facet.taxonomy.TaxonomyFacetSumFloatAssociations}, {@link
+ org.apache.lucene.facet.taxonomy.TaxonomyFacetSumValueSource}). Add {@link org.apache.lucene.facet.FacetField} or
+ {@link org.apache.lucene.facet.taxonomy.AssociationFacetField} to your documents at index time
+ to use taxonomy-based methods.
+
+ <li> Sorted-set doc values method does not require a separate
+ taxonomy index, and computes counts based on sorted set doc
+ values fields ({@link org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts}). Add
+ {@link org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField} to your documents at
+ index time to use sorted set facet counts.
+
+ <li> Range faceting ({@link org.apache.lucene.facet.range.LongRangeFacetCounts}, {@link
+ org.apache.lucene.facet.range.DoubleRangeFacetCounts}) computes counts for dynamic numeric
+ ranges from a provided {@link org.apache.lucene.queries.function.ValueSource} (a previously indexed
+ numeric field, or a dynamic expression such as distance).
+ </ul>
+ </p>
+ <p>
+ At search time you first run your search, but pass a {@link
+ org.apache.lucene.facet.FacetsCollector} to gather all hits (and optionally, scores for each
+ hit). Then, instantiate whichever facet methods you'd like to use
+ to compute aggregates. Finally, all methods implement a common
+ {@link org.apache.lucene.facet.Facets} base API that you use to obtain specific facet
+ counts.
+ </p>
+ <p>
+ The various {@link org.apache.lucene.facet.FacetsCollector#search} utility methods are
+ useful for doing an "ordinary" search (sorting by score, or by a
+ specified Sort) but also collecting into a {@link org.apache.lucene.facet.FacetsCollector} for
+ subsequent faceting.
+ </p>
</body>
-</html>
\ No newline at end of file
+</html>
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ParallelTaxonomyArrays.java Sat Jan 4 11:19:08 2014
@@ -42,6 +42,10 @@ package org.apache.lucene.facet.taxonomy
*/
public abstract class ParallelTaxonomyArrays {
+ /** Sole constructor. */
+ public ParallelTaxonomyArrays() {
+ }
+
/**
* Returns the parents array, where {@code parents[i]} denotes the parent of
* category ordinal {@code i}.
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java Sat Jan 4 11:19:08 2014
@@ -89,10 +89,14 @@ public abstract class TaxonomyReader imp
}
}
+
+ /** Sole constructor. */
+ public TaxonomyReader() {
+ }
/**
* The root category (the category with the empty path) always has the ordinal
- * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
+ * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(FacetLabel)}
* of an empty path will always return {@code ROOT_ORDINAL}, and
* {@link #getPath(int)} with {@code ROOT_ORDINAL} will return the empty path.
*/
@@ -142,7 +146,7 @@ public abstract class TaxonomyReader imp
protected abstract TaxonomyReader doOpenIfChanged() throws IOException;
/**
- * @throws AlreadyClosedException if this IndexReader is closed
+ * Throws {@link AlreadyClosedException} if this TaxonomyReader is closed
*/
protected final void ensureOpen() throws AlreadyClosedException {
if (getRefCount() <= 0) {
@@ -215,10 +219,18 @@ public abstract class TaxonomyReader imp
* @return the category's ordinal or {@link #INVALID_ORDINAL} if the category
* wasn't found.
*/
- public abstract int getOrdinal(CategoryPath categoryPath) throws IOException;
+ public abstract int getOrdinal(FacetLabel categoryPath) throws IOException;
+
+ /** Returns ordinal for the dim + path. */
+ public int getOrdinal(String dim, String[] path) throws IOException {
+ String[] fullPath = new String[path.length+1];
+ fullPath[0] = dim;
+ System.arraycopy(path, 0, fullPath, 1, path.length);
+ return getOrdinal(new FacetLabel(fullPath));
+ }
/** Returns the path name of the category with the given ordinal. */
- public abstract CategoryPath getPath(int ordinal) throws IOException;
+ public abstract FacetLabel getPath(int ordinal) throws IOException;
/** Returns the current refCount for this taxonomy reader. */
public final int getRefCount() {
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java Sat Jan 4 11:19:08 2014
@@ -64,7 +64,7 @@ public interface TaxonomyWriter extends
* ordinal of a category is guaranteed to be smaller than the ordinal of
* any of its descendants.
*/
- public int addCategory(CategoryPath categoryPath) throws IOException;
+ public int addCategory(FacetLabel categoryPath) throws IOException;
/**
* getParent() returns the ordinal of the parent category of the category
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java Sat Jan 4 11:19:08 2014
@@ -23,23 +23,8 @@ import org.apache.lucene.util.BytesRef;
* @lucene.experimental
*/
abstract class Consts {
-
static final String FULL = "$full_path$";
static final String FIELD_PAYLOADS = "$payloads$";
static final String PAYLOAD_PARENT = "p";
static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
-
- /**
- * Delimiter used for creating the full path of a category from the list of
- * its labels from root. It is forbidden for labels to contain this
- * character.
- * <P>
- * Originally, we used \uFFFE, officially a "unicode noncharacter" (invalid
- * unicode character) for this purpose. Recently, we switched to the
- * "private-use" character \uF749. Even more recently, we
- * switched to \U001F (INFORMATION_SEPARATOR).
- */
- //static final char DEFAULT_DELIMITER = '\uFFFE';
- //static final char DEFAULT_DELIMITER = '\uF749';
- static final char DEFAULT_DELIMITER = '\u001F';
}
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java Sat Jan 4 11:19:08 2014
@@ -5,8 +5,9 @@ import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.lucene.facet.collections.LRUHashMap;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
+import org.apache.lucene.facet.taxonomy.LRUHashMap;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.CorruptIndexException; // javadocs
@@ -60,28 +61,26 @@ public class DirectoryTaxonomyReader ext
private final DirectoryReader indexReader;
// TODO: test DoubleBarrelLRUCache and consider using it instead
- private LRUHashMap<CategoryPath, Integer> ordinalCache;
- private LRUHashMap<Integer, CategoryPath> categoryCache;
+ private LRUHashMap<FacetLabel, Integer> ordinalCache;
+ private LRUHashMap<Integer, FacetLabel> categoryCache;
private volatile TaxonomyIndexArrays taxoArrays;
- private char delimiter = Consts.DEFAULT_DELIMITER;
-
/**
* Called only from {@link #doOpenIfChanged()}. If the taxonomy has been
* recreated, you should pass {@code null} as the caches and parent/children
* arrays.
*/
DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter,
- LRUHashMap<CategoryPath,Integer> ordinalCache, LRUHashMap<Integer,CategoryPath> categoryCache,
+ LRUHashMap<FacetLabel,Integer> ordinalCache, LRUHashMap<Integer,FacetLabel> categoryCache,
TaxonomyIndexArrays taxoArrays) throws IOException {
this.indexReader = indexReader;
this.taxoWriter = taxoWriter;
this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
// use the same instance of the cache, note the protective code in getOrdinal and getPath
- this.ordinalCache = ordinalCache == null ? new LRUHashMap<CategoryPath,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
- this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,CategoryPath>(DEFAULT_CACHE_VALUE) : categoryCache;
+ this.ordinalCache = ordinalCache == null ? new LRUHashMap<FacetLabel,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
+ this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,FacetLabel>(DEFAULT_CACHE_VALUE) : categoryCache;
this.taxoArrays = taxoArrays != null ? new TaxonomyIndexArrays(indexReader, taxoArrays) : null;
}
@@ -103,8 +102,8 @@ public class DirectoryTaxonomyReader ext
// These are the default cache sizes; they can be configured after
// construction with the cache's setMaxSize() method
- ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
- categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
+ ordinalCache = new LRUHashMap<FacetLabel, Integer>(DEFAULT_CACHE_VALUE);
+ categoryCache = new LRUHashMap<Integer, FacetLabel>(DEFAULT_CACHE_VALUE);
}
/**
@@ -122,8 +121,8 @@ public class DirectoryTaxonomyReader ext
// These are the default cache sizes; they can be configured after
// construction with the cache's setMaxSize() method
- ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
- categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
+ ordinalCache = new LRUHashMap<FacetLabel, Integer>(DEFAULT_CACHE_VALUE);
+ categoryCache = new LRUHashMap<Integer, FacetLabel>(DEFAULT_CACHE_VALUE);
}
private synchronized void initTaxoArrays() throws IOException {
@@ -209,10 +208,14 @@ public class DirectoryTaxonomyReader ext
}
}
+ /** Open the {@link DirectoryReader} from this {@link
+ * Directory}. */
protected DirectoryReader openIndexReader(Directory directory) throws IOException {
return DirectoryReader.open(directory);
}
+ /** Open the {@link DirectoryReader} from this {@link
+ * IndexWriter}. */
protected DirectoryReader openIndexReader(IndexWriter writer) throws IOException {
return DirectoryReader.open(writer, false);
}
@@ -242,7 +245,7 @@ public class DirectoryTaxonomyReader ext
}
@Override
- public int getOrdinal(CategoryPath cp) throws IOException {
+ public int getOrdinal(FacetLabel cp) throws IOException {
ensureOpen();
if (cp.length == 0) {
return ROOT_ORDINAL;
@@ -270,7 +273,7 @@ public class DirectoryTaxonomyReader ext
// If we're still here, we have a cache miss. We need to fetch the
// value from disk, and then also put it in the cache:
int ret = TaxonomyReader.INVALID_ORDINAL;
- DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(cp.toString(delimiter)), 0);
+ DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(FacetsConfig.pathToString(cp.components, cp.length)), 0);
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
ret = docs.docID();
@@ -288,7 +291,7 @@ public class DirectoryTaxonomyReader ext
}
@Override
- public CategoryPath getPath(int ordinal) throws IOException {
+ public FacetLabel getPath(int ordinal) throws IOException {
ensureOpen();
// Since the cache is shared with DTR instances allocated from
@@ -303,14 +306,14 @@ public class DirectoryTaxonomyReader ext
// wrapped as LRU?
Integer catIDInteger = Integer.valueOf(ordinal);
synchronized (categoryCache) {
- CategoryPath res = categoryCache.get(catIDInteger);
+ FacetLabel res = categoryCache.get(catIDInteger);
if (res != null) {
return res;
}
}
StoredDocument doc = indexReader.document(ordinal);
- CategoryPath ret = new CategoryPath(doc.get(Consts.FULL), delimiter);
+ FacetLabel ret = new FacetLabel(FacetsConfig.stringToPath(doc.get(Consts.FULL)));
synchronized (categoryCache) {
categoryCache.put(catIDInteger, ret);
}
@@ -326,7 +329,7 @@ public class DirectoryTaxonomyReader ext
/**
* setCacheSize controls the maximum allowed size of each of the caches
- * used by {@link #getPath(int)} and {@link #getOrdinal(CategoryPath)}.
+ * used by {@link #getPath(int)} and {@link #getOrdinal(FacetLabel)}.
* <P>
* Currently, if the given size is smaller than the current size of
* a cache, it will not shrink, and rather we be limited to its current
@@ -343,28 +346,16 @@ public class DirectoryTaxonomyReader ext
}
}
- /**
- * setDelimiter changes the character that the taxonomy uses in its
- * internal storage as a delimiter between category components. Do not
- * use this method unless you really know what you are doing.
- * <P>
- * If you do use this method, make sure you call it before any other
- * methods that actually queries the taxonomy. Moreover, make sure you
- * always pass the same delimiter for all LuceneTaxonomyWriter and
- * LuceneTaxonomyReader objects you create.
- */
- public void setDelimiter(char delimiter) {
- ensureOpen();
- this.delimiter = delimiter;
- }
-
+ /** Returns ordinal -> label mapping, up to the provided
+ * max ordinal or number of ordinals, whichever is
+ * smaller. */
public String toString(int max) {
ensureOpen();
StringBuilder sb = new StringBuilder();
int upperl = Math.min(max, indexReader.maxDoc());
for (int i = 0; i < upperl; i++) {
try {
- CategoryPath category = this.getPath(i);
+ FacetLabel category = this.getPath(i);
if (category == null) {
sb.append(i + ": NULL!! \n");
continue;
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java Sat Jan 4 11:19:08 2014
@@ -21,12 +21,13 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException; // javadocs
@@ -34,8 +35,8 @@ import org.apache.lucene.index.Directory
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.ReaderManager;
import org.apache.lucene.index.SegmentInfos;
@@ -103,8 +104,7 @@ public class DirectoryTaxonomyWriter imp
// Records the taxonomy index epoch, updated on replaceTaxonomy as well.
private long indexEpoch;
-
- private char delimiter = Consts.DEFAULT_DELIMITER;
+
private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
private Field parentStreamField;
private Field fullPathField;
@@ -140,23 +140,6 @@ public class DirectoryTaxonomyWriter imp
}
/**
- * Changes the character that the taxonomy uses in its internal storage as a
- * delimiter between category components. Do not use this method unless you
- * really know what you are doing. It has nothing to do with whatever
- * character the application may be using to represent categories for its own
- * use.
- * <p>
- * If you do use this method, make sure you call it before any other methods
- * that actually queries the taxonomy. Moreover, make sure you always pass the
- * same delimiter for all taxonomy writer and reader instances you create for
- * the same directory.
- */
- public void setDelimiter(char delimiter) {
- ensureOpen();
- this.delimiter = delimiter;
- }
-
- /**
* Forcibly unlocks the taxonomy in the named directory.
* <P>
* Caution: this should only be used by failure recovery code, when it is
@@ -248,7 +231,7 @@ public class DirectoryTaxonomyWriter imp
cacheIsComplete = true;
// Make sure that the taxonomy always contain the root category
// with category id 0.
- addCategory(CategoryPath.EMPTY);
+ addCategory(new FacetLabel());
} else {
// There are some categories on the disk, which we have not yet
// read into the cache, and therefore the cache is incomplete.
@@ -343,6 +326,7 @@ public class DirectoryTaxonomyWriter imp
return new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
}
+ /** Create this with {@code OpenMode.CREATE_OR_APPEND}. */
public DirectoryTaxonomyWriter(Directory d) throws IOException {
this(d, OpenMode.CREATE_OR_APPEND);
}
@@ -389,7 +373,7 @@ public class DirectoryTaxonomyWriter imp
* returning the category's ordinal, or a negative number in case the
* category does not yet exist in the taxonomy.
*/
- protected synchronized int findCategory(CategoryPath categoryPath) throws IOException {
+ protected synchronized int findCategory(FacetLabel categoryPath) throws IOException {
// If we can find the category in the cache, or we know the cache is
// complete, we can return the response directly from it
int res = cache.get(categoryPath);
@@ -422,7 +406,7 @@ public class DirectoryTaxonomyWriter imp
int doc = -1;
DirectoryReader reader = readerManager.acquire();
try {
- final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter));
+ final BytesRef catTerm = new BytesRef(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
TermsEnum termsEnum = null; // reuse
DocsEnum docs = null; // reuse
for (AtomicReaderContext ctx : reader.leaves()) {
@@ -448,7 +432,7 @@ public class DirectoryTaxonomyWriter imp
}
@Override
- public int addCategory(CategoryPath categoryPath) throws IOException {
+ public int addCategory(FacetLabel categoryPath) throws IOException {
ensureOpen();
// check the cache outside the synchronized block. this results in better
// concurrency when categories are there.
@@ -480,14 +464,14 @@ public class DirectoryTaxonomyWriter imp
* parent is always added to the taxonomy before its child). We do this by
* recursion.
*/
- private int internalAddCategory(CategoryPath cp) throws IOException {
+ private int internalAddCategory(FacetLabel cp) throws IOException {
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
// ordinal as payloads (rather than a stored field; payloads can be
// more efficiently read into memory in bulk by LuceneTaxonomyReader)
int parent;
if (cp.length > 1) {
- CategoryPath parentPath = cp.subpath(cp.length - 1);
+ FacetLabel parentPath = cp.subpath(cp.length - 1);
parent = findCategory(parentPath);
if (parent < 0) {
parent = internalAddCategory(parentPath);
@@ -516,7 +500,7 @@ public class DirectoryTaxonomyWriter imp
* Note that the methods calling addCategoryDocument() are synchornized, so
* this method is effectively synchronized as well.
*/
- private int addCategoryDocument(CategoryPath categoryPath, int parent) throws IOException {
+ private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException {
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
@@ -530,7 +514,7 @@ public class DirectoryTaxonomyWriter imp
Document d = new Document();
d.add(parentStreamField);
- fullPathField.setStringValue(categoryPath.toString(delimiter));
+ fullPathField.setStringValue(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
d.add(fullPathField);
// Note that we do no pass an Analyzer here because the fields that are
@@ -597,7 +581,7 @@ public class DirectoryTaxonomyWriter imp
}
}
- private void addToCache(CategoryPath categoryPath, int id) throws IOException {
+ private void addToCache(FacetLabel categoryPath, int id) throws IOException {
if (cache.put(categoryPath, id)) {
// If cache.put() returned true, it means the cache was limited in
// size, became full, and parts of it had to be evicted. It is
@@ -730,7 +714,7 @@ public class DirectoryTaxonomyWriter imp
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
- CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter);
+ FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(t.utf8ToString()));
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
assert !res : "entries should not have been evicted from the cache";
@@ -819,8 +803,7 @@ public class DirectoryTaxonomyWriter imp
final Terms terms = ar.terms(Consts.FULL);
te = terms.iterator(te);
while (te.next() != null) {
- String value = te.term().utf8ToString();
- CategoryPath cp = new CategoryPath(value, delimiter);
+ FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
final int ordinal = addCategory(cp);
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
@@ -858,12 +841,16 @@ public class DirectoryTaxonomyWriter imp
* and size-1.
*/
public void setSize(int size) throws IOException;
+
+ /** Record a mapping. */
public void addMapping(int origOrdinal, int newOrdinal) throws IOException;
+
/**
* Call addDone() to say that all addMapping() have been done.
* In some implementations this might free some resources.
*/
public void addDone() throws IOException;
+
/**
* Return the map from the taxonomy's original (consecutive) ordinals
* to the new taxonomy's ordinals. If the map has to be read from disk
@@ -880,6 +867,11 @@ public class DirectoryTaxonomyWriter imp
*/
public static final class MemoryOrdinalMap implements OrdinalMap {
int[] map;
+
+ /** Sole constructor. */
+ public MemoryOrdinalMap() {
+ }
+
@Override
public void setSize(int taxonomySize) {
map = new int[taxonomySize];
@@ -903,6 +895,7 @@ public class DirectoryTaxonomyWriter imp
File tmpfile;
DataOutputStream out;
+ /** Sole constructor. */
public DiskOrdinalMap(File tmpfile) throws FileNotFoundException {
this.tmpfile = tmpfile;
out = new DataOutputStream(new BufferedOutputStream(
Modified: lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java (original)
+++ lucene/dev/trunk/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/TaxonomyWriterCache.java Sat Jan 4 11:19:08 2014
@@ -1,6 +1,6 @@
package org.apache.lucene.facet.taxonomy.writercache;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
/*
@@ -37,7 +37,7 @@ import org.apache.lucene.facet.taxonomy.
* <p>
* However, if it does so, it should clear out large parts of the cache at once,
* because the user will typically need to work hard to recover from every cache
- * cleanup (see {@link #put(CategoryPath, int)}'s return value).
+ * cleanup (see {@link #put(FacetLabel, int)}'s return value).
* <p>
* <b>NOTE:</b> the cache may be accessed concurrently by multiple threads,
* therefore cache implementations should take this into consideration.
@@ -62,7 +62,7 @@ public interface TaxonomyWriterCache {
* it means the category does not exist. Otherwise, the category might
* still exist, but just be missing from the cache.
*/
- public int get(CategoryPath categoryPath);
+ public int get(FacetLabel categoryPath);
/**
* Add a category to the cache, with the given ordinal as the value.
@@ -82,7 +82,7 @@ public interface TaxonomyWriterCache {
* It doesn't really matter, because normally the next thing we do after
* finding that a category does not exist is to add it.
*/
- public boolean put(CategoryPath categoryPath, int ordinal);
+ public boolean put(FacetLabel categoryPath, int ordinal);
/**
* Returns true if the cache is full, such that the next {@link #put} will
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/FacetTestCase.java Sat Jan 4 11:19:08 2014
@@ -1,19 +1,5 @@
package org.apache.lucene.facet;
-import java.util.Random;
-
-import org.apache.lucene.facet.encoding.DGapIntEncoder;
-import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder;
-import org.apache.lucene.facet.encoding.EightFlagsIntEncoder;
-import org.apache.lucene.facet.encoding.FourFlagsIntEncoder;
-import org.apache.lucene.facet.encoding.IntEncoder;
-import org.apache.lucene.facet.encoding.NOnesIntEncoder;
-import org.apache.lucene.facet.encoding.SortingIntEncoder;
-import org.apache.lucene.facet.encoding.UniqueValuesIntEncoder;
-import org.apache.lucene.facet.encoding.VInt8IntEncoder;
-import org.apache.lucene.facet.params.CategoryListParams;
-import org.apache.lucene.util.LuceneTestCase;
-
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -31,34 +17,190 @@ import org.apache.lucene.util.LuceneTest
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
public abstract class FacetTestCase extends LuceneTestCase {
- private static final IntEncoder[] ENCODERS = new IntEncoder[] {
- new SortingIntEncoder(new UniqueValuesIntEncoder(new VInt8IntEncoder())),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder()))),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapVInt8IntEncoder())),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new EightFlagsIntEncoder()))),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new FourFlagsIntEncoder()))),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(3)))),
- new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new NOnesIntEncoder(4)))),
- };
-
- /** Returns a {@link CategoryListParams} with random {@link IntEncoder} and field. */
- public static CategoryListParams randomCategoryListParams() {
- final String field = CategoryListParams.DEFAULT_FIELD + "$" + random().nextInt();
- return randomCategoryListParams(field);
+ public Facets getTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector c) throws IOException {
+ return getTaxonomyFacetCounts(taxoReader, config, c, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
}
-
- /** Returns a {@link CategoryListParams} with random {@link IntEncoder}. */
- public static CategoryListParams randomCategoryListParams(String field) {
- Random random = random();
- final IntEncoder encoder = ENCODERS[random.nextInt(ENCODERS.length)];
- return new CategoryListParams(field) {
- @Override
- public IntEncoder createEncoder() {
- return encoder;
+
+ public Facets getTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector c, String indexFieldName) throws IOException {
+ Facets facets;
+ if (random().nextBoolean()) {
+ facets = new FastTaxonomyFacetCounts(indexFieldName, taxoReader, config, c);
+ } else {
+ OrdinalsReader ordsReader = new DocValuesOrdinalsReader(indexFieldName);
+ if (random().nextBoolean()) {
+ ordsReader = new CachedOrdinalsReader(ordsReader);
}
- };
+ facets = new TaxonomyFacetCounts(ordsReader, taxoReader, config, c);
+ }
+
+ return facets;
+ }
+
+ protected String[] getRandomTokens(int count) {
+ String[] tokens = new String[count];
+ for(int i=0;i<tokens.length;i++) {
+ tokens[i] = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
+ //tokens[i] = _TestUtil.randomSimpleString(random(), 1, 10);
+ }
+ return tokens;
+ }
+
+ protected String pickToken(String[] tokens) {
+ for(int i=0;i<tokens.length;i++) {
+ if (random().nextBoolean()) {
+ return tokens[i];
+ }
+ }
+
+ // Move long tail onto first token:
+ return tokens[0];
+ }
+
+ protected static class TestDoc {
+ public String content;
+ public String[] dims;
+ public float value;
+ }
+
+ protected List<TestDoc> getRandomDocs(String[] tokens, int count, int numDims) {
+ List<TestDoc> docs = new ArrayList<>();
+ for(int i=0;i<count;i++) {
+ TestDoc doc = new TestDoc();
+ docs.add(doc);
+ doc.content = pickToken(tokens);
+ doc.dims = new String[numDims];
+ for(int j=0;j<numDims;j++) {
+ doc.dims[j] = pickToken(tokens);
+ if (random().nextInt(10) < 3) {
+ break;
+ }
+ }
+ if (VERBOSE) {
+ System.out.println(" doc " + i + ": content=" + doc.content);
+ for(int j=0;j<numDims;j++) {
+ if (doc.dims[j] != null) {
+ System.out.println(" dim[" + j + "]=" + doc.dims[j]);
+ }
+ }
+ }
+ }
+
+ return docs;
}
+ protected void sortTies(List<FacetResult> results) {
+ for(FacetResult result : results) {
+ sortTies(result.labelValues);
+ }
+ }
+
+ protected void sortTies(LabelAndValue[] labelValues) {
+ double lastValue = -1;
+ int numInRow = 0;
+ int i = 0;
+ while(i <= labelValues.length) {
+ if (i < labelValues.length && labelValues[i].value.doubleValue() == lastValue) {
+ numInRow++;
+ } else {
+ if (numInRow > 1) {
+ Arrays.sort(labelValues, i-numInRow, i,
+ new Comparator<LabelAndValue>() {
+ @Override
+ public int compare(LabelAndValue a, LabelAndValue b) {
+ assert a.value.doubleValue() == b.value.doubleValue();
+ return new BytesRef(a.label).compareTo(new BytesRef(b.label));
+ }
+ });
+ }
+ numInRow = 1;
+ if (i < labelValues.length) {
+ lastValue = labelValues[i].value.doubleValue();
+ }
+ }
+ i++;
+ }
+ }
+
+ protected void sortLabelValues(List<LabelAndValue> labelValues) {
+ Collections.sort(labelValues,
+ new Comparator<LabelAndValue>() {
+ @Override
+ public int compare(LabelAndValue a, LabelAndValue b) {
+ if (a.value.doubleValue() > b.value.doubleValue()) {
+ return -1;
+ } else if (a.value.doubleValue() < b.value.doubleValue()) {
+ return 1;
+ } else {
+ return new BytesRef(a.label).compareTo(new BytesRef(b.label));
+ }
+ }
+ });
+ }
+
+ protected void sortFacetResults(List<FacetResult> results) {
+ Collections.sort(results,
+ new Comparator<FacetResult>() {
+ @Override
+ public int compare(FacetResult a, FacetResult b) {
+ if (a.value.doubleValue() > b.value.doubleValue()) {
+ return -1;
+ } else if (b.value.doubleValue() > a.value.doubleValue()) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+ });
+ }
+
+ protected void assertFloatValuesEquals(List<FacetResult> a, List<FacetResult> b) {
+ assertEquals(a.size(), b.size());
+ float lastValue = Float.POSITIVE_INFINITY;
+ Map<String,FacetResult> aByDim = new HashMap<String,FacetResult>();
+ for(int i=0;i<a.size();i++) {
+ assertTrue(a.get(i).value.floatValue() <= lastValue);
+ lastValue = a.get(i).value.floatValue();
+ aByDim.put(a.get(i).dim, a.get(i));
+ }
+ lastValue = Float.POSITIVE_INFINITY;
+ Map<String,FacetResult> bByDim = new HashMap<String,FacetResult>();
+ for(int i=0;i<b.size();i++) {
+ bByDim.put(b.get(i).dim, b.get(i));
+ assertTrue(b.get(i).value.floatValue() <= lastValue);
+ lastValue = b.get(i).value.floatValue();
+ }
+ for(String dim : aByDim.keySet()) {
+ assertFloatValuesEquals(aByDim.get(dim), bByDim.get(dim));
+ }
+ }
+
+ protected void assertFloatValuesEquals(FacetResult a, FacetResult b) {
+ assertEquals(a.dim, b.dim);
+ assertTrue(Arrays.equals(a.path, b.path));
+ assertEquals(a.childCount, b.childCount);
+ assertEquals(a.value.floatValue(), b.value.floatValue(), a.value.floatValue()/1e5);
+ assertEquals(a.labelValues.length, b.labelValues.length);
+ for(int i=0;i<a.labelValues.length;i++) {
+ assertEquals(a.labelValues[i].label, b.labelValues[i].label);
+ assertEquals(a.labelValues[i].value.floatValue(), b.labelValues[i].value.floatValue(), a.labelValues[i].value.floatValue()/1e5);
+ }
+ }
}
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyCombined.java Sat Jan 4 11:19:08 2014
@@ -113,7 +113,7 @@ public class TestTaxonomyCombined extend
*/
public static void fillTaxonomy(TaxonomyWriter tw) throws IOException {
for (int i = 0; i < categories.length; i++) {
- int ordinal = tw.addCategory(new CategoryPath(categories[i]));
+ int ordinal = tw.addCategory(new FacetLabel(categories[i]));
int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
if (ordinal!=expectedOrdinal) {
fail("For category "+showcat(categories[i])+" expected ordinal "+
@@ -140,14 +140,14 @@ public class TestTaxonomyCombined extend
return sb.toString();
}
- private String showcat(CategoryPath path) {
+ private String showcat(FacetLabel path) {
if (path==null) {
return "<null>";
}
if (path.length==0) {
return "<empty>";
}
- return "<"+path.toString('/')+">";
+ return "<"+path.toString()+">";
}
/** Basic tests for TaxonomyWriter. Basically, we test that
@@ -232,7 +232,7 @@ public class TestTaxonomyCombined extend
tw = new DirectoryTaxonomyWriter(indexDir);
fillTaxonomy(tw);
// Add one new category, just to make commit() do something:
- tw.addCategory(new CategoryPath("hi"));
+ tw.addCategory(new FacetLabel("hi"));
// Do a commit(). Here was a bug - if tw had a reader open, it should
// be reopened after the commit. However, in our case the reader should
// not be open (as explained above) but because it was not set to null,
@@ -254,34 +254,34 @@ public class TestTaxonomyCombined extend
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
assertEquals(1, tw.getSize()); // the root only
// Test that adding a new top-level category works
- assertEquals(1, tw.addCategory(new CategoryPath("a")));
+ assertEquals(1, tw.addCategory(new FacetLabel("a")));
assertEquals(2, tw.getSize());
// Test that adding the same category again is noticed, and the
// same ordinal (and not a new one) is returned.
- assertEquals(1, tw.addCategory(new CategoryPath("a")));
+ assertEquals(1, tw.addCategory(new FacetLabel("a")));
assertEquals(2, tw.getSize());
// Test that adding another top-level category returns a new ordinal,
// not the same one
- assertEquals(2, tw.addCategory(new CategoryPath("b")));
+ assertEquals(2, tw.addCategory(new FacetLabel("b")));
assertEquals(3, tw.getSize());
// Test that adding a category inside one of the above adds just one
// new ordinal:
- assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
+ assertEquals(3, tw.addCategory(new FacetLabel("a","c")));
assertEquals(4, tw.getSize());
// Test that adding the same second-level category doesn't do anything:
- assertEquals(3, tw.addCategory(new CategoryPath("a","c")));
+ assertEquals(3, tw.addCategory(new FacetLabel("a","c")));
assertEquals(4, tw.getSize());
// Test that adding a second-level category with two new components
// indeed adds two categories
- assertEquals(5, tw.addCategory(new CategoryPath("d","e")));
+ assertEquals(5, tw.addCategory(new FacetLabel("d","e")));
assertEquals(6, tw.getSize());
// Verify that the parents were added above in the order we expected
- assertEquals(4, tw.addCategory(new CategoryPath("d")));
+ assertEquals(4, tw.addCategory(new FacetLabel("d")));
// Similar, but inside a category that already exists:
- assertEquals(7, tw.addCategory(new CategoryPath("b", "d","e")));
+ assertEquals(7, tw.addCategory(new FacetLabel("b", "d","e")));
assertEquals(8, tw.getSize());
// And now inside two levels of categories that already exist:
- assertEquals(8, tw.addCategory(new CategoryPath("b", "d","f")));
+ assertEquals(8, tw.addCategory(new FacetLabel("b", "d","f")));
assertEquals(9, tw.getSize());
tw.close();
@@ -304,7 +304,7 @@ public class TestTaxonomyCombined extend
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length);
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
- assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
+ assertEquals(0, tr.getOrdinal(new FacetLabel()));
tr.close();
indexDir.close();
}
@@ -323,7 +323,7 @@ public class TestTaxonomyCombined extend
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length);
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParallelTaxonomyArrays().parents()[0]);
- assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
+ assertEquals(0, tr.getOrdinal(new FacetLabel()));
tw.close();
tr.close();
indexDir.close();
@@ -352,8 +352,8 @@ public class TestTaxonomyCombined extend
// test TaxonomyReader.getCategory():
for (int i = 1; i < tr.getSize(); i++) {
- CategoryPath expectedCategory = new CategoryPath(expectedCategories[i]);
- CategoryPath category = tr.getPath(i);
+ FacetLabel expectedCategory = new FacetLabel(expectedCategories[i]);
+ FacetLabel category = tr.getPath(i);
if (!expectedCategory.equals(category)) {
fail("For ordinal "+i+" expected category "+
showcat(expectedCategory)+", but got "+showcat(category));
@@ -367,15 +367,15 @@ public class TestTaxonomyCombined extend
// test TaxonomyReader.getOrdinal():
for (int i = 1; i < expectedCategories.length; i++) {
int expectedOrdinal = i;
- int ordinal = tr.getOrdinal(new CategoryPath(expectedCategories[i]));
+ int ordinal = tr.getOrdinal(new FacetLabel(expectedCategories[i]));
if (expectedOrdinal != ordinal) {
fail("For category "+showcat(expectedCategories[i])+" expected ordinal "+
expectedOrdinal+", but got "+ordinal);
}
}
// (also test invalid categories:)
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("non-existant")));
- assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new CategoryPath("Author", "Jules Verne")));
+ assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("non-existant")));
+ assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(new FacetLabel("Author", "Jules Verne")));
tr.close();
indexDir.close();
@@ -407,9 +407,9 @@ public class TestTaxonomyCombined extend
// check parent of non-root ordinals:
for (int ordinal=1; ordinal<tr.getSize(); ordinal++) {
- CategoryPath me = tr.getPath(ordinal);
+ FacetLabel me = tr.getPath(ordinal);
int parentOrdinal = parents[ordinal];
- CategoryPath parent = tr.getPath(parentOrdinal);
+ FacetLabel parent = tr.getPath(parentOrdinal);
if (parent==null) {
fail("Parent of "+ordinal+" is "+parentOrdinal+
", but this is not a valid category.");
@@ -476,9 +476,9 @@ public class TestTaxonomyCombined extend
// check parent of non-root ordinals:
for (int ordinal = 1; ordinal < tr.getSize(); ordinal++) {
- CategoryPath me = tr.getPath(ordinal);
+ FacetLabel me = tr.getPath(ordinal);
int parentOrdinal = tw.getParent(ordinal);
- CategoryPath parent = tr.getPath(parentOrdinal);
+ FacetLabel parent = tr.getPath(parentOrdinal);
if (parent == null) {
fail("Parent of " + ordinal + " is " + parentOrdinal
+ ", but this is not a valid category.");
@@ -668,7 +668,7 @@ public class TestTaxonomyCombined extend
public void testChildrenArraysGrowth() throws Exception {
Directory indexDir = newDirectory();
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("hi", "there"));
+ tw.addCategory(new FacetLabel("hi", "there"));
tw.commit();
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
@@ -677,8 +677,8 @@ public class TestTaxonomyCombined extend
assertEquals(3, ca.children().length);
assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.children()));
assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.siblings()));
- tw.addCategory(new CategoryPath("hi", "ho"));
- tw.addCategory(new CategoryPath("hello"));
+ tw.addCategory(new FacetLabel("hi", "ho"));
+ tw.addCategory(new FacetLabel("hello"));
tw.commit();
// Before refresh, nothing changed..
ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
@@ -708,8 +708,8 @@ public class TestTaxonomyCombined extend
// compute base child arrays - after first chunk, and after the other
Directory indexDirBase = newDirectory();
TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
- twBase.addCategory(new CategoryPath("a", "0"));
- final CategoryPath abPath = new CategoryPath("a", "b");
+ twBase.addCategory(new FacetLabel("a", "0"));
+ final FacetLabel abPath = new FacetLabel("a", "b");
twBase.addCategory(abPath);
twBase.commit();
TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
@@ -721,7 +721,7 @@ public class TestTaxonomyCombined extend
final int numCategories = atLeast(800);
for (int i = 0; i < numCategories; i++) {
- twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
+ twBase.addCategory(new FacetLabel("a", "b", Integer.toString(i)));
}
twBase.close();
@@ -742,18 +742,18 @@ public class TestTaxonomyCombined extend
indexDirBase.close();
}
- private void assertConsistentYoungestChild(final CategoryPath abPath,
+ private void assertConsistentYoungestChild(final FacetLabel abPath,
final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories)
throws Exception {
SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("a", "0"));
+ tw.addCategory(new FacetLabel("a", "0"));
tw.addCategory(abPath);
tw.commit();
final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
for (int i = 0; i < numCategories; i++) {
- final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
+ final FacetLabel cp = new FacetLabel("a", "b", Integer.toString(i));
tw.addCategory(cp);
assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
}
@@ -840,7 +840,7 @@ public class TestTaxonomyCombined extend
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
- tw.addCategory(new CategoryPath("Author"));
+ tw.addCategory(new FacetLabel("Author"));
assertEquals(1, tr.getSize()); // still root only...
assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
assertEquals(1, tr.getSize()); // still root only...
@@ -864,7 +864,7 @@ public class TestTaxonomyCombined extend
// the parent of this category is correct (this requires the reader
// to correctly update its prefetched parent vector), and that the
// old information also wasn't ruined:
- tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
+ tw.addCategory(new FacetLabel("Author", "Richard Dawkins"));
int dawkins = 2;
tw.commit();
newTaxoReader = TaxonomyReader.openIfChanged(tr);
@@ -889,7 +889,7 @@ public class TestTaxonomyCombined extend
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
// Test getOrdinal():
- CategoryPath author = new CategoryPath("Author");
+ FacetLabel author = new FacetLabel("Author");
assertEquals(1, tr.getSize()); // the empty taxonomy has size 1 (the root)
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
@@ -924,13 +924,13 @@ public class TestTaxonomyCombined extend
// native fslock impl gets angry if we use it, so use RAMDirectory explicitly.
Directory indexDir = new RAMDirectory();
TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
- tw.addCategory(new CategoryPath("hi", "there"));
+ tw.addCategory(new FacetLabel("hi", "there"));
tw.commit();
// we deliberately do not close the writer now, and keep it open and
// locked.
// Verify that the writer worked:
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
- assertEquals(2, tr.getOrdinal(new CategoryPath("hi", "there")));
+ assertEquals(2, tr.getOrdinal(new FacetLabel("hi", "there")));
// Try to open a second writer, with the first one locking the directory.
// We expect to get a LockObtainFailedException.
try {
@@ -943,14 +943,14 @@ public class TestTaxonomyCombined extend
// write to the new writer.
DirectoryTaxonomyWriter.unlock(indexDir);
TaxonomyWriter tw2 = new DirectoryTaxonomyWriter(indexDir);
- tw2.addCategory(new CategoryPath("hey"));
+ tw2.addCategory(new FacetLabel("hey"));
tw2.close();
// See that the writer indeed wrote:
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
assertNotNull(newtr);
tr.close();
tr = newtr;
- assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
+ assertEquals(3, tr.getOrdinal(new FacetLabel("hey")));
tr.close();
tw.close();
indexDir.close();
@@ -967,7 +967,7 @@ public class TestTaxonomyCombined extend
*/
public static void fillTaxonomyCheckPaths(TaxonomyWriter tw) throws IOException {
for (int i = 0; i < categories.length; i++) {
- int ordinal = tw.addCategory(new CategoryPath(categories[i]));
+ int ordinal = tw.addCategory(new FacetLabel(categories[i]));
int expectedOrdinal = expectedPaths[i][expectedPaths[i].length-1];
if (ordinal!=expectedOrdinal) {
fail("For category "+showcat(categories[i])+" expected ordinal "+
@@ -1052,7 +1052,7 @@ public class TestTaxonomyCombined extend
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
- CategoryPath cp = new CategoryPath("a");
+ FacetLabel cp = new FacetLabel("a");
writer.addCategory(cp);
TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
assertNotNull("expected a new instance", newReader);
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestAddTaxonomy.java Sat Jan 4 11:19:08 2014
@@ -6,7 +6,7 @@ import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.FacetTestCase;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
@@ -48,7 +48,7 @@ public class TestAddTaxonomy extends Fac
while (numCats.decrementAndGet() > 0) {
String cat = Integer.toString(random.nextInt(range));
try {
- tw.addCategory(new CategoryPath("a", cat));
+ tw.addCategory(new FacetLabel("a", cat));
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -97,7 +97,7 @@ public class TestAddTaxonomy extends Fac
// validate that all source categories exist in destination, and their
// ordinals are as expected.
for (int j = 1; j < srcSize; j++) {
- CategoryPath cp = srcTR.getPath(j);
+ FacetLabel cp = srcTR.getPath(j);
int destOrdinal = destTR.getOrdinal(cp);
assertTrue(cp + " not found in destination", destOrdinal > 0);
assertEquals(destOrdinal, map[j]);
@@ -113,8 +113,8 @@ public class TestAddTaxonomy extends Fac
public void testAddEmpty() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
- destTW.addCategory(new CategoryPath("Author", "Rob Pike"));
- destTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
+ destTW.addCategory(new FacetLabel("Author", "Rob Pike"));
+ destTW.addCategory(new FacetLabel("Aardvarks", "Bob"));
destTW.commit();
Directory src = newDirectory();
@@ -134,8 +134,8 @@ public class TestAddTaxonomy extends Fac
Directory src = newDirectory();
DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src);
- srcTW.addCategory(new CategoryPath("Author", "Rob Pike"));
- srcTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
+ srcTW.addCategory(new FacetLabel("Author", "Rob Pike"));
+ srcTW.addCategory(new FacetLabel("Aardvarks", "Bob"));
srcTW.close();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
@@ -168,14 +168,14 @@ public class TestAddTaxonomy extends Fac
public void testSimple() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dest);
- tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
- tw1.addCategory(new CategoryPath("Animals", "Dog"));
- tw1.addCategory(new CategoryPath("Author", "Rob Pike"));
+ tw1.addCategory(new FacetLabel("Author", "Mark Twain"));
+ tw1.addCategory(new FacetLabel("Animals", "Dog"));
+ tw1.addCategory(new FacetLabel("Author", "Rob Pike"));
Directory src = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(src);
- tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
- tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
+ tw2.addCategory(new FacetLabel("Author", "Rob Pike"));
+ tw2.addCategory(new FacetLabel("Aardvarks", "Bob"));
tw2.close();
OrdinalMap map = randomOrdinalMap();
@@ -196,7 +196,7 @@ public class TestAddTaxonomy extends Fac
Directory src = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(src);
for (int i = 0; i < numCategories; i++) {
- tw.addCategory(new CategoryPath("a", Integer.toString(i)));
+ tw.addCategory(new FacetLabel("a", Integer.toString(i)));
}
tw.close();
@@ -209,7 +209,7 @@ public class TestAddTaxonomy extends Fac
public void run() {
for (int i = 0; i < numCategories; i++) {
try {
- destTW.addCategory(new CategoryPath("a", Integer.toString(i)));
+ destTW.addCategory(new FacetLabel("a", Integer.toString(i)));
} catch (IOException e) {
// shouldn't happen - if it does, let the test fail on uncaught exception.
throw new RuntimeException(e);
@@ -229,9 +229,9 @@ public class TestAddTaxonomy extends Fac
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dest);
// +2 to account for the root category + "a"
assertEquals(numCategories + 2, dtr.getSize());
- HashSet<CategoryPath> categories = new HashSet<CategoryPath>();
+ HashSet<FacetLabel> categories = new HashSet<FacetLabel>();
for (int i = 1; i < dtr.getSize(); i++) {
- CategoryPath cat = dtr.getPath(i);
+ FacetLabel cat = dtr.getPath(i);
assertTrue("category " + cat + " already existed", categories.add(cat));
}
dtr.close();
Modified: lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java?rev=1555338&r1=1555337&r2=1555338&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java (original)
+++ lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java Sat Jan 4 11:19:08 2014
@@ -1,19 +1,18 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetTestCase;
-import org.apache.lucene.facet.index.FacetFields;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.FacetsConfig;
+import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
-import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.Cl2oTaxonomyWriterCache;
+import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
@@ -46,9 +45,9 @@ public class TestConcurrentFacetedIndexi
@Override
public void close() {}
@Override
- public int get(CategoryPath categoryPath) { return -1; }
+ public int get(FacetLabel categoryPath) { return -1; }
@Override
- public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
+ public boolean put(FacetLabel categoryPath, int ordinal) { return true; }
@Override
public boolean isFull() { return true; }
@Override
@@ -56,12 +55,12 @@ public class TestConcurrentFacetedIndexi
};
- static CategoryPath newCategory() {
+ static FacetField newCategory() {
Random r = random();
String l1 = "l1." + r.nextInt(10); // l1.0-l1.9 (10 categories)
String l2 = "l2." + r.nextInt(30); // l2.0-l2.29 (30 categories)
String l3 = "l3." + r.nextInt(100); // l3.0-l3.99 (100 categories)
- return new CategoryPath(l1, l2, l3);
+ return new FacetField(l1, l2, l3);
}
static TaxonomyWriterCache newTaxoWriterCache(int ndocs) {
@@ -87,10 +86,14 @@ public class TestConcurrentFacetedIndexi
final IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, newTaxoWriterCache(numDocs.get()));
final Thread[] indexThreads = new Thread[atLeast(4)];
+ final FacetsConfig config = new FacetsConfig();
+ for(int i=0;i<10;i++) {
+ config.setHierarchical("l1." + i, true);
+ config.setMultiValued("l1." + i, true);
+ }
for (int i = 0; i < indexThreads.length; i++) {
indexThreads[i] = new Thread() {
- private final FacetFields facetFields = new FacetFields(tw);
@Override
public void run() {
@@ -99,20 +102,20 @@ public class TestConcurrentFacetedIndexi
try {
Document doc = new Document();
int numCats = random.nextInt(3) + 1; // 1-3
- List<CategoryPath> cats = new ArrayList<CategoryPath>(numCats);
while (numCats-- > 0) {
- CategoryPath cp = newCategory();
- cats.add(cp);
+ FacetField ff = newCategory();
+ doc.add(ff);
+
+ FacetLabel label = new FacetLabel(ff.dim, ff.path);
// add all prefixes to values
- int level = cp.length;
+ int level = label.length;
while (level > 0) {
- String s = cp.subpath(level).toString('/');
+ String s = FacetsConfig.pathToString(label.components, level);
values.put(s, s);
--level;
}
}
- facetFields.addFields(doc, cats);
- iw.addDocument(doc);
+ iw.addDocument(config.build(tw, doc));
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -125,14 +128,23 @@ public class TestConcurrentFacetedIndexi
for (Thread t : indexThreads) t.join();
DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw);
- assertEquals("mismatch number of categories", values.size() + 1, tr.getSize()); // +1 for root category
+ // +1 for root category
+ if (values.size() + 1 != tr.getSize()) {
+ for(String value : values.keySet()) {
+ FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
+ if (tr.getOrdinal(label) == -1) {
+ System.out.println("FAIL: path=" + label + " not recognized");
+ }
+ }
+ fail("mismatch number of categories");
+ }
int[] parents = tr.getParallelTaxonomyArrays().parents();
for (String cat : values.keySet()) {
- CategoryPath cp = new CategoryPath(cat, '/');
+ FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
int level = cp.length;
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
- CategoryPath path = CategoryPath.EMPTY;
+ FacetLabel path = null;
for (int i = 0; i < level; i++) {
path = cp.subpath(i + 1);
int ord = tr.getOrdinal(path);
@@ -140,9 +152,8 @@ public class TestConcurrentFacetedIndexi
parentOrd = ord; // next level should have this parent
}
}
- tr.close();
- IOUtils.close(tw, iw, taxoDir, indexDir);
+ IOUtils.close(tw, iw, tr, taxoDir, indexDir);
}
}