You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/12/03 19:22:15 UTC
svn commit: r1547511 - in /lucene/dev/branches/lucene5339/lucene/facet/src:
java/org/apache/lucene/facet/ java/org/apache/lucene/facet/taxonomy/
java/org/apache/lucene/facet/taxonomy/writercache/
test/org/apache/lucene/facet/taxonomy/ test/org/apache/l...
Author: mikemccand
Date: Tue Dec 3 18:22:14 2013
New Revision: 1547511
URL: http://svn.apache.org/r1547511
Log:
LUCENE-5339: small opto for range facets, and factor out base class; put longHashCode back
Added:
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java (with props)
Modified:
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java
lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java
lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java
lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java Tue Dec 3 18:22:14 2013
@@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits;
/** Represents a range over double values. */
public final class DoubleRange extends Range {
- private final double minIncl;
- private final double maxIncl;
+ final double minIncl;
+ final double maxIncl;
public final double min;
public final double max;
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java Tue Dec 3 18:22:14 2013
@@ -45,11 +45,7 @@ import org.apache.lucene.queries.functio
* pass just a the field name).
*
* @lucene.experimental */
-public class DoubleRangeFacetCounts extends Facets {
- private final DoubleRange[] ranges;
- private final int[] counts;
- private final String field;
- private int totCount;
+public class DoubleRangeFacetCounts extends RangeFacetCounts {
/** Create {@code RangeFacetCounts}, using {@link
* DoubleFieldSource} from the specified field. */
@@ -60,14 +56,22 @@ public class DoubleRangeFacetCounts exte
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public DoubleRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, DoubleRange... ranges) throws IOException {
- this.ranges = ranges;
- this.field = field;
- counts = new int[ranges.length];
+ super(field, ranges);
count(valueSource, hits.getMatchingDocs());
}
private void count(ValueSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
+ DoubleRange[] ranges = (DoubleRange[]) this.ranges;
+
+ // Compute min & max over all ranges:
+ double minIncl = Double.POSITIVE_INFINITY;
+ double maxIncl = Double.NEGATIVE_INFINITY;
+ for(DoubleRange range : ranges) {
+ minIncl = Math.min(minIncl, range.minIncl);
+ maxIncl = Math.max(maxIncl, range.maxIncl);
+ }
+
// TODO: test if this is faster (in the past it was
// faster to do MatchingDocs on the inside) ... see
// patches on LUCENE-4965):
@@ -81,6 +85,10 @@ public class DoubleRangeFacetCounts exte
if (fv.exists(doc)) {
double v = fv.doubleVal(doc);
+ if (v < minIncl || v > maxIncl) {
+ doc++;
+ continue;
+ }
// TODO: if all ranges are non-overlapping, we
// should instead do a bin-search up front
@@ -98,35 +106,4 @@ public class DoubleRangeFacetCounts exte
}
}
}
-
- // nocommit all args are ... unused ... this doesn't "fit"
- // very well:
-
- @Override
- public FacetResult getTopChildren(int topN, String dim, String... path) {
- if (dim.equals(field) == false) {
- throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
- }
- if (path.length != 0) {
- throw new IllegalArgumentException("path.length should be 0");
- }
- LabelAndValue[] labelValues = new LabelAndValue[counts.length];
- for(int i=0;i<counts.length;i++) {
- // nocommit can we add the range into this?
- labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
- }
-
- return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
- }
-
- @Override
- public Number getSpecificValue(String dim, String... path) throws IOException {
- // nocommit we could impl this?
- throw new UnsupportedOperationException();
- }
-
- @Override
- public List<FacetResult> getAllDims(int topN) throws IOException {
- return Collections.singletonList(getTopChildren(topN, null));
- }
}
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java Tue Dec 3 18:22:14 2013
@@ -31,8 +31,8 @@ import org.apache.lucene.util.Bits;
/** Represents a range over long values. */
public final class LongRange extends Range {
- private final long minIncl;
- private final long maxIncl;
+ final long minIncl;
+ final long maxIncl;
public final long min;
public final long max;
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java Tue Dec 3 18:22:14 2013
@@ -35,13 +35,9 @@ import org.apache.lucene.queries.functio
* distance dimension like "< 1 km", "< 2 km", etc.).
*
* @lucene.experimental */
-public class LongRangeFacetCounts extends Facets {
- private final LongRange[] ranges;
- private final int[] counts;
- private final String field;
- private int totCount;
+public class LongRangeFacetCounts extends RangeFacetCounts {
- /** Create {@code RangeFacetCounts}, using {@link
+ /** Create {@code LongRangeFacetCounts}, using {@link
* LongFieldSource} from the specified field. */
public LongRangeFacetCounts(String field, FacetsCollector hits, LongRange... ranges) throws IOException {
this(field, new LongFieldSource(field), hits, ranges);
@@ -50,14 +46,22 @@ public class LongRangeFacetCounts extend
/** Create {@code RangeFacetCounts}, using the provided
* {@link ValueSource}. */
public LongRangeFacetCounts(String field, ValueSource valueSource, FacetsCollector hits, LongRange... ranges) throws IOException {
- this.ranges = ranges;
- this.field = field;
- counts = new int[ranges.length];
+ super(field, ranges);
count(valueSource, hits.getMatchingDocs());
}
private void count(ValueSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {
+ LongRange[] ranges = (LongRange[]) this.ranges;
+
+ // Compute min & max over all ranges:
+ long minIncl = Long.MAX_VALUE;
+ long maxIncl = Long.MIN_VALUE;
+ for(LongRange range : ranges) {
+ minIncl = Math.min(minIncl, range.minIncl);
+ maxIncl = Math.max(maxIncl, range.maxIncl);
+ }
+
// TODO: test if this is faster (in the past it was
// faster to do MatchingDocs on the inside) ... see
// patches on LUCENE-4965):
@@ -71,6 +75,10 @@ public class LongRangeFacetCounts extend
if (fv.exists(doc)) {
long v = fv.longVal(doc);
+ if (v < minIncl || v > maxIncl) {
+ doc++;
+ continue;
+ }
// TODO: if all ranges are non-overlapping, we
// should instead do a bin-search up front
@@ -88,35 +96,4 @@ public class LongRangeFacetCounts extend
}
}
}
-
- // nocommit all args are ... unused ... this doesn't "fit"
- // very well:
-
- @Override
- public FacetResult getTopChildren(int topN, String dim, String... path) {
- if (dim.equals(field) == false) {
- throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
- }
- if (path.length != 0) {
- throw new IllegalArgumentException("path.length should be 0");
- }
- LabelAndValue[] labelValues = new LabelAndValue[counts.length];
- for(int i=0;i<counts.length;i++) {
- // nocommit can we add the range into this?
- labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
- }
-
- return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
- }
-
- @Override
- public Number getSpecificValue(String dim, String... path) throws IOException {
- // nocommit we could impl this?
- throw new UnsupportedOperationException();
- }
-
- @Override
- public List<FacetResult> getAllDims(int topN) throws IOException {
- return Collections.singletonList(getTopChildren(topN, null));
- }
}
Added: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java?rev=1547511&view=auto
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java (added)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java Tue Dec 3 18:22:14 2013
@@ -0,0 +1,70 @@
+package org.apache.lucene.facet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+
+
+/** Base class for range faceting.
+ *
+ * @lucene.experimental */
+abstract class RangeFacetCounts extends Facets {
+ protected final Range[] ranges;
+ protected final int[] counts;
+ protected final String field;
+ protected int totCount;
+
+ /** Create {@code RangeFacetCounts}, using {@link
+ * LongFieldSource} from the specified field. */
+ protected RangeFacetCounts(String field, Range[] ranges) throws IOException {
+ this.field = field;
+ this.ranges = ranges;
+ counts = new int[ranges.length];
+ }
+
+ // nocommit all args are ... unused ... this doesn't "fit"
+ // very well:
+
+ @Override
+ public FacetResult getTopChildren(int topN, String dim, String... path) {
+ if (dim.equals(field) == false) {
+ throw new IllegalArgumentException("invalid dim \"" + dim + "\"; should be \"" + field + "\"");
+ }
+ if (path.length != 0) {
+ throw new IllegalArgumentException("path.length should be 0");
+ }
+ LabelAndValue[] labelValues = new LabelAndValue[counts.length];
+ for(int i=0;i<counts.length;i++) {
+ labelValues[i] = new LabelAndValue(ranges[i].label, counts[i]);
+ }
+ return new FacetResult(dim, path, totCount, labelValues, labelValues.length);
+ }
+
+ @Override
+ public Number getSpecificValue(String dim, String... path) throws IOException {
+ // TODO: should we impl this?
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List<FacetResult> getAllDims(int topN) throws IOException {
+ return Collections.singletonList(getTopChildren(topN, null));
+ }
+}
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FacetLabel.java Tue Dec 3 18:22:14 2013
@@ -41,9 +41,6 @@ public class FacetLabel implements Compa
*/
public final static int MAX_CATEGORY_PATH_LENGTH = (BYTE_BLOCK_SIZE - 2) / 4;
- /** An empty {@link FacetLabel} */
- //public static final FacetLabel EMPTY = new FacetLabel();
-
/**
* The components of this {@link FacetLabel}. Note that this array may be
* shared with other {@link FacetLabel} instances, e.g. as a result of
@@ -114,8 +111,12 @@ public class FacetLabel implements Compa
final int len = length < other.length ? length : other.length;
for (int i = 0, j = 0; i < len; i++, j++) {
int cmp = components[i].compareTo(other.components[j]);
- if (cmp < 0) return -1; // this is 'before'
- if (cmp > 0) return 1; // this is 'after'
+ if (cmp < 0) {
+ return -1; // this is 'before'
+ }
+ if (cmp > 0) {
+ return 1; // this is 'after'
+ }
}
// one is a prefix of the other
@@ -156,6 +157,23 @@ public class FacetLabel implements Compa
return hash;
}
+ /** Calculate a 64-bit hash function for this path. This
+ * is necessary for {@link NameHashIntCacheLRU} (the
+ * default cache impl for {@link
+ * LruTaxonomyWriterCache}) to reduce the chance of
+ * "silent but deadly" collisions. */
+ public long longHashCode() {
+ if (length == 0) {
+ return 0;
+ }
+
+ long hash = length;
+ for (int i = 0; i < length; i++) {
+ hash = hash * 65599 + components[i].hashCode();
+ }
+ return hash;
+ }
+
/** Returns a sub-path of this path up to {@code length} components. */
public FacetLabel subpath(final int length) {
if (length >= this.length || length < 0) {
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/NameHashIntCacheLRU.java Tue Dec 3 18:22:14 2013
@@ -36,12 +36,12 @@ public class NameHashIntCacheLRU extends
@Override
Object key(FacetLabel name) {
- return new Integer(name.hashCode());
+ return new Long(name.longHashCode());
}
@Override
Object key(FacetLabel name, int prefixLen) {
- return new Integer(name.subpath(prefixLen).hashCode());
+ return new Long(name.subpath(prefixLen).longHashCode());
}
}
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestFacetLabel.java Tue Dec 3 18:22:14 2013
@@ -108,6 +108,13 @@ public class TestFacetLabel extends Face
}
@Test
+ public void testLongHashCode() {
+ assertEquals(new FacetLabel().longHashCode(), new FacetLabel().longHashCode());
+ assertFalse(new FacetLabel().longHashCode() == new FacetLabel("hi").longHashCode());
+ assertEquals(new FacetLabel("hello", "world").longHashCode(), new FacetLabel("hello", "world").longHashCode());
+ }
+
+ @Test
public void testArrayConstructor() {
FacetLabel p = new FacetLabel("hello", "world", "yo");
assertEquals(3, p.length);
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestConcurrentFacetedIndexing.java Tue Dec 3 18:22:14 2013
@@ -128,14 +128,23 @@ public class TestConcurrentFacetedIndexi
for (Thread t : indexThreads) t.join();
DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(tw);
- assertEquals("mismatch number of categories", values.size() + 1, tr.getSize()); // +1 for root category
+ // +1 for root category
+ if (values.size() + 1 != tr.getSize()) {
+ for(String value : values.keySet()) {
+ FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
+ if (tr.getOrdinal(label) == -1) {
+ System.out.println("FAIL: path=" + label + " not recognized");
+ }
+ }
+ fail("mismatch number of categories");
+ }
int[] parents = tr.getParallelTaxonomyArrays().parents();
for (String cat : values.keySet()) {
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
int level = cp.length;
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
- FacetLabel path = new FacetLabel();
+ FacetLabel path = null;
for (int i = 0; i < level; i++) {
path = cp.subpath(i + 1);
int ord = tr.getOrdinal(path);
Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java?rev=1547511&r1=1547510&r2=1547511&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyWriter.java Tue Dec 3 18:22:14 2013
@@ -256,6 +256,9 @@ public class TestDirectoryTaxonomyWriter
// this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
cache = new LruTaxonomyWriterCache(ncats / 10);
}
+ if (VERBOSE) {
+ System.out.println("TEST: use cache=" + cache);
+ }
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
Thread[] addThreads = new Thread[atLeast(4)];
for (int z = 0; z < addThreads.length; z++) {
@@ -291,7 +294,17 @@ public class TestDirectoryTaxonomyWriter
tw.close();
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
- assertEquals("mismatch number of categories", values.size() + 1, dtr.getSize()); // +1 for root category
+ // +1 for root category
+ if (values.size() + 1 != dtr.getSize()) {
+ for(String value : values.keySet()) {
+ FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(value));
+ if (dtr.getOrdinal(label) == -1) {
+ System.out.println("FAIL: path=" + label + " not recognized");
+ }
+ }
+ fail("mismatch number of categories");
+ }
+
int[] parents = dtr.getParallelTaxonomyArrays().parents();
for (String cat : values.keySet()) {
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(cat));
@@ -306,9 +319,8 @@ public class TestDirectoryTaxonomyWriter
parentOrd = ord; // next level should have this parent
}
}
- dtr.close();
-
- dir.close();
+
+ IOUtils.close(dtr, dir);
}
private long getEpoch(Directory taxoDir) throws IOException {