You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2016/03/18 09:30:22 UTC
[35/50] lucene-solr:apiv2: LUCENE-7106: Add helpers to compute
aggregated stats on points.
LUCENE-7106: Add helpers to compute aggregated stats on points.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/24830b7f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/24830b7f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/24830b7f
Branch: refs/heads/apiv2
Commit: 24830b7f18146b38078a80bc04f041011ab8689e
Parents: 2c8b2a6
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Mar 16 15:34:40 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Mar 16 15:34:40 2016 +0100
----------------------------------------------------------------------
.../org/apache/lucene/index/PointValues.java | 90 +++++++++++++++++++-
.../apache/lucene/index/TestPointValues.java | 62 ++++++++++++--
2 files changed, 145 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24830b7f/lucene/core/src/java/org/apache/lucene/index/PointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
index 1fb2654..a4fd323 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
@@ -23,10 +23,10 @@ import java.net.InetAddress;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.bkd.BKDWriter;
/**
@@ -86,6 +86,94 @@ public abstract class PointValues {
/** Maximum number of dimensions */
public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
+ /** Return the total number of points for the given field, summed across all
+ * leaves of the given {@link IndexReader}; leaves without point values are skipped.
+ * @see PointValues#size(String) */
+ public static long size(IndexReader reader, String field) throws IOException {
+ long size = 0;
+ for (LeafReaderContext ctx : reader.leaves()) {
+ PointValues values = ctx.reader().getPointValues();
+ if (values != null) {
+ size += values.size(field);
+ }
+ }
+ return size;
+ }
+
+ /** Return the total number of docs that have points for the given field, summed
+ * across all leaves of the given {@link IndexReader}; leaves without point values are skipped.
+ * @see PointValues#getDocCount(String) */
+ public static int getDocCount(IndexReader reader, String field) throws IOException {
+ int count = 0;
+ for (LeafReaderContext ctx : reader.leaves()) {
+ PointValues values = ctx.reader().getPointValues();
+ if (values != null) {
+ count += values.getDocCount(field);
+ }
+ }
+ return count;
+ }
+
+ /** Return the minimum packed value across all leaves of the given
+ * {@link IndexReader}, taken per dimension, or null if no leaf has a value.
+ * @see PointValues#getMinPackedValue(String) */
+ public static byte[] getMinPackedValue(IndexReader reader, String field) throws IOException {
+ byte[] minValue = null;
+ for (LeafReaderContext ctx : reader.leaves()) {
+ PointValues values = ctx.reader().getPointValues();
+ if (values == null) {
+ continue;
+ }
+ byte[] leafMinValue = values.getMinPackedValue(field);
+ if (leafMinValue == null) {
+ continue;
+ }
+ if (minValue == null) {
+ minValue = leafMinValue.clone();
+ } else {
+ final int numDimensions = values.getNumDimensions(field);
+ final int numBytesPerDimension = values.getBytesPerDimension(field);
+ for (int i = 0; i < numDimensions; ++i) {
+ int offset = i * numBytesPerDimension;
+ if (StringHelper.compare(numBytesPerDimension, leafMinValue, offset, minValue, offset) < 0) {
+ System.arraycopy(leafMinValue, offset, minValue, offset, numBytesPerDimension);
+ }
+ }
+ }
+ }
+ return minValue;
+ }
+
+ /** Return the maximum packed value across all leaves of the given
+ * {@link IndexReader}, taken per dimension, or null if no leaf has a value.
+ * @see PointValues#getMaxPackedValue(String) */
+ public static byte[] getMaxPackedValue(IndexReader reader, String field) throws IOException {
+ byte[] maxValue = null;
+ for (LeafReaderContext ctx : reader.leaves()) {
+ PointValues values = ctx.reader().getPointValues();
+ if (values == null) {
+ continue;
+ }
+ byte[] leafMaxValue = values.getMaxPackedValue(field);
+ if (leafMaxValue == null) {
+ continue;
+ }
+ if (maxValue == null) {
+ maxValue = leafMaxValue.clone();
+ } else {
+ final int numDimensions = values.getNumDimensions(field);
+ final int numBytesPerDimension = values.getBytesPerDimension(field);
+ for (int i = 0; i < numDimensions; ++i) {
+ int offset = i * numBytesPerDimension;
+ if (StringHelper.compare(numBytesPerDimension, leafMaxValue, offset, maxValue, offset) > 0) {
+ System.arraycopy(leafMaxValue, offset, maxValue, offset, numBytesPerDimension);
+ }
+ }
+ }
+ }
+ return maxValue;
+ }
+
/** Default constructor */
protected PointValues() {
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/24830b7f/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
index 49cbc2a..c7ca2dc 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
@@ -22,12 +22,6 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.PointsReader;
-import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
@@ -657,4 +651,60 @@ public class TestPointValues extends LuceneTestCase {
assertTrue(output.toString(IOUtils.UTF_8).contains("test: points..."));
dir.close();
}
+
+ public void testMergedStats() throws IOException {
+ final int iters = atLeast(3);
+ for (int iter = 0; iter < iters; ++iter) {
+ doTestMergedStats();
+ }
+ }
+
+ private static byte[][] randomBinaryValue(int numDims, int numBytesPerDim) {
+ byte[][] bytes = new byte[numDims][];
+ for (int i = 0; i < numDims; ++i) {
+ bytes[i] = new byte[numBytesPerDim];
+ random().nextBytes(bytes[i]);
+ }
+ return bytes;
+ }
+
+ private void doTestMergedStats() throws IOException {
+ final int numDims = TestUtil.nextInt(random(), 1, 8);
+ final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
+ Directory dir = new RAMDirectory();
+ IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+ final int numDocs = TestUtil.nextInt(random(), 10, 20);
+ for (int i = 0; i < numDocs; ++i) {
+ Document doc = new Document();
+ final int numPoints = random().nextInt(3);
+ for (int j = 0; j < numPoints; ++j) {
+ doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
+ }
+ w.addDocument(doc);
+ if (random().nextBoolean()) {
+ DirectoryReader.open(w).close();
+ }
+ }
+
+ final IndexReader reader1 = DirectoryReader.open(w);
+ w.forceMerge(1);
+ final IndexReader reader2 = DirectoryReader.open(w);
+ final PointValues expected = getOnlyLeafReader(reader2).getPointValues();
+ if (expected == null) {
+ assertNull(PointValues.getMinPackedValue(reader1, "field"));
+ assertNull(PointValues.getMaxPackedValue(reader1, "field"));
+ assertEquals(0, PointValues.getDocCount(reader1, "field"));
+ assertEquals(0, PointValues.size(reader1, "field"));
+ } else {
+ assertArrayEquals(
+ expected.getMinPackedValue("field"),
+ PointValues.getMinPackedValue(reader1, "field"));
+ assertArrayEquals(
+ expected.getMaxPackedValue("field"),
+ PointValues.getMaxPackedValue(reader1, "field"));
+ assertEquals(expected.getDocCount("field"), PointValues.getDocCount(reader1, "field"));
+ assertEquals(expected.size("field"), PointValues.size(reader1, "field"));
+ }
+ IOUtils.close(w, reader1, reader2, dir);
+ }
}