You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/03/16 16:04:54 UTC

[1/2] lucene-solr:branch_6x: LUCENE-7106: Add helpers to compute aggregated stats on points.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6_0 b4d52d60a -> 4e0d8355f
  refs/heads/branch_6x 6f124e100 -> cb85f2611


LUCENE-7106: Add helpers to compute aggregated stats on points.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/cb85f261
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/cb85f261
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/cb85f261

Branch: refs/heads/branch_6x
Commit: cb85f2611e164b089c72ffa0c29bb72e6512cd2d
Parents: 6f124e1
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Mar 16 15:34:40 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Mar 16 15:36:51 2016 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/index/PointValues.java    | 90 +++++++++++++++++++-
 .../apache/lucene/index/TestPointValues.java    | 62 ++++++++++++--
 2 files changed, 145 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cb85f261/lucene/core/src/java/org/apache/lucene/index/PointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
index 1fb2654..a4fd323 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
@@ -23,10 +23,10 @@ import java.net.InetAddress;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.bkd.BKDWriter;
 
 /** 
@@ -86,6 +86,94 @@ public abstract class PointValues {
   /** Maximum number of dimensions */
   public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
 
+  /** Return the cumulated number of points across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#size(String) */
+  public static long size(IndexReader reader, String field) throws IOException {
+    long size = 0;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values != null) {
+        size += values.size(field);
+      }
+    }
+    return size;
+  }
+
+  /** Return the cumulated number of docs that have points across all leaves
+   * of the given {@link IndexReader}.
+   *  @see PointValues#getDocCount(String) */
+  public static int getDocCount(IndexReader reader, String field) throws IOException {
+    int count = 0;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values != null) {
+        count += values.getDocCount(field);
+      }
+    }
+    return count;
+  }
+
+  /** Return the minimum packed values across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#getMinPackedValue(String) */
+  public static byte[] getMinPackedValue(IndexReader reader, String field) throws IOException {
+    byte[] minValue = null;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values == null) {
+        continue;
+      }
+      byte[] leafMinValue = values.getMinPackedValue(field);
+      if (leafMinValue == null) {
+        continue;
+      }
+      if (minValue == null) {
+        minValue = leafMinValue.clone();
+      } else {
+        final int numDimensions = values.getNumDimensions(field);
+        final int numBytesPerDimension = values.getBytesPerDimension(field);
+        for (int i = 0; i < numDimensions; ++i) {
+          int offset = i * numBytesPerDimension;
+          if (StringHelper.compare(numBytesPerDimension, leafMinValue, offset, minValue, offset) < 0) {
+            System.arraycopy(leafMinValue, offset, minValue, offset, numBytesPerDimension);
+          }
+        }
+      }
+    }
+    return minValue;
+  }
+
+  /** Return the maximum packed values across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#getMaxPackedValue(String) */
+  public static byte[] getMaxPackedValue(IndexReader reader, String field) throws IOException {
+    byte[] maxValue = null;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values == null) {
+        continue;
+      }
+      byte[] leafMaxValue = values.getMaxPackedValue(field);
+      if (leafMaxValue == null) {
+        continue;
+      }
+      if (maxValue == null) {
+        maxValue = leafMaxValue.clone();
+      } else {
+        final int numDimensions = values.getNumDimensions(field);
+        final int numBytesPerDimension = values.getBytesPerDimension(field);
+        for (int i = 0; i < numDimensions; ++i) {
+          int offset = i * numBytesPerDimension;
+          if (StringHelper.compare(numBytesPerDimension, leafMaxValue, offset, maxValue, offset) > 0) {
+            System.arraycopy(leafMaxValue, offset, maxValue, offset, numBytesPerDimension);
+          }
+        }
+      }
+    }
+    return maxValue;
+  }
+
   /** Default constructor */
   protected PointValues() {
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cb85f261/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
index 49cbc2a..c7ca2dc 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
@@ -22,12 +22,6 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.PointsReader;
-import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoublePoint;
@@ -657,4 +651,60 @@ public class TestPointValues extends LuceneTestCase {
     assertTrue(output.toString(IOUtils.UTF_8).contains("test: points..."));
     dir.close();
   }
+
+  public void testMergedStats() throws IOException {
+    final int iters = atLeast(3);
+    for (int iter = 0; iter < iters; ++iter) {
+      doTestMergedStats();
+    }
+  }
+
+  private static byte[][] randomBinaryValue(int numDims, int numBytesPerDim) {
+    byte[][] bytes = new byte[numDims][];
+    for (int i = 0; i < numDims; ++i) {
+      bytes[i] = new byte[numBytesPerDim];
+      random().nextBytes(bytes[i]);
+    }
+    return bytes;
+  }
+
+  private void doTestMergedStats() throws IOException {
+    final int numDims = TestUtil.nextInt(random(), 1, 8);
+    final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
+    Directory dir = new RAMDirectory();
+    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+    final int numDocs = TestUtil.nextInt(random(), 10, 20);
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      final int numPoints = random().nextInt(3);
+      for (int j = 0; j < numPoints; ++j) {
+        doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
+      }
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        DirectoryReader.open(w).close();
+      }
+    }
+
+    final IndexReader reader1 = DirectoryReader.open(w);
+    w.forceMerge(1);
+    final IndexReader reader2 = DirectoryReader.open(w);
+    final PointValues expected = getOnlyLeafReader(reader2).getPointValues();
+    if (expected == null) {
+      assertNull(PointValues.getMinPackedValue(reader1, "field"));
+      assertNull(PointValues.getMaxPackedValue(reader1, "field"));
+      assertEquals(0, PointValues.getDocCount(reader1, "field"));
+      assertEquals(0, PointValues.size(reader1, "field"));
+    } else {
+      assertArrayEquals(
+          expected.getMinPackedValue("field"),
+          PointValues.getMinPackedValue(reader1, "field"));
+      assertArrayEquals(
+          expected.getMaxPackedValue("field"),
+          PointValues.getMaxPackedValue(reader1, "field"));
+      assertEquals(expected.getDocCount("field"), PointValues.getDocCount(reader1, "field"));
+      assertEquals(expected.size("field"),  PointValues.size(reader1, "field"));
+    }
+    IOUtils.close(w, reader1, reader2, dir);
+  }
 }


[2/2] lucene-solr:branch_6_0: LUCENE-7106: Add helpers to compute aggregated stats on points.

Posted by jp...@apache.org.
LUCENE-7106: Add helpers to compute aggregated stats on points.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4e0d8355
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4e0d8355
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4e0d8355

Branch: refs/heads/branch_6_0
Commit: 4e0d8355f087baa04522d0fe21453cfe5f237128
Parents: b4d52d6
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Mar 16 15:34:40 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Mar 16 15:37:16 2016 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/index/PointValues.java    | 90 +++++++++++++++++++-
 .../apache/lucene/index/TestPointValues.java    | 62 ++++++++++++--
 2 files changed, 145 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e0d8355/lucene/core/src/java/org/apache/lucene/index/PointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
index 1fb2654..a4fd323 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
@@ -23,10 +23,10 @@ import java.net.InetAddress;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.bkd.BKDWriter;
 
 /** 
@@ -86,6 +86,94 @@ public abstract class PointValues {
   /** Maximum number of dimensions */
   public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
 
+  /** Return the cumulated number of points across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#size(String) */
+  public static long size(IndexReader reader, String field) throws IOException {
+    long size = 0;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values != null) {
+        size += values.size(field);
+      }
+    }
+    return size;
+  }
+
+  /** Return the cumulated number of docs that have points across all leaves
+   * of the given {@link IndexReader}.
+   *  @see PointValues#getDocCount(String) */
+  public static int getDocCount(IndexReader reader, String field) throws IOException {
+    int count = 0;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values != null) {
+        count += values.getDocCount(field);
+      }
+    }
+    return count;
+  }
+
+  /** Return the minimum packed values across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#getMinPackedValue(String) */
+  public static byte[] getMinPackedValue(IndexReader reader, String field) throws IOException {
+    byte[] minValue = null;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values == null) {
+        continue;
+      }
+      byte[] leafMinValue = values.getMinPackedValue(field);
+      if (leafMinValue == null) {
+        continue;
+      }
+      if (minValue == null) {
+        minValue = leafMinValue.clone();
+      } else {
+        final int numDimensions = values.getNumDimensions(field);
+        final int numBytesPerDimension = values.getBytesPerDimension(field);
+        for (int i = 0; i < numDimensions; ++i) {
+          int offset = i * numBytesPerDimension;
+          if (StringHelper.compare(numBytesPerDimension, leafMinValue, offset, minValue, offset) < 0) {
+            System.arraycopy(leafMinValue, offset, minValue, offset, numBytesPerDimension);
+          }
+        }
+      }
+    }
+    return minValue;
+  }
+
+  /** Return the maximum packed values across all leaves of the given
+   * {@link IndexReader}.
+   *  @see PointValues#getMaxPackedValue(String) */
+  public static byte[] getMaxPackedValue(IndexReader reader, String field) throws IOException {
+    byte[] maxValue = null;
+    for (LeafReaderContext ctx : reader.leaves()) {
+      PointValues values = ctx.reader().getPointValues();
+      if (values == null) {
+        continue;
+      }
+      byte[] leafMaxValue = values.getMaxPackedValue(field);
+      if (leafMaxValue == null) {
+        continue;
+      }
+      if (maxValue == null) {
+        maxValue = leafMaxValue.clone();
+      } else {
+        final int numDimensions = values.getNumDimensions(field);
+        final int numBytesPerDimension = values.getBytesPerDimension(field);
+        for (int i = 0; i < numDimensions; ++i) {
+          int offset = i * numBytesPerDimension;
+          if (StringHelper.compare(numBytesPerDimension, leafMaxValue, offset, maxValue, offset) > 0) {
+            System.arraycopy(leafMaxValue, offset, maxValue, offset, numBytesPerDimension);
+          }
+        }
+      }
+    }
+    return maxValue;
+  }
+
   /** Default constructor */
   protected PointValues() {
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4e0d8355/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
index 49cbc2a..c7ca2dc 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPointValues.java
@@ -22,12 +22,6 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.PointsFormat;
-import org.apache.lucene.codecs.PointsReader;
-import org.apache.lucene.codecs.PointsWriter;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsReader;
-import org.apache.lucene.codecs.lucene60.Lucene60PointsWriter;
 import org.apache.lucene.document.BinaryPoint;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoublePoint;
@@ -657,4 +651,60 @@ public class TestPointValues extends LuceneTestCase {
     assertTrue(output.toString(IOUtils.UTF_8).contains("test: points..."));
     dir.close();
   }
+
+  public void testMergedStats() throws IOException {
+    final int iters = atLeast(3);
+    for (int iter = 0; iter < iters; ++iter) {
+      doTestMergedStats();
+    }
+  }
+
+  private static byte[][] randomBinaryValue(int numDims, int numBytesPerDim) {
+    byte[][] bytes = new byte[numDims][];
+    for (int i = 0; i < numDims; ++i) {
+      bytes[i] = new byte[numBytesPerDim];
+      random().nextBytes(bytes[i]);
+    }
+    return bytes;
+  }
+
+  private void doTestMergedStats() throws IOException {
+    final int numDims = TestUtil.nextInt(random(), 1, 8);
+    final int numBytesPerDim = TestUtil.nextInt(random(), 1, 16);
+    Directory dir = new RAMDirectory();
+    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+    final int numDocs = TestUtil.nextInt(random(), 10, 20);
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      final int numPoints = random().nextInt(3);
+      for (int j = 0; j < numPoints; ++j) {
+        doc.add(new BinaryPoint("field", randomBinaryValue(numDims, numBytesPerDim)));
+      }
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        DirectoryReader.open(w).close();
+      }
+    }
+
+    final IndexReader reader1 = DirectoryReader.open(w);
+    w.forceMerge(1);
+    final IndexReader reader2 = DirectoryReader.open(w);
+    final PointValues expected = getOnlyLeafReader(reader2).getPointValues();
+    if (expected == null) {
+      assertNull(PointValues.getMinPackedValue(reader1, "field"));
+      assertNull(PointValues.getMaxPackedValue(reader1, "field"));
+      assertEquals(0, PointValues.getDocCount(reader1, "field"));
+      assertEquals(0, PointValues.size(reader1, "field"));
+    } else {
+      assertArrayEquals(
+          expected.getMinPackedValue("field"),
+          PointValues.getMinPackedValue(reader1, "field"));
+      assertArrayEquals(
+          expected.getMaxPackedValue("field"),
+          PointValues.getMaxPackedValue(reader1, "field"));
+      assertEquals(expected.getDocCount("field"), PointValues.getDocCount(reader1, "field"));
+      assertEquals(expected.size("field"),  PointValues.size(reader1, "field"));
+    }
+    IOUtils.close(w, reader1, reader2, dir);
+  }
 }