You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/03/25 19:05:20 UTC

svn commit: r1669190 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/search/ solr/core/src/java/org/apache/solr/search/facet/ solr/core/src/test/org/apache/solr/search/ solr/core/src/test/org/apache/solr/search/facet/

Author: yonik
Date: Wed Mar 25 18:05:20 2015
New Revision: 1669190

URL: http://svn.apache.org/r1669190
Log:
SOLR-7306: percentiles for new facet module

Added:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
      - copied unchanged from r1669189, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1669190&r1=1669189&r2=1669190&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed Mar 25 18:05:20 2015
@@ -148,6 +148,14 @@ New Features
 
 * SOLR-6350: StatsComponent now supports Percentiles (Xu Zhang, hossman)
 
+* SOLR-7306: Percentiles support for the new facet module.  Percentiles
+  can be calculated for all facet buckets and field faceting can sort
+  by percentile values.
+  Examples:
+    json.facet={ median_age : "percentile(age,50)" }
+    json.facet={ salary_percentiles : "percentile(salary,25,50,75)" }
+  (yonik)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java?rev=1669190&r1=1669189&r2=1669190&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java Wed Mar 25 18:05:20 2015
@@ -45,6 +45,7 @@ import org.apache.solr.search.facet.AvgA
 import org.apache.solr.search.facet.CountAgg;
 import org.apache.solr.search.facet.MaxAgg;
 import org.apache.solr.search.facet.MinAgg;
+import org.apache.solr.search.facet.PercentileAgg;
 import org.apache.solr.search.facet.SumAgg;
 import org.apache.solr.search.facet.SumsqAgg;
 import org.apache.solr.search.facet.UniqueAgg;
@@ -868,7 +869,7 @@ public abstract class ValueSourceParser
       }
     });
 
-
+    addParser("agg_percentile", new PercentileAgg.Parser());
 
   }
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java?rev=1669190&r1=1669189&r2=1669190&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java Wed Mar 25 18:05:20 2015
@@ -943,6 +943,7 @@ public class QueryEqualityTest extends S
     assertFuncEquals("agg_count()", "agg_count()");
     assertFuncEquals("agg_unique(foo_i)", "agg_unique(foo_i)");
     assertFuncEquals("agg_sumsq(foo_i)", "agg_sumsq(foo_i)");
+    assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)");
     // assertFuncEquals("agg_stdev(foo_i)", "agg_stdev(foo_i)");
     // assertFuncEquals("agg_multistat(foo_i)", "agg_multistat(foo_i)");
   }

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1669190&r1=1669189&r2=1669190&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Wed Mar 25 18:05:20 2015
@@ -17,6 +17,7 @@ package org.apache.solr.search.facet;
  * limitations under the License.
  */
 
+import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -24,6 +25,7 @@ import java.util.Comparator;
 import java.util.List;
 import java.util.Random;
 
+import com.tdunning.math.stats.AVLTreeDigest;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.SolrTestCaseHS;
@@ -366,6 +368,30 @@ public class TestJsonFacets extends Solr
             ", f2:{  'buckets':[{ val:'B', count:3, n1:-3.0}, { val:'A', count:2, n1:6.0 }]} }"
     );
 
+    // percentiles 0,10,50,90,100
+    // catA: 2.0 2.2 3.0 3.8 4.0
+    // catB: -9.0 -8.2 -5.0 7.800000000000001 11.0
+    // all: -9.0 -7.3999999999999995 2.0 8.200000000000001 11.0
+    // test sorting by single percentile
+    client.testJQ(params(p, "q", "*:*"
+            , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_d},50)'}  }}" +
+                " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_d},50)'}  }} }"
+        )
+        , "facets=={ 'count':6, " +
+            "  f1:{  'buckets':[{ val:'A', count:2, n1:3.0 }, { val:'B', count:3, n1:-5.0}]}" +
+            ", f2:{  'buckets':[{ val:'B', count:3, n1:-5.0}, { val:'A', count:2, n1:3.0 }]} }"
+    );
+
+    // test sorting by multiple percentiles (sort is by first)
+    client.testJQ(params(p, "q", "*:*"
+            , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_d},50,0,100)'}  }}" +
+                " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_d},50,0,100)'}  }} }"
+        )
+        , "facets=={ 'count':6, " +
+            "  f1:{  'buckets':[{ val:'A', count:2, n1:[3.0,2.0,4.0] }, { val:'B', count:3, n1:[-5.0,-9.0,11.0] }]}" +
+            ", f2:{  'buckets':[{ val:'B', count:3, n1:[-5.0,-9.0,11.0]}, { val:'A', count:2, n1:[3.0,2.0,4.0] }]} }"
+    );
+
     // test sorting by count/index order
     client.testJQ(params(p, "q", "*:*"
             , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'count desc' }  }" +
@@ -557,15 +583,15 @@ public class TestJsonFacets extends Solr
 
     // stats at top level
     client.testJQ(params(p, "q", "*:*"
-            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})' }"
+            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
         )
         , "facets=={ 'count':6, " +
-            "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2  }"
+            "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2, med:2.0, perc:[-9.0,2.0,11.0]  }"
     );
 
     // stats at top level, no matches
     client.testJQ(params(p, "q", "id:DOESNOTEXIST"
-            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})' }"
+            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
         )
         , "facets=={count:0 " +
             "/* ,sum1:0.0, sumsq1:0.0, avg1:0.0, min1:'NaN', max1:'NaN', numwhere:0 */ }"
@@ -671,4 +697,87 @@ public class TestJsonFacets extends Solr
     doStats( client, params() );
   }
 
+  /***
+  public void testPercentiles() {
+    AVLTreeDigest catA = new AVLTreeDigest(100);
+    catA.add(4);
+    catA.add(2);
+
+    AVLTreeDigest catB = new AVLTreeDigest(100);
+    catB.add(-9);
+    catB.add(11);
+    catB.add(-5);
+
+    AVLTreeDigest all = new AVLTreeDigest(100);
+    all.add(catA);
+    all.add(catB);
+
+    System.out.println(str(catA));
+    System.out.println(str(catB));
+    System.out.println(str(all));
+
+    // 2.0 2.2 3.0 3.8 4.0
+    // -9.0 -8.2 -5.0 7.800000000000001 11.0
+    // -9.0 -7.3999999999999995 2.0 8.200000000000001 11.0
+  }
+
+  private static String str(AVLTreeDigest digest) {
+    StringBuilder sb = new StringBuilder();
+    for (double d : new double[] {0,.1,.5,.9,1}) {
+      sb.append(" ").append(digest.quantile(d));
+    }
+    return sb.toString();
+  }
+   ***/
+
+  /*** test code to ensure TDigest is working as we expect.
+  @Test
+  public void testTDigest() throws Exception {
+    AVLTreeDigest t1 = new AVLTreeDigest(100);
+    t1.add(10, 1);
+    t1.add(90, 1);
+    t1.add(50, 1);
+
+    System.out.println(t1.quantile(0.1));
+    System.out.println(t1.quantile(0.5));
+    System.out.println(t1.quantile(0.9));
+
+    assertEquals(t1.quantile(0.5), 50.0, 0.01);
+
+    AVLTreeDigest t2 = new AVLTreeDigest(100);
+    t2.add(130, 1);
+    t2.add(170, 1);
+    t2.add(90, 1);
+
+    System.out.println(t2.quantile(0.1));
+    System.out.println(t2.quantile(0.5));
+    System.out.println(t2.quantile(0.9));
+
+    AVLTreeDigest top = new AVLTreeDigest(100);
+
+    t1.compress();
+    ByteBuffer buf = ByteBuffer.allocate(t1.byteSize()); // upper bound
+    t1.asSmallBytes(buf);
+    byte[] arr1 = Arrays.copyOf(buf.array(), buf.position());
+
+    ByteBuffer rbuf = ByteBuffer.wrap(arr1);
+    top.add(AVLTreeDigest.fromBytes(rbuf));
+
+    System.out.println(top.quantile(0.1));
+    System.out.println(top.quantile(0.5));
+    System.out.println(top.quantile(0.9));
+
+    t2.compress();
+    ByteBuffer buf2 = ByteBuffer.allocate(t2.byteSize()); // upper bound
+    t2.asSmallBytes(buf2);
+    byte[] arr2 = Arrays.copyOf(buf2.array(), buf2.position());
+
+    ByteBuffer rbuf2 = ByteBuffer.wrap(arr2);
+    top.add(AVLTreeDigest.fromBytes(rbuf2));
+
+    System.out.println(top.quantile(0.1));
+    System.out.println(top.quantile(0.5));
+    System.out.println(top.quantile(0.9));
+  }
+  ******/
 }