You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jb...@apache.org on 2017/12/18 19:46:24 UTC

lucene-solr:branch_7x: SOLR-11681: Add ttest and pairedTtest Stream Evaluators

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 980d5c365 -> 6d4dcf8ee


SOLR-11681: Add ttest and pairedTtest Stream Evaluators


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6d4dcf8e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6d4dcf8e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6d4dcf8e

Branch: refs/heads/branch_7x
Commit: 6d4dcf8eee3a6d18c881a6f406b73209ba32fe22
Parents: 980d5c3
Author: Joel Bernstein <jb...@apache.org>
Authored: Mon Dec 18 14:31:08 2017 -0500
Committer: Joel Bernstein <jb...@apache.org>
Committed: Mon Dec 18 14:42:55 2017 -0500

----------------------------------------------------------------------
 .../org/apache/solr/handler/StreamHandler.java  |  2 +
 .../solrj/io/eval/PairedTTestEvaluator.java     | 75 ++++++++++++++++
 .../client/solrj/io/eval/TTestEvaluator.java    | 95 ++++++++++++++++++++
 .../solrj/io/stream/StreamExpressionTest.java   | 43 ++++++++-
 4 files changed, 213 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d4dcf8e/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
index e3491f6..8a83160 100644
--- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
@@ -281,6 +281,8 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
         .withFunctionName("olsRegress", OLSRegressionEvaluator.class)
         .withFunctionName("derivative", DerivativeEvaluator.class)
         .withFunctionName("spline", SplineEvaluator.class)
+        .withFunctionName("ttest", TTestEvaluator.class)
+        .withFunctionName("pairedTtest", PairedTTestEvaluator.class)
 
 
         // Boolean Stream Evaluators

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d4dcf8e/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PairedTTestEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PairedTTestEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PairedTTestEvaluator.java
new file mode 100644
index 0000000..56c2dc9
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PairedTTestEvaluator.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.commons.math3.stat.inference.TTest;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+/** Evaluator for {@code pairedTtest}: a paired t-test over two numeric lists using commons-math {@link TTest}. */
+public class PairedTTestEvaluator extends RecursiveNumericListEvaluator implements TwoValueWorker {
+  protected static final long serialVersionUID = 1L;
+
+  public PairedTTestEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
+    super(expression, factory);
+    if(containedEvaluators.size() != 2){
+      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting two parameters but found %d",expression,containedEvaluators.size()));
+    }
+  }
+
+  /**
+   * Runs the paired t-test on two {@code List}s of {@code Number}, paired by position.
+   * @return a {@link Tuple} with the keys {@code t-statistic} and {@code p-value}
+   * @throws IOException if either value is not a list or the two sizes differ
+   */
+  @Override
+  public Object doWork(Object value1, Object value2) throws IOException {
+    if(!(value1 instanceof List)) {
+      throw new IOException("First parameter for pairedTtest must be a double array");
+    }
+    if(!(value2 instanceof List)) {
+      throw new IOException("Second parameter for pairedTtest must be a double array");
+    }
+    double[] samples1 = toDoubleArray((List<?>) value1);
+    double[] samples2 = toDoubleArray((List<?>) value2);
+    if(samples1.length != samples2.length) { // pairing is positional, so the sizes must match
+      throw new IOException(String.format(Locale.ROOT,"pairedTtest requires arrays of equal length but found %d and %d",samples1.length,samples2.length));
+    }
+    TTest tTest = new TTest();
+    Map<String, Object> fields = new HashMap<String, Object>();
+    Tuple tuple = new Tuple(fields);
+    tuple.put("t-statistic", tTest.pairedT(samples1, samples2));
+    tuple.put("p-value", tTest.pairedTTest(samples1, samples2));
+    return tuple;
+  }
+
+  /** Copies a list of {@code Number} elements into a primitive {@code double[]}. */
+  private static double[] toDoubleArray(List<?> values) {
+    double[] out = new double[values.size()];
+    for(int i = 0; i < out.length; i++) {
+      out[i] = ((Number) values.get(i)).doubleValue();
+    }
+    return out;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d4dcf8e/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/TTestEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/TTestEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/TTestEvaluator.java
new file mode 100644
index 0000000..6273376
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/TTestEvaluator.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+import org.apache.commons.math3.stat.inference.TTest;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+/**
+ * Evaluator for the {@code ttest} function, backed by commons-math {@link TTest}.
+ * A numeric first parameter selects a one-sample test of the list against that mean;
+ * a list first parameter selects a two-sample test between the two lists.
+ */
+public class TTestEvaluator extends RecursiveNumericEvaluator implements TwoValueWorker {
+  protected static final long serialVersionUID = 1L;
+
+  public TTestEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
+    super(expression, factory);
+
+    if(containedEvaluators.size() != 2){
+      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting two parameters but found %d",expression,containedEvaluators.size()));
+    }
+  }
+
+  /**
+   * Computes the t-statistic and p-value for the two parameters.
+   *
+   * @param value1 either a {@code Number} (the mean to test against) or a {@code List} of {@code Number}
+   * @param value2 a {@code List} of {@code Number} holding the sample
+   * @return a {@link Tuple} with the keys {@code t-statistic} and {@code p-value}
+   * @throws IOException if a parameter is not of a supported type
+   */
+  @Override
+  public Object doWork(Object value1, Object value2) throws IOException {
+    // Validate both parameter types up front so no work is done on bad input.
+    boolean oneSample = value1 instanceof Number;
+    if(!oneSample && !(value1 instanceof List)) {
+      throw new IOException("First parameter for ttest must be either a double or double array");
+    }
+    if(!(value2 instanceof List)) {
+      throw new IOException("Second parameter for ttest must be a double array");
+    }
+
+    TTest tTest = new TTest();
+    double[] samples2 = toDoubleArray((List<?>) value2);
+    double tstat;
+    double pval;
+    // The (double, double[]) overloads are one-sample, the (double[], double[]) ones two-sample.
+    if(oneSample) {
+      double mean = ((Number) value1).doubleValue();
+      tstat = tTest.t(mean, samples2);
+      pval = tTest.tTest(mean, samples2);
+    } else {
+      double[] samples1 = toDoubleArray((List<?>) value1);
+      tstat = tTest.t(samples1, samples2);
+      pval = tTest.tTest(samples1, samples2);
+    }
+
+    Map<String, Object> fields = new HashMap<String, Object>();
+    Tuple tuple = new Tuple(fields);
+    tuple.put("t-statistic", tstat);
+    tuple.put("p-value", pval);
+    return tuple;
+  }
+
+  /** Copies a list of {@code Number} elements into a primitive {@code double[]}. */
+  private static double[] toDoubleArray(List<?> values) {
+    double[] out = new double[values.size()];
+    for(int i = 0; i < out.length; i++) {
+      out[i] = ((Number) values.get(i)).doubleValue();
+    }
+    return out;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d4dcf8e/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index 4b368c1..c8fe2ff 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -6796,12 +6796,12 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     List<Map> listh = (List<Map>)tuples.get(0).get("h");
     Map maph = listh.get(0);
     double pcth = (double)maph.get("pct");
-    assertEquals(pcth, .5, .02 );
+    assertEquals(pcth, .5, .02);
 
     List<Map> listi = (List<Map>)tuples.get(0).get("i");
     Map mapi = listi.get(0);
     double pcti = (double)mapi.get("pct");
-    assertEquals(pcti, .8, .02 );
+    assertEquals(pcti, .8, .02);
   }
 
   @Test
@@ -7114,6 +7114,7 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     assertEquals(out.get(6).doubleValue(),21.0,0.01);
   }
 
+
   @Test
   public void testPolyfit() throws Exception {
     String cexpr = "let(echo=true," +
@@ -7142,6 +7143,44 @@ public class StreamExpressionTest extends SolrCloudTestCase {
 
 
   @Test
+  public void testTtest() throws Exception {
+    // One expression exercises all three modes: two-sample, one-sample and paired.
+    String cexpr = "let(echo=true," +
+                       "a=array(0,1,2,3,4,5,6,7,9,10,11,12), " +
+                       "b=array(0,1,2,3,4,5,6,7,1,1,1,1), " +
+                       "ttest=ttest(a, b)," +
+                       "sample2Mean=mean(b),"+
+                       "onesamplettest=ttest(sample2Mean, b)," +
+                       "pairedttest=pairedTtest(a,b))";
+    ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
+    paramsLoc.set("expr", cexpr);
+    paramsLoc.set("qt", "/stream");
+    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
+    TupleStream solrStream = new SolrStream(url, paramsLoc);
+    StreamContext context = new StreamContext();
+    solrStream.setStreamContext(context);
+    List<Tuple> tuples = getTuples(solrStream);
+    assertEquals(1, tuples.size());
+    Tuple result = tuples.get(0);
+
+    // JUnit's assertEquals takes (expected, actual, delta); keeping that order
+    // makes a failure message report the two values the right way round.
+    Map twoSample = (Map)result.get("ttest");
+    assertEquals(2.3666107120397575, ((Number)twoSample.get("t-statistic")).doubleValue(), .0001);
+    assertEquals(0.029680704317867967, ((Number)twoSample.get("p-value")).doubleValue(), .0001);
+
+    // Testing b against its own mean must give t = 0 and p = 1.
+    Map oneSample = (Map)result.get("onesamplettest");
+    assertEquals(0.0, ((Number)oneSample.get("t-statistic")).doubleValue(), .0001);
+    assertEquals(1.0, ((Number)oneSample.get("p-value")).doubleValue(), .0001);
+
+    Map paired = (Map)result.get("pairedttest");
+    assertEquals(2.321219442769799, ((Number)paired.get("t-statistic")).doubleValue(), .0001);
+    assertEquals(0.0404907407662755, ((Number)paired.get("p-value")).doubleValue(), .0001);
+  }
+
+
+  @Test
   public void testLoess() throws Exception {
     String cexpr = "let(echo=true," +
                    "    a=array(0,1,2,3,4,5,6,7)," +