You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/01/03 10:51:55 UTC

[07/18] lucene-solr:jira/solr-11702: SOLR-11172: Add Mann-Whitney U test Stream Evaluator

SOLR-11172: Add Mann-Whitney U test Stream Evaluator


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fbea59b0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fbea59b0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fbea59b0

Branch: refs/heads/jira/solr-11702
Commit: fbea59b0864768356f1057a0a099d8a54887d272
Parents: 0c4fb31
Author: Joel Bernstein <jb...@apache.org>
Authored: Thu Dec 28 14:14:26 2017 -0500
Committer: Joel Bernstein <jb...@apache.org>
Committed: Thu Dec 28 14:14:26 2017 -0500

----------------------------------------------------------------------
 .../org/apache/solr/handler/StreamHandler.java  |  1 +
 .../solrj/io/eval/MannWhitneyUEvaluator.java    | 62 ++++++++++++++++++++
 .../solrj/io/stream/StreamExpressionTest.java   | 18 +++++-
 3 files changed, 80 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbea59b0/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
index fa1aaaa..ae15260 100644
--- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
@@ -286,6 +286,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
         .withFunctionName("multiVariateNormalDistribution", MultiVariateNormalDistributionEvaluator.class)
         .withFunctionName("integrate", IntegrateEvaluator.class)
         .withFunctionName("density", DensityEvaluator.class)
+        .withFunctionName("mannWhitney", MannWhitneyUEvaluator.class)
 
         // Boolean Stream Evaluators
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbea59b0/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MannWhitneyUEvaluator.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MannWhitneyUEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MannWhitneyUEvaluator.java
new file mode 100644
index 0000000..defa919
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MannWhitneyUEvaluator.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import org.apache.commons.math3.stat.inference.MannWhitneyUTest;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+
+/**
+ * Stream evaluator that performs a Mann-Whitney U test on two numeric arrays.
+ *
+ * <p>{@link #doWork(Object...)} takes exactly two lists of numbers and returns a {@link Tuple} holding the
+ * {@code u-statistic} and {@code p-value} computed by commons-math's {@link MannWhitneyUTest}.
+ */
+public class MannWhitneyUEvaluator extends RecursiveNumericListEvaluator implements ManyValueWorker {
+  protected static final long serialVersionUID = 1L;
+
+  /**
+   * Creates the evaluator from a parsed expression.
+   *
+   * @param expression the expression this evaluator is built from
+   * @param factory the stream factory, passed through to the superclass constructor
+   * @throws IOException if the expression contains no evaluators
+   */
+  public MannWhitneyUEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
+    super(expression, factory);
+
+    // NOTE(review): doWork requires exactly two arrays, yet only an empty parameter list is rejected
+    // here - confirm whether a stricter arity check at construction time is safe for all callers.
+    if(containedEvaluators.size() < 1){
+      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least one value but found %d",expression,containedEvaluators.size()));
+    }
+  }
+
+  /**
+   * Runs the Mann-Whitney U test on the two supplied samples.
+   *
+   * @param values the evaluated parameters; exactly two are required and each must be a list of numbers
+   * @return a {@link Tuple} with the entries {@code u-statistic} and {@code p-value}
+   * @throws IOException if the number of values is not two, or a value is not a list of non-null numbers
+   */
+  @Override
+  public Object doWork(Object... values) throws IOException {
+    // Check arity and types up front so bad input fails with a descriptive IOException rather than a
+    // ClassCastException or NullPointerException from inside the conversion below.
+    if(values.length != 2) {
+      throw new IOException(String.format(Locale.ROOT,"%s(...) only works with a list of 2 arrays but a list of %d array(s) was provided.", constructingFactory.getFunctionName(getClass()), values.length));
+    }
+    for(int idx = 0; idx < values.length; idx++) {
+      requireNumericList(values[idx], idx + 1);
+    }
+
+    List<double[]> mannWhitneyUInput = Arrays.stream(values)
+        .map(value -> ((List<?>) value).stream().mapToDouble(number -> ((Number) number).doubleValue()).toArray())
+        .collect(Collectors.toList());
+
+    MannWhitneyUTest mannwhitneyutest = new MannWhitneyUTest();
+    double u = mannwhitneyutest.mannWhitneyU(mannWhitneyUInput.get(0), mannWhitneyUInput.get(1));
+    double p = mannwhitneyutest.mannWhitneyUTest(mannWhitneyUInput.get(0), mannWhitneyUInput.get(1));
+    Map<String,Number> m = new HashMap<>();
+    m.put("u-statistic", u);
+    m.put("p-value", p);
+    return new Tuple(m);
+  }
+
+  /**
+   * Verifies that a parameter is a list made up solely of non-null numbers.
+   *
+   * @param value the evaluated parameter to check
+   * @param position the 1-based position of the parameter, used in error messages
+   * @throws IOException if the value is not a list or holds an element that is not a number
+   */
+  private void requireNumericList(Object value, int position) throws IOException {
+    String functionName = constructingFactory.getFunctionName(getClass());
+    if(!(value instanceof List)) {
+      throw new IOException(String.format(Locale.ROOT,"%s(...) expects parameter %d to be a list of numbers but found %s.", functionName, position, null == value ? "null" : value.getClass().getSimpleName()));
+    }
+    for(Object element : (List<?>) value) {
+      if(!(element instanceof Number)) {
+        throw new IOException(String.format(Locale.ROOT,"%s(...) expects parameter %d to contain only numbers but found %s.", functionName, position, null == element ? "null" : element.getClass().getSimpleName()));
+      }
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbea59b0/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index bc3dee5..4af8e43 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -7403,7 +7403,6 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     }
   }
 
-
   @Test
   public void testAnova() throws Exception {
     String cexpr = "anova(array(1,2,3,5,4,6), array(5,2,3,5,4,6), array(1,2,7,5,4,6))";
@@ -7513,6 +7512,23 @@ public class StreamExpressionTest extends SolrCloudTestCase {
     assertEquals((double) out.get(3), 5.5, .0);
   }
 
+  @Test
+  public void testMannWhitney() throws Exception {
+    // Two samples of unequal length; the result tuple must carry both the U statistic and the p-value.
+    String cexpr = "mannWhitney(array(0.15,0.10,0.11,0.24,0.08,0.08,0.10,0.10,0.10,0.12,0.04,0.07), " +
+                               "array(0.10,0.20,0.30,0.10,0.10,0.02,0.05,0.07))";
+    ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
+    paramsLoc.set("expr", cexpr);
+    paramsLoc.set("qt", "/stream");
+    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
+    TupleStream solrStream = new SolrStream(url, paramsLoc);
+    solrStream.setStreamContext(new StreamContext());
+    List<Tuple> tuples = getTuples(solrStream);
+    assertEquals(1, tuples.size());
+    Map<?, ?> out = (Map<?, ?>) tuples.get(0).get("return-value");
+    assertEquals(52.5, (double) out.get("u-statistic"), .1);
+    assertEquals(0.7284, (double) out.get("p-value"), .001);
+  }
 
   @Test
   public void testMovingMedian() throws Exception {