You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/09/27 19:04:57 UTC

[incubator-hivemall-site] 04/04: Added decision_path usage

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall-site.git

commit d2a01e8dab22ec75a9c230ca8a262a80f7b17a8d
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sat Sep 28 04:04:09 2019 +0900

    Added decision_path usage
---
 userguide/misc/funcs.html | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/userguide/misc/funcs.html b/userguide/misc/funcs.html
index 74adf17..be58736 100644
--- a/userguide/misc/funcs.html
+++ b/userguide/misc/funcs.html
@@ -2955,6 +2955,38 @@ bloom <span class="hljs-keyword">as</span> (
 </li>
 <li><p><code>train_randomforest_regressor(array&lt;double|string&gt; features, double target [, string options])</code> - Returns a relation consisting of &lt;int model_id, int model_type, string model, array&lt;double&gt; var_importance, double oob_errors, int oob_tests&gt;</p>
 </li>
+<li><p><code>decision_path(string modelId, string model, array&lt;double|string&gt; features [, const string options] [, optional array&lt;string&gt; featureNames=null, optional array&lt;string&gt; classNames=null])</code> - Returns a decision path for each prediction as an array&lt;string&gt;</p>
+<pre><code class="lang-sql">SELECT
+  t.passengerid,
+  decision_path(m.model_id, m.model, t.features, &apos;-classification&apos;)
+FROM
+  model_rf m
+  LEFT OUTER JOIN
+  test_rf t;
+&gt; | 892 | [&quot;2 [0.0] = 0.0&quot;,&quot;0 [3.0] = 3.0&quot;,&quot;1 [696.0] != 107.0&quot;,&quot;7 [7.8292] &lt;= 7.9104&quot;,&quot;1 [696.0] != 828.0&quot;,&quot;1 [696.0] != 391.0&quot;,&quot;0 [0.961038961038961, 0.03896103896103896]&quot;] |
+
+-- Show 100 frequent branches
+WITH tmp as (
+  SELECT
+    decision_path(m.model_id, m.model, t.features, &apos;-classification -no_verbose -no_leaf&apos;, array(&apos;pclass&apos;,&apos;name&apos;,&apos;sex&apos;,&apos;age&apos;,&apos;sibsp&apos;,&apos;parch&apos;,&apos;ticket&apos;,&apos;fare&apos;,&apos;cabin&apos;,&apos;embarked&apos;), array(&apos;no&apos;,&apos;yes&apos;)) as path
+  FROM
+    model_rf m
+    LEFT OUTER JOIN -- CROSS JOIN
+    test_rf t
+)
+select
+  r.branch,
+  count(1) as cnt
+from
+  tmp l
+  LATERAL VIEW explode(l.path) r as branch
+group by
+  r.branch
+order by
+  cnt desc
+limit 100;
+</code></pre>
+</li>
 <li><p><code>guess_attribute_types(ANY, ...)</code> - Returns attribute types</p>
 <pre><code class="lang-sql">select guess_attribute_types(*) from train limit 1;
 &gt; Q,Q,C,C,C,C,Q,C,C,C,Q,C,Q,Q,Q,Q,C,Q
@@ -3057,7 +3089,7 @@ Apache Hivemall is an effort undergoing incubation at The Apache Software Founda
     <script>
         var gitbook = gitbook || [];
         gitbook.push(function() {
-            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
+            gitbook.page.hasChanged({"page":{"title":"List of Functions","level":"1.3","depth":1,"next":{"title":"Tips for Effective Hivemall","level":"1.4","depth":1,"path":"tips/README.md","ref":"tips/README.md","articles":[{"title":"Explicit add_bias() for better prediction","level":"1.4.1","depth":2,"path":"tips/addbias.md","ref":"tips/addbias.md","articles":[]},{"title":"Use rand_amplify() to better prediction results","level":"1.4.2","depth":2,"path":"tips/rand_amplify.md","ref":"t [...]
         });
     </script>
 </div>