You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2017/06/15 08:28:17 UTC

incubator-hivemall git commit: [HIVEMALL-101] Fixed CI errors [Forced Update!]

Repository: incubator-hivemall
Updated Branches:
  refs/heads/master a780fcdbf -> 65d92ff5c (forced update)


[HIVEMALL-101] Fixed CI errors


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/65d92ff5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/65d92ff5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/65d92ff5

Branch: refs/heads/master
Commit: 65d92ff5c2f01c1990ddd296cae01d4feb8c5161
Parents: 50b4c9a
Author: Makoto Yui <my...@apache.org>
Authored: Thu Jun 15 17:11:11 2017 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Thu Jun 15 17:28:08 2017 +0900

----------------------------------------------------------------------
 .../classifier/GeneralClassifierUDTF.java       | 12 ++-
 .../optimizer/DenseOptimizerFactory.java        |  7 ++
 .../java/hivemall/optimizer/LossFunctions.java  | 82 ++++++++++++++++----
 .../optimizer/SparseOptimizerFactory.java       |  7 ++
 .../regression/GeneralRegressionUDTF.java       |  3 +-
 .../classifier/GeneralClassifierUDTFTest.java   |  8 +-
 .../regression/GeneralRegressionUDTFTest.java   |  7 +-
 docs/gitbook/misc/prediction.md                 |  2 +
 8 files changed, 99 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java b/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java
index d7cb539..e5f06d8 100644
--- a/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java
+++ b/core/src/main/java/hivemall/classifier/GeneralClassifierUDTF.java
@@ -41,8 +41,8 @@ public final class GeneralClassifierUDTF extends GeneralLearnerBaseUDTF {
 
     @Override
     protected String getLossOptionDescription() {
-        return "Loss function [default: HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss, "
-                + "SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]";
+        return "Loss function [HingeLoss (default), LogLoss, SquaredHingeLoss, ModifiedHuberLoss, \n"
+                + ", or a regression loss: SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]";
     }
 
     @Override
@@ -51,11 +51,9 @@ public final class GeneralClassifierUDTF extends GeneralLearnerBaseUDTF {
     }
 
     @Override
-    protected void checkLossFunction(LossFunction lossFunction) throws UDFArgumentException {
-        if(!lossFunction.forBinaryClassification()) {
-            throw new UDFArgumentException("The loss function `" + lossFunction.getType()
-                + "` is not designed for binary classification");
-        }
+    protected void checkLossFunction(@Nonnull LossFunction lossFunction)
+            throws UDFArgumentException {
+        // accepts both binary classification losses and regression losses
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java
index e273f91..775d7d0 100644
--- a/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java
+++ b/core/src/main/java/hivemall/optimizer/DenseOptimizerFactory.java
@@ -42,6 +42,13 @@ public final class DenseOptimizerFactory {
             throw new IllegalArgumentException("`optimizer` not defined");
         }
 
+        if ("rda".equalsIgnoreCase(options.get("regularization"))
+                && "adagrad".equalsIgnoreCase(optimizerName) == false) {
+            throw new IllegalArgumentException(
+                "`-regularization rda` is only supported for AdaGrad but `-optimizer "
+                        + optimizerName);
+        }
+
         final Optimizer optimizerImpl;
         if ("sgd".equalsIgnoreCase(optimizerName)) {
             optimizerImpl = new Optimizer.SGD(options);

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/main/java/hivemall/optimizer/LossFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/optimizer/LossFunctions.java b/core/src/main/java/hivemall/optimizer/LossFunctions.java
index a1ade3d..46c07ba 100644
--- a/core/src/main/java/hivemall/optimizer/LossFunctions.java
+++ b/core/src/main/java/hivemall/optimizer/LossFunctions.java
@@ -29,27 +29,32 @@ import javax.annotation.Nullable;
 public final class LossFunctions {
 
     public enum LossType {
-        SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss, HingeLoss, LogLoss,
-        SquaredHingeLoss, ModifiedHuberLoss
+        SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, SquaredEpsilonInsensitiveLoss,
+        HuberLoss, HingeLoss, LogLoss, SquaredHingeLoss, ModifiedHuberLoss
     }
 
     @Nonnull
     public static LossFunction getLossFunction(@Nullable final String type) {
-        if ("SquaredLoss".equalsIgnoreCase(type)) {
+        final String t = type.toLowerCase();
+        if ("squaredloss".equals(t) || "squared".equals(t)) {
             return new SquaredLoss();
-        } else if ("QuantileLoss".equalsIgnoreCase(type)) {
+        } else if ("quantileloss".equals(t) || "quantile".equals(t)) {
             return new QuantileLoss();
-        } else if ("EpsilonInsensitiveLoss".equalsIgnoreCase(type)) {
+        } else if ("epsiloninsensitiveloss".equals(t) || "epsilon_insensitive".equals(t)) {
             return new EpsilonInsensitiveLoss();
-        } else if ("HuberLoss".equalsIgnoreCase(type)) {
+        } else if ("squaredepsiloninsensitiveloss".equals(t)
+                || "squared_epsilon_insensitive".equals(t)) {
+            return new SquaredEpsilonInsensitiveLoss();
+        } else if ("huberloss".equals(t) || "huber".equals(t)) {
             return new HuberLoss();
-        } else if ("HingeLoss".equalsIgnoreCase(type)) {
+        } else if ("hingeloss".equals(t) || "hinge".equals(t)) {
             return new HingeLoss();
-        } else if ("LogLoss".equalsIgnoreCase(type) || "LogisticLoss".equalsIgnoreCase(type)) {
+        } else if ("logloss".equals(t) || "log".equals(t) || "logisticloss".equals(t)
+                || "logistic".equals(t)) {
             return new LogLoss();
-        } else if ("SquaredHingeLoss".equalsIgnoreCase(type)) {
+        } else if ("squaredhingeloss".equals(t) || "squared_hinge".equals(t)) {
             return new SquaredHingeLoss();
-        } else if ("ModifiedHuberLoss".equalsIgnoreCase(type)) {
+        } else if ("modifiedhuberloss".equals(t) || "modified_huber".equals(t)) {
             return new ModifiedHuberLoss();
         }
         throw new IllegalArgumentException("Unsupported loss function name: " + type);
@@ -64,6 +69,8 @@ public final class LossFunctions {
                 return new QuantileLoss();
             case EpsilonInsensitiveLoss:
                 return new EpsilonInsensitiveLoss();
+            case SquaredEpsilonInsensitiveLoss:
+                return new SquaredEpsilonInsensitiveLoss();
             case HuberLoss:
                 return new HuberLoss();
             case HingeLoss:
@@ -272,11 +279,11 @@ public final class LossFunctions {
         public float dloss(final float p, final float y) {
             if ((y - p) > epsilon) {// real value > predicted value - epsilon
                 return -1.f;
-            }
-            if ((p - y) > epsilon) {// real value < predicted value - epsilon
+            } else if ((p - y) > epsilon) {// real value < predicted value - epsilon
                 return 1.f;
+            } else {
+                return 0.f;
             }
-            return 0.f;
         }
 
         @Override
@@ -286,6 +293,55 @@ public final class LossFunctions {
     }
 
     /**
+     * Squared Epsilon-Insensitive loss. <code>loss = max(0, |y - p| - epsilon)^2</code>
+     */
+    public static final class SquaredEpsilonInsensitiveLoss extends RegressionLoss {
+
+        private float epsilon;
+
+        public SquaredEpsilonInsensitiveLoss() {
+            this(0.1f);
+        }
+
+        public SquaredEpsilonInsensitiveLoss(float epsilon) {
+            this.epsilon = epsilon;
+        }
+
+        public void setEpsilon(float epsilon) {
+            this.epsilon = epsilon;
+        }
+
+        @Override
+        public float loss(final float p, final float y) {
+            float d = Math.abs(y - p) - epsilon;
+            return (d > 0.f) ? (d * d) : 0.f;
+        }
+
+        @Override
+        public double loss(final double p, final double y) {
+            double d = Math.abs(y - p) - epsilon;
+            return (d > 0.d) ? (d * d) : 0.d;
+        }
+
+        @Override
+        public float dloss(final float p, final float y) {
+            final float z = y - p;
+            if (z > epsilon) {
+                return -2 * (z - epsilon);
+            } else if (-z > epsilon) {
+                return 2 * (-z - epsilon);
+            } else {
+                return 0.f;
+            }
+        }
+
+        @Override
+        public LossType getType() {
+            return LossType.SquaredEpsilonInsensitiveLoss;
+        }
+    }
+
+    /**
      * Huber regression loss.
      *
      * Variant of the SquaredLoss which is robust to outliers.

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java
index 7bcac1b..12e0d71 100644
--- a/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java
+++ b/core/src/main/java/hivemall/optimizer/SparseOptimizerFactory.java
@@ -40,6 +40,13 @@ public final class SparseOptimizerFactory {
             throw new IllegalArgumentException("`optimizer` not defined");
         }
 
+        if ("rda".equalsIgnoreCase(options.get("regularization"))
+                && "adagrad".equalsIgnoreCase(optimizerName) == false) {
+            throw new IllegalArgumentException(
+                "`-regularization rda` is only supported for AdaGrad but `-optimizer "
+                        + optimizerName);
+        }
+
         final Optimizer optimizerImpl;
         if ("sgd".equalsIgnoreCase(optimizerName)) {
             optimizerImpl = new Optimizer.SGD(options);

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java b/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java
index 160d92d..1bd9393 100644
--- a/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java
+++ b/core/src/main/java/hivemall/regression/GeneralRegressionUDTF.java
@@ -41,7 +41,8 @@ public final class GeneralRegressionUDTF extends GeneralLearnerBaseUDTF {
 
     @Override
     protected String getLossOptionDescription() {
-        return "Loss function [default: SquaredLoss, QuantileLoss, EpsilonInsensitiveLoss, HuberLoss]";
+        return "Loss function [default: SquaredLoss/squared, QuantileLoss/quantile, "
+                + "EpsilonInsensitiveLoss/epsilon_insensitive, HuberLoss/huber]";
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java
index 6ed783c..1c7a90e 100644
--- a/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java
+++ b/core/src/test/java/hivemall/classifier/GeneralClassifierUDTFTest.java
@@ -18,6 +18,8 @@
  */
 package hivemall.classifier;
 
+import hivemall.utils.math.MathUtils;
+
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -29,7 +31,7 @@ import java.util.List;
 import java.util.StringTokenizer;
 import java.util.zip.GZIPInputStream;
 
-import hivemall.utils.math.MathUtils;
+import javax.annotation.Nonnull;
 
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -41,8 +43,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 import org.junit.Assert;
 import org.junit.Test;
 
-import javax.annotation.Nonnull;
-
 public class GeneralClassifierUDTFTest {
     private static final boolean DEBUG = false;
 
@@ -148,7 +148,7 @@ public class GeneralClassifierUDTFTest {
         String[] regularizations = new String[] {"NO", "L1", "L2", "ElasticNet", "RDA"};
         String[] lossFunctions = new String[] {"HingeLoss", "LogLoss", "SquaredHingeLoss",
                 "ModifiedHuberLoss", "SquaredLoss", "QuantileLoss", "EpsilonInsensitiveLoss",
-                "HuberLoss"};
+                "SquaredEpsilonInsensitiveLoss", "HuberLoss"};
 
         for (String opt : optimizers) {
             for (String reg : regularizations) {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java b/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java
index df5c643..cfe9651 100644
--- a/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java
+++ b/core/src/test/java/hivemall/regression/GeneralRegressionUDTFTest.java
@@ -22,18 +22,17 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import javax.annotation.Nonnull;
+
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
 import org.junit.Assert;
 import org.junit.Test;
 
-import javax.annotation.Nonnull;
-
 public class GeneralRegressionUDTFTest {
     private static final boolean DEBUG = false;
 
@@ -156,7 +155,7 @@ public class GeneralRegressionUDTFTest {
         String[] optimizers = new String[] {"SGD", "AdaDelta", "AdaGrad", "Adam"};
         String[] regularizations = new String[] {"NO", "L1", "L2", "ElasticNet", "RDA"};
         String[] lossFunctions = new String[] {"SquaredLoss", "QuantileLoss",
-                "EpsilonInsensitiveLoss", "HuberLoss"};
+                "EpsilonInsensitiveLoss", "SquaredEpsilonInsensitiveLoss", "HuberLoss"};
 
         for (String opt : optimizers) {
             for (String reg : regularizations) {

http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/65d92ff5/docs/gitbook/misc/prediction.md
----------------------------------------------------------------------
diff --git a/docs/gitbook/misc/prediction.md b/docs/gitbook/misc/prediction.md
index 53fe03f..8c17ec6 100644
--- a/docs/gitbook/misc/prediction.md
+++ b/docs/gitbook/misc/prediction.md
@@ -110,6 +110,7 @@ Below we list possible options for `train_regression` and `train_classifier`, an
 		- SquaredLoss
 		- QuantileLoss
 		- EpsilonInsensitiveLoss
+		- SquaredEpsilonInsensitiveLoss
 		- HuberLoss
 	- For `train_classifier`
 		- HingeLoss
@@ -119,6 +120,7 @@ Below we list possible options for `train_regression` and `train_classifier`, an
 		- SquaredLoss
 		- QuantileLoss
 		- EpsilonInsensitiveLoss
+		- SquaredEpsilonInsensitiveLoss
 		- HuberLoss
 - Regularization function: `-reg`, `-regularization`
 	- L1