You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/10/31 10:58:30 UTC

[incubator-hivemall] branch master updated (c73ecde -> 527e39b)

This is an automated email from the ASF dual-hosted git repository.

myui pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git.


    from c73ecde  [HIVEMALL-274] Fix wrong column name of train_regressor() in tutorial
     new 0a10aac  Renamed XGBoostUDTF as XGBoostBaseUDTF
     new 3985bbf  Added SparseDMatrixBuilder
     new 527e39b  add missing junit dependency

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 xgboost/pom.xml                                    |  7 ++
 .../{XGBoostUDTF.java => XGBoostBaseUDTF.java}     |  6 +-
 .../XGBoostBinaryClassifierUDTF.java               |  7 +-
 .../XGBoostMulticlassClassifierUDTF.java           |  4 +-
 .../xgboost/regression/XGBoostRegressionUDTF.java  |  4 +-
 .../xgboost/utils/SparseDMatrixBuilder.java        | 79 ++++++++++++++++++++++
 .../hivemall/xgboost/XGBoostVersionUDFTest.java    |  1 -
 7 files changed, 97 insertions(+), 11 deletions(-)
 rename xgboost/src/main/java/hivemall/xgboost/{XGBoostUDTF.java => XGBoostBaseUDTF.java} (98%)
 create mode 100644 xgboost/src/main/java/hivemall/xgboost/utils/SparseDMatrixBuilder.java


[incubator-hivemall] 02/03: Added SparseDMatrixBuilder

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 3985bbf1b74ea0414850b607d1ad9e205db6757c
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Oct 31 19:17:54 2019 +0900

    Added SparseDMatrixBuilder
---
 .../xgboost/utils/SparseDMatrixBuilder.java        | 79 ++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/xgboost/src/main/java/hivemall/xgboost/utils/SparseDMatrixBuilder.java b/xgboost/src/main/java/hivemall/xgboost/utils/SparseDMatrixBuilder.java
new file mode 100644
index 0000000..6f8e739
--- /dev/null
+++ b/xgboost/src/main/java/hivemall/xgboost/utils/SparseDMatrixBuilder.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.xgboost.utils;
+
+import hivemall.utils.collections.lists.FloatArrayList;
+import hivemall.utils.collections.lists.LongArrayList;
+import matrix4j.utils.collections.lists.IntArrayList;
+import ml.dmlc.xgboost4j.java.DMatrix;
+import ml.dmlc.xgboost4j.java.XGBoostError;
+
+import javax.annotation.Nonnegative;
+import javax.annotation.Nonnull;
+
+public class SparseDMatrixBuilder {
+
+    @Nonnull
+    private final LongArrayList rowPointers;
+    @Nonnull
+    private final IntArrayList columnIndices;
+    @Nonnull
+    private final FloatArrayList values;
+
+    private int maxNumColumns;
+
+    public SparseDMatrixBuilder(@Nonnegative int initSize) {
+        this.rowPointers = new LongArrayList(initSize + 1);
+        rowPointers.add(0);
+        this.columnIndices = new IntArrayList(initSize);
+        this.values = new FloatArrayList(initSize);
+        this.maxNumColumns = 0;
+    }
+
+    public SparseDMatrixBuilder nextRow() {
+        int ptr = values.size();
+        rowPointers.add(ptr);
+        return this;
+    }
+
+    private static final void checkColIndex(final int col) {
+        if (col < 0) {
+            throw new IllegalArgumentException("Found negative column index: " + col);
+        }
+    }
+
+    public SparseDMatrixBuilder nextColumn(@Nonnegative int col, float value) {
+        checkColIndex(col);
+
+        this.maxNumColumns = Math.max(col + 1, maxNumColumns);
+        if (value == 0.d) {
+            return this;
+        }
+
+        columnIndices.add(col);
+        values.add(value);
+        return this;
+    }
+
+    @Nonnull
+    public DMatrix buildMatrix() throws XGBoostError {
+        return new DMatrix(rowPointers.toArray(true), columnIndices.toArray(true),
+            values.toArray(true), DMatrix.SparseType.CSR, maxNumColumns);
+    }
+}


[incubator-hivemall] 03/03: add missing junit dependency

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 527e39bef53be07f16f30f5d90259521c8068a81
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Oct 31 19:58:20 2019 +0900

    add missing junit dependency
---
 xgboost/pom.xml                                                   | 7 +++++++
 xgboost/src/test/java/hivemall/xgboost/XGBoostVersionUDFTest.java | 1 -
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/xgboost/pom.xml b/xgboost/pom.xml
index 5672c66..f124102 100644
--- a/xgboost/pom.xml
+++ b/xgboost/pom.xml
@@ -100,6 +100,13 @@
 			<version>2.21</version>
 			<scope>compile</scope>
 		</dependency>
+
+		<!-- test scope -->
+		<dependency>
+			<groupId>junit</groupId>
+			<artifactId>junit</artifactId>
+			<scope>test</scope>
+		</dependency>
 	</dependencies>
 
 </project>
diff --git a/xgboost/src/test/java/hivemall/xgboost/XGBoostVersionUDFTest.java b/xgboost/src/test/java/hivemall/xgboost/XGBoostVersionUDFTest.java
index dbfd5dd..90f061c 100644
--- a/xgboost/src/test/java/hivemall/xgboost/XGBoostVersionUDFTest.java
+++ b/xgboost/src/test/java/hivemall/xgboost/XGBoostVersionUDFTest.java
@@ -18,7 +18,6 @@
  */
 package hivemall.xgboost;
 
-
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.junit.Assert;
 import org.junit.Test;


[incubator-hivemall] 01/03: Renamed XGBoostUDTF as XGBoostBaseUDTF

Posted by my...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git

commit 0a10aac92674a533bb84806cb2cdecd5ec478af8
Author: Makoto Yui <my...@apache.org>
AuthorDate: Thu Oct 31 19:17:31 2019 +0900

    Renamed XGBoostUDTF as XGBoostBaseUDTF
---
 .../hivemall/xgboost/{XGBoostUDTF.java => XGBoostBaseUDTF.java}    | 6 +++---
 .../xgboost/classification/XGBoostBinaryClassifierUDTF.java        | 7 ++++---
 .../xgboost/classification/XGBoostMulticlassClassifierUDTF.java    | 4 ++--
 .../java/hivemall/xgboost/regression/XGBoostRegressionUDTF.java    | 4 ++--
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/xgboost/src/main/java/hivemall/xgboost/XGBoostUDTF.java b/xgboost/src/main/java/hivemall/xgboost/XGBoostBaseUDTF.java
similarity index 98%
rename from xgboost/src/main/java/hivemall/xgboost/XGBoostUDTF.java
rename to xgboost/src/main/java/hivemall/xgboost/XGBoostBaseUDTF.java
index 272614f..8d77ae3 100644
--- a/xgboost/src/main/java/hivemall/xgboost/XGBoostUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/XGBoostBaseUDTF.java
@@ -54,8 +54,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
  * This is a base class to handle the options for XGBoost and provide common functions among various
  * tasks.
  */
-public abstract class XGBoostUDTF extends UDTFWithOptions {
-    private static final Log logger = LogFactory.getLog(XGBoostUDTF.class);
+public abstract class XGBoostBaseUDTF extends UDTFWithOptions {
+    private static final Log logger = LogFactory.getLog(XGBoostBaseUDTF.class);
 
     // Settings for the XGBoost native library
     static {
@@ -106,7 +106,7 @@ public abstract class XGBoostUDTF extends UDTFWithOptions {
         params.put("base_score", 0.5);
     }
 
-    public XGBoostUDTF() {
+    public XGBoostBaseUDTF() {
         this.featuresList = new ArrayList<>(1024);
     }
 
diff --git a/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostBinaryClassifierUDTF.java b/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostBinaryClassifierUDTF.java
index 1138e73..a0409d7 100644
--- a/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostBinaryClassifierUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostBinaryClassifierUDTF.java
@@ -18,7 +18,7 @@
  */
 package hivemall.xgboost.classification;
 
-import hivemall.xgboost.XGBoostUDTF;
+import hivemall.xgboost.XGBoostBaseUDTF;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,8 +28,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
  * https://github.com/dmlc/xgboost/tree/master/demo/binary_classification
  */
 @Description(name = "train_xgboost_classifier",
-        value = "_FUNC_(string[] features, double target [, string options]) - Returns a relation consisting of <string model_id, array<byte> pred_model>")
-public final class XGBoostBinaryClassifierUDTF extends XGBoostUDTF {
+        value = "_FUNC_(array<string> features, double target [, string options])"
+                + " - Returns a relation consisting of <string model_id, array<byte> pred_model>")
+public final class XGBoostBinaryClassifierUDTF extends XGBoostBaseUDTF {
 
     public XGBoostBinaryClassifierUDTF() {
         super();
diff --git a/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostMulticlassClassifierUDTF.java b/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostMulticlassClassifierUDTF.java
index 7bdb16f..cb4c45c 100644
--- a/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostMulticlassClassifierUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/classification/XGBoostMulticlassClassifierUDTF.java
@@ -18,7 +18,7 @@
  */
 package hivemall.xgboost.classification;
 
-import hivemall.xgboost.XGBoostUDTF;
+import hivemall.xgboost.XGBoostBaseUDTF;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.Options;
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  */
 @Description(name = "train_multiclass_xgboost_classifier",
         value = "_FUNC_(string[] features, double target [, string options]) - Returns a relation consisting of <string model_id, array<byte> pred_model>")
-public final class XGBoostMulticlassClassifierUDTF extends XGBoostUDTF {
+public final class XGBoostMulticlassClassifierUDTF extends XGBoostBaseUDTF {
 
     public XGBoostMulticlassClassifierUDTF() {
         super();
diff --git a/xgboost/src/main/java/hivemall/xgboost/regression/XGBoostRegressionUDTF.java b/xgboost/src/main/java/hivemall/xgboost/regression/XGBoostRegressionUDTF.java
index 5d38afe..c018c33 100644
--- a/xgboost/src/main/java/hivemall/xgboost/regression/XGBoostRegressionUDTF.java
+++ b/xgboost/src/main/java/hivemall/xgboost/regression/XGBoostRegressionUDTF.java
@@ -21,7 +21,7 @@ package hivemall.xgboost.regression;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 
-import hivemall.xgboost.XGBoostUDTF;
+import hivemall.xgboost.XGBoostBaseUDTF;
 
 /**
  * A XGBoost regression and the document is as follows; -
@@ -29,7 +29,7 @@ import hivemall.xgboost.XGBoostUDTF;
  */
 @Description(name = "train_xgboost_regr",
         value = "_FUNC_(string[] features, double target [, string options]) - Returns a relation consisting of <string model_id, array<byte> pred_model>")
-public final class XGBoostRegressionUDTF extends XGBoostUDTF {
+public final class XGBoostRegressionUDTF extends XGBoostBaseUDTF {
 
     public XGBoostRegressionUDTF() {
         super();