You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hivemall.apache.org by "Makoto Yui (JIRA)" <ji...@apache.org> on 2017/07/14 15:31:00 UTC

[jira] [Closed] (HIVEMALL-119) Fail to use xgboost on Hive

     [ https://issues.apache.org/jira/browse/HIVEMALL-119?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Makoto Yui closed HIVEMALL-119.
-------------------------------
    Resolution: Fixed

Merged in [1] with extensive refactoring [2].

[1] https://github.com/apache/incubator-hivemall/commit/04372d490194d598bbc79ec1adba8b0918225c38
[2] https://github.com/apache/incubator-hivemall/commit/7e96c8a997108f5cb7473ef9393ee8e13263f205

> Fail to use xgboost on Hive
> ---------------------------
>
>                 Key: HIVEMALL-119
>                 URL: https://issues.apache.org/jira/browse/HIVEMALL-119
>             Project: Hivemall
>          Issue Type: Bug
>         Environment: Head of https://github.com/amaya382/incubator-hivemall/tree/cross-compiling
> On docker, xgboost native built on local (Linux)
>            Reporter: ITO Ryuichi
>            Assignee: Takeshi Yamamuro
>              Labels: xgboost
>
> This error arises on [this branch](https://github.com/amaya382/incubator-hivemall/tree/cross-compiling), and probably on the head of master too.
> It seems that labels aren't set properly.
> {code}
> make xgboost-native-local
> mvn package -Dmaven.test.skip=true
> docker-compose -f resources/docker/docker-compose.yml build
> docker-compose -f resources/docker/docker-compose.yml up -d && docker attach hivemall
> {code}
> On docker
> {code}
> bin/prepare_iris.sh
> hive
> {code}
> On hive
> {code:sql}
> -- add jar /opt/hivemall/target/hivemall-core-0.4.2-rc.2-with-dependencies.jar;
> -- source /opt/hivemall/resources/ddl/define-all.hive;
> add jar /opt/hivemall/target/hivemall-xgboost-0.60-0.4.2-rc.2-with-dependencies.jar;
> source /opt/hivemall/resources/ddl/define-additional.hive;
> set hivevar:f0_min=4.3;
> set hivevar:f0_max=7.9;
> set hivevar:f1_min=2.0;
> set hivevar:f1_max=4.4;
> set hivevar:f2_min=1.0;
> set hivevar:f2_max=6.9;
> set hivevar:f3_min=0.1;
> set hivevar:f3_max=2.5;
> use iris;
> create or replace view iris_scaled
> as
> select
>   rowid, 
>   label,
>   add_bias(array(
>      concat("1:", rescale(features[0],${hivevar:f0_min},${hivevar:f0_max})), 
>      concat("2:", rescale(features[1],${hivevar:f1_min},${hivevar:f1_max})), 
>      concat("3:", rescale(features[2],${hivevar:f2_min},${hivevar:f2_max})), 
>      concat("4:", rescale(features[3],${hivevar:f3_min},${hivevar:f3_max}))
>   )) as features
> from 
>   iris_raw;
> -- select * from iris_scaled limit 3;
> -- 1       Iris-setosa     ["1:0.22222215","2:0.625","3:0.0677966","4:0.041666664","0:1.0"]
> -- 2       Iris-setosa     ["1:0.16666664","2:0.41666666","3:0.0677966","4:0.041666664","0:1.0"]
> -- 3       Iris-setosa     ["1:0.11111101","2:0.5","3:0.05084745","4:0.041666664","0:1.0"]
> select train_xgboost_classifier(features, case when label = 'Iris-setosa' then 1.0 else 0.0 end) from iris_scaled; -- got exception
> {code}
> {code}
> Failed with exception java.io.IOException:java.lang.ClassCastException: org.apache.hadoop.io.Text cannot be cast to java.lang.String
> [20:51:33] dmlc-core/include/dmlc/logging.h:235: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
> org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
> Check failed: (info.labels.size()) != (0) label set cannot be empty
>         at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
>         at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
>         at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
>         at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
>         at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
>         at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
>         at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
> org.apache.hadoop.hive.ql.metadata.HiveException: [20:51:33] src/objective/regression_obj.cc:89: Check failed: (info.labels.size()) != (0) label set cannot be empty
>         at hivemall.xgboost.XGBoostUDTF.close(XGBoostUDTF.java:313)
>         at org.apache.hadoop.hive.ql.exec.UDTFOperator.closeOp(UDTFOperator.java:152)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:683)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:697)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.closeOperator(FetchOperator.java:552)
>         at org.apache.hadoop.hive.ql.exec.FetchOperator.clearFetchContext(FetchOperator.java:535)
>         at org.apache.hadoop.hive.ql.exec.FetchTask.clearFetch(FetchTask.java:191)
>         at org.apache.hadoop.hive.ql.Driver.releaseFetchTask(Driver.java:2233)
>         at org.apache.hadoop.hive.ql.Driver.close(Driver.java:2278)
>         at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:269)
>         at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183)
>         at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399)
>         at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:776)
>         at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714)
>         at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:606)
>         at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
>         at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
> Time taken: 3.375 seconds
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)