You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2018/09/07 10:19:41 UTC

incubator-hivemall git commit: [HIVEMALL-218] Fixed train_lda NPE where input row is null

Repository: incubator-hivemall
Updated Branches:
  refs/heads/master 30593b14b -> 47d1100c1


[HIVEMALL-218] Fixed train_lda NPE where input row is null

## What changes were proposed in this pull request?

Fixed a NegativeArraySizeException thrown by `train_lda` when the input row is NULL.

## What type of PR is it?

Bug Fix

## What is the Jira issue?

https://issues.apache.org/jira/browse/HIVEMALL-218

## How was this patch tested?

manual tests

## Checklist

- [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
- [x] Did you run system tests on Hive (or Spark)?

Author: Makoto Yui <my...@apache.org>

Closes #164 from myui/HIVEMALL-218.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/47d1100c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/47d1100c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/47d1100c

Branch: refs/heads/master
Commit: 47d1100c1fab6796f09f0998624b3a445869f1d4
Parents: 30593b1
Author: Makoto Yui <my...@apache.org>
Authored: Fri Sep 7 19:19:35 2018 +0900
Committer: Makoto Yui <my...@apache.org>
Committed: Fri Sep 7 19:19:35 2018 +0900

----------------------------------------------------------------------
 .../ProbabilisticTopicModelBaseUDTF.java        | 23 +++++++++++++++-----
 1 file changed, 17 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/47d1100c/core/src/main/java/hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.java b/core/src/main/java/hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.java
index 33d940d..23a021d 100644
--- a/core/src/main/java/hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.java
+++ b/core/src/main/java/hivemall/topicmodel/ProbabilisticTopicModelBaseUDTF.java
@@ -57,6 +57,8 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.Counters;
 import org.apache.hadoop.mapred.Reporter;
 
+import com.google.common.base.Preconditions;
+
 public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
     private static final Log logger = LogFactory.getLog(ProbabilisticTopicModelBaseUDTF.class);
 
@@ -159,11 +161,17 @@ public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
             this.model = createModel();
         }
 
-        final int length = wordCountsOI.getListLength(args[0]);
+        Preconditions.checkArgument(args.length >= 1);
+        Object arg0 = args[0];
+        if (arg0 == null) {
+            return;
+        }
+
+        final int length = wordCountsOI.getListLength(arg0);
         final String[] wordCounts = new String[length];
         int j = 0;
         for (int i = 0; i < length; i++) {
-            Object o = wordCountsOI.getListElement(args[0], i);
+            Object o = wordCountsOI.getListElement(arg0, i);
             if (o == null) {
                 throw new HiveException("Given feature vector contains invalid null elements");
             }
@@ -268,6 +276,10 @@ public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
 
     @Override
     public void close() throws HiveException {
+        if (model.getDocCount() == 0L) {
+            this.model = null;
+            throw new HiveException("No training exmples to learn. Please revise input data.");
+        }
         finalizeTraining();
         forwardModel();
         this.model = null;
@@ -275,10 +287,6 @@ public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
 
     @VisibleForTesting
     void finalizeTraining() throws HiveException {
-        if (model.getDocCount() == 0L) {
-            this.model = null;
-            return;
-        }
         if (miniBatchCount > 0) { // update for remaining samples
             model.train(Arrays.copyOfRange(miniBatch, 0, miniBatchCount));
         }
@@ -462,6 +470,9 @@ public abstract class ProbabilisticTopicModelBaseUDTF extends UDTFWithOptions {
             topicIdx.set(k);
 
             final SortedMap<Float, List<String>> topicWords = model.getTopicWords(k);
+            if (topicWords == null) {
+                continue;
+            }
             for (Map.Entry<Float, List<String>> e : topicWords.entrySet()) {
                 score.set(e.getKey().floatValue());
                 for (String v : e.getValue()) {