You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2019/04/13 21:24:48 UTC

[incubator-hivemall] branch master updated: [HIVEMALL-246] Add feature name validation in feature UDF

This is an automated email from the ASF dual-hosted git repository.

myui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hivemall.git


The following commit(s) were added to refs/heads/master by this push:
     new 5438b12  [HIVEMALL-246] Add feature name validation in feature UDF
5438b12 is described below

commit 5438b129a2efadfedb4464c66d4d967d8d6618dd
Author: Makoto Yui <my...@apache.org>
AuthorDate: Sun Apr 14 06:24:42 2019 +0900

    [HIVEMALL-246] Add feature name validation in feature UDF
    
    ## What changes were proposed in this pull request?
    
    This PR adds feature name validation to the `feature` UDF.
    
    `feature(name, value)` should validate that `name` does not contain a colon (":"), because the colon separates the feature name from its value. Fail-fast behavior is preferable.
    
    ## What type of PR is it?
    
    Hot Fix
    
    ## What is the Jira issue?
    
    https://issues.apache.org/jira/browse/HIVEMALL-246
    
    ## How was this patch tested?
    
    unit tests
    
    ## Checklist
    
    - [x] Did you apply source code formatter, i.e., `./bin/format_code.sh`, for your commit?
    - [ ] Did you run system tests on Hive (or Spark)?
    
    Author: Makoto Yui <my...@apache.org>
    
    Closes #190 from myui/HIVEMALL-246.
---
 core/src/main/java/hivemall/ftvec/FeatureUDF.java     |  3 +++
 core/src/test/java/hivemall/ftvec/FeatureUDFTest.java | 16 ++++++++++++++--
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/core/src/main/java/hivemall/ftvec/FeatureUDF.java b/core/src/main/java/hivemall/ftvec/FeatureUDF.java
index b2dc5fa..887d9be 100644
--- a/core/src/main/java/hivemall/ftvec/FeatureUDF.java
+++ b/core/src/main/java/hivemall/ftvec/FeatureUDF.java
@@ -101,6 +101,9 @@ public final class FeatureUDF extends GenericUDF {
         // arg0|arg1 is Primitive Java object or Writable
         // Then, toString() works fine
         String featureStr = arg0.toString();
+        if (featureStr.indexOf(':') >= 0) {
+            throw new UDFArgumentException("feature name SHOULD NOT contain colon: " + featureStr);
+        }
         String valueStr = arg1.toString();
         String fv = featureStr + ':' + valueStr;
 
diff --git a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java
index c4e1fa0..d5f5a44 100644
--- a/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java
+++ b/core/src/test/java/hivemall/ftvec/FeatureUDFTest.java
@@ -19,6 +19,9 @@
 package hivemall.ftvec;
 
 import hivemall.TestUtils;
+
+import java.io.IOException;
+
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
@@ -31,8 +34,6 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
-import java.io.IOException;
-
 public class FeatureUDFTest {
     FeatureUDF udf = null;
 
@@ -227,7 +228,18 @@ public class FeatureUDFTest {
                 new DeferredJavaObject(null)});
 
         Assert.assertNull(ret);
+    }
+
+    @Test(expected = UDFArgumentException.class)
+    public void testInvalidFeatureName() throws Exception {
+        ObjectInspector featureOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+        ObjectInspector weightOI = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
+        udf.initialize(new ObjectInspector[] {featureOI, weightOI});
+
+        udf.evaluate(new GenericUDF.DeferredObject[] {new DeferredJavaObject(new Text("f:1")),
+                new DeferredJavaObject(new DoubleWritable(2.5d))});
 
+        Assert.fail();
     }
 
     @Test