You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2022/11/11 10:31:55 UTC

[hive] branch master updated: HIVE-26724: Mask UDF failing with NPE. (#3750). (Ayush Saxena, reviewed by Sai Hemanth Gantasala)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new eef564206f3 HIVE-26724: Mask UDF failing with NPE. (#3750). (Ayush Saxena, reviewed by Sai Hemanth Gantasala)
eef564206f3 is described below

commit eef564206f3487b801a8921035f6b4079d179c52
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Fri Nov 11 16:01:44 2022 +0530

    HIVE-26724: Mask UDF failing with NPE. (#3750). (Ayush Saxena, reviewed by Sai Hemanth Gantasala)
---
 .../hive/ql/udf/generic/GenericUDFMaskHash.java    | 33 +++++++++++--
 .../queries/clientpositive/udf_mask_hash_config.q  | 24 ++++++++++
 .../clientpositive/llap/udf_mask_hash_config.q.out | 55 ++++++++++++++++++++++
 3 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
index 02f2c6c53bb..a97d2882d91 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
@@ -21,9 +21,12 @@ package org.apache.hadoop.hive.ql.udf.generic;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 @Description(name = "mask_hash",
              value = "returns hash of the given value",
@@ -35,25 +38,49 @@ import org.apache.hadoop.hive.ql.session.SessionState;
 public class GenericUDFMaskHash extends BaseMaskUDF {
   public static final String UDF_NAME = "mask_hash";
 
+  public void configure(MapredContext context) {
+    boolean isSha512 =
+        "sha512".equalsIgnoreCase(HiveConf.getVar(context.getJobConf(), HiveConf.ConfVars.HIVE_MASKING_ALGO).trim());
+    ((MaskHashTransformer) transformer).setSHA512(isSha512);
+  }
+
   public GenericUDFMaskHash() {
     super(new MaskHashTransformer(), UDF_NAME);
   }
 }
 
 class MaskHashTransformer extends AbstractTransformer {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MaskHashTransformer.class);
+
+  private boolean isSHA512 = false;
   @Override
   public void init(ObjectInspector[] arguments, int startIdx) {
   }
 
+  public void setSHA512(boolean val) {
+    this.isSHA512 = val;
+  }
+
   @Override
   String transform(final String value) {
-    if("sha512".equalsIgnoreCase(HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_MASKING_ALGO).trim())){
+    if (getIsSHA512FromSessionConf() || isSHA512) {
+      LOG.info("Using SHA512 for masking");
       return DigestUtils.sha512Hex(value);
-    }else{
+    } else {
+      LOG.info("Using SHA256 for masking");
       return DigestUtils.sha256Hex(value);
     }
   }
 
+  private boolean getIsSHA512FromSessionConf() {
+    if (SessionState.get() != null) {
+      return "sha512".equalsIgnoreCase(
+          HiveConf.getVar(SessionState.get().getConf(), HiveConf.ConfVars.HIVE_MASKING_ALGO).trim());
+    }
+    return false;
+  }
+
   @Override
   Byte transform(final Byte value) {
     return null;
diff --git a/ql/src/test/queries/clientpositive/udf_mask_hash_config.q b/ql/src/test/queries/clientpositive/udf_mask_hash_config.q
new file mode 100644
index 00000000000..e256b8ffd64
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_hash_config.q
@@ -0,0 +1,24 @@
+set hive.query.results.cache.enabled=false;
+
+-- create a test table
+create table testhivemask(name string);
+
+-- insert some values
+insert into testhivemask values('name1'),('name2');
+
+-- explicitly configure sha512 and check for values
+set hive.fetch.task.conversion=none;
+set hive.masking.algo=sha512;
+
+-- try the udf on a table
+select mask_hash(name) from testhivemask;
+
+-- try the udf with a constant value
+select mask_hash('01-28-2021');
+
+-- explicitly configure sha256 and check for values
+
+set hive.masking.algo=sha256;
+select mask_hash(name) from testhivemask;
+
+select mask_hash('01-28-2021');
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/udf_mask_hash_config.q.out b/ql/src/test/results/clientpositive/llap/udf_mask_hash_config.q.out
new file mode 100644
index 00000000000..ac67e313e55
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/udf_mask_hash_config.q.out
@@ -0,0 +1,55 @@
+PREHOOK: query: create table testhivemask(name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testhivemask
+POSTHOOK: query: create table testhivemask(name string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testhivemask
+PREHOOK: query: insert into testhivemask values('name1'),('name2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@testhivemask
+POSTHOOK: query: insert into testhivemask values('name1'),('name2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@testhivemask
+POSTHOOK: Lineage: testhivemask.name SCRIPT []
+PREHOOK: query: select mask_hash(name) from testhivemask
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testhivemask
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_hash(name) from testhivemask
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testhivemask
+#### A masked pattern was here ####
+4e7c69b50c61f811ae81af0edaecc1af8445e153426f1ba831ff79e04ea68d4df3984ab80f2f45100e22a1132846628a70b9f6c064399393050e19296e6ec63b
+32084f67e470f79d7525e0329753a53cf6319e50592899ee6b5a2f72c974ff432e26729d14de7b519fadb358892a680cb9c9193916500db8f5753b6e90421cff
+PREHOOK: query: select mask_hash('01-28-2021')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_hash('01-28-2021')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+7b375ec2327810383a3c6cfb871b5e433feda829cb882b8880a38f17039a34d71900de20b17884e703e411c483c47bec54fd9e8ef06adf1269202776e5b3c607
+PREHOOK: query: select mask_hash(name) from testhivemask
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testhivemask
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_hash(name) from testhivemask
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testhivemask
+#### A masked pattern was here ####
+9367417d63903350aeb7e092bca792263d4fd82d4912252e014e073a8931b4c1
+069587dcb8f8b63329ae53051ba79ba34ba0deb41c7a1e044280d7b6bb15e4f0
+PREHOOK: query: select mask_hash('01-28-2021')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_hash('01-28-2021')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+995bb88e7a32d9028c50d47cc4711e8bae0c94d5a746c0daa9fda1cc9dc41093