You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/05/03 03:14:02 UTC
[2/2] hive git commit: HIVE-13568: UDFs for use in column-masking -
includes updates for review comments (Madhan Neethiraj via Gunther
Hagleitner)
HIVE-13568: UDFs for use in column-masking - includes updates for review comments (Madhan Neethiraj via Gunther Hagleitner)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e9a72189
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e9a72189
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e9a72189
Branch: refs/heads/master
Commit: e9a72189edfded23c2e4a14a890a0528781852e5
Parents: d5285d8
Author: Madhan Neethiraj <ma...@apache.org>
Authored: Wed Apr 20 23:57:55 2016 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Mon May 2 18:13:30 2016 -0700
----------------------------------------------------------------------
.../test/resources/testconfiguration.properties | 6 +
.../hadoop/hive/ql/exec/FunctionRegistry.java | 8 +
.../hadoop/hive/ql/udf/generic/BaseMaskUDF.java | 473 +++++++++++++++++++
.../hive/ql/udf/generic/GenericUDFMask.java | 334 +++++++++++++
.../ql/udf/generic/GenericUDFMaskFirstN.java | 229 +++++++++
.../hive/ql/udf/generic/GenericUDFMaskHash.java | 77 +++
.../ql/udf/generic/GenericUDFMaskLastN.java | 193 ++++++++
.../udf/generic/GenericUDFMaskShowFirstN.java | 248 ++++++++++
.../ql/udf/generic/GenericUDFMaskShowLastN.java | 198 ++++++++
ql/src/test/queries/clientpositive/udf_mask.q | 13 +
.../queries/clientpositive/udf_mask_first_n.q | 13 +
.../test/queries/clientpositive/udf_mask_hash.q | 13 +
.../queries/clientpositive/udf_mask_last_n.q | 13 +
.../clientpositive/udf_mask_show_first_n.q | 13 +
.../clientpositive/udf_mask_show_last_n.q | 13 +
.../beelinepositive/show_functions.q.out | 6 +
.../results/clientpositive/show_functions.q.out | 12 +
.../test/results/clientpositive/udf_mask.q.out | 71 +++
.../clientpositive/udf_mask_first_n.q.out | 68 +++
.../results/clientpositive/udf_mask_hash.q.out | 59 +++
.../clientpositive/udf_mask_last_n.q.out | 68 +++
.../clientpositive/udf_mask_show_first_n.q.out | 68 +++
.../clientpositive/udf_mask_show_last_n.q.out | 68 +++
23 files changed, 2264 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 346a38d..c791ede 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -665,6 +665,12 @@ beeline.positive.exclude=add_part_exist.q,\
udf_map.q,\
udf_map_keys.q,\
udf_map_values.q,\
+ udf_mask.q,\
+ udf_mask_first_n.q,\
+ udf_mask_hash.q,\
+ udf_mask_last_n.q,\
+ udf_mask_show_first_n.q,\
+ udf_mask_show_last_n.q,\
udf_max.q,\
udf_min.q,\
udf_named_struct.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 1343b39..00df3a0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -474,6 +474,14 @@ public final class FunctionRegistry {
system.registerHiddenBuiltIn(GenericUDFOPDTIPlus.class);
system.registerHiddenBuiltIn(GenericUDFOPNumericMinus.class);
system.registerHiddenBuiltIn(GenericUDFOPNumericPlus.class);
+
+ // mask UDFs
+ system.registerGenericUDF(GenericUDFMask.UDF_NAME, GenericUDFMask.class);
+ system.registerGenericUDF(GenericUDFMaskFirstN.UDF_NAME, GenericUDFMaskFirstN.class);
+ system.registerGenericUDF(GenericUDFMaskLastN.UDF_NAME, GenericUDFMaskLastN.class);
+ system.registerGenericUDF(GenericUDFMaskShowFirstN.UDF_NAME, GenericUDFMaskShowFirstN.class);
+ system.registerGenericUDF(GenericUDFMaskShowLastN.UDF_NAME, GenericUDFMaskShowLastN.class);
+ system.registerGenericUDF(GenericUDFMaskHash.UDF_NAME, GenericUDFMaskHash.class);
}
public static String getNormalizedFunctionName(String fn) throws SemanticException {
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java
new file mode 100644
index 0000000..343f332
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/BaseMaskUDF.java
@@ -0,0 +1,473 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
+import org.apache.hadoop.hive.serde2.io.*;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+import java.sql.Date;
+
+
+/**
+ * Common base class of the masking UDFs. The first UDF argument is the value to mask; all
+ * remaining arguments are handed to the {@link AbstractTransformer} supplied by the concrete
+ * UDF. The result has the same primitive category as the first argument.
+ */
+public abstract class BaseMaskUDF extends GenericUDF {
+  private static final Log LOG = LogFactory.getLog(BaseMaskUDF.class);
+
+  final protected AbstractTransformer transformer;
+  final protected String displayName;
+  // Created in initialize(); converts between the column's object inspector and the transformer.
+  protected AbstractTransformerAdapter transformerAdapter = null;
+
+  /**
+   * @param transformer masking logic applied to every non-null value
+   * @param displayName function name used in display strings and error messages
+   */
+  protected BaseMaskUDF(AbstractTransformer transformer, String displayName) {
+    this.transformer = transformer;
+    this.displayName = displayName;
+  }
+
+  /**
+   * Validates the arguments, initializes the transformer from arguments[1..] and selects the
+   * adapter matching the primitive category of arguments[0].
+   *
+   * @param arguments object inspectors of the UDF arguments; arguments[0] is the value to mask
+   * @return writable object inspector for the primitive category of arguments[0]
+   * @throws UDFArgumentException if no argument is given or the first argument is not primitive
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    LOG.debug("==> BaseMaskUDF.initialize()");
+
+    // Without this guard a call with no arguments fails on arguments[0] with an
+    // ArrayIndexOutOfBoundsException instead of a proper argument error.
+    if (arguments == null || arguments.length < 1) {
+      throw new UDFArgumentException(displayName + " requires at least one argument: the value to mask");
+    }
+
+    checkArgPrimitive(arguments, 0); // first argument is the column to be transformed
+
+    PrimitiveObjectInspector columnType = ((PrimitiveObjectInspector) arguments[0]);
+
+    transformer.init(arguments, 1);
+
+    transformerAdapter = AbstractTransformerAdapter.getTransformerAdapter(columnType, transformer);
+
+    ObjectInspector ret = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(columnType.getPrimitiveCategory());
+
+    LOG.debug("<== BaseMaskUDF.initialize()");
+
+    return ret;
+  }
+
+  /**
+   * Masks arguments[0] through the adapter chosen in initialize().
+   *
+   * @return the masked value as a writable, or null when the adapter yields null
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    return transformerAdapter.getTransformedWritable(arguments[0]);
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString(displayName, children);
+  }
+}
+
+
+/**
+ * Base class to be extended by transformers which transform a given value according to their specification.
+ * The adapters in this file call transform() only with non-null values and return null
+ * to the caller when transform() itself returns null.
+ */
+abstract class AbstractTransformer {
+ /**
+ * Initialize the transformer object
+ * @param arguments arguments given to GenericUDF.initialize()
+ * @param startIdx index into array, from which the transformer should read values
+ */
+ abstract void init(ObjectInspector[] arguments, int startIdx);
+
+ /**
+ * Transform a String value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract String transform(String value);
+
+ /**
+ * Transform a Byte value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract Byte transform(Byte value);
+
+ /**
+ * Transform a Short value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract Short transform(Short value);
+
+ /**
+ * Transform an Integer value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract Integer transform(Integer value);
+
+ /**
+ * Transform a Long value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract Long transform(Long value);
+
+ /**
+ * Transform a Date value
+ * @param value value to transform
+ * @return transformed value
+ */
+ abstract Date transform(Date value);
+}
+
+/**
+ * Base class of the datatype specific adapters. An adapter reads the column value through its
+ * object inspector, runs it through the transformer and hands back the Writable object that
+ * GenericUDF.evaluate() is expected to return.
+ */
+abstract class AbstractTransformerAdapter {
+  final AbstractTransformer transformer;
+
+  AbstractTransformerAdapter(AbstractTransformer transformer) {
+    this.transformer = transformer;
+  }
+
+  /**
+   * @param value deferred column value
+   * @return writable holding the transformed value, or null
+   */
+  abstract Object getTransformedWritable(DeferredObject value) throws HiveException;
+
+  /**
+   * Picks the adapter for the primitive category of the column; categories without a
+   * dedicated adapter get an UnsupportedDatatypeTransformAdapter.
+   */
+  static AbstractTransformerAdapter getTransformerAdapter(PrimitiveObjectInspector columnType, AbstractTransformer transformer) {
+    switch (columnType.getPrimitiveCategory()) {
+      case STRING:
+        return new StringTransformerAdapter((StringObjectInspector) columnType, transformer);
+
+      case CHAR:
+        return new HiveCharTransformerAdapter((HiveCharObjectInspector) columnType, transformer);
+
+      case VARCHAR:
+        return new HiveVarcharTransformerAdapter((HiveVarcharObjectInspector) columnType, transformer);
+
+      case BYTE:
+        return new ByteTransformerAdapter((ByteObjectInspector) columnType, transformer);
+
+      case SHORT:
+        return new ShortTransformerAdapter((ShortObjectInspector) columnType, transformer);
+
+      case INT:
+        return new IntegerTransformerAdapter((IntObjectInspector) columnType, transformer);
+
+      case LONG:
+        return new LongTransformerAdapter((LongObjectInspector) columnType, transformer);
+
+      case DATE:
+        return new DateTransformerAdapter((DateObjectInspector) columnType, transformer);
+
+      default:
+        return new UnsupportedDatatypeTransformAdapter(columnType, transformer);
+    }
+  }
+}
+
+/**
+ * Adapter for BYTE columns; the same ByteWritable instance is filled and returned on every call.
+ */
+class ByteTransformerAdapter extends AbstractTransformerAdapter {
+  final ByteObjectInspector columnType;
+  final ByteWritable writable;
+
+  public ByteTransformerAdapter(ByteObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new ByteWritable());
+  }
+
+  public ByteTransformerAdapter(ByteObjectInspector columnType, AbstractTransformer transformer, ByteWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final Byte original = (Byte) columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final Byte masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for DATE columns; the same DateWritable instance is filled and returned on every call.
+ */
+class DateTransformerAdapter extends AbstractTransformerAdapter {
+  final DateObjectInspector columnType;
+  final DateWritable writable;
+
+  public DateTransformerAdapter(DateObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new DateWritable());
+  }
+
+  public DateTransformerAdapter(DateObjectInspector columnType, AbstractTransformer transformer, DateWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final Date original = columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final Date masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for CHAR columns; the value is transformed as a String and written into a shared
+ * HiveCharWritable.
+ */
+class HiveCharTransformerAdapter extends AbstractTransformerAdapter {
+  final HiveCharObjectInspector columnType;
+  final HiveCharWritable writable;
+
+  public HiveCharTransformerAdapter(HiveCharObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new HiveCharWritable());
+  }
+
+  public HiveCharTransformerAdapter(HiveCharObjectInspector columnType, AbstractTransformer transformer, HiveCharWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final HiveChar original = columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final String masked = transformer.transform(original.getValue());
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for VARCHAR columns; the value is transformed as a String and written into a shared
+ * HiveVarcharWritable.
+ */
+class HiveVarcharTransformerAdapter extends AbstractTransformerAdapter {
+  final HiveVarcharObjectInspector columnType;
+  final HiveVarcharWritable writable;
+
+  public HiveVarcharTransformerAdapter(HiveVarcharObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new HiveVarcharWritable());
+  }
+
+  public HiveVarcharTransformerAdapter(HiveVarcharObjectInspector columnType, AbstractTransformer transformer, HiveVarcharWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final HiveVarchar original = columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final String masked = transformer.transform(original.getValue());
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for INT columns; the same IntWritable instance is filled and returned on every call.
+ */
+class IntegerTransformerAdapter extends AbstractTransformerAdapter {
+  final IntObjectInspector columnType;
+  final IntWritable writable;
+
+  public IntegerTransformerAdapter(IntObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new IntWritable());
+  }
+
+  public IntegerTransformerAdapter(IntObjectInspector columnType, AbstractTransformer transformer, IntWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final Integer original = (Integer) columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final Integer masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for LONG columns; the same LongWritable instance is filled and returned on every call.
+ */
+class LongTransformerAdapter extends AbstractTransformerAdapter {
+  final LongObjectInspector columnType;
+  final LongWritable writable;
+
+  public LongTransformerAdapter(LongObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new LongWritable());
+  }
+
+  public LongTransformerAdapter(LongObjectInspector columnType, AbstractTransformer transformer, LongWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final Long original = (Long) columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final Long masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for SHORT columns; the same ShortWritable instance is filled and returned on every call.
+ */
+class ShortTransformerAdapter extends AbstractTransformerAdapter {
+  final ShortObjectInspector columnType;
+  final ShortWritable writable;
+
+  public ShortTransformerAdapter(ShortObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new ShortWritable());
+  }
+
+  public ShortTransformerAdapter(ShortObjectInspector columnType, AbstractTransformer transformer, ShortWritable writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final Short original = (Short) columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final Short masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Adapter for STRING columns; the same Text instance is filled and returned on every call.
+ */
+class StringTransformerAdapter extends AbstractTransformerAdapter {
+  final StringObjectInspector columnType;
+  final Text writable;
+
+  public StringTransformerAdapter(StringObjectInspector columnType, AbstractTransformer transformer) {
+    this(columnType, transformer, new Text());
+  }
+
+  public StringTransformerAdapter(StringObjectInspector columnType, AbstractTransformer transformer, Text writable) {
+    super(transformer);
+
+    this.columnType = columnType;
+    this.writable = writable;
+  }
+
+  /**
+   * @return the shared writable holding the transformed value, or null when either the input
+   *         or the transformed value is null
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    final String original = columnType.getPrimitiveJavaObject(object.get());
+    if (original == null) {
+      return null;
+    }
+
+    final String masked = transformer.transform(original);
+    if (masked == null) {
+      return null;
+    }
+
+    writable.set(masked);
+    return writable;
+  }
+}
+
+/**
+ * Fallback adapter for primitive categories that have no dedicated adapter: the input is never
+ * read and every value is replaced by null.
+ */
+class UnsupportedDatatypeTransformAdapter extends AbstractTransformerAdapter {
+  final PrimitiveObjectInspector columnType;
+
+  public UnsupportedDatatypeTransformAdapter(PrimitiveObjectInspector columnType, AbstractTransformer transformer) {
+    super(transformer);
+    this.columnType = columnType;
+  }
+
+  /**
+   * @return always null, regardless of the input
+   */
+  @Override
+  public Object getTransformedWritable(DeferredObject object) throws HiveException {
+    return null;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
new file mode 100644
index 0000000..cd35142
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMask.java
@@ -0,0 +1,334 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+
+import java.sql.Date;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.*;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+
+
+@Description(name = "mask",
+ value = "masks the given value",
+ extended = "Examples:\n "
+ + " mask(ccn)\n "
+ + " mask(ccn, 'X', 'x', '0')\n "
+ + " mask(ccn, 'x', 'x', 'x')\n "
+ + "Arguments:\n "
+ + " mask(value, upperChar, lowerChar, digitChar, otherChar, numberChar, dayValue, monthValue, yearValue)\n "
+ + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR, DATE\n "
+ + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+ + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+ + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+ + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+ + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+ + " dayValue - value to replace day field in a date with. Specify -1 to retain original value. Valid values: 1-31. Default value: 1\n "
+ + " monthValue - value to replace month field in a date with. Specify -1 to retain original value. Valid values: 0-11. Default value: 0\n "
+ + " yearValue - value to replace year field in a date with. Specify -1 to retain original value. Default value: 0\n "
+ )
+public class GenericUDFMask extends BaseMaskUDF { // the "mask" UDF: BaseMaskUDF wired to a MaskTransformer
+ public static final String UDF_NAME = "mask"; // function name; also passed to BaseMaskUDF as the display name
+
+ public GenericUDFMask() {
+ super(new MaskTransformer(), UDF_NAME); // each UDF instance gets its own MaskTransformer
+ }
+}
+
+/**
+ * Transformer behind the mask UDF. Strings are masked character by character according to the
+ * character's Unicode category, integral numbers have every decimal digit replaced by
+ * maskedNumber, and dates have their day/month/year fields replaced by fixed values.
+ * A setting equal to UNMASKED_VAL (-1) keeps the corresponding original character or field.
+ */
+class MaskTransformer extends AbstractTransformer {
+  final static int MASKED_UPPERCASE           = 'X';
+  final static int MASKED_LOWERCASE           = 'x';
+  final static int MASKED_DIGIT               = 'n';
+  final static int MASKED_OTHER_CHAR          = -1;
+  final static int MASKED_NUMBER              = 1;
+  final static int MASKED_DAY_COMPONENT_VAL   = 1;
+  final static int MASKED_MONTH_COMPONENT_VAL = 0;
+  final static int MASKED_YEAR_COMPONENT_VAL  = 0;
+  final static int UNMASKED_VAL               = -1;
+
+  int maskedUpperChar  = MASKED_UPPERCASE;
+  int maskedLowerChar  = MASKED_LOWERCASE;
+  int maskedDigitChar  = MASKED_DIGIT;
+  int maskedOtherChar  = MASKED_OTHER_CHAR;
+  int maskedNumber     = MASKED_NUMBER;
+  int maskedDayValue   = MASKED_DAY_COMPONENT_VAL;
+  int maskedMonthValue = MASKED_MONTH_COMPONENT_VAL;
+  int maskedYearValue  = MASKED_YEAR_COMPONENT_VAL;
+
+  public MaskTransformer() {
+  }
+
+  /**
+   * Reads the optional settings from consecutive arguments starting at startIdx, in the order
+   * upperChar, lowerChar, digitChar, otherChar, numberChar, dayValue, monthValue, yearValue.
+   * Missing or non-constant arguments keep their defaults; out-of-range number, day and month
+   * values fall back to their defaults.
+   *
+   * @param arguments arguments given to the UDF's initialize()
+   * @param startIdx index of the first setting in arguments
+   */
+  @Override
+  public void init(ObjectInspector[] arguments, int startIdx) {
+    int idx = startIdx;
+
+    maskedUpperChar  = getCharArg(arguments, idx++, MASKED_UPPERCASE);
+    maskedLowerChar  = getCharArg(arguments, idx++, MASKED_LOWERCASE);
+    maskedDigitChar  = getCharArg(arguments, idx++, MASKED_DIGIT);
+    maskedOtherChar  = getCharArg(arguments, idx++, MASKED_OTHER_CHAR);
+    maskedNumber     = getIntArg(arguments, idx++, MASKED_NUMBER);
+    maskedDayValue   = getIntArg(arguments, idx++, MASKED_DAY_COMPONENT_VAL);
+    maskedMonthValue = getIntArg(arguments, idx++, MASKED_MONTH_COMPONENT_VAL);
+    maskedYearValue  = getIntArg(arguments, idx++, MASKED_YEAR_COMPONENT_VAL);
+
+    if (maskedNumber < 0 || maskedNumber > 9) {
+      maskedNumber = MASKED_NUMBER;
+    }
+
+    // UNMASKED_VAL (-1) lies outside both valid ranges, so it must be exempted here; otherwise
+    // it is reset to the default and "retain original value" can never take effect.
+    if (maskedDayValue != UNMASKED_VAL && (maskedDayValue < 1 || maskedDayValue > 31)) {
+      maskedDayValue = MASKED_DAY_COMPONENT_VAL;
+    }
+
+    if (maskedMonthValue != UNMASKED_VAL && (maskedMonthValue < 0 || maskedMonthValue > 11)) {
+      maskedMonthValue = MASKED_MONTH_COMPONENT_VAL;
+    }
+  }
+
+  /**
+   * Masks every char of the string through transformChar().
+   * NOTE(review): the string is walked per UTF-16 char, so a supplementary character is seen as
+   * two surrogates rather than as one code point - confirm this is intended.
+   */
+  @Override
+  String transform(final String val) {
+    StringBuilder ret = new StringBuilder(val.length());
+
+    for (int i = 0; i < val.length(); i++) {
+      ret.appendCodePoint(transformChar(val.charAt(i)));
+    }
+
+    return ret.toString();
+  }
+
+  /**
+   * Replaces each decimal digit of the value with maskedNumber, keeping the sign. Zero has no
+   * digits to replace and is returned as 0. The arithmetic is done in byte and may wrap.
+   */
+  @Override
+  Byte transform(final Byte value) {
+    byte val = value;
+
+    if (value < 0) {
+      val *= -1;
+    }
+
+    byte ret = 0;
+    int  pos = 1;
+    while (val != 0) {
+      ret += maskedNumber * pos;
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if (value < 0) {
+      ret *= -1;
+    }
+
+    return ret;
+  }
+
+  /**
+   * Replaces each decimal digit of the value with maskedNumber, keeping the sign. Zero has no
+   * digits to replace and is returned as 0. The arithmetic is done in short and may wrap.
+   */
+  @Override
+  Short transform(final Short value) {
+    short val = value;
+
+    if (value < 0) {
+      val *= -1;
+    }
+
+    short ret = 0;
+    int   pos = 1;
+    while (val != 0) {
+      ret += maskedNumber * pos;
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if (value < 0) {
+      ret *= -1;
+    }
+
+    return ret;
+  }
+
+  /**
+   * Replaces each decimal digit of the value with maskedNumber, keeping the sign. Zero has no
+   * digits to replace and is returned as 0. The arithmetic is done in int and may wrap.
+   */
+  @Override
+  Integer transform(final Integer value) {
+    int val = value;
+
+    if (value < 0) {
+      val *= -1;
+    }
+
+    int ret = 0;
+    int pos = 1;
+    while (val != 0) {
+      ret += maskedNumber * pos;
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if (value < 0) {
+      ret *= -1;
+    }
+
+    return ret;
+  }
+
+  /**
+   * Replaces each decimal digit of the value with maskedNumber, keeping the sign. Zero has no
+   * digits to replace and is returned as 0. The arithmetic is done in long and may wrap.
+   */
+  @Override
+  Long transform(final Long value) {
+    long val = value;
+
+    if (value < 0) {
+      val *= -1;
+    }
+
+    long ret = 0;
+    long pos = 1;
+    while (val != 0) {
+      ret += maskedNumber * pos;
+
+      val /= 10;
+      pos *= 10;
+    }
+
+    if (value < 0) {
+      ret *= -1;
+    }
+
+    return ret;
+  }
+
+  /**
+   * Builds a new date whose year, month and day are the configured mask values; a field
+   * configured as UNMASKED_VAL keeps the value of the original date. The fields use the
+   * conventions of the deprecated java.sql.Date(int, int, int) constructor and getters
+   * (year relative to 1900, month 0-11).
+   */
+  @Override
+  Date transform(final Date value) {
+    int year  = maskedYearValue  == UNMASKED_VAL ? value.getYear()  : maskedYearValue;
+    int month = maskedMonthValue == UNMASKED_VAL ? value.getMonth() : maskedMonthValue;
+    int day   = maskedDayValue   == UNMASKED_VAL ? value.getDate()  : maskedDayValue;
+
+    return new Date(year, month, day);
+  }
+
+  /**
+   * Maps one character to its replacement based on its Unicode general category: upper-case
+   * letter, lower-case letter, decimal digit, or anything else.
+   *
+   * @return the configured replacement, or c itself when the category is set to UNMASKED_VAL
+   */
+  protected int transformChar(final int c) {
+    switch (Character.getType(c)) {
+      case Character.UPPERCASE_LETTER:
+        if (maskedUpperChar != UNMASKED_VAL) {
+          return maskedUpperChar;
+        }
+        break;
+
+      case Character.LOWERCASE_LETTER:
+        if (maskedLowerChar != UNMASKED_VAL) {
+          return maskedLowerChar;
+        }
+        break;
+
+      case Character.DECIMAL_DIGIT_NUMBER:
+        if (maskedDigitChar != UNMASKED_VAL) {
+          return maskedDigitChar;
+        }
+        break;
+
+      default:
+        if (maskedOtherChar != UNMASKED_VAL) {
+          return maskedOtherChar;
+        }
+        break;
+    }
+
+    return c;
+  }
+
+  /**
+   * Reads a character setting from a constant argument. Int, long and short constants are
+   * taken as the numeric value (so -1 can be passed); any other constant contributes the
+   * first character of its string form.
+   *
+   * @return the setting, or defaultValue when the argument is absent, not a constant, null or empty
+   */
+  int getCharArg(ObjectInspector[] arguments, int index, int defaultValue) {
+    int ret = defaultValue;
+
+    ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;
+
+    if (arg != null) {
+      if (arg instanceof WritableConstantIntObjectInspector) {
+        IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = value.get();
+        }
+      } else if (arg instanceof WritableConstantLongObjectInspector) {
+        LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = (int) value.get();
+        }
+      } else if (arg instanceof WritableConstantShortObjectInspector) {
+        ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = value.get();
+        }
+      } else if (arg instanceof ConstantObjectInspector) {
+        Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          String strValue = value.toString();
+
+          if (strValue != null && strValue.length() > 0) {
+            ret = strValue.charAt(0);
+          }
+        }
+      }
+    }
+
+    return ret;
+  }
+
+  /**
+   * Reads an integer setting from a constant argument. Int, long and short constants are taken
+   * as the numeric value; any other constant is parsed from its string form with
+   * Integer.parseInt, which throws NumberFormatException for non-numeric text.
+   *
+   * @return the setting, or defaultValue when the argument is absent, not a constant, null or empty
+   */
+  int getIntArg(ObjectInspector[] arguments, int index, int defaultValue) {
+    int ret = defaultValue;
+
+    ObjectInspector arg = (arguments != null && arguments.length > index) ? arguments[index] : null;
+
+    if (arg != null) {
+      if (arg instanceof WritableConstantIntObjectInspector) {
+        IntWritable value = ((WritableConstantIntObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = value.get();
+        }
+      } else if (arg instanceof WritableConstantLongObjectInspector) {
+        LongWritable value = ((WritableConstantLongObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = (int) value.get();
+        }
+      } else if (arg instanceof WritableConstantShortObjectInspector) {
+        ShortWritable value = ((WritableConstantShortObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          ret = value.get();
+        }
+      } else if (arg instanceof ConstantObjectInspector) {
+        Object value = ((ConstantObjectInspector) arg).getWritableConstantValue();
+
+        if (value != null) {
+          String strValue = value.toString();
+
+          if (strValue != null && strValue.length() > 0) {
+            ret = Integer.parseInt(strValue);
+          }
+        }
+      }
+    }
+
+    return ret;
+  }
+}
+
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
new file mode 100644
index 0000000..1ecf86f
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskFirstN.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+/**
+ * Hive UDF mask_first_n. The masking logic lives in MaskFirstNTransformer; this class only
+ * hands a transformer instance and the function name to BaseMaskUDF.
+ */
+@Description(name = "mask_first_n",
+ value = "masks the first n characters of the value",
+ extended = "Examples:\n "
+ + " mask_first_n(ccn, 8)\n "
+ + " mask_first_n(ccn, 8, 'x', 'x', 'x')\n "
+ + "Arguments:\n "
+ + " mask_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+ + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+ + " charCount - number of characters. Default value: 4\n "
+ + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+ + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+ + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+ + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+ + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+ )
+public class GenericUDFMaskFirstN extends BaseMaskUDF {
+ // function name handed to BaseMaskUDF; same text as the name in @Description above
+ public static final String UDF_NAME = "mask_first_n";
+
+ /** Creates the UDF with a fresh MaskFirstNTransformer. */
+ public GenericUDFMaskFirstN() {
+ super(new MaskFirstNTransformer(), UDF_NAME);
+ }
+}
+
+/**
+ * Transformer used by mask_first_n: masks the first {@code charCount} characters of a string,
+ * or the {@code charCount} most-significant decimal digits of an integral value, and leaves
+ * the remainder unchanged.
+ *
+ * Relies on transformChar, maskedNumber and getIntArg, none of which are declared in this
+ * class (presumably inherited through MaskTransformer - confirm there).
+ */
+class MaskFirstNTransformer extends MaskTransformer {
+ // number of leading characters/digits to mask; init() replaces it and clamps negatives to 0
+ int charCount = 4;
+
+ public MaskFirstNTransformer() {
+ super();
+ }
+
+ /**
+ * Reads charCount from {@code arguments[argsStartIdx]} (default 4, negatives become 0) and
+ * lets the parent class initialise itself from the arguments that follow it.
+ *
+ * @param arguments argument inspectors of the UDF call
+ * @param argsStartIdx index of the charCount argument
+ */
+ @Override
+ public void init(ObjectInspector[] arguments, int argsStartIdx) {
+ super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below
+
+ charCount = getIntArg(arguments, argsStartIdx, 4);
+
+ if(charCount < 0) {
+ charCount = 0;
+ }
+ }
+
+ /**
+ * Passes the first charCount chars (or the whole string when it is shorter) through
+ * transformChar and copies the remaining chars unchanged.
+ *
+ * Works on UTF-16 chars via charAt, so a surrogate pair is handled as two separate chars.
+ *
+ * @param value string to mask; dereferenced immediately, so null is not accepted
+ * @return a new string of the same number of chars
+ */
+ @Override
+ String transform(final String value) {
+ final StringBuilder ret = new StringBuilder(value.length());
+ final int endIdx = value.length() < charCount ? value.length() : charCount;
+
+ for(int i = 0; i < endIdx; i++) {
+ ret.appendCodePoint(transformChar(value.charAt(i)));
+ }
+
+ for(int i = endIdx; i < value.length(); i++) {
+ ret.appendCodePoint(value.charAt(i));
+ }
+
+ return ret.toString();
+ }
+
+ /**
+ * Replaces the charCount most-significant decimal digits of a byte with maskedNumber.
+ *
+ * The sign is stripped first and re-applied at the end. Zero has no digits to walk and is
+ * returned as 0. Each partial sum is narrowed back to byte by the compound assignment, so a
+ * masked result outside the byte range wraps around.
+ * NOTE(review): for Byte.MIN_VALUE the negation overflows and val stays negative - confirm
+ * the resulting output is acceptable.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Byte transform(final Byte value) {
+ byte val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(byte v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to retain from the end
+ final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount);
+
+ byte ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= retainCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { //retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a short (results outside the short
+ * range wrap around).
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Short transform(final Short value) {
+ short val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(short v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to retain from the end
+ final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount);
+
+ short ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= retainCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in an int with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Integer transform(final Integer value) {
+ int val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(int v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to retain from the end
+ final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount);
+
+ int ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= retainCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a long (pos is a long here as well)
+ * with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Long transform(final Long value) {
+ long val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(long v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to retain from the end
+ final int retainCount = digitCount < charCount ? 0 : (digitCount - charCount);
+
+ long ret = 0;
+ long pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= retainCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
new file mode 100644
index 0000000..c456f43
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskHash.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.sql.Date;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+/**
+ * Hive UDF mask_hash. The hashing logic lives in MaskHashTransformer; this class only hands
+ * a transformer instance and the function name to BaseMaskUDF.
+ */
+@Description(name = "mask_hash",
+ value = "returns hash of the given value",
+ extended = "Examples:\n "
+ + " mask_hash(value)\n "
+ + "Arguments:\n "
+ + " value - value to mask. Supported types: STRING, VARCHAR, CHAR"
+ )
+public class GenericUDFMaskHash extends BaseMaskUDF {
+ // function name handed to BaseMaskUDF; same text as the name in @Description above
+ public static final String UDF_NAME = "mask_hash";
+
+ /** Creates the UDF with a fresh MaskHashTransformer. */
+ public GenericUDFMaskHash() {
+ super(new MaskHashTransformer(), UDF_NAME);
+ }
+}
+
+/**
+ * Transformer used by mask_hash: a string is replaced by the hex form of its MD5 digest;
+ * the Byte, Short, Integer, Long and Date overloads all return null.
+ *
+ * NOTE(review): MD5 without a salt offers little protection for low-entropy values (they can
+ * be recovered by hashing candidate inputs). Switching algorithms would change every emitted
+ * hash, so it needs a deliberate decision rather than a silent change.
+ */
+class MaskHashTransformer extends AbstractTransformer {
+ /** Intentionally empty: this transformer reads nothing from the UDF arguments. */
+ @Override
+ public void init(ObjectInspector[] arguments, int startIdx) {
+ }
+
+ /**
+ * @param value string to hash
+ * @return the MD5 digest of value as produced by DigestUtils.md5Hex
+ */
+ @Override
+ String transform(final String value) {
+ return DigestUtils.md5Hex(value);
+ }
+
+ /** Hashing is not offered for this type; always returns null. */
+ @Override
+ Byte transform(final Byte value) {
+ return null;
+ }
+
+ /** Hashing is not offered for this type; always returns null. */
+ @Override
+ Short transform(final Short value) {
+ return null;
+ }
+
+ /** Hashing is not offered for this type; always returns null. */
+ @Override
+ Integer transform(final Integer value) {
+ return null;
+ }
+
+ /** Hashing is not offered for this type; always returns null. */
+ @Override
+ Long transform(final Long value) {
+ return null;
+ }
+
+ /** Hashing is not offered for this type; always returns null. */
+ @Override
+ Date transform(final Date value) {
+ return null;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
new file mode 100644
index 0000000..2e867bc
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskLastN.java
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+/**
+ * Hive UDF mask_last_n. The masking logic lives in MaskLastNTransformer; this class only
+ * hands a transformer instance and the function name to BaseMaskUDF.
+ */
+@Description(name = "mask_last_n",
+ value = "masks the last n characters of the value",
+ extended = "Examples:\n "
+ + " mask_last_n(ccn, 8)\n "
+ + " mask_last_n(ccn, 8, 'x', 'x', 'x')\n "
+ + "Arguments:\n "
+ + " mask_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+ + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+ + " charCount - number of characters. Default value: 4\n "
+ + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+ + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+ + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+ + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+ + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+ )
+public class GenericUDFMaskLastN extends BaseMaskUDF {
+ // function name handed to BaseMaskUDF; same text as the name in @Description above
+ public static final String UDF_NAME = "mask_last_n";
+
+ /** Creates the UDF with a fresh MaskLastNTransformer. */
+ public GenericUDFMaskLastN() {
+ super(new MaskLastNTransformer(), UDF_NAME);
+ }
+}
+
+/**
+ * Transformer used by mask_last_n: masks the last {@code charCount} characters of a string,
+ * or the {@code charCount} least-significant decimal digits of an integral value, and leaves
+ * the remainder unchanged.
+ *
+ * Relies on transformChar, maskedNumber and getIntArg, none of which are declared in this
+ * class (presumably inherited through MaskTransformer - confirm there).
+ */
+class MaskLastNTransformer extends MaskTransformer {
+ // number of trailing characters/digits to mask; init() replaces it and clamps negatives to 0
+ int charCount = 4;
+
+ public MaskLastNTransformer() {
+ super();
+ }
+
+ /**
+ * Reads charCount from {@code arguments[argsStartIdx]} (default 4, negatives become 0) and
+ * lets the parent class initialise itself from the arguments that follow it.
+ *
+ * @param arguments argument inspectors of the UDF call
+ * @param argsStartIdx index of the charCount argument
+ */
+ @Override
+ public void init(ObjectInspector[] arguments, int argsStartIdx) {
+ super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below
+
+ charCount = getIntArg(arguments, argsStartIdx, 4);
+
+ if(charCount < 0) {
+ charCount = 0;
+ }
+ }
+
+ /**
+ * Copies the leading chars unchanged and passes the last charCount chars (or the whole
+ * string when it is not longer than charCount) through transformChar.
+ *
+ * Works on UTF-16 chars via charAt, so a surrogate pair is handled as two separate chars.
+ *
+ * @param value string to mask; dereferenced immediately, so null is not accepted
+ * @return a new string of the same number of chars
+ */
+ @Override
+ String transform(final String value) {
+ final StringBuilder ret = new StringBuilder(value.length());
+ final int startIdx = value.length() <= charCount ? 0 : (value.length() - charCount);
+
+ for(int i = 0; i < startIdx; i++) {
+ ret.appendCodePoint(value.charAt(i));
+ }
+
+ for(int i = startIdx; i < value.length(); i++) {
+ ret.appendCodePoint(transformChar(value.charAt(i)));
+ }
+
+ return ret.toString();
+ }
+
+ /**
+ * Replaces the charCount least-significant decimal digits of a byte with maskedNumber.
+ *
+ * The sign is stripped first and re-applied at the end. Zero has no digits to walk and is
+ * returned as 0. Each partial sum is narrowed back to byte by the compound assignment, so a
+ * masked result outside the byte range wraps around.
+ * NOTE(review): for Byte.MIN_VALUE the negation overflows and val stays negative - confirm
+ * the resulting output is acceptable.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Byte transform(final Byte value) {
+ byte val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ byte ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { //retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a short (results outside the short
+ * range wrap around).
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Short transform(final Short value) {
+ short val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ short ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in an int with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Integer transform(final Integer value) {
+ int val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ int ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a long (pos is a long here as well)
+ * with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Long transform(final Long value) {
+ long val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ long ret = 0;
+ long pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java
new file mode 100644
index 0000000..a9a1766
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowFirstN.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+/**
+ * Hive UDF mask_show_first_n. The masking logic lives in MaskShowFirstNTransformer; this
+ * class only hands a transformer instance and the function name to BaseMaskUDF.
+ */
+@Description(name = "mask_show_first_n",
+ value = "masks all but first n characters of the value",
+ extended = "Examples:\n "
+ + " mask_show_first_n(ccn, 8)\n "
+ + " mask_show_first_n(ccn, 8, 'x', 'x', 'x')\n "
+ + "Arguments:\n "
+ + " mask_show_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+ + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+ + " charCount - number of characters. Default value: 4\n "
+ + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+ + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+ + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+ + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+ + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+ )
+public class GenericUDFMaskShowFirstN extends BaseMaskUDF {
+ // function name handed to BaseMaskUDF; same text as the name in @Description above
+ public static final String UDF_NAME = "mask_show_first_n";
+
+ /** Creates the UDF with a fresh MaskShowFirstNTransformer. */
+ public GenericUDFMaskShowFirstN() {
+ super(new MaskShowFirstNTransformer(), UDF_NAME);
+ }
+}
+
+/**
+ * Transformer used by mask_show_first_n: keeps the first {@code charCount} characters of a
+ * string, or the {@code charCount} most-significant decimal digits of an integral value, and
+ * masks everything after them.
+ *
+ * Relies on transformChar, maskedNumber and getIntArg, none of which are declared in this
+ * class (presumably inherited through MaskTransformer - confirm there).
+ */
+class MaskShowFirstNTransformer extends MaskTransformer {
+ // number of leading characters/digits left visible; init() replaces it and clamps negatives to 0
+ int charCount = 4;
+
+ public MaskShowFirstNTransformer() {
+ super();
+ }
+
+ /**
+ * Reads charCount from {@code arguments[argsStartIdx]} (default 4, negatives become 0) and
+ * lets the parent class initialise itself from the arguments that follow it.
+ *
+ * @param arguments argument inspectors of the UDF call
+ * @param argsStartIdx index of the charCount argument
+ */
+ @Override
+ public void init(ObjectInspector[] arguments, int argsStartIdx) {
+ super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed here
+
+ charCount = getIntArg(arguments, argsStartIdx, 4);
+
+ if(charCount < 0) {
+ charCount = 0;
+ }
+ }
+
+ /**
+ * Copies the first charCount chars unchanged and passes the rest through transformChar.
+ * A string no longer than charCount is returned as-is (same instance).
+ *
+ * Works on UTF-16 chars via charAt, so a surrogate pair is handled as two separate chars.
+ *
+ * @param value string to mask; dereferenced immediately, so null is not accepted
+ * @return the masked string
+ */
+ @Override
+ String transform(final String value) {
+ if(value.length() <= charCount) {
+ return value;
+ }
+
+ final StringBuilder ret = new StringBuilder(value.length());
+
+ for(int i = 0; i < charCount; i++) {
+ ret.appendCodePoint(value.charAt(i));
+ }
+
+ for(int i = charCount; i < value.length(); i++) {
+ ret.appendCodePoint(transformChar(value.charAt(i)));
+ }
+
+ return ret.toString();
+ }
+
+ /**
+ * Keeps the charCount most-significant decimal digits of a byte and replaces the lower
+ * digits with maskedNumber.
+ *
+ * The sign is stripped first and re-applied at the end. When the value has no more than
+ * charCount digits (this includes zero, which counts as having none) the original value is
+ * returned untouched. Each partial sum is narrowed back to byte by the compound assignment,
+ * so a masked result outside the byte range wraps around.
+ * NOTE(review): for Byte.MIN_VALUE the negation overflows and val stays negative - confirm
+ * the resulting output is acceptable.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Byte transform(final Byte value) {
+ byte val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(byte v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to mask from the end
+ final int maskCount = digitCount - charCount;
+
+ if(maskCount <= 0) {
+ return value;
+ }
+
+ byte ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < maskCount) { // mask this digit
+ ret += (maskedNumber * pos);
+ } else { //retain this digit
+ ret += ((val % 10) * pos);
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a short (results outside the short
+ * range wrap around).
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Short transform(final Short value) {
+ short val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(short v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to mask from the end
+ final int maskCount = digitCount - charCount;
+
+ if(maskCount <= 0) {
+ return value;
+ }
+
+ short ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < maskCount) { // mask this digit
+ ret += (maskedNumber * pos);
+ } else { // retain this digit
+ ret += ((val % 10) * pos);
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in an int with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Integer transform(final Integer value) {
+ int val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(int v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to mask from the end
+ final int maskCount = digitCount - charCount;
+
+ if(maskCount <= 0) {
+ return value;
+ }
+
+ int ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < maskCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += ((val % 10) * pos);
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a long (pos is a long here as well)
+ * with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Long transform(final Long value) {
+ long val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ // count number of digits in the value
+ int digitCount = 0;
+ for(long v = val; v != 0; v /= 10) {
+ digitCount++;
+ }
+
+ // number of digits to mask from the end
+ final int maskCount = digitCount - charCount;
+
+ if(maskCount <= 0) {
+ return value;
+ }
+
+ long ret = 0;
+ long pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i < maskCount) { // mask this digit
+ ret += (maskedNumber * pos);
+ } else { // retain this digit
+ ret += ((val % 10) * pos);
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java
new file mode 100644
index 0000000..2d05319
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMaskShowLastN.java
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+
+/**
+ * Hive UDF mask_show_last_n. The masking logic lives in MaskShowLastNTransformer; this
+ * class only hands a transformer instance and the function name to BaseMaskUDF.
+ */
+@Description(name = "mask_show_last_n",
+ value = "masks all but last n characters of the value",
+ extended = "Examples:\n "
+ + " mask_show_last_n(ccn, 8)\n "
+ + " mask_show_last_n(ccn, 8, 'x', 'x', 'x')\n "
+ + "Arguments:\n "
+ + " mask_show_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)\n "
+ + " value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR\n "
+ + " charCount - number of characters. Default value: 4\n "
+ + " upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'\n "
+ + " lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'\n "
+ + " digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'\n "
+ + " otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1\n "
+ + " numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'\n "
+ )
+public class GenericUDFMaskShowLastN extends BaseMaskUDF {
+ // function name handed to BaseMaskUDF; same text as the name in @Description above
+ public static final String UDF_NAME = "mask_show_last_n";
+
+ /** Creates the UDF with a fresh MaskShowLastNTransformer. */
+ public GenericUDFMaskShowLastN() {
+ super(new MaskShowLastNTransformer(), UDF_NAME);
+ }
+}
+
+/**
+ * Transformer used by mask_show_last_n: keeps the last {@code charCount} characters of a
+ * string, or the {@code charCount} least-significant decimal digits of an integral value,
+ * and masks everything before them.
+ *
+ * Relies on transformChar, maskedNumber and getIntArg, none of which are declared in this
+ * class (presumably inherited through MaskTransformer - confirm there).
+ */
+class MaskShowLastNTransformer extends MaskTransformer {
+ // number of trailing characters/digits left visible; init() replaces it and clamps negatives to 0
+ int charCount = 4;
+
+ public MaskShowLastNTransformer() {
+ super();
+ }
+
+ /**
+ * Reads charCount from {@code arguments[argsStartIdx]} (default 4, negatives become 0) and
+ * lets the parent class initialise itself from the arguments that follow it.
+ *
+ * @param arguments argument inspectors of the UDF call
+ * @param argsStartIdx index of the charCount argument
+ */
+ @Override
+ public void init(ObjectInspector[] arguments, int argsStartIdx) {
+ super.init(arguments, argsStartIdx + 1); // first argument is charCount, which is consumed in this method below
+
+ charCount = getIntArg(arguments, argsStartIdx, 4);
+
+ if(charCount < 0) {
+ charCount = 0;
+ }
+ }
+
+
+ /**
+ * Passes every char except the last charCount through transformChar and copies the last
+ * charCount chars unchanged. A string no longer than charCount is returned as-is (same
+ * instance).
+ *
+ * Works on UTF-16 chars via charAt, so a surrogate pair is handled as two separate chars.
+ *
+ * @param value string to mask; dereferenced immediately, so null is not accepted
+ * @return the masked string
+ */
+ @Override
+ String transform(final String value) {
+ if(value.length() <= charCount) {
+ return value;
+ }
+
+ final StringBuilder ret = new StringBuilder(value.length());
+ final int endIdx = value.length() - charCount;
+
+ for(int i = 0; i < endIdx; i++) {
+ ret.appendCodePoint(transformChar(value.charAt(i)));
+ }
+
+ for(int i = endIdx; i < value.length(); i++) {
+ ret.appendCodePoint(value.charAt(i));
+ }
+
+ return ret.toString();
+ }
+
+ /**
+ * Keeps the charCount least-significant decimal digits of a byte and replaces the higher
+ * digits with maskedNumber.
+ *
+ * The sign is stripped first and re-applied at the end. Zero has no digits to walk and is
+ * returned as 0. Each partial sum is narrowed back to byte by the compound assignment, so a
+ * masked result outside the byte range wraps around.
+ * NOTE(review): for Byte.MIN_VALUE the negation overflows and val stays negative - confirm
+ * the resulting output is acceptable.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Byte transform(final Byte value) {
+ byte val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ byte ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { //retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a short (results outside the short
+ * range wrap around).
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Short transform(final Short value) {
+ short val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ short ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in an int with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Integer transform(final Integer value) {
+ int val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ int ret = 0;
+ int pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= charCount) { // mask this digit
+ ret += maskedNumber * pos;
+ } else { // retain this digit
+ ret += (val % 10) * pos;
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+
+ /**
+ * Same algorithm as the Byte overload, accumulating in a long (pos is a long here as well)
+ * with no overflow check.
+ *
+ * @param value value to mask; unboxed immediately, so null is not accepted
+ * @return the masked value
+ */
+ @Override
+ Long transform(final Long value) {
+ long val = value;
+
+ if(value < 0) {
+ val *= -1;
+ }
+
+ long ret = 0;
+ long pos = 1;
+ // walk digits from least to most significant; pos is the decimal weight of digit i
+ for(int i = 0; val != 0; i++) {
+ if(i >= charCount) { // mask this digit
+ ret += (maskedNumber * pos);
+ } else { // retain this digit
+ ret += ((val % 10) * pos);
+ }
+
+ val /= 10;
+ pos *= 10;
+ }
+
+ if(value < 0) {
+ ret *= -1;
+ }
+
+ return ret;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask.q b/ql/src/test/queries/clientpositive/udf_mask.q
new file mode 100644
index 0000000..82b8ee7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask;
+DESC FUNCTION EXTENDED mask;
+
+explain select mask('TestString-123', 'X', 'x', '0', '1');
+
+select mask('TestString-123', 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'),
+ mask(cast(123 as tinyint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as smallint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as int), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as bigint), -1, -1, -1, -1, '5'),
+ mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0);
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask_first_n.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask_first_n.q b/ql/src/test/queries/clientpositive/udf_mask_first_n.q
new file mode 100644
index 0000000..3cd3962
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_first_n.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask_first_n;
+DESC FUNCTION EXTENDED mask_first_n;
+
+explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1');
+
+select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0);
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask_hash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask_hash.q b/ql/src/test/queries/clientpositive/udf_mask_hash.q
new file mode 100644
index 0000000..698f6b3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_hash.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask_hash;
+DESC FUNCTION EXTENDED mask_hash;
+
+explain select mask_hash('TestString-123');
+
+select mask_hash('TestString-123'),
+ mask_hash(cast('TestString-123' as varchar(24))),
+ mask_hash(cast('TestString-123' as char(24))),
+ mask_hash(cast(123 as tinyint)),
+ mask_hash(cast(12345 as smallint)),
+ mask_hash(cast(12345 as int)),
+ mask_hash(cast(12345 as bigint)),
+ mask_hash(cast('2016-04-20' as date));
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask_last_n.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask_last_n.q b/ql/src/test/queries/clientpositive/udf_mask_last_n.q
new file mode 100644
index 0000000..89eb05d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_last_n.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask_last_n;
+DESC FUNCTION EXTENDED mask_last_n;
+
+explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1');
+
+select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0);
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q b/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q
new file mode 100644
index 0000000..1425a82
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_show_first_n.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask_show_first_n;
+DESC FUNCTION EXTENDED mask_show_first_n;
+
+explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1');
+
+select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0);
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q b/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q
new file mode 100644
index 0000000..c4d15fb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_mask_show_last_n.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION mask_show_last_n;
+DESC FUNCTION EXTENDED mask_show_last_n;
+
+explain select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', '1');
+
+select mask_show_last_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_show_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_show_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_show_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_show_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_show_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0);
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/beelinepositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/beelinepositive/show_functions.q.out b/ql/src/test/results/beelinepositive/show_functions.q.out
index d7f706b..4f3ec40 100644
--- a/ql/src/test/results/beelinepositive/show_functions.q.out
+++ b/ql/src/test/results/beelinepositive/show_functions.q.out
@@ -96,6 +96,12 @@ Saving all output to "!!{outputDirectory}!!/show_functions.q.raw". Enter "record
'map'
'map_keys'
'map_values'
+'mask'
+'mask_first_n'
+'mask_hash'
+'mask_last_n'
+'mask_show_first_n'
+'mask_show_last_n'
'max'
'min'
'minute'
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 3cddcce..5c8b982 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -127,6 +127,12 @@ ltrim
map
map_keys
map_values
+mask
+mask_first_n
+mask_hash
+mask_last_n
+mask_show_first_n
+mask_show_last_n
matchpath
max
md5
@@ -340,6 +346,12 @@ POSTHOOK: type: SHOWFUNCTIONS
map
map_keys
map_values
+mask
+mask_first_n
+mask_hash
+mask_last_n
+mask_show_first_n
+mask_show_last_n
matchpath
max
md5
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/udf_mask.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_mask.q.out b/ql/src/test/results/clientpositive/udf_mask.q.out
new file mode 100644
index 0000000..cbafaee
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mask.q.out
@@ -0,0 +1,71 @@
+PREHOOK: query: DESCRIBE FUNCTION mask
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mask
+POSTHOOK: type: DESCFUNCTION
+masks the given value
+PREHOOK: query: DESC FUNCTION EXTENDED mask
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED mask
+POSTHOOK: type: DESCFUNCTION
+masks the given value
+Examples:
+ mask(ccn)
+ mask(ccn, 'X', 'x', '0')
+ mask(ccn, 'x', 'x', 'x')
+ Arguments:
+ mask(value, upperChar, lowerChar, digitChar, otherChar, numberChar, dayValue, monthValue, yearValue)
+ value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR, DATE
+ upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'
+ lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'
+ digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'
+ otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1
+ numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'
+ dayValue - value to replace day field in a date with. Specify -1 to retain original value. Valid values: 1-31. Default value: 1
+ monthValue - value to replace month field in a date with. Specify -1 to retain original value. Valid values: 0-11. Default value: 0
+ yearValue - value to replace year field in a date with. Specify -1 to retain original value. Default value: 0
+
+PREHOOK: query: explain select mask('TestString-123', 'X', 'x', '0', '1')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select mask('TestString-123', 'X', 'x', '0', '1')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'XxxxXxxxxx1000' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select mask('TestString-123', 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'),
+ mask(cast(123 as tinyint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as smallint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as int), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as bigint), -1, -1, -1, -1, '5'),
+ mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask('TestString-123', 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as varchar(24)), 'X', 'x', '0', ':'),
+ mask(cast('TestString-123' as char(24)), 'X', 'x', '0', ':'),
+ mask(cast(123 as tinyint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as smallint), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as int), -1, -1, -1, -1, '5'),
+ mask(cast(12345 as bigint), -1, -1, -1, -1, '5'),
+ mask(cast('2016-04-20' as date), -1, -1, -1, -1, -1, 0, 0, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+XxxxXxxxxx:000 XxxxXxxxxx:000 XxxxXxxxxx:000:::::::::: 43 -9981 55555 55555 1900-01-01
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/udf_mask_first_n.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_mask_first_n.q.out b/ql/src/test/results/clientpositive/udf_mask_first_n.q.out
new file mode 100644
index 0000000..988cf70
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mask_first_n.q.out
@@ -0,0 +1,68 @@
+PREHOOK: query: DESCRIBE FUNCTION mask_first_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mask_first_n
+POSTHOOK: type: DESCFUNCTION
+masks the first n characters of the value
+PREHOOK: query: DESC FUNCTION EXTENDED mask_first_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED mask_first_n
+POSTHOOK: type: DESCFUNCTION
+masks the first n characters of the value
+Examples:
+ mask_first_n(ccn, 8)
+ mask_first_n(ccn, 8, 'x', 'x', 'x')
+ Arguments:
+ mask(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)
+ value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR
+ charCount - number of characters. Default value: 4
+ upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'
+ lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'
+ digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'
+ otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1
+ numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'
+
+PREHOOK: query: explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select mask_first_n('TestString-123', 4, 'X', 'x', '0', '1')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'XxxxString-123' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+XxxxString-123 XxxxString-123 XxxxString-123 43 -9981 55555 55555 1900-01-01
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/udf_mask_hash.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_mask_hash.q.out b/ql/src/test/results/clientpositive/udf_mask_hash.q.out
new file mode 100644
index 0000000..9fc34bb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mask_hash.q.out
@@ -0,0 +1,59 @@
+PREHOOK: query: DESCRIBE FUNCTION mask_hash
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mask_hash
+POSTHOOK: type: DESCFUNCTION
+returns hash of the given value
+PREHOOK: query: DESC FUNCTION EXTENDED mask_hash
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED mask_hash
+POSTHOOK: type: DESCFUNCTION
+returns hash of the given value
+Examples:
+ mask_hash(value)
+ Arguments:
+ value - value to mask. Supported types: STRING, VARCHAR, CHAR
+PREHOOK: query: explain select mask_hash('TestString-123')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select mask_hash('TestString-123')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'dd78d68ad1b23bde126812482dd70ac6' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select mask_hash('TestString-123'),
+ mask_hash(cast('TestString-123' as varchar(24))),
+ mask_hash(cast('TestString-123' as char(24))),
+ mask_hash(cast(123 as tinyint)),
+ mask_hash(cast(12345 as smallint)),
+ mask_hash(cast(12345 as int)),
+ mask_hash(cast(12345 as bigint)),
+ mask_hash(cast('2016-04-20' as date))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_hash('TestString-123'),
+ mask_hash(cast('TestString-123' as varchar(24))),
+ mask_hash(cast('TestString-123' as char(24))),
+ mask_hash(cast(123 as tinyint)),
+ mask_hash(cast(12345 as smallint)),
+ mask_hash(cast(12345 as int)),
+ mask_hash(cast(12345 as bigint)),
+ mask_hash(cast('2016-04-20' as date))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+dd78d68ad1b23bde126812482dd70ac6 dd78d68ad1b23bde126812482dd70ac6 835735ba20f1297683efca69fabd0fba NULL NULL NULL NULL NULL
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/udf_mask_last_n.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_mask_last_n.q.out b/ql/src/test/results/clientpositive/udf_mask_last_n.q.out
new file mode 100644
index 0000000..ae75d51
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mask_last_n.q.out
@@ -0,0 +1,68 @@
+PREHOOK: query: DESCRIBE FUNCTION mask_last_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mask_last_n
+POSTHOOK: type: DESCFUNCTION
+masks the last n characters of the value
+PREHOOK: query: DESC FUNCTION EXTENDED mask_last_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED mask_last_n
+POSTHOOK: type: DESCFUNCTION
+masks the last n characters of the value
+Examples:
+ mask_last_n(ccn, 8)
+ mask_last_n(ccn, 8, 'x', 'x', 'x')
+ Arguments:
+ mask_last_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)
+ value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR
+ charCount - number of characters. Default value: 4
+ upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'
+ lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'
+ digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'
+ otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1
+ numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'
+
+PREHOOK: query: explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select mask_last_n('TestString-123', 4, 'X', 'x', '0', '1')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'TestString1000' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_last_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_last_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_last_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+TestString:000 TestString:000 TestString-123 :::: 43 15555 15555 15555 1900-01-01
http://git-wip-us.apache.org/repos/asf/hive/blob/e9a72189/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out b/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out
new file mode 100644
index 0000000..d8ada97
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_mask_show_first_n.q.out
@@ -0,0 +1,68 @@
+PREHOOK: query: DESCRIBE FUNCTION mask_show_first_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION mask_show_first_n
+POSTHOOK: type: DESCFUNCTION
+masks all but first n characters of the value
+PREHOOK: query: DESC FUNCTION EXTENDED mask_show_first_n
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED mask_show_first_n
+POSTHOOK: type: DESCFUNCTION
+masks all but first n characters of the value
+Examples:
+ mask_show_first_n(ccn, 8)
+ mask_show_first_n(ccn, 8, 'x', 'x', 'x')
+ Arguments:
+ mask_show_first_n(value, charCount, upperChar, lowerChar, digitChar, otherChar, numberChar)
+ value - value to mask. Supported types: TINYINT, SMALLINT, INT, BIGINT, STRING, VARCHAR, CHAR
+ charCount - number of characters. Default value: 4
+ upperChar - character to replace upper-case characters with. Specify -1 to retain original character. Default value: 'X'
+ lowerChar - character to replace lower-case characters with. Specify -1 to retain original character. Default value: 'x'
+ digitChar - character to replace digit characters with. Specify -1 to retain original character. Default value: 'n'
+ otherChar - character to replace all other characters with. Specify -1 to retain original character. Default value: -1
+ numberChar - character to replace digits in a number with. Valid values: 0-9. Default value: '1'
+
+PREHOOK: query: explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', '1')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'TestXxxxxx1000' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select mask_show_first_n('TestString-123', 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as varchar(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast('TestString-123' as char(24)), 4, 'X', 'x', '0', ':'),
+ mask_show_first_n(cast(123 as tinyint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as smallint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as int), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast(12345 as bigint), 4, -1, -1, -1, -1, '5'),
+ mask_show_first_n(cast('2016-04-20' as date), 4, -1, -1, -1, -1, -1, 0, 0, 0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+TestXxxxxx:000 TestXxxxxx:000 TestXxxxxx:000:::::::::: 123 12345 12345 12345 1900-01-01