You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by rb...@apache.org on 2016/12/27 05:23:40 UTC

hive git commit: HIVE-15511: Provide an option in months_between UDF to disable rounding-off (Rajesh Balamohan, reviewed by Pengcheng Xiong)

Repository: hive
Updated Branches:
  refs/heads/master ac68aed6e -> 3c1344490


HIVE-15511: Provide an option in months_between UDF to disable rounding-off (Rajesh Balamohan, reviewed by Pengcheng Xiong)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3c134449
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3c134449
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3c134449

Branch: refs/heads/master
Commit: 3c13444902a1ad3d5bbe8a1f4d2ad75aaf409df8
Parents: ac68aed
Author: Rajesh Balamohan <rb...@apache.org>
Authored: Tue Dec 27 10:53:20 2016 +0530
Committer: Rajesh Balamohan <rb...@apache.org>
Committed: Tue Dec 27 10:53:20 2016 +0530

----------------------------------------------------------------------
 .../hadoop/hive/ql/udf/generic/GenericUDF.java  | 16 ++++++
 .../ql/udf/generic/GenericUDFMonthsBetween.java | 23 ++++++--
 .../generic/TestGenericUDFMonthsBetween.java    | 59 ++++++++++++++++++--
 .../clientpositive/udf_months_between.q.out     |  6 +-
 4 files changed, 91 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3c134449/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
index 259fde8..00a4f38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java
@@ -43,9 +43,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.C
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hive.common.util.DateUtils;
@@ -530,6 +532,20 @@ public abstract class GenericUDF implements Closeable {
     return str;
   }
 
+  protected Boolean getConstantBooleanValue(ObjectInspector[] arguments, int i) // reads the constant value of argument i as a Boolean
+      throws UDFArgumentTypeException {
+    Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue(); // unchecked cast: arguments[i] must be a ConstantObjectInspector
+    if (constValue == null) {
+      return false; // a null constant is reported as false
+    }
+    if (constValue instanceof BooleanWritable) {
+      return ((BooleanWritable) constValue).get(); // unwrap the writable into its boolean value
+    } else {
+      throw new UDFArgumentTypeException(i, getFuncName() + " only takes BOOLEAN types as " // any other writable type is rejected
+          + getArgOrder(i) + " argument, got " + constValue.getClass());
+    }
+  }
+
   protected Integer getConstantIntValue(ObjectInspector[] arguments, int i)
       throws UDFArgumentTypeException {
     Object constValue = ((ConstantObjectInspector) arguments[i]).getWritableConstantValue();

http://git-wip-us.apache.org/repos/asf/hive/blob/3c134449/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
index 35dc51a..e50b4f1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMonthsBetween.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
@@ -44,7 +45,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
  * UDFMonthsBetween.
  *
  */
-@Description(name = "months_between", value = "_FUNC_(date1, date2) - returns number of months between dates date1 and date2",
+@Description(name = "months_between", value = "_FUNC_(date1, date2, roundOff) "
+    + "- returns number of months between dates date1 and date2",
     extended = "If date1 is later than date2, then the result is positive. "
     + "If date1 is earlier than date2, then the result is negative. "
     + "If date1 and date2 are either the same days of the month or both last days of months, "
@@ -53,7 +55,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
     + "month and considers the difference in time components date1 and date2.\n"
     + "date1 and date2 type can be date, timestamp or string in the format "
     + "'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. "
-    + "The result is rounded to 8 decimal places.\n"
+    + "The result is rounded to 8 decimal places by default. Set roundOff=false otherwise.\n"
     + " Example:\n"
     + "  > SELECT _FUNC_('1997-02-28 10:30:00', '1996-10-30');\n 3.94959677")
 public class GenericUDFMonthsBetween extends GenericUDF {
@@ -64,14 +66,21 @@ public class GenericUDFMonthsBetween extends GenericUDF {
   private final Calendar cal1 = Calendar.getInstance();
   private final Calendar cal2 = Calendar.getInstance();
   private final DoubleWritable output = new DoubleWritable();
+  private boolean isRoundOffNeeded = true;
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
-    checkArgsSize(arguments, 2, 2);
+    checkArgsSize(arguments, 2, 3);
 
     checkArgPrimitive(arguments, 0);
     checkArgPrimitive(arguments, 1);
 
+    if (arguments.length == 3) {
+      if (arguments[2] instanceof ConstantObjectInspector) {
+        isRoundOffNeeded = getConstantBooleanValue(arguments, 2);
+      }
+    }
+
     // the function should support both short date and full timestamp format
     // time part of the timestamp should not be skipped
     checkArgGroups(arguments, 0, tsInputTypes, STRING_GROUP, DATE_GROUP);
@@ -129,9 +138,11 @@ public class GenericUDFMonthsBetween extends GenericUDF {
     // 1 sec is 0.000000373 months (1/2678400). 1 month is 31 days.
     // there should be no adjustments for leap seconds
     double monBtwDbl = monDiffInt + (sec1 - sec2) / 2678400D;
-    // Round a double to 8 decimal places.
-    double result = BigDecimal.valueOf(monBtwDbl).setScale(8, ROUND_HALF_UP).doubleValue();
-    output.set(result);
+    if (isRoundOffNeeded) {
+      // Round a double to 8 decimal places.
+      monBtwDbl = BigDecimal.valueOf(monBtwDbl).setScale(8, ROUND_HALF_UP).doubleValue();
+    }
+    output.set(monBtwDbl);
     return output;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/3c134449/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
index 4e650b5..224047d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFMonthsBetween.java
@@ -23,11 +23,12 @@ import java.sql.Timestamp;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMonthsBetween;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.Text;
 
@@ -36,13 +37,29 @@ import junit.framework.TestCase;
 public class TestGenericUDFMonthsBetween extends TestCase {
 
   public void testMonthsBetweenForString() throws HiveException {
+    // Default run: two arguments only, so the UDF keeps its default rounding
     GenericUDFMonthsBetween udf = new GenericUDFMonthsBetween();
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForString(udf);
+
+    // Run without round-off: a constant boolean false is passed as the third argument
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForString(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForString(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestStr("1995-02-02", "1995-01-01", 1.03225806, udf);
     runTestStr("2003-07-17", "2005-07-06", -23.64516129, udf);
@@ -97,6 +114,8 @@ public class TestGenericUDFMonthsBetween extends TestCase {
     // string dates without day should be parsed to null
     runTestStr("2002-03", "2002-02-24", null, udf);
     runTestStr("2002-03-24", "2002-02", null, udf);
+
+    runTestStr("2003-04-23", "2002-04-24", 11.96774194, udf);
   }
 
   public void testMonthsBetweenForTimestamp() throws HiveException {
@@ -104,9 +123,24 @@ public class TestGenericUDFMonthsBetween extends TestCase {
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForTimestamp(udf);
+
+    // Run without round-off
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForTimestamp(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForTimestamp(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestTs("1995-02-02 00:00:00", "1995-01-01 00:00:00", 1.03225806, udf);
     runTestTs("2003-07-17 00:00:00", "2005-07-06 00:00:00", -23.64516129, udf);
@@ -142,6 +176,8 @@ public class TestGenericUDFMonthsBetween extends TestCase {
     runTestTs("2002-03-24 00:00:00", "2002-02-24 10:30:00", 1.0, udf);
     runTestTs("2002-03-24 10:30:00", "2002-02-24 00:00:00", 1.0, udf);
 
+    runTestTs("2003-04-23 23:59:59", "2003-03-24 00:0:0", 0.99999963, udf);
+
     // Test with null args
     runTestTs(null, "2002-03-01 00:00:00", null, udf);
     runTestTs("2002-02-28 00:00:00", null, null, udf);
@@ -153,9 +189,24 @@ public class TestGenericUDFMonthsBetween extends TestCase {
     ObjectInspector valueOI1 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
     ObjectInspector valueOI2 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
     ObjectInspector[] arguments = { valueOI1, valueOI2 };
-
     udf.initialize(arguments);
 
+    testMonthsBetweenForDate(udf);
+
+    // Run without round-off
+    GenericUDFMonthsBetween udfWithoutRoundOff = new GenericUDFMonthsBetween();
+    ObjectInspector vOI1 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector vOI2 = PrimitiveObjectInspectorFactory.writableDateObjectInspector;
+    ObjectInspector vOI3 = PrimitiveObjectInspectorFactory
+        .getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.booleanTypeInfo,
+            new BooleanWritable(false));
+    ObjectInspector[] args = { vOI1, vOI2, vOI3 };
+    udfWithoutRoundOff.initialize(args);
+
+    testMonthsBetweenForDate(udfWithoutRoundOff);
+  }
+
+  public void testMonthsBetweenForDate(GenericUDFMonthsBetween udf) throws HiveException {
     // test month diff with fraction considering time components
     runTestDt("1995-02-02", "1995-01-01", 1.03225806, udf);
     runTestDt("2003-07-17", "2005-07-06", -23.64516129, udf);

http://git-wip-us.apache.org/repos/asf/hive/blob/3c134449/ql/src/test/results/clientpositive/udf_months_between.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_months_between.q.out b/ql/src/test/results/clientpositive/udf_months_between.q.out
index 913cd35..d46d466 100644
--- a/ql/src/test/results/clientpositive/udf_months_between.q.out
+++ b/ql/src/test/results/clientpositive/udf_months_between.q.out
@@ -2,14 +2,14 @@ PREHOOK: query: describe function months_between
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: describe function months_between
 POSTHOOK: type: DESCFUNCTION
-months_between(date1, date2) - returns number of months between dates date1 and date2
+months_between(date1, date2, roundOff) - returns number of months between dates date1 and date2
 PREHOOK: query: desc function extended months_between
 PREHOOK: type: DESCFUNCTION
 POSTHOOK: query: desc function extended months_between
 POSTHOOK: type: DESCFUNCTION
-months_between(date1, date2) - returns number of months between dates date1 and date2
+months_between(date1, date2, roundOff) - returns number of months between dates date1 and date2
 If date1 is later than date2, then the result is positive. If date1 is earlier than date2, then the result is negative. If date1 and date2 are either the same days of the month or both last days of months, then the result is always an integer. Otherwise the UDF calculates the fractional portion of the result based on a 31-day month and considers the difference in time components date1 and date2.
-date1 and date2 type can be date, timestamp or string in the format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. The result is rounded to 8 decimal places.
+date1 and date2 type can be date, timestamp or string in the format 'yyyy-MM-dd' or 'yyyy-MM-dd HH:mm:ss'. The result is rounded to 8 decimal places by default. Set roundOff=false otherwise.
  Example:
   > SELECT months_between('1997-02-28 10:30:00', '1996-10-30');
  3.94959677