Posted to commits@hive.apache.org by ha...@apache.org on 2016/03/25 15:22:36 UTC

[1/2] hive git commit: HIVE-12531 : Implement fast-path for Year/Month UDFs for dates between 1999 and 2038 (Jason Dere via Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master b1c45029e -> e384b2b65


HIVE-12531 : Implement fast-path for Year/Month UDFs for dates between 1999 and 2038 (Jason Dere via Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e384b2b6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e384b2b6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e384b2b6

Branch: refs/heads/master
Commit: e384b2b657c819d5963b8f76222f78bb479a29a2
Parents: b75d9ea
Author: Jason Dere <jd...@hortonworks.com>
Authored: Wed Dec 9 11:48:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Mar 25 07:21:55 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/udf/UDFMonth.java     | 16 ++++++++--------
 .../java/org/apache/hadoop/hive/ql/udf/UDFYear.java | 16 ++++++++--------
 .../expressions/TestVectorDateExpressions.java      | 12 +++++++++---
 3 files changed, 25 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
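
For readers skimming the diffs below: the patch replaces java.util.Calendar
with Joda-Time's MutableDateTime in both UDFs. One detail worth calling out
is that Calendar.MONTH is 0-based while Joda's getMonthOfYear() is 1-based,
which is why the "1 +" offset disappears from UDFMonth. A minimal standalone
sketch of the equivalence (not part of the commit; the class name is
illustrative):

    import java.util.Calendar;
    import java.util.Date;
    import org.joda.time.MutableDateTime;

    public class MonthConventionDemo {
      public static void main(String[] args) {
        Date now = new Date();

        // java.util.Calendar: Calendar.MONTH is 0-based (January == 0).
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(now);
        int calMonth = 1 + calendar.get(Calendar.MONTH);

        // org.joda.time.MutableDateTime: getMonthOfYear() is already 1-based.
        MutableDateTime mdt = new MutableDateTime();
        mdt.setMillis(now.getTime());
        int jodaMonth = mdt.getMonthOfYear();

        // Both expressions yield the same value in 1..12.
        System.out.println(calMonth + " == " + jodaMonth);
      }
    }

Note that both versions reuse a single mutable instance across calls, so the
UDFs are no more and no less thread-safe than before.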


http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
index 8c2b0e4..05afb8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java
@@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf;
 
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
-import java.util.Calendar;
 import java.util.Date;
+import org.joda.time.MutableDateTime;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
@@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text;
 @NDV(maxNdv = 31)
 public class UDFMonth extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-  private final Calendar calendar = Calendar.getInstance();
+  private transient final MutableDateTime mdt = new MutableDateTime();
 
   private final IntWritable result = new IntWritable();
 
@@ -75,8 +75,8 @@ public class UDFMonth extends UDF {
     }
     try {
       Date date = formatter.parse(dateString.toString());
-      calendar.setTime(date);
-      result.set(1 + calendar.get(Calendar.MONTH));
+      mdt.setMillis(date.getTime());
+      result.set(mdt.getMonthOfYear());
       return result;
     } catch (ParseException e) {
       return null;
@@ -88,8 +88,8 @@ public class UDFMonth extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
-    result.set(1 + calendar.get(Calendar.MONTH));
+    mdt.setMillis(d.get().getTime());
+    result.set(mdt.getMonthOfYear());
     return result;
   }
 
@@ -98,8 +98,8 @@ public class UDFMonth extends UDF {
       return null;
     }
 
-    calendar.setTime(t.getTimestamp());
-    result.set(1 + calendar.get(Calendar.MONTH));
+    mdt.setMillis(t.getTimestamp().getTime());
+    result.set(mdt.getMonthOfYear());
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
index d7ecd8c..fb3a655 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java
@@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf;
 
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
-import java.util.Calendar;
 import java.util.Date;
+import org.joda.time.MutableDateTime;
 
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDF;
@@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text;
 @NDV(maxNdv = 20) // although technically it's unbounded, it's unlikely we will ever see ndv > 20
 public class UDFYear extends UDF {
   private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-  private final Calendar calendar = Calendar.getInstance();
+  private transient final MutableDateTime mdt = new MutableDateTime();
 
   private final IntWritable result = new IntWritable();
 
@@ -77,8 +77,8 @@ public class UDFYear extends UDF {
 
     try {
       Date date = formatter.parse(dateString.toString());
-      calendar.setTime(date);
-      result.set(calendar.get(Calendar.YEAR));
+      mdt.setMillis(date.getTime());
+      result.set(mdt.getYear());
       return result;
     } catch (ParseException e) {
       return null;
@@ -90,8 +90,8 @@ public class UDFYear extends UDF {
       return null;
     }
 
-    calendar.setTime(d.get());
-    result.set(calendar.get(Calendar.YEAR));
+    mdt.setMillis(d.get().getTime());
+    result.set(mdt.getYear());
     return result;
   }
 
@@ -100,8 +100,8 @@ public class UDFYear extends UDF {
       return null;
     }
 
-    calendar.setTime(t.getTimestamp());
-    result.set(calendar.get(Calendar.YEAR));
+    mdt.setMillis(t.getTimestamp().getTime());
+    result.set(mdt.getYear());
     return result;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
index 58cecc1..61c96e9 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
@@ -38,7 +38,8 @@ import org.junit.Test;
 import org.junit.internal.runners.statements.Fail;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
+import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Calendar;
@@ -53,6 +54,7 @@ import java.util.concurrent.ThreadFactory;
 public class TestVectorDateExpressions {
 
   private ExecutorService runner;
+  private static final int MAX_SANE_DATE_VALUE = new DateWritable(Date.valueOf("3000-01-01")).getDays();
 
   /* copied over from VectorUDFTimestampFieldLong */
   private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
@@ -78,11 +80,15 @@
   }
 
   private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) {
+    return getVectorizedRandomRowBatch(seed, size, Integer.MAX_VALUE);
+  }
+
+  private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, int maxValue) {
     VectorizedRowBatch batch = new VectorizedRowBatch(2, size);
     LongColumnVector lcv = new LongColumnVector(size);
     Random rand = new Random(seed);
     for (int i = 0; i < size; i++) {
-      lcv.vector[i] = (rand.nextInt());
+      lcv.vector[i] = (rand.nextInt(maxValue));
     }
     batch.cols[0] = lcv;
     batch.cols[1] = new LongColumnVector(size);
@@ -159,7 +165,7 @@
     batch.cols[0].isNull[0] = true;
     verifyUDFYear(batch);
 
-    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE);
     verifyUDFYear(batch);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
     verifyUDFYear(batch);
@@ -283,7 +289,7 @@
     batch.cols[0].isNull[0] = true;
     verifyUDFMonth(batch);
 
-    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE);
+    batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE);
     verifyUDFMonth(batch);
     TestVectorizedRowBatch.addRandomNulls(batch.cols[0]);
     verifyUDFMonth(batch);
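
A note on the test change: the old batches were filled with unbounded
rand.nextInt() values, which can be negative or can represent day counts
millions of years past the epoch. The new MAX_SANE_DATE_VALUE bound keeps
the randomly generated epoch-day numbers between 1970-01-01 and 3000-01-01,
a range where the vectorized and reference implementations can reasonably be
expected to agree. A small sketch of how the bound works (assuming Hive's
org.apache.hadoop.hive.serde2.io.DateWritable, which is what the test
appears to use; the class name below is illustrative):

    import java.sql.Date;
    import java.util.Random;
    import org.apache.hadoop.hive.serde2.io.DateWritable;

    public class SaneDateBoundDemo {
      public static void main(String[] args) {
        // Days since 1970-01-01 for 3000-01-01; roughly 376,000.
        int maxSane = new DateWritable(Date.valueOf("3000-01-01")).getDays();

        // nextInt(bound) returns values in [0, bound), so every generated
        // day number maps to a date from 1970-01-01 up to 2999-12-31.
        Random rand = new Random(200);
        int dayNumber = rand.nextInt(maxSane);
        System.out.println("epoch day " + dayNumber + " (max " + maxSane + ")");
      }
    }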


[2/2] hive git commit: HIVE-12552 : Wrong number of reducer estimation causing job to fail (Rajesh Balamohan via Gunther Hagleitner)

Posted by ha...@apache.org.
HIVE-12552 : Wrong number of reducer estimation causing job to fail (Rajesh Balamohan via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b75d9ea8
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b75d9ea8
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b75d9ea8

Branch: refs/heads/master
Commit: b75d9ea8a73f85d1420f8e3ba1e3b8f9b9acdc5e
Parents: b1c4502
Author: Rajesh Balamohan <rbalamohan at apache dot org>
Authored: Wed Dec 9 11:48:00 2015 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Mar 25 07:21:55 2016 -0700

----------------------------------------------------------------------
 ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b75d9ea8/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 8eab3af..d5a2eca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -119,7 +119,8 @@ public class GenTezUtils {
 
       // max we allow tez to pick
       int maxPartition = (int) (reduceSink.getConf().getNumReducers() * maxPartitionFactor);
-      maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;
+      maxPartition = Math.max(1, (maxPartition > maxReducers) ? maxReducers :
+          maxPartition);
 
       reduceWork.setMinReduceTasks(minPartition);
       reduceWork.setMaxReduceTasks(maxPartition);
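
To see why this one-line clamp addresses the failure in the subject line:
maxPartitionFactor can be fractional, so the int cast can truncate the
product to zero when the estimated reducer count is small, and a
maxPartition of 0 is an invalid ceiling to hand to Tez. A standalone sketch
with hypothetical numbers (the values below are illustrative, not taken
from Hive's configuration):

    public class MaxPartitionClampDemo {
      public static void main(String[] args) {
        int numReducers = 1;             // estimated reducers for the vertex
        double maxPartitionFactor = 0.5; // hypothetical config-derived factor
        int maxReducers = 1009;          // hypothetical cluster-wide cap

        // Old logic: the cast truncates 1 * 0.5 to 0, so the job asks Tez
        // for at most zero reduce tasks and fails.
        int maxPartition = (int) (numReducers * maxPartitionFactor);
        maxPartition = (maxPartition > maxReducers) ? maxReducers : maxPartition;
        System.out.println("old: " + maxPartition); // 0

        // Fixed logic: clamp to a floor of 1 so at least one reducer
        // is always requested.
        int fixed = Math.max(1, (maxPartition > maxReducers) ? maxReducers
            : maxPartition);
        System.out.println("fixed: " + fixed); // 1
      }
    }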