You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jx...@apache.org on 2015/11/06 18:33:02 UTC

[20/55] [abbrv] hive git commit: HIVE-12238: Vectorization: Thread-safety errors in VectorUDFDate (Gopal V, reviewed by Gunther Hagleitner)

HIVE-12238: Vectorization: Thread-safety errors in VectorUDFDate (Gopal V, reviewed by Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d7c04859
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d7c04859
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d7c04859

Branch: refs/heads/master-fixed
Commit: d7c04859e1903cd6ed38678e8dae6b453c34b7bb
Parents: ad12765
Author: Gopal V <go...@apache.org>
Authored: Mon Nov 2 19:56:08 2015 -0800
Committer: Gopal V <go...@apache.org>
Committed: Mon Nov 2 19:56:08 2015 -0800

----------------------------------------------------------------------
 .../vector/expressions/VectorUDFDateString.java |  4 +-
 .../expressions/TestVectorDateExpressions.java  | 71 +++++++++++++++++++-
 2 files changed, 72 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d7c04859/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
index f1a5b93..e27ac6a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
@@ -22,6 +22,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
 import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.DateUtils;
 
 import java.text.SimpleDateFormat;
 import java.util.Date;
@@ -30,14 +31,13 @@ import java.text.ParseException;
 public class VectorUDFDateString extends StringUnaryUDF {
   private static final long serialVersionUID = 1L;
 
-  private transient static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
-
   private static final Logger LOG = LoggerFactory.getLogger(
       VectorUDFDateString.class.getName());
 
   public VectorUDFDateString(int colNum, int outputColumn) {
     super(colNum, outputColumn, new StringUnaryUDF.IUDFUnaryString() {
       Text t = new Text();
+      final transient SimpleDateFormat formatter = DateUtils.getDateFormat();
 
       @Override
       public Text evaluate(Text s) {

http://git-wip-us.apache.org/repos/asf/hive/blob/d7c04859/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
index 6bd4be1..9c4a751 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java
@@ -18,8 +18,9 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.expressions;
 
-import junit.framework.Assert;
+import org.junit.Assert;
 import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
@@ -31,15 +32,28 @@ import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
+import org.junit.internal.runners.statements.Fail;
+
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Calendar;
 import java.util.List;
 import java.util.Random;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadFactory;
 
 public class TestVectorDateExpressions {
+
+  private ExecutorService runner;
+
   /* copied over from VectorUDFTimestampFieldLong */
   private TimestampWritable toTimestampWritable(long daysSinceEpoch) {
     Timestamp ts = new Timestamp(DateWritable.daysToMillis((int) daysSinceEpoch));
@@ -412,6 +426,60 @@ public class TestVectorDateExpressions {
     verifyUDFWeekOfYear(batch);
   }
 
+  @Before
+  public void setUp() throws Exception {
+    runner =
+        Executors.newFixedThreadPool(3,
+            new ThreadFactoryBuilder().setNameFormat("date-tester-thread-%d").build());
+  }
+
+  private static final class MultiThreadedDateFormatTest implements Callable<Void> {
+    @Override
+    public Void call() throws Exception {
+      int batchSize = 1024;
+      VectorUDFDateString udf = new VectorUDFDateString(0, 1);
+      VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize);
+      BytesColumnVector in = new BytesColumnVector(batchSize);
+      BytesColumnVector out = new BytesColumnVector(batchSize);
+      batch.cols[0] = in;
+      batch.cols[1] = out;
+      for (int i = 0; i < batchSize; i++) {
+        byte[] data = String.format("1999-%02d-%02d", 1 + (i % 12), 1 + (i % 15)).getBytes("UTF-8");
+        in.setRef(i, data, 0, data.length);
+        in.isNull[i] = false;
+      }
+      udf.evaluate(batch);
+      // bug if it throws an exception
+      return (Void) null;
+    }
+  }
+
+  // 5s timeout
+  @Test(timeout = 5000)
+  public void testMultiThreadedVectorUDFDate() {
+    List<Callable<Void>> tasks = new ArrayList<Callable<Void>>();
+    for (int i = 0; i < 200; i++) {
+      tasks.add(new MultiThreadedDateFormatTest());
+    }
+    try {
+      List<Future<Void>> results = runner.invokeAll(tasks);
+      for (Future<Void> f : results) {
+        Assert.assertNull(f.get());
+      }
+    } catch (InterruptedException ioe) {
+      Assert.fail("Interrupted while running tests");
+    } catch (Exception e) {
+      Assert.fail("Multi threaded operations threw unexpected Exception: " + e.getMessage());
+    }
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (runner != null) {
+      runner.shutdownNow();
+    }
+  }
+
   public static void main(String[] args) {
     TestVectorDateExpressions self = new TestVectorDateExpressions();
     self.testVectorUDFYear();
@@ -419,5 +487,6 @@ public class TestVectorDateExpressions {
     self.testVectorUDFDayOfMonth();
     self.testVectorUDFWeekOfYear();
     self.testVectorUDFUnixTimeStamp();
+    self.testMultiThreadedVectorUDFDate();
   }
 }