You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/10/14 06:53:06 UTC

svn commit: r1631614 [1/3] - in /hive/branches/branch-0.14: itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ ql/src/test/results/clientpositive/tez/

Author: gunther
Date: Tue Oct 14 04:53:05 2014
New Revision: 1631614

URL: http://svn.apache.org/r1631614
Log:
HIVE-8434: Vectorization logic using wrong values for DATE and TIMESTAMP partitioning columns in vectorized row batches... (Matt McCline via Gunther Hagleitner)

Added:
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/vector_partitioned_date_time.q.out
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/vector_partitioned_date_time.q.out
Modified:
    hive/branches/branch-0.14/itests/src/test/resources/testconfiguration.properties
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java

Modified: hive/branches/branch-0.14/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/itests/src/test/resources/testconfiguration.properties?rev=1631614&r1=1631613&r2=1631614&view=diff
==============================================================================
--- hive/branches/branch-0.14/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/branches/branch-0.14/itests/src/test/resources/testconfiguration.properties Tue Oct 14 04:53:05 2014
@@ -169,6 +169,7 @@ minitez.query.files.shared=alter_merge_2
   vector_mapjoin_reduce.q,\
   vector_non_string_partition.q,\
   vector_orderby_5.q,\
+  vector_partitioned_date_time.q,\
   vector_string_concat.q,\
   vector_varchar_4.q,\
   vector_varchar_simple.q,\

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1631614&r1=1631613&r2=1631614&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Tue Oct 14 04:53:05 2014
@@ -45,6 +45,7 @@ import org.apache.hadoop.hive.serde2.Col
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -487,7 +488,7 @@ public class VectorizedRowBatchCtx {
             lcv.isNull[0] = true;
             lcv.isRepeating = true;
           } else { 
-            lcv.fill(((Date) value).getTime());
+            lcv.fill(DateWritable.dateToDays((Date) value));
             lcv.isNull[0] = false;
           }          
         }
@@ -500,7 +501,7 @@ public class VectorizedRowBatchCtx {
             lcv.isNull[0] = true;
             lcv.isRepeating = true;
           } else { 
-            lcv.fill((long)(((Timestamp) value).getTime()));
+            lcv.fill(TimestampUtils.getTimeNanoSec((Timestamp) value));
             lcv.isNull[0] = false;
           }
         }

Added: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q?rev=1631614&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q (added)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/vector_partitioned_date_time.q Tue Oct 14 04:53:05 2014
@@ -0,0 +1,127 @@
+set hive.fetch.task.conversion=minimal;
+
+
+-- Check if vectorization code is handling partitioning on DATE and the other data types.
+
+
+CREATE TABLE flights_tiny (
+  origin_city_name STRING,
+  dest_city_name STRING,
+  fl_date DATE,
+  arr_delay FLOAT,
+  fl_num INT
+);
+
+LOAD DATA LOCAL INPATH '../../data/files/flights_tiny.txt.1' OVERWRITE INTO TABLE flights_tiny;
+
+CREATE TABLE flights_tiny_orc STORED AS ORC AS
+SELECT origin_city_name, dest_city_name, fl_date, to_utc_timestamp(fl_date, 'America/Los_Angeles') as fl_time, arr_delay, fl_num
+FROM flights_tiny;
+
+SELECT * FROM flights_tiny_orc;
+
+SET hive.vectorized.execution.enabled=false;
+
+select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
+
+select fl_date, count(*) from flights_tiny_orc group by fl_date;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain
+select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
+
+select * from flights_tiny_orc sort by fl_num, fl_date limit 25;
+
+explain
+select fl_date, count(*) from flights_tiny_orc group by fl_date;
+
+select fl_date, count(*) from flights_tiny_orc group by fl_date;
+
+
+SET hive.vectorized.execution.enabled=false;
+
+CREATE TABLE flights_tiny_orc_partitioned_date (
+  origin_city_name STRING,
+  dest_city_name STRING,
+  fl_time TIMESTAMP,
+  arr_delay FLOAT,
+  fl_num INT
+)
+PARTITIONED BY (fl_date DATE)
+STORED AS ORC;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+INSERT INTO TABLE flights_tiny_orc_partitioned_date
+PARTITION (fl_date)
+SELECT  origin_city_name, dest_city_name, fl_time, arr_delay, fl_num, fl_date
+FROM flights_tiny_orc;
+
+
+select * from flights_tiny_orc_partitioned_date;
+
+select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
+
+select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain
+select * from flights_tiny_orc_partitioned_date;
+
+select * from flights_tiny_orc_partitioned_date;
+
+explain
+select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
+
+select * from flights_tiny_orc_partitioned_date sort by fl_num, fl_date limit 25;
+
+explain
+select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
+
+select fl_date, count(*) from flights_tiny_orc_partitioned_date group by fl_date;
+
+
+SET hive.vectorized.execution.enabled=false;
+
+CREATE TABLE flights_tiny_orc_partitioned_timestamp (
+  origin_city_name STRING,
+  dest_city_name STRING,
+  fl_date DATE,
+  arr_delay FLOAT,
+  fl_num INT
+)
+PARTITIONED BY (fl_time TIMESTAMP)
+STORED AS ORC;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+INSERT INTO TABLE flights_tiny_orc_partitioned_timestamp
+PARTITION (fl_time)
+SELECT  origin_city_name, dest_city_name, fl_date, arr_delay, fl_num, fl_time
+FROM flights_tiny_orc;
+
+
+select * from flights_tiny_orc_partitioned_timestamp;
+
+select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
+
+select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
+
+SET hive.vectorized.execution.enabled=true;
+
+explain
+select * from flights_tiny_orc_partitioned_timestamp;
+
+select * from flights_tiny_orc_partitioned_timestamp;
+
+explain
+select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
+
+select * from flights_tiny_orc_partitioned_timestamp sort by fl_num, fl_time limit 25;
+
+explain
+select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
+
+select fl_time, count(*) from flights_tiny_orc_partitioned_timestamp group by fl_time;
\ No newline at end of file