You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ji...@apache.org on 2014/03/18 19:28:10 UTC

svn commit: r1579001 - in /hive/trunk: common/src/test/org/apache/hadoop/hive/common/type/ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: jitendra
Date: Tue Mar 18 18:28:09 2014
New Revision: 1579001

URL: http://svn.apache.org/r1579001
Log:
HIVE-6639. Vectorization: Partition column names are not picked up. (reviewed by Vikram)

Modified:
    hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/trunk/ql/src/test/queries/clientpositive/vectorization_part.q
    hive/trunk/ql/src/test/results/clientpositive/vectorization_part.q.out

Modified: hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java?rev=1579001&r1=1579000&r2=1579001&view=diff
==============================================================================
--- hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java (original)
+++ hive/trunk/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java Tue Mar 18 18:28:09 2014
@@ -852,7 +852,12 @@ public class TestDecimal128 {
     Decimal128 d12 = new Decimal128(27.000, (short)3);
     HiveDecimal hd7 = HiveDecimal.create(new BigDecimal("27.000"));
     assertEquals(hd7.toString(), d12.getHiveDecimalString());
-    assertEquals("27", hd7.toString());
+    assertEquals("27", d12.getHiveDecimalString());
+
+    Decimal128 d13 = new Decimal128(1234123000, (short)3);
+    HiveDecimal hd8 = HiveDecimal.create(new BigDecimal("1234123000"));
+    assertEquals(hd8.toString(), d13.getHiveDecimalString());
+    assertEquals("1234123000", d13.getHiveDecimalString());
   }
 
   @Test

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java?rev=1579001&r1=1579000&r2=1579001&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java Tue Mar 18 18:28:09 2014
@@ -53,6 +53,8 @@ public class CastDecimalToTimestamp exte
   @Override
   protected void func(LongColumnVector outV, DecimalColumnVector inV,  int i) {
     tmp.update(inV.vector[i]);
+
+    // Reduce scale at most by 9, therefore multiplication will not require rounding.
     int newScale = inV.scale > 9 ? (inV.scale - 9) : 0;
     tmp.multiplyDestructive(tenE9, (short) newScale);
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1579001&r1=1579000&r2=1579001&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Mar 18 18:28:09 2014
@@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.lib.Tas
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.RowResolver;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -421,7 +422,7 @@ public class Vectorizer implements Physi
       VectorizationContext vContext = null;
 
       if (op instanceof TableScanOperator) {
-        vContext = getVectorizationContext(op, physicalContext);
+        vContext = getVectorizationContext((TableScanOperator) op, physicalContext);
         for (String onefile : mWork.getPathToAliases().keySet()) {
           List<String> aliases = mWork.getPathToAliases().get(onefile);
           for (String alias : aliases) {
@@ -719,27 +720,20 @@ public class Vectorizer implements Physi
     return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
   }
 
-  private VectorizationContext getVectorizationContext(Operator<? extends OperatorDesc> op,
+  private VectorizationContext getVectorizationContext(TableScanOperator op,
       PhysicalContext pctx) {
     RowSchema rs = op.getSchema();
 
     Map<String, Integer> cmap = new HashMap<String, Integer>();
     int columnCount = 0;
     for (ColumnInfo c : rs.getSignature()) {
-      if (!c.getIsVirtualCol()) {
+      if (!isVirtualColumn(c)) {
         cmap.put(c.getInternalName(), columnCount++);
       }
     }
-    PrunedPartitionList partList = pctx.getParseContext().getOpToPartList().get(op);
-    if (partList != null) {
-      Table tab = partList.getSourceTable();
-      if (tab.getPartitionKeys() != null) {
-        for (FieldSchema fs : tab.getPartitionKeys()) {
-          cmap.put(fs.getName(), columnCount++);
-        }
-      }
-    }
-    return new VectorizationContext(cmap, columnCount);
+
+    VectorizationContext vc =  new VectorizationContext(cmap, columnCount);
+    return vc;
   }
 
   Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
@@ -778,4 +772,16 @@ public class Vectorizer implements Physi
     }
     return vectorOp;
   }
+
+  private boolean isVirtualColumn(ColumnInfo column) {
+
+    // Not using method column.getIsVirtualCol() because partitioning columns are also
+    // treated as virtual columns in ColumnInfo.
+    for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
+      if (column.getInternalName().equals(vc.getName())) {
+        return true;
+      }
+    }
+    return false;
+  }
 }

Modified: hive/trunk/ql/src/test/queries/clientpositive/vectorization_part.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorization_part.q?rev=1579001&r1=1579000&r2=1579001&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorization_part.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorization_part.q Tue Mar 18 18:28:09 2014
@@ -4,3 +4,4 @@ insert overwrite table alltypesorc_part 
 insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc limit 100;
 
 select count(cdouble), cint from alltypesorc_part where ds='2011' group by cint limit 10;
+select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds;

Modified: hive/trunk/ql/src/test/results/clientpositive/vectorization_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorization_part.q.out?rev=1579001&r1=1579000&r2=1579001&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorization_part.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorization_part.q.out Tue Mar 18 18:28:09 2014
@@ -92,3 +92,40 @@ POSTHOOK: Lineage: alltypesorc_part PART
 POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
 POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
 100	528534767
+PREHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part
+PREHOOK: Input: default@alltypesorc_part@ds=2011
+PREHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part
+POSTHOOK: Input: default@alltypesorc_part@ds=2011
+POSTHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+20000