You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ji...@apache.org on 2014/03/18 19:42:51 UTC
svn commit: r1579010 - in /hive/branches/branch-0.13:
common/src/test/org/apache/hadoop/hive/common/type/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/
ql/src/test/queries/clien...
Author: jitendra
Date: Tue Mar 18 18:42:51 2014
New Revision: 1579010
URL: http://svn.apache.org/r1579010
Log:
HIVE-6639. Vectorization: Partition column names are not picked up. (reviewed by Vikram)
Modified:
hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
Modified: hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
--- hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java (original)
+++ hive/branches/branch-0.13/common/src/test/org/apache/hadoop/hive/common/type/TestDecimal128.java Tue Mar 18 18:42:51 2014
@@ -852,7 +852,12 @@ public class TestDecimal128 {
Decimal128 d12 = new Decimal128(27.000, (short)3);
HiveDecimal hd7 = HiveDecimal.create(new BigDecimal("27.000"));
assertEquals(hd7.toString(), d12.getHiveDecimalString());
- assertEquals("27", hd7.toString());
+ assertEquals("27", d12.getHiveDecimalString());
+
+ Decimal128 d13 = new Decimal128(1234123000, (short)3);
+ HiveDecimal hd8 = HiveDecimal.create(new BigDecimal("1234123000"));
+ assertEquals(hd8.toString(), d13.getHiveDecimalString());
+ assertEquals("1234123000", d13.getHiveDecimalString());
}
@Test
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToTimestamp.java Tue Mar 18 18:42:51 2014
@@ -53,6 +53,8 @@ public class CastDecimalToTimestamp exte
@Override
protected void func(LongColumnVector outV, DecimalColumnVector inV, int i) {
tmp.update(inV.vector[i]);
+
+ // Reduce scale at most by 9, therefore multiplication will not require rounding.
int newScale = inV.scale > 9 ? (inV.scale - 9) : 0;
tmp.multiplyDestructive(tenE9, (short) newScale);
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Mar 18 18:42:51 2014
@@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.lib.Tas
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -421,7 +422,7 @@ public class Vectorizer implements Physi
VectorizationContext vContext = null;
if (op instanceof TableScanOperator) {
- vContext = getVectorizationContext(op, physicalContext);
+ vContext = getVectorizationContext((TableScanOperator) op, physicalContext);
for (String onefile : mWork.getPathToAliases().keySet()) {
List<String> aliases = mWork.getPathToAliases().get(onefile);
for (String alias : aliases) {
@@ -719,27 +720,20 @@ public class Vectorizer implements Physi
return supportedDataTypesPattern.matcher(type.toLowerCase()).matches();
}
- private VectorizationContext getVectorizationContext(Operator<? extends OperatorDesc> op,
+ private VectorizationContext getVectorizationContext(TableScanOperator op,
PhysicalContext pctx) {
RowSchema rs = op.getSchema();
Map<String, Integer> cmap = new HashMap<String, Integer>();
int columnCount = 0;
for (ColumnInfo c : rs.getSignature()) {
- if (!c.getIsVirtualCol()) {
+ if (!isVirtualColumn(c)) {
cmap.put(c.getInternalName(), columnCount++);
}
}
- PrunedPartitionList partList = pctx.getParseContext().getOpToPartList().get(op);
- if (partList != null) {
- Table tab = partList.getSourceTable();
- if (tab.getPartitionKeys() != null) {
- for (FieldSchema fs : tab.getPartitionKeys()) {
- cmap.put(fs.getName(), columnCount++);
- }
- }
- }
- return new VectorizationContext(cmap, columnCount);
+
+ VectorizationContext vc = new VectorizationContext(cmap, columnCount);
+ return vc;
}
Operator<? extends OperatorDesc> vectorizeOperator(Operator<? extends OperatorDesc> op,
@@ -778,4 +772,16 @@ public class Vectorizer implements Physi
}
return vectorOp;
}
+
+ private boolean isVirtualColumn(ColumnInfo column) {
+
+ // Not using method column.getIsVirtualCol() because partitioning columns are also
+ // treated as virtual columns in ColumnInfo.
+ for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
+ if (column.getInternalName().equals(vc.getName())) {
+ return true;
+ }
+ }
+ return false;
+ }
}
Modified: hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q (original)
+++ hive/branches/branch-0.13/ql/src/test/queries/clientpositive/vectorization_part.q Tue Mar 18 18:42:51 2014
@@ -4,3 +4,4 @@ insert overwrite table alltypesorc_part
insert overwrite table alltypesorc_part partition (ds='2012') select * from alltypesorc limit 100;
select count(cdouble), cint from alltypesorc_part where ds='2011' group by cint limit 10;
+select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds;
Modified: hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out?rev=1579010&r1=1579009&r2=1579010&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out (original)
+++ hive/branches/branch-0.13/ql/src/test/results/clientpositive/vectorization_part.q.out Tue Mar 18 18:42:51 2014
@@ -92,3 +92,40 @@ POSTHOOK: Lineage: alltypesorc_part PART
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
100 528534767
+PREHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_part
+PREHOOK: Input: default@alltypesorc_part@ds=2011
+PREHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from alltypesorc_part A join alltypesorc_part B on A.ds=B.ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_part
+POSTHOOK: Input: default@alltypesorc_part@ds=2011
+POSTHOOK: Input: default@alltypesorc_part@ds=2012
+#### A masked pattern was here ####
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2011).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cbigint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cbigint, type:bigint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean1, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cboolean2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cboolean2, type:boolean, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cfloat SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).csmallint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:csmallint, type:smallint, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).cstring2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring2, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctimestamp2 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp2, type:timestamp, comment:from deserializer), ]
+POSTHOOK: Lineage: alltypesorc_part PARTITION(ds=2012).ctinyint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctinyint, type:tinyint, comment:from deserializer), ]
+20000