svn commit: r1581442 - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/common/type/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/
ql/src/java/org/apache/hadoop/hive/ql/optimizer...
Author: jitendra
Date: Tue Mar 25 18:12:23 2014
New Revision: 1581442
URL: http://svn.apache.org/r1581442
Log:
HIVE-6349 : Column name map is broken (reviewed by Remus, Sergey)
Added:
hive/trunk/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q
hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/common/type/HiveDecimal.java Tue Mar 25 18:12:23 2014
@@ -273,13 +273,4 @@ public class HiveDecimal implements Comp
return bd;
}
-
- /**
- * Sets the {@link BigDecimal} value in this object.
- * @param bigDecimal
- */
- public void setNormalize(BigDecimal bigDecimal) {
- BigDecimal value = normalize(bigDecimal, true);
- this.bd = value;
- }
}
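
The deleted setter mutated a HiveDecimal in place. That is unsafe once the same instance has been handed out to callers, which is exactly the pattern the VectorExpressionWriterFactory hunk below removes: one cached hd field was reused across writeValue calls. A minimal standalone illustration of the aliasing hazard, using a hypothetical MutableDecimal stand-in rather than the Hive classes:

    import java.math.BigDecimal;
    import java.util.ArrayList;
    import java.util.List;

    public class AliasingDemo {
        // Stand-in for a mutable decimal holder, i.e. what setNormalize enabled.
        static final class MutableDecimal {
            private BigDecimal bd;
            MutableDecimal(BigDecimal bd) { this.bd = bd; }
            void set(BigDecimal bd) { this.bd = bd; }  // mutation in place
            @Override public String toString() { return bd.toPlainString(); }
        }

        public static void main(String[] args) {
            List<MutableDecimal> written = new ArrayList<>();
            MutableDecimal shared = new MutableDecimal(BigDecimal.ZERO);
            for (int i = 1; i <= 3; i++) {
                shared.set(BigDecimal.valueOf(i));  // reuse one object per row
                written.add(shared);                // the caller keeps a reference
            }
            // Prints [3, 3, 3]: rows written earlier were silently corrupted
            // by later mutations of the single shared instance.
            System.out.println(written);
        }
    }

The replacement pattern, visible in the writer hunk, allocates a fresh normalized instance per value via HiveDecimal.create(bigDecimal), so no two rows ever share state.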
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java Tue Mar 25 18:12:23 2014
@@ -38,7 +38,7 @@ import org.apache.hadoop.hive.serde2.obj
/**
* Select operator implementation.
*/
-public class VectorSelectOperator extends SelectOperator {
+public class VectorSelectOperator extends SelectOperator implements VectorizationContextRegion {
private static final long serialVersionUID = 1L;
@@ -48,6 +48,9 @@ public class VectorSelectOperator extend
private transient VectorExpressionWriter [] valueWriters = null;
+ // Create a new outgoing vectorization context because the column name map will change.
+ private VectorizationContext vOutContext;
+
public VectorSelectOperator(VectorizationContext vContext, OperatorDesc conf)
throws HiveException {
this.conf = (SelectDesc) conf;
@@ -59,14 +62,21 @@ public class VectorSelectOperator extend
vExpressions[i] = ve;
}
- Map<String, Integer> cMap = vContext.getColumnMap();
+ /**
+ * Create a new vectorization context to update the column map, but the same output
+ * column manager must be inherited to track the scratch columns.
+ */
+ vOutContext = new VectorizationContext(vContext);
+
+ // Set a fileKey, although this operator doesn't use it.
+ vOutContext.setFileKey(vContext.getFileKey() + "/_SELECT_");
+
+ // Update column map
+ vOutContext.getColumnMap().clear();
for (int i=0; i < colList.size(); ++i) {
String columnName = this.conf.getOutputColumnNames().get(i);
- if (!cMap.containsKey(columnName)) {
- VectorExpression ve = vExpressions[i];
- // Update column map with output column names
- vContext.addToColumnMap(columnName, ve.getOutputColumn());
- }
+ VectorExpression ve = vExpressions[i];
+ vOutContext.addToColumnMap(columnName, ve.getOutputColumn());
}
}
@@ -153,4 +163,9 @@ public class VectorSelectOperator extend
public void setVExpressions(VectorExpression[] vExpressions) {
this.vExpressions = vExpressions;
}
+
+ @Override
+ public VectorizationContext getOuputVectorizationContext() {
+ return vOutContext;
+ }
}
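
Before this change the select operator merged its output names into the shared input context, so a downstream operator (likely the nested-mapjoin case the new test below exercises) could resolve a name against a stale or conflicting entry. The fix derives a child context: the column map is rebuilt with only the select's output names while the parent's map is left alone. A reduced sketch of the pattern, with VectorizationContext collapsed to just its column map (illustrative names, not Hive APIs):

    import java.util.HashMap;
    import java.util.Map;

    public class RemapSketch {
        // VectorizationContext reduced to its column-name map; the real copy
        // constructor also shares the parent's OutputColumnManager (next hunk).
        static final class Ctx {
            final Map<String, Integer> columnMap;
            String fileKey;

            Ctx() { this.columnMap = new HashMap<>(); }

            Ctx(Ctx parent) {  // copy, don't alias, the parent's map
                this.columnMap = new HashMap<>(parent.columnMap);
            }
        }

        public static void main(String[] args) {
            Ctx in = new Ctx();
            in.fileKey = "part-0";
            in.columnMap.put("ctinyint", 0);
            in.columnMap.put("cdouble", 5);

            // The SELECT renames its projections, so downstream operators must
            // resolve its *output* names; give them a dedicated child context.
            Ctx out = new Ctx(in);
            out.fileKey = in.fileKey + "/_SELECT_";  // unique key for scratch state
            out.columnMap.clear();
            out.columnMap.put("_col0", 5);  // e.g. cdouble projected as _col0

            System.out.println(in.columnMap);   // parent mapping left untouched
            System.out.println(out.columnMap);  // child resolves only new names
        }
    }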
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java Tue Mar 25 18:12:23 2014
@@ -120,6 +120,11 @@ public class VectorizationContext {
//Map column number to type
private final OutputColumnManager ocm;
+ // The file key is used by operators to retrieve the scratch vectors
+ // from mapWork at runtime. Operators that modify the structure of
+ // a vector row batch need to allocate scratch vectors as well. Every
+ // operator that creates a new VectorizationContext should set a unique
+ // fileKey.
private String fileKey = null;
// Set of UDF classes for type casting data types in row-mode.
@@ -151,6 +156,19 @@ public class VectorizationContext {
vMap = new VectorExpressionDescriptor();
}
+ /**
+ * This constructor inherits the OutputColumnManager from the parent context,
+ * so it should be used only by operators that don't create a new vectorized
+ * row batch, i.e. operators that want to modify the column name map without
+ * changing the row batch.
+ */
+ public VectorizationContext(VectorizationContext parent) {
+ this.columnMap = new HashMap<String, Integer>(parent.columnMap);
+ this.ocm = parent.ocm;
+ this.firstOutputColumnIndex = parent.firstOutputColumnIndex;
+ vMap = new VectorExpressionDescriptor();
+ }
+
public String getFileKey() {
return fileKey;
}
@@ -170,7 +188,7 @@ public class VectorizationContext {
return columnMap.get(colExpr.getColumn());
}
- private class OutputColumnManager {
+ private static class OutputColumnManager {
private final int initialOutputCol;
private int outputColCount = 0;
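
Making OutputColumnManager static is what lets the new copy constructor share it safely: a non-static inner class carries a hidden reference to its enclosing VectorizationContext, so handing one manager to a child context would also pin the parent instance. A minimal sketch, simplified from the Hive implementation, of two contexts drawing non-overlapping scratch slots from one shared manager:

    public class StaticNestedSketch {
        // `static` means no hidden reference to an enclosing context instance,
        // so the manager can be shared freely between parent and child contexts.
        static final class OutputColumnManager {
            private final int initialOutputCol;
            private int outputColCount = 0;
            OutputColumnManager(int initialOutputCol) { this.initialOutputCol = initialOutputCol; }
            int allocate() { return initialOutputCol + outputColCount++; }
        }

        public static void main(String[] args) {
            OutputColumnManager shared = new OutputColumnManager(10);
            // Parent and child contexts draw from the same counter, so their
            // scratch columns never overlap within one row batch.
            System.out.println(shared.allocate());  // 10, e.g. for the parent
            System.out.println(shared.allocate());  // 11, e.g. for the child
        }
    }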
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java Tue Mar 25 18:12:23 2014
@@ -447,28 +447,24 @@ public final class VectorExpressionWrite
SettableHiveDecimalObjectInspector fieldObjInspector) throws HiveException {
return new VectorExpressionWriterDecimal() {
- private HiveDecimal hd;
private Object obj;
public VectorExpressionWriter init(SettableHiveDecimalObjectInspector objInspector) throws HiveException {
super.init(objInspector);
- hd = HiveDecimal.create(BigDecimal.ZERO);
obj = initValue(null);
return this;
}
@Override
public Object writeValue(Decimal128 value) throws HiveException {
- hd.setNormalize(value.toBigDecimal());
- ((SettableHiveDecimalObjectInspector) this.objectInspector).set(obj, hd);
- return obj;
+ return ((SettableHiveDecimalObjectInspector) this.objectInspector).set(obj,
+ HiveDecimal.create(value.toBigDecimal()));
}
@Override
public Object setValue(Object field, Decimal128 value) {
- hd.setNormalize(value.toBigDecimal());
- ((SettableHiveDecimalObjectInspector) this.objectInspector).set(field, hd);
- return field;
+ return ((SettableHiveDecimalObjectInspector) this.objectInspector).set(field,
+ HiveDecimal.create(value.toBigDecimal()));
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFYearLong.java Tue Mar 25 18:12:23 2014
@@ -29,16 +29,16 @@ public final class VectorUDFYearLong ext
private static final long serialVersionUID = 1L;
/* year boundaries in nanoseconds */
- static transient final long[] YEAR_BOUNDARIES;
- static transient final int MIN_YEAR = 1901;
- static transient final int MAX_YEAR = 2038;
+ private static transient final long[] YEAR_BOUNDARIES;
+ private static transient final int MIN_YEAR = 1678;
+ private static transient final int MAX_YEAR = 2300;
static {
YEAR_BOUNDARIES = new long[MAX_YEAR-MIN_YEAR];
Calendar c = Calendar.getInstance();
c.setTimeInMillis(0); // c.set doesn't reset millis
/* January of MIN_YEAR is not within range */
- for(int year=MIN_YEAR+1; year <= 2038; year++) {
+ for(int year=MIN_YEAR+1; year <= MAX_YEAR; year++) {
c.set(year, Calendar.JANUARY, 1, 0, 0, 0);
YEAR_BOUNDARIES[year-MIN_YEAR-1] = c.getTimeInMillis()*1000*1000;
}
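
This hunk only widens and privatizes the precomputed table of start-of-year boundaries (nanoseconds since the epoch), moving from 1901-2038 to 1678-2300. The lookup is outside the hunk, so the following is an assumption about how such a table is typically consumed rather than the committed code: binary-search the boundaries and map the slot back to a year.

    import java.util.Arrays;
    import java.util.Calendar;
    import java.util.TimeZone;

    public class YearLookupSketch {
        static final int MIN_YEAR = 1678;
        // The commit uses 2300; this sketch caps at 2262 because Jan 1 of 2263
        // in nanoseconds no longer fits in a signed long, and an overflowed
        // tail would violate the sorted-array precondition of binarySearch.
        static final int MAX_YEAR = 2262;
        static final long[] YEAR_BOUNDARIES = new long[MAX_YEAR - MIN_YEAR];

        static {
            // UTC pinned for a deterministic example; the Hive code uses the
            // default time zone via Calendar.getInstance().
            Calendar c = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
            c.setTimeInMillis(0); // c.set doesn't reset millis
            for (int year = MIN_YEAR + 1; year <= MAX_YEAR; year++) {
                c.set(year, Calendar.JANUARY, 1, 0, 0, 0);
                YEAR_BOUNDARIES[year - MIN_YEAR - 1] = c.getTimeInMillis() * 1000L * 1000L;
            }
        }

        // Year containing the given epoch timestamp in nanoseconds: the
        // largest year whose Jan-1 boundary is <= the timestamp.
        static int yearOf(long nanos) {
            int pos = Arrays.binarySearch(YEAR_BOUNDARIES, nanos);
            if (pos >= 0) {
                return MIN_YEAR + 1 + pos; // exact hit on a year boundary
            }
            int insertion = -pos - 1;      // first boundary strictly greater
            return MIN_YEAR + insertion;   // insertion == 0 means MIN_YEAR itself
        }

        public static void main(String[] args) {
            System.out.println(yearOf(0L)); // epoch -> 1970
        }
    }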
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1581442&r1=1581441&r2=1581442&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Tue Mar 25 18:12:23 2014
@@ -33,7 +33,6 @@ import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
@@ -53,11 +52,7 @@ import org.apache.hadoop.hive.ql.lib.Rul
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.lib.TaskGraphWalker;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
-import org.apache.hadoop.hive.ql.parse.RowResolver;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.AbstractOperatorDesc;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
@@ -378,7 +373,9 @@ public class Vectorizer implements Physi
class VectorizationNodeProcessor implements NodeProcessor {
private final MapWork mWork;
- private final Map<String, VectorizationContext> vectorizationContexts =
+
+ // This is used to extract scratch column types for each file key
+ private final Map<String, VectorizationContext> scratchColumnContext =
new HashMap<String, VectorizationContext>();
private final Map<Operator<? extends OperatorDesc>, VectorizationContext> vContextsByTSOp =
@@ -394,8 +391,8 @@ public class Vectorizer implements Physi
public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
Map<String, Map<Integer, String>> scratchColumnVectorTypes =
new HashMap<String, Map<Integer, String>>();
- for (String onefile : vectorizationContexts.keySet()) {
- VectorizationContext vc = vectorizationContexts.get(onefile);
+ for (String onefile : scratchColumnContext.keySet()) {
+ VectorizationContext vc = scratchColumnContext.get(onefile);
Map<Integer, String> cmap = vc.getOutputColumnTypeMap();
scratchColumnVectorTypes.put(onefile, cmap);
}
@@ -405,8 +402,8 @@ public class Vectorizer implements Physi
public Map<String, Map<String, Integer>> getScratchColumnMap() {
Map<String, Map<String, Integer>> scratchColumnMap =
new HashMap<String, Map<String, Integer>>();
- for(String oneFile: vectorizationContexts.keySet()) {
- VectorizationContext vc = vectorizationContexts.get(oneFile);
+ for(String oneFile: scratchColumnContext.keySet()) {
+ VectorizationContext vc = scratchColumnContext.get(oneFile);
Map<String, Integer> cmap = vc.getColumnMap();
scratchColumnMap.put(oneFile, cmap);
}
@@ -433,7 +430,7 @@ public class Vectorizer implements Physi
// Each partition gets a copy
//
vContext.setFileKey(onefile);
- vectorizationContexts.put(onefile, vContext);
+ scratchColumnContext.put(onefile, vContext);
break;
}
}
@@ -472,7 +469,7 @@ public class Vectorizer implements Physi
VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
vContextsByTSOp.put(op, vOutContext);
- vectorizationContexts.put(vOutContext.getFileKey(), vOutContext);
+ scratchColumnContext.put(vOutContext.getFileKey(), vOutContext);
}
}
} catch (HiveException e) {
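
The rename to scratchColumnContext makes the map's role explicit: every context that can allocate scratch columns is registered under a unique fileKey, and getScratchColumnVectorTypes flattens the registry into per-key type maps stored with the MapWork. A toy sketch, with illustrative keys and types only, of why the derived key from the select operator keeps its scratch allocations separate:

    import java.util.HashMap;
    import java.util.Map;

    public class ScratchRegistrySketch {
        public static void main(String[] args) {
            // Per-fileKey registry, as the renamed scratchColumnContext keeps:
            // one entry per context that can allocate scratch columns.
            Map<String, Map<Integer, String>> scratchTypes = new HashMap<>();

            // A table-scan context registered under its partition path...
            scratchTypes.put("part-0", Map.of(10, "double"));
            // ...and a SELECT's derived context under a derived, unique key,
            // mirroring vOutContext.setFileKey(parentKey + "/_SELECT_").
            scratchTypes.put("part-0/_SELECT_", Map.of(10, "bigint"));

            // At runtime each operator looks up its own key, so the two sets
            // of scratch vectors are allocated independently and cannot collide.
            System.out.println(scratchTypes.get("part-0/_SELECT_"));
        }
    }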
Added: hive/trunk/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q?rev=1581442&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/vectorized_nested_mapjoin.q Tue Mar 25 18:12:23 2014
@@ -0,0 +1,8 @@
+SET hive.vectorized.execution.enabled=true;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.nonconditionaltask=true;
+SET hive.auto.convert.join.nonconditionaltask.size=1000000000;
+
+explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint;
+
+select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint;
Added: hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out?rev=1581442&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out Tue Mar 25 18:12:23 2014
@@ -0,0 +1,125 @@
+PREHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-7 is a root stage
+ Stage-2 depends on stages: Stage-7
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ t1:v1
+ Fetch Operator
+ limit: -1
+ v3
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ t1:v1
+ TableScan
+ alias: v1
+ Statistics: Num rows: 23577 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {ctinyint} {csmallint} {cdouble}
+ 1 {ctinyint}
+ keys:
+ 0 ctinyint (type: tinyint)
+ 1 ctinyint (type: tinyint)
+ v3
+ TableScan
+ alias: v3
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ HashTable Sink Operator
+ condition expressions:
+ 0 {_col1}
+ 1
+ keys:
+ 0 _col0 (type: smallint)
+ 1 csmallint (type: smallint)
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: v2
+ Statistics: Num rows: 94309 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {ctinyint} {csmallint} {cdouble}
+ 1 {ctinyint}
+ keys:
+ 0 ctinyint (type: tinyint)
+ 1 ctinyint (type: tinyint)
+ outputColumnNames: _col0, _col1, _col5, _col14
+ Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (_col0 = _col14) (type: boolean)
+ Statistics: Num rows: 51869 Data size: 207477 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: smallint), _col5 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 51869 Data size: 207477 Basic stats: COMPLETE Column stats: NONE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col1}
+ 1
+ keys:
+ 0 _col0 (type: smallint)
+ 1 csmallint (type: smallint)
+ outputColumnNames: _col1
+ Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: double)
+ outputColumnNames: _col1
+ Statistics: Num rows: 103739 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: sum(_col1)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
+ Local Work:
+ Map Reduce Local Work
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+PREHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+6.065190932486892E11