You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/08/18 00:00:34 UTC
[04/50] [abbrv] hive git commit: HIVE-11437: CBO: Calcite Operator To
Hive Operator (Calcite Return Path) : dealing with insert into (Pengcheng
Xiong, reviewed by Jesus Camacho Rodriguez)
HIVE-11437: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : dealing with insert into (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6df52edc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6df52edc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6df52edc
Branch: refs/heads/hbase-metastore
Commit: 6df52edc5ec7dea80b271897128d5037d2d90ef0
Parents: 0b38612
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Mon Aug 10 13:00:59 2015 +0300
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Aug 10 13:00:59 2015 +0300
----------------------------------------------------------------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 40 ++++++++-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +-
.../test/queries/clientpositive/cbo_rp_insert.q | 17 ++++
.../results/clientpositive/cbo_rp_insert.q.out | 89 ++++++++++++++++++++
4 files changed, 146 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 4027229..f26d1df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -109,6 +109,8 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FunctionInfo;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -170,6 +172,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.serde.serdeConstants;
@@ -649,7 +652,42 @@ public class CalcitePlanner extends SemanticAnalyzer {
conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan);
RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
- return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot);
+ String dest = getQB().getParseInfo().getClauseNames().iterator().next();
+ if (getQB().getParseInfo().getDestSchemaForClause(dest) != null
+ && this.getQB().getTableDesc() == null) {
+ Operator<?> selOp = handleInsertStatement(dest, hiveRoot, hiveRootRR, getQB());
+ return genFileSinkPlan(dest, getQB(), selOp);
+ } else {
+ return genFileSinkPlan(dest, getQB(), hiveRoot);
+ }
+ }
+
+ // Wraps SemanticAnalyzer#handleInsertStatementSpec: builds a select operator over 'input' (via
+ // OperatorFactory.getAndMakeChild) whose row schema comes from the RowResolver that call returns.
+ Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
+ throws SemanticException {
+ ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); // one column expression per input column
+ ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
+ for (int i = 0; i < columns.size(); i++) {
+ ColumnInfo col = columns.get(i);
+ colList.add(new ExprNodeColumnDesc(col));
+ }
+ ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
+
+ RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb,
+ selExprList); // NOTE(review): inputRR is passed as both outputRR and inputRR; colList is presumably rewritten in place by this call - confirm
+
+ ArrayList<String> columnNames = new ArrayList<String>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+ for (int i = 0; i < colList.size(); i++) {
+ String outputCol = getColumnInternalName(i); // output column name is derived from the position i
+ colExprMap.put(outputCol, colList.get(i));
+ columnNames.add(outputCol);
+ }
+ Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList,
+ columnNames), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+ output.setColumnExprMap(colExprMap); // maps each output column name to the expression that produces it
+ return output;
}
private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan)
http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index fe7c1ca..5ea6f3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3883,7 +3883,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
* @throws SemanticException
*/
- private RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
+ public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
RowResolver outputRR, RowResolver inputRR, QB qb,
ASTNode selExprList) throws SemanticException {
//(z,x)
http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/queries/clientpositive/cbo_rp_insert.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_insert.q b/ql/src/test/queries/clientpositive/cbo_rp_insert.q
new file mode 100644
index 0000000..eeaeec2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_insert.q
@@ -0,0 +1,17 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+
+drop database if exists x314 cascade;
+create database x314;
+use x314;
+create table source(s1 int, s2 int);
+create table target1(x int, y int, z int);
+
+insert into source(s2,s1) values(2,1);
+-- expect source to contain 1 row (1,2)
+select * from source;
+insert into target1(z,x) select * from source;
+-- expect target1 to contain 1 row (2,NULL,1)
+select * from target1;
+
+drop database if exists x314 cascade;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
new file mode 100644
index 0000000..6428a4b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
@@ -0,0 +1,89 @@
+PREHOOK: query: drop database if exists x314 cascade
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: drop database if exists x314 cascade
+POSTHOOK: type: DROPDATABASE
+PREHOOK: query: create database x314
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:x314
+POSTHOOK: query: create database x314
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:x314
+PREHOOK: query: use x314
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:x314
+POSTHOOK: query: use x314
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:x314
+PREHOOK: query: create table source(s1 int, s2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@source
+POSTHOOK: query: create table source(s1 int, s2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@source
+PREHOOK: query: create table target1(x int, y int, z int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@target1
+POSTHOOK: query: create table target1(x int, y int, z int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@target1
+PREHOOK: query: insert into source(s2,s1) values(2,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@values__tmp__table__1
+PREHOOK: Output: x314@source
+POSTHOOK: query: insert into source(s2,s1) values(2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@values__tmp__table__1
+POSTHOOK: Output: x314@source
+POSTHOOK: Lineage: source.s1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: source.s2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- expect source to contain 1 row (1,2)
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@source
+#### A masked pattern was here ####
+POSTHOOK: query: -- expect source to contain 1 row (1,2)
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@source
+#### A masked pattern was here ####
+1 2
+PREHOOK: query: insert into target1(z,x) select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@source
+PREHOOK: Output: x314@target1
+POSTHOOK: query: insert into target1(z,x) select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@source
+POSTHOOK: Output: x314@target1
+POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ]
+POSTHOOK: Lineage: target1.y EXPRESSION []
+POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ]
+PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
+select * from target1
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@target1
+#### A masked pattern was here ####
+POSTHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
+select * from target1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@target1
+#### A masked pattern was here ####
+2 NULL 1
+PREHOOK: query: drop database if exists x314 cascade
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:x314
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@source
+PREHOOK: Output: x314@target1
+PREHOOK: Output: x314@values__tmp__table__1
+POSTHOOK: query: drop database if exists x314 cascade
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:x314
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@source
+POSTHOOK: Output: x314@target1
+POSTHOOK: Output: x314@values__tmp__table__1