You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/08/18 00:00:34 UTC

[04/50] [abbrv] hive git commit: HIVE-11437: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : dealing with insert into (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)

HIVE-11437: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : dealing with insert into (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6df52edc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6df52edc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6df52edc

Branch: refs/heads/hbase-metastore
Commit: 6df52edc5ec7dea80b271897128d5037d2d90ef0
Parents: 0b38612
Author: Pengcheng Xiong <px...@hortonworks.com>
Authored: Mon Aug 10 13:00:59 2015 +0300
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Aug 10 13:00:59 2015 +0300

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    | 40 ++++++++-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  2 +-
 .../test/queries/clientpositive/cbo_rp_insert.q | 17 ++++
 .../results/clientpositive/cbo_rp_insert.q.out  | 89 ++++++++++++++++++++
 4 files changed, 146 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 4027229..f26d1df 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -109,6 +109,8 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
@@ -170,6 +172,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde.serdeConstants;
@@ -649,7 +652,42 @@ public class CalcitePlanner extends SemanticAnalyzer {
         conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan);
     RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB());
     opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR));
-    return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot);
+    String dest = getQB().getParseInfo().getClauseNames().iterator().next();
+    if (getQB().getParseInfo().getDestSchemaForClause(dest) != null
+        && this.getQB().getTableDesc() == null) {
+      Operator<?> selOp = handleInsertStatement(dest, hiveRoot, hiveRootRR, getQB());
+      return genFileSinkPlan(dest, getQB(), selOp);
+    } else {
+      return genFileSinkPlan(dest, getQB(), hiveRoot);
+    }
+  }
+
+  // This function serves as the wrapper of handleInsertStatementSpec in
+  // SemanticAnalyzer
+  Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb)
+      throws SemanticException {
+    ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
+    ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
+    for (int i = 0; i < columns.size(); i++) {
+      ColumnInfo col = columns.get(i);
+      colList.add(new ExprNodeColumnDesc(col));
+    }
+    ASTNode selExprList = qb.getParseInfo().getSelForClause(dest);
+
+    RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb,
+        selExprList);
+
+    ArrayList<String> columnNames = new ArrayList<String>();
+    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+    for (int i = 0; i < colList.size(); i++) {
+      String outputCol = getColumnInternalName(i);
+      colExprMap.put(outputCol, colList.get(i));
+      columnNames.add(outputCol);
+    }
+    Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList,
+        columnNames), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+    output.setColumnExprMap(colExprMap);
+    return output;
   }
 
   private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan)

http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index fe7c1ca..5ea6f3f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3883,7 +3883,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx)
    * @throws SemanticException
    */
-  private RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
+  public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest,
                                          RowResolver outputRR, RowResolver inputRR, QB qb,
                                          ASTNode selExprList) throws SemanticException {
     //(z,x)

http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/queries/clientpositive/cbo_rp_insert.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_insert.q b/ql/src/test/queries/clientpositive/cbo_rp_insert.q
new file mode 100644
index 0000000..eeaeec2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_rp_insert.q
@@ -0,0 +1,17 @@
+set hive.cbo.enable=true;
+set hive.cbo.returnpath.hiveop=true;
+
+drop database if exists x314 cascade;
+create database x314;
+use x314;
+create table source(s1 int, s2 int);
+create table target1(x int, y int, z int);
+
+insert into source(s2,s1) values(2,1);
+-- expect source to contain 1 row (1,2)
+select * from source;
+insert into target1(z,x) select * from source;
+-- expect target1 to contain 1 row (2,NULL,1)
+select * from target1;
+
+drop database if exists x314 cascade;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
new file mode 100644
index 0000000..6428a4b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
@@ -0,0 +1,89 @@
+PREHOOK: query: drop database if exists x314 cascade
+PREHOOK: type: DROPDATABASE
+POSTHOOK: query: drop database if exists x314 cascade
+POSTHOOK: type: DROPDATABASE
+PREHOOK: query: create database x314
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:x314
+POSTHOOK: query: create database x314
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:x314
+PREHOOK: query: use x314
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:x314
+POSTHOOK: query: use x314
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:x314
+PREHOOK: query: create table source(s1 int, s2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@source
+POSTHOOK: query: create table source(s1 int, s2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@source
+PREHOOK: query: create table target1(x int, y int, z int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@target1
+POSTHOOK: query: create table target1(x int, y int, z int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@target1
+PREHOOK: query: insert into source(s2,s1) values(2,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@values__tmp__table__1
+PREHOOK: Output: x314@source
+POSTHOOK: query: insert into source(s2,s1) values(2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@values__tmp__table__1
+POSTHOOK: Output: x314@source
+POSTHOOK: Lineage: source.s1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: source.s2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: -- expect source to contain 1 row (1,2)
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@source
+#### A masked pattern was here ####
+POSTHOOK: query: -- expect source to contain 1 row (1,2)
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@source
+#### A masked pattern was here ####
+1	2
+PREHOOK: query: insert into target1(z,x) select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@source
+PREHOOK: Output: x314@target1
+POSTHOOK: query: insert into target1(z,x) select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@source
+POSTHOOK: Output: x314@target1
+POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ]
+POSTHOOK: Lineage: target1.y EXPRESSION []
+POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ]
+PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
+select * from target1
+PREHOOK: type: QUERY
+PREHOOK: Input: x314@target1
+#### A masked pattern was here ####
+POSTHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
+select * from target1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: x314@target1
+#### A masked pattern was here ####
+2	NULL	1
+PREHOOK: query: drop database if exists x314 cascade
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:x314
+PREHOOK: Output: database:x314
+PREHOOK: Output: x314@source
+PREHOOK: Output: x314@target1
+PREHOOK: Output: x314@values__tmp__table__1
+POSTHOOK: query: drop database if exists x314 cascade
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:x314
+POSTHOOK: Output: database:x314
+POSTHOOK: Output: x314@source
+POSTHOOK: Output: x314@target1
+POSTHOOK: Output: x314@values__tmp__table__1