You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by wz...@apache.org on 2021/12/16 15:05:30 UTC

[impala] 03/04: IMPALA-6590: Disable expr rewrites and codegen for VALUES() statements

This is an automated email from the ASF dual-hosted git repository.

wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 763acffb74ec2770a9402ba23c145ea928021f8d
Author: Abhishek Rawat <ar...@cloudera.com>
AuthorDate: Tue Jun 11 12:56:22 2019 -0700

    IMPALA-6590: Disable expr rewrites and codegen for VALUES() statements
    
    Expression rewrites for VALUES() could result in a performance
    regression, since there is virtually no benefit to rewriting an
    expression that will only ever be evaluated once. The overhead of
    rewrites could be huge in some cases, especially if there are several
    constant expressions. The regression also seems to increase
    non-linearly as the number of columns increases. Similarly, there is no
    value in doing codegen for such const expressions.
    
    The rewriteExprs() method of the ValuesStmt class was overridden with an
    empty function body. As a result, expression rewriting for VALUES() is a
    no-op.
    
    Codegen was disabled for const expressions within a UNION node, if
    the UNION node is not within a subplan. This applies to all UNION nodes
    with const expressions (and not just limited to UNION nodes associated
    with a VALUES clause).
    
    The decision for whether or not to enable codegen for const expressions
    in a UNION is made in the planner when a UnionNode is initialized. A new
    member 'is_codegen_disabled' was added to the thrift struct TExprNode
    for communicating this decision to the backend. The optimizer should
    make whatever decisions it can, so it seemed like the right place to
    enable/disable codegen. The infrastructure is generic and could be
    extended in the future to selectively disable codegen for any given
    expression, if needed.
    
    Testing:
    - Added a new e2e test case in tests/query_test/test_codegen.py, which
      tests the different scenarios involving UNION with const expressions.
    - Passed exhaustive unit-tests.
    - Ran manual tests to validate that the non-linear regression in the
      VALUES clause with an increasing number of columns is no longer seen.
      Results below.
    
    for i in 256 512 1024 2048 4096 8192 16384 32768;
    do (echo 'VALUES ('; for x in $(seq $i);
    do echo  "cast($x as string),"; done;
    echo "NULL); profile;") |
    time impala-shell.sh -f /dev/stdin |& grep Analysis; done
    
    Base:
           - Analysis finished: 20.137ms (19.215ms)
           - Analysis finished: 46.275ms (44.597ms)
           - Analysis finished: 119.642ms (116.663ms)
           - Analysis finished: 361.195ms (355.856ms)
           - Analysis finished: 1s277ms (1s266ms)
           - Analysis finished: 5s664ms (5s640ms)
           - Analysis finished: 29s689ms (29s646ms)
           - Analysis finished: 2m (2m)
    
    Test:
           - Analysis finished: 1.868ms (986.520us)
           - Analysis finished: 3.195ms (1.856ms)
           - Analysis finished: 7.332ms (3.484ms)
           - Analysis finished: 13.896ms (8.071ms)
           - Analysis finished: 31.015ms (18.963ms)
           - Analysis finished: 60.157ms (38.125ms)
           - Analysis finished: 113.694ms (67.642ms)
           - Analysis finished: 253.044ms (163.180ms)
    
    Change-Id: I229d67b821968321abd8f97f7c89cf2617000d8d
    Reviewed-on: http://gerrit.cloudera.org:8080/13645
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/union-node.cc                          | 11 +++
 be/src/exec/union-node.h                           | 13 ++++
 be/src/exprs/scalar-expr.cc                        | 13 ++--
 be/src/exprs/scalar-expr.h                         |  6 +-
 be/src/runtime/fragment-state.h                    |  3 +
 common/thrift/Exprs.thrift                         |  3 +
 .../main/java/org/apache/impala/analysis/Expr.java | 11 +++
 .../org/apache/impala/analysis/ValuesStmt.java     |  8 +++
 .../java/org/apache/impala/planner/UnionNode.java  |  7 +-
 .../apache/impala/analysis/ExprRewriterTest.java   |  4 +-
 .../queries/PlannerTest/values.test                | 32 ++++-----
 .../QueryTest/union-const-scalar-expr-codegen.test | 80 ++++++++++++++++++++++
 tests/query_test/test_codegen.py                   |  7 ++
 13 files changed, 173 insertions(+), 25 deletions(-)

diff --git a/be/src/exec/union-node.cc b/be/src/exec/union-node.cc
index aac6874..b454cdf 100644
--- a/be/src/exec/union-node.cc
+++ b/be/src/exec/union-node.cc
@@ -41,6 +41,8 @@ Status UnionPlanNode::Init(const TPlanNode& tnode, FragmentState* state) {
   DCHECK(tuple_desc_ != nullptr);
   first_materialized_child_idx_ = tnode_->union_node.first_materialized_child_idx;
   DCHECK_GT(first_materialized_child_idx_, -1);
+  const int64_t num_nonconst_scalar_expr_to_be_codegened =
+      state->NumScalarExprNeedsCodegen();
   // Create const_exprs_lists_ from thrift exprs.
   const vector<vector<TExpr>>& const_texpr_lists = tnode_->union_node.const_expr_lists;
   for (const vector<TExpr>& texprs : const_texpr_lists) {
@@ -49,6 +51,8 @@ Status UnionPlanNode::Init(const TPlanNode& tnode, FragmentState* state) {
     DCHECK_EQ(const_exprs.size(), tuple_desc_->slots().size());
     const_exprs_lists_.push_back(const_exprs);
   }
+  num_const_scalar_expr_to_be_codegened_ =
+      state->NumScalarExprNeedsCodegen() - num_nonconst_scalar_expr_to_be_codegened;
   // Create child_exprs_lists_ from thrift exprs.
   const vector<vector<TExpr>>& thrift_result_exprs = tnode_->union_node.result_expr_lists;
   for (int i = 0; i < thrift_result_exprs.size(); ++i) {
@@ -85,6 +89,8 @@ UnionNode::UnionNode(
   : ExecNode(pool, pnode, descs),
     tuple_desc_(pnode.tuple_desc_),
     first_materialized_child_idx_(pnode.first_materialized_child_idx_),
+    num_const_scalar_expr_to_be_codegened_(pnode.num_const_scalar_expr_to_be_codegened_),
+    is_codegen_status_added_(pnode.is_codegen_status_added_),
     const_exprs_lists_(pnode.const_exprs_lists_),
     child_exprs_lists_(pnode.child_exprs_lists_),
     codegend_union_materialize_batch_fns_(pnode.codegend_union_materialize_batch_fns_),
@@ -158,6 +164,7 @@ void UnionPlanNode::Codegen(FragmentState* state){
         &(codegend_union_materialize_batch_fns_.data()[i]));
   }
   AddCodegenStatus(codegen_status, codegen_message.str());
+  is_codegen_status_added_ = true;
 }
 
 Status UnionNode::Open(RuntimeState* state) {
@@ -177,6 +184,10 @@ Status UnionNode::Open(RuntimeState* state) {
   // succeeded.
   if (!children_.empty()) RETURN_IF_ERROR(child(child_idx_)->Open(state));
 
+  if (is_codegen_status_added_ && num_const_scalar_expr_to_be_codegened_ == 0
+      && !const_exprs_lists_.empty()) {
+    runtime_profile_->AppendExecOption("Codegen Disabled for const scalar expressions");
+  }
   return Status::OK();
 }
 
diff --git a/be/src/exec/union-node.h b/be/src/exec/union-node.h
index 59be242..f9574b3 100644
--- a/be/src/exec/union-node.h
+++ b/be/src/exec/union-node.h
@@ -62,6 +62,13 @@ class UnionPlanNode : public PlanNode {
   /// materialized.
   int first_materialized_child_idx_ = -1;
 
+  /// Number of const scalar expressions which will be codegened.
+  /// This is only used for observability.
+  int64_t num_const_scalar_expr_to_be_codegened_ = 0;
+
+  /// Set as TRUE if codegen status is added.
+  bool is_codegen_status_added_ = false;
+
   typedef void (*UnionMaterializeBatchFn)(UnionNode*, RowBatch*, uint8_t**);
   /// Vector of pointers to codegen'ed MaterializeBatch functions. The vector contains one
   /// function for each child. The size of the vector should be equal to the number of
@@ -105,6 +112,12 @@ class UnionNode : public ExecNode {
   /// materialized.
   const int first_materialized_child_idx_;
 
+  /// Number of const scalar expressions which will be codegened.
+  const int64_t num_const_scalar_expr_to_be_codegened_;
+
+  /// Reference to UnionPlanNode::is_codegen_status_added_.
+  const bool& is_codegen_status_added_;
+
   /// Const exprs materialized by this node. These exprs don't refer to any children.
   /// Only materialized by the first fragment instance to avoid duplication.
   const std::vector<std::vector<ScalarExpr*>>& const_exprs_lists_;
diff --git a/be/src/exprs/scalar-expr.cc b/be/src/exprs/scalar-expr.cc
index 0bf2069..cf5a287 100644
--- a/be/src/exprs/scalar-expr.cc
+++ b/be/src/exprs/scalar-expr.cc
@@ -65,14 +65,15 @@ namespace impala {
 
 const char* ScalarExpr::LLVM_CLASS_NAME = "class.impala::ScalarExpr";
 
-ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant)
+ScalarExpr::ScalarExpr(const ColumnType& type, bool is_constant, bool is_codegen_disabled)
   : Expr(type),
-    is_constant_(is_constant) {
-}
+    is_constant_(is_constant),
+    is_codegen_disabled_(is_codegen_disabled) {}
 
 ScalarExpr::ScalarExpr(const TExprNode& node)
   : Expr(node),
-    is_constant_(node.is_constant) {
+    is_constant_(node.is_constant),
+    is_codegen_disabled_(node.is_codegen_disabled) {
   if (node.__isset.fn) fn_ = node.fn;
 }
 
@@ -328,8 +329,10 @@ bool ScalarExpr::ShouldCodegen(const FragmentState* state) const {
   //    key expression in a descriptor table.
   // 2. codegen is disabled by query option.
   // 3. there is an optimization hint to disable codegen and the expr can be interpreted.
+  // 4. Optimizer decided to disable codegen. Example: const expressions in VALUES()
+  //    which are evaluated only once.
   return state != nullptr && !state->CodegenDisabledByQueryOption()
-      && !(state->CodegenHasDisableHint() && IsInterpretable());
+      && !((state->CodegenHasDisableHint() || is_codegen_disabled_) && IsInterpretable());
 }
 
 int ScalarExpr::GetSlotIds(vector<SlotId>* slot_ids) const {
diff --git a/be/src/exprs/scalar-expr.h b/be/src/exprs/scalar-expr.h
index a148cc5..582763e 100644
--- a/be/src/exprs/scalar-expr.h
+++ b/be/src/exprs/scalar-expr.h
@@ -256,7 +256,7 @@ class ScalarExpr : public Expr {
   static Status CreateNode(const TExprNode& texpr_node, ObjectPool* pool,
       ScalarExpr** expr) WARN_UNUSED_RESULT;
 
-  ScalarExpr(const ColumnType& type, bool is_constant);
+  ScalarExpr(const ColumnType& type, bool is_constant, bool is_codegen_disabled = false);
   ScalarExpr(const TExprNode& node);
 
   /// Implementation of GetCodegendComputeFn() to be overridden by each subclass of
@@ -394,6 +394,10 @@ class ScalarExpr : public Expr {
   /// Set in GetCodegendComputeFn().
   bool added_to_jit_ = false;
 
+  /// True if codegen should be disabled for this scalar expression. Typical use case
+  /// is const expressions in VALUES clause, which are evaluated only once.
+  const bool is_codegen_disabled_;
+
   /// Static wrappers which call the compute function of the given ScalarExpr, passing
   /// it the ScalarExprEvaluator and TupleRow. These are cross-compiled and used by
   /// GetStaticGetValWrapper.
diff --git a/be/src/runtime/fragment-state.h b/be/src/runtime/fragment-state.h
index 6bda279..fa6a66b 100644
--- a/be/src/runtime/fragment-state.h
+++ b/be/src/runtime/fragment-state.h
@@ -115,6 +115,9 @@ class FragmentState {
   /// created, init'ed in which all expressions' Prepare() are invoked.
   bool ScalarExprNeedsCodegen() const { return !scalar_exprs_to_codegen_.empty(); }
 
+  /// Returns the number of scalar expressions to be codegen'd.
+  int64_t NumScalarExprNeedsCodegen() const { return scalar_exprs_to_codegen_.size(); }
+
   /// Check if codegen was disabled and if so, add a message to the runtime profile.
   /// Call this only after expressions have been have been created.
   void CheckAndAddCodegenDisabledMessage(std::vector<std::string>& codegen_status_msgs) {
diff --git a/common/thrift/Exprs.thrift b/common/thrift/Exprs.thrift
index 7ed771a..fb02a29 100644
--- a/common/thrift/Exprs.thrift
+++ b/common/thrift/Exprs.thrift
@@ -177,6 +177,9 @@ struct TExprNode {
   20: optional TTimestampLiteral timestamp_literal
   21: optional TKuduPartitionExpr kudu_partition_expr
   22: optional TCastExpr cast_expr
+
+  // If codegen is disabled for this Expr
+  23: optional bool is_codegen_disabled
 }
 
 // A flattened representation of a tree of Expr nodes, obtained by depth-first
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 07c27c1..fe11c2b 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -413,6 +413,9 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
   // For exprs of type Predicate, this keeps track of predicate hints
   protected List<PlanHint> predicateHints_;
 
+  // Is codegen disabled for this expression ?
+  private boolean isCodegenDisabled_ = false;
+
   protected Expr() {
     type_ = Type.INVALID;
     selectivity_ = -1.0;
@@ -441,6 +444,7 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
       predicateHints_ = new ArrayList<>();
       predicateHints_.addAll(other.predicateHints_);
     }
+    isCodegenDisabled_ = other.isCodegenDisabled_;
   }
 
   public boolean isAnalyzed() { return isAnalyzed_; }
@@ -462,6 +466,12 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
   public boolean isAuxExpr() { return isAuxExpr_; }
   public void setIsAuxExpr() { isAuxExpr_ = true; }
   public Function getFn() { return fn_; }
+  public void disableCodegen() {
+    isCodegenDisabled_ = true;
+    for (Expr child : children_) {
+      child.disableCodegen();
+    }
+  }
 
   /**
    * Perform semantic analysis of node and all of its children.
@@ -856,6 +866,7 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
     msg.type = type_.toThrift();
     msg.is_constant = isConstant_;
     msg.num_children = children_.size();
+    msg.setIs_codegen_disabled(isCodegenDisabled_);
     if (fn_ != null) {
       TFunction thriftFn = fn_.toThrift();
       thriftFn.setLast_modified_time(fn_.getLastModifiedTime());
diff --git a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
index 642cac7..349c491 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ValuesStmt.java
@@ -22,6 +22,8 @@ import java.util.List;
 import com.google.common.base.Preconditions;
 
 import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
+import org.apache.impala.common.AnalysisException;
+import org.apache.impala.rewrite.ExprRewriter;
 
 /**
  * Representation of a values() statement with a list of constant-expression lists.
@@ -81,4 +83,10 @@ public class ValuesStmt extends UnionStmt {
 
   @Override
   public ValuesStmt clone() { return new ValuesStmt(this); }
+
+  /**
+   * Intentionally left empty to disable expression rewrite for values clause.
+   */
+  @Override
+  public void rewriteExprs(ExprRewriter rewriter) {}
 }
diff --git a/fe/src/main/java/org/apache/impala/planner/UnionNode.java b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
index 72da0e6..06dc4e1 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnionNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
@@ -295,7 +295,12 @@ public class UnionNode extends PlanNode {
       Preconditions.checkState(exprList.size() == slots.size());
       List<Expr> newExprList = new ArrayList<>();
       for (int i = 0; i < exprList.size(); ++i) {
-        if (slots.get(i).isMaterialized()) newExprList.add(exprList.get(i));
+        if (slots.get(i).isMaterialized()) {
+          Expr constExpr = exprList.get(i);
+          // Disable codegen for const expressions which will only ever be evaluated once.
+          if (!isInSubplan_) constExpr.disableCodegen();
+          newExprList.add(constExpr);
+        }
       }
       materializedConstExprLists_.add(newExprList);
     }
diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
index 71a5f4d..9fa7fc6 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriterTest.java
@@ -135,8 +135,8 @@ public class ExprRewriterTest extends AnalyzerTest {
         stmt_, stmt_), 47, 23);
     // Constant select.
     RewritesOk("select 1, 2, 3, 4", 4, 4);
-    // Values stmt.
-    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 8, 8);
+    // Values stmt - expression rewrites are disabled.
+    RewritesOk("values(1, '2', 3, 4.1), (1, '2', 3, 4.1)", 0, 0);
     // Test WHERE-clause subqueries.
     RewritesOk("select id, int_col from functional.alltypes a " +
         "where exists (select 1 from functional.alltypes " +
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/values.test b/testdata/workloads/functional-planner/queries/PlannerTest/values.test
index ffa5632..36b0fb4 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/values.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/values.test
@@ -4,35 +4,35 @@ PLAN-ROOT SINK
 |
 00:UNION
    constant-operands=1
-   row-size=18B cardinality=1
+   row-size=19B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 00:UNION
    constant-operands=1
-   row-size=18B cardinality=1
+   row-size=19B cardinality=1
 ====
 values(1+1, 2, 5.0, 'a') order by 1 limit 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 01:TOP-N [LIMIT=10]
-|  order by: 2 ASC
-|  row-size=18B cardinality=1
+|  order by: 1 + 1 ASC
+|  row-size=19B cardinality=1
 |
 00:UNION
    constant-operands=1
-   row-size=18B cardinality=1
+   row-size=19B cardinality=1
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 01:TOP-N [LIMIT=10]
-|  order by: 2 ASC
-|  row-size=18B cardinality=1
+|  order by: 1 + 1 ASC
+|  row-size=19B cardinality=1
 |
 00:UNION
    constant-operands=1
-   row-size=18B cardinality=1
+   row-size=19B cardinality=1
 ====
 values((1+1, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c'))
 ---- PLAN
@@ -40,33 +40,33 @@ PLAN-ROOT SINK
 |
 00:UNION
    constant-operands=3
-   row-size=18B cardinality=3
+   row-size=19B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 00:UNION
    constant-operands=3
-   row-size=18B cardinality=3
+   row-size=19B cardinality=3
 ====
 values((1+1, 2, 5.0, 'a'), (2, 3, 6.0, 'b'), (3, 4, 7.0, 'c')) order by 1 limit 10
 ---- PLAN
 PLAN-ROOT SINK
 |
 01:TOP-N [LIMIT=10]
-|  order by: 2 ASC
-|  row-size=18B cardinality=3
+|  order by: 1 + 1 ASC
+|  row-size=19B cardinality=3
 |
 00:UNION
    constant-operands=3
-   row-size=18B cardinality=3
+   row-size=19B cardinality=3
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 01:TOP-N [LIMIT=10]
-|  order by: 2 ASC
-|  row-size=18B cardinality=3
+|  order by: 1 + 1 ASC
+|  row-size=19B cardinality=3
 |
 00:UNION
    constant-operands=3
-   row-size=18B cardinality=3
+   row-size=19B cardinality=3
 ====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test b/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
new file mode 100644
index 0000000..e8398eb
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/union-const-scalar-expr-codegen.test
@@ -0,0 +1,80 @@
+====
+---- QUERY
+# Test union with multiple legs each having const expressions.
+# Expect codegen to be disabled for const expressions.
+set DISABLE_CODEGEN_ROWS_THRESHOLD=1;
+select 1,2,3 union all select 4,5,6 union all select 7,8,9 order by 1;
+---- TYPES
+tinyint,tinyint,tinyint
+---- RESULTS
+1,2,3
+4,5,6
+7,8,9
+---- RUNTIME_PROFILE
+00:UNION
+   constant-operands=3
+#SORT_NODE
+ExecOption: Codegen Enabled
+#UNION_NODE
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
+====
+---- QUERY
+# Test insert statement with values (translated into UNION with const expressions).
+# Expect codegen to be disabled for const expressions.
+set DISABLE_CODEGEN_ROWS_THRESHOLD=1;
+drop table if exists test_values_codegen;
+create table test_values_codegen (c1 int, c2 timestamp, c3 string);
+insert into test_values_codegen(c1) values (CAST(1+ceil(2.5)*3 as tinyint));
+---- RUNTIME_PROFILE
+00:UNION
+   constant-operands=1
+#UNION_NODE
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
+====
+---- QUERY
+# Test insert statement with values having const scalar expressions.
+# Expect codegen to be disabled for const expressions.
+set DISABLE_CODEGEN_ROWS_THRESHOLD=1;
+insert into test_values_codegen values
+  (1+1, '2015-04-09 14:07:46.580465000', base64encode('hello world')),
+  (CAST(1*2+2-5 as INT), CAST(1428421382 as timestamp),
+   regexp_extract('abcdef123ghi456jkl','.*?(\\d+)',0));
+---- RUNTIME_PROFILE
+00:UNION
+   constant-operands=2
+#UNION_NODE
+ExecOption: Codegen Enabled, Codegen Disabled for const scalar expressions
+====
+---- QUERY
+# Test the result of above inserts with codegen disabled.
+select * from test_values_codegen order by c1;
+---- TYPES
+int, timestamp, string
+---- RESULTS
+-1,2015-04-07 15:43:02,'abcdef123ghi456'
+2,2015-04-09 14:07:46.580465000,'aGVsbG8gd29ybGQ='
+10,NULL,'NULL'
+====
+---- QUERY
+# Test union with const expressions in a subplan.
+# Expect codegen enabled.
+select count(c.c_custkey), count(v.tot_price)
+from tpch_nested_parquet.customer c, (
+  select sum(o_totalprice) tot_price from c.c_orders
+  union
+  select 9.99 tot_price) v;
+---- TYPES
+BIGINT, BIGINT
+---- RESULTS
+300000,249996
+---- RUNTIME_PROFILE
+01:SUBPLAN
+|  03:UNION
+|  |  constant-operands=1
+#AGGREGATION_NODE (id=6)
+ExecOption: Codegen Enabled
+#UNION_NODE (id=3)
+ExecOption: Codegen Enabled
+#AGGREGATION_NODE (id=5)
+ExecOption: Codegen Enabled
+====
diff --git a/tests/query_test/test_codegen.py b/tests/query_test/test_codegen.py
index 0622d7d..18597aa 100644
--- a/tests/query_test/test_codegen.py
+++ b/tests/query_test/test_codegen.py
@@ -92,3 +92,10 @@ class TestCodegen(ImpalaTestSuite):
     profile_str = str(result.runtime_profile)
     assert "Probe Side Codegen Enabled" in profile_str, profile_str
     assert "Build Side Codegen Enabled" in profile_str, profile_str
+
+  def test_const_scalar_expr_in_union(self, vector, unique_database):
+    """Test that codegen is disabled for const scalar expressions in a UNION node.
+    if, however the UNION node is under a subplan then codegen is not disabled for
+    const expressions."""
+    self.run_test_case('QueryTest/union-const-scalar-expr-codegen', vector,
+        use_db=unique_database)