You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/06 20:27:42 UTC
spark git commit: [SPARK-23312][SQL][FOLLOWUP] add a config to turn off vectorized cache reader

Repository: spark
Updated Branches:
  refs/heads/master 7db9979ba -> ac7454cac


[SPARK-23312][SQL][FOLLOWUP] add a config to turn off vectorized cache reader

## What changes were proposed in this pull request?

https://github.com/apache/spark/pull/20483 tried to provide a way to turn off the new columnar cache reader, to restore the behavior in 2.2. However, even if we turn off that config, the behavior is still different from 2.2.

If the output data are rows, we still enable whole stage codegen for the scan node, which is different from 2.2; we should fix this as well.

## How was this patch tested?

existing tests.

Author: Wenchen Fan <we...@databricks.com>

Closes #20513 from cloud-fan/cache.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac7454ca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac7454ca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac7454ca

Branch: refs/heads/master
Commit: ac7454cac04a1d9252b3856360eda5c3e8bcb8da
Parents: 7db9979
Author: Wenchen Fan <we...@databricks.com>
Authored: Tue Feb 6 12:27:37 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Tue Feb 6 12:27:37 2018 -0800

----------------------------------------------------------------------
 .../spark/sql/execution/columnar/InMemoryTableScanExec.scala      | 3 +++
 .../src/test/scala/org/apache/spark/sql/CachedTableSuite.scala    | 3 ++-
 .../org/apache/spark/sql/execution/WholeStageCodegenSuite.scala   | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ac7454ca/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
index e972f8b..a93e8a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala
@@ -61,6 +61,9 @@ case class InMemoryTableScanExec(
     }) && !WholeStageCodegenExec.isTooManyFields(conf, relation.schema)
   }
 
+  // TODO: revisit this. Shall we always turn off whole stage codegen if the output data are rows?
+  override def supportCodegen: Boolean = supportsBatch
+
   override protected def needsUnsafeRowConversion: Boolean = false
 
   private val columnIndices =

http://git-wip-us.apache.org/repos/asf/spark/blob/ac7454ca/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index 9f27fa0..669e5f2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -787,7 +787,8 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with SharedSQLContext
       withSQLConf(SQLConf.CACHE_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
         val df = spark.range(10).cache()
         df.queryExecution.executedPlan.foreach {
-          case i: InMemoryTableScanExec => assert(i.supportsBatch == vectorized)
+          case i: InMemoryTableScanExec =>
+            assert(i.supportsBatch == vectorized && i.supportCodegen == vectorized)
           case _ =>
         }
       }

http://git-wip-us.apache.org/repos/asf/spark/blob/ac7454ca/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index 6e8d5a7..ef16292 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -137,7 +137,7 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
     val dsStringFilter = dsString.filter(_ == "1")
     val planString = dsStringFilter.queryExecution.executedPlan
     assert(planString.collect {
-      case WholeStageCodegenExec(FilterExec(_, i: InMemoryTableScanExec)) if !i.supportsBatch => ()
+      case i: InMemoryTableScanExec if !i.supportsBatch => ()
     }.length == 1)
     assert(dsStringFilter.collect() === Array("1"))
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org