You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by rx...@apache.org on 2015/07/11 01:49:57 UTC

spark git commit: [SPARK-8990] [SQL] DataFrameReader.parquet() should respect user-specified options

Repository: spark
Updated Branches:
  refs/heads/master fb8807c9b -> 857e325f3


[SPARK-8990] [SQL] DataFrameReader.parquet() should respect user-specified options

Author: Cheng Lian <li...@databricks.com>

Closes #7347 from liancheng/spark-8990 and squashes the following commits:

045698c [Cheng Lian] SPARK-8990 DataFrameReader.parquet() should respect user specified options


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/857e325f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/857e325f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/857e325f

Branch: refs/heads/master
Commit: 857e325f304a02b1cab2463c1e15af7627a600bd
Parents: fb8807c
Author: Cheng Lian <li...@databricks.com>
Authored: Fri Jul 10 16:49:45 2015 -0700
Committer: Reynold Xin <rx...@databricks.com>
Committed: Fri Jul 10 16:49:45 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/DataFrameReader.scala  |  2 +-
 .../spark/sql/parquet/ParquetQuerySuite.scala   | 21 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/857e325f/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 1828ed1..9ad6e21 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -263,7 +263,7 @@ class DataFrameReader private[sql](sqlContext: SQLContext) {
       val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray
       sqlContext.baseRelationToDataFrame(
         new ParquetRelation2(
-          globbedPaths.map(_.toString), None, None, Map.empty[String, String])(sqlContext))
+          globbedPaths.map(_.toString), None, None, extraOptions.toMap)(sqlContext))
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/857e325f/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index a0a81c4..21007d9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -142,6 +142,27 @@ class ParquetQuerySuiteBase extends QueryTest with ParquetTest {
       testSchemaMerging(2)
     }
   }
+
+  test("SPARK-8990 DataFrameReader.parquet() should respect user specified options") {
+    withTempPath { dir =>
+      val basePath = dir.getCanonicalPath
+      sqlContext.range(0, 10).toDF("a").write.parquet(new Path(basePath, "foo=1").toString)
+      sqlContext.range(0, 10).toDF("b").write.parquet(new Path(basePath, "foo=a").toString)
+
+      // Disables the global SQL option for schema merging
+      withSQLConf(SQLConf.PARQUET_SCHEMA_MERGING_ENABLED.key -> "false") {
+        assertResult(2) {
+          // Disables schema merging via data source option
+          sqlContext.read.option("mergeSchema", "false").parquet(basePath).columns.length
+        }
+
+        assertResult(3) {
+          // Enables schema merging via data source option
+          sqlContext.read.option("mergeSchema", "true").parquet(basePath).columns.length
+        }
+      }
+    }
+  }
 }
 
 class ParquetDataSourceOnQuerySuite extends ParquetQuerySuiteBase with BeforeAndAfterAll {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org