You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by kevinyu98 <gi...@git.apache.org> on 2018/12/03 17:40:27 UTC
[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for resolu...
Github user kevinyu98 commented on a diff in the pull request:
https://github.com/apache/spark/pull/23108#discussion_r238367919
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala ---
@@ -186,6 +186,54 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}
+ protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = {
+ val tableName1 = "spark_orc1"
+ val tableName2 = "spark_orc2"
+
+ withTempDir { dir =>
+ val someDF1 = Seq((1, 1, "orc1"), (2, 2, "orc2")).toDF("c1", "c2", "c3").repartition(1)
+ withTable(tableName1, tableName2) {
+ val dataDir = s"${dir.getCanonicalPath}/dir1/"
+ val parentDir = s"${dir.getCanonicalPath}/"
+ val wildCardDir = new File(s"${dir}/*").toURI
+ someDF1.write.orc(dataDir)
+ val parentDirStatement =
+ s"""
+ |CREATE EXTERNAL TABLE $tableName1(
+ | c1 int,
+ | c2 int,
+ | c3 string)
+ |STORED AS orc
+ |LOCATION '${parentDir}'""".stripMargin
+ sql(parentDirStatement)
+ val parentDirSqlStatement = s"select * from ${tableName1}"
+ if (isConvertMetastore) {
+ checkAnswer(sql(parentDirSqlStatement), Nil)
+ } else {
+ checkAnswer(sql(parentDirSqlStatement),
+ (1 to 2).map(i => Row(i, i, s"orc$i")))
+ }
+
+ val wildCardStatement =
+ s"""
+ |CREATE EXTERNAL TABLE $tableName2(
+ | c1 int,
+ | c2 int,
+ | c3 string)
+ |STORED AS orc
+ |LOCATION '$wildCardDir'""".stripMargin
--- End diff --
@dongjoon-hyun Sorry for the delay. I had some issues with my IntelliJ environment. Sure, I will add three-level subdirectories for this PR. FYI, I also tried with `convertMetastoreParquet` for Parquet, and the behavior is consistent.
sql("set spark.sql.hive.convertMetastoreParquet = true")
Three-level directory results for Parquet:
-- "/" can only read current directory
-- "/*" can read subdirectory data, but not three-level subdirectories.
sql("set spark.sql.hive.convertMetastoreParquet = false")
-- "/" can only read current directory
-- "/*" can read subdirectory data, but not three-level subdirectories.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org