You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by kevinyu98 <gi...@git.apache.org> on 2018/12/03 17:40:27 UTC

[GitHub] spark pull request #23108: [Spark-25993][SQL][TEST]Add test cases for resolu...

Github user kevinyu98 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23108#discussion_r238367919
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala ---
    @@ -186,6 +186,54 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
         }
       }
     
    +  protected def testORCTableLocation(isConvertMetastore: Boolean): Unit = {
    +    val tableName1 = "spark_orc1"
    +    val tableName2 = "spark_orc2"
    +
    +    withTempDir { dir =>
    +      val someDF1 = Seq((1, 1, "orc1"), (2, 2, "orc2")).toDF("c1", "c2", "c3").repartition(1)
    +      withTable(tableName1, tableName2) {
    +        val dataDir = s"${dir.getCanonicalPath}/dir1/"
    +        val parentDir = s"${dir.getCanonicalPath}/"
    +        val wildCardDir = new File(s"${dir}/*").toURI
    +        someDF1.write.orc(dataDir)
    +        val parentDirStatement =
    +          s"""
    +             |CREATE EXTERNAL TABLE $tableName1(
    +             |  c1 int,
    +             |  c2 int,
    +             |  c3 string)
    +             |STORED AS orc
    +             |LOCATION '${parentDir}'""".stripMargin
    +        sql(parentDirStatement)
    +        val parentDirSqlStatement = s"select * from ${tableName1}"
    +        if (isConvertMetastore) {
    +          checkAnswer(sql(parentDirSqlStatement), Nil)
    +        } else {
    +         checkAnswer(sql(parentDirSqlStatement),
    +           (1 to 2).map(i => Row(i, i, s"orc$i")))
    +        }
    +
    +        val wildCardStatement =
    +          s"""
    +             |CREATE EXTERNAL TABLE $tableName2(
    +             |  c1 int,
    +             |  c2 int,
    +             |  c3 string)
    +             |STORED AS orc
    +             |LOCATION '$wildCardDir'""".stripMargin
    --- End diff --
    
    @dongjoon-hyun Sorry for the delay; I had some issues with my IntelliJ environment. Sure, I will add three-level subdirectories for this PR. FYI, I also tried `convertMetastoreParquet` for Parquet, and the behavior is consistent.
    sql("set spark.sql.hive.convertMetastoreParquet = true")
    
    Results with three-level subdirectories:
    
    Parquet:
    
    -- "/"  can only read the current directory.
    -- "/*" can read subdirectory data, but not three-level subdirectories.
    
    sql("set spark.sql.hive.convertMetastoreParquet = false")
    
    -- "/"  can only read the current directory.
    -- "/*" can read subdirectory data, but not three-level subdirectories.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org