You are viewing a plain text version of this content. The canonical link for it is here.

Posted to reviews@spark.apache.org by srowen <gi...@git.apache.org> on 2018/12/06 14:36:46 UTC

[GitHub] spark pull request #22759: [MINOR][SQL][DOC] Correct parquet nullability doc...

Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22759#discussion_r239475332
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala ---
    @@ -542,6 +551,35 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
         }
       }
     
    +  test("parquet - column nullability -- write only") {
    +    val schema = StructType(
    +      StructField("cl1", IntegerType, nullable = false) ::
    +        StructField("cl2", IntegerType, nullable = true) :: Nil)
    +    val row = Row(3, 4)
    +    val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema)
    +
    +    withTempPath { dir =>
    +      val path = dir.getAbsolutePath
    +      df.write.mode("overwrite").parquet(path)
    +      val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0)
    +
    +      val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), new Configuration())
    +      val f = ParquetFileReader.open(hadoopInputFile)
    +      val parquetSchema = f.getFileMetaData.getSchema.getColumns.asScala
    +                          .map(_.getPrimitiveType)
    +      f.close
    +
    +      // the write keeps nullable info from the schema
    +      val expectedParquetSchema: Seq[PrimitiveType] = Seq(
    --- End diff --
    
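    This also really doesn't matter, but you can simplify the code by omitting explicit type annotations like this one (e.g. the `: Seq[PrimitiveType]` on `expectedParquetSchema`), and in similar places.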


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org