You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by srowen <gi...@git.apache.org> on 2018/12/06 14:36:46 UTC
[GitHub] spark pull request #22759: [MINOR][SQL][DOC] Correct parquet nullability doc...
GitHub user srowen commented on a diff in the pull request:
https://github.com/apache/spark/pull/22759#discussion_r239475332
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala ---
@@ -542,6 +551,35 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSQLContext with Be
}
}
+ test("parquet - column nullability -- write only") {
+ val schema = StructType(
+ StructField("cl1", IntegerType, nullable = false) ::
+ StructField("cl2", IntegerType, nullable = true) :: Nil)
+ val row = Row(3, 4)
+ val df = spark.createDataFrame(sparkContext.parallelize(row :: Nil), schema)
+
+ withTempPath { dir =>
+ val path = dir.getAbsolutePath
+ df.write.mode("overwrite").parquet(path)
+ val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0)
+
+ val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), new Configuration())
+ val f = ParquetFileReader.open(hadoopInputFile)
+ val parquetSchema = f.getFileMetaData.getSchema.getColumns.asScala
+ .map(_.getPrimitiveType)
+ f.close
+
+ // the write keeps nullable info from the schema
+ val expectedParquetSchema: Seq[PrimitiveType] = Seq(
--- End diff --
This also really doesn't matter, but you can simplify the code by omitting explicit type annotations like this one (`Seq[PrimitiveType]` here) and similar ones elsewhere.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org