You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/07/16 06:01:40 UTC
spark git commit: [SPARK-24810][SQL] Fix paths to test files in AvroSuite
Repository: spark
Updated Branches:
refs/heads/master d463533de -> 9f929458f
[SPARK-24810][SQL] Fix paths to test files in AvroSuite
## What changes were proposed in this pull request?
In this PR, I propose moving `testFile()` to the common trait `SQLTestUtilsBase` and wrapping the test file names in `AvroSuite` with `testFile()`, which returns the full path to a test file in the resource folder.
Author: Maxim Gekk <ma...@databricks.com>
Closes #21773 from MaxGekk/test-file.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f929458
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f929458
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f929458
Branch: refs/heads/master
Commit: 9f929458fb0a8a106f3b5a6ed3ee2cd3faa85770
Parents: d463533
Author: Maxim Gekk <ma...@databricks.com>
Authored: Sun Jul 15 23:01:36 2018 -0700
Committer: Xiao Li <ga...@gmail.com>
Committed: Sun Jul 15 23:01:36 2018 -0700
----------------------------------------------------------------------
.../org/apache/spark/sql/avro/AvroSuite.scala | 79 +++++++++++---------
.../execution/datasources/csv/CSVSuite.scala | 4 -
.../execution/datasources/json/JsonSuite.scala | 4 -
.../datasources/text/WholeTextFileSuite.scala | 11 +--
.../apache/spark/sql/test/SQLTestUtils.scala | 7 ++
5 files changed, 53 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/9f929458/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
----------------------------------------------------------------------
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index 6ed6656..9c6526b 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -36,8 +36,8 @@ import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
import org.apache.spark.sql.types._
class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
- val episodesFile = "src/test/resources/episodes.avro"
- val testFile = "src/test/resources/test.avro"
+ val episodesAvro = testFile("episodes.avro")
+ val testAvro = testFile("test.avro")
override protected def beforeAll(): Unit = {
super.beforeAll()
@@ -45,18 +45,18 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
}
def checkReloadMatchesSaved(originalFile: String, newFile: String): Unit = {
- val originalEntries = spark.read.avro(testFile).collect()
+ val originalEntries = spark.read.avro(testAvro).collect()
val newEntries = spark.read.avro(newFile)
checkAnswer(newEntries, originalEntries)
}
test("reading from multiple paths") {
- val df = spark.read.avro(episodesFile, episodesFile)
+ val df = spark.read.avro(episodesAvro, episodesAvro)
assert(df.count == 16)
}
test("reading and writing partitioned data") {
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
val fields = List("title", "air_date", "doctor")
for (field <- fields) {
withTempPath { dir =>
@@ -72,14 +72,14 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
}
test("request no fields") {
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
df.createOrReplaceTempView("avro_table")
assert(spark.sql("select count(*) from avro_table").collect().head === Row(8))
}
test("convert formats") {
withTempPath { dir =>
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
df.write.parquet(dir.getCanonicalPath)
assert(spark.read.parquet(dir.getCanonicalPath).count() === df.count)
}
@@ -87,7 +87,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
test("rearrange internal schema") {
withTempPath { dir =>
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
df.select("doctor", "title").write.avro(dir.getCanonicalPath)
}
}
@@ -362,7 +362,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
val deflateDir = s"$dir/deflate"
val snappyDir = s"$dir/snappy"
- val df = spark.read.avro(testFile)
+ val df = spark.read.avro(testAvro)
spark.conf.set(AVRO_COMPRESSION_CODEC, "uncompressed")
df.write.avro(uncompressDir)
spark.conf.set(AVRO_COMPRESSION_CODEC, "deflate")
@@ -381,49 +381,49 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
}
test("dsl test") {
- val results = spark.read.avro(episodesFile).select("title").collect()
+ val results = spark.read.avro(episodesAvro).select("title").collect()
assert(results.length === 8)
}
test("support of various data types") {
// This test uses data from test.avro. You can see the data and the schema of this file in
// test.json and test.avsc
- val all = spark.read.avro(testFile).collect()
+ val all = spark.read.avro(testAvro).collect()
assert(all.length == 3)
- val str = spark.read.avro(testFile).select("string").collect()
+ val str = spark.read.avro(testAvro).select("string").collect()
assert(str.map(_(0)).toSet.contains("Terran is IMBA!"))
- val simple_map = spark.read.avro(testFile).select("simple_map").collect()
+ val simple_map = spark.read.avro(testAvro).select("simple_map").collect()
assert(simple_map(0)(0).getClass.toString.contains("Map"))
assert(simple_map.map(_(0).asInstanceOf[Map[String, Some[Int]]].size).toSet == Set(2, 0))
- val union0 = spark.read.avro(testFile).select("union_string_null").collect()
+ val union0 = spark.read.avro(testAvro).select("union_string_null").collect()
assert(union0.map(_(0)).toSet == Set("abc", "123", null))
- val union1 = spark.read.avro(testFile).select("union_int_long_null").collect()
+ val union1 = spark.read.avro(testAvro).select("union_int_long_null").collect()
assert(union1.map(_(0)).toSet == Set(66, 1, null))
- val union2 = spark.read.avro(testFile).select("union_float_double").collect()
+ val union2 = spark.read.avro(testAvro).select("union_float_double").collect()
assert(
union2
.map(x => new java.lang.Double(x(0).toString))
.exists(p => Math.abs(p - Math.PI) < 0.001))
- val fixed = spark.read.avro(testFile).select("fixed3").collect()
+ val fixed = spark.read.avro(testAvro).select("fixed3").collect()
assert(fixed.map(_(0).asInstanceOf[Array[Byte]]).exists(p => p(1) == 3))
- val enum = spark.read.avro(testFile).select("enum").collect()
+ val enum = spark.read.avro(testAvro).select("enum").collect()
assert(enum.map(_(0)).toSet == Set("SPADES", "CLUBS", "DIAMONDS"))
- val record = spark.read.avro(testFile).select("record").collect()
+ val record = spark.read.avro(testAvro).select("record").collect()
assert(record(0)(0).getClass.toString.contains("Row"))
assert(record.map(_(0).asInstanceOf[Row](0)).contains("TEST_STR123"))
- val array_of_boolean = spark.read.avro(testFile).select("array_of_boolean").collect()
+ val array_of_boolean = spark.read.avro(testAvro).select("array_of_boolean").collect()
assert(array_of_boolean.map(_(0).asInstanceOf[Seq[Boolean]].size).toSet == Set(3, 1, 0))
- val bytes = spark.read.avro(testFile).select("bytes").collect()
+ val bytes = spark.read.avro(testAvro).select("bytes").collect()
assert(bytes.map(_(0).asInstanceOf[Array[Byte]].length).toSet == Set(3, 1, 0))
}
@@ -432,7 +432,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
s"""
|CREATE TEMPORARY VIEW avroTable
|USING avro
- |OPTIONS (path "$episodesFile")
+ |OPTIONS (path "${episodesAvro}")
""".stripMargin.replaceAll("\n", " "))
assert(spark.sql("SELECT * FROM avroTable").collect().length === 8)
@@ -443,8 +443,8 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
// get the same values back.
withTempPath { dir =>
val avroDir = s"$dir/avro"
- spark.read.avro(testFile).write.avro(avroDir)
- checkReloadMatchesSaved(testFile, avroDir)
+ spark.read.avro(testAvro).write.avro(avroDir)
+ checkReloadMatchesSaved(testAvro, avroDir)
}
}
@@ -457,8 +457,8 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
val parameters = Map("recordName" -> name, "recordNamespace" -> namespace)
val avroDir = tempDir + "/namedAvro"
- spark.read.avro(testFile).write.options(parameters).avro(avroDir)
- checkReloadMatchesSaved(testFile, avroDir)
+ spark.read.avro(testAvro).write.options(parameters).avro(avroDir)
+ checkReloadMatchesSaved(testAvro, avroDir)
// Look at raw file and make sure has namespace info
val rawSaved = spark.sparkContext.textFile(avroDir)
@@ -532,10 +532,11 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
}
test("support of globbed paths") {
- val e1 = spark.read.avro("*/test/resources/episodes.avro").collect()
+ val resourceDir = testFile(".")
+ val e1 = spark.read.avro(resourceDir + "../*/episodes.avro").collect()
assert(e1.length == 8)
- val e2 = spark.read.avro("src/*/*/episodes.avro").collect()
+ val e2 = spark.read.avro(resourceDir + "../../*/*/episodes.avro").collect()
assert(e2.length == 8)
}
@@ -574,8 +575,12 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
| }]
|}
""".stripMargin
- val result = spark.read.option(AvroFileFormat.AvroSchema, avroSchema).avro(testFile).collect()
- val expected = spark.read.avro(testFile).select("string").collect()
+ val result = spark
+ .read
+ .option(AvroFileFormat.AvroSchema, avroSchema)
+ .avro(testAvro)
+ .collect()
+ val expected = spark.read.avro(testAvro).select("string").collect()
assert(result.sameElements(expected))
}
@@ -593,7 +598,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
|}
""".stripMargin
val result = spark.read.option(AvroFileFormat.AvroSchema, avroSchema)
- .avro(testFile).select("missingField").first
+ .avro(testAvro).select("missingField").first
assert(result === Row("foo"))
}
@@ -632,7 +637,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
s"""
|CREATE TEMPORARY VIEW episodes
|USING avro
- |OPTIONS (path "$episodesFile")
+ |OPTIONS (path "${episodesAvro}")
""".stripMargin.replaceAll("\n", " "))
spark.sql(
s"""
@@ -657,7 +662,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
test("test save and load") {
// Test if load works as expected
withTempPath { tempDir =>
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
assert(df.count == 8)
val tempSaveDir = s"$tempDir/save/"
@@ -671,7 +676,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
test("test load with non-Avro file") {
// Test if load works as expected
withTempPath { tempDir =>
- val df = spark.read.avro(episodesFile)
+ val df = spark.read.avro(episodesAvro)
assert(df.count == 8)
val tempSaveDir = s"$tempDir/save/"
@@ -701,10 +706,10 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
StructField("record", StructType(Seq(StructField("value_field", StringType, false))), false),
StructField("array_of_boolean", ArrayType(BooleanType), false),
StructField("bytes", BinaryType, true)))
- val withSchema = spark.read.schema(partialColumns).avro(testFile).collect()
+ val withSchema = spark.read.schema(partialColumns).avro(testAvro).collect()
val withOutSchema = spark
.read
- .avro(testFile)
+ .avro(testAvro)
.select("string", "simple_map", "complex_map", "union_string_null", "union_int_long_null",
"fixed3", "fixed2", "enum", "record", "array_of_boolean", "bytes")
.collect()
@@ -722,7 +727,7 @@ class AvroSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
StructField("non_exist_field", StringType, false),
StructField("non_exist_field2", StringType, false))),
false)))
- val withEmptyColumn = spark.read.schema(schema).avro(testFile).collect()
+ val withEmptyColumn = spark.read.schema(schema).avro(testAvro).collect()
assert(withEmptyColumn.forall(_ == Row(null: String, Row(null: String, null: String))))
}
http://git-wip-us.apache.org/repos/asf/spark/blob/9f929458/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index ae8110f..63cc598 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -60,10 +60,6 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
private val unescapedQuotesFile = "test-data/unescaped-quotes.csv"
private val valueMalformedFile = "test-data/value-malformed.csv"
- private def testFile(fileName: String): String = {
- Thread.currentThread().getContextClassLoader.getResource(fileName).toString
- }
-
/** Verifies data and schema. */
private def verifyCars(
df: DataFrame,
http://git-wip-us.apache.org/repos/asf/spark/blob/9f929458/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index eab15b3..655f40a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -48,10 +48,6 @@ class TestFileFilter extends PathFilter {
class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
import testImplicits._
- def testFile(fileName: String): String = {
- Thread.currentThread().getContextClassLoader.getResource(fileName).toString
- }
-
test("Type promotion") {
def checkTypePromotion(expected: Any, actual: Any) {
assert(expected.getClass == actual.getClass,
http://git-wip-us.apache.org/repos/asf/spark/blob/9f929458/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala
index fff0f82..a302d67 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/WholeTextFileSuite.scala
@@ -21,10 +21,10 @@ import java.io.File
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSQLContext
+import org.apache.spark.sql.test.{SharedSQLContext, SQLTestUtils}
import org.apache.spark.sql.types.{StringType, StructType}
-class WholeTextFileSuite extends QueryTest with SharedSQLContext {
+class WholeTextFileSuite extends QueryTest with SharedSQLContext with SQLTestUtils {
// Hadoop's FileSystem caching does not use the Configuration as part of its cache key, which
// can cause Filesystem.get(Configuration) to return a cached instance created with a different
@@ -35,13 +35,10 @@ class WholeTextFileSuite extends QueryTest with SharedSQLContext {
protected override def sparkConf =
super.sparkConf.set("spark.hadoop.fs.file.impl.disable.cache", "true")
- private def testFile: String = {
- Thread.currentThread().getContextClassLoader.getResource("test-data/text-suite.txt").toString
- }
-
test("reading text file with option wholetext=true") {
val df = spark.read.option("wholetext", "true")
- .format("text").load(testFile)
+ .format("text")
+ .load(testFile("test-data/text-suite.txt"))
// schema
assert(df.schema == new StructType().add("value", StringType))
http://git-wip-us.apache.org/repos/asf/spark/blob/9f929458/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index bc4a120..e562be8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -391,6 +391,13 @@ private[sql] trait SQLTestUtilsBase
val fs = hadoopPath.getFileSystem(spark.sessionState.newHadoopConf())
fs.makeQualified(hadoopPath).toUri
}
+
+ /**
+ * Returns full path to the given file in the resource folder
+ */
+ protected def testFile(fileName: String): String = {
+ Thread.currentThread().getContextClassLoader.getResource(fileName).toString
+ }
}
private[sql] object SQLTestUtils {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org