Posted to commits@spark.apache.org by yh...@apache.org on 2015/05/24 18:51:44 UTC

spark git commit: [SPARK-7805] [SQL] Move SQLTestUtils.scala and ParquetTest.scala to src/test

Repository: spark
Updated Branches:
  refs/heads/master bfbc0df72 -> ed21476bc


[SPARK-7805] [SQL] Move SQLTestUtils.scala and ParquetTest.scala to src/test

https://issues.apache.org/jira/browse/SPARK-7805

Because `sql/hive`'s tests depend on the test jar of `sql/core`, we do not need to keep `SQLTestUtils` and `ParquetTest` in `src/main`. Only classes that are needed by `sql/console` or Python tests should be added to `src/main` (for Python, we need them in `src/main`, right? davies).
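
This works because sbt lets one module's tests depend on another module's compiled test classes. A minimal sketch of such a wiring (illustrative only; the project names are hypothetical and this is not Spark's actual build definition):

    // Illustrative sbt sketch only -- not Spark's real SparkBuild.scala.
    // The "test->test" mapping lets sql/hive's tests see the classes compiled
    // under sql/core's src/test (its "test jar"), so shared helpers such as
    // SQLTestUtils and ParquetTest can live there instead of in src/main.
    lazy val sqlCore = project.in(file("sql/core"))

    lazy val sqlHive = project
      .in(file("sql/hive"))
      .dependsOn(sqlCore % "compile->compile;test->test")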

Author: Yin Huai <yh...@databricks.com>

Closes #6334 from yhuai/SPARK-7805 and squashes the following commits:

af6d0c9 [Yin Huai] mima
b86746a [Yin Huai] Move SQLTestUtils.scala and ParquetTest.scala to src/test.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ed21476b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ed21476b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ed21476b

Branch: refs/heads/master
Commit: ed21476bc0c760616e7e6bb99f6541745fb09595
Parents: bfbc0df
Author: Yin Huai <yh...@databricks.com>
Authored: Sun May 24 09:51:37 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Sun May 24 09:51:37 2015 -0700

----------------------------------------------------------------------
 project/MimaExcludes.scala                      |   5 +-
 .../apache/spark/sql/parquet/ParquetTest.scala  | 102 -------------------
 .../scala/org/apache/spark/sql/test/README.md   |   7 ++
 .../apache/spark/sql/test/SQLTestUtils.scala    |  88 ----------------
 .../apache/spark/sql/parquet/ParquetTest.scala  | 102 +++++++++++++++++++
 .../apache/spark/sql/test/SQLTestUtils.scala    |  88 ++++++++++++++++
 6 files changed, 201 insertions(+), 191 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/project/MimaExcludes.scala
----------------------------------------------------------------------
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index 03e93a2..11b439e 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -133,7 +133,10 @@ object MimaExcludes {
               "org.apache.spark.sql.parquet.TestGroupWriteSupport"),
             ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData"),
             ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData$"),
-            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CacheManager")
+            ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CacheManager"),
+            // TODO: Remove the following rule once ParquetTest has been moved to src/test.
+            ProblemFilters.exclude[MissingClassProblem](
+              "org.apache.spark.sql.parquet.ParquetTest")
           ) ++ Seq(
             // SPARK-7530 Added StreamingContext.getState()
             ProblemFilters.exclude[MissingMethodProblem](

http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala
deleted file mode 100644
index 516ba37..0000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.parquet
-
-import java.io.File
-
-import scala.reflect.ClassTag
-import scala.reflect.runtime.universe.TypeTag
-
-import org.apache.spark.sql.test.SQLTestUtils
-import org.apache.spark.sql.{DataFrame, SaveMode}
-
-/**
- * A helper trait that provides convenient facilities for Parquet testing.
- *
- * NOTE: Considering classes `Tuple1` ... `Tuple22` all extend `Product`, it would be more
- * convenient to use tuples rather than special case classes when writing test cases/suites.
- * Especially, `Tuple1.apply` can be used to easily wrap a single type/value.
- */
-private[sql] trait ParquetTest extends SQLTestUtils {
-  import sqlContext.implicits.{localSeqToDataFrameHolder, rddToDataFrameHolder}
-  import sqlContext.sparkContext
-
-  /**
-   * Writes `data` to a Parquet file, which is then passed to `f` and will be deleted after `f`
-   * returns.
-   */
-  protected def withParquetFile[T <: Product: ClassTag: TypeTag]
-      (data: Seq[T])
-      (f: String => Unit): Unit = {
-    withTempPath { file =>
-      sparkContext.parallelize(data).toDF().write.parquet(file.getCanonicalPath)
-      f(file.getCanonicalPath)
-    }
-  }
-
-  /**
-   * Writes `data` to a Parquet file and reads it back as a [[DataFrame]],
-   * which is then passed to `f`. The Parquet file will be deleted after `f` returns.
-   */
-  protected def withParquetDataFrame[T <: Product: ClassTag: TypeTag]
-      (data: Seq[T])
-      (f: DataFrame => Unit): Unit = {
-    withParquetFile(data)(path => f(sqlContext.read.parquet(path)))
-  }
-
-  /**
-   * Writes `data` to a Parquet file, reads it back as a [[DataFrame]] and registers it as a
-   * temporary table named `tableName`, then calls `f`. The temporary table together with the
-   * Parquet file will be dropped/deleted after `f` returns.
-   */
-  protected def withParquetTable[T <: Product: ClassTag: TypeTag]
-      (data: Seq[T], tableName: String)
-      (f: => Unit): Unit = {
-    withParquetDataFrame(data) { df =>
-      sqlContext.registerDataFrameAsTable(df, tableName)
-      withTempTable(tableName)(f)
-    }
-  }
-
-  protected def makeParquetFile[T <: Product: ClassTag: TypeTag](
-      data: Seq[T], path: File): Unit = {
-    data.toDF().write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath)
-  }
-
-  protected def makeParquetFile[T <: Product: ClassTag: TypeTag](
-      df: DataFrame, path: File): Unit = {
-    df.write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath)
-  }
-
-  protected def makePartitionDir(
-      basePath: File,
-      defaultPartitionName: String,
-      partitionCols: (String, Any)*): File = {
-    val partNames = partitionCols.map { case (k, v) =>
-      val valueString = if (v == null || v == "") defaultPartitionName else v.toString
-      s"$k=$valueString"
-    }
-
-    val partDir = partNames.foldLeft(basePath) { (parent, child) =>
-      new File(parent, child)
-    }
-
-    assert(partDir.mkdirs(), s"Couldn't create directory $partDir")
-    partDir
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/sql/core/src/main/scala/org/apache/spark/sql/test/README.md
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/README.md b/sql/core/src/main/scala/org/apache/spark/sql/test/README.md
new file mode 100644
index 0000000..d867f18
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/test/README.md
@@ -0,0 +1,7 @@
+README
+======
+
+Please do not add any class in this place unless it is used by `sql/console` or Python tests.
+If you need to create any classes or traits that will be used by tests from both `sql/core` and
+`sql/hive`, you can add them in the `src/test` of `sql/core` (tests of `sql/hive`
+depend on the test jar of `sql/core`).

http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala
deleted file mode 100644
index ca66cdc..0000000
--- a/sql/core/src/main/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.test
-
-import java.io.File
-
-import scala.util.Try
-
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.util.Utils
-
-trait SQLTestUtils {
-  val sqlContext: SQLContext
-
-  import sqlContext.{conf, sparkContext}
-
-  protected def configuration = sparkContext.hadoopConfiguration
-
-  /**
-   * Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL
-   * configurations.
-   *
-   * @todo Probably this method should be moved to a more general place
-   */
-  protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
-    val (keys, values) = pairs.unzip
-    val currentValues = keys.map(key => Try(conf.getConf(key)).toOption)
-    (keys, values).zipped.foreach(conf.setConf)
-    try f finally {
-      keys.zip(currentValues).foreach {
-        case (key, Some(value)) => conf.setConf(key, value)
-        case (key, None) => conf.unsetConf(key)
-      }
-    }
-  }
-
-  /**
-   * Generates a temporary path without creating the actual file/directory, then passes it to `f`. If
-   * a file/directory is created there by `f`, it will be deleted after `f` returns.
-   *
-   * @todo Probably this method should be moved to a more general place
-   */
-  protected def withTempPath(f: File => Unit): Unit = {
-    val path = Utils.createTempDir()
-    path.delete()
-    try f(path) finally Utils.deleteRecursively(path)
-  }
-
-  /**
-   * Creates a temporary directory, which is then passed to `f` and will be deleted after `f`
-   * returns.
-   *
-   * @todo Probably this method should be moved to a more general place
-   */
-  protected def withTempDir(f: File => Unit): Unit = {
-    val dir = Utils.createTempDir().getCanonicalFile
-    try f(dir) finally Utils.deleteRecursively(dir)
-  }
-
-  /**
-   * Drops temporary table `tableName` after calling `f`.
-   */
-  protected def withTempTable(tableName: String)(f: => Unit): Unit = {
-    try f finally sqlContext.dropTempTable(tableName)
-  }
-
-  /**
-   * Drops table `tableName` after calling `f`.
-   */
-  protected def withTable(tableName: String)(f: => Unit): Unit = {
-    try f finally sqlContext.sql(s"DROP TABLE IF EXISTS $tableName")
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala
new file mode 100644
index 0000000..516ba37
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parquet
+
+import java.io.File
+
+import scala.reflect.ClassTag
+import scala.reflect.runtime.universe.TypeTag
+
+import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.{DataFrame, SaveMode}
+
+/**
+ * A helper trait that provides convenient facilities for Parquet testing.
+ *
+ * NOTE: Considering classes `Tuple1` ... `Tuple22` all extend `Product`, it would be more
+ * convenient to use tuples rather than special case classes when writing test cases/suites.
+ * Especially, `Tuple1.apply` can be used to easily wrap a single type/value.
+ */
+private[sql] trait ParquetTest extends SQLTestUtils {
+  import sqlContext.implicits.{localSeqToDataFrameHolder, rddToDataFrameHolder}
+  import sqlContext.sparkContext
+
+  /**
+   * Writes `data` to a Parquet file, which is then passed to `f` and will be deleted after `f`
+   * returns.
+   */
+  protected def withParquetFile[T <: Product: ClassTag: TypeTag]
+      (data: Seq[T])
+      (f: String => Unit): Unit = {
+    withTempPath { file =>
+      sparkContext.parallelize(data).toDF().write.parquet(file.getCanonicalPath)
+      f(file.getCanonicalPath)
+    }
+  }
+
+  /**
+   * Writes `data` to a Parquet file and reads it back as a [[DataFrame]],
+   * which is then passed to `f`. The Parquet file will be deleted after `f` returns.
+   */
+  protected def withParquetDataFrame[T <: Product: ClassTag: TypeTag]
+      (data: Seq[T])
+      (f: DataFrame => Unit): Unit = {
+    withParquetFile(data)(path => f(sqlContext.read.parquet(path)))
+  }
+
+  /**
+   * Writes `data` to a Parquet file, reads it back as a [[DataFrame]] and registers it as a
+   * temporary table named `tableName`, then calls `f`. The temporary table together with the
+   * Parquet file will be dropped/deleted after `f` returns.
+   */
+  protected def withParquetTable[T <: Product: ClassTag: TypeTag]
+      (data: Seq[T], tableName: String)
+      (f: => Unit): Unit = {
+    withParquetDataFrame(data) { df =>
+      sqlContext.registerDataFrameAsTable(df, tableName)
+      withTempTable(tableName)(f)
+    }
+  }
+
+  protected def makeParquetFile[T <: Product: ClassTag: TypeTag](
+      data: Seq[T], path: File): Unit = {
+    data.toDF().write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath)
+  }
+
+  protected def makeParquetFile[T <: Product: ClassTag: TypeTag](
+      df: DataFrame, path: File): Unit = {
+    df.write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath)
+  }
+
+  protected def makePartitionDir(
+      basePath: File,
+      defaultPartitionName: String,
+      partitionCols: (String, Any)*): File = {
+    val partNames = partitionCols.map { case (k, v) =>
+      val valueString = if (v == null || v == "") defaultPartitionName else v.toString
+      s"$k=$valueString"
+    }
+
+    val partDir = partNames.foldLeft(basePath) { (parent, child) =>
+      new File(parent, child)
+    }
+
+    assert(partDir.mkdirs(), s"Couldn't create directory $partDir")
+    partDir
+  }
+}
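
For context, a minimal usage sketch of the helpers above (the suite name and data are hypothetical; it assumes a concrete SQLContext such as org.apache.spark.sql.test.TestSQLContext is available, since SQLTestUtils leaves `sqlContext` abstract):

    package org.apache.spark.sql.parquet

    import org.scalatest.FunSuite

    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.test.TestSQLContext

    // Hypothetical suite; ParquetTest is private[sql], so anything mixing it
    // in has to live under the org.apache.spark.sql package.
    class ParquetRoundTripSuite extends FunSuite with ParquetTest {
      val sqlContext: SQLContext = TestSQLContext

      test("round-trips tuples through a temporary Parquet table") {
        // The data is written to a temp Parquet file, registered as temp
        // table "t", and both are cleaned up once this block returns.
        withParquetTable((1 to 3).map(i => (i, i.toString)), "t") {
          assert(sqlContext.table("t").count() === 3)
        }
      }
    }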

http://git-wip-us.apache.org/repos/asf/spark/blob/ed21476b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
new file mode 100644
index 0000000..ca66cdc
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import java.io.File
+
+import scala.util.Try
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.util.Utils
+
+trait SQLTestUtils {
+  val sqlContext: SQLContext
+
+  import sqlContext.{conf, sparkContext}
+
+  protected def configuration = sparkContext.hadoopConfiguration
+
+  /**
+   * Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL
+   * configurations.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
+    val (keys, values) = pairs.unzip
+    val currentValues = keys.map(key => Try(conf.getConf(key)).toOption)
+    (keys, values).zipped.foreach(conf.setConf)
+    try f finally {
+      keys.zip(currentValues).foreach {
+        case (key, Some(value)) => conf.setConf(key, value)
+        case (key, None) => conf.unsetConf(key)
+      }
+    }
+  }
+
+  /**
+   * Generates a temporary path without creating the actual file/directory, then passes it to `f`. If
+   * a file/directory is created there by `f`, it will be deleted after `f` returns.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withTempPath(f: File => Unit): Unit = {
+    val path = Utils.createTempDir()
+    path.delete()
+    try f(path) finally Utils.deleteRecursively(path)
+  }
+
+  /**
+   * Creates a temporary directory, which is then passed to `f` and will be deleted after `f`
+   * returns.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withTempDir(f: File => Unit): Unit = {
+    val dir = Utils.createTempDir().getCanonicalFile
+    try f(dir) finally Utils.deleteRecursively(dir)
+  }
+
+  /**
+   * Drops temporary table `tableName` after calling `f`.
+   */
+  protected def withTempTable(tableName: String)(f: => Unit): Unit = {
+    try f finally sqlContext.dropTempTable(tableName)
+  }
+
+  /**
+   * Drops table `tableName` after calling `f`.
+   */
+  protected def withTable(tableName: String)(f: => Unit): Unit = {
+    try f finally sqlContext.sql(s"DROP TABLE IF EXISTS $tableName")
+  }
+}
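
Likewise, a minimal usage sketch for these utilities (hypothetical suite; the conf key is only for illustration, and a concrete SQLContext such as TestSQLContext is assumed):

    package org.apache.spark.sql.test

    import org.scalatest.FunSuite

    import org.apache.spark.sql.SQLContext

    // Hypothetical suite showing the restore/clean-up-after-block semantics.
    class SQLTestUtilsExampleSuite extends FunSuite with SQLTestUtils {
      val sqlContext: SQLContext = TestSQLContext

      test("withSQLConf restores overridden settings") {
        withSQLConf("spark.sql.shuffle.partitions" -> "5") {
          // The override is only visible inside this block ...
          assert(sqlContext.getConf("spark.sql.shuffle.partitions") === "5")
        }
        // ... afterwards the previous value (or unset state) is back in effect.
      }

      test("withTempPath yields a path that does not yet exist") {
        withTempPath { path =>
          // The helper creates and deletes a temp dir, handing us a fresh
          // location; anything written here is removed after the block returns.
          assert(!path.exists())
        }
      }
    }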

