You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2023/06/28 01:37:44 UTC

[spark] branch master updated: [SPARK-44039][CONNECT][TESTS] Improve for PlanGenerationTestSuite & ProtoToParsedPlanTestSuite

This is an automated email from the ASF dual-hosted git repository.

hvanhovell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c8c51d04741 [SPARK-44039][CONNECT][TESTS] Improve for PlanGenerationTestSuite & ProtoToParsedPlanTestSuite
c8c51d04741 is described below

commit c8c51d047411959ad6c648246a5bd6ea4ae13ce8
Author: panbingkun <pb...@gmail.com>
AuthorDate: Tue Jun 27 21:37:33 2023 -0400

    [SPARK-44039][CONNECT][TESTS] Improve for PlanGenerationTestSuite & ProtoToParsedPlanTestSuite
    
    ### What changes were proposed in this pull request?
    The pr aims to improve for PlanGenerationTestSuite & ProtoToParsedPlanTestSuite, include:
    - When generating `GOLDEN` files, we should first delete the corresponding directories and generate new ones to avoid submitting some redundant files during the review process. eg:
    When we write a test named `make_timestamp_ltz` for the overloaded method, and during the review process, the reviewer wishes to add more tests for the method. The name of this method has changed during the next submission process, such as `make_timestamp_ltz without timezone`.At this point, if the `queries/function_make_timestamp_ltz.json`, `queries/function_make_timestamp_ltz.proto.bin` and `explain-results/function_make_timestamp_ltz.explain` files of `function_make_timestamp_ltz`  [...]
    
    - Clear and update some redundant files submitted incorrectly
    
    ### Why are the changes needed?
    Make code clear.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Pass GA.
    
    Closes #41572 from panbingkun/SPARK-44039.
    
    Authored-by: panbingkun <pb...@gmail.com>
    Signed-off-by: Herman van Hovell <he...@databricks.com>
---
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  28 ++++++++++
 .../explain-results/function_percentile.explain    |   2 -
 .../function_regexp_extract_all.explain            |   2 -
 .../explain-results/function_regexp_instr.explain  |   2 -
 .../query-tests/explain-results/read_path.explain  |   1 -
 .../query-tests/queries/function_lit_array.json    |  58 ++++++++++-----------
 .../query-tests/queries/function_percentile.json   |  29 -----------
 .../queries/function_percentile.proto.bin          | Bin 192 -> 0 bytes
 .../queries/function_regexp_extract_all.json       |  33 ------------
 .../queries/function_regexp_extract_all.proto.bin  | Bin 212 -> 0 bytes
 .../query-tests/queries/function_regexp_instr.json |  33 ------------
 .../queries/function_regexp_instr.proto.bin        | Bin 203 -> 0 bytes
 .../resources/query-tests/queries/read_path.json   |  11 ----
 .../query-tests/queries/read_path.proto.bin        |   3 --
 .../queries/streaming_table_API_with_options.json  |   3 +-
 .../sql/connect/ProtoToParsedPlanTestSuite.scala   |  23 ++++++++
 16 files changed, 82 insertions(+), 146 deletions(-)

diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index e8d04f37d7f..ecb7092b8d9 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -44,6 +44,7 @@ import org.apache.spark.sql.functions.lit
 import org.apache.spark.sql.protobuf.{functions => pbFn}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
+import org.apache.spark.util.Utils
 
 // scalastyle:off
 /**
@@ -61,6 +62,14 @@ import org.apache.spark.unsafe.types.CalendarInterval
  *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "connect-client-jvm/testOnly org.apache.spark.sql.PlanGenerationTestSuite"
  * }}}
  *
+ * If you need to clean the orphaned golden files, you need to set the
+ * SPARK_CLEAN_ORPHANED_GOLDEN_FILES=1 environment variable before running this test, e.g.:
+ * {{{
+ *   SPARK_CLEAN_ORPHANED_GOLDEN_FILES=1 build/sbt "connect-client-jvm/testOnly org.apache.spark.sql.PlanGenerationTestSuite"
+ * }}}
+ * Note: not all orphaned golden files should be cleaned, some are reserved for testing backups
+ * compatibility.
+ *
  * Note that the plan protos are used as the input for the `ProtoToParsedPlanTestSuite` in the
  * `connector/connect/server` module
  */
@@ -74,6 +83,9 @@ class PlanGenerationTestSuite
   // Borrowed from SparkFunSuite
   private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"
 
+  private val cleanOrphanedGoldenFiles: Boolean =
+    System.getenv("SPARK_CLEAN_ORPHANED_GOLDEN_FILES") == "1"
+
   protected val queryFilePath: Path = commonResourcePath.resolve("query-tests/queries")
 
   // A relative path to /connector/connect/server, used by `ProtoToParsedPlanTestSuite` to run
@@ -111,9 +123,25 @@ class PlanGenerationTestSuite
 
   override protected def afterAll(): Unit = {
     session.close()
+    if (cleanOrphanedGoldenFiles) {
+      cleanOrphanedGoldenFile()
+    }
     super.afterAll()
   }
 
+  private def cleanOrphanedGoldenFile(): Unit = {
+    val allTestNames = testNames.map(_.replace(' ', '_'))
+    val orphans = Utils
+      .recursiveList(queryFilePath.toFile)
+      .filter(g =>
+        g.getAbsolutePath.endsWith(".proto.bin") ||
+          g.getAbsolutePath.endsWith(".json"))
+      .filter(g =>
+        !allTestNames.contains(g.getName.stripSuffix(".proto.bin")) &&
+          !allTestNames.contains(g.getName.stripSuffix(".json")))
+    orphans.foreach(Utils.deleteRecursively)
+  }
+
   private def test(name: String)(f: => Dataset[_]): Unit = super.test(name) {
     val actual = f.plan.getRoot
     val goldenFile = queryFilePath.resolve(name.replace(' ', '_') + ".proto.bin")
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_percentile.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_percentile.explain
deleted file mode 100644
index 901eaf745ef..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_percentile.explain
+++ /dev/null
@@ -1,2 +0,0 @@
-Aggregate [percentile(a#0, 0.3, 1, 0, 0, false) AS percentile(a, 0.3, 1)#0]
-+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all.explain
deleted file mode 100644
index 225379df43c..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_extract_all.explain
+++ /dev/null
@@ -1,2 +0,0 @@
-Project [regexp_extract_all(g#0, (\d+)([a-z]+), 1) AS regexp_extract_all(g, (\d+)([a-z]+), 1)#0]
-+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr.explain
deleted file mode 100644
index 8ee6294b451..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_regexp_instr.explain
+++ /dev/null
@@ -1,2 +0,0 @@
-Project [regexp_instr(g#0, \d+(a|b|m), 1) AS regexp_instr(g, \d+(a|b|m), 1)#0]
-+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/read_path.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/read_path.explain
deleted file mode 100644
index bad6a06e3fd..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/read_path.explain
+++ /dev/null
@@ -1 +0,0 @@
-Relation [name#0,age#0] csv
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
index c9441c9e77c..77fa899bb9b 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json
@@ -32,13 +32,13 @@
               "containsNull": true
             }
           },
-          "element": [{
+          "elements": [{
             "array": {
               "elementType": {
                 "integer": {
                 }
               },
-              "element": [{
+              "elements": [{
                 "integer": 1
               }]
             }
@@ -48,7 +48,7 @@
                 "integer": {
                 }
               },
-              "element": [{
+              "elements": [{
                 "integer": 2
               }]
             }
@@ -58,7 +58,7 @@
                 "integer": {
                 }
               },
-              "element": [{
+              "elements": [{
                 "integer": 3
               }]
             }
@@ -82,7 +82,7 @@
               "containsNull": true
             }
           },
-          "element": [{
+          "elements": [{
             "array": {
               "elementType": {
                 "array": {
@@ -93,13 +93,13 @@
                   "containsNull": true
                 }
               },
-              "element": [{
+              "elements": [{
                 "array": {
                   "elementType": {
                     "integer": {
                     }
                   },
-                  "element": [{
+                  "elements": [{
                     "integer": 1
                   }]
                 }
@@ -116,13 +116,13 @@
                   "containsNull": true
                 }
               },
-              "element": [{
+              "elements": [{
                 "array": {
                   "elementType": {
                     "integer": {
                     }
                   },
-                  "element": [{
+                  "elements": [{
                     "integer": 2
                   }]
                 }
@@ -139,13 +139,13 @@
                   "containsNull": true
                 }
               },
-              "element": [{
+              "elements": [{
                 "array": {
                   "elementType": {
                     "integer": {
                     }
                   },
-                  "element": [{
+                  "elements": [{
                     "integer": 3
                   }]
                 }
@@ -161,7 +161,7 @@
             "boolean": {
             }
           },
-          "element": [{
+          "elements": [{
             "boolean": true
           }, {
             "boolean": false
@@ -179,7 +179,7 @@
             "short": {
             }
           },
-          "element": [{
+          "elements": [{
             "short": 9872
           }, {
             "short": 9873
@@ -195,7 +195,7 @@
             "integer": {
             }
           },
-          "element": [{
+          "elements": [{
             "integer": -8726532
           }, {
             "integer": 8726532
@@ -211,7 +211,7 @@
             "long": {
             }
           },
-          "element": [{
+          "elements": [{
             "long": "7834609328726531"
           }, {
             "long": "7834609328726532"
@@ -227,7 +227,7 @@
             "double": {
             }
           },
-          "element": [{
+          "elements": [{
             "double": 2.718281828459045
           }, {
             "double": 1.0
@@ -243,7 +243,7 @@
             "float": {
             }
           },
-          "element": [{
+          "elements": [{
             "float": -0.8
           }, {
             "float": -0.7
@@ -261,7 +261,7 @@
               "precision": 38
             }
           },
-          "element": [{
+          "elements": [{
             "decimal": {
               "value": "89.97620",
               "precision": 7,
@@ -285,7 +285,7 @@
               "precision": 38
             }
           },
-          "element": [{
+          "elements": [{
             "decimal": {
               "value": "89889.7667231",
               "precision": 12,
@@ -307,7 +307,7 @@
             "string": {
             }
           },
-          "element": [{
+          "elements": [{
             "string": "connect!"
           }, {
             "string": "disconnect!"
@@ -325,7 +325,7 @@
             "string": {
             }
           },
-          "element": [{
+          "elements": [{
             "string": "ABCDEFGHIJ"
           }, {
             "string": "BCDEFGHIJK"
@@ -339,7 +339,7 @@
             "date": {
             }
           },
-          "element": [{
+          "elements": [{
             "date": 18545
           }, {
             "date": 18546
@@ -353,7 +353,7 @@
             "timestamp": {
             }
           },
-          "element": [{
+          "elements": [{
             "timestamp": "1677155519808000"
           }, {
             "timestamp": "1677155519809000"
@@ -367,7 +367,7 @@
             "timestamp": {
             }
           },
-          "element": [{
+          "elements": [{
             "timestamp": "12345000"
           }, {
             "timestamp": "23456000"
@@ -381,7 +381,7 @@
             "timestampNtz": {
             }
           },
-          "element": [{
+          "elements": [{
             "timestampNtz": "1677184560000000"
           }, {
             "timestampNtz": "1677188160000000"
@@ -395,7 +395,7 @@
             "date": {
             }
           },
-          "element": [{
+          "elements": [{
             "date": 19411
           }, {
             "date": 19417
@@ -411,7 +411,7 @@
               "endField": 3
             }
           },
-          "element": [{
+          "elements": [{
             "dayTimeInterval": "100000000"
           }, {
             "dayTimeInterval": "200000000"
@@ -427,7 +427,7 @@
               "endField": 1
             }
           },
-          "element": [{
+          "elements": [{
             "yearMonthInterval": 0
           }, {
             "yearMonthInterval": 0
@@ -441,7 +441,7 @@
             "calendarInterval": {
             }
           },
-          "element": [{
+          "elements": [{
             "calendarInterval": {
               "months": 2,
               "days": 20,
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.json b/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.json
deleted file mode 100644
index 44e2c98a4dc..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.json
+++ /dev/null
@@ -1,29 +0,0 @@
-{
-  "common": {
-    "planId": "1"
-  },
-  "project": {
-    "input": {
-      "common": {
-        "planId": "0"
-      },
-      "localRelation": {
-        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
-      }
-    },
-    "expressions": [{
-      "unresolvedFunction": {
-        "functionName": "percentile",
-        "arguments": [{
-          "unresolvedAttribute": {
-            "unparsedIdentifier": "a"
-          }
-        }, {
-          "literal": {
-            "double": 0.3
-          }
-        }]
-      }
-    }]
-  }
-}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.proto.bin
deleted file mode 100644
index 45b807e5ffb..00000000000
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_percentile.proto.bin and /dev/null differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.json
deleted file mode 100644
index ebe2f581e3d..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-  "common": {
-    "planId": "1"
-  },
-  "project": {
-    "input": {
-      "common": {
-        "planId": "0"
-      },
-      "localRelation": {
-        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
-      }
-    },
-    "expressions": [{
-      "unresolvedFunction": {
-        "functionName": "regexp_extract_all",
-        "arguments": [{
-          "unresolvedAttribute": {
-            "unparsedIdentifier": "g"
-          }
-        }, {
-          "literal": {
-            "string": "(\\d+)([a-z]+)"
-          }
-        }, {
-          "literal": {
-            "integer": 1
-          }
-        }]
-      }
-    }]
-  }
-}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.proto.bin
deleted file mode 100644
index 2cf31e5f75f..00000000000
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_extract_all.proto.bin and /dev/null differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.json b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.json
deleted file mode 100644
index cb44dda5ba2..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.json
+++ /dev/null
@@ -1,33 +0,0 @@
-{
-  "common": {
-    "planId": "1"
-  },
-  "project": {
-    "input": {
-      "common": {
-        "planId": "0"
-      },
-      "localRelation": {
-        "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e"
-      }
-    },
-    "expressions": [{
-      "unresolvedFunction": {
-        "functionName": "regexp_instr",
-        "arguments": [{
-          "unresolvedAttribute": {
-            "unparsedIdentifier": "g"
-          }
-        }, {
-          "literal": {
-            "string": "\\d+(a|b|m)"
-          }
-        }, {
-          "literal": {
-            "integer": 1
-          }
-        }]
-      }
-    }]
-  }
-}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.proto.bin
deleted file mode 100644
index 55cc77eb3cd..00000000000
Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_regexp_instr.proto.bin and /dev/null differ
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/read_path.json b/connector/connect/common/src/test/resources/query-tests/queries/read_path.json
deleted file mode 100644
index c3fc8132a3b..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/queries/read_path.json
+++ /dev/null
@@ -1,11 +0,0 @@
-{
-  "read": {
-    "dataSource": {
-      "format": "csv",
-      "schema": "name STRING,age INT",
-      "options": {
-        "path": "../common/src/test/resources/query-tests/test-data/people.csv"
-      }
-    }
-  }
-}
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/read_path.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/read_path.proto.bin
deleted file mode 100644
index 01787253c42..00000000000
--- a/connector/connect/common/src/test/resources/query-tests/queries/read_path.proto.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-ca
-csvname STRING,age INTE
-path=../common/src/test/resources/query-tests/test-data/people.csv
\ No newline at end of file
diff --git a/connector/connect/common/src/test/resources/query-tests/queries/streaming_table_API_with_options.json b/connector/connect/common/src/test/resources/query-tests/queries/streaming_table_API_with_options.json
index bcfb885aed8..ac68317d9b9 100644
--- a/connector/connect/common/src/test/resources/query-tests/queries/streaming_table_API_with_options.json
+++ b/connector/connect/common/src/test/resources/query-tests/queries/streaming_table_API_with_options.json
@@ -11,4 +11,5 @@
       }
     },
     "isStreaming": true
-  }
\ No newline at end of file
+  }
+}
\ No newline at end of file
diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
index e5c14192a31..9fdaffcba67 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
+++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala
@@ -57,12 +57,24 @@ import org.apache.spark.util.Utils
  * {{{
  *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "connect/testOnly org.apache.spark.sql.connect.ProtoToParsedPlanTestSuite"
  * }}}
+ *
+ * If you need to clean the orphaned golden files, you need to set the
+ * SPARK_CLEAN_ORPHANED_GOLDEN_FILES=1 environment variable before running this test, e.g.:
+ * {{{
+ *   SPARK_CLEAN_ORPHANED_GOLDEN_FILES=1 build/sbt "connect/testOnly org.apache.spark.sql.connect.ProtoToParsedPlanTestSuite"
+ * }}}
+ * Note: not all orphaned golden files should be cleaned, some are reserved for testing backups
+ * compatibility.
  */
 // scalastyle:on
 class ProtoToParsedPlanTestSuite
     extends SparkFunSuite
     with SharedSparkSession
     with ResourceHelper {
+
+  private val cleanOrphanedGoldenFiles: Boolean =
+    System.getenv("SPARK_CLEAN_ORPHANED_GOLDEN_FILES") == "1"
+
   val url = "jdbc:h2:mem:testdb0"
   var conn: java.sql.Connection = null
 
@@ -95,6 +107,9 @@ class ProtoToParsedPlanTestSuite
 
   override def afterAll(): Unit = {
     conn.close()
+    if (cleanOrphanedGoldenFiles) {
+      cleanOrphanedGoldenFile()
+    }
     super.afterAll()
   }
 
@@ -195,6 +210,14 @@ class ProtoToParsedPlanTestSuite
     }
   }
 
+  private def cleanOrphanedGoldenFile(): Unit = {
+    val orphans = Utils
+      .recursiveList(goldenFilePath.toFile)
+      .filter(g => g.getAbsolutePath.endsWith(".explain"))
+      .filter(g => !testNames.contains(g.getName.stripSuffix(".explain")))
+    orphans.foreach(Utils.deleteRecursively)
+  }
+
   private def removeMemoryAddress(expr: String): String = {
     expr
       .replaceAll("@[0-9a-f]+,", ",")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org