You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by db...@apache.org on 2020/03/06 00:15:31 UTC

[spark] branch branch-3.0 updated: [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`

This is an automated email from the ASF dual-hosted git repository.

dbtsai pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 853f69a  [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`
853f69a is described below

commit 853f69a4cf4bf138afb9075325ad175bc5f2d334
Author: DB Tsai <d_...@apple.com>
AuthorDate: Fri Mar 6 00:13:57 2020 +0000

    [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`
    
    ### What changes were proposed in this pull request?
    There are two implementation of quoteIfNeeded.  One is in `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and the other is in `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR will consolidate them into one.
    
    ### Why are the changes needed?
    Simplify the codebase.
    
    ### Does this PR introduce any user-facing change?
    No
    
    ### How was this patch tested?
    Existing UTs.
    
    Closes #27814 from dbtsai/SPARK-31058.
    
    Authored-by: DB Tsai <d_...@apple.com>
    Signed-off-by: DB Tsai <d_...@apple.com>
    (cherry picked from commit fe126a6a05b0de8db68c8f890e876ce96bf194ca)
    Signed-off-by: DB Tsai <d_...@apple.com>
---
 .../spark/sql/connector/catalog/IdentifierImpl.java |  2 +-
 .../sql/connector/catalog/CatalogV2Implicits.scala  | 10 +++++-----
 .../spark/sql/connector/catalog/V1Table.scala       | 13 +++----------
 .../catalyst/analysis/ResolveSessionCatalog.scala   |  2 +-
 .../execution/datasources/orc/OrcFiltersBase.scala  | 10 ----------
 .../sql/execution/datasources/orc/OrcFilters.scala  | 21 ++++++++++++---------
 .../sql/execution/datasources/orc/OrcFilters.scala  | 21 ++++++++++++---------
 7 files changed, 34 insertions(+), 45 deletions(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
index a56007b..30596d9 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
@@ -55,7 +55,7 @@ class IdentifierImpl implements Identifier {
   @Override
   public String toString() {
     return Stream.concat(Stream.of(namespace), Stream.of(name))
-      .map(CatalogV2Implicits::quote)
+      .map(CatalogV2Implicits::quoteIfNeeded)
       .collect(Collectors.joining("."));
   }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
index 3478af8..71bab62 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
@@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits {
   }
 
   implicit class NamespaceHelper(namespace: Array[String]) {
-    def quoted: String = namespace.map(quote).mkString(".")
+    def quoted: String = namespace.map(quoteIfNeeded).mkString(".")
   }
 
   implicit class IdentifierHelper(ident: Identifier) {
     def quoted: String = {
       if (ident.namespace.nonEmpty) {
-        ident.namespace.map(quote).mkString(".") + "." + quote(ident.name)
+        ident.namespace.map(quoteIfNeeded).mkString(".") + "." + quoteIfNeeded(ident.name)
       } else {
-        quote(ident.name)
+        quoteIfNeeded(ident.name)
       }
     }
 
@@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits {
           s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.")
     }
 
-    def quoted: String = parts.map(quote).mkString(".")
+    def quoted: String = parts.map(quoteIfNeeded).mkString(".")
   }
 
-  def quote(part: String): String = {
+  def quoteIfNeeded(part: String): String = {
     if (part.contains(".") || part.contains("`")) {
       s"`${part.replace("`", "``")}`"
     } else {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
index 91e0c58..70fc968 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable
 
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform}
 import org.apache.spark.sql.types.StructType
 
@@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table {
     def quoted: String = {
       identifier.database match {
         case Some(db) =>
-          Seq(db, identifier.table).map(quote).mkString(".")
+          Seq(db, identifier.table).map(quoteIfNeeded).mkString(".")
         case _ =>
-          quote(identifier.table)
+          quoteIfNeeded(identifier.table)
 
       }
     }
-
-    private def quote(part: String): String = {
-      if (part.contains(".") || part.contains("`")) {
-        s"`${part.replace("`", "``")}`"
-      } else {
-        part
-      }
-    }
   }
 
   def catalogTable: CatalogTable = v1Table
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 7950b61..90a3fb8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -95,7 +95,7 @@ class ResolveSessionCatalog(
               .map(_._2.dataType)
               .getOrElse {
                 throw new AnalysisException(
-                  s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " +
+                  s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " +
                     s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
               }
           }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
index 0b56587..e673309 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
@@ -36,16 +36,6 @@ trait OrcFiltersBase {
     }
   }
 
-  // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
-  // in order to distinguish predicate pushdown for nested columns.
-  protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = {
-    if (!name.contains("`") && name.contains(".")) {
-      s"`$name`"
-    } else {
-      name
-    }
-  }
-
   /**
    * Return true if this is a searchable type in ORC.
    * Both CharType and VarcharType are cleaned at AstBuilder.
diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
index 995c5ed..b9cbc48 100644
--- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
+++ b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.orc.storage.serde2.io.HiveDecimalWritable
 
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
 
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
 
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
 
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
 
       case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
index 948ab44..6e9e592 100644
--- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
+++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable
 
 import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
 import org.apache.spark.sql.sources.Filter
 import org.apache.spark.sql.types._
 
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
     // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
     // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
     // wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+    // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+    // in order to distinguish predicate pushdown for nested columns.
     expression match {
       case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
 
       case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
 
       case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
 
       case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValue = castLiteralValue(value, dataTypeMap(attribute))
         Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
 
       case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
 
       case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
 
       case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
-        val quotedName = quoteAttributeNameIfNeeded(attribute)
+        val quotedName = quoteIfNeeded(attribute)
         val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
         Some(builder.startAnd().in(quotedName, getType(attribute),
           castedValues.map(_.asInstanceOf[AnyRef]): _*).end())


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org