You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by db...@apache.org on 2020/03/06 00:15:31 UTC
[spark] branch branch-3.0 updated: [SPARK-31058][SQL][TEST-HIVE1.2]
Consolidate the implementation of `quoteIfNeeded`
This is an automated email from the ASF dual-hosted git repository.
dbtsai pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 853f69a [SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`
853f69a is described below
commit 853f69a4cf4bf138afb9075325ad175bc5f2d334
Author: DB Tsai <d_...@apple.com>
AuthorDate: Fri Mar 6 00:13:57 2020 +0000
[SPARK-31058][SQL][TEST-HIVE1.2] Consolidate the implementation of `quoteIfNeeded`
### What changes were proposed in this pull request?
There are two implementation of quoteIfNeeded. One is in `org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quote` and the other is in `OrcFiltersBase.quoteAttributeNameIfNeeded`. This PR will consolidate them into one.
### Why are the changes needed?
Simplify the codebase.
### Does this PR introduce any user-facing change?
No
### How was this patch tested?
Existing UTs.
Closes #27814 from dbtsai/SPARK-31058.
Authored-by: DB Tsai <d_...@apple.com>
Signed-off-by: DB Tsai <d_...@apple.com>
(cherry picked from commit fe126a6a05b0de8db68c8f890e876ce96bf194ca)
Signed-off-by: DB Tsai <d_...@apple.com>
---
.../spark/sql/connector/catalog/IdentifierImpl.java | 2 +-
.../sql/connector/catalog/CatalogV2Implicits.scala | 10 +++++-----
.../spark/sql/connector/catalog/V1Table.scala | 13 +++----------
.../catalyst/analysis/ResolveSessionCatalog.scala | 2 +-
.../execution/datasources/orc/OrcFiltersBase.scala | 10 ----------
.../sql/execution/datasources/orc/OrcFilters.scala | 21 ++++++++++++---------
.../sql/execution/datasources/orc/OrcFilters.scala | 21 ++++++++++++---------
7 files changed, 34 insertions(+), 45 deletions(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
index a56007b..30596d9 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java
@@ -55,7 +55,7 @@ class IdentifierImpl implements Identifier {
@Override
public String toString() {
return Stream.concat(Stream.of(namespace), Stream.of(name))
- .map(CatalogV2Implicits::quote)
+ .map(CatalogV2Implicits::quoteIfNeeded)
.collect(Collectors.joining("."));
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
index 3478af8..71bab62 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala
@@ -84,15 +84,15 @@ private[sql] object CatalogV2Implicits {
}
implicit class NamespaceHelper(namespace: Array[String]) {
- def quoted: String = namespace.map(quote).mkString(".")
+ def quoted: String = namespace.map(quoteIfNeeded).mkString(".")
}
implicit class IdentifierHelper(ident: Identifier) {
def quoted: String = {
if (ident.namespace.nonEmpty) {
- ident.namespace.map(quote).mkString(".") + "." + quote(ident.name)
+ ident.namespace.map(quoteIfNeeded).mkString(".") + "." + quoteIfNeeded(ident.name)
} else {
- quote(ident.name)
+ quoteIfNeeded(ident.name)
}
}
@@ -122,10 +122,10 @@ private[sql] object CatalogV2Implicits {
s"$quoted is not a valid TableIdentifier as it has more than 2 name parts.")
}
- def quoted: String = parts.map(quote).mkString(".")
+ def quoted: String = parts.map(quoteIfNeeded).mkString(".")
}
- def quote(part: String): String = {
+ def quoteIfNeeded(part: String): String = {
if (part.contains(".") || part.contains("`")) {
s"`${part.replace("`", "``")}`"
} else {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
index 91e0c58..70fc968 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
@@ -24,6 +24,7 @@ import scala.collection.mutable
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
import org.apache.spark.sql.connector.expressions.{LogicalExpressions, Transform}
import org.apache.spark.sql.types.StructType
@@ -35,20 +36,12 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table {
def quoted: String = {
identifier.database match {
case Some(db) =>
- Seq(db, identifier.table).map(quote).mkString(".")
+ Seq(db, identifier.table).map(quoteIfNeeded).mkString(".")
case _ =>
- quote(identifier.table)
+ quoteIfNeeded(identifier.table)
}
}
-
- private def quote(part: String): String = {
- if (part.contains(".") || part.contains("`")) {
- s"`${part.replace("`", "``")}`"
- } else {
- part
- }
- }
}
def catalogTable: CatalogTable = v1Table
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 7950b61..90a3fb8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -95,7 +95,7 @@ class ResolveSessionCatalog(
.map(_._2.dataType)
.getOrElse {
throw new AnalysisException(
- s"ALTER COLUMN cannot find column ${quote(colName)} in v1 table. " +
+ s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " +
s"Available: ${v1Table.schema.fieldNames.mkString(", ")}")
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
index 0b56587..e673309 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFiltersBase.scala
@@ -36,16 +36,6 @@ trait OrcFiltersBase {
}
}
- // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
- // in order to distinguish predicate pushdown for nested columns.
- protected[sql] def quoteAttributeNameIfNeeded(name: String) : String = {
- if (!name.contains("`") && name.contains(".")) {
- s"`$name`"
- } else {
- name
- }
- }
-
/**
* Return true if this is a searchable type in ORC.
* Both CharType and VarcharType are cleaned at AstBuilder.
diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
index 995c5ed..b9cbc48 100644
--- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
+++ b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -24,6 +24,7 @@ import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder
import org.apache.orc.storage.serde2.io.HiveDecimalWritable
import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types._
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
// NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
// call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
// wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+ // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+ // in order to distinguish predicate pushdown for nested columns.
expression match {
case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
Some(builder.startAnd().in(quotedName, getType(attribute),
castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
index 948ab44..6e9e592 100644
--- a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
+++ b/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable
import org.apache.spark.SparkException
+import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types._
@@ -218,47 +219,49 @@ private[sql] object OrcFilters extends OrcFiltersBase {
// NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
// call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be
// wrapped by a "parent" predicate (`And`, `Or`, or `Not`).
+ // Since ORC 1.5.0 (ORC-323), we need to quote for column names with `.` characters
+ // in order to distinguish predicate pushdown for nested columns.
expression match {
case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().equals(quotedName, getType(attribute), castedValue).end())
case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().nullSafeEquals(quotedName, getType(attribute), castedValue).end())
case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().lessThan(quotedName, getType(attribute), castedValue).end())
case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startAnd().lessThanEquals(quotedName, getType(attribute), castedValue).end())
case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startNot().lessThanEquals(quotedName, getType(attribute), castedValue).end())
case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValue = castLiteralValue(value, dataTypeMap(attribute))
Some(builder.startNot().lessThan(quotedName, getType(attribute), castedValue).end())
case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
Some(builder.startAnd().isNull(quotedName, getType(attribute)).end())
case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
Some(builder.startNot().isNull(quotedName, getType(attribute)).end())
case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) =>
- val quotedName = quoteAttributeNameIfNeeded(attribute)
+ val quotedName = quoteIfNeeded(attribute)
val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(attribute)))
Some(builder.startAnd().in(quotedName, getType(attribute),
castedValues.map(_.asInstanceOf[AnyRef]): _*).end())
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org