You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2017/04/20 10:13:52 UTC
spark git commit: [SPARK-20156][SQL][FOLLOW-UP] Java String
toLowerCase "Turkish locale bug" in Database and Table DDLs
Repository: spark
Updated Branches:
refs/heads/master 46c574976 -> 55bea5691
[SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs
### What changes were proposed in this pull request?
Database and table names conform to the Hive standard ("[a-zA-Z_0-9]+"), i.e. a name contains only letters, digits, and _.
When calling `toLowerCase` on these names, we should pass `Locale.ROOT` to `toLowerCase` to avoid inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem").
### How was this patch tested?
Added a test case
Author: Xiao Li <ga...@gmail.com>
Closes #17655 from gatorsmile/locale.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55bea569
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55bea569
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55bea569
Branch: refs/heads/master
Commit: 55bea56911a958f6d3ec3ad96fb425cc71ec03f4
Parents: 46c5749
Author: Xiao Li <ga...@gmail.com>
Authored: Thu Apr 20 11:13:48 2017 +0100
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu Apr 20 11:13:48 2017 +0100
----------------------------------------------------------------------
.../analysis/ResolveTableValuedFunctions.scala | 4 ++-
.../sql/catalyst/catalog/SessionCatalog.scala | 4 +--
.../apache/spark/sql/internal/SharedState.scala | 4 ++-
.../spark/sql/execution/command/DDLSuite.scala | 19 +++++++++++++
.../apache/spark/sql/test/SQLTestUtils.scala | 28 +++++++++++++++++++-
5 files changed, 54 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
index 8841309..de6de24 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.catalyst.analysis
+import java.util.Locale
+
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range}
import org.apache.spark.sql.catalyst.rules._
@@ -103,7 +105,7 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] {
override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators {
case u: UnresolvedTableValuedFunction if u.functionArgs.forall(_.resolved) =>
- builtinFunctions.get(u.functionName.toLowerCase()) match {
+ builtinFunctions.get(u.functionName.toLowerCase(Locale.ROOT)) match {
case Some(tvf) =>
val resolved = tvf.flatMap { case (argList, resolver) =>
argList.implicitCast(u.functionArgs) match {
http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 3fbf83f..6c6d600 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -115,14 +115,14 @@ class SessionCatalog(
* Format table name, taking into account case sensitivity.
*/
protected[this] def formatTableName(name: String): String = {
- if (conf.caseSensitiveAnalysis) name else name.toLowerCase
+ if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
}
/**
* Format database name, taking into account case sensitivity.
*/
protected[this] def formatDatabaseName(name: String): String = {
- if (conf.caseSensitiveAnalysis) name else name.toLowerCase
+ if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 0289471..d06dbaa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.internal
+import java.util.Locale
+
import scala.reflect.ClassTag
import scala.util.control.NonFatal
@@ -114,7 +116,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
// System preserved database should not exists in metastore. However it's hard to guarantee it
// for every session, because case-sensitivity differs. Here we always lowercase it to make our
// life easier.
- val globalTempDB = sparkContext.conf.get(GLOBAL_TEMP_DATABASE).toLowerCase
+ val globalTempDB = sparkContext.conf.get(GLOBAL_TEMP_DATABASE).toLowerCase(Locale.ROOT)
if (externalCatalog.databaseExists(globalTempDB)) {
throw new SparkException(
s"$globalTempDB is a system preserved database, please rename your existing database " +
http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index fe74ab4..2f4eb1b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -2295,5 +2295,24 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils {
}
}
}
+
+ test(s"basic DDL using locale tr - caseSensitive $caseSensitive") {
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> s"$caseSensitive") {
+ withLocale("tr") {
+ val dbName = "DaTaBaSe_I"
+ withDatabase(dbName) {
+ sql(s"CREATE DATABASE $dbName")
+ sql(s"USE $dbName")
+
+ val tabName = "tAb_I"
+ withTable(tabName) {
+ sql(s"CREATE TABLE $tabName(col_I int) USING PARQUET")
+ sql(s"INSERT OVERWRITE TABLE $tabName SELECT 1")
+ checkAnswer(sql(s"SELECT col_I FROM $tabName"), Row(1) :: Nil)
+ }
+ }
+ }
+ }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
index 6a4cc95..b5ad73b7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.test
import java.io.File
import java.net.URI
import java.nio.file.Files
-import java.util.UUID
+import java.util.{Locale, UUID}
import scala.language.implicitConversions
import scala.util.control.NonFatal
@@ -229,6 +229,32 @@ private[sql] trait SQLTestUtils
}
/**
+ * Drops database `dbName` after calling `f`.
+ */
+ protected def withDatabase(dbNames: String*)(f: => Unit): Unit = {
+ try f finally {
+ dbNames.foreach { name =>
+ spark.sql(s"DROP DATABASE IF EXISTS $name")
+ }
+ }
+ }
+
+ /**
+ * Enables Locale `language` before executing `f`, then switches back to the default locale of JVM
+ * after `f` returns.
+ */
+ protected def withLocale(language: String)(f: => Unit): Unit = {
+ val originalLocale = Locale.getDefault
+ try {
+ // Add Locale setting
+ Locale.setDefault(new Locale(language))
+ f
+ } finally {
+ Locale.setDefault(originalLocale)
+ }
+ }
+
+ /**
* Activates database `db` before executing `f`, then switches back to `default` database after
* `f` returns.
*/
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org