You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/01/24 07:03:22 UTC

[2/9] impala git commit: IMPALA-3942: Fix wrongly escaped string literal in front-end

IMPALA-3942: Fix wrongly escaped string literal in front-end

String literal can be wrapped by either single or double quotes.
There are some holes in escaping the string literal:
- toSql() always returns strings that are single quoted, resulting in
improper escaping in the output if the original string was actually
double quoted.
- It is not always possible to determine if a string "should" be single
or double quoted. e.g. concat('a', "b")

The solution is to normalize any string which comes from user's given
string or a generated string (e.g. constant fold by the rewritter rule)
to single quotes.

Testing:
Added unit tests into expr-test, ToSqlTest

Change-Id: Ibc4b5f5d8ffaa8feb96a466959427a04b3b06fec
Reviewed-on: http://gerrit.cloudera.org:8080/8818
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/72b4b815
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/72b4b815
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/72b4b815

Branch: refs/heads/master
Commit: 72b4b815b6eae55e91290b98345b912dbd7128e2
Parents: fa7ae65
Author: Jinchul <ji...@gmail.com>
Authored: Tue Dec 12 16:34:39 2017 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Jan 23 20:35:25 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc                       | 36 ++++++++++++++++
 .../apache/impala/analysis/StringLiteral.java   | 45 ++++++++++++++++++--
 .../org/apache/impala/analysis/ToSqlTest.java   | 16 +++++++
 3 files changed, 93 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/72b4b815/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 7a2a65a..2df1433 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -1339,6 +1339,42 @@ TEST_F(ExprTest, LiteralExprs) {
   TestIsNull("null", TYPE_NULL);
 }
 
+// IMPALA-3942: Test escaping string literal for single/double quotes
+TEST_F(ExprTest, EscapeStringLiteral) {
+  TestStringValue(R"('"')", R"(")");
+  TestStringValue(R"("'")", R"(')");
+  TestStringValue(R"("\"")", R"(")");
+  TestStringValue(R"('\'')", R"(')");
+  TestStringValue(R"('\\"')", R"(\")");
+  TestStringValue(R"("\\'")", R"(\')");
+  TestStringValue(R"("\\\"")", R"(\")");
+  TestStringValue(R"('\\\'')", R"(\')");
+  TestStringValue(R"("\\\"\\'\\\"")", R"(\"\'\")");
+  TestStringValue(R"('\\\'\\"\\\'')", R"(\'\"\')");
+  TestStringValue(R"("\\")", R"(\)");
+  TestStringValue(R"('\\')", R"(\)");
+  TestStringValue(R"("\\\\")", R"(\\)");
+  TestStringValue(R"('\\\\')", R"(\\)");
+  TestStringValue(R"('a"b')", R"(a"b)");
+  TestStringValue(R"("a'b")", R"(a'b)");
+  TestStringValue(R"('a\"b')", R"(a"b)");
+  TestStringValue(R"('a\'b')", R"(a'b)");
+  TestStringValue(R"("a\"b")", R"(a"b)");
+  TestStringValue(R"("a\'b")", R"(a'b)");
+  TestStringValue(R"('a\\"b')", R"(a\"b)");
+  TestStringValue(R"("a\\'b")", R"(a\'b)");
+  TestStringValue(R"('a\\\'b')", R"(a\'b)");
+  TestStringValue(R"("a\\\"b")", R"(a\"b)");
+  TestStringValue(R"(concat("a'b", "c'd"))", R"(a'bc'd)");
+  TestStringValue(R"(concat('a"b', 'c"d'))", R"(a"bc"d)");
+  TestStringValue(R"(concat("a'b", 'c"d'))", R"(a'bc"d)");
+  TestStringValue(R"(concat('a"b', "c'd"))", R"(a"bc'd)");
+  TestStringValue(R"(concat("a\"b", 'c\'d'))", R"(a"bc'd)");
+  TestStringValue(R"(concat('a\'b', "c\"d"))", R"(a'bc"d)");
+  TestStringValue(R"(concat("a\\\"b", 'c\\\'d'))", R"(a\"bc\'d)");
+  TestStringValue(R"(concat('a\\\'b', "c\\\"d"))", R"(a\'bc\"d)");
+}
+
 TEST_F(ExprTest, ArithmeticExprs) {
   // Test float ops.
   TestFixedResultTypeOps<float, float, double>(min_float_values_[TYPE_FLOAT],

http://git-wip-us.apache.org/repos/asf/impala/blob/72b4b815/fe/src/main/java/org/apache/impala/analysis/StringLiteral.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/StringLiteral.java b/fe/src/main/java/org/apache/impala/analysis/StringLiteral.java
index 72a5ca2..5c51c45 100644
--- a/fe/src/main/java/org/apache/impala/analysis/StringLiteral.java
+++ b/fe/src/main/java/org/apache/impala/analysis/StringLiteral.java
@@ -70,7 +70,7 @@ public class StringLiteral extends LiteralExpr {
   public int hashCode() { return value_.hashCode(); }
 
   @Override
-  public String toSqlImpl() { return "'" + value_ + "'"; }
+  public String toSqlImpl() { return "'" + getNormalizedValue() + "'"; }
 
   @Override
   protected void toThrift(TExprNode msg) {
@@ -84,7 +84,44 @@ public class StringLiteral extends LiteralExpr {
   public String getUnescapedValue() {
     // Unescape string exactly like Hive does. Hive's method assumes
     // quotes so we add them here to reuse Hive's code.
-    return BaseSemanticAnalyzer.unescapeSQLString("'" + value_ + "'");
+    return BaseSemanticAnalyzer.unescapeSQLString("'" + getNormalizedValue()
+        + "'");
+  }
+
+  /**
+   *  String literals can come directly from the SQL of a query or from rewrites like
+   *  constant folding. So this value normalization to a single-quoted string is necessary
+   *  because we do not know whether single or double quotes are appropriate.
+   *
+   *  @return a normalized representation of the string value suitable for embedding in
+   *          SQL as a single-quoted string literal.
+   */
+  private String getNormalizedValue() {
+    final int len = value_.length();
+    final StringBuilder sb = new StringBuilder(len);
+    for (int i = 0; i < len; ++i) {
+      final char currentChar = value_.charAt(i);
+      if (currentChar == '\\' && (i + 1) < len) {
+        final char nextChar = value_.charAt(i + 1);
+        // unescape an escaped double quote: remove back-slash in front of the quote.
+        if (nextChar == '"' || nextChar == '\'' || nextChar == '\\') {
+          if (nextChar != '"') {
+            sb.append(currentChar);
+          }
+          sb.append(nextChar);
+          ++i;
+          continue;
+        }
+
+        sb.append(currentChar);
+      } else if (currentChar == '\'') {
+        // escape a single quote: add back-slash in front of the quote.
+        sb.append("\\\'");
+      } else {
+        sb.append(currentChar);
+      }
+    }
+    return sb.toString();
   }
 
   @Override
@@ -159,8 +196,8 @@ public class StringLiteral extends LiteralExpr {
       return new NumericLiteral(val);
     }
     // Symbol is not an integer or floating point literal.
-    throw new AnalysisException(
-        "Failed to convert string literal '" + value_ + "' to number.");
+    throw new AnalysisException("Failed to convert string literal '"
+        + value_ + "' to number.");
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/impala/blob/72b4b815/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index bb6a39b..2b9da03 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -462,6 +462,22 @@ public class ToSqlTest extends FrontendTestBase {
         "SELECT `1 + 10`, `trim('abc')` FROM (SELECT 1 + 10, trim('abc')) t");
   }
 
+  @Test
+  public void normalizeStringLiteralTest() {
+    testToSql("select \"'\"", "SELECT '\\''");
+    testToSql("select \"\\'\"", "SELECT '\\''");
+    testToSql("select \"\\\\'\"", "SELECT '\\\\\\''");
+    testToSql("select '\"'", "SELECT '\"'");
+    testToSql("select '\\\"'", "SELECT '\"'");
+    testToSql("select '\\''", "SELECT '\\''");
+    testToSql("select '\\\\\\''", "SELECT '\\\\\\''");
+    testToSql("select regexp_replace(string_col, \"\\\\'\", \"'\") from " +
+        "functional.alltypes", "SELECT regexp_replace(string_col, '\\\\\\'', '\\'') " +
+        "FROM functional.alltypes");
+    testToSql("select * from functional.alltypes where '123' = \"123\"",
+        "SELECT * FROM functional.alltypes WHERE '123' = '123'");
+  }
+
   // Test the toSql() output of the where clause.
   @Test
   public void whereTest() {