You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/09/25 18:08:29 UTC

svn commit: r1627566 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ java/org/apache/hadoop/hive/ql/parse/ test/queries/clientpositive/ test/results/clientpositive/

Author: hashutosh
Date: Thu Sep 25 16:08:28 2014
New Revision: 1627566

URL: http://svn.apache.org/r1627566
Log:
HIVE-8199 : CBO Trunk Merge: quote2 test fails due to incorrect literal translation (Sergey Shelukhin via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
    hive/trunk/ql/src/test/queries/clientpositive/quote2.q
    hive/trunk/ql/src/test/results/clientpositive/quote2.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java?rev=1627566&r1=1627565&r2=1627566&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/ASTBuilder.java Thu Sep 25 16:08:28 2014
@@ -25,6 +25,7 @@ import net.hydromatic.avatica.ByteString
 
 import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
 import org.eigenbase.rel.JoinRelType;
@@ -174,8 +175,9 @@ class ASTBuilder {
     case VARCHAR:
     case CHAR:
       val = literal.getValue3();
+      String escapedVal = BaseSemanticAnalyzer.escapeSQLString(String.valueOf(val));
       type = HiveParser.StringLiteral;
-      val = "'" + String.valueOf(val) + "'";
+      val = "'" + escapedVal + "'";
       break;
     case BOOLEAN:
       val = literal.getValue3();
@@ -231,4 +233,4 @@ class ASTBuilder {
     }
     return this;
   }
-}
\ No newline at end of file
+}

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1627566&r1=1627565&r2=1627566&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Thu Sep 25 16:08:28 2014
@@ -406,7 +406,6 @@ public abstract class BaseSemanticAnalyz
 
   @SuppressWarnings("nls")
   public static String unescapeSQLString(String b) {
-
     Character enclosure = null;
 
     // Some of the strings can be passed in as unicode. For example, the
@@ -487,7 +486,7 @@ public abstract class BaseSemanticAnalyz
         case '\\':
           sb.append("\\");
           break;
-        // The following 2 lines are exactly what MySQL does
+        // The following 2 lines are exactly what MySQL does. TODO: why do we do this?
         case '%':
           sb.append("\\%");
           break;
@@ -505,6 +504,58 @@ public abstract class BaseSemanticAnalyz
     return sb.toString();
   }
 
+  /**
+   * Escapes the string for AST; doesn't enclose it in quotes, however.
+   */
+  public static String escapeSQLString(String b) {
+    // There's usually nothing to escape so we will be optimistic.
+    String result = b;
+    for (int i = 0; i < result.length(); ++i) {
+      char currentChar = result.charAt(i);
+      if (currentChar == '\\' && ((i + 1) < result.length())) {
+        // TODO: do we need to handle the "this is what MySQL does" here?
+        char nextChar = result.charAt(i + 1);
+        if (nextChar == '%' || nextChar == '_') {
+          ++i;
+          continue;
+        }
+      }
+      switch (currentChar) {
+      case '\0': result = spliceString(result, i, "\\0"); ++i; break;
+      case '\'': result = spliceString(result, i, "\\'"); ++i; break;
+      case '\"': result = spliceString(result, i, "\\\""); ++i; break;
+      case '\b': result = spliceString(result, i, "\\b"); ++i; break;
+      case '\n': result = spliceString(result, i, "\\n"); ++i; break;
+      case '\r': result = spliceString(result, i, "\\r"); ++i; break;
+      case '\t': result = spliceString(result, i, "\\t"); ++i; break;
+      case '\\': result = spliceString(result, i, "\\\\"); ++i; break;
+      case '\u001A': result = spliceString(result, i, "\\Z"); ++i; break;
+      default: {
+        if (currentChar < ' ') {
+          String hex = Integer.toHexString(currentChar);
+          String unicode = "\\u";
+          for (int j = 4; j > hex.length(); --j) {
+            unicode += '0';
+          }
+          unicode += hex;
+          result = spliceString(result, i, unicode);
+          i += (unicode.length() - 1);
+        }
+        break; // if not a control character, do nothing
+      }
+      }
+    }
+    return result;
+  }
+
+  private static String spliceString(String str, int i, String replacement) {
+    return spliceString(str, i, 1, replacement);
+  }
+
+  private static String spliceString(String str, int i, int length, String replacement) {
+    return str.substring(0, i) + replacement + str.substring(i + length);
+  }
+
   public HashSet<ReadEntity> getInputs() {
     return inputs;
   }

Modified: hive/trunk/ql/src/test/queries/clientpositive/quote2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/quote2.q?rev=1627566&r1=1627565&r2=1627566&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/quote2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/quote2.q Thu Sep 25 16:08:28 2014
@@ -10,6 +10,7 @@ SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src
@@ -24,6 +25,7 @@ SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src

Modified: hive/trunk/ql/src/test/results/clientpositive/quote2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/quote2.q.out?rev=1627566&r1=1627565&r2=1627566&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/quote2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/quote2.q.out Thu Sep 25 16:08:28 2014
@@ -8,6 +8,7 @@ SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src
@@ -23,6 +24,7 @@ SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src
@@ -40,12 +42,12 @@ STAGE PLANS:
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: 'abc' (type: string), 'abc' (type: string), 'abc'' (type: string), 'abc"' (type: string), 'abc\' (type: string), 'abc\' (type: string), 'abc\'' (type: string), 'abc\"' (type: string), 'abc\\' (type: string), 'abc\\' (type: string), 'abc\\'' (type: string), 'abc\\"' (type: string), 'abc\\\' (type: string), 'abc\\\' (type: string), 'abc""""\' (type: string), 'abc''''\' (type: string), 'awk '{print NR"\t"$0}'' (type: string), 'tab	tab' (type: string), 'tab	tab' (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18
-            Statistics: Num rows: 500 Data size: 857000 Basic stats: COMPLETE Column stats: COMPLETE
+            expressions: 'abc' (type: string), 'abc' (type: string), 'abc'' (type: string), 'abc"' (type: string), 'abc\' (type: string), 'abc\' (type: string), 'abc\'' (type: string), 'abc\"' (type: string), 'abc\\' (type: string), 'abc\\' (type: string), 'abc\\'' (type: string), 'abc\\"' (type: string), 'abc\\\' (type: string), 'abc\\\' (type: string), 'abc""""\' (type: string), 'abc''''\' (type: string), 'mysql_%\_\%' (type: string), 'mysql\\_\\\%' (type: string), 'awk '{print NR"\t"$0}'' (type: string), 'tab	tab' (type: string), 'tab	tab' (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+            Statistics: Num rows: 500 Data size: 952500 Basic stats: COMPLETE Column stats: COMPLETE
             Limit
               Number of rows: 1
-              Statistics: Num rows: 1 Data size: 1714 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1905 Basic stats: COMPLETE Column stats: COMPLETE
               ListSink
 
 PREHOOK: query: SELECT
@@ -57,6 +59,7 @@ PREHOOK: query: SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src
@@ -73,6 +76,7 @@ POSTHOOK: query: SELECT
     'abc\\\\\'',  "abc\\\\\"",
     'abc\\\\\\',  "abc\\\\\\",
     'abc""""\\',  "abc''''\\",
+    'mysql_%\\_\%', 'mysql\\\_\\\\\%',
     "awk '{print NR\"\\t\"$0}'",
     'tab\ttab',   "tab\ttab"
 FROM src
@@ -80,4 +84,4 @@ LIMIT 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-abc	abc	abc'	abc"	abc\	abc\	abc\'	abc\"	abc\\	abc\\	abc\\'	abc\\"	abc\\\	abc\\\	abc""""\	abc''''\	awk '{print NR"\t"$0}'	tab	tab	tab	tab
+abc	abc	abc'	abc"	abc\	abc\	abc\'	abc\"	abc\\	abc\\	abc\\'	abc\\"	abc\\\	abc\\\	abc""""\	abc''''\	mysql_%\_\%	mysql\\_\\\%	awk '{print NR"\t"$0}'	tab	tab	tab	tab