Posted to commits@hive.apache.org by na...@apache.org on 2009/10/26 23:38:41 UTC

svn commit: r830010 - in /hadoop/hive/trunk: CHANGES.txt metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java ql/src/test/queries/clientpositive/input_part10.q ql/src/test/results/clientpositive/input_part10.q.out

Author: namit
Date: Mon Oct 26 22:38:40 2009
New Revision: 830010

URL: http://svn.apache.org/viewvc?rev=830010&view=rev
Log:
HIVE-883. URISyntaxException when partition value contains special chars.
(Zheng Shao via namit)
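
For context, a minimal standalone illustration (not Hive's actual code path;
the class name is hypothetical) of how a raw partition value containing a
space yields an invalid URI, the kind of failure this patch avoids by
escaping values before they become path components:

    import java.net.URI;
    import java.net.URISyntaxException;

    public class Hive883Illustration {
      public static void main(String[] args) {
        try {
          // A literal space is illegal in a URI, so parsing a path built
          // from the raw partition value 'ds=2008 04 08' throws.
          new URI("/warehouse/part_special/ds=2008 04 08");
        } catch (URISyntaxException e) {
          System.out.println("URISyntaxException: " + e.getMessage());
        }
      }
    }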


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/input_part10.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=830010&r1=830009&r2=830010&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Oct 26 22:38:40 2009
@@ -214,6 +214,9 @@
     HIVE-880. User group information not populated for pre and post hook.
     (Namit Jain via zshao)
 
+    HIVE-883. URISyntaxException when partition value contains special chars.
+    (Zheng Shao via namit)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=830010&r1=830009&r2=830010&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Mon Oct 26 22:38:40 2009
@@ -21,6 +21,7 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.BitSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -161,13 +162,77 @@
     return false;
   }
 
+  // NOTE: This is for generating the internal path name for partitions. Users
+  // should always use the MetaStore API to get the path name for a partition.
+  // Users should not take partition values and turn them into a path name
+  // themselves, because the logic below may change in the future.
+  //
+  // In the future it is safe to add new chars to the escape list; old data
+  // won't be corrupted, because the full path name is stored in the metastore.
+  // In that case Hive will continue to read the old data, but it will use the
+  // new names when it creates new partitions.
+  static BitSet charToEscape = new BitSet(128);
+  static {
+    for (char c = 0; c < ' ' ; c++) {
+      charToEscape.set(c);
+    }
+    char[] clist = new char[] { '"', '#', '%', '\'', '*', '/', ':',
+        '=', '?', '\\', '\u00FF'
+    };
+    for (char c : clist) {
+      charToEscape.set(c);
+    }
+  }
+  static boolean needsEscaping(char c) {
+    return c >= 0 && c < charToEscape.size()
+        && charToEscape.get(c); 
+  }
+  
+  static String escapePathName(String path) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < path.length(); i++) {
+      char c = path.charAt(i);
+      if (needsEscaping(c)) {
+        sb.append('%');
+        sb.append(String.format("%1$02X", (int)c));
+      } else {
+        sb.append(c);
+      }
+    }
+    return sb.toString();
+  }
+  static String unescapePathName(String path) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < path.length(); i++) {
+      char c = path.charAt(i);
+      if (c == '%' && i + 2 < path.length()) {
+        int code = -1;
+        try {
+          code = Integer.valueOf(path.substring(i+1, i+3), 16);
+        } catch (Exception e) {
+          code = -1;
+        }
+        if (code >= 0) {
+          sb.append((char)code);
+          i += 2;
+          continue;
+        }
+      }
+      sb.append(c);
+    }    
+    return sb.toString();
+  }
+  
   public static String makePartName(Map<String, String> spec) throws MetaException {
     StringBuffer suffixBuf = new StringBuffer();
     for(Entry<String, String> e: spec.entrySet()) {
       if(e.getValue() == null  || e.getValue().length() == 0) {
         throw new MetaException("Partition spec is incorrect. " + spec);
       }
-      suffixBuf.append(e.getKey() + "=" + e.getValue() + "/");
+      suffixBuf.append(escapePathName(e.getKey()));
+      suffixBuf.append('=');
+      suffixBuf.append(escapePathName(e.getValue()));
+      suffixBuf.append(Path.SEPARATOR);
     }
     return suffixBuf.toString();
   }
@@ -184,8 +249,8 @@
       String component = currPath.getName();
       Matcher m = pat.matcher(component);
       if (m.matches()) {
-        String k = m.group(1);
-        String v = m.group(2);
+        String k = unescapePathName(m.group(1));
+        String v = unescapePathName(m.group(2));
 
         if (partSpec.containsKey(k)) {
           throw new MetaException("Partition name is invalid. Key " + k + " defined at two levels");
@@ -241,9 +306,9 @@
       if(i > 0) {
         name.append(Path.SEPARATOR);
       }
-      name.append((partCols.get(i)).getName().toLowerCase());
+      name.append(escapePathName((partCols.get(i)).getName().toLowerCase()));
       name.append('=');
-      name.append(vals.get(i));
+      name.append(escapePathName(vals.get(i)));
     }
     return name.toString();
   }
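
For reference, a standalone sketch of the escaping scheme above (hypothetical
class and method names; the authoritative logic is in Warehouse.java as shown
in the diff). Note that the space character is not on the escape list, so it
passes through unescaped (compare the ds values in the test output below):

    import java.util.BitSet;

    public class EscapeSketch {
      static final BitSet CHAR_TO_ESCAPE = new BitSet(128);
      static {
        // Control characters are never safe in a path name.
        for (char c = 0; c < ' '; c++) {
          CHAR_TO_ESCAPE.set(c);
        }
        // Characters that confuse URIs, globs, or key=value parsing.
        for (char c : new char[] { '"', '#', '%', '\'', '*', '/', ':',
                                   '=', '?', '\\', '\u00FF' }) {
          CHAR_TO_ESCAPE.set(c);
        }
      }

      static String escape(String s) {
        StringBuilder sb = new StringBuilder();
        for (char c : s.toCharArray()) {
          if (c < CHAR_TO_ESCAPE.size() && CHAR_TO_ESCAPE.get(c)) {
            // Same encoding as the patch: '%' plus two uppercase hex digits.
            sb.append(String.format("%%%02X", (int) c));
          } else {
            sb.append(c);
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        System.out.println(escape("10:11:12=455"));  // 10%3A11%3A12%3D455
        System.out.println(escape("2008 04 08"));    // 2008 04 08
      }
    }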

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/input_part10.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/input_part10.q?rev=830010&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/input_part10.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/input_part10.q Mon Oct 26 22:38:40 2009
@@ -0,0 +1,20 @@
+CREATE TABLE part_special (
+  a STRING,
+  b STRING
+) PARTITIONED BY (
+  ds STRING,
+  ts STRING
+);
+
+EXPLAIN
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1;
+
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1;
+
+DESCRIBE EXTENDED part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455');
+
+SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455';
+
+DROP TABLE part_special;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out?rev=830010&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input_part10.q.out Mon Oct 26 22:38:40 2009
@@ -0,0 +1,115 @@
+PREHOOK: query: CREATE TABLE part_special (
+  a STRING,
+  b STRING
+) PARTITIONED BY (
+  ds STRING,
+  ts STRING
+)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE part_special (
+  a STRING,
+  b STRING
+) PARTITIONED BY (
+  ds STRING,
+  ts STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@part_special
+PREHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB part_special (TOK_PARTSPEC (TOK_PARTVAL ds '2008 04 08') (TOK_PARTVAL ts '10:11:12=455')))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR 2)) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src 
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: 1
+                    type: int
+                    expr: 2
+                    type: int
+              outputColumnNames: _col0, _col1
+              Limit
+                Reduce Output Operator
+                  sort order: 
+                  tag: -1
+                  value expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: int
+      Reduce Operator Tree:
+        Extract
+          Limit
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: part_special
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2008 04 08
+            ts 10:11:12=455
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: part_special
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@part_special@ds=2008 04 08/ts=10%3A11%3A12%3D455
+POSTHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@part_special@ds=2008 04 08/ts=10%3A11%3A12%3D455
+PREHOOK: query: DESCRIBE EXTENDED part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE EXTENDED part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+POSTHOOK: type: DESCTABLE
+a	string	
+b	string	
+ds	string	
+ts	string	
+	 	 
+Detailed Partition Information	Partition(values:[2008 04 08, 10:11:12=455], dbName:default, tableName:part_special, createTime:1256125568, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string, comment:null), FieldSchema(name:b, type:string, comment:null)], location:file:/data/users/zshao/tools/deploy-trunk-apache-hive/build/ql/test/data/warehouse/part_special/ds=2008 04 08/ts=10%3A11%3A12%3D455, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{transient_lastDdlTime=1256125568})	
+PREHOOK: query: SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@part_special@ds=2008 04 08/ts=10%3A11%3A12%3D455
+PREHOOK: Output: file:/data/users/zshao/tools/deploy-trunk-apache-hive/build/ql/tmp/1606481639/10000
+POSTHOOK: query: SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@part_special@ds=2008 04 08/ts=10%3A11%3A12%3D455
+POSTHOOK: Output: file:/data/users/zshao/tools/deploy-trunk-apache-hive/build/ql/tmp/1606481639/10000
+1	2	2008 04 08	10:11:12=455
+PREHOOK: query: DROP TABLE part_special
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE part_special
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@part_special