You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/10/26 22:11:05 UTC
svn commit: r829965 - in /hadoop/hive/branches/branch-0.4: CHANGES.txt
metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
ql/src/test/queries/clientpositive/input_part10.q
ql/src/test/results/clientpositive/input_part10.q.out
Author: namit
Date: Mon Oct 26 21:11:05 2009
New Revision: 829965
URL: http://svn.apache.org/viewvc?rev=829965&view=rev
Log:
HIVE-883. URISyntaxException when partition value contains special chars.
(Zheng Shao via namit)
Added:
hadoop/hive/branches/branch-0.4/ql/src/test/queries/clientpositive/input_part10.q
hadoop/hive/branches/branch-0.4/ql/src/test/results/clientpositive/input_part10.q.out
Modified:
hadoop/hive/branches/branch-0.4/CHANGES.txt
hadoop/hive/branches/branch-0.4/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
Modified: hadoop/hive/branches/branch-0.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/CHANGES.txt?rev=829965&r1=829964&r2=829965&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/CHANGES.txt (original)
+++ hadoop/hive/branches/branch-0.4/CHANGES.txt Mon Oct 26 21:11:05 2009
@@ -28,6 +28,9 @@
HIVE-892. Hive to kill hadoop jobs using POST. (Dhruba Borthakur via zshao)
+ HIVE-883. URISyntaxException when partition value contains special chars.
+ (Zheng Shao via namit)
+
Release 0.4.0
INCOMPATIBLE CHANGES
Modified: hadoop/hive/branches/branch-0.4/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java?rev=829965&r1=829964&r2=829965&view=diff
==============================================================================
--- hadoop/hive/branches/branch-0.4/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java (original)
+++ hadoop/hive/branches/branch-0.4/metastore/src/java/org/apache/hadoop/hive/metastore/Warehouse.java Mon Oct 26 21:11:05 2009
@@ -21,6 +21,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.BitSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -161,13 +162,77 @@
return false;
}
+ // NOTE: This is for generating the internal path name for partitions. Users
+ // should always use the MetaStore API to get the path name for a partition.
+ // Users should not directly take partition values and turn it into a path
+ // name by themselves, because the logic below may change in the future.
+ //
+ // In the future, it's OK to add new chars to the escape list, and old data
+ // won't be corrupt, because the full path name in metastore is stored.
+ // In that case, Hive will continue to read the old data, but when it creates
+ // new partitions, it will use new names.
+ // Bitmap of characters that must be %XX-escaped when they appear in a
+ // partition path component. Sized for ASCII; BitSet auto-grows when the
+ // '\u00FF' (bit 255) entry below is set.
+ static BitSet charToEscape = new BitSet(128);
+ static {
+ // Escape every ASCII control character (0x00 .. 0x1F).
+ for (char c = 0; c < ' ' ; c++) {
+ charToEscape.set(c);
+ }
+ // Characters unsafe in filesystem/URI paths or ambiguous in the
+ // "key=value/" partition-name encoding: note '%' (the escape character
+ // itself), '/' (the path separator), and '=' (the key/value separator).
+ char[] clist = new char[] { '"', '#', '%', '\'', '*', '/', ':',
+ '=', '?', '\\', '\u00FF'
+ };
+ for (char c : clist) {
+ charToEscape.set(c);
+ }
+ }
+ // Returns true if c must be %XX-escaped in a partition path name.
+ // (char is unsigned in Java, so the c >= 0 test is always true; the
+ // size() bound keeps the get() within the allocated bitmap.)
+ static boolean needsEscaping(char c) {
+ return c >= 0 && c < charToEscape.size()
+ && charToEscape.get(c);
+ }
+
+ // Escapes a single partition path component: each character flagged in
+ // charToEscape is replaced by '%' followed by its two-digit uppercase hex
+ // code (e.g. ':' -> "%3A"); all other characters pass through unchanged.
+ // Inverse of unescapePathName.
+ static String escapePathName(String path) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < path.length(); i++) {
+ char c = path.charAt(i);
+ if (needsEscaping(c)) {
+ sb.append('%');
+ // "%1$02X" formats the code point as exactly two uppercase hex digits.
+ sb.append(String.format("%1$02X", (int)c));
+ } else {
+ sb.append(c);
+ }
+ }
+ return sb.toString();
+ }
+ // Decodes a path component produced by escapePathName: each "%XX" (two hex
+ // digits) becomes the character with code XX. A '%' that is not followed by
+ // two valid hex digits, or that sits too close to the end of the string, is
+ // kept literally rather than rejected — tolerant of unescaped legacy paths.
+ static String unescapePathName(String path) {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < path.length(); i++) {
+ char c = path.charAt(i);
+ // i + 2 < length() guarantees substring(i+1, i+3) is in range.
+ if (c == '%' && i + 2 < path.length()) {
+ int code = -1;
+ try {
+ code = Integer.valueOf(path.substring(i+1, i+3), 16);
+ } catch (Exception e) {
+ // Not valid hex: fall through and emit the '%' literally.
+ code = -1;
+ }
+ if (code >= 0) {
+ sb.append((char)code);
+ // Skip the two hex digits just consumed.
+ i += 2;
+ continue;
+ }
+ }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+
// Builds the partition directory suffix "k1=v1/k2=v2/..." from the spec,
// escaping both keys and values so special characters (space, ':', '=', ...)
// cannot corrupt the path (HIVE-883). Iterates spec in its entrySet order;
// rejects null/empty partition values. Returns a string with a trailing
// separator.
public static String makePartName(Map<String, String> spec) throws MetaException {
StringBuffer suffixBuf = new StringBuffer();
for(Entry<String, String> e: spec.entrySet()) {
if(e.getValue() == null || e.getValue().length() == 0) {
throw new MetaException("Partition spec is incorrect. " + spec);
}
- suffixBuf.append(e.getKey() + "=" + e.getValue() + "/");
+ // Escape key and value independently, then join with the literal
+ // '=' and the platform path separator.
+ suffixBuf.append(escapePathName(e.getKey()));
+ suffixBuf.append('=');
+ suffixBuf.append(escapePathName(e.getValue()));
+ suffixBuf.append(Path.SEPARATOR);
}
return suffixBuf.toString();
}
@@ -184,8 +249,8 @@
String component = currPath.getName();
Matcher m = pat.matcher(component);
if (m.matches()) {
- String k = m.group(1);
- String v = m.group(2);
+ String k = unescapePathName(m.group(1));
+ String v = unescapePathName(m.group(2));
if (partSpec.containsKey(k)) {
throw new MetaException("Partition name is invalid. Key " + k + " defined at two levels");
@@ -241,9 +306,9 @@
if(i > 0) {
name.append(Path.SEPARATOR);
}
- name.append((partCols.get(i)).getName().toLowerCase());
+ name.append(escapePathName((partCols.get(i)).getName().toLowerCase()));
name.append('=');
- name.append(vals.get(i));
+ name.append(escapePathName(vals.get(i)));
}
return name.toString();
}
Added: hadoop/hive/branches/branch-0.4/ql/src/test/queries/clientpositive/input_part10.q
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/test/queries/clientpositive/input_part10.q?rev=829965&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/test/queries/clientpositive/input_part10.q (added)
+++ hadoop/hive/branches/branch-0.4/ql/src/test/queries/clientpositive/input_part10.q Mon Oct 26 21:11:05 2009
@@ -0,0 +1,20 @@
+CREATE TABLE part_special (
+ a STRING,
+ b STRING
+) PARTITIONED BY (
+ ds STRING,
+ ts STRING
+);
+
+EXPLAIN
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1;
+
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1;
+
+DESCRIBE EXTENDED part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455');
+
+SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455';
+
+DROP TABLE part_special;
Added: hadoop/hive/branches/branch-0.4/ql/src/test/results/clientpositive/input_part10.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/branches/branch-0.4/ql/src/test/results/clientpositive/input_part10.q.out?rev=829965&view=auto
==============================================================================
--- hadoop/hive/branches/branch-0.4/ql/src/test/results/clientpositive/input_part10.q.out (added)
+++ hadoop/hive/branches/branch-0.4/ql/src/test/results/clientpositive/input_part10.q.out Mon Oct 26 21:11:05 2009
@@ -0,0 +1,82 @@
+query: CREATE TABLE part_special (
+ a STRING,
+ b STRING
+) PARTITIONED BY (
+ ds STRING,
+ ts STRING
+)
+query: EXPLAIN
+INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB part_special (TOK_PARTSPEC (TOK_PARTVAL ds '2008 04 08') (TOK_PARTVAL ts '10:11:12=455')))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR 2)) (TOK_LIMIT 1)))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: 1
+ type: int
+ expr: 2
+ type: int
+ outputColumnNames: _col0, _col1
+ Limit
+ Reduce Output Operator
+ sort order:
+ tag: -1
+ value expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: int
+ Reduce Operator Tree:
+ Extract
+ Limit
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: part_special
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ partition:
+ ds 2008 04 08
+ ts 10:11:12=455
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: part_special
+
+
+query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+SELECT 1, 2 FROM src LIMIT 1
+Input: default/src
+Output: default/part_special/ds=2008 04 08/ts=10%3A11%3A12%3D455
+query: DESCRIBE EXTENDED part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455')
+a string
+b string
+ds string
+ts string
+
+Detailed Partition Information Partition(values:[2008 04 08, 10:11:12=455], dbName:default, tableName:part_special, createTime:0, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:a, type:string, comment:null), FieldSchema(name:b, type:string, comment:null)], location:file:/data/users/njain/hive_commit5/hive_commit5/build/ql/test/data/warehouse/part_special/ds=2008 04 08/ts=10%3A11%3A12%3D455, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{})
+query: SELECT * FROM part_special WHERE ds='2008 04 08' AND ts = '10:11:12=455'
+Input: default/part_special/ds=2008 04 08/ts=10%3A11%3A12%3D455
+Output: file:/data/users/njain/hive_commit5/hive_commit5/build/ql/tmp/987913037/10000
+1 2 2008 04 08 10:11:12=455
+query: DROP TABLE part_special