You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/01/10 22:11:42 UTC

svn commit: r1557258 - in /hive/trunk: itests/qtest/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: xuefu
Date: Fri Jan 10 21:11:42 2014
New Revision: 1557258

URL: http://svn.apache.org/r1557258
Log:
HIVE-5446: Hive can CREATE an external table but not SELECT from it when the file path has spaces

Added:
    hive/trunk/ql/src/test/queries/clientpositive/external_table_with_space_in_location_path.q
    hive/trunk/ql/src/test/results/clientpositive/external_table_with_space_in_location_path.q.out
Modified:
    hive/trunk/itests/qtest/pom.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Modified: hive/trunk/itests/qtest/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/itests/qtest/pom.xml?rev=1557258&r1=1557257&r2=1557258&view=diff
==============================================================================
--- hive/trunk/itests/qtest/pom.xml (original)
+++ hive/trunk/itests/qtest/pom.xml Fri Jan 10 21:11:42 2014
@@ -36,7 +36,7 @@
     <run_disabled>false</run_disabled>
     <clustermode></clustermode>
     <execute.beeline.tests>false</execute.beeline.tests>
-    <minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q</minimr.query.files>
+    <minimr.query.files>stats_counter_partitioned.q,list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,scriptfile1_win.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,schemeAuthority2.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q,import_exported_table.q,stats_counter.q,auto_sortmerge_join_16.q,quotedid_smb.q,file_with_header_footer.q,external_table_with_space_in_location_path.q</minimr.query.files>
     <minimr.query.negative.files>cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q,file_with_header_footer_negative.q</minimr.query.negative.files>
     <minitez.query.files>tez_join_tests.q,tez_joins_explain.q,mrr.q,tez_dml.q,tez_insert_overwrite_local_directory_1.q</minitez.query.files>
     <minitez.query.files.shared>join0.q,join1.q,auto_join0.q,auto_join1.q,bucket2.q,bucket3.q,bucket4.q,count.q,create_merge_compressed.q,cross_join.q,ctas.q,custom_input_output_format.q,disable_merge_for_bucketing.q,enforce_order.q,filter_join_breaktask.q,filter_join_breaktask2.q,groupby1.q,groupby2.q,groupby3.q,having.q,insert1.q,insert_into1.q,insert_into2.q,leftsemijoin.q,limit_pushdown.q,load_dyn_part1.q,load_dyn_part2.q,load_dyn_part3.q,mapjoin_mapjoin.q,mapreduce1.q,mapreduce2.q,merge1.q,merge2.q,metadata_only_queries.q,sample1.q,subquery_in.q,subquery_exists.q,vectorization_15.q,ptf.q</minitez.query.files.shared>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=1557258&r1=1557257&r2=1557258&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Fri Jan 10 21:11:42 2014
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.metada
 
 import java.io.Serializable;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -235,7 +236,19 @@ public class Partition implements Serial
     if (table.isPartitioned()) {
       return new Path(tPartition.getSd().getLocation());
     } else {
-      return new Path(table.getTTable().getSd().getLocation());
+
+      /**
+       * The table location string needs to be constructed as a URI first to
+       * decode the percent-encoded characters in the location path (because the
+       * location is stored as a URI in org.apache.hadoop.hive.ql.metadata.Table before
+       * being saved to the metastore database). This is not necessary for partition locations.
+       */
+      try {
+        return new Path(new URI(table.getTTable().getSd().getLocation()));
+      } catch (URISyntaxException e) {
+        throw new RuntimeException("Invalid table path " +
+          table.getTTable().getSd().getLocation(), e);
+      }
     }
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java?rev=1557258&r1=1557257&r2=1557258&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java Fri Jan 10 21:11:42 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.metada
 import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -244,7 +245,11 @@ public class Table implements Serializab
     if (location == null) {
       return null;
     }
-    return new Path(location);
+    try {
+      return new Path(new URI(location));
+    } catch (URISyntaxException e) {
+      throw new RuntimeException("Invalid table path " + location, e);
+    }
   }
 
   final public String getTableName() {

Added: hive/trunk/ql/src/test/queries/clientpositive/external_table_with_space_in_location_path.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/external_table_with_space_in_location_path.q?rev=1557258&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/external_table_with_space_in_location_path.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/external_table_with_space_in_location_path.q Fri Jan 10 21:11:42 2014
@@ -0,0 +1,23 @@
+dfs ${system:test.dfs.mkdir} hdfs:///tmp/test/;
+
+dfs -copyFromLocal ../../data/files/ext_test_space hdfs:///tmp/test/ext_test_space;
+
+CREATE EXTERNAL TABLE spacetest (id int, message string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' LOCATION 'hdfs:///tmp/test/ext_test_space/folder+with space';
+
+SELECT * FROM spacetest;
+
+SELECT count(*) FROM spacetest;
+
+DROP TABLE spacetest;
+
+CREATE EXTERNAL TABLE spacetestpartition (id int, message string) PARTITIONED BY (day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
+
+ALTER TABLE spacetestpartition ADD PARTITION (day=10) LOCATION 'hdfs:///tmp/test/ext_test_space/folder+with space';
+
+SELECT * FROM spacetestpartition;
+
+SELECT count(*) FROM spacetestpartition;
+
+DROP TABLE spacetestpartition;
+
+dfs -rmr hdfs:///tmp/test;

Added: hive/trunk/ql/src/test/results/clientpositive/external_table_with_space_in_location_path.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/external_table_with_space_in_location_path.q.out?rev=1557258&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/external_table_with_space_in_location_path.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/external_table_with_space_in_location_path.q.out Fri Jan 10 21:11:42 2014
@@ -0,0 +1,78 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@spacetest
+PREHOOK: query: SELECT * FROM spacetest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spacetest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM spacetest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spacetest
+#### A masked pattern was here ####
+12	jason
+13	steven
+15	joe
+PREHOOK: query: SELECT count(*) FROM spacetest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spacetest
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*) FROM spacetest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spacetest
+#### A masked pattern was here ####
+3
+PREHOOK: query: DROP TABLE spacetest
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@spacetest
+PREHOOK: Output: default@spacetest
+POSTHOOK: query: DROP TABLE spacetest
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@spacetest
+POSTHOOK: Output: default@spacetest
+PREHOOK: query: CREATE EXTERNAL TABLE spacetestpartition (id int, message string) PARTITIONED BY (day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE EXTERNAL TABLE spacetestpartition (id int, message string) PARTITIONED BY (day int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@spacetestpartition
+#### A masked pattern was here ####
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@spacetestpartition
+#### A masked pattern was here ####
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@spacetestpartition
+POSTHOOK: Output: default@spacetestpartition@day=10
+PREHOOK: query: SELECT * FROM spacetestpartition
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spacetestpartition
+PREHOOK: Input: default@spacetestpartition@day=10
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM spacetestpartition
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spacetestpartition
+POSTHOOK: Input: default@spacetestpartition@day=10
+#### A masked pattern was here ####
+12	jason	10
+13	steven	10
+15	joe	10
+PREHOOK: query: SELECT count(*) FROM spacetestpartition
+PREHOOK: type: QUERY
+PREHOOK: Input: default@spacetestpartition
+PREHOOK: Input: default@spacetestpartition@day=10
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*) FROM spacetestpartition
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@spacetestpartition
+POSTHOOK: Input: default@spacetestpartition@day=10
+#### A masked pattern was here ####
+3
+PREHOOK: query: DROP TABLE spacetestpartition
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@spacetestpartition
+PREHOOK: Output: default@spacetestpartition
+POSTHOOK: query: DROP TABLE spacetestpartition
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@spacetestpartition
+POSTHOOK: Output: default@spacetestpartition
+#### A masked pattern was here ####