Posted to commits@hive.apache.org by na...@apache.org on 2012/12/06 07:02:30 UTC

svn commit: r1417741 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java test/queries/clientpositive/metadataonly1.q test/results/clientpositive/metadataonly1.q.out

Author: namit
Date: Thu Dec  6 06:02:29 2012
New Revision: 1417741

URL: http://svn.apache.org/viewvc?rev=1417741&view=rev
Log:
HIVE-3594 When a group-by partition column is of type timestamp, or is a string
whose format contains "HH:MM:SS", a URISyntaxException occurs (Navis via namit)
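
For context, a minimal sketch of the failure mode (illustrative, not part of
the commit; the class name is hypothetical and the partition values are taken
from the new test case below). Hadoop's Path constructor treats a colon that
appears before any slash as a URI scheme delimiter, so a relative name built
from an unescaped partition spec cannot be parsed:

    import org.apache.hadoop.fs.Path;

    public class ColonPathDemo {
      public static void main(String[] args) {
        // A name of the kind the optimizer derived from the fake path before
        // this change: table name + partSpec.toString(), unescaped.
        String name = "default.test2{ds=01:10:10, hr=01}";
        try {
          new Path(name);
        } catch (IllegalArgumentException e) {
          // Path takes "default.test2{ds=01" for a URI scheme and throws an
          // IllegalArgumentException wrapping a java.net.URISyntaxException.
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }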


Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
    hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q
    hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Thu Dec  6 06:02:29 2012
@@ -230,11 +230,11 @@ public class MetadataOnlyOptimizer imple
 
       List<String> paths = getPathsForAlias(work, alias);
       for (String path : paths) {
-        PartitionDesc newPartition = changePartitionToMetadataOnly(work.getPathToPartitionInfo().get(
-            path));
+        PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
+        PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc);
         Path fakePath = new Path(physicalContext.getContext().getMRTmpFileURI()
             + newPartition.getTableName()
-            + newPartition.getPartSpec().toString());
+            + encode(newPartition.getPartSpec()));
         work.getPathToPartitionInfo().remove(path);
         work.getPathToPartitionInfo().put(fakePath.getName(), newPartition);
         ArrayList<String> aliases = work.getPathToAliases().remove(path);
@@ -242,6 +242,11 @@ public class MetadataOnlyOptimizer imple
       }
     }
 
+    // considered using URLEncoder, but it seemed too much
+    private String encode(Map<String, String> partSpec) {
+      return partSpec.toString().replaceAll("[:/#\\?]", "_");
+    }
+
     private void convertToMetadataOnlyQuery(MapredWork work,
         TableScanOperator tso) {
       String alias = getAliasForTableScanOperator(work, tso);
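
A standalone sketch of what the new private encode() helper does (the class
and main method here are hypothetical scaffolding; the substitution itself is
the one-line body added above). Instead of full URL-encoding, the four
URI-significant characters ':', '/', '#' and '?' are replaced with '_':

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class EncodeDemo {
      // Same regex as MetadataOnlyOptimizer.encode(): matches : / # ?
      static String encode(Map<String, String> partSpec) {
        return partSpec.toString().replaceAll("[:/#\\?]", "_");
      }

      public static void main(String[] args) {
        Map<String, String> partSpec = new LinkedHashMap<String, String>();
        partSpec.put("ds", "01:10:10");
        partSpec.put("hr", "01");
        // Prints {ds=01_10_10, hr=01} -- the sanitized form visible in the
        // fake-path names in the updated metadataonly1.q.out below.
        System.out.println(encode(partSpec));
      }
    }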

Modified: hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q Thu Dec  6 06:02:29 2012
@@ -35,4 +35,11 @@ explain extended select max(ds) from TES
 select max(ds) from TEST1;
 
 select distinct ds from srcpart;
-select min(ds),max(ds) from srcpart;
\ No newline at end of file
+select min(ds),max(ds) from srcpart;
+
+-- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01');
+alter table TEST2 add partition (ds='01:10:20', hr='02');
+
+explain extended select ds, count(distinct hr) from TEST2 group by ds;
+select ds, count(distinct hr) from TEST2 group by ds;
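
Note that the metastore stores such partitions under percent-encoded directory
names, which is why the expected output below lists the new partitions as
ds=01%3A10%3A10/hr=01 and ds=01%3A10%3A20/hr=02 rather than with literal
colons. A rough sketch of that encoding (an illustrative reimplementation with
a reduced character set, not Hive's actual escaping code):

    public class EscapeDemo {
      // Percent-encode a partition value for use as a path component.
      // Hive escapes a larger set of characters; ':' is enough for this test.
      static String escapePathName(String value) {
        StringBuilder sb = new StringBuilder();
        for (char c : value.toCharArray()) {
          if (c == ':' || c == '/' || c == '%' || c == '#') {
            sb.append('%').append(String.format("%02X", (int) c));
          } else {
            sb.append(c);
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        // Prints ds=01%3A10%3A10, matching the PREHOOK/POSTHOOK partition
        // names in the expected output below.
        System.out.println("ds=" + escapePathName("01:10:10"));
      }
    }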

Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Thu Dec  6 06:02:29 2012
@@ -1509,3 +1509,328 @@ POSTHOOK: Input: default@srcpart@ds=2008
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
 #### A masked pattern was here ####
 2008-04-08	2008-04-09
+PREHOOK: query: -- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@test2
+POSTHOOK: query: -- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2@ds=01%3A10%3A10/hr=01
+PREHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@test2
+POSTHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2@ds=01%3A10%3A20/hr=02
+PREHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL hr)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        test2 
+          TableScan
+            alias: test2
+            GatherStats: false
+            Select Operator
+              expressions:
+                    expr: ds
+                    type: string
+                    expr: hr
+                    type: string
+              outputColumnNames: ds, hr
+              Group By Operator
+                aggregations:
+                      expr: count(DISTINCT hr)
+                bucketGroup: false
+                keys:
+                      expr: ds
+                      type: string
+                      expr: hr
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col2
+                        type: bigint
+      Needs Tagging: false
+      Path -> Alias:
+        -mr-10002default.test2{ds=01_10_10, hr=01} [test2]
+        -mr-10003default.test2{ds=01_10_20, hr=02} [test2]
+        -mr-10004default.test2{ds=1, hr=1} [test2]
+        -mr-10005default.test2{ds=1, hr=2} [test2]
+        -mr-10006default.test2{ds=1, hr=3} [test2]
+      Path -> Partition:
+        -mr-10002default.test2{ds=01_10_10, hr=01} 
+          Partition
+            base file name: hr=01
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 01:10:10
+              hr 01
+            properties:
+              bucket_count -1
+              columns a,b
+              columns.types int:double
+#### A masked pattern was here ####
+              name default.test2
+              partition_columns ds/hr
+              serialization.ddl struct test2 { i32 a, double b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns a,b
+                columns.types int:double
+#### A masked pattern was here ####
+                name default.test2
+                partition_columns ds/hr
+                serialization.ddl struct test2 { i32 a, double b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test2
+            name: default.test2
+        -mr-10003default.test2{ds=01_10_20, hr=02} 
+          Partition
+            base file name: hr=02
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 01:10:20
+              hr 02
+            properties:
+              bucket_count -1
+              columns a,b
+              columns.types int:double
+#### A masked pattern was here ####
+              name default.test2
+              partition_columns ds/hr
+              serialization.ddl struct test2 { i32 a, double b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns a,b
+                columns.types int:double
+#### A masked pattern was here ####
+                name default.test2
+                partition_columns ds/hr
+                serialization.ddl struct test2 { i32 a, double b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test2
+            name: default.test2
+        -mr-10004default.test2{ds=1, hr=1} 
+          Partition
+            base file name: hr=1
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 1
+              hr 1
+            properties:
+              bucket_count -1
+              columns a,b
+              columns.types int:double
+#### A masked pattern was here ####
+              name default.test2
+              partition_columns ds/hr
+              serialization.ddl struct test2 { i32 a, double b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns a,b
+                columns.types int:double
+#### A masked pattern was here ####
+                name default.test2
+                partition_columns ds/hr
+                serialization.ddl struct test2 { i32 a, double b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test2
+            name: default.test2
+        -mr-10005default.test2{ds=1, hr=2} 
+          Partition
+            base file name: hr=2
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 1
+              hr 2
+            properties:
+              bucket_count -1
+              columns a,b
+              columns.types int:double
+#### A masked pattern was here ####
+              name default.test2
+              partition_columns ds/hr
+              serialization.ddl struct test2 { i32 a, double b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns a,b
+                columns.types int:double
+#### A masked pattern was here ####
+                name default.test2
+                partition_columns ds/hr
+                serialization.ddl struct test2 { i32 a, double b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test2
+            name: default.test2
+        -mr-10006default.test2{ds=1, hr=3} 
+          Partition
+            base file name: hr=3
+            input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            partition values:
+              ds 1
+              hr 3
+            properties:
+              bucket_count -1
+              columns a,b
+              columns.types int:double
+#### A masked pattern was here ####
+              name default.test2
+              partition_columns ds/hr
+              serialization.ddl struct test2 { i32 a, double b}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns a,b
+                columns.types int:double
+#### A masked pattern was here ####
+                name default.test2
+                partition_columns ds/hr
+                serialization.ddl struct test2 { i32 a, double b}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.test2
+            name: default.test2
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(DISTINCT KEY._col1:0._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: bigint
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1
+                    columns.types string:bigint
+                    escape.delim \
+                    serialization.format 1
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false
+      Truncated Path -> Alias:
+        -mr-10002default.test2{ds=01_10_10, hr=01} [test2]
+        -mr-10003default.test2{ds=01_10_20, hr=02} [test2]
+        -mr-10004default.test2{ds=1, hr=1} [test2]
+        -mr-10005default.test2{ds=1, hr=2} [test2]
+        -mr-10006default.test2{ds=1, hr=3} [test2]
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select ds, count(distinct hr) from TEST2 group by ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01
+PREHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02
+PREHOOK: Input: default@test2@ds=1/hr=1
+PREHOOK: Input: default@test2@ds=1/hr=2
+PREHOOK: Input: default@test2@ds=1/hr=3
+#### A masked pattern was here ####
+POSTHOOK: query: select ds, count(distinct hr) from TEST2 group by ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01
+POSTHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02
+POSTHOOK: Input: default@test2@ds=1/hr=1
+POSTHOOK: Input: default@test2@ds=1/hr=2
+POSTHOOK: Input: default@test2@ds=1/hr=3
+#### A masked pattern was here ####
+01:10:10	1
+01:10:20	1
+1	3