You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/12/06 07:02:30 UTC
svn commit: r1417741 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
test/queries/clientpositive/metadataonly1.q
test/results/clientpositive/metadataonly1.q.out
Author: namit
Date: Thu Dec 6 06:02:29 2012
New Revision: 1417741
URL: http://svn.apache.org/viewvc?rev=1417741&view=rev
Log:
HIVE-3594 When the Group By partition column type is Timestamp, or a STRING whose format contains "HH:MM:SS",
a URISyntaxException occurs (Navis via namit)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q
hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java Thu Dec 6 06:02:29 2012
@@ -230,11 +230,11 @@ public class MetadataOnlyOptimizer imple
List<String> paths = getPathsForAlias(work, alias);
for (String path : paths) {
- PartitionDesc newPartition = changePartitionToMetadataOnly(work.getPathToPartitionInfo().get(
- path));
+ PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
+ PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc);
Path fakePath = new Path(physicalContext.getContext().getMRTmpFileURI()
+ newPartition.getTableName()
- + newPartition.getPartSpec().toString());
+ + encode(newPartition.getPartSpec()));
work.getPathToPartitionInfo().remove(path);
work.getPathToPartitionInfo().put(fakePath.getName(), newPartition);
ArrayList<String> aliases = work.getPathToAliases().remove(path);
@@ -242,6 +242,11 @@ public class MetadataOnlyOptimizer imple
}
}
+ // considered using URLEncoder, but it seemed too much
+ private String encode(Map<String, String> partSpec) {
+ return partSpec.toString().replaceAll("[:/#\\?]", "_");
+ }
+
private void convertToMetadataOnlyQuery(MapredWork work,
TableScanOperator tso) {
String alias = getAliasForTableScanOperator(work, tso);
Modified: hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/metadataonly1.q Thu Dec 6 06:02:29 2012
@@ -35,4 +35,11 @@ explain extended select max(ds) from TES
select max(ds) from TEST1;
select distinct ds from srcpart;
-select min(ds),max(ds) from srcpart;
\ No newline at end of file
+select min(ds),max(ds) from srcpart;
+
+-- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01');
+alter table TEST2 add partition (ds='01:10:20', hr='02');
+
+explain extended select ds, count(distinct hr) from TEST2 group by ds;
+select ds, count(distinct hr) from TEST2 group by ds;
Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1417741&r1=1417740&r2=1417741&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Thu Dec 6 06:02:29 2012
@@ -1509,3 +1509,328 @@ POSTHOOK: Input: default@srcpart@ds=2008
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
#### A masked pattern was here ####
2008-04-08 2008-04-09
+PREHOOK: query: -- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@test2
+POSTHOOK: query: -- HIVE-3594 URI encoding for temporary path
+alter table TEST2 add partition (ds='01:10:10', hr='01')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2@ds=01%3A10%3A10/hr=01
+PREHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@test2
+POSTHOOK: query: alter table TEST2 add partition (ds='01:10:20', hr='02')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@test2
+POSTHOOK: Output: default@test2@ds=01%3A10%3A20/hr=02
+PREHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select ds, count(distinct hr) from TEST2 group by ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME TEST2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL hr)))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test2
+ TableScan
+ alias: test2
+ GatherStats: false
+ Select Operator
+ expressions:
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ outputColumnNames: ds, hr
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT hr)
+ bucketGroup: false
+ keys:
+ expr: ds
+ type: string
+ expr: hr
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Needs Tagging: false
+ Path -> Alias:
+ -mr-10002default.test2{ds=01_10_10, hr=01} [test2]
+ -mr-10003default.test2{ds=01_10_20, hr=02} [test2]
+ -mr-10004default.test2{ds=1, hr=1} [test2]
+ -mr-10005default.test2{ds=1, hr=2} [test2]
+ -mr-10006default.test2{ds=1, hr=3} [test2]
+ Path -> Partition:
+ -mr-10002default.test2{ds=01_10_10, hr=01}
+ Partition
+ base file name: hr=01
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 01:10:10
+ hr 01
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test2
+ name: default.test2
+ -mr-10003default.test2{ds=01_10_20, hr=02}
+ Partition
+ base file name: hr=02
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 01:10:20
+ hr 02
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test2
+ name: default.test2
+ -mr-10004default.test2{ds=1, hr=1}
+ Partition
+ base file name: hr=1
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 1
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test2
+ name: default.test2
+ -mr-10005default.test2{ds=1, hr=2}
+ Partition
+ base file name: hr=2
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 2
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test2
+ name: default.test2
+ -mr-10006default.test2{ds=1, hr=3}
+ Partition
+ base file name: hr=3
+ input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ partition values:
+ ds 1
+ hr 3
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.NullStructSerDe
+
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ bucket_count -1
+ columns a,b
+ columns.types int:double
+#### A masked pattern was here ####
+ name default.test2
+ partition_columns ds/hr
+ serialization.ddl struct test2 { i32 a, double b}
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test2
+ name: default.test2
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types string:bigint
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Truncated Path -> Alias:
+ -mr-10002default.test2{ds=01_10_10, hr=01} [test2]
+ -mr-10003default.test2{ds=01_10_20, hr=02} [test2]
+ -mr-10004default.test2{ds=1, hr=1} [test2]
+ -mr-10005default.test2{ds=1, hr=2} [test2]
+ -mr-10006default.test2{ds=1, hr=3} [test2]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select ds, count(distinct hr) from TEST2 group by ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01
+PREHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02
+PREHOOK: Input: default@test2@ds=1/hr=1
+PREHOOK: Input: default@test2@ds=1/hr=2
+PREHOOK: Input: default@test2@ds=1/hr=3
+#### A masked pattern was here ####
+POSTHOOK: query: select ds, count(distinct hr) from TEST2 group by ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2@ds=01%3A10%3A10/hr=01
+POSTHOOK: Input: default@test2@ds=01%3A10%3A20/hr=02
+POSTHOOK: Input: default@test2@ds=1/hr=1
+POSTHOOK: Input: default@test2@ds=1/hr=2
+POSTHOOK: Input: default@test2@ds=1/hr=3
+#### A masked pattern was here ####
+01:10:10 1
+01:10:20 1
+1 3