You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/11/05 18:29:30 UTC
svn commit: r1031676 [2/15] - in /hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/ ql/s...
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out?rev=1031676&r1=1031675&r2=1031676&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket_groupby.q.out Fri Nov 5 17:29:28 2010
@@ -11,7 +11,7 @@ key string
value string
ds string
-Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:thiruvel, createTime:1286799299, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/home/thiruvel/projects/hive/hive.unsecure/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{transient_lastDdlTime=1286799299}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:sdong, createTime:1288389460, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{transient_lastDdlTime=1288389460}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
PREHOOK: query: insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -48,36 +48,32 @@ STAGE PLANS:
predicate:
expr: (ds = '100')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '100')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
expr: key
type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
- expr: key
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -111,11 +107,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=100
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-04_391_496085082728658237/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-45_369_7380463323239974897/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=100
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-04_391_496085082728658237/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-45_369_7380463323239974897/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
0 3
@@ -148,7 +144,7 @@ key string
value string
ds string
-Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:thiruvel, createTime:1286799299, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/home/thiruvel/projects/hive/hive.unsecure/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[key], sortCols:[], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=1, numFiles=1, last_modified_by=thiruvel, last_modified_time=1286799306, transient_lastDdlTime=1286799306, numRows=500, totalSi
ze=5812}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:sdong, createTime:1288389460, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[key], sortCols:[], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=1, numFiles=1, last_modified_by=sdong, last_modified_time=1288389468, transient_lastDdlTime=1288389468, numRows=500, totalSize=5812}, viewOr
iginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
PREHOOK: query: insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -191,36 +187,32 @@ STAGE PLANS:
predicate:
expr: (ds = '101')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '101')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
expr: key
type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: key
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -254,11 +246,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-10_631_330475374084282440/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-53_750_4694546524307257085/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-10_631_330475374084282440/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-53_750_4694546524307257085/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -303,36 +295,32 @@ STAGE PLANS:
predicate:
expr: (ds = '101')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '101')
- type: boolean
- Select Operator
- expressions:
- expr: key
- type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: length(key)
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
+ expr: length(key)
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -366,11 +354,11 @@ STAGE PLANS:
PREHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-13_272_378379398402774764/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-57_183_3467407082757519286/-mr-10000
POSTHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-13_272_378379398402774764/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-57-57_183_3467407082757519286/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -406,36 +394,32 @@ STAGE PLANS:
predicate:
expr: (ds = '101')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '101')
- type: boolean
- Select Operator
- expressions:
- expr: key
- type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: abs(length(key))
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
+ expr: abs(length(key))
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: int
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: int
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: int
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -469,11 +453,11 @@ STAGE PLANS:
PREHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-16_007_3235459708019247294/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-00_556_1225036933831928400/-mr-10000
POSTHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-16_007_3235459708019247294/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-00_556_1225036933831928400/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -511,42 +495,38 @@ STAGE PLANS:
predicate:
expr: (ds = '101')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '101')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
expr: key
type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: key
+ expr: 3
+ type: int
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- expr: 3
+ expr: _col1
type: int
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- expr: _col1
- type: int
- sort order: ++
- Map-reduce partition columns:
- expr: _col0
- type: string
- expr: _col1
- type: int
- tag: -1
- value expressions:
- expr: _col2
- type: bigint
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: int
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -582,11 +562,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-18_557_7014524165209144470/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-03_994_2628288731314011109/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-18_557_7014524165209144470/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-03_994_2628288731314011109/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -631,41 +611,37 @@ STAGE PLANS:
predicate:
expr: (ds = '101')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '101')
- type: boolean
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ outputColumnNames: _col0
Select Operator
expressions:
- expr: value
+ expr: _col0
type: string
outputColumnNames: _col0
- Select Operator
- expressions:
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
expr: _col0
type: string
- outputColumnNames: _col0
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -699,11 +675,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-26_089_856130089143672363/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-07_367_3684028022557451628/-mr-10000
POSTHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-26_089_856130089143672363/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-07_367_3684028022557451628/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -801,12 +777,12 @@ PREHOOK: query: select key, count(1) fro
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=100
PREHOOK: Input: default@clustergroupby@ds=101
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-28_594_4859888465200168624/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-10_796_7851925785141685773/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby group by key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=100
POSTHOOK: Input: default@clustergroupby@ds=101
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-28_594_4859888465200168624/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-10_796_7851925785141685773/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1233,7 +1209,7 @@ key string
value string
ds string
-Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:thiruvel, createTime:1286799299, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/home/thiruvel/projects/hive/hive.unsecure/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value], sortCols:[Order(col:key, order:1), Order(col:value, order:1)], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=2, numFiles=2, last_modified_by=thiruvel, last_modified_time=1286799335, tr
ansient_lastDdlTime=1286799335, numRows=1000, totalSize=11624}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:sdong, createTime:1288389460, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value], sortCols:[Order(col:key, order:1), Order(col:value, order:1)], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=2, numFiles=2, last_modified_by=sdong, last_modified_time=1288389494, transient_lastDdlT
ime=1288389494, numRows=1000, totalSize=11624}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
PREHOOK: query: insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -1278,36 +1254,32 @@ STAGE PLANS:
predicate:
expr: (ds = '102')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '102')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
expr: key
type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: key
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1341,11 +1313,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=102
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-39_061_3582799164493598492/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-19_617_397682497046947245/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=102
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-39_061_3582799164493598492/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-19_617_397682497046947245/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1392,36 +1364,32 @@ STAGE PLANS:
predicate:
expr: (ds = '102')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '102')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ outputColumnNames: value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
expr: value
type: string
- outputColumnNames: value
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
- expr: value
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1455,11 +1423,11 @@ STAGE PLANS:
PREHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=102
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-41_567_7781693855424961483/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-23_046_1077430162048304187/-mr-10000
POSTHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=102
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-41_567_7781693855424961483/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-23_046_1077430162048304187/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1506,44 +1474,40 @@ STAGE PLANS:
predicate:
expr: (ds = '102')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '102')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
expr: key
type: string
expr: value
type: string
- outputColumnNames: key, value
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: key
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- expr: value
+ expr: _col1
type: string
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- sort order: ++
- Map-reduce partition columns:
- expr: _col0
- type: string
- expr: _col1
- type: string
- tag: -1
- value expressions:
- expr: _col2
- type: bigint
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1579,11 +1543,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key, value limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=102
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-44_109_2716598788006290449/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-28_476_3643193095660435074/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key, value limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=102
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-44_109_2716598788006290449/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-28_476_3643193095660435074/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1628,7 +1592,7 @@ key string
value string
ds string
-Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:thiruvel, createTime:1286799299, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/home/thiruvel/projects/hive/hive.unsecure/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value, key], sortCols:[Order(col:key, order:1)], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=3, numFiles=3, last_modified_by=thiruvel, last_modified_time=1286799346, transient_lastDdlTime=12
86799346, numRows=1500, totalSize=17436}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
+Detailed Table Information Table(tableName:clustergroupby, dbName:default, owner:sdong, createTime:1288389460, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:pfile:/data/users/sdong/www/hive-trunk/build/ql/test/data/warehouse/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value, key], sortCols:[Order(col:key, order:1)], parameters:{}), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=3, numFiles=3, last_modified_by=sdong, last_modified_time=1288389511, transient_lastDdlTime=1288389511, numRow
s=1500, totalSize=17436}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)
PREHOOK: query: insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -1677,36 +1641,32 @@ STAGE PLANS:
predicate:
expr: (ds = '103')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '103')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: true
+ keys:
expr: key
type: string
- outputColumnNames: key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: true
- keys:
- expr: key
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- mode: hash
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- sort order: +
- Map-reduce partition columns:
- expr: _col0
- type: string
- tag: -1
- value expressions:
- expr: _col1
- type: bigint
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1740,11 +1700,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=103
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-50_284_7568294531708606872/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-36_974_789554075807114106/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=103
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-50_284_7568294531708606872/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-36_974_789554075807114106/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
@@ -1795,44 +1755,40 @@ STAGE PLANS:
predicate:
expr: (ds = '103')
type: boolean
- Filter Operator
- predicate:
- expr: (ds = '103')
- type: boolean
- Select Operator
- expressions:
+ Select Operator
+ expressions:
+ expr: value
+ type: string
+ expr: key
+ type: string
+ outputColumnNames: value, key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
expr: value
type: string
expr: key
type: string
- outputColumnNames: value, key
- Group By Operator
- aggregations:
- expr: count(1)
- bucketGroup: false
- keys:
- expr: value
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
type: string
- expr: key
+ expr: _col1
type: string
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Reduce Output Operator
- key expressions:
- expr: _col0
- type: string
- expr: _col1
- type: string
- sort order: ++
- Map-reduce partition columns:
- expr: _col0
- type: string
- expr: _col1
- type: string
- tag: -1
- value expressions:
- expr: _col2
- type: bigint
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
Reduce Operator Tree:
Group By Operator
aggregations:
@@ -1868,11 +1824,11 @@ STAGE PLANS:
PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@clustergroupby@ds=103
-PREHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-52_910_4870036796285454803/-mr-10000
+PREHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-40_621_2070392858793462231/-mr-10000
POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key limit 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@clustergroupby@ds=103
-POSTHOOK: Output: file:/tmp/thiruvel/hive_2010-10-11_05-15-52_910_4870036796285454803/-mr-10000
+POSTHOOK: Output: file:/tmp/sdong/hive_2010-10-29_14-58-40_621_2070392858793462231/-mr-10000
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]