You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/14 07:55:33 UTC
svn commit: r836131 - in /hadoop/hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/
ql/src/java/org/apache/hadoop/hive/ql/plan/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: namit
Date: Sat Nov 14 06:55:32 2009
New Revision: 836131
URL: http://svn.apache.org/viewvc?rev=836131&view=rev
Log:
SELECT pageid, UDTF(adid_list)
[Title]
Summary:
Trac Bug: #
Blame Rev:
Reviewed By:
Test Plan:
Revert Plan:
Database Impact:
Memcache Impact:
Other Notes:
Important:
- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sat Nov 14 06:55:32 2009
@@ -262,6 +262,9 @@
HIVE-912 ctas name should be case insensitive (Ning Zhang via namit)
+ HIVE-929. Use a new parameter to check whether to merge files or not
+ (He Yongqiang via namit)
+
Release 0.4.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sat Nov 14 06:55:32 2009
@@ -158,7 +158,8 @@
HIVEMERGEMAPFILES("hive.merge.mapfiles", true),
HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false),
HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long)(256*1000*1000)),
-
+ HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long)(16*1000*1000)),
+
HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000),
HIVEJOBPROGRESS("hive.task.progress", false),
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Sat Nov 14 06:55:32 2009
@@ -89,6 +89,8 @@
// check if a map-reduce job is needed to merge the files
// If the current size is smaller than the target, merge
long trgtSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESSIZE);
+ long avgConditionSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE);
+ trgtSize = trgtSize > avgConditionSize ? trgtSize : avgConditionSize;
try {
// If the input file does not exist, replace it by an empty file
@@ -101,8 +103,8 @@
for (FileStatus fStat : fStats)
totalSz += fStat.getLen();
- long currSz = totalSz / fStats.length;
- if ((currSz < trgtSize) && (fStats.length > 1)) {
+ long currAvgSz = totalSz / fStats.length;
+ if ((currAvgSz < avgConditionSize) && (fStats.length > 1)) {
// also set the number of reducers
Task<? extends Serializable> tsk = ctx.getListTasks().get(1);
mapredWork work = (mapredWork)tsk.getWork();
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q Sat Nov 14 06:55:32 2009
@@ -13,3 +13,22 @@
select * from dest1;
drop table dest1;
+
+
+create table test_src(key string, value string) partitioned by (ds string);
+create table dest1(key string);
+
+insert overwrite table test_src partition(ds='101') select * from src;
+insert overwrite table test_src partition(ds='102') select * from src;
+
+explain
+insert overwrite table dest1 select key from test_src;
+insert overwrite table dest1 select key from test_src;
+
+set hive.merge.smallfiles.avgsize=16;
+explain
+insert overwrite table dest1 select key from test_src;
+insert overwrite table dest1 select key from test_src;
+
+drop table test_src;
+drop table dest1;
\ No newline at end of file
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out Sat Nov 14 06:55:32 2009
@@ -93,10 +93,10 @@
Move Operator
files:
hdfs directory: true
- destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/1186355480/10000
+ destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/585401858/10000
Map Reduce
Alias -> Map Operator Tree:
- file:/data/users/njain/hive5/hive5/build/ql/tmp/443335059/10002
+ file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/846450529/10002
Reduce Output Operator
sort order:
Map-reduce partition columns:
@@ -143,11 +143,11 @@
PREHOOK: query: select * from dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1168542666/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/730713753/10000
POSTHOOK: query: select * from dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1168542666/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/730713753/10000
0 3
10 1
100 2
@@ -462,3 +462,211 @@
POSTHOOK: query: drop table dest1
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: default@dest1
+PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: create table dest1(key string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest1(key string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: insert overwrite table test_src partition(ds='101') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src@ds=101
+POSTHOOK: query: insert overwrite table test_src partition(ds='101') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src@ds=101
+PREHOOK: query: insert overwrite table test_src partition(ds='102') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src@ds=102
+POSTHOOK: query: insert overwrite table test_src partition(ds='102') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src@ds=102
+PREHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF test_src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-4 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test_src
+ TableScan
+ alias: test_src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+ Stage: Stage-4
+ Conditional Operator
+ list of dependent Tasks:
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/353161784/10000
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/2091577421/10002
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+
+PREHOOK: query: insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src@ds=101
+PREHOOK: Input: default@test_src@ds=102
+PREHOOK: Output: default@dest1
+POSTHOOK: query: insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src@ds=101
+POSTHOOK: Input: default@test_src@ds=102
+POSTHOOK: Output: default@dest1
+PREHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF test_src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-4 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ test_src
+ TableScan
+ alias: test_src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: _col0
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+ Stage: Stage-4
+ Conditional Operator
+ list of dependent Tasks:
+ Move Operator
+ files:
+ hdfs directory: true
+ destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1533278841/10000
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1738586251/10002
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: key
+ type: string
+ Reduce Operator Tree:
+ Extract
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: dest1
+
+
+PREHOOK: query: insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src@ds=101
+PREHOOK: Input: default@test_src@ds=102
+PREHOOK: Output: default@dest1
+POSTHOOK: query: insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src@ds=101
+POSTHOOK: Input: default@test_src@ds=102
+POSTHOOK: Output: default@dest1
+PREHOOK: query: drop table test_src
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test_src
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: drop table dest1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dest1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@dest1