You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/14 07:55:33 UTC

svn commit: r836131 - in /hadoop/hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ ql/src/java/org/apache/hadoop/hive/ql/plan/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Sat Nov 14 06:55:32 2009
New Revision: 836131

URL: http://svn.apache.org/viewvc?rev=836131&view=rev
Log:
SELECT pageid, UDTF(adid_list)
[Title]

Summary:

Trac Bug: #

Blame Rev:

Reviewed By:

Test Plan:

Revert Plan:

Database Impact:

Memcache Impact:

Other Notes:

Important:

- begin *PUBLIC* platform impact section -
Bugzilla: #
- end platform impact -


Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sat Nov 14 06:55:32 2009
@@ -262,6 +262,9 @@
 
     HIVE-912 ctas name should be case insensitive (Ning Zhang via namit)
 
+    HIVE-929. Use a new parameter to check whether to merge files or not
+    (He Yongqiang via namit)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Sat Nov 14 06:55:32 2009
@@ -158,7 +158,8 @@
     HIVEMERGEMAPFILES("hive.merge.mapfiles", true),
     HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false),
     HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long)(256*1000*1000)),
-
+    HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long)(16*1000*1000)),
+    
     HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000),
 
     HIVEJOBPROGRESS("hive.task.progress", false),

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Sat Nov 14 06:55:32 2009
@@ -89,6 +89,8 @@
     // check if a map-reduce job is needed to merge the files
     // If the current size is smaller than the target, merge
     long trgtSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESSIZE);
+    long avgConditionSize = conf.getLongVar(HiveConf.ConfVars.HIVEMERGEMAPFILESAVGSIZE);
+		trgtSize = trgtSize > avgConditionSize ? trgtSize : avgConditionSize;
     
     try {
       // If the input file does not exist, replace it by a empty file
@@ -101,8 +103,8 @@
         for (FileStatus fStat : fStats) 
           totalSz += fStat.getLen();
       
-        long currSz = totalSz / fStats.length;
-        if ((currSz < trgtSize) && (fStats.length > 1)) {
+        long currAvgSz = totalSz / fStats.length;
+        if ((currAvgSz < avgConditionSize) && (fStats.length > 1)) {
           // also set the number of reducers
           Task<? extends Serializable> tsk = ctx.getListTasks().get(1);
           mapredWork work = (mapredWork)tsk.getWork();

Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/merge1.q Sat Nov 14 06:55:32 2009
@@ -13,3 +13,22 @@
 select * from dest1;
 
 drop table dest1;
+
+
+create table test_src(key string, value string) partitioned by (ds string);
+create table dest1(key string);
+
+insert overwrite table test_src partition(ds='101') select * from src; 
+insert overwrite table test_src partition(ds='102') select * from src;
+
+explain 
+insert overwrite table dest1 select key from test_src;
+insert overwrite table dest1 select key from test_src;
+
+set hive.merge.smallfiles.avgsize=16;
+explain
+insert overwrite table dest1 select key from test_src;
+insert overwrite table dest1 select key from test_src;
+
+drop table test_src;
+drop table dest1;
\ No newline at end of file

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out?rev=836131&r1=836130&r2=836131&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/merge1.q.out Sat Nov 14 06:55:32 2009
@@ -93,10 +93,10 @@
           Move Operator
             files:
                 hdfs directory: true
-                destination: file:/data/users/njain/hive5/hive5/build/ql/tmp/1186355480/10000
+                destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/585401858/10000
           Map Reduce
             Alias -> Map Operator Tree:
-              file:/data/users/njain/hive5/hive5/build/ql/tmp/443335059/10002 
+              file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/846450529/10002 
                   Reduce Output Operator
                     sort order: 
                     Map-reduce partition columns:
@@ -143,11 +143,11 @@
 PREHOOK: query: select * from dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1168542666/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/730713753/10000
 POSTHOOK: query: select * from dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1168542666/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/730713753/10000
 0	3
 10	1
 100	2
@@ -462,3 +462,211 @@
 POSTHOOK: query: drop table dest1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Output: default@dest1
+PREHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table test_src(key string, value string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: create table dest1(key string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table dest1(key string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest1
+PREHOOK: query: insert overwrite table test_src partition(ds='101') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src@ds=101
+POSTHOOK: query: insert overwrite table test_src partition(ds='101') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src@ds=101
+PREHOOK: query: insert overwrite table test_src partition(ds='102') select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_src@ds=102
+POSTHOOK: query: insert overwrite table test_src partition(ds='102') select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_src@ds=102
+PREHOOK: query: explain 
+insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF test_src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        test_src 
+          TableScan
+            alias: test_src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: dest1
+
+  Stage: Stage-4
+    Conditional Operator
+      list of dependent Tasks:
+          Move Operator
+            files:
+                hdfs directory: true
+                destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/353161784/10000
+          Map Reduce
+            Alias -> Map Operator Tree:
+              file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/2091577421/10002 
+                  Reduce Output Operator
+                    sort order: 
+                    Map-reduce partition columns:
+                          expr: rand()
+                          type: double
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: string
+            Reduce Operator Tree:
+              Extract
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: dest1
+
+
+PREHOOK: query: insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src@ds=101
+PREHOOK: Input: default@test_src@ds=102
+PREHOOK: Output: default@dest1
+POSTHOOK: query: insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src@ds=101
+POSTHOOK: Input: default@test_src@ds=102
+POSTHOOK: Output: default@dest1
+PREHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF test_src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-4
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        test_src 
+          TableScan
+            alias: test_src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+              outputColumnNames: _col0
+              File Output Operator
+                compressed: false
+                GlobalTableId: 1
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: dest1
+
+  Stage: Stage-4
+    Conditional Operator
+      list of dependent Tasks:
+          Move Operator
+            files:
+                hdfs directory: true
+                destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1533278841/10000
+          Map Reduce
+            Alias -> Map Operator Tree:
+              file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1738586251/10002 
+                  Reduce Output Operator
+                    sort order: 
+                    Map-reduce partition columns:
+                          expr: rand()
+                          type: double
+                    tag: -1
+                    value expressions:
+                          expr: key
+                          type: string
+            Reduce Operator Tree:
+              Extract
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: dest1
+
+
+PREHOOK: query: insert overwrite table dest1 select key from test_src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_src@ds=101
+PREHOOK: Input: default@test_src@ds=102
+PREHOOK: Output: default@dest1
+POSTHOOK: query: insert overwrite table dest1 select key from test_src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_src@ds=101
+POSTHOOK: Input: default@test_src@ds=102
+POSTHOOK: Output: default@dest1
+PREHOOK: query: drop table test_src
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table test_src
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@test_src
+PREHOOK: query: drop table dest1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table dest1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@dest1