You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by he...@apache.org on 2011/09/01 23:17:47 UTC

svn commit: r1164278 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/RCFile.java test/queries/clientpositive/create_merge_compressed.q test/results/clientpositive/create_merge_compressed.q.out

Author: heyongqiang
Date: Thu Sep  1 21:17:46 2011
New Revision: 1164278

URL: http://svn.apache.org/viewvc?rev=1164278&view=rev
Log:
HIVE-2417: Merging of compressed rcfiles fails to write the valuebuffer part correctly (Krishna Kumar via He Yongqiang)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/create_merge_compressed.q
    hive/trunk/ql/src/test/results/clientpositive/create_merge_compressed.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java?rev=1164278&r1=1164277&r2=1164278&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/RCFile.java Thu Sep  1 21:17:46 2011
@@ -484,8 +484,14 @@ public class RCFile {
 
     @Override
     public void write(DataOutput out) throws IOException {
-      for (NonSyncDataOutputBuffer currentBuf : loadedColumnsValueBuffer) {
-        out.write(currentBuf.getData(), 0, currentBuf.getLength());
+      if (codec != null) {
+        for (NonSyncDataOutputBuffer currentBuf : compressedColumnsValueBuffer) {
+          out.write(currentBuf.getData(), 0, currentBuf.getLength());
+        }
+      } else {
+        for (NonSyncDataOutputBuffer currentBuf : loadedColumnsValueBuffer) {
+          out.write(currentBuf.getData(), 0, currentBuf.getLength());
+        }
       }
     }
 

Added: hive/trunk/ql/src/test/queries/clientpositive/create_merge_compressed.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/create_merge_compressed.q?rev=1164278&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/create_merge_compressed.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/create_merge_compressed.q Thu Sep  1 21:17:46 2011
@@ -0,0 +1,24 @@
+create table src_rc_merge_test(key int, value string) stored as rcfile;
+
+load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test;
+
+set hive.exec.compress.output = true;
+
+create table tgt_rc_merge_test(key int, value string) stored as rcfile;
+insert into table tgt_rc_merge_test select * from src_rc_merge_test;
+insert into table tgt_rc_merge_test select * from src_rc_merge_test;
+
+show table extended like `tgt_rc_merge_test`;
+
+select count(1) from tgt_rc_merge_test;
+select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test;
+
+alter table tgt_rc_merge_test concatenate;
+
+show table extended like `tgt_rc_merge_test`;
+
+select count(1) from tgt_rc_merge_test;
+select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test;
+
+drop table src_rc_merge_test;
+drop table tgt_rc_merge_test;
\ No newline at end of file

Added: hive/trunk/ql/src/test/results/clientpositive/create_merge_compressed.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/create_merge_compressed.q.out?rev=1164278&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/create_merge_compressed.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/create_merge_compressed.q.out Thu Sep  1 21:17:46 2011
@@ -0,0 +1,172 @@
+PREHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table src_rc_merge_test(key int, value string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@src_rc_merge_test
+PREHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test
+PREHOOK: type: LOAD
+PREHOOK: Output: default@src_rc_merge_test
+POSTHOOK: query: load data local inpath '../data/files/smbbucket_1.rc' into table src_rc_merge_test
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@src_rc_merge_test
+PREHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tgt_rc_merge_test(key int, value string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tgt_rc_merge_test
+PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_rc_merge_test
+PREHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_rc_merge_test
+POSTHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_rc_merge_test
+PREHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: query: insert into table tgt_rc_merge_test select * from src_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_rc_merge_test
+POSTHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: show table extended like `tgt_rc_merge_test`
+PREHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: query: show table extended like `tgt_rc_merge_test`
+POSTHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+tableName:tgt_rc_merge_test
+owner:krishnak
+location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/tgt_rc_merge_test
+inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat
+outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+columns:struct columns { i32 key, string value}
+partitioned:false
+partitionColumns:
+totalNumberFiles:2
+totalFileSize:532
+maxFileSize:266
+minFileSize:266
+lastAccessTime:0
+lastUpdateTime:1314766542000
+
+PREHOOK: query: select count(1) from tgt_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-55-44_261_3643692219591280612/-mr-10000
+POSTHOOK: query: select count(1) from tgt_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-55-44_261_3643692219591280612/-mr-10000
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+10
+PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-06_300_2950525310351863426/-mr-10000
+POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-06_300_2950525310351863426/-mr-10000
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+46	-751895388
+PREHOOK: query: alter table tgt_rc_merge_test concatenate
+PREHOOK: type: ALTER_TABLE_MERGE
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: query: alter table tgt_rc_merge_test concatenate
+POSTHOOK: type: ALTER_TABLE_MERGE
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: show table extended like `tgt_rc_merge_test`
+PREHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: query: show table extended like `tgt_rc_merge_test`
+POSTHOOK: type: SHOW_TABLESTATUS
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+tableName:tgt_rc_merge_test
+owner:krishnak
+location:pfile:/Users/krishnak/Projects/hdp/sources/hive-git-apache/build/ql/test/data/warehouse/tgt_rc_merge_test
+inputformat:org.apache.hadoop.hive.ql.io.RCFileInputFormat
+outputformat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+columns:struct columns { i32 key, string value}
+partitioned:false
+partitionColumns:
+totalNumberFiles:1
+totalFileSize:338
+maxFileSize:338
+minFileSize:338
+lastAccessTime:0
+lastUpdateTime:1314766576000
+
+PREHOOK: query: select count(1) from tgt_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-18_639_4261902784336213762/-mr-10000
+POSTHOOK: query: select count(1) from tgt_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-18_639_4261902784336213762/-mr-10000
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+10
+PREHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-28_955_7239249873259452326/-mr-10000
+POSTHOOK: query: select sum(hash(key)), sum(hash(value)) from tgt_rc_merge_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: file:/var/folders/67/67R3POPtF90VG63KSmCbcU++F0U/-Tmp-/krishnak/hive_2011-08-30_21-56-28_955_7239249873259452326/-mr-10000
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+46	-751895388
+PREHOOK: query: drop table src_rc_merge_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_rc_merge_test
+PREHOOK: Output: default@src_rc_merge_test
+POSTHOOK: query: drop table src_rc_merge_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_rc_merge_test
+POSTHOOK: Output: default@src_rc_merge_test
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: drop table tgt_rc_merge_test
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tgt_rc_merge_test
+PREHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: query: drop table tgt_rc_merge_test
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tgt_rc_merge_test
+POSTHOOK: Output: default@tgt_rc_merge_test
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.key SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: tgt_rc_merge_test.value SIMPLE [(src_rc_merge_test)src_rc_merge_test.FieldSchema(name:value, type:string, comment:null), ]