You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2010/06/22 06:00:28 UTC
svn commit: r956767 - in /hadoop/hive/trunk: CHANGES.txt
ql/src/test/queries/clientpositive/archive.q
ql/src/test/results/clientpositive/archive.q.out
shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java
Author: namit
Date: Tue Jun 22 04:00:28 2010
New Revision: 956767
URL: http://svn.apache.org/viewvc?rev=956767&view=rev
Log:
HIVE-1417. Archived partitions throw error with queries calling
getContentSummary (Paul Yang via namit)
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out
hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=956767&r1=956766&r2=956767&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Jun 22 04:00:28 2010
@@ -15,6 +15,9 @@ Trunk - Unreleased
HIVE-1421. problem with sequence and rcfiles are mixed for null partitions
(namit via He Yongqiang)
+ HIVE-1417. Archived partitions throw error with queries calling
+ getContentSummary (Paul Yang via namit)
+
Release 0.6.0 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q?rev=956767&r1=956766&r2=956767&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q (original)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q Tue Jun 22 04:00:28 2010
@@ -11,6 +11,11 @@ ALTER TABLE srcpart ARCHIVE PARTITION (d
SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+SELECT key, count(1) FROM srcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key;
+
+SELECT * FROM srcpart a JOIN src b ON a.key=b.key
+WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0';
+
ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out?rev=956767&r1=956766&r2=956767&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out Tue Jun 22 04:00:28 2010
@@ -5,7 +5,7 @@ FROM (SELECT * FROM srcpart WHERE ds='20
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-31_915_8404207959149265563/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-12_801_8718664231713136788/10000
POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
@@ -13,7 +13,7 @@ FROM (SELECT * FROM srcpart WHERE ds='20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-31_915_8404207959149265563/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-12_801_8718664231713136788/10000
48479881068
PREHOOK: query: ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12')
PREHOOK: type: ALTERTABLE_ARCHIVE
@@ -24,14 +24,44 @@ FROM (SELECT * FROM srcpart WHERE ds='20
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-39_278_6500531861845897423/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-20_510_5269010142014944519/10000
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-39_278_6500531861845897423/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-20_510_5269010142014944519/10000
48479881068
+PREHOOK: query: SELECT key, count(1) FROM srcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-26_238_1201801305984652550/10000
+POSTHOOK: query: SELECT key, count(1) FROM srcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-26_238_1201801305984652550/10000
+0 3
+PREHOOK: query: SELECT * FROM srcpart a JOIN src b ON a.key=b.key
+WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-32_413_8808816186480793926/10000
+POSTHOOK: query: SELECT * FROM srcpart a JOIN src b ON a.key=b.key
+WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-32_413_8808816186480793926/10000
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
+0 val_0 2008-04-08 12 0 val_0
PREHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12')
PREHOOK: type: ALTERTABLE_UNARCHIVE
POSTHOOK: query: ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12')
@@ -41,13 +71,13 @@ FROM (SELECT * FROM srcpart WHERE ds='20
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-45_152_7929745238260502728/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-37_857_7662280812791374354/10000
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-45_152_7929745238260502728/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-37_857_7662280812791374354/10000
48479881068
PREHOOK: query: CREATE TABLE harbucket(key INT)
PARTITIONED by (ds STRING)
@@ -70,11 +100,11 @@ POSTHOOK: Lineage: harbucket PARTITION(d
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@harbucket@ds=1
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-55_224_4935516234179357829/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-47_247_5412318794268628077/10000
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@harbucket@ds=1
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-02-55_224_4935516234179357829/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-47_247_5412318794268628077/10000
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
0
0
@@ -90,11 +120,11 @@ POSTHOOK: Lineage: harbucket PARTITION(d
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@harbucket@ds=1
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-01_089_7613007639376060720/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-52_696_6661366062442712305/10000
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@harbucket@ds=1
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-01_089_7613007639376060720/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-52_696_6661366062442712305/10000
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
0
0
@@ -110,11 +140,11 @@ POSTHOOK: Lineage: harbucket PARTITION(d
PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@harbucket@ds=1
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-05_256_2444261282224863204/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-56_920_7660869602739278397/10000
POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@harbucket@ds=1
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-05_256_2444261282224863204/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-29-56_920_7660869602739278397/10000
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
0
0
@@ -156,12 +186,12 @@ PREHOOK: query: SELECT SUM(hash(col)) FR
FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
PREHOOK: type: QUERY
PREHOOK: Input: default@old_name@ds=1
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-14_435_1169638822418513482/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-30-06_143_8274193601305228676/10000
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@old_name@ds=1
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-14_435_1169638822418513482/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-30-06_143_8274193601305228676/10000
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
48656137
@@ -177,12 +207,12 @@ PREHOOK: query: SELECT SUM(hash(col)) FR
FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
PREHOOK: type: QUERY
PREHOOK: Input: default@new_name@ds=1
-PREHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-19_685_3074346646787769085/10000
+PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-30-10_661_5999329953207292038/10000
POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@new_name@ds=1
-POSTHOOK: Output: file:/data/users/pyang/mstore/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-08_15-03-19_685_3074346646787769085/10000
+POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/scratchdir/hive_2010-06-21_17-30-10_661_5999329953207292038/10000
POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
NULL
Modified: hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java?rev=956767&r1=956766&r2=956767&view=diff
==============================================================================
--- hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java (original)
+++ hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java Tue Jun 22 04:00:28 2010
@@ -18,14 +18,20 @@
package org.apache.hadoop.hive.shims;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.HarFileSystem;
+import org.apache.hadoop.fs.Path;
/**
- * HiveHarFileSystem - fixes issue with block locations
+ * HiveHarFileSystem - fixes issues with Hadoop's HarFileSystem
*
*/
public class HiveHarFileSystem extends HarFileSystem {
- /*
+
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
long len) throws IOException {
@@ -35,5 +41,26 @@ public class HiveHarFileSystem extends H
String [] hosts = {"DUMMY_HOST"};
return new BlockLocation[]{new BlockLocation(null, hosts, 0, file.getLen())};
}
- */
+
+ @Override
+ public ContentSummary getContentSummary(Path f) throws IOException {
+ // HarFileSystem has a bug where this method does not work properly
+ // if the underlying FS is HDFS. See MAPREDUCE-1877 for more
+ // information. This method is from FileSystem.
+ FileStatus status = getFileStatus(f);
+ if (!status.isDir()) {
+ // f is a file
+ return new ContentSummary(status.getLen(), 1, 0);
+ }
+ // f is a directory
+ long[] summary = {0, 0, 1};
+ for(FileStatus s : listStatus(f)) {
+ ContentSummary c = s.isDir() ? getContentSummary(s.getPath()) :
+ new ContentSummary(s.getLen(), 1, 0);
+ summary[0] += c.getLength();
+ summary[1] += c.getFileCount();
+ summary[2] += c.getDirectoryCount();
+ }
+ return new ContentSummary(summary[0], summary[1], summary[2]);
+ }
}