You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2014/03/11 02:23:20 UTC
svn commit: r1576164 - in /hive/branches/branch-0.13:
common/src/java/org/apache/hadoop/hive/common/
ql/src/java/org/apache/hadoop/hive/ql/io/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: hashutosh
Date: Tue Mar 11 01:23:19 2014
New Revision: 1576164
URL: http://svn.apache.org/r1576164
Log:
HIVE-6585: Bucket map join fails in the presence of a _SUCCESS file (Ashutosh Chauhan via Vikram Dixit)
Added:
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
hive/branches/branch-0.13/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out
Modified:
hive/branches/branch-0.13/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
Modified: hive/branches/branch-0.13/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/common/src/java/org/apache/hadoop/hive/common/FileUtils.java?rev=1576164&r1=1576163&r2=1576164&view=diff
==============================================================================
--- hive/branches/branch-0.13/common/src/java/org/apache/hadoop/hive/common/FileUtils.java (original)
+++ hive/branches/branch-0.13/common/src/java/org/apache/hadoop/hive/common/FileUtils.java Tue Mar 11 01:23:19 2014
@@ -31,6 +31,7 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -297,7 +298,14 @@ public final class FileUtils {
List<FileStatus> results) throws IOException {
if (fileStatus.isDir()) {
- for (FileStatus stat : fs.listStatus(fileStatus.getPath())) {
+ for (FileStatus stat : fs.listStatus(fileStatus.getPath(), new PathFilter() {
+
+ @Override
+ public boolean accept(Path p) {
+ String name = p.getName();
+ return !name.startsWith("_") && !name.startsWith(".");
+ }
+ })) {
listStatusRecursively(fs, stat, results);
}
} else {
Modified: hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java?rev=1576164&r1=1576163&r2=1576164&view=diff
==============================================================================
--- hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java (original)
+++ hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/io/BucketizedHiveInputFormat.java Tue Mar 11 01:23:19 2014
@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
@@ -91,7 +92,14 @@ public class BucketizedHiveInputFormat<K
List<IOException> errors = new ArrayList<IOException>();
FileSystem fs = dir.getFileSystem(job);
- FileStatus[] matches = fs.globStatus(dir);
+ FileStatus[] matches = fs.globStatus(dir, new PathFilter() {
+
+ @Override
+ public boolean accept(Path p) {
+ String name = p.getName();
+ return !name.startsWith("_") && !name.startsWith(".");
+ }
+ });
if (matches == null) {
errors.add(new IOException("Input path does not exist: " + dir));
} else if (matches.length == 0) {
Added: hive/branches/branch-0.13/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q?rev=1576164&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q (added)
+++ hive/branches/branch-0.13/ql/src/test/queries/clientpositive/bucket_if_with_path_filter.q Tue Mar 11 01:23:19 2014
@@ -0,0 +1,15 @@
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/bmjpathfilter;
+
+create table t1 (dt string) location '${system:test.tmp.dir}/bmjpathfilter/t1';
+Create table t2 (dt string) stored as orc;
+dfs -touchz ${system:test.tmp.dir}/bmjpathfilter/t1/_SUCCESS;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+SET hive.optimize.bucketmapjoin=true;
+
+SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt);
+
+SET hive.optimize.bucketmapjoin=false;
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+
+dfs -rmr ${system:test.tmp.dir}/bmjpathfilter;
Added: hive/branches/branch-0.13/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out?rev=1576164&view=auto
==============================================================================
--- hive/branches/branch-0.13/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out (added)
+++ hive/branches/branch-0.13/ql/src/test/results/clientpositive/bucket_if_with_path_filter.q.out Tue Mar 11 01:23:19 2014
@@ -0,0 +1,26 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: Create table t2 (dt string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: Create table t2 (dt string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ a.dt FROM t1 a JOIN t2 b ON (a.dt = b.dt)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####