Posted to commits@hive.apache.org by pr...@apache.org on 2015/12/09 00:59:02 UTC

[1/4] hive git commit: HIVE-11940: "INSERT OVERWRITE" query is very slow because it creates one "distcp" per file to copy data from staging directory to target directory (Sergio Pena, reviewed by Ferdinand Xu)

Repository: hive
Updated Branches:
  refs/heads/branch-1 a34c3b80d -> e1e6c04d6


HIVE-11940: "INSERT OVERWRITE" query is very slow because it creates one "distcp" per file to copy data from staging directory to target directory (Sergio Pena, reviewed by Ferdinand Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/445ed86f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/445ed86f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/445ed86f

Branch: refs/heads/branch-1
Commit: 445ed86f2b51bdcf8beed5291b1eb11be4fd2b61
Parents: a34c3b8
Author: Sergio Pena <se...@cloudera.com>
Authored: Mon Sep 28 09:58:08 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:29:01 2015 -0600

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/metadata/Hive.java    | 25 ++++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/445ed86f/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index c72b3ec..5d46b07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2635,16 +2635,31 @@ private void constructOneLBLocationMap(FileStatus fSta,
             if (srcs.length == 0) {
               success = true; // Nothing to move.
             }
+
+            /* Move files one by one because source is a subdirectory of destination */
             for (FileStatus status : srcs) {
-              success = FileUtils.copy(srcf.getFileSystem(conf), status.getPath(), destf.getFileSystem(conf), destf,
-                  true,     // delete source
-                  replace,  // overwrite destination
-                  conf);
+              Path destFile;
 
-              if (!success) {
+              /* Append the source filename to the destination directory */
+              if (destFs.isDirectory(destf)) {
+                destFile = new Path(destf, status.getPath().getName());
+              } else {
+                destFile = destf;
+              }
+
+              // Destination should be replaced, so we delete it first
+              if (destFs.exists(destFile)) {
+                if (!destFs.delete(destFile, true)) {
+                  throw new HiveException(String.format("File to replace could not be deleted: %s", destFile));
+                }
+              }
+
+              if (!(destFs.rename(status.getPath(), destFile))) {
                 throw new HiveException("Unable to move source " + status.getPath() + " to destination " + destf);
               }
             }
+
+            success = true;
           } else {
             success = destFs.rename(srcf, destf);
           }


[2/4] hive git commit: HIVE-11607 : Export tables broken for data > 32 MB (Ashutosh Chauhan via Sushanth Sowmyan, Sergio Pena)

Posted by pr...@apache.org.
HIVE-11607 : Export tables broken for data > 32 MB (Ashutosh Chauhan via Sushanth Sowmyan, Sergio Pena)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4cdd19a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4cdd19a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4cdd19a3

Branch: refs/heads/branch-1
Commit: 4cdd19a337ccbfd72f8a3b479cc0eb48f544d2e3
Parents: 445ed86
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Fri Aug 21 10:10:52 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:33:54 2015 -0600

----------------------------------------------------------------------
 shims/0.20S/pom.xml                             |  8 +++-
 .../hadoop/hive/shims/Hadoop20SShims.java       | 35 +++++++---------
 shims/0.23/pom.xml                              | 21 ++++------
 .../apache/hadoop/hive/shims/Hadoop23Shims.java | 44 ++++++++------------
 4 files changed, 48 insertions(+), 60 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.20S/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.20S/pom.xml b/shims/0.20S/pom.xml
index 334a2c5..0d4458b 100644
--- a/shims/0.20S/pom.xml
+++ b/shims/0.20S/pom.xml
@@ -53,5 +53,11 @@
       <version>${hadoop-20S.version}</version>
       <optional>true</optional>
     </dependency>
-  </dependencies>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-tools</artifactId>
+      <version>${hadoop-20S.version}</version>
+      <scope>provided</scope>
+    </dependency>
+ </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
----------------------------------------------------------------------
diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index bf39928..32b072e 100644
--- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.shims;
 
 import java.io.IOException;
 import java.lang.Override;
-import java.lang.reflect.Constructor;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
@@ -27,6 +26,7 @@ import java.net.URL;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
@@ -71,6 +71,8 @@ import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.KerberosName;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.tools.distcp2.DistCp;
+import org.apache.hadoop.tools.distcp2.DistCpOptions;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.VersionInfo;
@@ -503,7 +505,7 @@ public class Hadoop20SShims extends HadoopShimsSecure {
   }
 
   public class Hadoop20SFileStatus implements HdfsFileStatus {
-    private FileStatus fileStatus;
+    private final FileStatus fileStatus;
     public Hadoop20SFileStatus(FileStatus fileStatus) {
       this.fileStatus = fileStatus;
     }
@@ -629,28 +631,33 @@ public class Hadoop20SShims extends HadoopShimsSecure {
    */
   public class KerberosNameShim implements HadoopShimsSecure.KerberosNameShim {
 
-    private KerberosName kerberosName;
+    private final KerberosName kerberosName;
 
     public KerberosNameShim(String name) {
       kerberosName = new KerberosName(name);
     }
 
+    @Override
     public String getDefaultRealm() {
       return kerberosName.getDefaultRealm();
     }
 
+    @Override
     public String getServiceName() {
       return kerberosName.getServiceName();
     }
 
+    @Override
     public String getHostName() {
       return kerberosName.getHostName();
     }
 
+    @Override
     public String getRealm() {
       return kerberosName.getRealm();
     }
 
+    @Override
     public String getShortName() throws IOException {
       return kerberosName.getShortName();
     }
@@ -663,27 +670,17 @@ public class Hadoop20SShims extends HadoopShimsSecure {
 
   @Override
   public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
-    int rc;
-
-    // Creates the command-line parameters for distcp
-    String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};
 
+    DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
+    options.setSkipCRC(true);
+    options.setSyncFolder(true);
     try {
-      Class clazzDistCp = Class.forName("org.apache.hadoop.tools.distcp2");
-      Constructor c = clazzDistCp.getConstructor();
-      c.setAccessible(true);
-      Tool distcp = (Tool)c.newInstance();
-      distcp.setConf(conf);
-      rc = distcp.run(params);
-    } catch (ClassNotFoundException e) {
-      throw new IOException("Cannot find DistCp class package: " + e.getMessage());
-    } catch (NoSuchMethodException e) {
-      throw new IOException("Cannot get DistCp constructor: " + e.getMessage());
+      DistCp distcp = new DistCp(conf, options);
+      distcp.execute();
+      return true;
     } catch (Exception e) {
       throw new IOException("Cannot execute DistCp process: " + e, e);
     }
-
-    return (0 == rc) ? true : false;
   }
 
   @Override

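Note: both shims drop the reflection-based lookup of the DistCp Tool class in favor of calling the DistCp Java API directly. A self-contained sketch of that invocation, assuming hadoop-distcp (or the distcp2 backport for Hadoop 1) on the classpath; the setter order shown here is the corrected one from HIVE-11820 later in this series:

import java.util.Collections;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;

public class DistCpSketch {
  // Copy src to dst with the same semantics as "distcp -update -skipcrccheck".
  static boolean copy(Path src, Path dst, Configuration conf) throws Exception {
    DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
    options.setSyncFolder(true); // -update; must be enabled before skip-CRC
    options.setSkipCRC(true);    // -skipcrccheck
    DistCp distcp = new DistCp(conf, options);
    distcp.execute();            // submits the copy job and waits for completion
    return true;
  }
}
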
http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.23/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.23/pom.xml b/shims/0.23/pom.xml
index 76ce697..79024d2 100644
--- a/shims/0.23/pom.xml
+++ b/shims/0.23/pom.xml
@@ -143,18 +143,11 @@
       <optional>true</optional>
      <type>test-jar</type>
    </dependency>
-  </dependencies>
-
-  <profiles>
-    <profile>
-      <id>hadoop-2</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.hadoop</groupId>
-          <artifactId>hadoop-distcp</artifactId>
-          <version>${hadoop-23.version}</version>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
+   <dependency>
+     <groupId>org.apache.hadoop</groupId>
+     <artifactId>hadoop-distcp</artifactId>
+     <version>${hadoop-23.version}</version>
+     <scope>provided</scope>
+   </dependency>
+   </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 7756574..cbac554 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hive.shims;
 import java.io.DataInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
 import java.lang.reflect.Method;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
@@ -30,6 +28,7 @@ import java.nio.ByteBuffer;
 import java.security.AccessControlException;
 import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -65,6 +64,9 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DirectoryListing;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
 import org.apache.hadoop.hdfs.client.HdfsAdmin;
 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -91,9 +93,9 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.authentication.util.KerberosName;
+import org.apache.hadoop.tools.DistCp;
+import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.tez.test.MiniTezCluster;
 
@@ -122,7 +124,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       zcr = true;
     } catch (ClassNotFoundException ce) {
     }
-    
+
     if (zcr) {
       // in-memory HDFS is only available after zcr
       try {
@@ -608,11 +610,11 @@ public class Hadoop23Shims extends HadoopShimsSecure {
                                                                                 org.apache.hadoop.mapred.TaskAttemptID taskId, Progressable progressable) {
       org.apache.hadoop.mapred.TaskAttemptContext newContext = null;
       try {
-        java.lang.reflect.Constructor construct = org.apache.hadoop.mapred.TaskAttemptContextImpl.class.getDeclaredConstructor(
+        java.lang.reflect.Constructor<org.apache.hadoop.mapred.TaskAttemptContextImpl> construct = org.apache.hadoop.mapred.TaskAttemptContextImpl.class.getDeclaredConstructor(
                 org.apache.hadoop.mapred.JobConf.class, org.apache.hadoop.mapred.TaskAttemptID.class,
                 Reporter.class);
         construct.setAccessible(true);
-        newContext = (org.apache.hadoop.mapred.TaskAttemptContext) construct.newInstance(
+        newContext = construct.newInstance(
                 new JobConf(conf), taskId, progressable);
       } catch (Exception e) {
         throw new RuntimeException(e);
@@ -882,6 +884,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
      * Cannot add Override annotation since FileSystem.access() may not exist in
      * the version of hadoop used to build Hive.
      */
+    @Override
     public void access(Path path, FsAction action) throws AccessControlException,
         FileNotFoundException, IOException {
       Path underlyingFsPath = swizzleParamPath(path);
@@ -1148,7 +1151,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
       }
     }
   }
-    
+
 
   @Override
   public HadoopShims.StoragePolicyShim getStoragePolicyShim(FileSystem fs) {
@@ -1164,27 +1167,17 @@ public class Hadoop23Shims extends HadoopShimsSecure {
 
   @Override
   public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
-    int rc;
-
-    // Creates the command-line parameters for distcp
-    String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};
 
+    DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
+    options.setSkipCRC(true);
+    options.setSyncFolder(true);
     try {
-      Class clazzDistCp = Class.forName("org.apache.hadoop.tools.DistCp");
-      Constructor c = clazzDistCp.getConstructor();
-      c.setAccessible(true);
-      Tool distcp = (Tool)c.newInstance();
-      distcp.setConf(conf);
-      rc = distcp.run(params);
-    } catch (ClassNotFoundException e) {
-      throw new IOException("Cannot find DistCp class package: " + e.getMessage());
-    } catch (NoSuchMethodException e) {
-      throw new IOException("Cannot get DistCp constructor: " + e.getMessage());
+      DistCp distcp = new DistCp(conf, options);
+      distcp.execute();
+      return true;
     } catch (Exception e) {
       throw new IOException("Cannot execute DistCp process: " + e, e);
     }
-
-    return (0 == rc);
   }
 
   private static Boolean hdfsEncryptionSupport;
@@ -1219,7 +1212,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
      */
     private KeyProvider keyProvider = null;
 
-    private Configuration conf;
+    private final Configuration conf;
 
     public HdfsEncryptionShim(URI uri, Configuration conf) throws IOException {
       DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(uri, conf);
@@ -1378,7 +1371,6 @@ public class Hadoop23Shims extends HadoopShimsSecure {
     }
     return result;
   }
-
   @Override
   public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException {
     // Use method addDelegationTokens instead of getDelegationToken to get all the tokens including KMS.


[4/4] hive git commit: HIVE-12364 : Distcp job fails when run under Tez (Prasanth J via Ashutosh Chauhan)

Posted by pr...@apache.org.
HIVE-12364 : Distcp job fails when run under Tez (Prasanth J via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e1e6c04d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e1e6c04d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e1e6c04d

Branch: refs/heads/branch-1
Commit: e1e6c04d6b605fddfec131a8955d33bc63853f2f
Parents: ea4073f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Mon Nov 9 17:59:37 2015 -0800
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:58:46 2015 -0600

----------------------------------------------------------------------
 itests/qtest/pom.xml                                  |  6 ++++++
 .../src/test/resources/testconfiguration.properties   |  1 +
 .../apache/hadoop/hive/ql/parse/SemanticAnalyzer.java |  1 +
 .../test/queries/clientpositive/insert_dir_distcp.q   |  9 +++++++++
 .../results/clientpositive/insert_dir_distcp.q.out    | 14 ++++++++++++++
 .../clientpositive/tez/insert_dir_distcp.q.out        | 14 ++++++++++++++
 .../org/apache/hadoop/hive/shims/Hadoop23Shims.java   |  8 +++++---
 7 files changed, 50 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 187b96e..7dfbd85 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -225,6 +225,12 @@
         </dependency>
         <dependency>
           <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-distcp</artifactId>
+          <version>${hadoop-23.version}</version>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
           <artifactId>hadoop-hdfs</artifactId>
           <version>${hadoop-23.version}</version>
           <classifier>tests</classifier>

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 30cb9f1..03b07ce 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -26,6 +26,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
   infer_bucket_sort_num_buckets.q,\
   infer_bucket_sort_reducers_power_two.q,\
   input16_cc.q,\
+  insert_dir_distcp.q,\
   join1.q,\
   join_acid_non_acid.q,\
   leftsemijoin_mr.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c76f628..71ec3d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/queries/clientpositive/insert_dir_distcp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insert_dir_distcp.q b/ql/src/test/queries/clientpositive/insert_dir_distcp.q
new file mode 100644
index 0000000..6582938
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_dir_distcp.q
@@ -0,0 +1,9 @@
+set hive.exec.copyfile.maxsize=400;
+
+set tez.am.log.level=INFO;
+set tez.task.log.level=INFO;
+-- see TEZ-2931 for using INFO logging
+
+insert overwrite directory '/tmp/src' select * from src;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src/;

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
new file mode 100644
index 0000000..b70fa01
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
@@ -0,0 +1,14 @@
+PREHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+Found 1 items
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
new file mode 100644
index 0000000..b70fa01
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
@@ -0,0 +1,14 @@
+PREHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+Found 1 items
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 89789de..2e09882 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.shims;
 import java.io.DataInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.lang.reflect.Method;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
@@ -64,9 +65,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.DirectoryListing;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
 import org.apache.hadoop.hdfs.client.HdfsAdmin;
 import org.apache.hadoop.hdfs.protocol.EncryptionZone;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -93,6 +91,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authentication.util.KerberosName;
 import org.apache.hadoop.tools.DistCp;
 import org.apache.hadoop.tools.DistCpOptions;
 import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
@@ -1174,11 +1173,14 @@ public class Hadoop23Shims extends HadoopShimsSecure {
     options.setSkipCRC(true);
     options.preserve(FileAttribute.BLOCKSIZE);
     try {
+      conf.setBoolean("mapred.mapper.new-api", true);
       DistCp distcp = new DistCp(conf, options);
       distcp.execute();
       return true;
     } catch (Exception e) {
       throw new IOException("Cannot execute DistCp process: " + e, e);
+    } finally {
+      conf.setBoolean("mapred.mapper.new-api", false);
     }
   }
 

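Note: the hunk above forces mapred.mapper.new-api on around the DistCp job, apparently because the configuration Hive inherits when running under Tez disables the new MapReduce mapper API that DistCp's job requires, then resets the flag afterwards. A generalized sketch of the same toggle that saves and restores the caller's original value (that restore is an assumption here; the patch itself unconditionally resets the flag to false):

import java.util.concurrent.Callable;

import org.apache.hadoop.conf.Configuration;

public class NewApiToggleSketch {
  // Run body with mapred.mapper.new-api forced to true, then restore
  // whatever value the caller had.
  static <T> T withNewMapperApi(Configuration conf, Callable<T> body)
      throws Exception {
    boolean previous = conf.getBoolean("mapred.mapper.new-api", false);
    conf.setBoolean("mapred.mapper.new-api", true);
    try {
      return body.call();
    } finally {
      conf.setBoolean("mapred.mapper.new-api", previous);
    }
  }
}
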

[3/4] hive git commit: HIVE-11820 : export tables with size of >32MB throws java.lang.IllegalArgumentException: Skip CRC is valid only with update options (Takahiko Saito via Ashutosh Chauhan)

Posted by pr...@apache.org.
HIVE-11820 : export tables with size of >32MB throws java.lang.IllegalArgumentException: Skip CRC is valid only with update options (Takahiko Saito via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea4073fc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea4073fc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea4073fc

Branch: refs/heads/branch-1
Commit: ea4073fc9041d829a06eff80a81e075d9dfba71d
Parents: 4cdd19a
Author: Takahiko Saito <ty...@gmail.com>
Authored: Wed Sep 16 15:34:00 2015 -0800
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:34:16 2015 -0600

----------------------------------------------------------------------
 .../main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java  | 5 ++++-
 .../main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java   | 4 +++-
 2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/ea4073fc/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
----------------------------------------------------------------------
diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index 32b072e..a79a36f 100644
--- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -73,6 +73,8 @@ import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.tools.distcp2.DistCp;
 import org.apache.hadoop.tools.distcp2.DistCpOptions;
+import org.apache.hadoop.tools.distcp2.DistCpOptions.FileAttribute;
+
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.VersionInfo;
@@ -672,8 +674,9 @@ public class Hadoop20SShims extends HadoopShimsSecure {
   public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
 
     DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
-    options.setSkipCRC(true);
     options.setSyncFolder(true);
+    options.setSkipCRC(true);
+    options.preserve(FileAttribute.BLOCKSIZE);
     try {
       DistCp distcp = new DistCp(conf, options);
       distcp.execute();

http://git-wip-us.apache.org/repos/asf/hive/blob/ea4073fc/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index cbac554..89789de 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -95,6 +95,7 @@ import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.tools.DistCp;
 import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.yarn.conf.YarnConfiguration;
 import org.apache.tez.test.MiniTezCluster;
@@ -1169,8 +1170,9 @@ public class Hadoop23Shims extends HadoopShimsSecure {
   public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
 
     DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
-    options.setSkipCRC(true);
     options.setSyncFolder(true);
+    options.setSkipCRC(true);
+    options.preserve(FileAttribute.BLOCKSIZE);
     try {
       DistCp distcp = new DistCp(conf, options);
       distcp.execute();
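
Note: the reorder is the actual fix here. DistCpOptions validates each setter against the options already set, so enabling skip-CRC while sync-folder (-update) is still false throws the IllegalArgumentException quoted in the subject line. A minimal demonstration, assuming hadoop-distcp 2.x; the paths are placeholders:

import java.util.Collections;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.DistCpOptions;

public class OptionOrderSketch {
  public static void main(String[] args) {
    DistCpOptions options = new DistCpOptions(
        Collections.singletonList(new Path("/tmp/src")), new Path("/tmp/dst"));
    options.setSyncFolder(true); // enable -update first ...
    options.setSkipCRC(true);    // ... otherwise this throws
                                 // "Skip CRC is valid only with update options"
  }
}

preserve(FileAttribute.BLOCKSIZE) is added alongside the reorder, presumably so copied files keep the source block size (an HDFS file's checksum depends on its block size, so differing block sizes would break CRC comparison if skip-CRC were ever turned off).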