You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/12/09 00:59:02 UTC
[1/4] hive git commit: HIVE-11940: "INSERT OVERWRITE" query is very
slow because it creates one "distcp" per file to copy data from staging
directory to target directory (Sergio Pena, reviewed by Ferdinand Xu)
Repository: hive
Updated Branches:
refs/heads/branch-1 a34c3b80d -> e1e6c04d6
HIVE-11940: "INSERT OVERWRITE" query is very slow because it creates one "distcp" per file to copy data from staging directory to target directory (Sergio Pena, reviewed by Ferdinand Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/445ed86f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/445ed86f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/445ed86f
Branch: refs/heads/branch-1
Commit: 445ed86f2b51bdcf8beed5291b1eb11be4fd2b61
Parents: a34c3b8
Author: Sergio Pena <se...@cloudera.com>
Authored: Mon Sep 28 09:58:08 2015 -0500
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:29:01 2015 -0600
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/metadata/Hive.java | 25 ++++++++++++++++----
1 file changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/445ed86f/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index c72b3ec..5d46b07 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2635,16 +2635,31 @@ private void constructOneLBLocationMap(FileStatus fSta,
if (srcs.length == 0) {
success = true; // Nothing to move.
}
+
+ /* Move files one by one because source is a subdirectory of destination */
for (FileStatus status : srcs) {
- success = FileUtils.copy(srcf.getFileSystem(conf), status.getPath(), destf.getFileSystem(conf), destf,
- true, // delete source
- replace, // overwrite destination
- conf);
+ Path destFile;
- if (!success) {
+ /* Append the source filename to the destination directory */
+ if (destFs.isDirectory(destf)) {
+ destFile = new Path(destf, status.getPath().getName());
+ } else {
+ destFile = destf;
+ }
+
+ // Destination should be replaced, so we delete it first
+ if (destFs.exists(destFile)) {
+ if (!destFs.delete(destFile, true)) {
+ throw new HiveException(String.format("File to replace could not be deleted: %s", destFile));
+ }
+ }
+
+ if (!(destFs.rename(status.getPath(), destFile))) {
throw new HiveException("Unable to move source " + status.getPath() + " to destination " + destf);
}
}
+
+ success = true;
} else {
success = destFs.rename(srcf, destf);
}
[2/4] hive git commit: HIVE-11607 : Export tables broken for data >
32 MB (Ashutosh Chauhan via Sushanth Sowmyan, Sergio Pena)
Posted by pr...@apache.org.
HIVE-11607 : Export tables broken for data > 32 MB (Ashutosh Chauhan via Sushanth Sowmyan, Sergio Pena)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4cdd19a3
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4cdd19a3
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4cdd19a3
Branch: refs/heads/branch-1
Commit: 4cdd19a337ccbfd72f8a3b479cc0eb48f544d2e3
Parents: 445ed86
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Fri Aug 21 10:10:52 2015 -0700
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:33:54 2015 -0600
----------------------------------------------------------------------
shims/0.20S/pom.xml | 8 +++-
.../hadoop/hive/shims/Hadoop20SShims.java | 35 +++++++---------
shims/0.23/pom.xml | 21 ++++------
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 44 ++++++++------------
4 files changed, 48 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.20S/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.20S/pom.xml b/shims/0.20S/pom.xml
index 334a2c5..0d4458b 100644
--- a/shims/0.20S/pom.xml
+++ b/shims/0.20S/pom.xml
@@ -53,5 +53,11 @@
<version>${hadoop-20S.version}</version>
<optional>true</optional>
</dependency>
- </dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-tools</artifactId>
+ <version>${hadoop-20S.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
----------------------------------------------------------------------
diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index bf39928..32b072e 100644
--- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.shims;
import java.io.IOException;
import java.lang.Override;
-import java.lang.reflect.Constructor;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
@@ -27,6 +26,7 @@ import java.net.URL;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@@ -71,6 +71,8 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.KerberosName;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.tools.distcp2.DistCp;
+import org.apache.hadoop.tools.distcp2.DistCpOptions;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.VersionInfo;
@@ -503,7 +505,7 @@ public class Hadoop20SShims extends HadoopShimsSecure {
}
public class Hadoop20SFileStatus implements HdfsFileStatus {
- private FileStatus fileStatus;
+ private final FileStatus fileStatus;
public Hadoop20SFileStatus(FileStatus fileStatus) {
this.fileStatus = fileStatus;
}
@@ -629,28 +631,33 @@ public class Hadoop20SShims extends HadoopShimsSecure {
*/
public class KerberosNameShim implements HadoopShimsSecure.KerberosNameShim {
- private KerberosName kerberosName;
+ private final KerberosName kerberosName;
public KerberosNameShim(String name) {
kerberosName = new KerberosName(name);
}
+ @Override
public String getDefaultRealm() {
return kerberosName.getDefaultRealm();
}
+ @Override
public String getServiceName() {
return kerberosName.getServiceName();
}
+ @Override
public String getHostName() {
return kerberosName.getHostName();
}
+ @Override
public String getRealm() {
return kerberosName.getRealm();
}
+ @Override
public String getShortName() throws IOException {
return kerberosName.getShortName();
}
@@ -663,27 +670,17 @@ public class Hadoop20SShims extends HadoopShimsSecure {
@Override
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
- int rc;
-
- // Creates the command-line parameters for distcp
- String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};
+ DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
+ options.setSkipCRC(true);
+ options.setSyncFolder(true);
try {
- Class clazzDistCp = Class.forName("org.apache.hadoop.tools.distcp2");
- Constructor c = clazzDistCp.getConstructor();
- c.setAccessible(true);
- Tool distcp = (Tool)c.newInstance();
- distcp.setConf(conf);
- rc = distcp.run(params);
- } catch (ClassNotFoundException e) {
- throw new IOException("Cannot find DistCp class package: " + e.getMessage());
- } catch (NoSuchMethodException e) {
- throw new IOException("Cannot get DistCp constructor: " + e.getMessage());
+ DistCp distcp = new DistCp(conf, options);
+ distcp.execute();
+ return true;
} catch (Exception e) {
throw new IOException("Cannot execute DistCp process: " + e, e);
}
-
- return (0 == rc) ? true : false;
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.23/pom.xml
----------------------------------------------------------------------
diff --git a/shims/0.23/pom.xml b/shims/0.23/pom.xml
index 76ce697..79024d2 100644
--- a/shims/0.23/pom.xml
+++ b/shims/0.23/pom.xml
@@ -143,18 +143,11 @@
<optional>true</optional>
<type>test-jar</type>
</dependency>
- </dependencies>
-
- <profiles>
- <profile>
- <id>hadoop-2</id>
- <dependencies>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-distcp</artifactId>
- <version>${hadoop-23.version}</version>
- </dependency>
- </dependencies>
- </profile>
- </profiles>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-distcp</artifactId>
+ <version>${hadoop-23.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ </dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/hive/blob/4cdd19a3/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 7756574..cbac554 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -20,8 +20,6 @@ package org.apache.hadoop.hive.shims;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
@@ -30,6 +28,7 @@ import java.nio.ByteBuffer;
import java.security.AccessControlException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
@@ -65,6 +64,9 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.protocol.DirectoryListing;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -91,9 +93,9 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.security.authentication.util.KerberosName;
+import org.apache.hadoop.tools.DistCp;
+import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.util.Progressable;
-import org.apache.hadoop.util.Tool;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.test.MiniTezCluster;
@@ -122,7 +124,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
zcr = true;
} catch (ClassNotFoundException ce) {
}
-
+
if (zcr) {
// in-memory HDFS is only available after zcr
try {
@@ -608,11 +610,11 @@ public class Hadoop23Shims extends HadoopShimsSecure {
org.apache.hadoop.mapred.TaskAttemptID taskId, Progressable progressable) {
org.apache.hadoop.mapred.TaskAttemptContext newContext = null;
try {
- java.lang.reflect.Constructor construct = org.apache.hadoop.mapred.TaskAttemptContextImpl.class.getDeclaredConstructor(
+ java.lang.reflect.Constructor<org.apache.hadoop.mapred.TaskAttemptContextImpl> construct = org.apache.hadoop.mapred.TaskAttemptContextImpl.class.getDeclaredConstructor(
org.apache.hadoop.mapred.JobConf.class, org.apache.hadoop.mapred.TaskAttemptID.class,
Reporter.class);
construct.setAccessible(true);
- newContext = (org.apache.hadoop.mapred.TaskAttemptContext) construct.newInstance(
+ newContext = construct.newInstance(
new JobConf(conf), taskId, progressable);
} catch (Exception e) {
throw new RuntimeException(e);
@@ -882,6 +884,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
* Cannot add Override annotation since FileSystem.access() may not exist in
* the version of hadoop used to build Hive.
*/
+ @Override
public void access(Path path, FsAction action) throws AccessControlException,
FileNotFoundException, IOException {
Path underlyingFsPath = swizzleParamPath(path);
@@ -1148,7 +1151,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
}
}
}
-
+
@Override
public HadoopShims.StoragePolicyShim getStoragePolicyShim(FileSystem fs) {
@@ -1164,27 +1167,17 @@ public class Hadoop23Shims extends HadoopShimsSecure {
@Override
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
- int rc;
-
- // Creates the command-line parameters for distcp
- String[] params = {"-update", "-skipcrccheck", src.toString(), dst.toString()};
+ DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
+ options.setSkipCRC(true);
+ options.setSyncFolder(true);
try {
- Class clazzDistCp = Class.forName("org.apache.hadoop.tools.DistCp");
- Constructor c = clazzDistCp.getConstructor();
- c.setAccessible(true);
- Tool distcp = (Tool)c.newInstance();
- distcp.setConf(conf);
- rc = distcp.run(params);
- } catch (ClassNotFoundException e) {
- throw new IOException("Cannot find DistCp class package: " + e.getMessage());
- } catch (NoSuchMethodException e) {
- throw new IOException("Cannot get DistCp constructor: " + e.getMessage());
+ DistCp distcp = new DistCp(conf, options);
+ distcp.execute();
+ return true;
} catch (Exception e) {
throw new IOException("Cannot execute DistCp process: " + e, e);
}
-
- return (0 == rc);
}
private static Boolean hdfsEncryptionSupport;
@@ -1219,7 +1212,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
*/
private KeyProvider keyProvider = null;
- private Configuration conf;
+ private final Configuration conf;
public HdfsEncryptionShim(URI uri, Configuration conf) throws IOException {
DistributedFileSystem dfs = (DistributedFileSystem)FileSystem.get(uri, conf);
@@ -1378,7 +1371,6 @@ public class Hadoop23Shims extends HadoopShimsSecure {
}
return result;
}
-
@Override
public void addDelegationTokens(FileSystem fs, Credentials cred, String uname) throws IOException {
// Use method addDelegationTokens instead of getDelegationToken to get all the tokens including KMS.
[4/4] hive git commit: HIVE-12364 : Distcp job fails when run under
Tez (Prasanth J via Ashutosh Chauhan)
Posted by pr...@apache.org.
HIVE-12364 : Distcp job fails when run under Tez (Prasanth J via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e1e6c04d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e1e6c04d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e1e6c04d
Branch: refs/heads/branch-1
Commit: e1e6c04d6b605fddfec131a8955d33bc63853f2f
Parents: ea4073f
Author: Prasanth Jayachandran <j....@gmail.com>
Authored: Mon Nov 9 17:59:37 2015 -0800
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:58:46 2015 -0600
----------------------------------------------------------------------
itests/qtest/pom.xml | 6 ++++++
.../src/test/resources/testconfiguration.properties | 1 +
.../apache/hadoop/hive/ql/parse/SemanticAnalyzer.java | 1 +
.../test/queries/clientpositive/insert_dir_distcp.q | 9 +++++++++
.../results/clientpositive/insert_dir_distcp.q.out | 14 ++++++++++++++
.../clientpositive/tez/insert_dir_distcp.q.out | 14 ++++++++++++++
.../org/apache/hadoop/hive/shims/Hadoop23Shims.java | 8 +++++---
7 files changed, 50 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/itests/qtest/pom.xml
----------------------------------------------------------------------
diff --git a/itests/qtest/pom.xml b/itests/qtest/pom.xml
index 187b96e..7dfbd85 100644
--- a/itests/qtest/pom.xml
+++ b/itests/qtest/pom.xml
@@ -225,6 +225,12 @@
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-distcp</artifactId>
+ <version>${hadoop-23.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop-23.version}</version>
<classifier>tests</classifier>
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 30cb9f1..03b07ce 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -26,6 +26,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\
infer_bucket_sort_num_buckets.q,\
infer_bucket_sort_reducers_power_two.q,\
input16_cc.q,\
+ insert_dir_distcp.q,\
join1.q,\
join_acid_non_acid.q,\
leftsemijoin_mr.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c76f628..71ec3d4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/queries/clientpositive/insert_dir_distcp.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insert_dir_distcp.q b/ql/src/test/queries/clientpositive/insert_dir_distcp.q
new file mode 100644
index 0000000..6582938
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_dir_distcp.q
@@ -0,0 +1,9 @@
+set hive.exec.copyfile.maxsize=400;
+
+set tez.am.log.level=INFO;
+set tez.task.log.level=INFO;
+-- see TEZ-2931 for using INFO logging
+
+insert overwrite directory '/tmp/src' select * from src;
+
+dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/src/;
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
new file mode 100644
index 0000000..b70fa01
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insert_dir_distcp.q.out
@@ -0,0 +1,14 @@
+PREHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+Found 1 items
+#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
new file mode 100644
index 0000000..b70fa01
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/insert_dir_distcp.q.out
@@ -0,0 +1,14 @@
+PREHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- see TEZ-2931 for using INFO logging
+
+#### A masked pattern was here ####
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+Found 1 items
+#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/e1e6c04d/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 89789de..2e09882 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.shims;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.io.InputStream;
import java.lang.reflect.Method;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
@@ -64,9 +65,6 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hdfs.protocol.DirectoryListing;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocol.HdfsLocatedFileStatus;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.protocol.EncryptionZone;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -93,6 +91,7 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.authentication.util.KerberosName;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
@@ -1174,11 +1173,14 @@ public class Hadoop23Shims extends HadoopShimsSecure {
options.setSkipCRC(true);
options.preserve(FileAttribute.BLOCKSIZE);
try {
+ conf.setBoolean("mapred.mapper.new-api", true);
DistCp distcp = new DistCp(conf, options);
distcp.execute();
return true;
} catch (Exception e) {
throw new IOException("Cannot execute DistCp process: " + e, e);
+ } finally {
+ conf.setBoolean("mapred.mapper.new-api", false);
}
}
[3/4] hive git commit: HIVE-11820 : export tables with size of >32MB
throws java.lang.IllegalArgumentException: Skip CRC is valid only with update
options (Takahiko Saito via Ashutosh Chauhan)
Posted by pr...@apache.org.
HIVE-11820 : export tables with size of >32MB throws java.lang.IllegalArgumentException: Skip CRC is valid only with update options (Takahiko Saito via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ea4073fc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ea4073fc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ea4073fc
Branch: refs/heads/branch-1
Commit: ea4073fc9041d829a06eff80a81e075d9dfba71d
Parents: 4cdd19a
Author: Takahiko Saito <ty...@gmail.com>
Authored: Wed Sep 16 15:34:00 2015 -0800
Committer: Prasanth Jayachandran <j....@gmail.com>
Committed: Tue Dec 8 17:34:16 2015 -0600
----------------------------------------------------------------------
.../main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java | 5 ++++-
.../main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java | 4 +++-
2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ea4073fc/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
----------------------------------------------------------------------
diff --git a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
index 32b072e..a79a36f 100644
--- a/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
+++ b/shims/0.20S/src/main/java/org/apache/hadoop/hive/shims/Hadoop20SShims.java
@@ -73,6 +73,8 @@ import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.distcp2.DistCp;
import org.apache.hadoop.tools.distcp2.DistCpOptions;
+import org.apache.hadoop.tools.distcp2.DistCpOptions.FileAttribute;
+
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.VersionInfo;
@@ -672,8 +674,9 @@ public class Hadoop20SShims extends HadoopShimsSecure {
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
- options.setSkipCRC(true);
options.setSyncFolder(true);
+ options.setSkipCRC(true);
+ options.preserve(FileAttribute.BLOCKSIZE);
try {
DistCp distcp = new DistCp(conf, options);
distcp.execute();
http://git-wip-us.apache.org/repos/asf/hive/blob/ea4073fc/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index cbac554..89789de 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -95,6 +95,7 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.tools.DistCp;
import org.apache.hadoop.tools.DistCpOptions;
+import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.test.MiniTezCluster;
@@ -1169,8 +1170,9 @@ public class Hadoop23Shims extends HadoopShimsSecure {
public boolean runDistCp(Path src, Path dst, Configuration conf) throws IOException {
DistCpOptions options = new DistCpOptions(Collections.singletonList(src), dst);
- options.setSkipCRC(true);
options.setSyncFolder(true);
+ options.setSkipCRC(true);
+ options.preserve(FileAttribute.BLOCKSIZE);
try {
DistCp distcp = new DistCp(conf, options);
distcp.execute();