You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/10/15 00:32:39 UTC
git commit: SQOOP-1192: Add option "--skip-dist-cache" to allow Sqoop
not copying jars in %SQOOP_HOME%\lib folder when launched by Oozie and use
Oozie share lib
Updated Branches:
refs/heads/trunk ddb81e185 -> 840711812
SQOOP-1192: Add option "--skip-dist-cache" to allow Sqoop not copying jars in %SQOOP_HOME%\lib folder when launched by Oozie and use Oozie share lib
(Shuaishuai Nie via Jarek Jarcec Cecho)
Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/84071181
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/84071181
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/84071181
Branch: refs/heads/trunk
Commit: 84071181265f98959ffdfc41425022f8251d2429
Parents: ddb81e1
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Mon Oct 14 15:31:11 2013 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Mon Oct 14 15:31:11 2013 -0700
----------------------------------------------------------------------
src/docs/user/import.txt | 13 +++++++++++++
src/java/org/apache/sqoop/SqoopOptions.java | 9 +++++++++
src/java/org/apache/sqoop/mapreduce/JobBase.java | 4 ++++
.../org/apache/sqoop/mapreduce/TextExportMapper.java | 3 +--
.../sqoop/mapreduce/hcat/SqoopHCatUtilities.java | 4 ++++
src/java/org/apache/sqoop/tool/BaseSqoopTool.java | 10 ++++++++++
src/test/com/cloudera/sqoop/TestSqoopOptions.java | 6 ++++++
7 files changed, 47 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/docs/user/import.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt
index 71b50d8..dfc9b39 100644
--- a/src/docs/user/import.txt
+++ b/src/docs/user/import.txt
@@ -208,6 +208,19 @@ multi-column indices. If your table has no index column, or has a
multi-column key, then you must also manually choose a splitting
column.
+Controlling Distributed Cache
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sqoop will copy the jars in the $SQOOP_HOME/lib folder to the job cache
+every time a Sqoop job is started. When launched by Oozie this is unnecessary
+since Oozie uses its own Sqoop share lib, which keeps Sqoop dependencies
+in the distributed cache. Oozie will do the localization on each
+worker node for the Sqoop dependencies only once during the first Sqoop
+job and reuse the jars on the worker node for subsequent jobs. Using the
+option +--skip-dist-cache+ in the Sqoop command when launched by Oozie will
+skip the step in which Sqoop copies its dependencies to the job cache and
+save massive I/O.
+
Controlling the Import Process
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/SqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java
index 836f588..13637b5 100644
--- a/src/java/org/apache/sqoop/SqoopOptions.java
+++ b/src/java/org/apache/sqoop/SqoopOptions.java
@@ -165,6 +165,7 @@ public class SqoopOptions implements Cloneable {
@StoredAsProperty("hcatalog.storage.stanza")
private String hCatStorageStanza;
private String hCatHome; // not serialized to metastore.
+ private boolean skipDistCache;
// User explicit mapping of types
private Properties mapColumnJava; // stored as map.colum.java
@@ -2198,4 +2199,12 @@ public class SqoopOptions implements Cloneable {
public void setCall(String theCall) {
this.call = theCall;
}
+
+ public void setSkipDistCache(boolean skip) {
+ this.skipDistCache = skip;
+ }
+
+ public boolean isSkipDistCache() {
+ return this.skipDistCache;
+ }
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/JobBase.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/JobBase.java b/src/java/org/apache/sqoop/mapreduce/JobBase.java
index 322df1c..ddef421 100644
--- a/src/java/org/apache/sqoop/mapreduce/JobBase.java
+++ b/src/java/org/apache/sqoop/mapreduce/JobBase.java
@@ -133,6 +133,10 @@ public class JobBase {
*/
protected void cacheJars(Job job, ConnManager mgr)
throws IOException {
+ if (options.isSkipDistCache()) {
+ LOG.info("Not adding sqoop jars to distributed cache as requested");
+ return;
+ }
Configuration conf = job.getConfiguration();
FileSystem fs = FileSystem.getLocal(conf);
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
index fb1edfd..8a354b5 100644
--- a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
@@ -109,8 +109,7 @@ public class TextExportMapper
LOG.error("due to the batching nature of export.");
LOG.error("");
- throw new IOException("Can't export data, please check task tracker logs",
- e);
+ throw new IOException("Can't export data, please check failed map task logs", e);
}
}
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
index b05f587..09652df 100644
--- a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
+++ b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
@@ -730,6 +730,10 @@ public final class SqoopHCatUtilities {
LOG.info("Not adding hcatalog jars to distributed cache in local mode");
return;
}
+ if (options.isSkipDistCache()) {
+ LOG.info("Not adding hcatalog jars to distributed cache as requested");
+ return;
+ }
Configuration conf = job.getConfiguration();
String hiveHome = null;
String hCatHome = null;
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
index a1080d3..9230f82 100644
--- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
+++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
@@ -155,6 +155,7 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
public static final String UPDATE_KEY_ARG = "update-key";
public static final String UPDATE_MODE_ARG = "update-mode";
public static final String CALL_ARG = "call";
+ public static final String SKIP_DISTCACHE_ARG = "skip-dist-cache";
// Arguments for validation.
public static final String VALIDATE_ARG = "validate";
@@ -416,6 +417,10 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
.hasArg().withDescription("Override $HADOOP_MAPRED_HOME_ARG")
.withLongOpt(HADOOP_HOME_ARG)
.create());
+ commonOpts.addOption(OptionBuilder
+ .withDescription("Skip copying jars to distributed cache")
+ .withLongOpt(SKIP_DISTCACHE_ARG)
+ .create());
// misc (common)
commonOpts.addOption(OptionBuilder
@@ -827,6 +832,11 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
}
+ if (in.hasOption(SKIP_DISTCACHE_ARG)) {
+ LOG.debug("Disabling dist cache");
+ out.setSkipDistCache(true);
+ }
+
applyCredentialsOptions(in, out);
http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/test/com/cloudera/sqoop/TestSqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java
index 90bc08e..686d398 100644
--- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java
+++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java
@@ -280,6 +280,12 @@ public class TestSqoopOptions extends TestCase {
assertEquals("String", mapping.get("id"));
}
+ public void testSkipDistCacheOption() throws Exception {
+ String[] args = {"--skip-dist-cache"};
+ SqoopOptions opts = parse(args);
+ assertTrue(opts.isSkipDistCache());
+ }
+
public void testPropertySerialization1() {
// Test that if we write a SqoopOptions out to a Properties,
// and then read it back in, we get all the same results.