You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/10/15 00:32:39 UTC

git commit: SQOOP-1192: Add option "--skip-dist-cache" to allow Sqoop not copying jars in %SQOOP_HOME%\lib folder when launched by Oozie and use Oozie share lib

Updated Branches:
  refs/heads/trunk ddb81e185 -> 840711812


SQOOP-1192: Add option "--skip-dist-cache" to allow Sqoop not copying jars in %SQOOP_HOME%\lib folder when launched by Oozie and use Oozie share lib

(Shuaishuai Nie via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/84071181
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/84071181
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/84071181

Branch: refs/heads/trunk
Commit: 84071181265f98959ffdfc41425022f8251d2429
Parents: ddb81e1
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Mon Oct 14 15:31:11 2013 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Mon Oct 14 15:31:11 2013 -0700

----------------------------------------------------------------------
 src/docs/user/import.txt                               | 13 +++++++++++++
 src/java/org/apache/sqoop/SqoopOptions.java            |  9 +++++++++
 src/java/org/apache/sqoop/mapreduce/JobBase.java       |  4 ++++
 .../org/apache/sqoop/mapreduce/TextExportMapper.java   |  3 +--
 .../sqoop/mapreduce/hcat/SqoopHCatUtilities.java       |  4 ++++
 src/java/org/apache/sqoop/tool/BaseSqoopTool.java      | 10 ++++++++++
 src/test/com/cloudera/sqoop/TestSqoopOptions.java      |  6 ++++++
 7 files changed, 47 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/docs/user/import.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/import.txt b/src/docs/user/import.txt
index 71b50d8..dfc9b39 100644
--- a/src/docs/user/import.txt
+++ b/src/docs/user/import.txt
@@ -208,6 +208,19 @@ multi-column indices. If your table has no index column, or has a
 multi-column key, then you must also manually choose a splitting
 column.
 
+Controlling Distributed Cache
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sqoop will copy the jars in the $SQOOP_HOME/lib folder to the job cache
+every time it starts a Sqoop job. When launched by Oozie this is
+unnecessary, since Oozie uses its own Sqoop share lib, which keeps Sqoop
+dependencies in the distributed cache. Oozie will localize the Sqoop
+dependencies on each worker node only once, during the first Sqoop
+job, and reuse the jars on the worker node for subsequent jobs. Using
+the option +--skip-dist-cache+ in the Sqoop command when launched by Oozie
+will skip the step in which Sqoop copies its dependencies to the job cache
+and save massive I/O.
+
 Controlling the Import Process
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/SqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/SqoopOptions.java b/src/java/org/apache/sqoop/SqoopOptions.java
index 836f588..13637b5 100644
--- a/src/java/org/apache/sqoop/SqoopOptions.java
+++ b/src/java/org/apache/sqoop/SqoopOptions.java
@@ -165,6 +165,7 @@ public class SqoopOptions implements Cloneable {
   @StoredAsProperty("hcatalog.storage.stanza")
   private String hCatStorageStanza;
   private String hCatHome; // not serialized to metastore.
+  private boolean skipDistCache;
 
   // User explicit mapping of types
   private Properties mapColumnJava; // stored as map.colum.java
@@ -2198,4 +2199,12 @@ public class SqoopOptions implements Cloneable {
   public void setCall(String theCall) {
     this.call = theCall;
   }
+
+  public void setSkipDistCache(boolean skip) {
+    this.skipDistCache = skip;
+  }
+
+  public boolean isSkipDistCache() {
+    return this.skipDistCache;
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/JobBase.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/JobBase.java b/src/java/org/apache/sqoop/mapreduce/JobBase.java
index 322df1c..ddef421 100644
--- a/src/java/org/apache/sqoop/mapreduce/JobBase.java
+++ b/src/java/org/apache/sqoop/mapreduce/JobBase.java
@@ -133,6 +133,10 @@ public class JobBase {
    */
   protected void cacheJars(Job job, ConnManager mgr)
       throws IOException {
+    if (options.isSkipDistCache()) {
+      LOG.info("Not adding sqoop jars to distributed cache as requested");
+      return;
+    }
 
     Configuration conf = job.getConfiguration();
     FileSystem fs = FileSystem.getLocal(conf);

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
index fb1edfd..8a354b5 100644
--- a/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/TextExportMapper.java
@@ -109,8 +109,7 @@ public class TextExportMapper
       LOG.error("due to the batching nature of export.");
       LOG.error("");
 
-      throw new IOException("Can't export data, please check task tracker logs",
-        e);
+      throw new IOException("Can't export data, please check failed map task logs", e);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
index b05f587..09652df 100644
--- a/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
+++ b/src/java/org/apache/sqoop/mapreduce/hcat/SqoopHCatUtilities.java
@@ -730,6 +730,10 @@ public final class SqoopHCatUtilities {
       LOG.info("Not adding hcatalog jars to distributed cache in local mode");
       return;
     }
+    if (options.isSkipDistCache()) {
+      LOG.info("Not adding hcatalog jars to distributed cache as requested");
+      return;
+    }
     Configuration conf = job.getConfiguration();
     String hiveHome = null;
     String hCatHome = null;

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
index a1080d3..9230f82 100644
--- a/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
+++ b/src/java/org/apache/sqoop/tool/BaseSqoopTool.java
@@ -155,6 +155,7 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
   public static final String UPDATE_KEY_ARG = "update-key";
   public static final String UPDATE_MODE_ARG = "update-mode";
   public static final String CALL_ARG = "call";
+  public static final String SKIP_DISTCACHE_ARG = "skip-dist-cache";
 
   // Arguments for validation.
   public static final String VALIDATE_ARG = "validate";
@@ -416,6 +417,10 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
             .hasArg().withDescription("Override $HADOOP_MAPRED_HOME_ARG")
             .withLongOpt(HADOOP_HOME_ARG)
             .create());
+    commonOpts.addOption(OptionBuilder
+        .withDescription("Skip copying jars to distributed cache")
+        .withLongOpt(SKIP_DISTCACHE_ARG)
+        .create());
 
     // misc (common)
     commonOpts.addOption(OptionBuilder
@@ -827,6 +832,11 @@ public abstract class BaseSqoopTool extends com.cloudera.sqoop.tool.SqoopTool {
       out.setDriverClassName(in.getOptionValue(DRIVER_ARG));
     }
 
+    if (in.hasOption(SKIP_DISTCACHE_ARG)) {
+      LOG.debug("Disabling dist cache");
+      out.setSkipDistCache(true);
+    }
+
     applyCredentialsOptions(in, out);
 
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/84071181/src/test/com/cloudera/sqoop/TestSqoopOptions.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/TestSqoopOptions.java b/src/test/com/cloudera/sqoop/TestSqoopOptions.java
index 90bc08e..686d398 100644
--- a/src/test/com/cloudera/sqoop/TestSqoopOptions.java
+++ b/src/test/com/cloudera/sqoop/TestSqoopOptions.java
@@ -280,6 +280,12 @@ public class TestSqoopOptions extends TestCase {
     assertEquals("String", mapping.get("id"));
   }
 
+  public void testSkipDistCacheOption() throws Exception {
+    String[] args = {"--skip-dist-cache"};
+    SqoopOptions opts = parse(args);
+    assertTrue(opts.isSkipDistCache());
+  }
+
   public void testPropertySerialization1() {
     // Test that if we write a SqoopOptions out to a Properties,
     // and then read it back in, we get all the same results.