You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ve...@apache.org on 2015/07/07 16:23:03 UTC

sqoop git commit: SQOOP-2370: Netezza - need to support additional options for full control character handling (Venkat Ranganathan)

Repository: sqoop
Updated Branches:
  refs/heads/trunk e21529ac6 -> c6627c04c


SQOOP-2370: Netezza - need to support additional options for full control character handling
   (Venkat Ranganathan)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/c6627c04
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/c6627c04
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/c6627c04

Branch: refs/heads/trunk
Commit: c6627c04cdbc048a193d271c6e94023f921db4e3
Parents: e21529a
Author: Venkat Ranganathan <ve...@hortonworks.com>
Authored: Mon Jul 6 20:13:19 2015 -0700
Committer: Venkat Ranganathan <ve...@hortonworks.com>
Committed: Mon Jul 6 20:13:19 2015 -0700

----------------------------------------------------------------------
 src/docs/user/connectors.txt                    | 26 +++++++++++++----
 .../sqoop/manager/DirectNetezzaManager.java     | 24 ++++++++++++++++
 .../NetezzaExternalTableExportMapper.java       | 30 +++++++++++++++-----
 3 files changed, 68 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/c6627c04/src/docs/user/connectors.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/connectors.txt b/src/docs/user/connectors.txt
index 496d3cf..c5ce4d6 100644
--- a/src/docs/user/connectors.txt
+++ b/src/docs/user/connectors.txt
@@ -381,31 +381,47 @@ Argument                              Description
                                       of data slices of a table or all\
                                       Default is "false" for standard mode\
                                       and "true" for direct mode.
-+--max-errors+                        Applicable only in direct mode.\
++--max-errors+                        Applicable only for direct mode export.\
                                       This option specifies the error threshold\
                                       per mapper while transferring data. If\
                                       the number of errors encountered exceed\
                                       this threshold then the job will fail.
                                       Default value is 1.
-+--log-dir+                           Applicable only in direct mode.\
++--log-dir+                           Applicable only for direct mode export.\
                                       Specifies the directory where Netezza\
                                       external table operation logs are stored\
                                       on the hadoop filesystem.  Logs are\
                                       stored under this directory with one\
                                       directory for the job and sub-directories\
                                       for each task number and attempt.\
-                                      Default value is the user home directory.
-+--trunc-string+                      Applicable only in direct mode.\
+                                      Default value is the user home directory.\
+                                      The nzlog and nzbad files will be under\
+                                      (logdir)/job-id/job-attempt-id.
++--trunc-string+                      Applicable only for direct mode export.\
                                       Specifies whether the system \
                                       truncates strings to the declared\
                                       storage and loads the data. By default\
                                       truncation of strings is reported as an\
                                       error.
-+--ctrl-chars+                        Applicable only in direct mode.\
++--ctrl-chars+                        Applicable only for direct mode export.\
                                       Specifies whether control characters \
                                       (ASCII chars 1 - 31) can be allowed \
                                       to be part of char/nchar/varchar/nvarchar\
                                       columns.  Default is false.
++--crin-string+                       Applicable only for direct mode export.\
+                                      Specifies whether carriage return \
+                                      (ASCII char 13) can be allowed \
+                                      to be part of char/nchar/varchar/nvarchar\
+                                      columns.  Note that CR can no longer \
+                                      be a record delimiter with this option.\
+                                      Default is false.
++--ignore-zero+                       Applicable only for direct mode export.\
+                                      Specifies whether the NUL character \
+                                      (ASCII char 0) should be scanned \
+                                      and ignored as part of the data loaded\
+                                      into char/nchar/varchar/nvarchar \
+                                      columns.\
+                                      Default is false.
 --------------------------------------------------------------------------------
 
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/c6627c04/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java b/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java
index 06fa976..2ec0770 100644
--- a/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java
+++ b/src/java/org/apache/sqoop/manager/DirectNetezzaManager.java
@@ -64,6 +64,18 @@ public class DirectNetezzaManager extends NetezzaManager {
   public static final String NETEZZA_CTRL_CHARS_LONG_ARG =
       "ctrl-chars";
 
+
+  public static final String NETEZZA_CRIN_STRING_OPT =
+      "netezza.crin.string";
+  public static final String NETEZZA_CRIN_STRING_LONG_ARG =
+      "crin-string";
+
+
+  public static final String NETEZZA_IGNORE_ZERO_OPT =
+      "netezza.ignore.zero";
+  public static final String NETEZZA_IGNORE_ZERO_LONG_ARG =
+      "ignore-zero";
+
   public static final String NETEZZA_TRUNC_STRING_OPT =
       "netezza.trunc.string";
   public static final String NETEZZA_TRUNC_STRING_LONG_ARG =
@@ -268,6 +280,12 @@ public class DirectNetezzaManager extends NetezzaManager {
     netezzaOpts.addOption(OptionBuilder.withArgName(NETEZZA_TRUNC_STRING_OPT)
       .withDescription("Truncate string to declared storage size")
       .withLongOpt(NETEZZA_TRUNC_STRING_LONG_ARG).create());
+    netezzaOpts.addOption(OptionBuilder.withArgName(NETEZZA_CRIN_STRING_OPT)
+      .withDescription("Truncate string to declared storage size")
+      .withLongOpt(NETEZZA_CRIN_STRING_LONG_ARG).create());
+    netezzaOpts.addOption(OptionBuilder.withArgName(NETEZZA_IGNORE_ZERO_OPT)
+      .withDescription("Truncate string to declared storage size")
+      .withLongOpt(NETEZZA_IGNORE_ZERO_LONG_ARG).create());
     return netezzaOpts;
   }
 
@@ -296,6 +314,12 @@ public class DirectNetezzaManager extends NetezzaManager {
     conf.setBoolean(NETEZZA_TRUNC_STRING_OPT,
       cmdLine.hasOption(NETEZZA_TRUNC_STRING_LONG_ARG));
 
+    conf.setBoolean(NETEZZA_CRIN_STRING_OPT,
+      cmdLine.hasOption(NETEZZA_CRIN_STRING_LONG_ARG));
+
+    conf.setBoolean(NETEZZA_IGNORE_ZERO_OPT,
+      cmdLine.hasOption(NETEZZA_IGNORE_ZERO_LONG_ARG));
+
     // Always true for Netezza direct mode access
     conf.setBoolean(NETEZZA_DATASLICE_ALIGNED_ACCESS_OPT, true);
   }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/c6627c04/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java
index f377fb9..aa058d1 100644
--- a/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/netezza/NetezzaExternalTableExportMapper.java
@@ -87,7 +87,10 @@ public abstract class NetezzaExternalTableExportMapper<K, V> extends
         conf.getBoolean(DirectNetezzaManager.NETEZZA_CTRL_CHARS_OPT, false);
     boolean truncString =
         conf.getBoolean(DirectNetezzaManager.NETEZZA_TRUNC_STRING_OPT, false);
-
+    boolean ignoreZero =
+        conf.getBoolean(DirectNetezzaManager.NETEZZA_IGNORE_ZERO_OPT, false);
+    boolean crinString =
+        conf.getBoolean(DirectNetezzaManager.NETEZZA_CRIN_STRING_OPT, false);
     StringBuilder sqlStmt = new StringBuilder(2048);
 
     sqlStmt.append("INSERT INTO ");
@@ -96,13 +99,20 @@ public abstract class NetezzaExternalTableExportMapper<K, V> extends
     sqlStmt.append(fifoFile.getAbsolutePath());
     sqlStmt.append("' USING (REMOTESOURCE 'JDBC' ");
     sqlStmt.append(" BOOLSTYLE 'TRUE_FALSE' ");
-    sqlStmt.append(" CRINSTRING FALSE ");
+    if (crinString) {
+      sqlStmt.append(" CRINSTRING TRUE ");
+    } else {
+      sqlStmt.append(" CRINSTRING FALSE ");
+    }
     if (ctrlChars) {
       sqlStmt.append(" CTRLCHARS TRUE ");
     }
     if (truncString) {
       sqlStmt.append(" TRUNCSTRING TRUE ");
     }
+    if (ignoreZero) {
+      sqlStmt.append(" IGNOREZERO TRUE ");
+    }
     sqlStmt.append(" DELIMITER ");
     sqlStmt.append(Integer.toString(fd));
     sqlStmt.append(" ENCODING 'internal' ");
@@ -228,18 +238,24 @@ public abstract class NetezzaExternalTableExportMapper<K, V> extends
         }
         cleanup(context);
       } finally {
-        recordWriter.close();
-        extTableThread.join();
+        try {
+          recordWriter.close();
+          extTableThread.join();
+        } catch (Exception e) {
+          LOG.debug("Exception cleaning up mapper operation : " + e.getMessage());
+        }
         counter.stopClock();
         LOG.info("Transferred " + counter.toString());
+        FileUploader.uploadFilesToDFS(taskAttemptDir.getAbsolutePath(),
+          localLogDir, logDir, context.getJobID().toString(),
+          conf);
+
         if (extTableThread.hasExceptions()) {
           extTableThread.printException();
           throw new IOException(extTableThread.getException());
         }
       }
-      FileUploader.uploadFilesToDFS(taskAttemptDir.getAbsolutePath(),
-        localLogDir, logDir, context.getJobID().toString(),
-        conf);
+
     }
   }