Posted to commits@sqoop.apache.org by gw...@apache.org on 2015/01/23 19:13:47 UTC

sqoop git commit: SQOOP-1572: Sqoop2: Duplicate Column Name in Multiple Tables Import

Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 d95e7537a -> 5b6ac608c


SQOOP-1572: Sqoop2: Duplicate Column Name in Multiple Tables Import


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5b6ac608
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5b6ac608
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5b6ac608

Branch: refs/heads/sqoop2
Commit: 5b6ac608c33a3701fd9452cd45dc727d1d64e60b
Parents: d95e753
Author: Gwen Shapira <cs...@gmail.com>
Authored: Fri Jan 23 10:03:11 2015 -0800
Committer: Gwen Shapira <cs...@gmail.com>
Committed: Fri Jan 23 10:03:11 2015 -0800

----------------------------------------------------------------------
 .../jdbc/GenericJdbcFromInitializer.java        |  4 +-
 docs/src/site/sphinx/Connectors.rst             |  3 +-
 .../jdbc/generic/FromRDBMSToHDFSTest.java       | 98 ++++++++++++++++++++
 3 files changed, 102 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/5b6ac608/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcFromInitializer.java
----------------------------------------------------------------------
diff --git a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcFromInitializer.java b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcFromInitializer.java
index ff42949..425b2cc 100644
--- a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcFromInitializer.java
+++ b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcFromInitializer.java
@@ -85,9 +85,9 @@ public class GenericJdbcFromInitializer extends Initializer<LinkConfiguration, F
 
       rsmt = rs.getMetaData();
       for (int i = 1 ; i <= rsmt.getColumnCount(); i++) {
-        String columnName = rsmt.getColumnName(i);
+        String columnName = rsmt.getColumnLabel(i);
         if (StringUtils.isEmpty(columnName)) {
-          columnName = rsmt.getColumnLabel(i);
+          columnName = rsmt.getColumnName(i);
           if (StringUtils.isEmpty(columnName)) {
             columnName = "Column " + i;
           }
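
For readers skimming the hunk above: the fix swaps the lookup order so that the SQL alias (the JDBC "label") wins over the underlying column name. Below is a self-contained sketch of the resulting fallback chain; it uses only java.sql plus a plain empty-check in place of Sqoop's StringUtils, and the class and method names are illustrative, not actual Sqoop code.

import java.sql.ResultSetMetaData;
import java.sql.SQLException;

public final class ColumnNames {
  // Mirrors the patched logic: prefer the "AS" alias (label), then the
  // underlying column name, then a positional placeholder.
  static String resolve(ResultSetMetaData rsmt, int i) throws SQLException {
    String name = rsmt.getColumnLabel(i);      // carries the SQL "AS" alias
    if (name == null || name.isEmpty()) {
      name = rsmt.getColumnName(i);            // underlying column name
      if (name == null || name.isEmpty()) {
        name = "Column " + i;                  // last-resort positional name
      }
    }
    return name;
  }
}

With the old order, a join projecting two "id" columns yielded two schema columns both named "id"; preferring the label lets user-supplied aliases disambiguate them.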

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5b6ac608/docs/src/site/sphinx/Connectors.rst
----------------------------------------------------------------------
diff --git a/docs/src/site/sphinx/Connectors.rst b/docs/src/site/sphinx/Connectors.rst
index bcc5b43..f67e187 100644
--- a/docs/src/site/sphinx/Connectors.rst
+++ b/docs/src/site/sphinx/Connectors.rst
@@ -74,7 +74,7 @@ Inputs associated with the Job configuration for the FROM direction include:
 |                             |         | *Optional*. See note below.                                             |                                             |
 +-----------------------------+---------+-------------------------------------------------------------------------+---------------------------------------------+
 | Table SQL statement         | String  | The SQL statement used to perform a **free form query**.                | ``SELECT COUNT(*) FROM test ${CONDITIONS}`` |
-|                             |         | *Optional*. See note below.                                             |                                             |
+|                             |         | *Optional*. See notes below.                                            |                                             |
 +-----------------------------+---------+-------------------------------------------------------------------------+---------------------------------------------+
 | Table column names          | String  | Columns to extract from the JDBC data source.                           | col1,col2                                   |
 |                             |         | *Optional* Comma separated list of columns.                             |                                             |
@@ -94,6 +94,7 @@ Inputs associated with the Job configuration for the FROM direction include:
 
 1. *Table name* and *Table SQL statement* are mutually exclusive. If *Table name* is provided, the *Table SQL statement* should not be provided. If *Table SQL statement* is provided then *Table name* should not be provided.
 2. *Table column names* should be provided only if *Table name* is provided.
+3. If the query produces columns with identical names (such as the join keys of two tables), column aliases are required. For example: ``SELECT table1.id as "i", table2.id as "j" FROM table1 INNER JOIN table2 ON table1.id = table2.id``.
 
 **TO Job Configuration**
 ++++++++++++++++++++++++
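
To see why the added note about aliases matters, here is a minimal, hypothetical JDBC demo (not part of the patch; it assumes an in-memory Apache Derby database with derby.jar on the classpath) that inspects the metadata of exactly the aliased join from the note:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;

public class AliasDemo {
  public static void main(String[] args) throws SQLException {
    try (Connection conn = DriverManager.getConnection("jdbc:derby:memory:demo;create=true");
         Statement stmt = conn.createStatement()) {
      stmt.execute("CREATE TABLE table1 (id INT)");
      stmt.execute("CREATE TABLE table2 (id INT)");
      try (ResultSet rs = stmt.executeQuery(
          "SELECT table1.id AS \"i\", table2.id AS \"j\""
              + " FROM table1 INNER JOIN table2 ON table1.id = table2.id")) {
        ResultSetMetaData rsmt = rs.getMetaData();
        for (int i = 1; i <= rsmt.getColumnCount(); i++) {
          // The JDBC spec defines getColumnLabel() to return the AS alias;
          // whether getColumnName() returns the alias too or the raw name
          // ("ID") is driver-dependent.
          System.out.println(rsmt.getColumnLabel(i) + " / " + rsmt.getColumnName(i));
        }
      }
    }
  }
}

Without the aliases, both columns would share the name "id" and collide in the generated schema; the testDuplicateColumns integration test below exercises the same pattern end to end.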

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5b6ac608/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
index 85b9d2d..aa9f212 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/FromRDBMSToHDFSTest.java
@@ -122,4 +122,102 @@ public class FromRDBMSToHDFSTest extends ConnectorTestCase {
     // Clean up testing table
     dropTable();
   }
+
+  @Test
+  public void testSql() throws Exception {
+    createAndLoadTableCities();
+
+    // RDBMS link
+    MLink rdbmsLink = getClient().createLink("generic-jdbc-connector");
+    fillRdbmsLinkConfig(rdbmsLink);
+    saveLink(rdbmsLink);
+
+    // HDFS link
+    MLink hdfsLink = getClient().createLink("hdfs-connector");
+    saveLink(hdfsLink);
+
+    // Job creation
+    MJob job = getClient().createJob(rdbmsLink.getPersistenceId(), hdfsLink.getPersistenceId());
+
+    // Connector values
+    MConfigList configs = job.getJobConfig(Direction.FROM);
+    configs.getStringInput("fromJobConfig.sql").setValue("SELECT " + provider.escapeColumnName("id")
+        + " FROM " + provider.escapeTableName(getTableName()) + " WHERE ${CONDITIONS}");
+    configs.getStringInput("fromJobConfig.partitionColumn").setValue(provider.escapeColumnName("id"));
+    fillHdfsToConfig(job, ToFormat.TEXT_FILE);
+    saveJob(job);
+
+    MSubmission submission = getClient().startJob(job.getPersistenceId());
+    assertTrue(submission.getStatus().isRunning());
+
+    // Wait until the job finishes - this active waiting will be removed
+    // once the Sqoop client API gains blocking support.
+    do {
+      Thread.sleep(5000);
+      submission = getClient().getJobStatus(job.getPersistenceId());
+    } while (submission.getStatus().isRunning());
+
+    // Assert correct output
+    assertTo(
+        "1",
+        "2",
+        "3",
+        "4"
+    );
+
+    // Clean up testing table
+    dropTable();
+  }
+
+  @Test
+  public void testDuplicateColumns() throws Exception {
+    createAndLoadTableCities();
+
+    // RDBMS link
+    MLink rdbmsLink = getClient().createLink("generic-jdbc-connector");
+    fillRdbmsLinkConfig(rdbmsLink);
+    saveLink(rdbmsLink);
+
+    // HDFS link
+    MLink hdfsLink = getClient().createLink("hdfs-connector");
+    saveLink(hdfsLink);
+
+    // Job creation
+    MJob job = getClient().createJob(rdbmsLink.getPersistenceId(), hdfsLink.getPersistenceId());
+
+    // Connector values
+    String partitionColumn = provider.escapeTableName(getTableName()) + "." + provider.escapeColumnName("id");
+    MConfigList configs = job.getJobConfig(Direction.FROM);
+    configs.getStringInput("fromJobConfig.sql").setValue(
+        "SELECT " + provider.escapeColumnName("id") + " as " + provider.escapeColumnName("i") + ", "
+            + provider.escapeColumnName("id") + " as " + provider.escapeColumnName("j")
+            + " FROM " + provider.escapeTableName(getTableName()) + " WHERE ${CONDITIONS}");
+    configs.getStringInput("fromJobConfig.partitionColumn").setValue(partitionColumn);
+    configs.getStringInput("fromJobConfig.boundaryQuery").setValue(
+        "SELECT MIN(" + partitionColumn + "), MAX(" + partitionColumn + ") FROM "
+            + provider.escapeTableName(getTableName()));
+    fillHdfsToConfig(job, ToFormat.TEXT_FILE);
+    saveJob(job);
+
+    MSubmission submission = getClient().startJob(job.getPersistenceId());
+    assertTrue(submission.getStatus().isRunning());
+
+    // Wait until the job finishes - this active waiting will be removed
+    // once the Sqoop client API gains blocking support.
+    do {
+      Thread.sleep(5000);
+      submission = getClient().getJobStatus(job.getPersistenceId());
+    } while (submission.getStatus().isRunning());
+
+    // Assert correct output
+    assertTo(
+        "1,1",
+        "2,2",
+        "3,3",
+        "4,4"
+    );
+
+    // Clean up testing table
+    dropTable();
+  }
 }