You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/07/04 01:13:01 UTC

git commit: SQOOP-1113: Sqoop2: Text splitter will fail on interval "Breezy Badger" to "Warty Warthog"

Updated Branches:
  refs/heads/sqoop2 4d9bae84e -> 59c2188be


SQOOP-1113: Sqoop2: Text splitter will fail on interval "Breezy Badger" to "Warty Warthog"

(Venkat Ranganathan via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/59c2188b
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/59c2188b
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/59c2188b

Branch: refs/heads/sqoop2
Commit: 59c2188be7831f97d399a045d929b68a53195973
Parents: 4d9bae8
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Wed Jul 3 16:12:17 2013 -0700
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Wed Jul 3 16:12:17 2013 -0700

----------------------------------------------------------------------
 .../jdbc/GenericJdbcImportPartitioner.java      | 29 ++++++++++++++++++--
 .../connector/jdbc/TestImportPartitioner.java   | 18 ++++++++++++
 2 files changed, 44 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/59c2188b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcImportPartitioner.java
----------------------------------------------------------------------
diff --git a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcImportPartitioner.java b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcImportPartitioner.java
index 74dd1b8..4401800 100644
--- a/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcImportPartitioner.java
+++ b/connector/connector-generic-jdbc/src/main/java/org/apache/sqoop/connector/jdbc/GenericJdbcImportPartitioner.java
@@ -244,9 +244,16 @@ public class GenericJdbcImportPartitioner extends Partitioner<ConnectionConfigur
 
     BigDecimal curVal = minStringBD;
 
-    while (curVal.compareTo(maxStringBD) <= 0) {
+    int parts = 0;
+
+    while (curVal.compareTo(maxStringBD) <= 0 && parts < numberPartitions) {
       splitPoints.add(curVal);
       curVal = curVal.add(splitSize);
+      // bigDecimalToText approximates to next comparison location.
+      // Make sure we are still in range
+      String text = bigDecimalToText(curVal);
+      curVal = textToBigDecimal(text);
+      ++parts;
     }
 
     if (splitPoints.size() == 0
@@ -530,7 +537,7 @@ public class GenericJdbcImportPartitioner extends Partitioner<ConnectionConfigur
    *  is restricted to prevent repeating fractions and rounding errors
    *  towards the higher fraction positions.
    */
-  private static final BigDecimal UNITS_BASE = new BigDecimal(2097152);
+  private static final BigDecimal UNITS_BASE = new BigDecimal(0x200000);
   private static final int MAX_CHARS_TO_CONVERT = 4;
 
   private BigDecimal textToBigDecimal(String str) {
@@ -557,12 +564,28 @@ public class GenericJdbcImportPartitioner extends Partitioner<ConnectionConfigur
     for (int n = 0; n < MAX_CHARS_TO_CONVERT; ++n) {
       curVal = curVal.multiply(UNITS_BASE);
       int cp = curVal.intValue();
-      if (0 == cp) {
+      if (0 >= cp) {
         break;
       }
+
+      if (!Character.isDefined(cp)) {
+        int t_cp = Character.MAX_CODE_POINT < cp ? 1 : cp;
+        // We are guaranteed to find at least one character
+        while(!Character.isDefined(t_cp)) {
+          ++t_cp;
+          if (t_cp == cp) {
+            break;
+          }
+          if (t_cp >= Character.MAX_CODE_POINT || t_cp <= 0)  {
+            t_cp = 1;
+          }
+        }
+        cp = t_cp;
+      }
       curVal = curVal.subtract(new BigDecimal(cp));
       sb.append(Character.toChars(cp));
     }
+
     return sb.toString();
   }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/59c2188b/connector/connector-generic-jdbc/src/test/java/org/apache/sqoop/connector/jdbc/TestImportPartitioner.java
----------------------------------------------------------------------
diff --git a/connector/connector-generic-jdbc/src/test/java/org/apache/sqoop/connector/jdbc/TestImportPartitioner.java b/connector/connector-generic-jdbc/src/test/java/org/apache/sqoop/connector/jdbc/TestImportPartitioner.java
index d46e4dd..0afec49 100644
--- a/connector/connector-generic-jdbc/src/test/java/org/apache/sqoop/connector/jdbc/TestImportPartitioner.java
+++ b/connector/connector-generic-jdbc/src/test/java/org/apache/sqoop/connector/jdbc/TestImportPartitioner.java
@@ -414,6 +414,24 @@ public class TestImportPartitioner extends TestCase {
         "'Y' <= VCCOL AND VCCOL <= 'Z'",
     });
   }
+  public void testVarcharPartition2() throws Exception {
+    MutableContext context = new MutableMapContext();
+    context.setString(GenericJdbcConnectorConstants
+      .CONNECTOR_JDBC_PARTITION_COLUMNNAME, "VCCOL");
+    context.setString(GenericJdbcConnectorConstants
+      .CONNECTOR_JDBC_PARTITION_COLUMNTYPE, String.valueOf(Types.VARCHAR));
+    context.setString(GenericJdbcConnectorConstants
+      .CONNECTOR_JDBC_PARTITION_MINVALUE, "Breezy Badger");
+    context.setString(GenericJdbcConnectorConstants
+      .CONNECTOR_JDBC_PARTITION_MAXVALUE, "Warty Warthog");
+
+    ConnectionConfiguration connConf = new ConnectionConfiguration();
+    ImportJobConfiguration jobConf = new ImportJobConfiguration();
+    Partitioner partitioner = new GenericJdbcImportPartitioner();
+    PartitionerContext partitionerContext = new PartitionerContext(context, 5);
+    List<Partition> partitions = partitioner.getPartitions(partitionerContext, connConf, jobConf);
+    assertEquals(partitions.size(), 5);
+  }
 
   public void testVarcharPartitionWithCommonPrefix() throws Exception {
     MutableContext context = new MutableMapContext();