You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ch...@apache.org on 2012/09/20 21:15:12 UTC

git commit: SQOOP-603 Support small intervals in IntegerSplitter implementation

Updated Branches:
  refs/heads/trunk 75f2b707b -> 5616152ac


SQOOP-603 Support small intervals in IntegerSplitter implementation

(Jarek Jarcec Cecho via Cheolsoo Park)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5616152a
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5616152a
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5616152a

Branch: refs/heads/trunk
Commit: 5616152ac4c96d6c0589768b982cf67f3277df74
Parents: 75f2b70
Author: Cheolsoo Park <ch...@apache.org>
Authored: Thu Sep 20 12:14:15 2012 -0700
Committer: Cheolsoo Park <ch...@apache.org>
Committed: Thu Sep 20 12:14:15 2012 -0700

----------------------------------------------------------------------
 .../mapreduce/db/DataDrivenDBInputFormat.java      |    3 ++
 .../apache/sqoop/mapreduce/db/IntegerSplitter.java |   18 +++++++++++++++
 .../sqoop/mapreduce/db/TestIntegerSplitter.java    |    8 +++++-
 3 files changed, 28 insertions(+), 1 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
index 35b74eb..6f4b208 100644
--- a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
@@ -319,6 +319,9 @@ public class DataDrivenDBInputFormat<T extends DBWritable>
     public DataDrivenDBInputSplit(final String lower, final String upper) {
       this.lowerBoundClause = lower;
       this.upperBoundClause = upper;
+
+      LOG.debug("Creating input split with lower bound '" + lower
+        + "' and upper bound '" + upper + "'");
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
index 8e7a096..e6fefc6 100644
--- a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
@@ -141,6 +141,24 @@ public class IntegerSplitter implements DBSplitter  {
       if (splits.size() == 1) {
         // make a valid singleton split
         splits.add(maxVal);
+      } else if ((maxVal - minVal) <= numSplits) {
+        // Edge case when there are fewer split points (intervals) than the
+        // requested number of splits. In such a case the last split would
+        // cover two values; for example, the interval [1, 5] broken down
+        // into 5 splits would create the following conditions:
+        //  * 1 <= x < 2
+        //  * 2 <= x < 3
+        //  * 3 <= x < 4
+        //  * 4 <= x <= 5
+        // Notice that the last split has twice as much data as the others.
+        // In those cases we add one more maxVal at the end to create the
+        // following splits instead:
+        //  * 1 <= x < 2
+        //  * 2 <= x < 3
+        //  * 3 <= x < 4
+        //  * 4 <= x < 5
+        //  * 5 <= x <= 5
+        splits.add(maxVal);
       }
 
       return splits;

http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
index 22d5140..136afc7 100644
--- a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
+++ b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
@@ -102,7 +102,13 @@ public class TestIntegerSplitter extends TestCase {
 
   public void testTooManySplits() throws SQLException {
     List<Long> splits = new IntegerSplitter().split(5, 3, 5);
-    long [] expected = { 3, 4, 5 };
+    long [] expected = { 3, 4, 5, 5};
+    assertLongArrayEquals(expected, toLongArray(splits));
+  }
+
+  public void testExactSplitsAsInterval() throws SQLException {
+    List<Long> splits = new IntegerSplitter().split(5, 1, 5);
+    long [] expected = { 1, 2, 3, 4, 5, 5};
     assertLongArrayEquals(expected, toLongArray(splits));
   }