You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ch...@apache.org on 2012/09/20 21:15:12 UTC
git commit: SQOOP-603 Support small intervals in IntegerSplitter
implementation
Updated Branches:
refs/heads/trunk 75f2b707b -> 5616152ac
SQOOP-603 Support small intervals in IntegerSplitter implementation
(Jarek Jarcec Cecho via Cheolsoo Park)
Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/5616152a
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/5616152a
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/5616152a
Branch: refs/heads/trunk
Commit: 5616152ac4c96d6c0589768b982cf67f3277df74
Parents: 75f2b70
Author: Cheolsoo Park <ch...@apache.org>
Authored: Thu Sep 20 12:14:15 2012 -0700
Committer: Cheolsoo Park <ch...@apache.org>
Committed: Thu Sep 20 12:14:15 2012 -0700
----------------------------------------------------------------------
.../mapreduce/db/DataDrivenDBInputFormat.java | 3 ++
.../apache/sqoop/mapreduce/db/IntegerSplitter.java | 18 +++++++++++++++
.../sqoop/mapreduce/db/TestIntegerSplitter.java | 8 +++++-
3 files changed, 28 insertions(+), 1 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
index 35b74eb..6f4b208 100644
--- a/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/DataDrivenDBInputFormat.java
@@ -319,6 +319,9 @@ public class DataDrivenDBInputFormat<T extends DBWritable>
public DataDrivenDBInputSplit(final String lower, final String upper) {
this.lowerBoundClause = lower;
this.upperBoundClause = upper;
+
+ LOG.debug("Creating input split with lower bound '" + lower
+ + "' and upper bound '" + upper + "'");
}
/**
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
index 8e7a096..e6fefc6 100644
--- a/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
+++ b/src/java/org/apache/sqoop/mapreduce/db/IntegerSplitter.java
@@ -141,6 +141,24 @@ public class IntegerSplitter implements DBSplitter {
if (splits.size() == 1) {
// make a valid singleton split
splits.add(maxVal);
+ } else if ((maxVal - minVal) <= numSplits) {
+ // Edge case when there are fewer split points (intervals) than the
+ // requested number of splits. In such a case we are creating the last
+ // split with two values, for example interval [1, 5] broken down into 5
+ // splits will create the following conditions:
+ // * 1 <= x < 2
+ // * 2 <= x < 3
+ // * 3 <= x < 4
+ // * 4 <= x <= 5
+ // Notice that the last split has twice as much data as the others. In
+ // those cases we add one more maxVal at the end to create the following
+ // splits instead:
+ // * 1 <= x < 2
+ // * 2 <= x < 3
+ // * 3 <= x < 4
+ // * 4 <= x < 5
+ // * 5 <= x <= 5
+ splits.add(maxVal);
}
return splits;
http://git-wip-us.apache.org/repos/asf/sqoop/blob/5616152a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
----------------------------------------------------------------------
diff --git a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
index 22d5140..136afc7 100644
--- a/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
+++ b/src/test/org/apache/sqoop/mapreduce/db/TestIntegerSplitter.java
@@ -102,7 +102,13 @@ public class TestIntegerSplitter extends TestCase {
public void testTooManySplits() throws SQLException {
List<Long> splits = new IntegerSplitter().split(5, 3, 5);
- long [] expected = { 3, 4, 5 };
+ long [] expected = { 3, 4, 5, 5};
+ assertLongArrayEquals(expected, toLongArray(splits));
+ }
+
+ public void testExactSplitsAsInterval() throws SQLException {
+ List<Long> splits = new IntegerSplitter().split(5, 1, 5);
+ long [] expected = { 1, 2, 3, 4, 5, 5};
assertLongArrayEquals(expected, toLongArray(splits));
}