You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ab...@apache.org on 2015/01/13 06:18:08 UTC

sqoop git commit: SQOOP-1936: Sqoop2: Sort by comparing IDF data in shuffle phase

Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 2d54e26a0 -> 7631d2933


SQOOP-1936: Sqoop2: Sort by comparing IDF data in shuffle phase

(Veena Basavaraj via Abraham Elmahrek)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/7631d293
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/7631d293
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/7631d293

Branch: refs/heads/sqoop2
Commit: 7631d293335855fd42a319c1fcb56530c27f78a4
Parents: 2d54e26
Author: Abraham Elmahrek <ab...@apache.org>
Authored: Mon Jan 12 21:16:13 2015 -0800
Committer: Abraham Elmahrek <ab...@apache.org>
Committed: Mon Jan 12 21:16:13 2015 -0800

----------------------------------------------------------------------
 .../connector/idf/CSVIntermediateDataFormat.java    |  7 +++----
 .../sqoop/connector/idf/IntermediateDataFormat.java | 16 +++++++++++++++-
 .../connector/idf/JSONIntermediateDataFormat.java   |  4 ++++
 .../idf/TestCSVIntermediateDataFormat.java          |  3 +--
 .../java/org/apache/sqoop/job/io/SqoopWritable.java |  4 ++--
 .../org/apache/sqoop/job/io/TestSqoopWritable.java  |  9 ---------
 6 files changed, 25 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
index 2af6acd..4870fae 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
@@ -58,8 +58,7 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
    */
   @Override
   public String getCSVTextData() {
-    // TODO:SQOOP-1936 to enable schema validation after we use compareTo
-    return this.data;
+    return super.getData();
   }
 
   /**
@@ -81,14 +80,14 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
     if (csvStringArray == null) {
       return null;
     }
+    Column[] columns = schema.getColumnsArray();
 
-    if (csvStringArray.length != schema.getColumnsArray().length) {
+    if (csvStringArray.length != columns.length) {
       throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0001,
           "The data " + getCSVTextData() + " has the wrong number of fields.");
     }
 
     Object[] objectArray = new Object[csvStringArray.length];
-    Column[] columns = schema.getColumnsArray();
     for (int i = 0; i < csvStringArray.length; i++) {
       if (csvStringArray[i].equals(NULL_VALUE) && !columns[i].isNullable()) {
         throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0005,

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java
index 6063320..6f945c2 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormat.java
@@ -44,10 +44,13 @@ import java.util.Set;
  * Any conversion to the format dictated by the corresponding data source from the native or  CSV text format
  * has to be done by the connector themselves both in FROM and TO
  *
+ * NOTE: we cannot use the generic form of Comparable, since the comparison can be arbitrary; for instance,
+ * it may be based purely on the text format
  * @param <T> - Each data format may have a native representation of the
  *            data, represented by the parameter.
  */
-public abstract class IntermediateDataFormat<T> {
+@SuppressWarnings("rawtypes")
+public abstract class IntermediateDataFormat<T> implements Comparable {
 
   protected volatile T data;
 
@@ -203,4 +206,15 @@ public abstract class IntermediateDataFormat<T> {
     return true;
   }
 
+  @Override
+  public String toString() {
+    return this.data.toString();
+  }
+
+  @Override
+  public int compareTo(Object o) {
+    IntermediateDataFormat<?> idf = (IntermediateDataFormat<?>) o;
+    return toString().compareTo(idf.toString());
+  }
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
index 3cfd356..c8df6e0 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/JSONIntermediateDataFormat.java
@@ -419,4 +419,8 @@ public class JSONIntermediateDataFormat extends IntermediateDataFormat<JSONObjec
     return object;
   }
 
+  @Override
+  public String toString() {
+    return this.data.toJSONString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index 861d34e..d2b0ae0 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -1143,8 +1143,7 @@ public class TestCSVIntermediateDataFormat {
     dataFormat.getData();
   }
 
-  //SQOOP-1936 to enable schema validation after we use compareTo
-  @Test
+  @Test(expectedExceptions = SqoopException.class)
   public void testNotSettingSchemaAndGetCSVData() {
     dataFormat = new CSVIntermediateDataFormat();
     dataFormat.getCSVTextData();

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java
----------------------------------------------------------------------
diff --git a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java
index 08c2031..59ad311 100644
--- a/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java
+++ b/execution/mapreduce/src/main/java/org/apache/sqoop/job/io/SqoopWritable.java
@@ -68,12 +68,12 @@ public class SqoopWritable implements Configurable, WritableComparable<SqoopWrit
 
   @Override
   public int compareTo(SqoopWritable o) {
-    return toString().compareTo(o.toString());
+    return toIDF.compareTo(o.toIDF);
   }
 
   @Override
   public String toString() {
-    return toIDF.getCSVTextData();
+    return toIDF.toString();
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/sqoop/blob/7631d293/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java
----------------------------------------------------------------------
diff --git a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java
index 6a14201..452e085 100644
--- a/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java
+++ b/execution/mapreduce/src/test/java/org/apache/sqoop/job/io/TestSqoopWritable.java
@@ -51,15 +51,6 @@ public class TestSqoopWritable {
   }
 
   @Test
-  public void testStringInStringOut() {
-    String testData = "Live Long and prosper";
-    writable.setString(testData);
-    verify(idfMock, times(1)).setCSVTextData(testData);
-    writable.toString();
-    verify(idfMock, times(1)).getCSVTextData();
-  }
-
-  @Test
   public void testWrite() throws IOException {
     String testData = "One ring to rule them all";
     ByteArrayOutputStream ostream = new ByteArrayOutputStream();