You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ab...@apache.org on 2014/12/04 08:56:52 UTC

sqoop git commit: SQOOP-1815: Sqoop2: Date and DateTime is not encoded in Single Quotes

Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 49d6e2687 -> ae31a0237


SQOOP-1815: Sqoop2: Date and DateTime is not encoded in Single Quotes

Also includes unit tests.

(Veena Basavaraj via Abraham Elmahrek)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/ae31a023
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/ae31a023
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/ae31a023

Branch: refs/heads/sqoop2
Commit: ae31a02372278cbed6c435dc042cdbc5d20f2090
Parents: 49d6e26
Author: Abraham Elmahrek <ab...@elmahrek.com>
Authored: Wed Dec 3 23:47:24 2014 -0800
Committer: Abraham Elmahrek <ab...@elmahrek.com>
Committed: Wed Dec 3 23:48:34 2014 -0800

----------------------------------------------------------------------
 .../idf/CSVIntermediateDataFormat.java          |  33 ++++--
 .../idf/TestCSVIntermediateDataFormat.java      | 104 ++++++++++++++++---
 .../org/apache/sqoop/driver/JobManager.java     |   5 +
 .../connector/jdbc/generic/PartitionerTest.java |  40 +++----
 4 files changed, 139 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
index bd0fbf0..a075d3f 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
@@ -27,8 +27,10 @@ import org.apache.sqoop.schema.type.Column;
 import org.apache.sqoop.schema.type.ColumnType;
 import org.apache.sqoop.schema.type.FixedPoint;
 import org.apache.sqoop.schema.type.FloatingPoint;
+import org.joda.time.DateTime;
 import org.joda.time.LocalDate;
-import org.joda.time.LocalDateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
 import org.json.simple.JSONArray;
 import org.json.simple.JSONObject;
 import org.json.simple.parser.JSONParser;
@@ -47,7 +49,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.regex.Matcher;
 
-
 /**
  * A concrete implementation for the {@link #IntermediateDataFormat} that
  * represents each row of the data source as a comma separates list. Each
@@ -85,11 +86,16 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
   // ISO-8859-1 is an 8-bit codec that is supported in every java
   // implementation.
   static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
+  // See http://www.joda.org/joda-time/key_format.html for details on the formatter pattern tokens.
+  static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'");
+  static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
 
   private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> listTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
+  private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
+  private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
 
   private Schema schema;
 
@@ -128,8 +134,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
     List<Column> columns = schema.getColumns();
     int i = 0;
     for (Column col : columns) {
-      if (isColumnStringType(col) ) {
+      if (isColumnStringType(col)) {
         stringTypeColumnIndices.add(i);
+      } else if (col.getType() == ColumnType.DATE) {
+        dateTypeColumnIndices.add(i);
+      } else if (col.getType() == ColumnType.DATE_TIME) {
+        dateTimeTypeColumnIndices.add(i);
       } else if (col.getType() == ColumnType.BINARY) {
         byteTypeColumnIndices.add(i);
       } else if (isColumnListType(col)) {
@@ -261,14 +271,14 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
       returnValue = new BigDecimal(fieldString);
       break;
     case DATE:
-      returnValue = LocalDate.parse(fieldString);
+      returnValue = LocalDate.parse(removeQuotes(fieldString));
       break;
     case DATE_TIME:
       // A datetime string with a space as date-time separator will not be
       // parsed expectedly. The expected separator is "T". See also:
       // https://github.com/JodaOrg/joda-time/issues/11
-      String iso8601 = fieldString.replace(" ", "T");
-      returnValue = LocalDateTime.parse(iso8601);
+      String dateTime = removeQuotes(fieldString).replace(" ", "T");
+      returnValue = DateTime.parse(dateTime);
       break;
     case BIT:
       returnValue = Boolean.valueOf(fieldString.equals("1")
@@ -415,6 +425,17 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
     for (int i : stringTypeColumnIndices) {
       stringArray[i] = escapeString((String) stringArray[i]);
     }
+    for (int i : dateTimeTypeColumnIndices) {
+      if (stringArray[i] instanceof org.joda.time.DateTime) {
+        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i]));
+      } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) {
+        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i]));
+      }
+    }
+    for (int i : dateTypeColumnIndices) {
+      org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
+      stringArray[i] = encloseWithQuote(df.print(date));
+    }
     for (int i : byteTypeColumnIndices) {
       stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
     }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index 75fe429..bf15c69 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -272,38 +272,111 @@ public class TestCSVIntermediateDataFormat {
     assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
   }
 
-  //**************test cases for date/datetime*******************
+  // **************test cases for date*******************
 
   @Test
-  public void testDate() {
+  public void testDateWithCSVTextInCSVTextOut() {
     Schema schema = new Schema("test");
     schema.addColumn(new Date("1"));
     dataFormat.setSchema(schema);
+    dataFormat.setTextData("'2014-10-01'");
+    assertEquals("'2014-10-01'", dataFormat.getTextData());
+  }
 
-    dataFormat.setTextData("2014-10-01");
-    assertEquals("2014-10-01", dataFormat.getObjectData()[0].toString());
+  @Test
+  public void testDateWithCSVTextInObjectArrayOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Date("1"));
+    dataFormat.setSchema(schema);
+    dataFormat.setTextData("'2014-10-01'");
+    org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+    assertEquals(date.toString(), dataFormat.getObjectData()[0].toString());
   }
 
   @Test
-  public void testDateTime() {
+  public void testDateWithObjectArrayInCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Date("1")).addColumn(new Text("2"));
+    dataFormat.setSchema(schema);
+    org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+    Object[] in = { date, "test" };
+    dataFormat.setObjectData(in);
+    assertEquals("'2014-10-01','test'", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testDateWithObjectArrayInObjectArrayOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Date("1"));
+    dataFormat.setSchema(schema);
+    org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+    Object[] in = { date };
+    dataFormat.setObjectData(in);
+    assertEquals(date.toString(), dataFormat.getObjectData()[0].toString());
+  }
+
+  // **************test cases for dateTime*******************
+
+  @Test
+  public void testDateTimeWithCSVTextInCSVTextOut() {
     Schema schema = new Schema("test");
     schema.addColumn(new DateTime("1"));
     dataFormat.setSchema(schema);
 
-    for (String dateTime : new String[]{
-        "2014-10-01T12:00:00",
-        "2014-10-01T12:00:00.000"
-    }) {
+    dataFormat.setTextData("'2014-10-01 12:00:00'");
+    assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testDateTimeWithCSVTextInObjectArrayOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new DateTime("1"));
+    dataFormat.setSchema(schema);
+
+    dataFormat.setTextData("'2014-10-01 12:00:00'");
+    assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
+  }
+
+  @Test
+  public void testDateTimeWithObjectInCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new DateTime("1"));
+    dataFormat.setSchema(schema);
+    org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0);
+    Object[] in = { dateTime };
+    dataFormat.setObjectData(in);
+    assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testLocalDateTimeWithObjectInCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new DateTime("1"));
+    dataFormat.setSchema(schema);
+    org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0,
+        0);
+    Object[] in = { dateTime };
+    dataFormat.setObjectData(in);
+    assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testDateTimePrecisionWithCSVTextInObjectArrayOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new DateTime("1"));
+    dataFormat.setSchema(schema);
+
+    for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) {
       dataFormat.setTextData(dateTime);
-      assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString());
+      assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
     }
   }
 
   /**
    * In ISO8601 "T" is used as date-time separator. Unfortunately in the real
-   * world, database (confirmed with mysql and postgres) might return a datatime
+   * world, database (confirmed with mysql and postgres) might return a datetime
    * string with a space as separator. The test case intends to check, whether
-   * such datatime string can be handled expectedly.
+   * such a datetime string is handled as expected.
    */
   @Test
   public void testDateTimeISO8601Alternative() {
@@ -311,12 +384,9 @@ public class TestCSVIntermediateDataFormat {
     schema.addColumn(new DateTime("1"));
     dataFormat.setSchema(schema);
 
-    for (String dateTime : new String[]{
-        "2014-10-01 12:00:00",
-        "2014-10-01 12:00:00.000"
-    }) {
+    for (String dateTime : new String[] { "'2014-10-01 12:00:00'", "'2014-10-01 12:00:00.000'" }) {
       dataFormat.setTextData(dateTime);
-      assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString());
+      assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/core/src/main/java/org/apache/sqoop/driver/JobManager.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/sqoop/driver/JobManager.java b/core/src/main/java/org/apache/sqoop/driver/JobManager.java
index d6efa6d..f4f5561 100644
--- a/core/src/main/java/org/apache/sqoop/driver/JobManager.java
+++ b/core/src/main/java/org/apache/sqoop/driver/JobManager.java
@@ -47,8 +47,10 @@ import org.apache.sqoop.schema.Schema;
 import org.apache.sqoop.submission.SubmissionStatus;
 import org.apache.sqoop.submission.counter.Counters;
 import org.apache.sqoop.utils.ClassUtils;
+import org.joda.time.DateTime;
 import org.joda.time.LocalDate;
 import org.joda.time.LocalDateTime;
+import org.joda.time.LocalTime;
 import org.json.simple.JSONValue;
 
 public class JobManager implements Reconfigurable {
@@ -397,8 +399,11 @@ public class JobManager implements Reconfigurable {
     jobRequest.addJarForClass(executionEngine.getClass());
     // Extra libraries that Sqoop code requires
     jobRequest.addJarForClass(JSONValue.class);
+    // Add JODA classes for IDF date/time handling
     jobRequest.addJarForClass(LocalDate.class);
     jobRequest.addJarForClass(LocalDateTime.class);
+    jobRequest.addJarForClass(DateTime.class);
+    jobRequest.addJarForClass(LocalTime.class);
   }
 
   MSubmission createJobSubmission(HttpEventContext ctx, long jobId) {

http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
index 824a51d..bd34911 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
@@ -103,26 +103,26 @@ public class PartitionerTest extends ConnectorTestCase {
     // Assert correct output
     assertToFiles((extractors > maxOutputFiles) ? maxOutputFiles : extractors);
     assertTo(
-      "1,'Warty Warthog',4.10,2004-10-20,false",
-      "2,'Hoary Hedgehog',5.04,2005-04-08,false",
-      "3,'Breezy Badger',5.10,2005-10-13,false",
-      "4,'Dapper Drake',6.06,2006-06-01,true",
-      "5,'Edgy Eft',6.10,2006-10-26,false",
-      "6,'Feisty Fawn',7.04,2007-04-19,false",
-      "7,'Gutsy Gibbon',7.10,2007-10-18,false",
-      "8,'Hardy Heron',8.04,2008-04-24,true",
-      "9,'Intrepid Ibex',8.10,2008-10-18,false",
-      "10,'Jaunty Jackalope',9.04,2009-04-23,false",
-      "11,'Karmic Koala',9.10,2009-10-29,false",
-      "12,'Lucid Lynx',10.04,2010-04-29,true",
-      "13,'Maverick Meerkat',10.10,2010-10-10,false",
-      "14,'Natty Narwhal',11.04,2011-04-28,false",
-      "15,'Oneiric Ocelot',11.10,2011-10-10,false",
-      "16,'Precise Pangolin',12.04,2012-04-26,true",
-      "17,'Quantal Quetzal',12.10,2012-10-18,false",
-      "18,'Raring Ringtail',13.04,2013-04-25,false",
-      "19,'Saucy Salamander',13.10,2013-10-17,false"
-    );
+        "1,'Warty Warthog',4.10,'2004-10-20',false",
+        "2,'Hoary Hedgehog',5.04,'2005-04-08',false",
+        "3,'Breezy Badger',5.10,'2005-10-13',false",
+        "4,'Dapper Drake',6.06,'2006-06-01',true",
+        "5,'Edgy Eft',6.10,'2006-10-26',false",
+        "6,'Feisty Fawn',7.04,'2007-04-19',false",
+        "7,'Gutsy Gibbon',7.10,'2007-10-18',false",
+        "8,'Hardy Heron',8.04,'2008-04-24',true",
+        "9,'Intrepid Ibex',8.10,'2008-10-18',false",
+        "10,'Jaunty Jackalope',9.04,'2009-04-23',false",
+        "11,'Karmic Koala',9.10,'2009-10-29',false",
+        "12,'Lucid Lynx',10.04,'2010-04-29',true",
+        "13,'Maverick Meerkat',10.10,'2010-10-10',false",
+        "14,'Natty Narwhal',11.04,'2011-04-28',false",
+        "15,'Oneiric Ocelot',11.10,'2011-10-10',false",
+        "16,'Precise Pangolin',12.04,'2012-04-26',true",
+        "17,'Quantal Quetzal',12.10,'2012-10-18',false",
+        "18,'Raring Ringtail',13.04,'2013-04-25',false",
+        "19,'Saucy Salamander',13.10,'2013-10-17',false"
+      );
 
     // Clean up testing table
     dropTable();