You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ab...@apache.org on 2014/12/04 08:56:52 UTC
sqoop git commit: SQOOP-1815: Sqoop2: Date and DateTime is not encoded in Single Quotes
Repository: sqoop
Updated Branches:
refs/heads/sqoop2 49d6e2687 -> ae31a0237
SQOOP-1815: Sqoop2: Date and DateTime is not encoded in Single Quotes
Also includes unit tests.
(Veena Basavaraj via Abraham Elmahrek)
Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/ae31a023
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/ae31a023
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/ae31a023
Branch: refs/heads/sqoop2
Commit: ae31a02372278cbed6c435dc042cdbc5d20f2090
Parents: 49d6e26
Author: Abraham Elmahrek <ab...@elmahrek.com>
Authored: Wed Dec 3 23:47:24 2014 -0800
Committer: Abraham Elmahrek <ab...@elmahrek.com>
Committed: Wed Dec 3 23:48:34 2014 -0800
----------------------------------------------------------------------
.../idf/CSVIntermediateDataFormat.java | 33 ++++--
.../idf/TestCSVIntermediateDataFormat.java | 104 ++++++++++++++++---
.../org/apache/sqoop/driver/JobManager.java | 5 +
.../connector/jdbc/generic/PartitionerTest.java | 40 +++----
4 files changed, 139 insertions(+), 43 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
index bd0fbf0..a075d3f 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
@@ -27,8 +27,10 @@ import org.apache.sqoop.schema.type.Column;
import org.apache.sqoop.schema.type.ColumnType;
import org.apache.sqoop.schema.type.FixedPoint;
import org.apache.sqoop.schema.type.FloatingPoint;
+import org.joda.time.DateTime;
import org.joda.time.LocalDate;
-import org.joda.time.LocalDateTime;
+import org.joda.time.format.DateTimeFormat;
+import org.joda.time.format.DateTimeFormatter;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
@@ -47,7 +49,6 @@ import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
-
/**
* A concrete implementation for the {@link #IntermediateDataFormat} that
* represents each row of the data source as a comma separates list. Each
@@ -85,11 +86,16 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
// ISO-8859-1 is an 8-bit codec that is supported in every java
// implementation.
static final String BYTE_FIELD_CHARSET = "ISO-8859-1";
+ //http://www.joda.org/joda-time/key_format.html provides details on the formatter token
+ static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS'Z'");
+ static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> listTypeColumnIndices = new ArrayList<Integer>();
private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
+ private final List<Integer> dateTimeTypeColumnIndices = new ArrayList<Integer>();
+ private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
private Schema schema;
@@ -128,8 +134,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
List<Column> columns = schema.getColumns();
int i = 0;
for (Column col : columns) {
- if (isColumnStringType(col) ) {
+ if (isColumnStringType(col)) {
stringTypeColumnIndices.add(i);
+ } else if (col.getType() == ColumnType.DATE) {
+ dateTypeColumnIndices.add(i);
+ } else if (col.getType() == ColumnType.DATE_TIME) {
+ dateTimeTypeColumnIndices.add(i);
} else if (col.getType() == ColumnType.BINARY) {
byteTypeColumnIndices.add(i);
} else if (isColumnListType(col)) {
@@ -261,14 +271,14 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
returnValue = new BigDecimal(fieldString);
break;
case DATE:
- returnValue = LocalDate.parse(fieldString);
+ returnValue = LocalDate.parse(removeQuotes(fieldString));
break;
case DATE_TIME:
// A datetime string with a space as date-time separator will not be
// parsed expectedly. The expected separator is "T". See also:
// https://github.com/JodaOrg/joda-time/issues/11
- String iso8601 = fieldString.replace(" ", "T");
- returnValue = LocalDateTime.parse(iso8601);
+ String dateTime = removeQuotes(fieldString).replace(" ", "T");
+ returnValue = DateTime.parse(dateTime);
break;
case BIT:
returnValue = Boolean.valueOf(fieldString.equals("1")
@@ -415,6 +425,17 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
for (int i : stringTypeColumnIndices) {
stringArray[i] = escapeString((String) stringArray[i]);
}
+ for (int i : dateTimeTypeColumnIndices) {
+ if (stringArray[i] instanceof org.joda.time.DateTime) {
+ stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i]));
+ } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) {
+ stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i]));
+ }
+ }
+ for (int i : dateTypeColumnIndices) {
+ org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
+ stringArray[i] = encloseWithQuote(df.print(date));
+ }
for (int i : byteTypeColumnIndices) {
stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index 75fe429..bf15c69 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -272,38 +272,111 @@ public class TestCSVIntermediateDataFormat {
assertTrue(Arrays.deepEquals(inCopy, dataFormat.getObjectData()));
}
- //**************test cases for date/datetime*******************
+ // **************test cases for date*******************
@Test
- public void testDate() {
+ public void testDateWithCSVTextInCSVTextOut() {
Schema schema = new Schema("test");
schema.addColumn(new Date("1"));
dataFormat.setSchema(schema);
+ dataFormat.setTextData("'2014-10-01'");
+ assertEquals("'2014-10-01'", dataFormat.getTextData());
+ }
- dataFormat.setTextData("2014-10-01");
- assertEquals("2014-10-01", dataFormat.getObjectData()[0].toString());
+ @Test
+ public void testDateWithCSVTextInObjectArrayOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new Date("1"));
+ dataFormat.setSchema(schema);
+ dataFormat.setTextData("'2014-10-01'");
+ org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+ assertEquals(date.toString(), dataFormat.getObjectData()[0].toString());
}
@Test
- public void testDateTime() {
+ public void testDateWithObjectArrayInCSVTextOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new Date("1")).addColumn(new Text("2"));
+ dataFormat.setSchema(schema);
+ org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+ Object[] in = { date, "test" };
+ dataFormat.setObjectData(in);
+ assertEquals("'2014-10-01','test'", dataFormat.getTextData());
+ }
+
+ @Test
+ public void testDateWithObjectArrayInObjectArrayOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new Date("1"));
+ dataFormat.setSchema(schema);
+ org.joda.time.LocalDate date = new org.joda.time.LocalDate(2014, 10, 01);
+ Object[] in = { date };
+ dataFormat.setObjectData(in);
+ assertEquals(date.toString(), dataFormat.getObjectData()[0].toString());
+ }
+
+ // **************test cases for dateTime*******************
+
+ @Test
+ public void testDateTimeWithCSVTextInCSVTextOut() {
Schema schema = new Schema("test");
schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema);
- for (String dateTime : new String[]{
- "2014-10-01T12:00:00",
- "2014-10-01T12:00:00.000"
- }) {
+ dataFormat.setTextData("'2014-10-01 12:00:00'");
+ assertEquals("'2014-10-01 12:00:00'", dataFormat.getTextData());
+ }
+
+ @Test
+ public void testDateTimeWithCSVTextInObjectArrayOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new DateTime("1"));
+ dataFormat.setSchema(schema);
+
+ dataFormat.setTextData("'2014-10-01 12:00:00'");
+ assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
+ }
+
+ @Test
+ public void testDateTimeWithObjectInCSVTextOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new DateTime("1"));
+ dataFormat.setSchema(schema);
+ org.joda.time.DateTime dateTime = new org.joda.time.DateTime(2014, 10, 01, 12, 0, 0, 0);
+ Object[] in = { dateTime };
+ dataFormat.setObjectData(in);
+ assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
+ }
+
+ @Test
+ public void testLocalDateTimeWithObjectInCSVTextOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new DateTime("1"));
+ dataFormat.setSchema(schema);
+ org.joda.time.LocalDateTime dateTime = new org.joda.time.LocalDateTime(2014, 10, 01, 12, 0, 0,
+ 0);
+ Object[] in = { dateTime };
+ dataFormat.setObjectData(in);
+ assertEquals("'2014-10-01 12:00:00.000Z'", dataFormat.getTextData());
+ }
+
+ @Test
+ public void testDateTimePrecisionWithCSVTextInObjectArrayOut() {
+ Schema schema = new Schema("test");
+ schema.addColumn(new DateTime("1"));
+ dataFormat.setSchema(schema);
+
+ for (String dateTime : new String[] { "'2014-10-01 12:00:00.000'" }) {
dataFormat.setTextData(dateTime);
- assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString());
+ assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
}
}
/**
* In ISO8601 "T" is used as date-time separator. Unfortunately in the real
- * world, database (confirmed with mysql and postgres) might return a datatime
+ * world, database (confirmed with mysql and postgres) might return a datetime
* string with a space as separator. The test case intends to check, whether
- * such datatime string can be handled expectedly.
+ * such datetime string can be handled expectedly.
*/
@Test
public void testDateTimeISO8601Alternative() {
@@ -311,12 +384,9 @@ public class TestCSVIntermediateDataFormat {
schema.addColumn(new DateTime("1"));
dataFormat.setSchema(schema);
- for (String dateTime : new String[]{
- "2014-10-01 12:00:00",
- "2014-10-01 12:00:00.000"
- }) {
+ for (String dateTime : new String[] { "'2014-10-01 12:00:00'", "'2014-10-01 12:00:00.000'" }) {
dataFormat.setTextData(dateTime);
- assertEquals("2014-10-01T12:00:00.000", dataFormat.getObjectData()[0].toString());
+ assertEquals("2014-10-01T12:00:00.000-07:00", dataFormat.getObjectData()[0].toString());
}
}
http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/core/src/main/java/org/apache/sqoop/driver/JobManager.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/sqoop/driver/JobManager.java b/core/src/main/java/org/apache/sqoop/driver/JobManager.java
index d6efa6d..f4f5561 100644
--- a/core/src/main/java/org/apache/sqoop/driver/JobManager.java
+++ b/core/src/main/java/org/apache/sqoop/driver/JobManager.java
@@ -47,8 +47,10 @@ import org.apache.sqoop.schema.Schema;
import org.apache.sqoop.submission.SubmissionStatus;
import org.apache.sqoop.submission.counter.Counters;
import org.apache.sqoop.utils.ClassUtils;
+import org.joda.time.DateTime;
import org.joda.time.LocalDate;
import org.joda.time.LocalDateTime;
+import org.joda.time.LocalTime;
import org.json.simple.JSONValue;
public class JobManager implements Reconfigurable {
@@ -397,8 +399,11 @@ public class JobManager implements Reconfigurable {
jobRequest.addJarForClass(executionEngine.getClass());
// Extra libraries that Sqoop code requires
jobRequest.addJarForClass(JSONValue.class);
+ // Add JODA classes for IDF date/time handling
jobRequest.addJarForClass(LocalDate.class);
jobRequest.addJarForClass(LocalDateTime.class);
+ jobRequest.addJarForClass(DateTime.class);
+ jobRequest.addJarForClass(LocalTime.class);
}
MSubmission createJobSubmission(HttpEventContext ctx, long jobId) {
http://git-wip-us.apache.org/repos/asf/sqoop/blob/ae31a023/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
----------------------------------------------------------------------
diff --git a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
index 824a51d..bd34911 100644
--- a/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
+++ b/test/src/test/java/org/apache/sqoop/integration/connector/jdbc/generic/PartitionerTest.java
@@ -103,26 +103,26 @@ public class PartitionerTest extends ConnectorTestCase {
// Assert correct output
assertToFiles((extractors > maxOutputFiles) ? maxOutputFiles : extractors);
assertTo(
- "1,'Warty Warthog',4.10,2004-10-20,false",
- "2,'Hoary Hedgehog',5.04,2005-04-08,false",
- "3,'Breezy Badger',5.10,2005-10-13,false",
- "4,'Dapper Drake',6.06,2006-06-01,true",
- "5,'Edgy Eft',6.10,2006-10-26,false",
- "6,'Feisty Fawn',7.04,2007-04-19,false",
- "7,'Gutsy Gibbon',7.10,2007-10-18,false",
- "8,'Hardy Heron',8.04,2008-04-24,true",
- "9,'Intrepid Ibex',8.10,2008-10-18,false",
- "10,'Jaunty Jackalope',9.04,2009-04-23,false",
- "11,'Karmic Koala',9.10,2009-10-29,false",
- "12,'Lucid Lynx',10.04,2010-04-29,true",
- "13,'Maverick Meerkat',10.10,2010-10-10,false",
- "14,'Natty Narwhal',11.04,2011-04-28,false",
- "15,'Oneiric Ocelot',11.10,2011-10-10,false",
- "16,'Precise Pangolin',12.04,2012-04-26,true",
- "17,'Quantal Quetzal',12.10,2012-10-18,false",
- "18,'Raring Ringtail',13.04,2013-04-25,false",
- "19,'Saucy Salamander',13.10,2013-10-17,false"
- );
+ "1,'Warty Warthog',4.10,'2004-10-20',false",
+ "2,'Hoary Hedgehog',5.04,'2005-04-08',false",
+ "3,'Breezy Badger',5.10,'2005-10-13',false",
+ "4,'Dapper Drake',6.06,'2006-06-01',true",
+ "5,'Edgy Eft',6.10,'2006-10-26',false",
+ "6,'Feisty Fawn',7.04,'2007-04-19',false",
+ "7,'Gutsy Gibbon',7.10,'2007-10-18',false",
+ "8,'Hardy Heron',8.04,'2008-04-24',true",
+ "9,'Intrepid Ibex',8.10,'2008-10-18',false",
+ "10,'Jaunty Jackalope',9.04,'2009-04-23',false",
+ "11,'Karmic Koala',9.10,'2009-10-29',false",
+ "12,'Lucid Lynx',10.04,'2010-04-29',true",
+ "13,'Maverick Meerkat',10.10,'2010-10-10',false",
+ "14,'Natty Narwhal',11.04,'2011-04-28',false",
+ "15,'Oneiric Ocelot',11.10,'2011-10-10',false",
+ "16,'Precise Pangolin',12.04,'2012-04-26',true",
+ "17,'Quantal Quetzal',12.10,'2012-10-18',false",
+ "18,'Raring Ringtail',13.04,'2013-04-25',false",
+ "19,'Saucy Salamander',13.10,'2013-10-17',false"
+ );
// Clean up testing table
dropTable();