You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ab...@apache.org on 2014/12/06 01:03:13 UTC

sqoop git commit: SQOOP-1817: Sqoop2: Update CSVIntermediate BIT data type

Repository: sqoop
Updated Branches:
  refs/heads/sqoop2 c19f9c946 -> c865aefea


SQOOP-1817: Sqoop2: Update CSVIntermediate BIT data type

(Veena Basavaraj via Abraham Elmahrek)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/c865aefe
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/c865aefe
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/c865aefe

Branch: refs/heads/sqoop2
Commit: c865aefea62a8f0f5eabe56860b393538f03c09c
Parents: c19f9c9
Author: Abraham Elmahrek <ab...@abe-MBP.local>
Authored: Fri Dec 5 15:57:39 2014 -0800
Committer: Abraham Elmahrek <ab...@abe-MBP.local>
Committed: Fri Dec 5 15:57:39 2014 -0800

----------------------------------------------------------------------
 .../idf/CSVIntermediateDataFormat.java          |  57 ++++++----
 .../idf/IntermediateDataFormatError.java        |   9 +-
 .../idf/TestCSVIntermediateDataFormat.java      | 106 +++++++++++++++++--
 3 files changed, 144 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
index d481cce..daa51eb 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/CSVIntermediateDataFormat.java
@@ -44,6 +44,7 @@ import java.math.BigDecimal;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -89,9 +90,10 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
   // http://www.joda.org/joda-time/key_format.html provides details on the formatter token
   static final DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSSSSSZ");
   static final DateTimeFormatter df = DateTimeFormat.forPattern("yyyy-MM-dd");
-  static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSSZ");
+  static final DateTimeFormatter tf = DateTimeFormat.forPattern("HH:mm:ss.SSSSSS");
 
   private final List<Integer> stringTypeColumnIndices = new ArrayList<Integer>();
+  private final List<Integer> bitTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> byteTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> listTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> mapTypeColumnIndices = new ArrayList<Integer>();
@@ -99,6 +101,11 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
   private final List<Integer> dateTypeColumnIndices = new ArrayList<Integer>();
   private final List<Integer> timeColumnIndices = new ArrayList<Integer>();
 
+  static final String[] TRUE_BIT_VALUES = new String[] { "1", "true", "TRUE" };
+  static final Set<String> TRUE_BIT_SET = new HashSet<String>(Arrays.asList(TRUE_BIT_VALUES));
+  static final String[] FALSE_BIT_VALUES = new String[] { "0", "false", "FALSE" };
+  static final Set<String> FALSE_BIT_SET = new HashSet<String>(Arrays.asList(FALSE_BIT_VALUES));
+
   private Schema schema;
 
   public CSVIntermediateDataFormat() {
@@ -138,6 +145,8 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
     for (Column col : columns) {
       if (isColumnStringType(col)) {
         stringTypeColumnIndices.add(i);
+      } else if (col.getType() == ColumnType.BIT) {
+        bitTypeColumnIndices.add(i);
       } else if (col.getType() == ColumnType.DATE) {
         dateTypeColumnIndices.add(i);
       } else if (col.getType() == ColumnType.TIME) {
@@ -288,8 +297,12 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
       returnValue = DateTime.parse(dateTime);
       break;
     case BIT:
-      returnValue = Boolean.valueOf(fieldString.equals("1")
-          || fieldString.toLowerCase().equals("true"));
+      if ((TRUE_BIT_SET.contains(fieldString)) || (FALSE_BIT_SET.contains(fieldString))) {
+        returnValue = TRUE_BIT_SET.contains(fieldString);
+      } else {
+        // throw an exception for any unsupported value for BITs
+        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + fieldString);
+      }
       break;
     case ARRAY:
     case SET:
@@ -425,36 +438,44 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
   /**
    * Sanitize every element of the CSV string based on the column type
    *
-   * @param stringArray
+   * @param objectArray the row values, encoded in place according to each column's type
    */
   @SuppressWarnings("unchecked")
-  private void encodeCSVStringElements(Object[] stringArray, Column[] columnArray) {
+  private void encodeCSVStringElements(Object[] objectArray, Column[] columnArray) {
+    for (int i : bitTypeColumnIndices) {
+      String bitStringValue = objectArray[i].toString();
+      if ((TRUE_BIT_SET.contains(bitStringValue)) || (FALSE_BIT_SET.contains(bitStringValue))) {
+        objectArray[i] = bitStringValue;
+      } else {
+        throw new SqoopException(IntermediateDataFormatError.INTERMEDIATE_DATA_FORMAT_0009, " given bit value: " + objectArray[i]);
+      }
+    }
     for (int i : stringTypeColumnIndices) {
-      stringArray[i] = escapeString((String) stringArray[i]);
+      objectArray[i] = escapeString((String) objectArray[i]);
     }
     for (int i : dateTimeTypeColumnIndices) {
-      if (stringArray[i] instanceof org.joda.time.DateTime) {
-        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) stringArray[i]));
-      } else if (stringArray[i] instanceof org.joda.time.LocalDateTime) {
-        stringArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) stringArray[i]));
+      if (objectArray[i] instanceof org.joda.time.DateTime) {
+        objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.DateTime) objectArray[i]));
+      } else if (objectArray[i] instanceof org.joda.time.LocalDateTime) {
+        objectArray[i] = encloseWithQuote(dtf.print((org.joda.time.LocalDateTime) objectArray[i]));
       }
     }
     for (int i : dateTypeColumnIndices) {
-      org.joda.time.LocalDate date = (org.joda.time.LocalDate) stringArray[i];
-      stringArray[i] = encloseWithQuote(df.print(date));
+      org.joda.time.LocalDate date = (org.joda.time.LocalDate) objectArray[i];
+      objectArray[i] = encloseWithQuote(df.print(date));
     }
     for (int i : timeColumnIndices) {
-      org.joda.time.LocalTime date = (org.joda.time.LocalTime) stringArray[i];
-      stringArray[i] = encloseWithQuote(tf.print(date));
+      org.joda.time.LocalTime date = (org.joda.time.LocalTime) objectArray[i];
+      objectArray[i] = encloseWithQuote(tf.print(date));
     }
     for (int i : byteTypeColumnIndices) {
-      stringArray[i] = escapeByteArrays((byte[]) stringArray[i]);
+      objectArray[i] = escapeByteArrays((byte[]) objectArray[i]);
     }
     for (int i : listTypeColumnIndices) {
-      stringArray[i] = encodeList((Object[]) stringArray[i], columnArray[i]);
+      objectArray[i] = encodeList((Object[]) objectArray[i], columnArray[i]);
     }
     for (int i : mapTypeColumnIndices) {
-      stringArray[i] = encodeMap((Map<Object, Object>) stringArray[i], columnArray[i]);
+      objectArray[i] = encodeMap((Map<Object, Object>) objectArray[i], columnArray[i]);
     }
   }
 
@@ -571,4 +592,4 @@ public class CSVIntermediateDataFormat extends IntermediateDataFormat<String> {
   public String toString() {
     return data;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
index 665418d..4b0dd88 100644
--- a/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
+++ b/connector/connector-sdk/src/main/java/org/apache/sqoop/connector/idf/IntermediateDataFormatError.java
@@ -36,13 +36,18 @@ public enum IntermediateDataFormatError implements ErrorCode {
   /** Column type isn't known by Intermediate Data Format. */
   INTERMEDIATE_DATA_FORMAT_0004("Unknown column type."),
 
-  /** Number of fields. */
-  INTERMEDIATE_DATA_FORMAT_0005("Wrong number of fields."),
+  /** Number of columns in schema does not match the data set. */
+  INTERMEDIATE_DATA_FORMAT_0005("Wrong number of columns."),
 
+  /** Schema is missing in the IDF. */
   INTERMEDIATE_DATA_FORMAT_0006("Schema missing."),
 
+  /** Arrays and maps use a JSON representation; a malformed representation results in a parse exception. */
   INTERMEDIATE_DATA_FORMAT_0008("JSON parse internal error."),
 
+  /** Unsupported bit values */
+  INTERMEDIATE_DATA_FORMAT_0009("Unsupported bit value."),
+
   ;
 
   private final String message;

http://git-wip-us.apache.org/repos/asf/sqoop/blob/c865aefe/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
----------------------------------------------------------------------
diff --git a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
index b348ed8..8a032ef 100644
--- a/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
+++ b/connector/connector-sdk/src/test/java/org/apache/sqoop/connector/idf/TestCSVIntermediateDataFormat.java
@@ -442,28 +442,118 @@ public class TestCSVIntermediateDataFormat {
     }
   }
 
+  // **************test cases for BIT*******************
+
+  @Test
+  public void testBitTrueFalseWithCSVTextInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1"));
+    dataFormat.setSchema(schema);
+
+    for (String trueBit : new String[] { "true", "TRUE" }) {
+      dataFormat.setTextData(trueBit);
+      assertTrue(Boolean.valueOf(dataFormat.getTextData()));
+    }
+
+    for (String falseBit : new String[] { "false", "FALSE" }) {
+      dataFormat.setTextData(falseBit);
+      assertFalse(Boolean.valueOf(dataFormat.getTextData()));
+    }
+  }
+
+  @Test
+  public void testBitWithCSVTextInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1"));
+    dataFormat.setSchema(schema);
+    dataFormat.setTextData("1");
+    assertEquals("1", dataFormat.getTextData());
+    dataFormat.setTextData("0");
+    assertEquals("0", dataFormat.getTextData());
+  }
+
+  @Test
+  public void testBitWithObjectArrayInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = Boolean.TRUE;
+    data[1] = Boolean.FALSE;
+    dataFormat.setObjectData(data);
+    assertEquals("true,false", dataFormat.getTextData());
+  }
+
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithObjectArrayInAndCSVTextOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = "1";
+    data[1] = "2";
+    dataFormat.setObjectData(data);
+    assertEquals("1,2", dataFormat.getTextData());
+  }
+
   @Test
-  public void testBit() {
+  public void testBitWithObjectArrayInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = Boolean.TRUE;
+    data[1] = Boolean.FALSE;
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+    data[0] = "1";
+    data[1] = "0";
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+  @Test
+  public void testBitWithCSVTextInAndObjectArrayOut() {
     Schema schema = new Schema("test");
     schema.addColumn(new Bit("1"));
     dataFormat.setSchema(schema);
 
-    for (String trueBit : new String[]{
-        "true", "TRUE", "1"
-    }) {
+    for (String trueBit : new String[] { "true", "TRUE", "1" }) {
       dataFormat.setTextData(trueBit);
       assertTrue((Boolean) dataFormat.getObjectData()[0]);
     }
 
-    for (String falseBit : new String[]{
-        "false", "FALSE", "0"
-    }) {
+    for (String falseBit : new String[] { "false", "FALSE", "0" }) {
       dataFormat.setTextData(falseBit);
       assertFalse((Boolean) dataFormat.getObjectData()[0]);
     }
   }
 
-  //**************test cases for arrays*******************
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithObjectArrayInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    Object[] data = new Object[2];
+    data[0] = "1";
+    data[1] = "2";
+    dataFormat.setObjectData(data);
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+
+  @Test(expected = SqoopException.class)
+  public void testUnsupportedBitWithCSVTextInAndObjectOut() {
+    Schema schema = new Schema("test");
+    schema.addColumn(new Bit("1")).addColumn(new Bit("2"));
+    dataFormat.setSchema(schema);
+    dataFormat.setTextData("1,3");
+    assertEquals(true, dataFormat.getObjectData()[0]);
+    assertEquals(false, dataFormat.getObjectData()[1]);
+  }
+
+  // **************test cases for arrays*******************
   @Test
   public void testArrayOfStringWithObjectArrayInObjectArrayOut() {
     Schema schema = new Schema("test");