Posted to mapreduce-commits@hadoop.apache.org by to...@apache.org on 2010/03/19 00:17:51 UTC

svn commit: r925040 - in /hadoop/mapreduce/trunk: ./ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/ src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ src/contrib/sqoop/src/test/...

Author: tomwhite
Date: Thu Mar 18 23:17:51 2010
New Revision: 925040

URL: http://svn.apache.org/viewvc?rev=925040&view=rev
Log:
MAPREDUCE-1446. Sqoop should support CLOB and BLOB datatypes. Contributed by Aaron Kimball.

Added:
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BlobRef.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ClobRef.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/LobSerializer.java
Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/FieldFormatter.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/MySQLCompatTest.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleCompatTest.java
    hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ManagerCompatTestCase.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/db/DBRecordReader.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Thu Mar 18 23:17:51 2010
@@ -68,6 +68,9 @@ Trunk (unreleased changes)
     MAPREDUCE-1455. Introduces job-level authorization for mapreduce servlets.
     (Ravi Gummadi via vinodkv)
 
+    MAPREDUCE-1446. Sqoop should support CLOB and BLOB datatypes.
+    (Aaron Kimball via tomwhite)
+
   IMPROVEMENTS
 
     MAPREDUCE-1198. Alternatively schedule different types of tasks in

Added: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BlobRef.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BlobRef.java?rev=925040&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BlobRef.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/BlobRef.java Thu Mar 18 23:17:51 2010
@@ -0,0 +1,102 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.lib;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * BlobRef is a wrapper that holds a Blob either directly, or a
+ * reference to a file that holds the blob data.
+ */
+public class BlobRef implements Writable {
+
+  public BlobRef(byte [] bytes) {
+    this.blobFileNum = 0;
+    this.data = new BytesWritable(bytes);
+  }
+
+  public BlobRef() {
+    this.blobFileNum = 0;
+    this.data = null;
+  }
+
+  // If the data is 'small', it's held directly here.
+  private BytesWritable data;
+
+  // If the data is too large, it's written into a file
+  // and the file is numbered; this number is recorded here.
+  // This takes precedence if this value is positive.
+  private long blobFileNum;
+
+  public byte [] getData() {
+    if (blobFileNum > 0) {
+      // We have a numbered file.
+      // TODO: Implement this.
+      throw new RuntimeException("Unsupported: Indirect BLOBs are not supported");
+    }
+
+    return data.getBytes();
+  }
+
+  @Override
+  public String toString() {
+    if (blobFileNum > 0) {
+      return "indirectBlob(" + blobFileNum + ")";
+    } else {
+      return data.toString();
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    // The serialization format for this object is:
+    // boolean isIndirect
+    // if true, the next field is a long containing blobFileNum;
+    // if false, the next field is the BytesWritable data.
+
+    boolean isIndirect = in.readBoolean();
+    if (isIndirect) {
+      this.data = null;
+      this.blobFileNum = in.readLong();
+    } else {
+      if (null == this.data) {
+        this.data = new BytesWritable();
+      }
+      this.data.readFields(in);
+      this.blobFileNum = 0;
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    boolean isIndirect = blobFileNum > 0;
+    out.writeBoolean(isIndirect);
+    if (isIndirect) {
+      out.writeLong(blobFileNum);
+    } else {
+      data.write(out);
+    }
+  }
+}
+

Added: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ClobRef.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ClobRef.java?rev=925040&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ClobRef.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/ClobRef.java Thu Mar 18 23:17:51 2010
@@ -0,0 +1,99 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.lib;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * ClobRef is a wrapper that holds a Clob either directly, or a
+ * reference to a file that holds the clob data.
+ */
+public class ClobRef implements Writable {
+
+  public ClobRef(String chars) {
+    this.clobFileNum = 0;
+    this.data = chars;
+  }
+
+  public ClobRef() {
+    this.clobFileNum = 0;
+    this.data = null;
+  }
+
+  // If the data is 'small', it's held directly here.
+  private String data;
+
+  // If the data is too large, it's written into a file
+  // and the file is numbered; this number is recorded here.
+  // This takes precedence if this value is positive.
+  private long clobFileNum;
+
+  public String getData() {
+    if (clobFileNum > 0) {
+      // We have a numbered file.
+      // TODO: Implement this.
+      throw new RuntimeException("Unsupported: Indirect CLOBs are not supported");
+    }
+
+    return data;
+  }
+
+  @Override
+  public String toString() {
+    if (clobFileNum > 0) {
+      return "indirectClob(" + clobFileNum + ")";
+    } else {
+      return data;
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    // The serialization format for this object is:
+    // boolean isIndirect
+    // if true, the next field is a Long containing clobFileNum
+    // if false, the next field is String data.
+
+    boolean isIndirect = in.readBoolean();
+    if (isIndirect) {
+      this.data = null;
+      this.clobFileNum = in.readLong();
+    } else {
+      this.data = Text.readString(in);
+      this.clobFileNum = 0;
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    boolean isIndirect = clobFileNum > 0;
+    out.writeBoolean(isIndirect);
+    if (isIndirect) {
+      out.writeLong(clobFileNum);
+    } else {
+      Text.writeString(out, data);
+    }
+  }
+}
+

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/FieldFormatter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/FieldFormatter.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/FieldFormatter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/FieldFormatter.java Thu Mar 18 23:17:51 2010
@@ -20,9 +20,7 @@ package org.apache.hadoop.sqoop.lib;
 
 
 /**
- * Abstract base class for all DBWritable types generated by Sqoop.
- * Contains methods required by all such types, to help with parsing,
- * stringification, etc.
+ * Static helper class for formatting field data with quotes and escape characters.
  */
 public final class FieldFormatter {
 

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/JdbcWritableBridge.java Thu Mar 18 23:17:51 2010
@@ -19,6 +19,8 @@
 package org.apache.hadoop.sqoop.lib;
 
 import java.math.BigDecimal;
+import java.sql.Blob;
+import java.sql.Clob;
 import java.sql.Date;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
@@ -31,12 +33,13 @@ import java.sql.Timestamp;
  * Java types, and do serialization of these types to/from DataInput/DataOutput
  * for use with Hadoop's Writable implementation. This supports null values
  * for all types.
- *
- * 
- *
  */
 public final class JdbcWritableBridge {
 
+  // Currently, cap BLOB/CLOB objects at 16 MB until we can use external storage.
+  public final static long MAX_BLOB_LENGTH = 16 * 1024 * 1024;
+  public final static long MAX_CLOB_LENGTH = 16 * 1024 * 1024;
+
   private JdbcWritableBridge() {
   }
 
@@ -110,6 +113,36 @@ public final class JdbcWritableBridge {
     return r.getBigDecimal(colNum);
   }
 
+  public static BlobRef readBlobRef(int colNum, ResultSet r)
+      throws SQLException {
+    Blob b = r.getBlob(colNum);
+    if (null == b) {
+      return null;
+    } else if (b.length() > MAX_BLOB_LENGTH) {
+      // TODO: Deserialize very large BLOBs into separate files.
+      throw new UnsupportedOperationException("BLOB size exceeds max: "
+          + MAX_BLOB_LENGTH);
+    } else {
+      // This is a 1-based array.
+      return new BlobRef(b.getBytes(1, (int) b.length()));
+    }
+  }
+
+  public static ClobRef readClobRef(int colNum, ResultSet r)
+      throws SQLException {
+    Clob c = r.getClob(colNum);
+    if (null == c) {
+      return null;
+    } else if (c.length() > MAX_CLOB_LENGTH) {
+      // TODO: Deserialize very large CLOBs into separate files.
+      throw new UnsupportedOperationException("CLOB size exceeds max: "
+          + MAX_CLOB_LENGTH);
+    } else {
+      // This is a 1-based array.
+      return new ClobRef(c.getSubString(1, (int) c.length()));
+    }
+  }
+
   public static void writeInteger(Integer val, int paramIdx, int sqlType, PreparedStatement s)
       throws SQLException {
     if (null == val) {
@@ -200,4 +233,15 @@ public final class JdbcWritableBridge {
     }
   }
 
+  public static void writeBlobRef(BlobRef val, int paramIdx,
+      int sqlType, PreparedStatement s) throws SQLException {
+    // TODO: support this.
+    throw new RuntimeException("Unsupported: Cannot export BLOB data");
+  }
+
+  public static void writeClobRef(ClobRef val, int paramIdx,
+      int sqlType, PreparedStatement s) throws SQLException {
+    // TODO: support this.
+    throw new RuntimeException("Unsupported: Cannot export CLOB data");
+  }
 }
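
For context, the two new read methods can be exercised directly against a JDBC ResultSet, outside of Sqoop's generated record classes. The sketch below is illustrative only: the JDBC URL, table name, and column layout are invented and are not part of this change.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    import org.apache.hadoop.sqoop.lib.BlobRef;
    import org.apache.hadoop.sqoop.lib.ClobRef;
    import org.apache.hadoop.sqoop.lib.JdbcWritableBridge;

    public class LobReadSketch {
      public static void main(String[] args) throws SQLException {
        // Hypothetical connection and table; substitute a real JDBC URL and schema.
        Connection conn = DriverManager.getConnection("jdbc:mysql://localhost/demo");
        Statement stmt = conn.createStatement();
        ResultSet rs = stmt.executeQuery("SELECT id, notes, payload FROM documents");
        try {
          while (rs.next()) {
            // Column indexes are 1-based; SQL NULL comes back as a null reference.
            ClobRef notes = JdbcWritableBridge.readClobRef(2, rs);
            BlobRef payload = JdbcWritableBridge.readBlobRef(3, rs);
            int clobChars = (notes == null) ? 0 : notes.getData().length();
            int blobBytes = (payload == null) ? 0 : payload.getData().length;
            System.out.println("row " + rs.getInt(1) + ": " + clobChars
                + " chars of CLOB, " + blobBytes + " bytes of BLOB");
          }
        } finally {
          rs.close();
          stmt.close();
          conn.close();
        }
      }
    }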

Added: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/LobSerializer.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/LobSerializer.java?rev=925040&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/LobSerializer.java (added)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/lib/LobSerializer.java Thu Mar 18 23:17:51 2010
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.sqoop.lib;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * Serialize LOB classes to/from DataInput and DataOutput objects.
+ */
+public final class LobSerializer {
+
+  private LobSerializer() { }
+
+  public static void writeClob(ClobRef clob, DataOutput out) throws IOException {
+    clob.write(out);
+  }
+
+  public static void writeBlob(BlobRef blob, DataOutput out) throws IOException {
+    blob.write(out);
+  }
+
+  public static ClobRef readClobFields(DataInput in) throws IOException {
+    ClobRef clob = new ClobRef();
+    clob.readFields(in);
+    return clob;
+  }
+
+  public static BlobRef readBlobFields(DataInput in) throws IOException {
+    BlobRef blob = new BlobRef();
+    blob.readFields(in);
+    return blob;
+  }
+}
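
A rough round-trip of the CLOB wire format described above (a boolean indirect flag, then either a file number or the character data), using the new LobSerializer helpers. This little harness is illustrative and not part of the commit; BLOB data round-trips the same way via writeBlob()/readBlobFields().

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    import org.apache.hadoop.sqoop.lib.ClobRef;
    import org.apache.hadoop.sqoop.lib.LobSerializer;

    public class ClobRoundTripSketch {
      public static void main(String[] args) throws IOException {
        ClobRef original = new ClobRef("This is short CLOB data");

        // Serialize: a boolean 'indirect' flag (false here) followed by the string.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        LobSerializer.writeClob(original, new DataOutputStream(buffer));

        // Deserialize into a fresh ClobRef and compare the payloads.
        DataInputStream in = new DataInputStream(
            new ByteArrayInputStream(buffer.toByteArray()));
        ClobRef copy = LobSerializer.readClobFields(in);

        System.out.println(original.getData().equals(copy.getData())); // prints true
      }
    }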

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/manager/SqlManager.java Thu Mar 18 23:17:51 2010
@@ -20,6 +20,8 @@ package org.apache.hadoop.sqoop.manager;
 
 import org.apache.hadoop.sqoop.SqoopOptions;
 import org.apache.hadoop.sqoop.hive.HiveTypes;
+import org.apache.hadoop.sqoop.lib.BlobRef;
+import org.apache.hadoop.sqoop.lib.ClobRef;
 import org.apache.hadoop.sqoop.mapreduce.DataDrivenImportJob;
 import org.apache.hadoop.sqoop.mapreduce.ExportJob;
 import org.apache.hadoop.sqoop.util.ExportException;
@@ -359,10 +361,17 @@ public abstract class SqlManager extends
       return "java.sql.Time";
     } else if (sqlType == Types.TIMESTAMP) {
       return "java.sql.Timestamp";
+    } else if (sqlType == Types.CLOB) {
+      return ClobRef.class.getName();
+    } else if (sqlType == Types.BLOB
+        || sqlType == Types.LONGVARBINARY
+        || sqlType == Types.VARBINARY
+        || sqlType == Types.BINARY) {
+      return BlobRef.class.getName();
     } else {
-      // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT, CLOB, BLOB, ARRAY,
-      // STRUCT, REF, JAVA_OBJECT.
-      // return database specific java data type
+      // TODO(aaron): Support DISTINCT, ARRAY, STRUCT, REF, JAVA_OBJECT.
+      // Return null, indicating that the database-specific manager should
+      // supply a Java data type if it can find one for any nonstandard type.
       return null;
     }
   }

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/java/org/apache/hadoop/sqoop/orm/ClassWriter.java Thu Mar 18 23:17:51 2010
@@ -22,9 +22,12 @@ import org.apache.hadoop.sqoop.SqoopOpti
 import org.apache.hadoop.sqoop.manager.ConnManager;
 import org.apache.hadoop.sqoop.manager.SqlManager;
 import org.apache.hadoop.sqoop.lib.BigDecimalSerializer;
-import org.apache.hadoop.sqoop.lib.JdbcWritableBridge;
 import org.apache.hadoop.sqoop.lib.FieldFormatter;
+import org.apache.hadoop.sqoop.lib.JdbcWritableBridge;
+import org.apache.hadoop.sqoop.lib.LobSerializer;
 import org.apache.hadoop.sqoop.lib.RecordParser;
+import org.apache.hadoop.sqoop.lib.BlobRef;
+import org.apache.hadoop.sqoop.lib.ClobRef;
 import org.apache.hadoop.sqoop.lib.SqoopRecord;
 
 import java.io.File;
@@ -291,6 +294,12 @@ public class ClassWriter {
     } else if (javaType.equals("java.math.BigDecimal")) {
       return "    this." + colName + " = " + BigDecimalSerializer.class.getCanonicalName()
           + ".readFields(" + inputObj + ");\n";
+    } else if (javaType.equals(ClobRef.class.getName())) {
+      return "    this." + colName + " = " + LobSerializer.class.getCanonicalName()
+          + ".readClobFields(" + inputObj + ");\n";
+    } else if (javaType.equals(BlobRef.class.getName())) {
+      return "    this." + colName + " = " + LobSerializer.class.getCanonicalName()
+          + ".readBlobFields(" + inputObj + ");\n";
     } else {
       LOG.error("No ResultSet method for Java type " + javaType);
       return null;
@@ -342,6 +351,12 @@ public class ClassWriter {
     } else if (javaType.equals("java.math.BigDecimal")) {
       return "    " + BigDecimalSerializer.class.getCanonicalName()
           + ".write(this." + colName + ", " + outputObj + ");\n";
+    } else if (javaType.equals(ClobRef.class.getName())) {
+      return "    " + LobSerializer.class.getCanonicalName()
+          + ".writeClob(this." + colName + ", " + outputObj + ");\n";
+    } else if (javaType.equals(BlobRef.class.getName())) {
+      return "    " + LobSerializer.class.getCanonicalName()
+          + ".writeBlob(this." + colName + ", " + outputObj + ");\n";
     } else {
       LOG.error("No ResultSet method for Java type " + javaType);
       return null;
@@ -622,6 +637,13 @@ public class ClassWriter {
       sb.append("      this." + colName + " = java.sql.Timestamp.valueOf(__cur_str);\n");
     } else if (javaType.equals("java.math.BigDecimal")) {
       sb.append("      this." + colName + " = new java.math.BigDecimal(__cur_str);\n");
+    } else if (javaType.equals(ClobRef.class.getName())) {
+      sb.append("      this." + colName + " = new ClobRef(__cur_str);\n");
+    } else if (javaType.equals(BlobRef.class.getName())) {
+      // We don't support parsing BLOB data.
+      // Users must store this in SequenceFiles.
+      LOG.warn("BLOB data cannot be reparsed from text files");
+      sb.append("      this." + colName + " = new BlobRef();\n");
     } else {
       LOG.error("No parser available for Java type " + javaType);
     }
@@ -820,6 +842,8 @@ public class ClassWriter {
     sb.append("import " + JdbcWritableBridge.class.getCanonicalName() + ";\n");
     sb.append("import " + FieldFormatter.class.getCanonicalName() + ";\n");
     sb.append("import " + RecordParser.class.getCanonicalName() + ";\n");
+    sb.append("import " + BlobRef.class.getCanonicalName() + ";\n");
+    sb.append("import " + ClobRef.class.getCanonicalName() + ";\n");
     sb.append("import " + SqoopRecord.class.getCanonicalName() + ";\n");
     sb.append("import java.sql.PreparedStatement;\n");
     sb.append("import java.sql.ResultSet;\n");

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/TestColumnTypes.java Thu Mar 18 23:17:51 2010
@@ -58,5 +58,17 @@ public class TestColumnTypes extends Man
   }
 
   // Don't need to override getConnectString() because the default uses hsqldb.
+
+  // HSQLdb does not support these types over JDBC.
+
+  @Override
+  protected boolean supportsClob() {
+    return false;
+  }
+
+  @Override
+  protected boolean supportsBlob() {
+    return false;
+  }
 }
 

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/MySQLCompatTest.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/MySQLCompatTest.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/MySQLCompatTest.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/MySQLCompatTest.java Thu Mar 18 23:17:51 2010
@@ -83,6 +83,16 @@ public class MySQLCompatTest extends Man
   }
 
   @Override
+  protected String getClobType() {
+    return "MEDIUMTEXT";
+  }
+
+  @Override
+  protected String getBlobType() {
+    return "MEDIUMBLOB";
+  }
+
+  @Override
   protected String getTrueBoolDbOutput() {
     return "1";
   }

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleCompatTest.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleCompatTest.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleCompatTest.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/manager/OracleCompatTest.java Thu Mar 18 23:17:51 2010
@@ -18,10 +18,12 @@
 
 package org.apache.hadoop.sqoop.manager;
 
+import java.io.UnsupportedEncodingException;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
+import java.util.Formatter;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -192,9 +194,31 @@ public class OracleCompatTest extends Ma
     return doubleAsInserted;
   }
 
+  @Override
+  protected String getBlobInsertStr(String blobData) {
+    // Oracle wants blob data encoded as hex (e.g. '01fca3b5').
+
+    StringBuilder sb = new StringBuilder();
+    sb.append("'");
+
+    Formatter fmt = new Formatter(sb);
+    try {
+      for (byte b : blobData.getBytes("UTF-8")) {
+        fmt.format("%02X", b);
+      }
+    } catch (UnsupportedEncodingException uee) {
+      // Should not happen; Java always supports UTF-8.
+      fail("Could not get utf-8 bytes for blob string");
+      return null;
+    }
+    sb.append("'");
+    return sb.toString();
+  }
+
   // Disable this test since Oracle isn't ANSI compliant.
   @Override
   public void testEmptyStringCol() {
+    this.skipped = true;
     LOG.info(
         "Oracle treats empty strings as null (non-ANSI compliant). Skipping.");
   }

Modified: hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ManagerCompatTestCase.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ManagerCompatTestCase.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ManagerCompatTestCase.java (original)
+++ hadoop/mapreduce/trunk/src/contrib/sqoop/src/test/org/apache/hadoop/sqoop/testutil/ManagerCompatTestCase.java Thu Mar 18 23:17:51 2010
@@ -20,12 +20,16 @@ package org.apache.hadoop.sqoop.testutil
 
 import java.io.File;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.sql.Blob;
+import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.ArrayList;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.util.ToolRunner;
 
 import org.apache.hadoop.sqoop.SqoopOptions;
@@ -146,6 +150,16 @@ public abstract class ManagerCompatTestC
     return true;
   }
 
+  /** @return true if the database under test supports CLOB types */
+  protected boolean supportsClob() {
+    return true;
+  }
+
+  /** @return true if the database under test supports BLOB types */
+  protected boolean supportsBlob() {
+    return true;
+  }
+
   //////// These methods indicate how to define various datatypes.
 
   /**
@@ -215,6 +229,20 @@ public abstract class ManagerCompatTestC
     return "TIMESTAMP";
   }
 
+  /**
+   * Define a CLOB column that can contain up to 16 MB of data.
+   */
+  protected String getClobType() {
+    return "CLOB";
+  }
+  
+  /**
+   * Define a BLOB column that can contain up to 16 MB of data.
+   */
+  protected String getBlobType() {
+    return "BLOB";
+  }
+
   //////// These methods indicate how databases respond to various datatypes.
   //////// Since our comparisons are all string-based, these return strings.
 
@@ -425,6 +453,38 @@ public abstract class ManagerCompatTestC
     return asInserted;
   }
 
+  /**
+   * Encode a string to be inserted in a BLOB field.
+   * @param blobData the raw text (without quote marks) to insert for a BLOB.
+   * @return 'blobData' in a String form ready for insertion.
+   */
+  protected String getBlobInsertStr(String blobData) {
+    return "'" + blobData + "'";
+  }
+
+  /**
+   * @return a byte array describing how an inserted BLOB will be returned to
+   * us by the database.
+   */
+  protected byte [] getBlobDbOutput(String asInserted) {
+    // The database will give us back a byte array; we need to create
+    // an identical byte array.
+    try {
+      return asInserted.getBytes("UTF-8");
+    } catch (UnsupportedEncodingException uee) {
+      fail("Could not get utf8 bytes"); // Java should always support UTF-8.
+      return null;
+    }
+  }
+
+  /**
+   * @return a String describing how an inserted BLOB will be returned to
+   * us via the SequenceFile.
+   */
+  protected String getBlobSeqOutput(String asInserted) {
+    return new BytesWritable(getBlobDbOutput(asInserted)).toString();
+  }
+
   //////// The actual tests occur below here. ////////
 
   /**
@@ -794,5 +854,87 @@ public abstract class ManagerCompatTestC
         "this is a long varchar");
   }
 
+
+  protected void verifyClob(String insertVal, String returnVal, String seqFileVal) {
+    String [] types = { "INTEGER NOT NULL", getClobType() };
+    String [] vals = { "1", insertVal };
+    String [] checkCol = { "DATA_COL0", "DATA_COL1" };
+
+    createTableWithColTypes(types, vals);
+    verifyReadback(2, returnVal);
+    verifyImport("1," + seqFileVal, checkCol);
+  }
+
+  protected void verifyBlob(String insertVal, byte [] returnVal, String seqFileVal) {
+    String [] types = { "INTEGER NOT NULL", getBlobType() };
+    String [] vals = { "1", insertVal };
+    String [] checkCols = { "DATA_COL0", "DATA_COL1" };
+
+    createTableWithColTypes(types, vals);
+
+    // Verify readback of the data.
+    ResultSet results = null;
+    try {
+      results = getManager().readTable(getTableName(), getColNames());
+      assertNotNull("Null results from readTable()!", results);
+      assertTrue("Expected at least one row returned", results.next());
+      Blob blob = results.getBlob(2);
+      byte [] databaseBytes = blob.getBytes(1, (int) blob.length());
+      LOG.info("Verifying readback of bytes from " + getTableName());
+
+      assertEquals("byte arrays differ in size", returnVal.length,
+          databaseBytes.length);
+      for (int i = 0; i < returnVal.length; i++) {
+        assertEquals("bytes differ at position " + i + ". Expected "
+            + returnVal[i] + "; got " + databaseBytes[i],
+            returnVal[i],
+            databaseBytes[i]);
+      }
+
+      assertFalse("Expected at most one row returned", results.next());
+    } catch (SQLException sqlE) {
+      fail("Got SQLException: " + sqlE.toString());
+    } finally {
+      if (null != results) {
+        try {
+          results.close();
+        } catch (SQLException sqlE) {
+          fail("Got SQLException in resultset.close(): " + sqlE.toString());
+        }
+      }
+
+      // Free internal resources after the readTable.
+      getManager().release();
+    }
+
+    // Now verify that we can use the Sqoop import mechanism on this data.
+    verifyImport("1," + seqFileVal, checkCols);
+  }
+
+
+  @Test
+  public void testClob1() {
+    if (!supportsClob()) {
+      LOG.info("Skipping CLOB test; database does not support CLOB");
+      return;
+    }
+
+    verifyClob("'This is short CLOB data'",
+        "This is short CLOB data",
+        "This is short CLOB data");
+  }
+
+  @Test
+  public void testBlob1() {
+    if (!supportsBlob()) {
+      LOG.info("Skipping BLOB test; database does not support BLOB");
+      return;
+    }
+
+    verifyBlob(getBlobInsertStr("This is short BLOB data"),
+        getBlobDbOutput("This is short BLOB data"),
+        getBlobSeqOutput("This is short BLOB data"));
+  }
+
 }
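
The getBlobSeqOutput() helper above leans on BytesWritable's textual rendering of the imported bytes. A quick, illustrative way to see the expected SequenceFile-side string for a given insert value (not part of the commit):

    import org.apache.hadoop.io.BytesWritable;

    public class BlobSeqOutputSketch {
      public static void main(String[] args) throws Exception {
        byte[] raw = "This is short BLOB data".getBytes("UTF-8");
        // BytesWritable.toString() renders the bytes as space-separated hex
        // pairs, which is the form getBlobSeqOutput() compares against.
        System.out.println(new BytesWritable(raw).toString());
      }
    }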
 

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/db/DBRecordReader.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/db/DBRecordReader.java?rev=925040&r1=925039&r2=925040&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/db/DBRecordReader.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/db/DBRecordReader.java Thu Mar 18 23:17:51 2010
@@ -232,7 +232,7 @@ public class DBRecordReader<T extends DB
 
       pos ++;
     } catch (SQLException e) {
-      throw new IOException(e.getMessage());
+      throw new IOException("SQLException in nextKeyValue", e);
     }
     return true;
   }