You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ja...@apache.org on 2013/02/08 03:39:00 UTC

git commit: SQOOP-862: Hbase import fails if there is a row where all columns are null

Updated Branches:
  refs/heads/trunk a220ae469 -> 20b16fd4b


SQOOP-862: Hbase import fails if there is a row where all columns are null

(David Robson via Jarek Jarcec Cecho)


Project: http://git-wip-us.apache.org/repos/asf/sqoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/sqoop/commit/20b16fd4
Tree: http://git-wip-us.apache.org/repos/asf/sqoop/tree/20b16fd4
Diff: http://git-wip-us.apache.org/repos/asf/sqoop/diff/20b16fd4

Branch: refs/heads/trunk
Commit: 20b16fd4bd0b045a11006d81ddd1bcc8bccd01db
Parents: a220ae4
Author: Jarek Jarcec Cecho <ja...@apache.org>
Authored: Thu Feb 7 18:37:50 2013 -0800
Committer: Jarek Jarcec Cecho <ja...@apache.org>
Committed: Thu Feb 7 18:37:50 2013 -0800

----------------------------------------------------------------------
 src/docs/user/hbase.txt                            |    3 +-
 .../org/apache/sqoop/hbase/HBasePutProcessor.java  |   15 +++++++++++-
 .../com/cloudera/sqoop/hbase/HBaseImportTest.java  |   15 +++++++++++
 .../com/cloudera/sqoop/hbase/HBaseTestCase.java    |   19 +++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/docs/user/hbase.txt
----------------------------------------------------------------------
diff --git a/src/docs/user/hbase.txt b/src/docs/user/hbase.txt
index 7404929..24c8df8 100644
--- a/src/docs/user/hbase.txt
+++ b/src/docs/user/hbase.txt
@@ -48,6 +48,7 @@ using the default parameters from your HBase configuration.
 Sqoop currently serializes all values to HBase by converting each field
 to its string representation (as if you were importing to HDFS in text
 mode), and then inserts the UTF-8 bytes of this string in the target
-cell.
+cell. Sqoop will skip any row in which every column other than the
+row key column is null.
 
 

http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
index cca641f..ffa5f63 100644
--- a/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
+++ b/src/java/org/apache/sqoop/hbase/HBasePutProcessor.java
@@ -23,10 +23,13 @@ import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.sqoop.mapreduce.ImportJobBase;
 
@@ -41,6 +44,9 @@ import com.cloudera.sqoop.lib.ProcessingException;
 public class HBasePutProcessor implements Closeable, Configurable,
     FieldMapProcessor {
 
+  public static final Log LOG = LogFactory.getLog(
+      HBasePutProcessor.class.getName());
+
   /** Configuration key specifying the table to insert into. */
   public static final String TABLE_NAME_KEY = "sqoop.hbase.insert.table";
 
@@ -124,7 +130,14 @@ public class HBasePutProcessor implements Closeable, Configurable,
     List<Put> putList = putTransformer.getPutCommand(fields);
     if (null != putList) {
       for (Put put : putList) {
-        this.table.put(put);
+        if (put!=null) {
+          if (put.isEmpty()) {
+            LOG.warn("Could not insert row with no columns "
+                + "for row-key column: " + Bytes.toString(put.getRow()));
+          } else {
+            this.table.put(put);
+          }
+        }
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
index bf24608..e1f9696 100644
--- a/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
+++ b/src/test/com/cloudera/sqoop/hbase/HBaseImportTest.java
@@ -112,4 +112,19 @@ public class HBaseImportTest extends HBaseTestCase {
 
     fail("should have gotten exception");
   }
+
+  @Test
+  public void testNullRow() throws IOException {
+    String [] argv = getArgv(true, "nullRowT", "nullRowF", true, null);
+    String [] types = { "INT", "INT" };
+    String [] vals = { "0", "null" };
+    createTableWithColTypes(types, vals);
+    runImport(argv);
+
+    // This cell should not be placed in the results.
+    verifyHBaseCell("nullRowT", "0", "nullRowF", getColName(1), null);
+
+    int rowCount = countHBaseTable("nullRowT", "nullRowF");
+    assertEquals(0, rowCount);
+  }
 }

http://git-wip-us.apache.org/repos/asf/sqoop/blob/20b16fd4/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
----------------------------------------------------------------------
diff --git a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
index 65ff87b..37dc004 100644
--- a/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
+++ b/src/test/com/cloudera/sqoop/hbase/HBaseTestCase.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.util.Bytes;
 
 import org.apache.hadoop.util.StringUtils;
@@ -234,4 +235,22 @@ public abstract class HBaseTestCase extends ImportJobTestCase {
     }
     throw new IllegalStateException("Failed to create directory");
   }
+
+  protected int countHBaseTable(String tableName, String colFamily)
+      throws IOException {
+    int count = 0;
+    HTable table = new HTable(new Configuration(
+        hbaseTestUtil.getConfiguration()), Bytes.toBytes(tableName));
+    try {
+      ResultScanner scanner = table.getScanner(Bytes.toBytes(colFamily));
+      for(Result result = scanner.next();
+          result != null;
+          result = scanner.next()) {
+        count++;
+      }
+    } finally {
+      table.close();
+    }
+    return count;
+  }
 }