You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sqoop.apache.org by ka...@apache.org on 2012/05/05 09:25:46 UTC
svn commit: r1334328 - /sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java

Author: kathleen
Date: Sat May  5 07:25:46 2012
New Revision: 1334328

URL: http://svn.apache.org/viewvc?rev=1334328&view=rev
Log:
SQOOP-443. Calling sqoop with hive import is not working multiple times due 
to kept output directory
(Jarek Jarcec Cecho via Kathleen Ting)

Modified:
    sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java

Modified: sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java
URL: http://svn.apache.org/viewvc/sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java?rev=1334328&r1=1334327&r2=1334328&view=diff
==============================================================================
--- sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java (original)
+++ sqoop/trunk/src/java/org/apache/sqoop/hive/HiveImport.java Sat May  5 07:25:46 2012
@@ -30,6 +30,7 @@ import java.util.List;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
@@ -102,18 +103,7 @@ public class HiveImport {
    */
   private void removeTempLogs(String tableName) throws IOException {
     FileSystem fs = FileSystem.get(configuration);
-    Path tablePath;
-    if (null != tableName) {
-        String warehouseDir = options.getWarehouseDir();
-        if (warehouseDir != null) {
-          tablePath = new Path(new Path(warehouseDir), tableName);
-        } else {
-          tablePath = new Path(tableName);
-        }
-    } else {
-        // --table option is not used, so use the target dir instead
-        tablePath = new Path(options.getTargetDir());
-    }
+    Path tablePath = getOutputPath(tableName);
 
     Path logsPath = new Path(tablePath, "_logs");
     if (fs.exists(logsPath)) {
@@ -126,6 +116,26 @@ public class HiveImport {
   }
 
   /**
+   * Get directory where we stored job output files.
+   *
+   * @param tableName imported table name
+   * @return Path with directory where output files can be found
+   */
+  private Path getOutputPath(String tableName) {
+    if (tableName == null) {
+      // --table option is not used, so use the target dir instead
+      return new Path(options.getTargetDir());
+    }
+    // With a warehouse dir configured, the table directory is resolved
+    // underneath it; otherwise the table name alone forms the path.
+    String baseDir = options.getWarehouseDir();
+    if (baseDir == null) {
+      return new Path(tableName);
+    }
+    return new Path(new Path(baseDir), tableName);
+  }
+
+  /**
    * @return true if we're just generating the DDL for the import, but
    * not actually running it (i.e., --generate-only mode). If so, don't
    * do any side-effecting actions in Hive.
@@ -239,6 +249,8 @@ public class HiveImport {
         executeScript(filename, env);
 
         LOG.info("Hive import complete.");
+
+        cleanUp(inputTableName);
       }
     } finally {
       if (!isGenerateOnly()) {
@@ -252,6 +264,35 @@ public class HiveImport {
     }
   }
 
+  /**
+   * Clean up after successful HIVE import.
+   *
+   * @param table Imported table name
+   * @throws IOException
+   */
+  private void cleanUp(String table) throws IOException {
+    FileSystem fs = FileSystem.get(configuration);
+
+    // Hive does not always remove the input directory of a LOAD DATA
+    // statement (which is our export directory). Remove it here when it is
+    // empty, so that repeated imports into the same Hive table (for example
+    // with --hive-overwrite) are not blocked by the kept output directory.
+    Path outputPath = getOutputPath(table);
+    try {
+      if (fs.exists(outputPath)) {
+        FileStatus[] statuses = fs.listStatus(outputPath);
+        if (statuses == null || statuses.length == 0) {
+          LOG.info("Export directory is empty, removing it.");
+          fs.delete(outputPath, false); // non-recursive: never drop data
+        } else {
+          LOG.info("Export directory is not empty, keeping it.");
+        }
+      }
+    } catch(IOException e) {
+      LOG.error("Issue with cleaning (safe to ignore)", e);
+    }
+  }
+
   @SuppressWarnings("unchecked")
   /**
    * Execute the script file via Hive.