You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/11/01 07:50:50 UTC

carbondata git commit: [CARBONDATA-3052] Improve drop table performance by reducing the namenode RPC calls during physical deletion of files

Repository: carbondata
Updated Branches:
  refs/heads/master 625844784 -> 82eec10cb


[CARBONDATA-3052] Improve drop table performance by reducing the namenode RPC calls during physical deletion of files

Problem
Current drop table command takes more than 1 minute to delete 3000 files during drop table operation from HDFS

Analysis
Even though we are using HDFS file system we are explicitly we are recursively iterating through the table folders and deleting each file. For each file deletion and file listing one rpc call is made to namenode. To delete 3000 files 3000 rpc calls are made to namenode for file deletion and few more rpc calls for file listing in each folder.

Solution
HDFS provides an API for deleting all folders and files recursively for a given path in a single RPC call. Use that API and improve the drop table operation performance.

Result: After these code changes drop table operation time to delete 3000 files from HDFS has reduced from 1 minute to ~2 sec.

This closes #2868


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/82eec10c
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/82eec10c
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/82eec10c

Branch: refs/heads/master
Commit: 82eec10cb482e88df914657240a42a8dff8b1c51
Parents: 6258447
Author: manishgupta88 <to...@gmail.com>
Authored: Mon Oct 29 11:39:09 2018 +0530
Committer: Jacky Li <ja...@qq.com>
Committed: Thu Nov 1 15:50:33 2018 +0800

----------------------------------------------------------------------
 .../core/datastore/filesystem/CarbonFile.java   |  5 ++
 .../datastore/filesystem/LocalCarbonFile.java   |  7 +-
 .../apache/carbondata/core/util/CarbonUtil.java | 68 ++++----------------
 .../SubqueryWithFilterAndSortTestCase.scala     |  1 -
 4 files changed, 24 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/82eec10c/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
index bc8e707..8044817 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/CarbonFile.java
@@ -62,6 +62,11 @@ public interface CarbonFile {
 
   boolean renameForce(String changeToName);
 
+  /**
+   * This method will delete the files recursively from file system
+   *
+   * @return true if success
+   */
   boolean delete();
 
   boolean createNewFile();

http://git-wip-us.apache.org/repos/asf/carbondata/blob/82eec10c/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
index edcd38f..620a19c 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/filesystem/LocalCarbonFile.java
@@ -141,7 +141,12 @@ public class LocalCarbonFile implements CarbonFile {
   }
 
   public boolean delete() {
-    return file.delete();
+    try {
+      return deleteFile(file.getAbsolutePath(), FileFactory.getFileType(file.getAbsolutePath()));
+    } catch (IOException e) {
+      LOGGER.error("Exception occurred:" + e.getMessage(), e);
+      return false;
+    }
   }
 
   @Override public CarbonFile[] listFiles() {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/82eec10c/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
index f401a97..1840ba0 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonUtil.java
@@ -300,40 +300,27 @@ public final class CarbonUtil {
 
       @Override public Void run() throws Exception {
         for (int i = 0; i < path.length; i++) {
-          deleteRecursive(path[i]);
+          CarbonFile carbonFile = FileFactory.getCarbonFile(path[i].getAbsolutePath());
+          boolean delete = carbonFile.delete();
+          if (!delete) {
+            throw new IOException("Error while deleting file: " + carbonFile.getAbsolutePath());
+          }
         }
         return null;
       }
     });
   }
 
-  /**
-   * Recursively delete the files
-   *
-   * @param f File to be deleted
-   * @throws IOException
-   */
-  private static void deleteRecursive(File f) throws IOException {
-    if (f.isDirectory()) {
-      File[] files = f.listFiles();
-      if (null != files) {
-        for (File c : files) {
-          deleteRecursive(c);
-        }
-      }
-    }
-    if (f.exists() && !f.delete()) {
-      throw new IOException("Error while deleting the folders and files");
-    }
-  }
-
   public static void deleteFoldersAndFiles(final CarbonFile... file)
       throws IOException, InterruptedException {
     UserGroupInformation.getLoginUser().doAs(new PrivilegedExceptionAction<Void>() {
 
       @Override public Void run() throws Exception {
         for (int i = 0; i < file.length; i++) {
-          deleteRecursive(file[i]);
+          boolean delete = file[i].delete();
+          if (!delete) {
+            throw new IOException("Error while deleting file: " + file[i].getAbsolutePath());
+          }
         }
         return null;
       }
@@ -346,45 +333,16 @@ public final class CarbonUtil {
 
       @Override public Void run() throws Exception {
         for (int i = 0; i < file.length; i++) {
-          deleteRecursiveSilent(file[i]);
+          boolean delete = file[i].delete();
+          if (!delete) {
+            LOGGER.warn("Unable to delete file: " + file[i].getCanonicalPath());
+          }
         }
         return null;
       }
     });
   }
 
-  /**
-   * Recursively delete the files
-   *
-   * @param f File to be deleted
-   * @throws IOException
-   */
-  private static void deleteRecursive(CarbonFile f) throws IOException {
-    if (f.isDirectory()) {
-      if (f.listFiles() != null) {
-        for (CarbonFile c : f.listFiles()) {
-          deleteRecursive(c);
-        }
-      }
-    }
-    if (f.exists() && !f.delete()) {
-      throw new IOException("Error while deleting the folders and files");
-    }
-  }
-
-  private static void deleteRecursiveSilent(CarbonFile f) {
-    if (f.isDirectory()) {
-      if (f.listFiles() != null) {
-        for (CarbonFile c : f.listFiles()) {
-          deleteRecursiveSilent(c);
-        }
-      }
-    }
-    if (f.exists() && !f.delete()) {
-      return;
-    }
-  }
-
   public static void deleteFiles(File[] intermediateFiles) throws IOException {
     for (int i = 0; i < intermediateFiles.length; i++) {
       // ignore deleting for index file since it is inside merged file.

http://git-wip-us.apache.org/repos/asf/carbondata/blob/82eec10c/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/SubqueryWithFilterAndSortTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/SubqueryWithFilterAndSortTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/SubqueryWithFilterAndSortTestCase.scala
index a0f5e8f..bb5182f 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/SubqueryWithFilterAndSortTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/detailquery/SubqueryWithFilterAndSortTestCase.scala
@@ -72,7 +72,6 @@ class SubqueryWithFilterAndSortTestCase extends QueryTest with BeforeAndAfterAll
     sql("drop table if exists subqueryfilterwithsort")
     sql("drop table if exists subqueryfilterwithsort_hive")
     deleteFile(tempFilePath)
-    deleteFile(tempDirPath)
   }
 
 }