You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2020/08/28 16:09:39 UTC

[carbondata] branch master updated: [CARBONDATA-3962]Remove unwanted empty fact directory in case of flat_folder table

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 96e7d95  [CARBONDATA-3962]Remove unwanted empty fact directory in case of flat_folder table
96e7d95 is described below

commit 96e7d957da160151b4b44abc10914e3208d2f3ab
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Wed Aug 26 17:16:09 2020 +0530

    [CARBONDATA-3962]Remove unwanted empty fact directory in case of flat_folder table
    
    Why is this PR needed?
    For a flat-folder table, the data files are written directly to the table path,
    so the fact dir is not required. It is still created and left behind as an empty dir.
    
    What changes were proposed in this PR?
    Remove the empty fact dir after the data files are moved, once its partition dir has no files left.
    
    This closes #3904
---
 .../org/apache/carbondata/core/metadata/SegmentFileStore.java  | 10 +++++++++-
 .../testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala  |  8 +++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
index d6dc89e..52939eb 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
@@ -466,14 +466,22 @@ public class SegmentFileStore {
   /**
    * Move the loaded data from source folder to destination folder.
    */
-  private static void moveFromTempFolder(String source, String dest) {
+  private static void moveFromTempFolder(String source, String dest) throws IOException {
 
     CarbonFile oldFolder = FileFactory.getCarbonFile(source);
     CarbonFile[] oldFiles = oldFolder.listFiles();
     for (CarbonFile file : oldFiles) {
       file.renameForce(dest + CarbonCommonConstants.FILE_SEPARATOR + file.getName());
     }
+    // delete the segment path first, at any cost; we don't want to delete the fact directory
+    // in case of a multiple-load scenario or an update/delete scenario
     oldFolder.delete();
+    CarbonFile partDir = FileFactory.getCarbonFile(CarbonTablePath.getPartitionDir(dest));
+    // once the last segment is processed (in case of update/delete), delete the main fact directory
+    if (partDir.listFiles(false).size() == 0) {
+      CarbonFile oldFactDirPath = FileFactory.getCarbonFile(CarbonTablePath.getFactDir(dest));
+      oldFactDirPath.delete();
+    }
   }
 
   /**
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
index 8a6cb47..7147138 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
@@ -44,10 +44,12 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
 
   }
 
-  def validateDataFiles(tableUniqueName: String, segmentId: String): Unit = {
+  def validateDataFiles(tableUniqueName: String): Unit = {
     val carbonTable = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName)
     val files = FileFactory.getCarbonFile(carbonTable.getTablePath).listFiles()
+    val factPath = FileFactory.getCarbonFile(CarbonTablePath.getFactDir(carbonTable.getTablePath))
     assert(files.exists(_.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT)))
+    assert(!factPath.exists())
   }
 
   test("data loading for flat folder with global sort") {
@@ -61,7 +63,7 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
       """.stripMargin)
     sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE flatfolder_gs OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""")
 
-    validateDataFiles("default_flatfolder_gs", "0")
+    validateDataFiles("default_flatfolder_gs")
 
     checkAnswer(sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from flatfolder_gs order by empno"),
       sql("select  empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable order by empno"))
@@ -79,7 +81,7 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
       """.stripMargin)
     sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE flatfolder OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""")
 
-    validateDataFiles("default_flatfolder", "0")
+    validateDataFiles("default_flatfolder")
 
     checkAnswer(sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from flatfolder order by empno"),
       sql("select  empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable order by empno"))