You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2020/08/28 16:09:39 UTC
[carbondata] branch master updated: [CARBONDATA-3962]Remove
unwanted empty fact directory in case of flat_folder table
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 96e7d95 [CARBONDATA-3962]Remove unwanted empty fact directory in case of flat_folder table
96e7d95 is described below
commit 96e7d957da160151b4b44abc10914e3208d2f3ab
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Wed Aug 26 17:16:09 2020 +0530
[CARBONDATA-3962]Remove unwanted empty fact directory in case of flat_folder table
Why is this PR needed?
For flat-folder tables, the data files are written directly under the table path,
so the fact dir is not required. It is unwanted, yet it is left behind as an empty dir.
What changes were proposed in this PR?
Remove empty fact dirs
This closes #3904
---
.../org/apache/carbondata/core/metadata/SegmentFileStore.java | 10 +++++++++-
.../testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala | 8 +++++---
2 files changed, 14 insertions(+), 4 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
index d6dc89e..52939eb 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/SegmentFileStore.java
@@ -466,14 +466,22 @@ public class SegmentFileStore {
/**
* Move the loaded data from source folder to destination folder.
*/
- private static void moveFromTempFolder(String source, String dest) {
+ private static void moveFromTempFolder(String source, String dest) throws IOException {
CarbonFile oldFolder = FileFactory.getCarbonFile(source);
CarbonFile[] oldFiles = oldFolder.listFiles();
for (CarbonFile file : oldFiles) {
file.renameForce(dest + CarbonCommonConstants.FILE_SEPARATOR + file.getName());
}
+ // always delete the segment path first; we don't want to delete the fact directory in
+ // case of a multiple-load scenario or an update/delete scenario
oldFolder.delete();
+ CarbonFile partDir = FileFactory.getCarbonFile(CarbonTablePath.getPartitionDir(dest));
+ // once the last segment is processed (in case of update/delete), delete the main fact directory
+ if (partDir.listFiles(false).size() == 0) {
+ CarbonFile oldFactDirPath = FileFactory.getCarbonFile(CarbonTablePath.getFactDir(dest));
+ oldFactDirPath.delete();
+ }
}
/**
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
index 8a6cb47..7147138 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/flatfolder/FlatFolderTableLoadingTestCase.scala
@@ -44,10 +44,12 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
}
- def validateDataFiles(tableUniqueName: String, segmentId: String): Unit = {
+ def validateDataFiles(tableUniqueName: String): Unit = {
val carbonTable = CarbonMetadata.getInstance().getCarbonTable(tableUniqueName)
val files = FileFactory.getCarbonFile(carbonTable.getTablePath).listFiles()
+ val factPath = FileFactory.getCarbonFile(CarbonTablePath.getFactDir(carbonTable.getTablePath))
assert(files.exists(_.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT)))
+ assert(!factPath.exists())
}
test("data loading for flat folder with global sort") {
@@ -61,7 +63,7 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
""".stripMargin)
sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE flatfolder_gs OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""")
- validateDataFiles("default_flatfolder_gs", "0")
+ validateDataFiles("default_flatfolder_gs")
checkAnswer(sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from flatfolder_gs order by empno"),
sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable order by empno"))
@@ -79,7 +81,7 @@ class FlatFolderTableLoadingTestCase extends QueryTest with BeforeAndAfterAll {
""".stripMargin)
sql(s"""LOAD DATA local inpath '$resourcesPath/data.csv' INTO TABLE flatfolder OPTIONS('DELIMITER'= ',', 'QUOTECHAR'= '"')""")
- validateDataFiles("default_flatfolder", "0")
+ validateDataFiles("default_flatfolder")
checkAnswer(sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from flatfolder order by empno"),
sql("select empno, empname, designation, doj, workgroupcategory, workgroupcategoryname, deptno, deptname, projectcode, projectjoindate, projectenddate, attendance, utilization, salary from originTable order by empno"))