Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 10:59:39 UTC

[carbondata] branch master updated: [CARBONDATA-3405] Fix getSplits() should clear the cache in SDK

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 2251528  [CARBONDATA-3405] Fix getSplits() should clear the cache in SDK
2251528 is described below

commit 225152844016b52dfb58ce794c3ae7a516a7e34f
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Wed May 29 17:44:11 2019 +0530

    [CARBONDATA-3405] Fix getSplits() should clear the cache in SDK
    
    Problem: when getSplits() is called back to back, once with the blocklet
    cache and once with the block cache, the block cache is not set.
    Cause: the cache key was dbName_tableName, but tableName was always
    hardcoded to "null".
    Solution: set the table name in the cache key, and clear the cache
    after getting splits in getSplits().
    
    This closes #3247
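
For context, a minimal sketch of the key collision described above. The
cacheKey helper below is hypothetical, not CarbonData source; the real key
handling lives in CarbonTable and DataMapStoreManager:

    // Hypothetical illustration: with tableName hardcoded to "null",
    // every table collapses to the same cache key, so a second
    // getSplits() call can hit the entry cached by the first call.
    public class CacheKeyDemo {
      static String cacheKey(String dbName, String tableName) {
        return dbName + "_" + tableName;
      }
      public static void main(String[] args) {
        // Before the fix: the literal "null" was always passed
        System.out.println(cacheKey("default", "null")
            .equals(cacheKey("default", "null")));    // true -> collision
        // After the fix: the real table name is part of the key
        System.out.println(cacheKey("default", "tableA")
            .equals(cacheKey("default", "tableB")));  // false -> distinct
      }
    }
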
---
 .../core/metadata/schema/table/CarbonTable.java    |  2 +-
 .../carbondata/sdk/file/CarbonReaderBuilder.java   | 27 ++++++++++++++--------
 .../carbondata/sdk/file/CarbonReaderTest.java      | 22 ++++++++++++++++++
 3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 4f4475d..47ad582 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -250,7 +250,7 @@ public class CarbonTable implements Serializable, Writable {
       String tablePath,
       String tableName,
       Configuration configuration) throws IOException {
-    TableInfo tableInfoInfer = CarbonUtil.buildDummyTableInfo(tablePath, "null", "null");
+    TableInfo tableInfoInfer = CarbonUtil.buildDummyTableInfo(tablePath, tableName, "null");
     // InferSchema from data file
     org.apache.carbondata.format.TableInfo tableInfo =
         CarbonUtil.inferSchema(tablePath, tableName, false, configuration);
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
index 2db92ea..3ba7835 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
@@ -384,9 +384,6 @@ public class CarbonReaderBuilder {
         return new CarbonReader<>(readers);
       }
     } catch (Exception ex) {
-      // Clear the datamap cache as it can get added in getSplits() method
-      DataMapStoreManager.getInstance().clearDataMaps(
-          format.getOrCreateCarbonTable((job.getConfiguration())).getAbsoluteTableIdentifier());
       throw ex;
     }
   }
@@ -405,13 +402,23 @@ public class CarbonReaderBuilder {
     if (hadoopConf == null) {
       hadoopConf = FileFactory.getConfiguration();
     }
-    final Job job = new Job(new JobConf(hadoopConf));
-    CarbonFileInputFormat format = prepareFileInputFormat(job, enableBlockletDistribution, false);
-    List<InputSplit> splits =
-        format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
-    for (InputSplit split : splits) {
-      // Load the detailInfo
-      ((CarbonInputSplit) split).getDetailInfo();
+    Job job = null;
+    List<InputSplit> splits;
+    CarbonFileInputFormat format = null;
+    try {
+      job = new Job(new JobConf(hadoopConf));
+      format = prepareFileInputFormat(job, enableBlockletDistribution, false);
+      splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
+      for (InputSplit split : splits) {
+        // Load the detailInfo
+        ((CarbonInputSplit) split).getDetailInfo();
+      }
+    } finally {
+      if (format != null) {
+        // Clear the datamap cache as it is added in getSplits() method
+        DataMapStoreManager.getInstance().clearDataMaps(
+            format.getOrCreateCarbonTable((job.getConfiguration())).getAbsoluteTableIdentifier());
+      }
     }
     return splits.toArray(new InputSplit[splits.size()]);
   }
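
A minimal usage sketch of the fixed method, mirroring the new test below;
the path is illustrative:

    // Illustrative only: back-to-back calls with blocklet- and block-level
    // distribution. The finally block above clears the datamap cache after
    // each call, so the second call builds its cache fresh.
    import java.io.IOException;

    import org.apache.carbondata.sdk.file.CarbonReader;
    import org.apache.hadoop.mapreduce.InputSplit;

    public class GetSplitsDemo {
      public static void main(String[] args)
          throws IOException, InterruptedException {
        String path = "/path/to/carbon/files";  // hypothetical path
        InputSplit[] blockletSplits = CarbonReader.builder(path).getSplits(true);
        InputSplit[] blockSplits = CarbonReader.builder(path).getSplits(false);
        System.out.println(blockletSplits.length + " blocklet splits, "
            + blockSplits.length + " block splits");
      }
    }
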
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
index 1073428..24a735e 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
@@ -2586,4 +2586,26 @@ public class CarbonReaderTest extends TestCase {
     Assert.assertEquals(totalCount, 1000000);
     FileUtils.deleteDirectory(new File(path));
   }
+
+  @Test
+  public void testGetSplits() throws IOException, InterruptedException {
+    String path = "./testWriteFiles/" + System.nanoTime();
+    FileUtils.deleteDirectory(new File(path));
+
+    Field[] fields = new Field[2];
+    fields[0] = new Field("name", DataTypes.STRING);
+    fields[1] = new Field("age", DataTypes.INT);
+
+    TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 1, 100);
+
+    InputSplit[] splits = CarbonReader.builder(path).getSplits(true);
+    // check for 3 blocklet count (as only one carbon file will be created)
+    Assert.assertEquals(splits.length, 3);
+
+    InputSplit[] splits1 = CarbonReader.builder(path).getSplits(false);
+    // check for 1 block count (as only one carbon file will be created)
+    Assert.assertEquals(splits1.length, 1);
+    FileUtils.deleteDirectory(new File(path));
+  }
+
 }