You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/05/31 10:59:39 UTC
[carbondata] branch master updated: [CARBONDATA-3405] Fix
getSplits() should clear the cache in SDK
This is an automated email from the ASF dual-hosted git repository.
kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 2251528 [CARBONDATA-3405] Fix getSplits() should clear the cache in SDK
2251528 is described below
commit 225152844016b52dfb58ce794c3ae7a516a7e34f
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Wed May 29 17:44:11 2019 +0530
[CARBONDATA-3405] Fix getSplits() should clear the cache in SDK
Problem: when getSplits() is called back to back, once with the blocklet
cache and once with the block cache, the block cache is not used.
Cause: the cache key was dbName_tableName, but tableName was always hardcoded to "null".
Solution: set the table name in the cache key, and clear the cache after
obtaining the splits in getSplits().
This closes #3247
---
.../core/metadata/schema/table/CarbonTable.java | 2 +-
.../carbondata/sdk/file/CarbonReaderBuilder.java | 27 ++++++++++++++--------
.../carbondata/sdk/file/CarbonReaderTest.java | 22 ++++++++++++++++++
3 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
index 4f4475d..47ad582 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/CarbonTable.java
@@ -250,7 +250,7 @@ public class CarbonTable implements Serializable, Writable {
String tablePath,
String tableName,
Configuration configuration) throws IOException {
- TableInfo tableInfoInfer = CarbonUtil.buildDummyTableInfo(tablePath, "null", "null");
+ TableInfo tableInfoInfer = CarbonUtil.buildDummyTableInfo(tablePath, tableName, "null");
// InferSchema from data file
org.apache.carbondata.format.TableInfo tableInfo =
CarbonUtil.inferSchema(tablePath, tableName, false, configuration);
diff --git a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
index 2db92ea..3ba7835 100644
--- a/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
+++ b/store/sdk/src/main/java/org/apache/carbondata/sdk/file/CarbonReaderBuilder.java
@@ -384,9 +384,6 @@ public class CarbonReaderBuilder {
return new CarbonReader<>(readers);
}
} catch (Exception ex) {
- // Clear the datamap cache as it can get added in getSplits() method
- DataMapStoreManager.getInstance().clearDataMaps(
- format.getOrCreateCarbonTable((job.getConfiguration())).getAbsoluteTableIdentifier());
throw ex;
}
}
@@ -405,13 +402,23 @@ public class CarbonReaderBuilder {
if (hadoopConf == null) {
hadoopConf = FileFactory.getConfiguration();
}
- final Job job = new Job(new JobConf(hadoopConf));
- CarbonFileInputFormat format = prepareFileInputFormat(job, enableBlockletDistribution, false);
- List<InputSplit> splits =
- format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
- for (InputSplit split : splits) {
- // Load the detailInfo
- ((CarbonInputSplit) split).getDetailInfo();
+ Job job = null;
+ List<InputSplit> splits;
+ CarbonFileInputFormat format = null;
+ try {
+ job = new Job(new JobConf(hadoopConf));
+ format = prepareFileInputFormat(job, enableBlockletDistribution, false);
+ splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
+ for (InputSplit split : splits) {
+ // Load the detailInfo
+ ((CarbonInputSplit) split).getDetailInfo();
+ }
+ } finally {
+ if (format != null) {
+ // Clear the datamap cache as it is added in getSplits() method
+ DataMapStoreManager.getInstance().clearDataMaps(
+ format.getOrCreateCarbonTable((job.getConfiguration())).getAbsoluteTableIdentifier());
+ }
}
return splits.toArray(new InputSplit[splits.size()]);
}
diff --git a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
index 1073428..24a735e 100644
--- a/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
+++ b/store/sdk/src/test/java/org/apache/carbondata/sdk/file/CarbonReaderTest.java
@@ -2586,4 +2586,26 @@ public class CarbonReaderTest extends TestCase {
Assert.assertEquals(totalCount, 1000000);
FileUtils.deleteDirectory(new File(path));
}
+
+ @Test
+ public void testGetSplits() throws IOException, InterruptedException {
+ String path = "./testWriteFiles/" + System.nanoTime();
+ FileUtils.deleteDirectory(new File(path));
+
+ Field[] fields = new Field[2];
+ fields[0] = new Field("name", DataTypes.STRING);
+ fields[1] = new Field("age", DataTypes.INT);
+
+ TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 1, 100);
+
+ InputSplit[] splits = CarbonReader.builder(path).getSplits(true);
+ // check for 3 blocklet count (as only one carbon file will be created)
+ Assert.assertEquals(splits.length, 3);
+
+ InputSplit[] splits1 = CarbonReader.builder(path).getSplits(false);
+ // check for 1 block count (as only one carbon file will be created)
+ Assert.assertEquals(splits1.length, 1);
+ FileUtils.deleteDirectory(new File(path));
+ }
+
}