You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2019/03/14 08:35:34 UTC

[carbondata] branch master updated: [CARBONDATA-3313] count(*) is not invalidating the invalid segments cache

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 89c3873  [CARBONDATA-3313] count(*) is not invalidating the invalid segments cache
89c3873 is described below

commit 89c387342b0ad230d6c9753f6aa40a1467023054
Author: dhatchayani <dh...@gmail.com>
AuthorDate: Tue Mar 12 14:36:54 2019 +0530

    [CARBONDATA-3313] count(*) is not invalidating the invalid segments cache
    
    Problem:
    If any segment is deleted the next query has to clear/invalidate the datamap cache for those invalid segments. But count(*) has not considered the invalid segments to clear the datamap cache.
    
    Solution:
    In the count(*) flow, before clearing the datamap cache, check for the table's invalid segments and add them to the set of segments to be cleaned.
    
    This closes #3144
---
 .../hadoop/api/CarbonTableInputFormat.java         |  2 ++
 .../sql/commands/TestCarbonShowCacheCommand.scala  | 23 ++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index c56b1db..281143b 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -617,6 +617,8 @@ public class CarbonTableInputFormat<T> extends CarbonInputFormat<T> {
         toBeCleanedSegments.add(eachSegment);
       }
     }
+    // remove entry in the segment index if there are invalid segments
+    toBeCleanedSegments.addAll(allSegments.getInvalidSegments());
     if (toBeCleanedSegments.size() > 0) {
       DataMapStoreManager.getInstance()
           .clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()),
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
index e999fc7..69c5f7e 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/sql/commands/TestCarbonShowCacheCommand.scala
@@ -110,6 +110,28 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
     sql("select workgroupcategoryname,count(empname) as count from cache_4 group by workgroupcategoryname").collect()
   }
 
+  test("test drop cache invalidation in case of invalid segments"){
+    sql(s"CREATE TABLE empTable(empno int, empname String, designation String, " +
+        s"doj Timestamp, workgroupcategory int, workgroupcategoryname String, deptno int, " +
+        s"deptname String, projectcode int, projectjoindate Timestamp, projectenddate Timestamp," +
+        s"attendance int, utilization int, salary int) stored by 'carbondata'")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql(s"LOAD DATA INPATH '$resourcesPath/data.csv' INTO TABLE empTable")
+    sql("select count(*) from empTable").show()
+    var showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("3/3 index files cached"))
+    sql("delete from table empTable where segment.id in(0)").show()
+    // check whether count(*) query invalidates the cache for the invalid segments
+    sql("select count(*) from empTable").show()
+    showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("2/2 index files cached"))
+    sql("delete from table empTable where segment.id in(1)").show()
+    // check whether select * query invalidates the cache for the invalid segments
+    sql("select * from empTable").show()
+    showCache = sql("SHOW METACACHE on table empTable").collect()
+    assert(showCache(0).get(2).toString.equalsIgnoreCase("1/1 index files cached"))
+  }
 
   override protected def afterAll(): Unit = {
     sql("use default").collect()
@@ -122,6 +144,7 @@ class TestCarbonShowCacheCommand extends QueryTest with BeforeAndAfterAll {
     sql("DROP TABLE IF EXISTS cache_db.cache_3")
     sql("DROP TABLE IF EXISTS default.cache_4")
     sql("DROP TABLE IF EXISTS default.cache_5")
+    sql("DROP TABLE IF EXISTS empTable")
   }
 
   test("show cache") {