Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/11/14 05:48:59 UTC

carbondata git commit: [CARBONDATA-3077] Fixed query failure in fileformat due to stale cache issue

Repository: carbondata
Updated Branches:
  refs/heads/master 4de60509d -> cd0ce4187


[CARBONDATA-3077] Fixed query failure in fileformat due to stale cache issue

Problem
While using the FileFormat API, if a table is created, dropped, and then recreated with the same name, queries on it fail with a schema mismatch error.

Analysis
When carbondata is used through the FileFormat API and a table is dropped and recreated with the same name, the dataMap still holds the stale carbon table, so a schema mismatch exception is thrown.

Solution
To avoid this scenario, always refresh the carbon table object held by the dataMap with the table just retrieved, as sketched below.
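
In essence the fix applies a refresh-on-lookup pattern. The following is a minimal, self-contained sketch of that pattern only; SchemaCacheSketch, Table, and CachedEntry are hypothetical names used for illustration and are not part of the CarbonData API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative only: a name-keyed cache that could serve a stale entry after a
// drop-and-recreate, plus the refresh-on-lookup fix applied to it.
final class SchemaCacheSketch {

  static final class Table {
    final String name;
    final String schema;   // stands in for the full table schema
    Table(String name, String schema) {
      this.name = name;
      this.schema = schema;
    }
  }

  static final class CachedEntry {
    private Table table;
    CachedEntry(Table table) { this.table = table; }
    Table getTable() { return table; }
    // analogous to DataMapFactory.setCarbonTable: overwrite the stale reference
    void setTable(Table table) { this.table = table; }
  }

  private final Map<String, CachedEntry> cache = new ConcurrentHashMap<>();

  // analogous to DataMapStoreManager.getDataMap: the entry is looked up by name
  // and its table reference is refreshed with the caller's current table, so a
  // table recreated under the same name never serves a stale schema
  CachedEntry get(Table currentTable) {
    CachedEntry entry =
        cache.computeIfAbsent(currentTable.name, n -> new CachedEntry(currentTable));
    entry.setTable(currentTable);
    return entry;
  }
}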

This closes #2898


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/cd0ce418
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/cd0ce418
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/cd0ce418

Branch: refs/heads/master
Commit: cd0ce4187e84d784a0c3f0d9c350efa3ee2a51d4
Parents: 4de6050
Author: m00258959 <ma...@huawei.com>
Authored: Mon Nov 5 15:45:46 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Wed Nov 14 11:18:49 2018 +0530

----------------------------------------------------------------------
 .../core/datamap/DataMapStoreManager.java       |  6 ++
 .../core/datamap/dev/DataMapFactory.java        |  4 +
 .../src/test/resources/vardhandaterestruct.csv  | 99 ++++++++++++++++++++
 .../datasource/SparkCarbonDataSourceTest.scala  | 18 ++++
 4 files changed, 127 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/cd0ce418/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
index 75290d3..baf4739 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java
@@ -345,6 +345,12 @@ public final class DataMapStoreManager {
     if (dataMap == null) {
       throw new RuntimeException("Datamap does not exist");
     }
+    // This handles the stale cache scenario that can otherwise cause a schema mismatch
+    // exception. Scenario: when carbondata is used through the FileFormat API and a table is
+    // dropped and recreated with the same name, the dataMap still holds the stale carbon
+    // table, so a schema mismatch exception is thrown. To avoid this, always refresh the
+    // carbon table object with the table just retrieved
+    dataMap.getDataMapFactory().setCarbonTable(table);
     return dataMap;
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cd0ce418/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
index de8dc58..ee7914d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/datamap/dev/DataMapFactory.java
@@ -54,6 +54,10 @@ public abstract class DataMapFactory<T extends DataMap> {
     return carbonTable;
   }
 
+  public void setCarbonTable(CarbonTable carbonTable) {
+    this.carbonTable = carbonTable;
+  }
+
   public DataMapSchema getDataMapSchema() {
     return dataMapSchema;
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cd0ce418/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
----------------------------------------------------------------------
diff --git a/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv b/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
new file mode 100644
index 0000000..daa7c38
--- /dev/null
+++ b/integration/spark-datasource/src/test/resources/vardhandaterestruct.csv
@@ -0,0 +1,99 @@
+1AA1,1,8RAM size,4,Chinese,guangzhou,2738,2014-07-01 12:07:28,2014-07-01 12:07:28,25
+1AA10,10,4RAM size,4,Chinese,wuhan,1714,2014-07-02 12:07:28,2014-07-02 12:07:28,26
+1AA100,100,7RAM size,6,Chinese,yichang,1271,2014-07-03 12:07:28,2014-07-03 12:07:28,27
+1AA1000,1000,5RAM size,3,Chinese,changsha,692,2014-07-04 12:07:28,2014-07-04 12:07:28,28
+1AA10000,10000,1RAM size,1,Chinese,xiangtan,2175,2015-07-05 12:07:28,2015-07-05 12:07:28,29
+1AA100000,100000,4RAM size,6,Chinese,yichang,136,2015-07-06 12:07:28,2015-07-06 12:07:28,30
+1AA1000000,1000000,4RAM size,6,Chinese,xiangtan,1600,2015-07-07 12:07:28,2015-07-07 12:07:28,31
+1AA100001,100001,7RAM size,7,Chinese,wuhan,505,2014-07-08 12:07:28,2014-07-08 12:07:28,32
+1AA100002,100002,0RAM size,3,Chinese,zhuzhou,1341,2015-07-09 12:07:28,2015-07-09 12:07:28,33
+1AA100003,100003,7RAM size,3,Chinese,zhuzhou,2239,2015-07-10 12:07:28,2015-07-10 12:07:28,34
+1AA100004,100004,8RAM size,2,Chinese,wuhan,2970,2014-07-11 12:07:28,2014-07-11 12:07:28,35
+1AA100005,100005,0RAM size,1,Chinese,changsha,2593,2015-07-12 12:07:28,2015-07-12 12:07:28,36
+1AA100006,100006,4RAM size,2,Chinese,changsha,2572,2015-07-13 12:07:28,2015-07-13 12:07:28,37
+1AA100007,100007,4RAM size,3,Chinese,xiangtan,1991,2015-07-14 12:07:28,2015-07-14 12:07:28,38
+1AA100008,100008,0RAM size,1,Chinese,wuhan,1442,2015-07-15 12:07:28,2015-07-15 12:07:28,39
+1AA100009,100009,4RAM size,3,Chinese,xiangtan,1841,2014-07-16 12:07:28,2014-07-16 12:07:28,40
+1AA10001,10001,6RAM size,5,Chinese,xiangtan,298,2015-07-17 12:07:28,2015-07-17 12:07:28,41
+1AA100010,100010,0RAM size,6,Chinese,guangzhou,79,2015-07-18 12:07:28,2015-07-18 12:07:28,42
+1AA100011,100011,1RAM size,1,Chinese,xiangtan,202,2015-07-19 12:07:28,2015-07-19 12:07:28,43
+1AA100012,100012,6RAM size,2,Chinese,xiangtan,568,2015-07-20 12:07:28,2015-07-20 12:07:28,44
+1AA100013,100013,8RAM size,6,Chinese,shenzhen,355,2015-07-21 12:07:28,2015-07-21 12:07:28,45
+1AA100014,100014,7RAM size,3,Chinese,changsha,151,2015-07-22 12:07:28,2015-07-22 12:07:28,46
+1AA100015,100015,3RAM size,1,Chinese,changsha,2863,2015-07-23 12:07:28,2015-07-23 12:07:28,47
+1AA100016,100016,8RAM size,3,Chinese,xiangtan,1873,2015-07-24 12:07:28,2015-07-24 12:07:28,48
+1AA100017,100017,5RAM size,3,Chinese,guangzhou,2205,2015-07-25 12:07:28,2015-07-25 12:07:28,49
+1AA100018,100018,4RAM size,4,Chinese,yichang,441,2015-07-26 12:07:28,2015-07-26 12:07:28,50
+1AA100019,100019,0RAM size,3,Chinese,changsha,2194,2015-07-27 12:07:28,2015-07-27 12:07:28,51
+1AA10002,10002,8RAM size,5,Chinese,yichang,2972,2015-07-28 12:07:28,2015-07-28 12:07:28,52
+1AA100020,100020,1RAM size,7,Chinese,shenzhen,256,2015-07-29 12:07:28,2015-07-29 12:07:28,53
+1AA100021,100021,0RAM size,6,Chinese,changsha,1778,2015-07-30 12:07:28,2015-07-30 12:07:28,54
+1AA100022,100022,3RAM size,5,Chinese,guangzhou,1999,2015-07-31 12:07:28,2015-07-31 12:07:28,55
+1AA100023,100023,8RAM size,3,Chinese,yichang,2194,2015-08-01 12:07:28,2015-08-01 12:07:28,56
+1AA100024,100024,0RAM size,2,Chinese,wuhan,2483,2015-08-02 12:07:28,2015-08-02 12:07:28,57
+1AA100025,100025,7RAM size,1,Chinese,wuhan,1724,2015-08-03 12:07:28,2015-08-03 12:07:28,58
+1AA100026,100026,6RAM size,7,Chinese,guangzhou,1768,2015-08-04 12:07:28,2015-08-04 12:07:28,59
+1AA100027,100027,3RAM size,4,Chinese,zhuzhou,2436,2015-08-05 12:07:28,2015-08-05 12:07:28,60
+1AA100028,100028,0RAM size,5,Chinese,shenzhen,2849,2015-08-06 12:07:28,2015-08-06 12:07:28,61
+1AA100029,100029,4RAM size,5,Chinese,changsha,1691,2015-08-07 12:07:28,2015-08-07 12:07:28,62
+1AA10003,10003,9RAM size,4,Chinese,xiangtan,2071,2015-08-08 12:07:28,2015-08-08 12:07:28,63
+1AA100030,100030,1RAM size,3,Chinese,guangzhou,1333,2015-08-09 12:07:28,2015-08-09 12:07:28,64
+1AA100031,100031,3RAM size,1,Chinese,xiangtan,1080,2015-08-10 12:07:28,2015-08-10 12:07:28,65
+1AA100032,100032,3RAM size,7,Chinese,shenzhen,1053,2015-08-11 12:07:28,2015-08-11 12:07:28,66
+1AA100033,100033,7RAM size,6,Chinese,yichang,760,2015-08-12 12:07:28,2015-08-12 12:07:28,67
+1AA100034,100034,6RAM size,6,Chinese,changsha,2061,2015-08-13 12:07:28,2015-08-13 12:07:28,68
+1AA100035,100035,6RAM size,7,Chinese,shenzhen,2142,2015-08-14 12:07:28,2015-08-14 12:07:28,69
+1AA100036,100036,9RAM size,5,Chinese,changsha,2224,2015-08-15 12:07:28,2015-08-15 12:07:28,70
+1AA100037,100037,9RAM size,6,Chinese,changsha,1015,2015-08-16 12:07:28,2015-08-16 12:07:28,71
+1AA100038,100038,8RAM size,3,Chinese,xiangtan,1229,2015-08-17 12:07:28,2015-08-17 12:07:28,72
+1AA100039,100039,7RAM size,1,Chinese,wuhan,1750,2015-08-18 12:07:28,2015-08-18 12:07:28,73
+1AA10004,10004,4RAM size,4,Chinese,shenzhen,1717,2015-08-19 12:07:28,2015-08-19 12:07:28,74
+1AA100040,100040,1RAM size,7,Chinese,yichang,2078,2015-08-20 12:07:28,2015-08-20 12:07:28,75
+1AA100041,100041,1RAM size,1,Chinese,xiangtan,2734,2015-08-21 12:07:28,2015-08-21 12:07:28,76
+1AA100042,100042,1RAM size,5,Chinese,zhuzhou,2745,2015-08-22 12:07:28,2015-08-22 12:07:28,77
+1AA100043,100043,9RAM size,6,Chinese,yichang,571,2015-08-23 12:07:28,2015-08-23 12:07:28,78
+1AA100044,100044,9RAM size,2,Chinese,shenzhen,1697,2015-08-24 12:07:28,2015-08-24 12:07:28,79
+1AA100045,100045,4RAM size,6,Chinese,wuhan,2553,2015-08-25 12:07:28,2015-08-25 12:07:28,80
+1AA100046,100046,4RAM size,4,Chinese,wuhan,1077,2015-08-26 12:07:28,2015-08-26 12:07:28,81
+1AA100047,100047,6RAM size,1,Chinese,wuhan,1823,2015-08-27 12:07:28,2015-08-27 12:07:28,82
+1AA100048,100048,1RAM size,1,Chinese,xiangtan,2399,2015-08-28 12:07:28,2015-08-28 12:07:28,83
+1AA100049,100049,4RAM size,6,Chinese,xiangtan,2890,2015-08-29 12:07:28,2015-08-29 12:07:28,84
+1AA10005,10005,3RAM size,2,Chinese,zhuzhou,1608,2015-08-30 12:07:28,2015-08-30 12:07:28,85
+1AA100050,100050,4RAM size,1,Chinese,yichang,29,2015-08-31 12:07:28,2015-08-31 12:07:28,86
+1AA100051,100051,3RAM size,7,Chinese,xiangtan,1407,2015-09-01 12:07:28,2015-09-01 12:07:28,87
+1AA100052,100052,8RAM size,7,Chinese,zhuzhou,845,2015-09-02 12:07:28,2015-09-02 12:07:28,88
+1AA100053,100053,3RAM size,3,Chinese,zhuzhou,1655,2015-09-03 12:07:28,2015-09-03 12:07:28,89
+1AA100054,100054,9RAM size,2,Chinese,shenzhen,1368,2015-09-04 12:07:28,2015-09-04 12:07:28,90
+1AA100055,100055,4RAM size,7,Chinese,guangzhou,1728,2015-09-05 12:07:28,2015-09-05 12:07:28,91
+1AA100056,100056,0RAM size,5,Chinese,wuhan,750,2015-09-06 12:07:28,2015-09-06 12:07:28,92
+1AA100057,100057,4RAM size,6,Chinese,changsha,2288,2015-09-07 12:07:28,2015-09-07 12:07:28,93
+1AA100058,100058,3RAM size,4,Chinese,wuhan,2635,2015-09-08 12:07:28,2015-09-08 12:07:28,94
+1AA100059,100059,3RAM size,7,Chinese,xiangtan,1337,2015-09-09 12:07:28,2015-09-09 12:07:28,95
+1AA10006,10006,5RAM size,5,Chinese,wuhan,2478,2015-09-10 12:07:28,2015-09-10 12:07:28,96
+1AA100060,100060,4RAM size,4,Chinese,shenzhen,538,2015-09-11 12:07:28,2015-09-11 12:07:28,97
+1AA100061,100061,0RAM size,6,Chinese,wuhan,1407,2015-09-12 12:07:28,2015-09-12 12:07:28,98
+1AA100062,100062,6RAM size,6,Chinese,zhuzhou,2952,2015-09-13 12:07:28,2015-09-13 12:07:28,99
+1AA100063,100063,8RAM size,3,Chinese,zhuzhou,1226,2015-09-14 12:07:28,2015-09-14 12:07:28,100
+1AA100064,100064,4RAM size,7,Chinese,changsha,865,2015-09-15 12:07:28,2015-09-15 12:07:28,101
+1AA100065,100065,4RAM size,7,Chinese,changsha,901,2015-09-16 12:07:28,2015-09-16 12:07:28,102
+1AA100066,100066,1RAM size,4,Chinese,yichang,1864,2015-09-17 12:07:28,2015-09-17 12:07:28,103
+1AA100067,100067,6RAM size,7,Chinese,changsha,572,2015-09-18 12:07:28,2015-09-18 12:07:28,104
+1AA100068,100068,4RAM size,7,Chinese,xiangtan,412,2015-09-19 12:07:28,2015-09-19 12:07:28,105
+1AA100069,100069,3RAM size,5,Chinese,yichang,1491,2015-09-20 12:07:28,2015-09-20 12:07:28,106
+1AA10007,10007,2RAM size,3,Chinese,xiangtan,1350,2015-09-21 12:07:28,2015-09-21 12:07:28,107
+1AA100070,100070,9RAM size,3,Chinese,wuhan,1567,2015-09-22 12:07:28,2015-09-22 12:07:28,108
+1AA100071,100071,2RAM size,5,Chinese,changsha,1973,2015-09-23 12:07:28,2015-09-23 12:07:28,109
+1AA100072,100072,9RAM size,7,Chinese,xiangtan,448,2015-09-24 12:07:28,2015-09-24 12:07:28,110
+1AA100073,100073,3RAM size,2,Chinese,shenzhen,2488,2015-09-25 12:07:28,2015-09-25 12:07:28,111
+1AA100074,100074,3RAM size,7,Chinese,shenzhen,907,2015-09-26 12:07:28,2015-09-26 12:07:28,112
+1AA100075,100075,5RAM size,6,Chinese,guangzhou,2507,2015-09-27 12:07:28,2015-09-27 12:07:28,113
+1AA100076,100076,4RAM size,7,Chinese,yichang,732,2015-09-28 12:07:28,2015-09-28 12:07:28,114
+1AA100077,100077,5RAM size,6,Chinese,changsha,2077,2015-09-29 12:07:28,2015-09-29 12:07:28,115
+1AA100078,100078,6RAM size,5,Chinese,wuhan,1434,2015-09-30 12:07:28,2015-09-30 12:07:28,116
+1AA100079,100079,0RAM size,6,Chinese,changsha,1098,2015-10-01 12:07:28,2015-10-01 12:07:28,117
+1AA10008,10008,4RAM size,4,Chinese,changsha,813,2015-10-02 12:07:28,2015-10-02 12:07:28,118
+1AA100080,100080,9RAM size,6,Chinese,xiangtan,954,2015-10-03 12:07:28,2015-10-03 12:07:28,119
+1AA100081,100081,8RAM size,2,Chinese,zhuzhou,613,2015-10-04 12:07:28,2015-10-04 12:07:28,120
+1AA100082,100082,9RAM size,7,Chinese,xiangtan,2348,2015-10-05 12:07:28,2015-10-05 12:07:28,121
+1AA100083,100083,3RAM size,2,Chinese,shenzhen,2192,2015-10-06 12:07:28,2015-10-06 12:07:28,122
+1AA100084,100084,4RAM size,4,Chinese,xiangtan,2826,2015-10-07 12:07:28,2015-10-07 12:07:28,123

http://git-wip-us.apache.org/repos/asf/carbondata/blob/cd0ce418/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
----------------------------------------------------------------------
diff --git a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
index 937f0d9..1e58a9e 100644
--- a/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
+++ b/integration/spark-datasource/src/test/scala/org/apache/spark/sql/carbondata/datasource/SparkCarbonDataSourceTest.scala
@@ -1312,6 +1312,24 @@ class SparkCarbonDataSourceTest extends FunSuite with BeforeAndAfterAll {
     spark.sql(s"drop table if exists t_carbn01b")
   }
 
+  test("test fileformat flow with drop and query on same table") {
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("drop table if exists fileformat_drop_hive")
+    spark.sql("create table fileformat_drop (imei string,AMSize string,channelsId string,ActiveCountry string, Activecity string,gamePointId double,deviceInformationId double,productionDate Timestamp,deliveryDate timestamp,deliverycharge double) using carbon options('table_blocksize'='1','LOCAL_DICTIONARY_ENABLE'='TRUE','LOCAL_DICTIONARY_THRESHOLD'='1000')")
+    spark.sql("create table fileformat_drop_hive(imei string,deviceInformationId double,AMSize string,channelsId string,ActiveCountry string,Activecity string,gamePointId double,productionDate Timestamp,deliveryDate timestamp,deliverycharge double)row format delimited FIELDS terminated by ',' LINES terminated by '\n' stored as textfile")
+    val sourceFile = FileFactory.getPath(s"$resource/vardhandaterestruct.csv").toString
+    spark.sql(s"load data local inpath '$sourceFile' into table fileformat_drop_hive")
+    spark.sql("insert into fileformat_drop select imei ,deviceInformationId ,AMSize ,channelsId ,ActiveCountry ,Activecity ,gamePointId ,productionDate ,deliveryDate ,deliverycharge from fileformat_drop_hive")
+    assert(spark.sql("select count(*) from fileformat_drop where imei='1AA10000'").collect().length == 1)
+
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("create table fileformat_drop (imei string,deviceInformationId double,AMSize string,channelsId string,ActiveCountry string,Activecity string,gamePointId float,productionDate timestamp,deliveryDate timestamp,deliverycharge decimal(10,2)) using carbon options('table_blocksize'='1','LOCAL_DICTIONARY_ENABLE'='true','local_dictionary_threshold'='1000')")
+    spark.sql("insert into fileformat_drop select imei ,deviceInformationId ,AMSize ,channelsId ,ActiveCountry ,Activecity ,gamePointId ,productionDate ,deliveryDate ,deliverycharge from fileformat_drop_hive")
+    assert(spark.sql("select count(*) from fileformat_drop where imei='1AA10000'").collect().length == 1)
+    spark.sql("drop table if exists fileformat_drop")
+    spark.sql("drop table if exists fileformat_drop_hive")
+  }
+
   override protected def beforeAll(): Unit = {
     drop
     createParquetTable