You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by qi...@apache.org on 2020/08/09 10:26:43 UTC

[carbondata] branch master updated: [CARBONDATA-3945] Fix NullPointerException of data loading

This is an automated email from the ASF dual-hosted git repository.

qiangcai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 85314dd  [CARBONDATA-3945] Fix NullPointerException of data loading
85314dd is described below

commit 85314ddbc70839913f16121ae60293c6c05f0a76
Author: haomarch <ma...@126.com>
AuthorDate: Thu Aug 6 10:25:02 2020 +0800

    [CARBONDATA-3945] Fix NullPointerException of data loading
    
    Why is this PR needed?
    1. getLastModifiedTime of LoadMetadataDetails fails due to "updateDeltaEndTimestamp is empty string".
    2. In the getCommittedIndexFile founction, NPE happens because of "segmentfile is null" under the Unusual cases.
    3. Cleaning temp files fails because of "partitionInfo is null" under the unusual cases.
    4. When calculating sizeInBytes of CarbonRelation, under the unusual cases, it need to collect the directory size. but the directory path only works for non-partition tables, for partition tables, filenotfoundexcepiton was throwed.
    
    What changes were proposed in this PR?
    1. when updateDeltaEndTimestamp is empty string, skip the code "convertTimeStampToLong(updateDeltaEndTimestamp)";
    2. when segmentfile is null, avoid trigger "segmentfile.getSegmentMetaDataInfo()"
    3. init partitionInfo with a empty list instead of "null"
    4. Add a judgment condition, to avoid scan directory path for partitiontable when the directroy path only works for non-partitiontable.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    Yes
    
    This closes #3881
---
 .../readcommitter/TableStatusReadCommittedScope.java     |  4 +++-
 .../core/statusmanager/LoadMetadataDetails.java          |  2 +-
 .../core/load/LoadMetadataDetailsUnitTest.java           | 16 ++++++++++++++++
 .../spark/sql/events/MergeIndexEventListener.scala       |  2 +-
 .../scala/org/apache/spark/sql/hive/CarbonRelation.scala |  3 ---
 5 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/readcommitter/TableStatusReadCommittedScope.java b/core/src/main/java/org/apache/carbondata/core/readcommitter/TableStatusReadCommittedScope.java
index 815efee..5413cec 100644
--- a/core/src/main/java/org/apache/carbondata/core/readcommitter/TableStatusReadCommittedScope.java
+++ b/core/src/main/java/org/apache/carbondata/core/readcommitter/TableStatusReadCommittedScope.java
@@ -87,7 +87,9 @@ public class TableStatusReadCommittedScope implements ReadCommittedScope {
       SegmentFileStore fileStore =
           new SegmentFileStore(identifier.getTablePath(), segment.getSegmentFileName());
       indexFiles = fileStore.getIndexOrMergeFiles();
-      segment.setSegmentMetaDataInfo(fileStore.getSegmentFile().getSegmentMetaDataInfo());
+      if (fileStore.getSegmentFile() != null) {
+        segment.setSegmentMetaDataInfo(fileStore.getSegmentFile().getSegmentMetaDataInfo());
+      }
     }
     return indexFiles;
   }
diff --git a/core/src/main/java/org/apache/carbondata/core/statusmanager/LoadMetadataDetails.java b/core/src/main/java/org/apache/carbondata/core/statusmanager/LoadMetadataDetails.java
index b97109b..0527492 100644
--- a/core/src/main/java/org/apache/carbondata/core/statusmanager/LoadMetadataDetails.java
+++ b/core/src/main/java/org/apache/carbondata/core/statusmanager/LoadMetadataDetails.java
@@ -489,7 +489,7 @@ public class LoadMetadataDetails implements Serializable {
   }
 
   public long getLastModifiedTime() {
-    if (updateDeltaEndTimestamp != null) {
+    if (!StringUtils.isEmpty(updateDeltaEndTimestamp)) {
       return convertTimeStampToLong(updateDeltaEndTimestamp);
     }
     return convertTimeStampToLong(timestamp);
diff --git a/core/src/test/java/org/apache/carbondata/core/load/LoadMetadataDetailsUnitTest.java b/core/src/test/java/org/apache/carbondata/core/load/LoadMetadataDetailsUnitTest.java
index e1e92f8..273323e 100644
--- a/core/src/test/java/org/apache/carbondata/core/load/LoadMetadataDetailsUnitTest.java
+++ b/core/src/test/java/org/apache/carbondata/core/load/LoadMetadataDetailsUnitTest.java
@@ -108,6 +108,22 @@ public class LoadMetadataDetailsUnitTest {
     assertEquals(expected_result, result);
   }
 
+  @Test public void testGetLastTimeStamp() throws Exception {
+    SimpleDateFormat parser = new SimpleDateFormat(CarbonCommonConstants.CARBON_TIMESTAMP_MILLIS);
+    long timeStamp = parser.parse("01-01-2016 00:00:00:000").getTime();
+    loadMetadataDetails.setUpdateDeltaEndTimestamp("01-01-2016 00:00:00:000");
+    assertEquals(loadMetadataDetails.getLastModifiedTime(), timeStamp);;
+    loadMetadataDetails.setUpdateDeltaEndTimestamp("01-01-2016 00:00:00");
+    assertEquals(loadMetadataDetails.getLastModifiedTime(), timeStamp);;
+    loadMetadataDetails.setUpdateDeltaEndTimestamp(null);
+    loadMetadataDetails.setLoadEndTime(timeStamp);
+    assertEquals(loadMetadataDetails.getLastModifiedTime(), timeStamp);
+    loadMetadataDetails.setUpdateDeltaEndTimestamp("");
+    assertEquals(loadMetadataDetails.getLastModifiedTime(), timeStamp);
+    loadMetadataDetails.setUpdateDeltaEndTimestamp("#");
+    assertEquals(loadMetadataDetails.getLastModifiedTime(), 0L);
+  }
+
   public static Long getTime(String date) {
     SimpleDateFormat simpleDateFormat = new SimpleDateFormat(CarbonCommonConstants.CARBON_TIMESTAMP_MILLIS);
     try {
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala b/integration/spark/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
index 4e06ff0..58ce582 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
@@ -53,7 +53,7 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
         val carbonTable = loadModel.getCarbonDataLoadSchema.getCarbonTable
         val compactedSegments = loadModel.getMergedSegmentIds
         val sparkSession = SparkSession.getActiveSession.get
-        var partitionInfo: util.List[String] = null
+        var partitionInfo: util.List[String] = new util.ArrayList[String]()
         val partitionPath = operationContext.getProperty("partitionPath")
         if (partitionPath != null) {
           partitionInfo = ObjectSerializationUtil
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonRelation.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonRelation.scala
index 264d06b..7e5dade 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonRelation.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonRelation.scala
@@ -207,9 +207,6 @@ case class CarbonRelation(
                   null != validSeg.getLoadMetadataDetails.getIndexSize) {
                 size = size + validSeg.getLoadMetadataDetails.getDataSize.toLong +
                        validSeg.getLoadMetadataDetails.getIndexSize.toLong
-              } else {
-                size = size + FileFactory.getDirectorySize(
-                  CarbonTablePath.getSegmentPath(tablePath, validSeg.getSegmentNo))
               }
             }
             // update the new table status time