You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/05/17 07:19:32 UTC

[carbondata] branch master updated: [CARBONDATA-3386] Concurrent Merge index and query is failing

This is an automated email from the ASF dual-hosted git repository.

ravipesala pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 894216e  [CARBONDATA-3386] Concurrent Merge index and query is failing
894216e is described below

commit 894216ebcf3de5a0df2f3c7c3e455d93530cc3a3
Author: dhatchayani <dh...@gmail.com>
AuthorDate: Wed May 15 14:48:20 2019 +0530

    [CARBONDATA-3386] Concurrent Merge index and query is failing
    
    Problem:
    Concurrent merge index and query fail. When a load is triggered on a table, merge index is triggered at the end of the load, but only after the table status has already been updated to SUCCESS/PARTIAL SUCCESS for that segment. The segment is therefore already visible to a concurrent query. Once the merge index completes, it deletes the index files that the query is still referring to, which causes the query to fail.
    
    Solution:
    Trigger merge index before the table status is updated, for all operations such as LOAD and COMPACT.
    
    This closes #3221
---
 .../createTable/TestCreateTableIfNotExists.scala   |   1 +
 .../CarbonIndexFileMergeTestCase.scala             |   4 +-
 .../spark/rdd/CarbonTableCompactor.scala           |   5 +
 .../scala/org/apache/spark/sql/CarbonEnv.scala     |   5 +-
 .../spark/sql/events/MergeIndexEventListener.scala |  70 ++-----------
 .../command/table/CarbonCreateTableCommand.scala   |   5 +-
 .../org/apache/spark/util/MergeIndexUtil.scala     | 108 +++++++++++++++++++++
 7 files changed, 128 insertions(+), 70 deletions(-)

diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableIfNotExists.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableIfNotExists.scala
index dc54127..b3fa0eb 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableIfNotExists.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableIfNotExists.scala
@@ -78,6 +78,7 @@ class TestCreateTableIfNotExists extends QueryTest with BeforeAndAfterAll {
           } catch {
             case exception: Exception =>
               result = exception.getMessage
+              exception.printStackTrace()
           }
           result
         }
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala
index 173c14f..c9a7971 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datacompaction/CarbonIndexFileMergeTestCase.scala
@@ -300,8 +300,8 @@ class CarbonIndexFileMergeTestCase
     assert(getIndexFileCount("default_nonindexmerge", "1") == 100)
     assert(getIndexFileCount("default_nonindexmerge", "2") == 100)
     assert(getIndexFileCount("default_nonindexmerge", "3") == 100)
-    assert(getIndexFileCount("default_nonindexmerge", "0.1") == 100)
-    assert(getIndexFileCount("default_nonindexmerge", "2.1") == 100)
+    assert(getIndexFileCount("default_nonindexmerge", "0.1") == 0)
+    assert(getIndexFileCount("default_nonindexmerge", "2.1") == 0)
     assert(getIndexFileCount("default_nonindexmerge", "0.2") == 0)
     checkAnswer(sql("""Select count(*) from nonindexmerge"""), rows)
   }
diff --git a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala
index cfb40ec..afe2927 100644
--- a/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala
+++ b/integration/spark2/src/main/scala/org/apache/carbondata/spark/rdd/CarbonTableCompactor.scala
@@ -26,6 +26,7 @@ import scala.collection.mutable
 
 import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.execution.command.{CarbonMergerMapping, CompactionCallableModel, CompactionModel}
+import org.apache.spark.util.MergeIndexUtil
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datamap.{DataMapStoreManager, Segment}
@@ -196,6 +197,10 @@ class CarbonTableCompactor(carbonLoadModel: CarbonLoadModel,
       val mergedLoadNumber = CarbonDataMergerUtil.getLoadNumberFromLoadName(mergedLoadName)
       var segmentFilesForIUDCompact = new util.ArrayList[Segment]()
       var segmentFileName: String = null
+      if (compactionType != CompactionType.IUD_DELETE_DELTA &&
+          compactionType != CompactionType.IUD_UPDDEL_DELTA) {
+        MergeIndexUtil.mergeIndexFilesOnCompaction(compactionCallableModel)
+      }
       if (carbonTable.isHivePartitionTable) {
         val readPath =
           CarbonTablePath.getSegmentFilesLocation(carbonLoadModel.getTablePath) +
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
index 7ca9945..149e45e 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
@@ -37,7 +37,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import org.apache.carbondata.core.util._
 import org.apache.carbondata.datamap.{TextMatchMaxDocUDF, TextMatchUDF}
 import org.apache.carbondata.events._
-import org.apache.carbondata.processing.loading.events.LoadEvents.{LoadMetadataEvent, LoadTablePostExecutionEvent, LoadTablePostStatusUpdateEvent, LoadTablePreExecutionEvent, LoadTablePreStatusUpdateEvent}
+import org.apache.carbondata.processing.loading.events.LoadEvents.{LoadMetadataEvent, LoadTablePostStatusUpdateEvent, LoadTablePreExecutionEvent, LoadTablePreStatusUpdateEvent}
 import org.apache.carbondata.spark.rdd.SparkReadSupport
 import org.apache.carbondata.spark.readsupport.SparkRowReadSupportImpl
 
@@ -182,8 +182,7 @@ object CarbonEnv {
       .addListener(classOf[AlterTableDropPartitionPostStatusEvent],
         AlterTableDropPartitionPostStatusListener)
       .addListener(classOf[AlterTableDropPartitionMetaEvent], AlterTableDropPartitionMetaListener)
-      .addListener(classOf[LoadTablePostExecutionEvent], new MergeIndexEventListener)
-      .addListener(classOf[AlterTableCompactionPostEvent], new MergeIndexEventListener)
+      .addListener(classOf[LoadTablePreStatusUpdateEvent], new MergeIndexEventListener)
       .addListener(classOf[AlterTableMergeIndexEvent], new MergeIndexEventListener)
       .addListener(classOf[BuildDataMapPostExecutionEvent], new MergeBloomIndexEventListener)
       .addListener(classOf[DropTableCacheEvent], DropCachePreAggEventListener)
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
index 35b73d6..7844f28 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/events/MergeIndexEventListener.scala
@@ -23,20 +23,17 @@ import scala.collection.JavaConverters._
 import scala.collection.mutable
 
 import org.apache.spark.internal.Logging
-import org.apache.spark.SparkContext
 import org.apache.spark.rdd.CarbonMergeFilesRDD
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.util.CarbonException
+import org.apache.spark.util.MergeIndexUtil
 
 import org.apache.carbondata.common.logging.LogServiceFactory
-import org.apache.carbondata.core.constants.CarbonCommonConstants
 import org.apache.carbondata.core.datamap.Segment
 import org.apache.carbondata.core.locks.{CarbonLockFactory, LockUsage}
-import org.apache.carbondata.core.metadata.SegmentFileStore
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable
 import org.apache.carbondata.core.statusmanager.SegmentStatusManager
-import org.apache.carbondata.events.{AlterTableCompactionPostEvent, AlterTableMergeIndexEvent, Event, OperationContext, OperationEventListener}
-import org.apache.carbondata.processing.loading.events.LoadEvents.LoadTablePostExecutionEvent
+import org.apache.carbondata.events._
+import org.apache.carbondata.processing.loading.events.LoadEvents.LoadTablePreStatusUpdateEvent
 import org.apache.carbondata.processing.merger.CarbonDataMergerUtil
 
 class MergeIndexEventListener extends OperationEventListener with Logging {
@@ -44,7 +41,7 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
 
   override def onEvent(event: Event, operationContext: OperationContext): Unit = {
     event match {
-      case preStatusUpdateEvent: LoadTablePostExecutionEvent =>
+      case preStatusUpdateEvent: LoadTablePreStatusUpdateEvent =>
         LOGGER.info("Load post status event-listener called for merge index")
         val loadModel = preStatusUpdateEvent.getCarbonLoadModel
         val carbonTable = loadModel.getCarbonDataLoadSchema.getCarbonTable
@@ -52,7 +49,7 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
         val sparkSession = SparkSession.getActiveSession.get
         if(!carbonTable.isStreamingSink) {
           if (null != compactedSegments && !compactedSegments.isEmpty) {
-            mergeIndexFilesForCompactedSegments(sparkSession,
+            MergeIndexUtil.mergeIndexFilesForCompactedSegments(sparkSession,
               carbonTable,
               compactedSegments)
           } else {
@@ -67,17 +64,9 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
               carbonTable.getTablePath,
               carbonTable, false)
             // clear Block dataMap Cache
-            clearBlockDataMapCache(carbonTable, Seq(loadModel.getSegmentId))
+            MergeIndexUtil.clearBlockDataMapCache(carbonTable, Seq(loadModel.getSegmentId))
           }
         }
-      case alterTableCompactionPostEvent: AlterTableCompactionPostEvent =>
-        LOGGER.info("Merge index for compaction called")
-        val carbonTable = alterTableCompactionPostEvent.carbonTable
-        val mergedLoads = alterTableCompactionPostEvent.compactedLoads
-        val sparkSession = alterTableCompactionPostEvent.sparkSession
-        if(!carbonTable.isStreamingSink) {
-          mergeIndexFilesForCompactedSegments(sparkSession, carbonTable, mergedLoads)
-        }
       case alterTableMergeIndexEvent: AlterTableMergeIndexEvent =>
         val carbonMainTable = alterTableMergeIndexEvent.carbonTable
         val sparkSession = alterTableMergeIndexEvent.sparkSession
@@ -123,7 +112,7 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
                 mergeIndexProperty = true,
                 readFileFooterFromCarbonDataFile = true)
               // clear Block dataMap Cache
-              clearBlockDataMapCache(carbonMainTable, validSegmentIds)
+              MergeIndexUtil.clearBlockDataMapCache(carbonMainTable, validSegmentIds)
               val requestMessage = "Compaction request completed for table " +
                 s"${ carbonMainTable.getDatabaseName }.${ carbonMainTable.getTableName }"
               LOGGER.info(requestMessage)
@@ -142,49 +131,4 @@ class MergeIndexEventListener extends OperationEventListener with Logging {
     }
   }
 
-  def mergeIndexFilesForCompactedSegments(sparkSession: SparkSession,
-    carbonTable: CarbonTable,
-    mergedLoads: util.List[String]): Unit = {
-    // get only the valid segments of the table
-    val validSegments: mutable.Buffer[Segment] = CarbonDataMergerUtil.getValidSegmentList(
-      carbonTable.getAbsoluteTableIdentifier).asScala
-    val mergedSegmentIds = new util.ArrayList[String]()
-    mergedLoads.asScala.foreach(mergedLoad => {
-      val loadName = mergedLoad
-        .substring(mergedLoad.indexOf(CarbonCommonConstants.LOAD_FOLDER) +
-                   CarbonCommonConstants.LOAD_FOLDER.length)
-      mergedSegmentIds.add(loadName)
-    })
-    val loadFolderDetailsArray = SegmentStatusManager
-      .readLoadMetadata(carbonTable.getMetadataPath)
-    val segmentFileNameMap: java.util.Map[String, String] = new util.HashMap[String, String]()
-    loadFolderDetailsArray.foreach(loadMetadataDetails => {
-      segmentFileNameMap
-        .put(loadMetadataDetails.getLoadName, String.valueOf(loadMetadataDetails.getLoadStartTime))
-    })
-    // filter out only the valid segments from the list of compacted segments
-    // Example: say compacted segments list contains 0.1, 3.1, 6.1, 0.2.
-    // In this list 0.1, 3.1 and 6.1 are compacted to 0.2 in the level 2 compaction.
-    // So, it is enough to do merge index only for 0.2 as it is the only valid segment in this list
-    val validMergedSegIds = validSegments
-      .filter { seg => mergedSegmentIds.contains(seg.getSegmentNo) }.map(_.getSegmentNo)
-    if (null != validMergedSegIds && validMergedSegIds.nonEmpty) {
-      CarbonMergeFilesRDD.mergeIndexFiles(sparkSession,
-          validMergedSegIds,
-          segmentFileNameMap,
-          carbonTable.getTablePath,
-          carbonTable,
-          false)
-      // clear Block dataMap Cache
-      clearBlockDataMapCache(carbonTable, validMergedSegIds)
-    }
-  }
-
-  private def clearBlockDataMapCache(carbonTable: CarbonTable, segmentIds: Seq[String]): Unit = {
-    // clear driver Block dataMap cache for each segment
-    segmentIds.foreach { segmentId =>
-      SegmentFileStore.clearBlockDataMapCache(carbonTable, segmentId)
-    }
-  }
-
 }
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
index debb283..b5aa8f9 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/table/CarbonCreateTableCommand.scala
@@ -191,10 +191,11 @@ case class CarbonCreateTableCommand(
               if (ifNotExistsSet) {
                 LOGGER.error(e, e)
               } else {
+                LOGGER.error(e)
                 throw e
               }
-            }
-            else {
+            } else {
+              LOGGER.error(e)
               throw e
             }
 
diff --git a/integration/spark2/src/main/scala/org/apache/spark/util/MergeIndexUtil.scala b/integration/spark2/src/main/scala/org/apache/spark/util/MergeIndexUtil.scala
new file mode 100644
index 0000000..f109b2e
--- /dev/null
+++ b/integration/spark2/src/main/scala/org/apache/spark/util/MergeIndexUtil.scala
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import java.util
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.rdd.CarbonMergeFilesRDD
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.execution.command.CompactionCallableModel
+
+import org.apache.carbondata.common.logging.LogServiceFactory
+import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.datamap.Segment
+import org.apache.carbondata.core.metadata.SegmentFileStore
+import org.apache.carbondata.core.metadata.schema.table.CarbonTable
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager
+import org.apache.carbondata.processing.loading.model.CarbonLoadModel
+import org.apache.carbondata.processing.merger.CarbonDataMergerUtil
+
+object MergeIndexUtil {
+
+  val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
+
+  def mergeIndexFilesOnCompaction(compactionCallableModel: CompactionCallableModel): Unit = {
+    val carbonTable = compactionCallableModel.carbonTable
+    LOGGER.info(s"Merge index for compaction is called on table ${carbonTable.getTableUniqueName}")
+    val mergedLoads = compactionCallableModel.compactedSegments
+    val sparkSession = compactionCallableModel.sqlContext.sparkSession
+    if (!carbonTable.isStreamingSink) {
+      val mergedSegmentIds = new util.ArrayList[String]()
+      mergedLoads.asScala.foreach(mergedLoad => {
+        val loadName = mergedLoad
+          .substring(mergedLoad.indexOf(CarbonCommonConstants.LOAD_FOLDER) +
+                     CarbonCommonConstants.LOAD_FOLDER.length)
+        mergedSegmentIds.add(loadName)
+      })
+      CarbonMergeFilesRDD.mergeIndexFiles(sparkSession,
+        mergedSegmentIds.asScala,
+        new util.HashMap[String, String](),
+        carbonTable.getTablePath,
+        carbonTable, false)
+    }
+  }
+
+  def mergeIndexFilesForCompactedSegments(sparkSession: SparkSession,
+    carbonTable: CarbonTable,
+    mergedLoads: util.List[String]): Unit = {
+    // get only the valid segments of the table
+    val validSegments: mutable.Buffer[Segment] = CarbonDataMergerUtil.getValidSegmentList(
+      carbonTable.getAbsoluteTableIdentifier).asScala
+    val mergedSegmentIds = new util.ArrayList[String]()
+    mergedLoads.asScala.foreach(mergedLoad => {
+      val loadName = mergedLoad
+        .substring(mergedLoad.indexOf(CarbonCommonConstants.LOAD_FOLDER) +
+                   CarbonCommonConstants.LOAD_FOLDER.length)
+      mergedSegmentIds.add(loadName)
+    })
+    val loadFolderDetailsArray = SegmentStatusManager
+      .readLoadMetadata(carbonTable.getMetadataPath)
+    val segmentFileNameMap: java.util.Map[String, String] = new util.HashMap[String, String]()
+    loadFolderDetailsArray.foreach(loadMetadataDetails => {
+      segmentFileNameMap
+        .put(loadMetadataDetails.getLoadName, String.valueOf(loadMetadataDetails.getLoadStartTime))
+    })
+    // filter out only the valid segments from the list of compacted segments
+    // Example: say compacted segments list contains 0.1, 3.1, 6.1, 0.2.
+    // In this list 0.1, 3.1 and 6.1 are compacted to 0.2 in the level 2 compaction.
+    // So, it is enough to do merge index only for 0.2 as it is the only valid segment in this list
+    val validMergedSegIds = validSegments
+      .filter { seg => mergedSegmentIds.contains(seg.getSegmentNo) }.map(_.getSegmentNo)
+    if (null != validMergedSegIds && validMergedSegIds.nonEmpty) {
+      CarbonMergeFilesRDD.mergeIndexFiles(sparkSession,
+        validMergedSegIds,
+        segmentFileNameMap,
+        carbonTable.getTablePath,
+        carbonTable,
+        false)
+      // clear Block dataMap Cache
+      clearBlockDataMapCache(carbonTable, validMergedSegIds)
+    }
+  }
+
+  def clearBlockDataMapCache(carbonTable: CarbonTable, segmentIds: Seq[String]): Unit = {
+    // clear driver Block dataMap cache for each segment
+    segmentIds.foreach { segmentId =>
+      SegmentFileStore.clearBlockDataMapCache(carbonTable, segmentId)
+    }
+  }
+
+}