You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by GitBox <gi...@apache.org> on 2019/05/09 13:02:34 UTC

[GitHub] [carbondata] ravipesala commented on a change in pull request #3210: [CARBONDATA-3375] [CARBONDATA-3376] Fix GC Overhead limit exceeded issue and partition column as range column issue

ravipesala commented on a change in pull request #3210: [CARBONDATA-3375] [CARBONDATA-3376] Fix GC Overhead limit exceeded issue and partition column as range column issue
URL: https://github.com/apache/carbondata/pull/3210#discussion_r282474383
 
 

 ##########
 File path: integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonMergerRDD.scala
 ##########
 @@ -433,75 +442,118 @@ class CarbonMergerRDD[K, V](
     val newRanges = allRanges.filter { range =>
       range != null
     }
-    carbonInputSplits.foreach { split =>
-      var dataFileFooter: DataFileFooter = null
-      if (null == rangeColumn) {
-        val taskNo = getTaskNo(split, partitionTaskMap, counter)
-        var sizeOfSplit = split.getDetailInfo.getBlockSize
-        val splitList = taskIdMapping.get(taskNo)
-        noOfBlocks += 1
+    val noOfSplitsPerTask = Math.ceil(carbonInputSplits.size / defaultParallelism)
+    var taskCount = 0
+    // In case of range column if only one data value is present then we try to
+    // divide the splits to different tasks in order to avoid single task creation
+    // and load on single executor
+    if (singleRange) {
+      var filterExpr = CarbonCompactionUtil
 
 Review comment:
   For a single range, there is no need to add a filter expression.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services