You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/03/30 05:35:43 UTC

[1/2] incubator-carbondata git commit: Only return 1 preferred loc to confirm each node run 1 task

Repository: incubator-carbondata
Updated Branches:
  refs/heads/master 32bb7fef1 -> 5d2ae6be2


Only return 1 preferred location to ensure each node runs 1 task

Add comments

Remove unused code

Remove unused import

Fix style for mkString output


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/cbca5d38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/cbca5d38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/cbca5d38

Branch: refs/heads/master
Commit: cbca5d38e6c58f6400d2fd5db8e6f0e8b458cc1f
Parents: 32bb7fe
Author: l00251599 <l0...@huaweiobz.com>
Authored: Wed Mar 29 17:07:50 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Thu Mar 30 11:01:23 2017 +0530

----------------------------------------------------------------------
 .../spark/rdd/NewCarbonDataLoadRDD.scala        | 32 ++++++--------------
 1 file changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/cbca5d38/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 50894d4..0690ba1 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -20,7 +20,6 @@ package org.apache.carbondata.spark.rdd
 import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
 import java.nio.ByteBuffer
 import java.text.SimpleDateFormat
-import java.util
 import java.util.{Date, UUID}
 
 import scala.collection.JavaConverters._
@@ -352,28 +351,15 @@ class NewCarbonDataLoadRDD[K, V](
     } else {
       val theSplit = split.asInstanceOf[CarbonNodePartition]
       val firstOptionLocation: Seq[String] = List(theSplit.serializableHadoopSplit)
-      logInfo("Preferred Location for split : " + firstOptionLocation.head)
-      val blockMap = new util.LinkedHashMap[String, Integer]()
-      val tableBlocks = theSplit.blocksDetails
-      tableBlocks.foreach { tableBlock =>
-        tableBlock.getLocations.foreach { location =>
-          if (!firstOptionLocation.exists(location.equalsIgnoreCase(_))) {
-            val currentCount = blockMap.get(location)
-            if (currentCount == null) {
-              blockMap.put(location, 1)
-            } else {
-              blockMap.put(location, currentCount + 1)
-            }
-          }
-        }
-      }
-
-      val sortedList = blockMap.entrySet().asScala.toSeq.sortWith { (nodeCount1, nodeCount2) =>
-        nodeCount1.getValue > nodeCount2.getValue
-      }
-
-      val sortedNodesList = sortedList.map(nodeCount => nodeCount.getKey).take(2)
-      firstOptionLocation ++ sortedNodesList
+      logInfo("Preferred Location for split : " + firstOptionLocation.mkString(","))
+      /**
+       * The original logic also added the next preferred locations, so that on failure
+       * Spark would know where to reschedule the failed task.
+       * The extra preferred locations were removed because Spark sometimes picks the same
+       * node for 2 tasks, so one node gets overloaded with tasks while another has no task
+       * to do, which hurts performance even when there is no failure.
+       */
+      firstOptionLocation
     }
   }
 }


[2/2] incubator-carbondata git commit: [CARBONDATA-830] Incorrect schedule for NewCarbonDataLoadRDD This closes #708

Posted by ja...@apache.org.
[CARBONDATA-830] Incorrect schedule for NewCarbonDataLoadRDD This closes #708


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5d2ae6be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5d2ae6be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5d2ae6be

Branch: refs/heads/master
Commit: 5d2ae6be272038fa8dd607b5c7f2ee087adeb87d
Parents: 32bb7fe cbca5d3
Author: jackylk <ja...@huawei.com>
Authored: Thu Mar 30 11:05:08 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Thu Mar 30 11:05:08 2017 +0530

----------------------------------------------------------------------
 .../spark/rdd/NewCarbonDataLoadRDD.scala        | 32 ++++++--------------
 1 file changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------