You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/03/30 05:35:43 UTC
[1/2] incubator-carbondata git commit: Only return 1 preferred location to
ensure each node runs 1 task
Repository: incubator-carbondata
Updated Branches:
refs/heads/master 32bb7fef1 -> 5d2ae6be2
Only return 1 preferred location to ensure each node runs 1 task
Add comments
Remove unused code
Remove unused import
Fix style for mkString output
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/cbca5d38
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/cbca5d38
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/cbca5d38
Branch: refs/heads/master
Commit: cbca5d38e6c58f6400d2fd5db8e6f0e8b458cc1f
Parents: 32bb7fe
Author: l00251599 <l0...@huaweiobz.com>
Authored: Wed Mar 29 17:07:50 2017 +0800
Committer: jackylk <ja...@huawei.com>
Committed: Thu Mar 30 11:01:23 2017 +0530
----------------------------------------------------------------------
.../spark/rdd/NewCarbonDataLoadRDD.scala | 32 ++++++--------------
1 file changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/cbca5d38/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 50894d4..0690ba1 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -20,7 +20,6 @@ package org.apache.carbondata.spark.rdd
import java.io.{IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.text.SimpleDateFormat
-import java.util
import java.util.{Date, UUID}
import scala.collection.JavaConverters._
@@ -352,28 +351,15 @@ class NewCarbonDataLoadRDD[K, V](
} else {
val theSplit = split.asInstanceOf[CarbonNodePartition]
val firstOptionLocation: Seq[String] = List(theSplit.serializableHadoopSplit)
- logInfo("Preferred Location for split : " + firstOptionLocation.head)
- val blockMap = new util.LinkedHashMap[String, Integer]()
- val tableBlocks = theSplit.blocksDetails
- tableBlocks.foreach { tableBlock =>
- tableBlock.getLocations.foreach { location =>
- if (!firstOptionLocation.exists(location.equalsIgnoreCase(_))) {
- val currentCount = blockMap.get(location)
- if (currentCount == null) {
- blockMap.put(location, 1)
- } else {
- blockMap.put(location, currentCount + 1)
- }
- }
- }
- }
-
- val sortedList = blockMap.entrySet().asScala.toSeq.sortWith { (nodeCount1, nodeCount2) =>
- nodeCount1.getValue > nodeCount2.getValue
- }
-
- val sortedNodesList = sortedList.map(nodeCount => nodeCount.getKey).take(2)
- firstOptionLocation ++ sortedNodesList
+ logInfo("Preferred Location for split : " + firstOptionLocation.mkString(","))
+ /**
+ * The original logic also added the next preferred locations so that, in case of a
+ * failure, Spark would know where to reschedule the failed task.
+ * Those extra locations were removed because Spark would sometimes pick the same node
+ * for 2 tasks, leaving one node overloaded while another node had no task to do.
+ * That hurt performance even when no failure occurred.
+ */
+ firstOptionLocation
}
}
}
[2/2] incubator-carbondata git commit: [CARBONDATA-830] Incorrect
schedule for NewCarbonDataLoadRDD This closes #708
Posted by ja...@apache.org.
[CARBONDATA-830] Incorrect schedule for NewCarbonDataLoadRDD This closes #708
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/5d2ae6be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/5d2ae6be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/5d2ae6be
Branch: refs/heads/master
Commit: 5d2ae6be272038fa8dd607b5c7f2ee087adeb87d
Parents: 32bb7fe cbca5d3
Author: jackylk <ja...@huawei.com>
Authored: Thu Mar 30 11:05:08 2017 +0530
Committer: jackylk <ja...@huawei.com>
Committed: Thu Mar 30 11:05:08 2017 +0530
----------------------------------------------------------------------
.../spark/rdd/NewCarbonDataLoadRDD.scala | 32 ++++++--------------
1 file changed, 9 insertions(+), 23 deletions(-)
----------------------------------------------------------------------