You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/01/06 02:55:39 UTC
[carbondata] branch master updated: [HOTFIX] Optimize array length
in loop in scala code
This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 123904a [HOTFIX] Optimize array length in loop in scala code
123904a is described below
commit 123904a1f0b104c9701e4dff69fb824aaafdefe9
Author: Jacky Li <ja...@qq.com>
AuthorDate: Sat Dec 28 14:40:41 2019 +0800
[HOTFIX] Optimize array length in loop in scala code
Inspired by CARBONDATA-3626, this PR optimizes other places in the Scala code
where the array length is computed inside a for-loop or while-loop condition.
This closes #3539
---
.../org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala | 5 ++++-
.../org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala | 5 ++++-
.../src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala | 6 ++++--
.../scala/org/apache/spark/rdd/DataLoadPartitionCoalescer.scala | 5 ++++-
.../src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala | 3 ++-
.../carbondata/execution/datasources/SparkCarbonFileFormat.scala | 6 ++++--
.../sql/execution/command/management/CarbonLoadDataCommand.scala | 6 ++++--
.../org/apache/carbondata/streaming/parser/FieldConverter.scala | 5 ++++-
8 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
index 86f28ca..95a22ca 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
@@ -159,8 +159,11 @@ class StringArrayRow(var values: Array[String]) extends Row {
override def getString(i: Int): String = values(i)
private def reset(): Unit = {
- for (i <- 0 until values.length) {
+ val len = values.length
+ var i = 0
+ while (i < len) {
values(i) = null
+ i = i + 1
}
}
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
index 7511502..3e3b9a4 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/rdd/NewCarbonDataLoadRDD.scala
@@ -354,11 +354,14 @@ class NewRddIterator(rddIter: Iterator[Row],
def next: Array[AnyRef] = {
val row = rddIter.next()
val columns = new Array[AnyRef](row.length)
- for (i <- 0 until columns.length) {
+ val len = columns.length
+ var i = 0
+ while (i < len) {
columns(i) = CarbonScalaUtil.getString(row.get(i), serializationNullFormat,
complexDelimiters, timeStampFormat, dateFormat,
isVarcharType = i < isVarcharTypeMapping.size && isVarcharTypeMapping(i),
isComplexType = i < isComplexTypeMapping.size && isComplexTypeMapping(i))
+ i += 1
}
columns
}
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala b/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala
index 733ee87..e72e78e 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/DataSkewRangePartitioner.scala
@@ -252,7 +252,8 @@ class DataSkewRangePartitioner[K: Ordering : ClassTag, V](
var partition = 0
if (rangeBounds.length <= 128) {
// If we have less than 128 partitions naive search
- while (partition < rangeBounds.length && ordering.gt(k, rangeBounds(partition))) {
+ val len = rangeBounds.length
+ while (partition < len && ordering.gt(k, rangeBounds(partition))) {
partition += 1
}
} else {
@@ -306,7 +307,8 @@ class DataSkewRangePartitioner[K: Ordering : ClassTag, V](
val prime = 31
var result = 1
var i = 0
- while (i < rangeBounds.length) {
+ val len = rangeBounds.length
+ while (i < len) {
result = prime * result + rangeBounds(i).hashCode
i += 1
}
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/rdd/DataLoadPartitionCoalescer.scala b/integration/spark-common/src/main/scala/org/apache/spark/rdd/DataLoadPartitionCoalescer.scala
index 41e8397..6d45cf4 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/rdd/DataLoadPartitionCoalescer.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/rdd/DataLoadPartitionCoalescer.scala
@@ -173,8 +173,11 @@ class DataLoadPartitionCoalescer(prev: RDD[_], nodeList: Array[String]) {
private def assignPartitionNodeLocality(
noEmptyHosts: Seq[(String, LinkedHashSet[Int])]): Array[ArrayBuffer[Int]] = {
val localityResult = new Array[ArrayBuffer[Int]](noEmptyHosts.length)
- for (i <- 0 until localityResult.length) {
+ var i = 0
+ val len = localityResult.length
+ while (i < len) {
localityResult(i) = new ArrayBuffer[Int]
+ i += 1
}
val noEmptyHostSet = new HashSet[String]
noEmptyHosts.foreach {loc => noEmptyHostSet.add(loc._1)}
diff --git a/integration/spark-common/src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala b/integration/spark-common/src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala
index 62e52d0..04ff0ce 100644
--- a/integration/spark-common/src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala
+++ b/integration/spark-common/src/main/scala/org/apache/spark/sql/hive/DistributionUtil.scala
@@ -238,7 +238,8 @@ object DistributionUtil {
val maxRetryCount = calculateMaxRetry
var maxTimes = maxRetryCount
breakable {
- while (nodes.length < requiredExecutors && maxTimes > 0) {
+ val len = nodes.length
+ while (requiredExecutors > len && maxTimes > 0) {
Thread.sleep(threadSleepTime);
nodes = DistributionUtil.getNodeList(sparkContext)
maxTimes = maxTimes - 1;
[Reviewer note: unlike the other hunks in this patch, this hoist changes behavior.
`nodes` is reassigned inside the loop body, so caching `nodes.length` in `len`
before the loop means the condition never sees the refreshed node count; the loop
will keep sleeping and retrying for the full `maxRetryCount` even after enough
executors have registered. The length check here is not loop-invariant and should
re-read `nodes.length` each iteration.]
diff --git a/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala b/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala
index 52f98e8..23b97f1 100644
--- a/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala
+++ b/integration/spark-datasource/src/main/scala/org/apache/spark/sql/carbondata/execution/datasources/SparkCarbonFileFormat.scala
@@ -220,7 +220,8 @@ class SparkCarbonFileFormat extends FileFormat
private def extractData(row: InternalRow, fieldTypes: Array[StructField]): Array[AnyRef] = {
val data = new Array[AnyRef](fieldTypes.length)
var i = 0
- while (i < fieldTypes.length) {
+ val len = fieldTypes.length
+ while (i < len) {
if (!row.isNullAt(i)) {
fieldTypes(i).dataType match {
case StringType =>
@@ -274,7 +275,8 @@ class SparkCarbonFileFormat extends FileFormat
private def extractData(row: ArrayData, dataType: DataType): Array[AnyRef] = {
val data = new Array[AnyRef](row.numElements())
var i = 0
- while (i < data.length) {
+ val len = data.length
+ while (i < len) {
if (!row.isNullAt(i)) {
dataType match {
case StringType =>
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 1334178..130580d 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -513,12 +513,14 @@ case class CarbonLoadDataCommand(
val transRdd = rdd.map { f =>
val data = new Array[Any](len)
var i = 0
- while (i < f.length) {
+ val length = f.length
+ while (i < length) {
data(nonPartitionBounds(i)) = f.get(i)
i = i + 1
}
var j = 0
- while (j < partitionBounds.length) {
+ val boundLength = partitionBounds.length
+ while (j < boundLength) {
data(partitionBounds(j)) = UTF8String.fromString(partitionValues(j))
j = j + 1
}
diff --git a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
index 10f9b40..93d55cf 100644
--- a/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
+++ b/streaming/src/main/scala/org/apache/carbondata/streaming/parser/FieldConverter.scala
@@ -102,11 +102,14 @@ object FieldConverter {
case r: org.apache.spark.sql.Row =>
val delimiter = complexDelimiters.get(level)
val builder = new StringBuilder()
- for (i <- 0 until r.length) {
+ val len = r.length
+ var i = 0
+ while (i < len) {
val nextLevel = level + 1
builder.append(objectToString(r(i), serializationNullFormat, complexDelimiters,
timeStampFormat, dateFormat, isVarcharType, level = nextLevel))
.append(delimiter)
+ i += 1
}
builder.substring(0, builder.length - delimiter.length())
case other => other.toString