You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/03/16 09:34:00 UTC
[3/6] carbondata git commit: [CARBONDATA-2250][DataLoad] Reduce
massive object generation in global sort
[CARBONDATA-2250][DataLoad] Reduce massive object generation in global sort
Generate the comparator outside the function; otherwise it will be generated for every row
This closes #2059
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/31011fc2
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/31011fc2
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/31011fc2
Branch: refs/heads/carbonfile
Commit: 31011fc29f85d949bdabaa3551b7513eb1183ee5
Parents: 5b48e70
Author: xuchuanyin <xu...@hust.edu.cn>
Authored: Wed Mar 14 14:45:53 2018 +0800
Committer: Jacky Li <ja...@qq.com>
Committed: Thu Mar 15 17:43:39 2018 +0800
----------------------------------------------------------------------
.../spark/load/DataLoadProcessBuilderOnSpark.scala | 13 ++++++-------
.../processing/sort/sortdata/NewRowComparator.java | 4 +++-
.../sort/sortdata/NewRowComparatorForNormalDims.java | 5 ++++-
3 files changed, 13 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/31011fc2/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
index 1062cd7..dc238fb 100644
--- a/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
+++ b/integration/spark-common/src/main/scala/org/apache/carbondata/spark/load/DataLoadProcessBuilderOnSpark.scala
@@ -80,15 +80,14 @@ object DataLoadProcessBuilderOnSpark {
// 3. Sort
val configuration = DataLoadProcessBuilder.createConfiguration(model)
val sortParameters = SortParameters.createSortParameters(configuration)
+ val rowComparator: Comparator[Array[AnyRef]] =
+ if (sortParameters.getNoDictionaryCount > 0) {
+ new NewRowComparator(sortParameters.getNoDictionaryDimnesionColumn)
+ } else {
+ new NewRowComparatorForNormalDims(sortParameters.getDimColCount)
+ }
object RowOrdering extends Ordering[Array[AnyRef]] {
def compare(rowA: Array[AnyRef], rowB: Array[AnyRef]): Int = {
- val rowComparator: Comparator[Array[AnyRef]] =
- if (sortParameters.getNoDictionaryCount > 0) {
- new NewRowComparator(sortParameters.getNoDictionaryDimnesionColumn)
- } else {
- new NewRowComparatorForNormalDims(sortParameters.getDimColCount)
- }
-
rowComparator.compare(rowA, rowB)
}
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/31011fc2/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
index 3f94533..f47ecc7 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparator.java
@@ -17,11 +17,13 @@
package org.apache.carbondata.processing.sort.sortdata;
+import java.io.Serializable;
import java.util.Comparator;
import org.apache.carbondata.core.util.ByteUtil.UnsafeComparer;
-public class NewRowComparator implements Comparator<Object[]> {
+public class NewRowComparator implements Comparator<Object[]>, Serializable {
+ private static final long serialVersionUID = -1739874611112709436L;
/**
* mapping of dictionary dimensions and no dictionary of sort_column.
http://git-wip-us.apache.org/repos/asf/carbondata/blob/31011fc2/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparatorForNormalDims.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparatorForNormalDims.java b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparatorForNormalDims.java
index 7538c92..aea83ba 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparatorForNormalDims.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/sort/sortdata/NewRowComparatorForNormalDims.java
@@ -16,13 +16,16 @@
*/
package org.apache.carbondata.processing.sort.sortdata;
+import java.io.Serializable;
import java.util.Comparator;
/**
* This class is used as comparator for comparing dims which are non high cardinality dims.
* Here the dims will be in form of int[] (surrogates) so directly comparing the integers.
*/
-public class NewRowComparatorForNormalDims implements Comparator<Object[]> {
+public class NewRowComparatorForNormalDims implements Comparator<Object[]>, Serializable {
+ private static final long serialVersionUID = -1749874611112709432L;
+
/**
* dimension count
*/