You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/08/15 07:00:26 UTC
svn commit: r1618095 - in
/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark:
GenSparkUtils.java GenSparkWork.java
Author: brock
Date: Fri Aug 15 05:00:26 2014
New Revision: 1618095
URL: http://svn.apache.org/r1618095
Log:
HIVE-7659 - Unnecessary sort in query plan (Rui Li via Brock) [Spark Branch]
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1618095&r1=1618094&r2=1618095&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Fri Aug 15 05:00:26 2014
@@ -109,7 +109,7 @@ public class GenSparkUtils {
edgeProp.setShuffleGroup();
}
String sortOrder = Strings.nullToEmpty(reduceSink.getConf().getOrder()).trim();
- if (!sortOrder.isEmpty()) {
+ if (!sortOrder.isEmpty() && isSortNecessary(reduceSink)) {
edgeProp.setShuffleSort();
}
@@ -297,4 +297,26 @@ public class GenSparkUtils {
}
}
}
+
+ /**
+ * Tests whether the sort requested by a ReduceSink (RS) operator is actually necessary.
+ *
+ * Unnecessary sort is mainly introduced when GBY is created. Therefore, if the sorting
+ * keys, partitioning keys and grouping keys are the same, we ignore the sort and use
+ * GroupByShuffler to shuffle the data. In this case a group-by transformation should be
+ * sufficient to produce the correct results, i.e. data is properly grouped by the keys
+ * but keys are not guaranteed to be sorted.
+ *
+ * Concretely, the sort is reported as unnecessary only when all of the following hold:
+ * the RS has exactly one child operator; that child is a GroupByOperator; the RS key
+ * columns are equal (List.equals) to the RS partition columns; and the number of RS key
+ * columns equals the number of keys of the child GBY.
+ *
+ * NOTE(review): the grouping keys are compared with the RS keys by count only, not
+ * element by element - confirm that matching sizes is sufficient for every plan shape.
+ * NOTE(review): getKeyCols() is dereferenced without a null check - presumably it is
+ * never null for a ReduceSink; verify.
+ *
+ * @param reduceSinkOperator the ReduceSink operator whose sort order is being examined
+ * @return false if the conditions above are all met (the sort can be skipped),
+ *         true in every other case
+ */
+ public static boolean isSortNecessary(ReduceSinkOperator reduceSinkOperator) {
+ List<Operator<? extends OperatorDesc>> children = reduceSinkOperator.getChildOperators();
+ if (children != null && children.size() == 1 &&
+ children.get(0) instanceof GroupByOperator) {
+ GroupByOperator child = (GroupByOperator) children.get(0);
+ if (reduceSinkOperator.getConf().getKeyCols().equals(
+ reduceSinkOperator.getConf().getPartitionCols()) &&
+ reduceSinkOperator.getConf().getKeyCols().size() == child.getConf().getKeys().size()) {
+ return false;
+ }
+ }
+ return true;
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java?rev=1618095&r1=1618094&r2=1618095&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java Fri Aug 15 05:00:26 2014
@@ -277,7 +277,7 @@ public class GenSparkWork implements Nod
edgeProp.setShuffleGroup();
}
String sortOrder = Strings.nullToEmpty(rs.getConf().getOrder()).trim();
- if (!sortOrder.isEmpty()) {
+ if (!sortOrder.isEmpty() && GenSparkUtils.isSortNecessary(rs)) {
edgeProp.setShuffleSort();
}
sparkWork.connect(work, rWork, edgeProp);