You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2015/01/12 03:03:40 UTC
svn commit: r1651024 [2/3] - in /hive/trunk: ./
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ data/conf/ data/conf/spark/
hbase-handler/ hcatalog/webhcat/svr/ itests/ itests/hive-unit/
itests/hive-unit/src/...
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Mon Jan 12 02:03:38 2015
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.Un
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
@@ -92,6 +93,7 @@ import org.apache.hadoop.hive.ql.plan.Pl
import org.apache.hadoop.hive.ql.plan.RCFileMergeDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
import org.apache.hadoop.hive.ql.plan.StatsWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
@@ -858,6 +860,13 @@ public final class GenMapRedUtils {
((MapWork)w).deriveExplainAttributes();
}
}
+ } else if (task instanceof SparkTask) {
+ SparkWork work = (SparkWork) task.getWork();
+ for (BaseWork w : work.getAllWorkUnsorted()) {
+ if (w instanceof MapWork) {
+ ((MapWork) w).deriveExplainAttributes();
+ }
+ }
}
if (task.getChildTasks() == null) {
@@ -974,7 +983,7 @@ public final class GenMapRedUtils {
* @param parseCtx
* @return The TableScanOperator inserted before child.
*/
- protected static TableScanOperator createTemporaryFile(
+ public static TableScanOperator createTemporaryFile(
Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> child,
Path taskTmpDir, TableDesc tt_desc, ParseContext parseCtx) {
@@ -1285,6 +1294,10 @@ public final class GenMapRedUtils {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("File Merge");
((TezWork) work).add(cplan);
+ } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ cplan.setName("Spark Merge File Work");
+ ((SparkWork) work).add(cplan);
} else {
work = cplan;
}
@@ -1294,6 +1307,10 @@ public final class GenMapRedUtils {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("File Merge");
((TezWork)work).add(cplan);
+ } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ cplan.setName("Spark Merge File Work");
+ ((SparkWork) work).add(cplan);
} else {
work = new MapredWork();
((MapredWork)work).setMapWork(cplan);
@@ -1429,7 +1446,12 @@ public final class GenMapRedUtils {
if (mrWork.getReduceWork() != null) {
mrWork.getReduceWork().setGatheringStats(true);
}
- } else {
+ } else if (currTask.getWork() instanceof SparkWork) {
+ SparkWork work = (SparkWork) currTask.getWork();
+ for (BaseWork w: work.getAllWork()) {
+ w.setGatheringStats(true);
+ }
+ } else { // must be TezWork
TezWork work = (TezWork) currTask.getWork();
for (BaseWork w: work.getAllWork()) {
w.setGatheringStats(true);
@@ -1694,6 +1716,9 @@ public final class GenMapRedUtils {
// tez blurs the boundary between map and reduce, thus it has its own
// config
return hconf.getBoolVar(ConfVars.HIVEMERGETEZFILES);
+ } else if (currTask.getWork() instanceof SparkWork) {
+ // spark has its own config for merging
+ return hconf.getBoolVar(ConfVars.HIVEMERGESPARKFILES);
}
if (fsOp.getConf().isLinkedFileSink()) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Mon Jan 12 02:03:38 2015
@@ -234,7 +234,7 @@ public class MapJoinProcessor implements
newWork.getMapWork().getOpParseCtxMap();
QBJoinTree newJoinTree = newWork.getMapWork().getJoinTree();
// generate the map join operator; already checked the map join
- MapJoinOperator newMapJoinOp = MapJoinProcessor.convertMapJoin(conf, opParseCtxMap, op,
+ MapJoinOperator newMapJoinOp = new MapJoinProcessor().convertMapJoin(conf, opParseCtxMap, op,
newJoinTree, mapJoinPos, true, false);
genLocalWorkForMapJoin(newWork, newMapJoinOp, mapJoinPos);
}
@@ -303,8 +303,9 @@ public class MapJoinProcessor implements
* position of the source to be read as part of map-reduce framework. All other sources
* are cached in memory
* @param noCheckOuterJoin
+ * @param validateMapJoinTree
*/
- public static MapJoinOperator convertMapJoin(HiveConf conf,
+ public MapJoinOperator convertMapJoin(HiveConf conf,
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin,
boolean validateMapJoinTree)
@@ -381,7 +382,7 @@ public class MapJoinProcessor implements
return mapJoinOp;
}
- static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf,
+ public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf,
LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtxMap,
JoinOperator op, QBJoinTree joinTree, int mapJoinPos, boolean noCheckOuterJoin)
throws SemanticException {
@@ -407,6 +408,7 @@ public class MapJoinProcessor implements
childOp.replaceParent(op, mapJoinOp);
}
+ mapJoinOp.setPosToAliasMap(op.getPosToAliasMap());
mapJoinOp.setChildOperators(childOps);
op.setChildOperators(null);
op.setParentOperators(null);
@@ -594,7 +596,7 @@ public class MapJoinProcessor implements
return mapJoinPos;
}
- private void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
+ protected void genSelectPlan(ParseContext pctx, MapJoinOperator input) throws SemanticException {
List<Operator<? extends OperatorDesc>> childOps = input.getChildOperators();
input.setChildOperators(null);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java Mon Jan 12 02:03:38 2015
@@ -56,6 +56,7 @@ public class Optimizer {
public void initialize(HiveConf hiveConf) {
boolean isTezExecEngine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
+ boolean isSparkExecEngine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark");
boolean bucketMapJoinOptimizer = false;
transformations = new ArrayList<Transform>();
@@ -100,9 +101,13 @@ public class Optimizer {
transformations.add(new RewriteGBUsingIndex());
}
transformations.add(new SamplePruner());
- transformations.add(new MapJoinProcessor());
- if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) && !isTezExecEngine) {
+ MapJoinProcessor mapJoinProcessor = isSparkExecEngine ? new SparkMapJoinProcessor()
+ : new MapJoinProcessor();
+ transformations.add(mapJoinProcessor);
+
+ if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN))
+ && !isTezExecEngine && !isSparkExecEngine) {
transformations.add(new BucketMapJoinOptimizer());
bucketMapJoinOptimizer = true;
}
@@ -110,7 +115,7 @@ public class Optimizer {
// If optimize hive.optimize.bucketmapjoin.sortedmerge is set, add both
// BucketMapJoinOptimizer and SortedMergeBucketMapJoinOptimizer
if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN))
- && !isTezExecEngine) {
+ && !isTezExecEngine && !isSparkExecEngine) {
if (!bucketMapJoinOptimizer) {
// No need to add BucketMapJoinOptimizer twice
transformations.add(new BucketMapJoinOptimizer());
@@ -152,7 +157,7 @@ public class Optimizer {
if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTIMIZEMETADATAQUERIES)) {
transformations.add(new StatsOptimizer());
}
- if (pctx.getContext().getExplain() && !isTezExecEngine) {
+ if (isSparkExecEngine || (pctx.getContext().getExplain() && !isTezExecEngine)) {
transformations.add(new AnnotateWithStatistics());
transformations.add(new AnnotateWithOpTraits());
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java Mon Jan 12 02:03:38 2015
@@ -489,6 +489,7 @@ public class ReduceSinkDeDuplication imp
if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
CorrelationUtilities.replaceReduceSinkWithSelectOperator(
cRS, dedupCtx.getPctx(), dedupCtx);
+ pRS.getConf().setEnforceSort(true);
return true;
}
return false;
@@ -511,6 +512,7 @@ public class ReduceSinkDeDuplication imp
if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
CorrelationUtilities.removeReduceSinkForGroupBy(
cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
+ pRS.getConf().setEnforceSort(true);
return true;
}
return false;
@@ -529,6 +531,12 @@ public class ReduceSinkDeDuplication imp
pJoin.getConf().setFixedAsSorted(true);
CorrelationUtilities.replaceReduceSinkWithSelectOperator(
cRS, dedupCtx.getPctx(), dedupCtx);
+ ReduceSinkOperator pRS =
+ CorrelationUtilities.findPossibleParent(
+ pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
+ if (pRS != null) {
+ pRS.getConf().setEnforceSort(true);
+ }
return true;
}
return false;
@@ -547,6 +555,12 @@ public class ReduceSinkDeDuplication imp
pJoin.getConf().setFixedAsSorted(true);
CorrelationUtilities.removeReduceSinkForGroupBy(
cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
+ ReduceSinkOperator pRS =
+ CorrelationUtilities.findPossibleParent(
+ pJoin, ReduceSinkOperator.class, dedupCtx.trustScript());
+ if (pRS != null) {
+ pRS.getConf().setEnforceSort(true);
+ }
return true;
}
return false;
@@ -565,6 +579,7 @@ public class ReduceSinkDeDuplication imp
if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
CorrelationUtilities.replaceReduceSinkWithSelectOperator(
cRS, dedupCtx.getPctx(), dedupCtx);
+ pRS.getConf().setEnforceSort(true);
return true;
}
return false;
@@ -581,6 +596,7 @@ public class ReduceSinkDeDuplication imp
start, ReduceSinkOperator.class, dedupCtx.trustScript());
if (pRS != null && merge(cRS, pRS, dedupCtx.minReducer())) {
CorrelationUtilities.removeReduceSinkForGroupBy(cRS, cGBY, dedupCtx.getPctx(), dedupCtx);
+ pRS.getConf().setEnforceSort(true);
return true;
}
return false;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CrossProductCheck.java Mon Jan 12 02:03:38 2015
@@ -241,7 +241,7 @@ public class CrossProductCheck implement
* <p>
* For MR the taskname is the StageName, for Tez it is the vertex name.
*/
- class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
+ public static class MapJoinCheck implements NodeProcessor, NodeProcessorCtx {
final List<String> warnings;
final String taskName;
@@ -302,7 +302,7 @@ public class CrossProductCheck implement
* in the Work. For Tez, you can restrict it to ReduceSinks for a particular output
* vertex.
*/
- static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
+ public static class ExtractReduceSinkInfo implements NodeProcessor, NodeProcessorCtx {
static class Info {
List<ExprNodeDesc> keyCols;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Mon Jan 12 02:03:38 2015
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.*;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
@@ -74,6 +75,8 @@ import org.apache.hadoop.hive.ql.plan.Op
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
@@ -309,6 +312,15 @@ public class Vectorizer implements Physi
}
}
}
+ } else if (currTask instanceof SparkTask) {
+ SparkWork sparkWork = (SparkWork) currTask.getWork();
+ for (BaseWork baseWork : sparkWork.getAllWork()) {
+ if (baseWork instanceof MapWork) {
+ convertMapWork((MapWork) baseWork, false);
+ } else if (baseWork instanceof ReduceWork) {
+ convertReduceWork((ReduceWork) baseWork);
+ }
+ }
}
return null;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java Mon Jan 12 02:03:38 2015
@@ -96,7 +96,8 @@ public class UnionProcessor implements T
// Walk the tree again to see if the union can be removed completely
HiveConf conf = pCtx.getConf();
opRules.clear();
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)
+ && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES)) {
throw new
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompilerFactory.java Mon Jan 12 02:03:38 2015
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.parse;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.parse.spark.SparkCompiler;
/**
* TaskCompilerFactory is a factory class to choose the appropriate
@@ -37,6 +38,8 @@ public class TaskCompilerFactory {
public static TaskCompiler getCompiler(HiveConf conf, ParseContext parseContext) {
if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
return new TezCompiler();
+ } else if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ return new SparkCompiler();
} else {
return new MapReduceCompiler();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Mon Jan 12 02:03:38 2015
@@ -117,6 +117,31 @@ public abstract class BaseWork extends A
return returnSet;
}
+ /**
+ * Returns a set containing all leaf operators from the operator tree in this work.
+ * @return a set containing all leaf operators in this operator tree.
+ */
+ public Set<Operator<?>> getAllLeafOperators() {
+ Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
+ Set<Operator<?>> opSet = getAllRootOperators();
+ Stack<Operator<?>> opStack = new Stack<Operator<?>>();
+
+ // add all children
+ opStack.addAll(opSet);
+
+ while (!opStack.empty()) {
+ Operator<?> op = opStack.pop();
+ if (op.getNumChild() == 0) {
+ returnSet.add(op);
+ }
+ if (op.getChildOperators() != null) {
+ opStack.addAll(op.getChildOperators());
+ }
+ }
+
+ return returnSet;
+ }
+
public Map<String, Map<Integer, String>> getAllScratchColumnVectorTypeMaps() {
return allScratchColumnVectorTypeMaps;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/BucketMapJoinContext.java Mon Jan 12 02:03:38 2015
@@ -21,6 +21,7 @@ import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -176,6 +177,33 @@ public class BucketMapJoinContext implem
this.bigTablePartSpecToFileMapping = bigTablePartSpecToFileMapping;
}
+ /**
+ * Given a small table input file, find the mapping
+ * big table input file with the smallest bucket number.
+ */
+ public String getMappingBigFile(String alias, String smallFile) {
+ HashSet<String> bigFiles = new HashSet<String>();
+ Map<String, List<String>> mapping = aliasBucketFileNameMapping.get(alias);
+ for (Map.Entry<String, List<String>> entry: mapping.entrySet()) {
+ if (entry.getValue().contains(smallFile)) {
+ bigFiles.add(entry.getKey());
+ }
+ }
+ // There could be several big table input files
+ // mapping to the same small input file.
+ // Find that one with the lowest bucket id.
+ int bucketId = Integer.MAX_VALUE;
+ String bigFile = null;
+ for (String f: bigFiles) {
+ int id = bucketFileNameMapping.get(f);
+ if (id < bucketId) {
+ bucketId = id;
+ bigFile = f;
+ }
+ }
+ return bigFile;
+ }
+
// returns fileId for SMBJoin, which consists part of result file name
// needed to avoid file name conflict when big table is partitioned
public String createFileId(String inputPath) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java Mon Jan 12 02:03:38 2015
@@ -149,6 +149,8 @@ public class ConditionalResolverMergeFil
work = ((MapredWork) mrTask.getWork()).getMapWork();
} else if (mrTask.getWork() instanceof TezWork){
work = (MapWork) ((TezWork) mrTask.getWork()).getAllWork().get(0);
+ } else if (mrTask.getWork() instanceof SparkWork) {
+ work = (MapWork) ((SparkWork) mrTask.getWork()).getAllWork().get(0);
} else {
work = (MapWork) mrTask.getWork();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java Mon Jan 12 02:03:38 2015
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -45,6 +46,8 @@ public class MapredLocalWork implements
private BucketMapJoinContext bucketMapjoinContext;
private Path tmpPath;
private String stageID;
+ // Temp HDFS path for Spark HashTable sink
+ private Path tmpHDFSPath;
private List<Operator<? extends OperatorDesc>> dummyParentOp;
private Map<MapJoinOperator, List<Operator<? extends OperatorDesc>>> directFetchOp;
@@ -52,7 +55,10 @@ public class MapredLocalWork implements
private boolean hasStagedAlias;
public MapredLocalWork() {
-
+ this(new LinkedHashMap<String, Operator<? extends OperatorDesc>>(),
+ new LinkedHashMap<String, FetchWork>());
+ this.dummyParentOp = new ArrayList<Operator<? extends OperatorDesc>>();
+ this.directFetchOp = new LinkedHashMap<MapJoinOperator, List<Operator<? extends OperatorDesc>>>();
}
public MapredLocalWork(
@@ -60,23 +66,20 @@ public class MapredLocalWork implements
final LinkedHashMap<String, FetchWork> aliasToFetchWork) {
this.aliasToWork = aliasToWork;
this.aliasToFetchWork = aliasToFetchWork;
-
}
public MapredLocalWork(MapredLocalWork clone){
this.tmpPath = clone.tmpPath;
this.inputFileChangeSensitive=clone.inputFileChangeSensitive;
-
}
-
public void setDummyParentOp(List<Operator<? extends OperatorDesc>> op){
this.dummyParentOp=op;
}
public List<Operator<? extends OperatorDesc>> getDummyParentOp(){
- return this.dummyParentOp;
+ return dummyParentOp;
}
@@ -168,6 +171,14 @@ public class MapredLocalWork implements
return tmpPath;
}
+ public void setTmpHDFSPath(Path tmpPath) {
+ this.tmpHDFSPath = tmpPath;
+ }
+
+ public Path getTmpHDFSPath() {
+ return tmpHDFSPath;
+ }
+
public String getBucketFileName(String bigFileName) {
if (!inputFileChangeSensitive || bigFileName == null || bigFileName.isEmpty()) {
return "-";
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java Mon Jan 12 02:03:38 2015
@@ -111,6 +111,9 @@ public class ReduceSinkDesc extends Abst
// Write type, since this needs to calculate buckets differently for updates and deletes
private AcidUtils.Operation writeType;
+ // whether we'll enforce the sort order of the RS
+ private transient boolean enforceSort = false;
+
private static transient Log LOG = LogFactory.getLog(ReduceSinkDesc.class);
public ReduceSinkDesc() {
}
@@ -165,6 +168,7 @@ public class ReduceSinkDesc extends Abst
desc.setStatistics(this.getStatistics());
desc.setSkipTag(skipTag);
desc.reduceTraits = reduceTraits.clone();
+ desc.setEnforceSort(enforceSort);
return desc;
}
@@ -407,4 +411,12 @@ public class ReduceSinkDesc extends Abst
public AcidUtils.Operation getWriteType() {
return writeType;
}
+
+ public boolean isEnforceSort() {
+ return enforceSort;
+ }
+
+ public void setEnforceSort(boolean isDeduplicated) {
+ this.enforceSort = isDeduplicated;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Mon Jan 12 02:03:38 2015
@@ -42,6 +42,8 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.ql.MapRedStats;
import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.spark.session.SparkSession;
+import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl;
import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager;
import org.apache.hadoop.hive.ql.exec.tez.TezSessionState;
import org.apache.hadoop.hive.ql.history.HiveHistory;
@@ -179,6 +181,8 @@ public class SessionState {
private String userIpAddress;
+ private SparkSession sparkSession;
+
/**
* Lineage state.
*/
@@ -1264,6 +1268,16 @@ public class SessionState {
tezSessionState = null;
}
+ if (sparkSession != null) {
+ try {
+ SparkSessionManagerImpl.getInstance().closeSession(sparkSession);
+ } catch (Exception ex) {
+ LOG.error("Error closing spark session.", ex);
+ } finally {
+ sparkSession = null;
+ }
+ }
+
dropSessionPaths(conf);
}
@@ -1358,6 +1372,14 @@ public class SessionState {
this.userIpAddress = userIpAddress;
}
+ public SparkSession getSparkSession() {
+ return sparkSession;
+ }
+
+ public void setSparkSession(SparkSession sparkSession) {
+ this.sparkSession = sparkSession;
+ }
+
/**
* Get the next suffix to use in naming a temporary table created by insert...values
* @return suffix
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsAggregator.java Mon Jan 12 02:03:38 2015
@@ -41,14 +41,16 @@ public class CounterStatsAggregator impl
@Override
public boolean connect(Configuration hconf, Task sourceTask) {
- try {
- jc = new JobClient(toJobConf(hconf));
- RunningJob job = jc.getJob(((MapRedTask)sourceTask).getJobID());
- if (job != null) {
- counters = job.getCounters();
+ if (sourceTask instanceof MapRedTask) {
+ try {
+ jc = new JobClient(toJobConf(hconf));
+ RunningJob job = jc.getJob(((MapRedTask)sourceTask).getJobID());
+ if (job != null) {
+ counters = job.getCounters();
+ }
+ } catch (Exception e) {
+ LOG.error("Failed to get Job instance for " + sourceTask.getJobID(),e);
}
- } catch (Exception e) {
- LOG.error("Failed to get Job instance for " + sourceTask.getJobID(),e);
}
return counters != null;
}
@@ -59,9 +61,13 @@ public class CounterStatsAggregator impl
@Override
public String aggregateStats(String counterGrpName, String statType) {
- // In case of counters, aggregation is done by JobTracker / MR AM itself
- // so no need to aggregate, simply return the counter value for requested stat.
- return String.valueOf(counters.getGroup(counterGrpName).getCounter(statType));
+ long value = 0;
+ if (counters != null) {
+ // In case of counters, aggregation is done by JobTracker / MR AM itself
+ // so no need to aggregate, simply return the counter value for requested stat.
+ value = counters.getGroup(counterGrpName).getCounter(statType);
+ }
+ return String.valueOf(value);
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/CounterStatsPublisher.java Mon Jan 12 02:03:38 2015
@@ -52,8 +52,9 @@ public class CounterStatsPublisher imple
for (Map.Entry<String, String> entry : stats.entrySet()) {
try {
reporter.incrCounter(fileID, entry.getKey(), Long.valueOf(entry.getValue()));
- } catch (NumberFormatException e) {
- LOG.error("Invalid counter value " + entry.getValue() + " for " + entry.getKey());
+ } catch (Exception e) {
+ LOG.error("Failed to increment counter value " + entry.getValue() + " for " + entry.getKey());
+ return false;
}
}
return true;
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestOperators.java Mon Jan 12 02:03:38 2015
@@ -272,7 +272,7 @@ public class TestOperators extends TestC
JobConf hconf = new JobConf(TestOperators.class);
HiveConf.setVar(hconf, HiveConf.ConfVars.HADOOPMAPFILENAME,
"hdfs:///testDir/testFile");
- IOContext.get(hconf.get(Utilities.INPUT_NAME)).setInputPath(
+ IOContext.get(hconf).setInputPath(
new Path("hdfs:///testDir/testFile"));
// initialize pathToAliases
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/TestHiveBinarySearchRecordReader.java Mon Jan 12 02:03:38 2015
@@ -116,7 +116,7 @@ public class TestHiveBinarySearchRecordR
private void resetIOContext() {
conf.set(Utilities.INPUT_NAME, "TestHiveBinarySearchRecordReader");
- ioContext = IOContext.get(conf.get(Utilities.INPUT_NAME));
+ ioContext = IOContext.get(conf);
ioContext.setUseSorted(false);
ioContext.setBinarySearching(false);
ioContext.setEndBinarySearch(false);
Modified: hive/trunk/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.input.format=org.apache.hadoop.
set hive.enforce.bucketing = true;
set hive.exec.reducers.max = 1;
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
Modified: hive/trunk/ql/src/test/queries/clientpositive/merge1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/merge1.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/merge1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/merge1.q Mon Jan 12 02:03:38 2015
@@ -1,4 +1,5 @@
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
-- SORT_QUERY_RESULTS
Modified: hive/trunk/ql/src/test/queries/clientpositive/merge2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/merge2.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/merge2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/merge2.q Mon Jan 12 02:03:38 2015
@@ -1,5 +1,6 @@
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
set mapred.min.split.size=256;
set mapred.min.split.size.per.node=256;
set mapred.min.split.size.per.rack=256;
Modified: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_1.q Mon Jan 12 02:03:38 2015
@@ -6,6 +6,7 @@ set hive.optimize.union.remove=true;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.merge.sparkfiles=false;
set mapred.input.dir.recursive=true;
-- This is to test the union->selectstar->filesink and skewjoin optimization
Modified: hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/skewjoin_union_remove_2.q Mon Jan 12 02:03:38 2015
@@ -6,6 +6,7 @@ set hive.optimize.union.remove=true;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
+set hive.merge.sparkfiles=false;
set mapred.input.dir.recursive=true;
CREATE TABLE T1(key STRING, val STRING)
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_1.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_1.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_10.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_10.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_10.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_10.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_11.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_11.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_11.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_11.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_12.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_12.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_12.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_12.q Mon Jan 12 02:03:38 2015
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
set hive.auto.convert.join=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_13.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_13.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_13.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_13.q Mon Jan 12 02:03:38 2015
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
set hive.auto.convert.join=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_14.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_14.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_14.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_14.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.join=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_15.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_15.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_15.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_15.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_16.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_16.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_16.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_16.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_17.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_17.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_17.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_17.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_18.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_18.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_18.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_18.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_19.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_19.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_19.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_19.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_2.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_20.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_21.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_22.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_23.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_24.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_25.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_25.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_25.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_25.q Mon Jan 12 02:03:38 2015
@@ -3,6 +3,7 @@ set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_3.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_4.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_5.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_6.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_7.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_8.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;
Modified: hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q?rev=1651024&r1=1651023&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/union_remove_9.q Mon Jan 12 02:03:38 2015
@@ -2,6 +2,7 @@ set hive.stats.autogather=false;
set hive.optimize.union.remove=true;
set hive.mapred.supports.subdirectories=true;
+set hive.merge.sparkfiles=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.smallfiles.avgsize=1;
Modified: hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out?rev=1651024&r1=1650662&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_orc.q.out Mon Jan 12 02:03:38 2015
@@ -48,9 +48,9 @@ columns:struct columns { i32 key, string
partitioned:false
partitionColumns:
totalNumberFiles:3
-totalFileSize:7494
-maxFileSize:2498
-minFileSize:2498
+totalFileSize:7545
+maxFileSize:2515
+minFileSize:2515
#### A masked pattern was here ####
PREHOOK: query: select count(1) from src_orc_merge_test
@@ -91,9 +91,9 @@ columns:struct columns { i32 key, string
partitioned:false
partitionColumns:
totalNumberFiles:1
-totalFileSize:7169
-maxFileSize:7169
-minFileSize:7169
+totalFileSize:7198
+maxFileSize:7198
+minFileSize:7198
#### A masked pattern was here ####
PREHOOK: query: select count(1) from src_orc_merge_test
@@ -171,9 +171,9 @@ columns:struct columns { i32 key, string
partitioned:true
partitionColumns:struct partition_columns { string ds}
totalNumberFiles:3
-totalFileSize:7494
-maxFileSize:2498
-minFileSize:2498
+totalFileSize:7545
+maxFileSize:2515
+minFileSize:2515
#### A masked pattern was here ####
PREHOOK: query: select count(1) from src_orc_merge_test_part
@@ -218,9 +218,9 @@ columns:struct columns { i32 key, string
partitioned:true
partitionColumns:struct partition_columns { string ds}
totalNumberFiles:1
-totalFileSize:7169
-maxFileSize:7169
-minFileSize:7169
+totalFileSize:7198
+maxFileSize:7198
+minFileSize:7198
#### A masked pattern was here ####
PREHOOK: query: select count(1) from src_orc_merge_test_part
Modified: hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out?rev=1651024&r1=1650662&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out Mon Jan 12 02:03:38 2015
@@ -48,9 +48,9 @@ columns:struct columns { i32 key, string
partitioned:false
partitionColumns:
totalNumberFiles:3
-totalFileSize:7494
-maxFileSize:2498
-minFileSize:2498
+totalFileSize:7545
+maxFileSize:2515
+minFileSize:2515
#### A masked pattern was here ####
PREHOOK: query: desc extended src_orc_merge_test_stat
@@ -94,7 +94,7 @@ Table Parameters:
numFiles 3
numRows 1500
rawDataSize 141000
- totalSize 7494
+ totalSize 7545
#### A masked pattern was here ####
# Storage Information
@@ -146,7 +146,7 @@ Table Parameters:
numFiles 1
numRows 1500
rawDataSize 141000
- totalSize 7169
+ totalSize 7198
#### A masked pattern was here ####
# Storage Information
@@ -216,9 +216,9 @@ columns:struct columns { i32 key, string
partitioned:true
partitionColumns:struct partition_columns { string ds}
totalNumberFiles:3
-totalFileSize:7494
-maxFileSize:2498
-minFileSize:2498
+totalFileSize:7545
+maxFileSize:2515
+minFileSize:2515
#### A masked pattern was here ####
PREHOOK: query: desc formatted src_orc_merge_test_part_stat partition (ds='2011')
@@ -249,7 +249,7 @@ Partition Parameters:
numFiles 3
numRows 1500
rawDataSize 141000
- totalSize 7494
+ totalSize 7545
#### A masked pattern was here ####
# Storage Information
@@ -300,7 +300,7 @@ Partition Parameters:
numFiles 3
numRows 1500
rawDataSize 141000
- totalSize 7494
+ totalSize 7545
#### A masked pattern was here ####
# Storage Information
@@ -359,7 +359,7 @@ Partition Parameters:
numFiles 1
numRows 1500
rawDataSize 141000
- totalSize 7169
+ totalSize 7198
#### A masked pattern was here ####
# Storage Information
Modified: hive/trunk/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out?rev=1651024&r1=1650662&r2=1651024&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out Mon Jan 12 02:03:38 2015
@@ -102,14 +102,14 @@ PREHOOK: query: EXPLAIN SELECT
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
- FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+ FROM over1korc GROUP BY i LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN SELECT
i,
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
- FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+ FROM over1korc GROUP BY i LIMIT 10
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
@@ -120,7 +120,6 @@ STAGE PLANS:
Spark
Edges:
Reducer 2 <- Map 1 (GROUP, 2)
- Reducer 3 <- Reducer 2 (SORT, 1)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -153,28 +152,20 @@ STAGE PLANS:
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
- Reducer 3
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
- Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Limit
+ Number of rows: 10
Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- Execution mode: vectorized
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -187,7 +178,7 @@ PREHOOK: query: SELECT
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
- FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+ FROM over1korc GROUP BY i LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@over1korc
#### A masked pattern was here ####
@@ -196,17 +187,17 @@ POSTHOOK: query: SELECT
AVG(CAST(50 AS INT)) AS `avg_int_ok`,
AVG(CAST(50 AS DOUBLE)) AS `avg_double_ok`,
AVG(CAST(50 AS DECIMAL)) AS `avg_decimal_ok`
- FROM over1korc GROUP BY i ORDER BY i LIMIT 10
+ FROM over1korc GROUP BY i LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@over1korc
#### A masked pattern was here ####
-65536 50.0 50.0 50
-65537 50.0 50.0 50
-65538 50.0 50.0 50
-65539 50.0 50.0 50
-65540 50.0 50.0 50
-65541 50.0 50.0 50
-65542 50.0 50.0 50
-65543 50.0 50.0 50
-65544 50.0 50.0 50
-65545 50.0 50.0 50
+65598 50.0 50.0 50
+65694 50.0 50.0 50
+65678 50.0 50.0 50
+65684 50.0 50.0 50
+65596 50.0 50.0 50
+65692 50.0 50.0 50
+65630 50.0 50.0 50
+65674 50.0 50.0 50
+65628 50.0 50.0 50
+65776 50.0 50.0 50