You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/09/21 07:43:47 UTC
svn commit: r1626516 - in /hive/branches/spark/ql/src:
java/org/apache/hadoop/hive/ql/exec/spark/
java/org/apache/hadoop/hive/ql/optimizer/ test/queries/clientpositive/
test/results/clientpositive/spark/
Author: xuefu
Date: Sun Sep 21 05:43:46 2014
New Revision: 1626516
URL: http://svn.apache.org/r1626516
Log:
HIVE-8043: Support merging small files [Spark Branch] (Rui via Xuefu)
Added:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java
Modified:
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunctionResultList.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/branches/spark/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q
hive/branches/spark/ql/src/test/queries/clientpositive/merge1.q
hive/branches/spark/ql/src/test/queries/clientpositive/merge2.q
hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunction.java Sun Sep 21 05:43:46 2014
@@ -20,7 +20,9 @@ package org.apache.hadoop.hive.ql.exec.s
import java.util.Iterator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.HiveKey;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
@@ -47,7 +49,15 @@ public class HiveMapFunction implements
jobConf = KryoSerializer.deserializeJobConf(this.buffer);
}
- SparkMapRecordHandler mapRecordHandler = new SparkMapRecordHandler();
+ SparkRecordHandler mapRecordHandler;
+
+ // need different record handler for MergeFileWork
+ if (MergeFileMapper.class.getName().equals(jobConf.get(Utilities.MAPRED_MAPPER_CLASS))) {
+ mapRecordHandler = new SparkMergeFileRecordHandler();
+ } else {
+ mapRecordHandler = new SparkMapRecordHandler();
+ }
+
HiveMapFunctionResultList result = new HiveMapFunctionResultList(jobConf, it, mapRecordHandler);
//TODO we need to implement a Spark specified Reporter to collect stats, refer to HIVE-7709.
mapRecordHandler.init(jobConf, result, Reporter.NULL);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunctionResultList.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunctionResultList.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunctionResultList.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveMapFunctionResultList.java Sun Sep 21 05:43:46 2014
@@ -27,7 +27,7 @@ import java.util.Iterator;
public class HiveMapFunctionResultList extends
HiveBaseFunctionResultList<Tuple2<BytesWritable, BytesWritable>> {
- private final SparkMapRecordHandler recordHandler;
+ private final SparkRecordHandler recordHandler;
/**
* Instantiate result set Iterable for Map function output.
@@ -36,7 +36,7 @@ public class HiveMapFunctionResultList e
* @param handler Initialized {@link SparkMapRecordHandler} instance.
*/
public HiveMapFunctionResultList(Configuration conf,
- Iterator<Tuple2<BytesWritable, BytesWritable>> inputIterator, SparkMapRecordHandler handler) {
+ Iterator<Tuple2<BytesWritable, BytesWritable>> inputIterator, SparkRecordHandler handler) {
super(conf, inputIterator);
recordHandler = handler;
}
@@ -44,7 +44,7 @@ public class HiveMapFunctionResultList e
@Override
protected void processNextRecord(Tuple2<BytesWritable, BytesWritable> inputRecord)
throws IOException {
- recordHandler.processRow(inputRecord._2());
+ recordHandler.processRow(inputRecord._1(), inputRecord._2());
}
@Override
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMapRecordHandler.java Sun Sep 21 05:43:46 2014
@@ -54,7 +54,7 @@ import java.util.List;
* - Catch and handle errors during execution of the operators.
*
*/
-public class SparkMapRecordHandler extends SparkRecordHandler{
+public class SparkMapRecordHandler extends SparkRecordHandler {
private static final String PLAN_KEY = "__MAP_PLAN__";
private MapOperator mo;
@@ -130,7 +130,7 @@ public class SparkMapRecordHandler exten
}
@Override
- public void processRow(Object value) throws IOException {
+ public void processRow(Object key, Object value) throws IOException {
// reset the execContext for each new row
execContext.resetRow();
@@ -158,6 +158,7 @@ public class SparkMapRecordHandler exten
throw new UnsupportedOperationException("Do not support this method in SparkMapRecordHandler.");
}
+ @Override
public void close() {
// No row was processed
if (oc == null) {
@@ -202,6 +203,7 @@ public class SparkMapRecordHandler exten
}
}
+ @Override
public boolean getDone() {
return mo.getDone();
}
Added: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java?rev=1626516&view=auto
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java (added)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkMergeFileRecordHandler.java Sun Sep 21 05:43:46 2014
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.spark;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.*;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * Copied from MergeFileMapper
+ *
+ * As MergeFileMapper is very similar to ExecMapper, this class is
+ * very similar to SparkMapRecordHandler
+ */
+public class SparkMergeFileRecordHandler extends SparkRecordHandler {
+
+ private static final String PLAN_KEY = "__MAP_PLAN__";
+ private static final Log l4j = LogFactory.getLog(SparkMergeFileRecordHandler.class);
+ private Operator<? extends OperatorDesc> op;
+ private AbstractFileMergeOperator mergeOp;
+ private Object[] row;
+
+ @Override
+ public void init(JobConf job, OutputCollector output, Reporter reporter) {
+ super.init(job, output, reporter);
+
+ ObjectCache cache = ObjectCacheFactory.getCache(job);
+
+ try {
+ jc = job;
+ MapWork mapWork = (MapWork) cache.retrieve(PLAN_KEY);
+
+ if (mapWork == null) {
+ mapWork = Utilities.getMapWork(job);
+ cache.cache(PLAN_KEY, mapWork);
+ } else {
+ Utilities.setMapWork(job, mapWork);
+ }
+
+ if (mapWork instanceof MergeFileWork) {
+ MergeFileWork mergeFileWork = (MergeFileWork) mapWork;
+ String alias = mergeFileWork.getAliasToWork().keySet().iterator().next();
+ op = mergeFileWork.getAliasToWork().get(alias);
+ if (op instanceof AbstractFileMergeOperator) {
+ mergeOp = (AbstractFileMergeOperator) op;
+ mergeOp.initializeOp(jc);
+ row = new Object[2];
+ abort = false;
+ } else {
+ abort = true;
+ throw new RuntimeException(
+ "Merge file work's top operator should be an" +
+ " instance of AbstractFileMergeOperator");
+ }
+ } else {
+ abort = true;
+ throw new RuntimeException("Map work should be a merge file work.");
+ }
+
+ l4j.info(mergeOp.dump(0));
+ } catch (HiveException e) {
+ abort = true;
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void processRow(Object key, Object value) throws IOException {
+ row[0] = key;
+ row[1] = value;
+ try {
+ mergeOp.processOp(row, 0);
+ } catch (HiveException e) {
+ abort = true;
+ throw new IOException(e);
+ }
+ }
+
+ @Override
+ public void processRow(Object key, Iterator values) throws IOException {
+ throw new UnsupportedOperationException("Do not support this method in "
+ + this.getClass().getSimpleName());
+ }
+
+ @Override
+ public void close() {
+ l4j.info("Closing Merge Operator " + mergeOp.getName());
+ try {
+ mergeOp.closeOp(abort);
+ } catch (HiveException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public boolean getDone() {
+ return mergeOp.getDone();
+ }
+}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java Sun Sep 21 05:43:46 2014
@@ -28,7 +28,12 @@ import com.google.common.base.Preconditi
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat;
+import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
+import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
@@ -84,8 +89,8 @@ public class SparkPlanGenerator {
}
MapWork mapWork = (MapWork) w;
JobConf newJobConf = cloneJobConf(mapWork);
- SparkTran tran = generate(newJobConf, mapWork);
JavaPairRDD<BytesWritable, BytesWritable> input = generateRDD(newJobConf, mapWork);
+ SparkTran tran = generate(newJobConf, mapWork);
trans.addRootTranWithInput(tran, input);
while (sparkWork.getChildren(w).size() > 0) {
@@ -155,13 +160,14 @@ public class SparkPlanGenerator {
private JavaPairRDD<BytesWritable, BytesWritable> generateRDD(JobConf jobConf, MapWork mapWork)
throws Exception {
- Class ifClass = getInputFormat(mapWork);
+ Class ifClass = getInputFormat(jobConf, mapWork);
return sc.hadoopRDD(jobConf, ifClass, WritableComparable.class,
Writable.class);
}
- private Class getInputFormat(MapWork mWork) throws HiveException {
+ private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException {
+ // MergeFileWork is sub-class of MapWork, we don't need to distinguish here
if (mWork.getInputformat() != null) {
HiveConf.setVar(jobConf, HiveConf.ConfVars.HIVEINPUTFORMAT,
mWork.getInputformat());
@@ -190,6 +196,20 @@ public class SparkPlanGenerator {
}
private MapTran generate(JobConf jobConf, MapWork mw) throws Exception {
+ // Create tmp dir for MergeFileWork
+ if (mw instanceof MergeFileWork) {
+ Path outputPath = ((MergeFileWork) mw).getOutputDir();
+ Path tempOutPath = Utilities.toTempPath(outputPath);
+ FileSystem fs = outputPath.getFileSystem(jobConf);
+ try {
+ if (!fs.exists(tempOutPath)) {
+ fs.mkdirs(tempOutPath);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(
+ "Can't make path " + outputPath + " : " + e.getMessage());
+ }
+ }
initStatsPublisher(mw);
MapTran result = new MapTran();
byte[] confBytes = KryoSerializer.serializeJobConf(jobConf);
@@ -238,11 +258,18 @@ public class SparkPlanGenerator {
Utilities.setInputPaths(cloned, inputPaths);
Utilities.setMapWork(cloned, (MapWork) work, scratchDir, false);
Utilities.createTmpDirs(cloned, (MapWork) work);
- cloned.set("mapred.mapper.class", ExecMapper.class.getName());
+ if (work instanceof MergeFileWork) {
+ MergeFileWork mergeFileWork = (MergeFileWork) work;
+ cloned.set(Utilities.MAPRED_MAPPER_CLASS, MergeFileMapper.class.getName());
+ cloned.set("mapred.input.format.class", mergeFileWork.getInputformat());
+ cloned.setClass("mapred.output.format.class", MergeFileOutputFormat.class, FileOutputFormat.class);
+ } else {
+ cloned.set(Utilities.MAPRED_MAPPER_CLASS, ExecMapper.class.getName());
+ }
} else if (work instanceof ReduceWork) {
Utilities.setReduceWork(cloned, (ReduceWork) work, scratchDir, false);
Utilities.createTmpDirs(cloned, (ReduceWork) work);
- cloned.set("mapred.reducer.class", ExecReducer.class.getName());
+ cloned.set(Utilities.MAPRED_REDUCER_CLASS, ExecReducer.class.getName());
}
return cloned;
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkRecordHandler.java Sun Sep 21 05:43:46 2014
@@ -67,9 +67,9 @@ public abstract class SparkRecordHandler
}
/**
- * Process row with single value.
+ * Process row with key and single value.
*/
- public abstract void processRow(Object value) throws IOException;
+ public abstract void processRow(Object key, Object value) throws IOException;
/**
* Process row with key and value collection.
@@ -89,7 +89,8 @@ public abstract class SparkRecordHandler
}
}
- abstract void close();
+ public abstract void close();
+ public abstract boolean getDone();
/**
* Log information to be logged at the end
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java Sun Sep 21 05:43:46 2014
@@ -151,7 +151,7 @@ public class SparkReduceRecordHandler ex
}
@Override
- public void processRow(Object value) throws IOException {
+ public void processRow(Object key, Object value) throws IOException {
throw new UnsupportedOperationException("Do not support this method in SparkReduceRecordHandler.");
}
@@ -278,4 +278,9 @@ public class SparkReduceRecordHandler ex
Utilities.clearWorkMap();
}
}
+
+ @Override
+ public boolean getDone() {
+ return reducer.getDone();
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Sun Sep 21 05:43:46 2014
@@ -1274,6 +1274,10 @@ public final class GenMapRedUtils {
work = new TezWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
cplan.setName("Tez Merge File Work");
((TezWork) work).add(cplan);
+ } else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
+ work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ cplan.setName("Spark Merge File Work");
+ ((SparkWork) work).add(cplan);
} else {
work = cplan;
}
@@ -1285,8 +1289,8 @@ public final class GenMapRedUtils {
((TezWork)work).add(cplan);
} else if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
work = new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
- cplan.setName("Merge");
- ((SparkWork)work).add(cplan);
+ cplan.setName("Spark Merge File Work");
+ ((SparkWork) work).add(cplan);
} else {
work = new MapredWork();
((MapredWork)work).setMapWork(cplan);
Modified: hive/branches/spark/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q (original)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/disable_merge_for_bucketing.q Sun Sep 21 05:43:46 2014
@@ -2,6 +2,7 @@ set hive.input.format=org.apache.hadoop.
set hive.enforce.bucketing = true;
set hive.exec.reducers.max = 1;
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
CREATE TABLE bucket2_1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
Modified: hive/branches/spark/ql/src/test/queries/clientpositive/merge1.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/merge1.q?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/merge1.q (original)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/merge1.q Sun Sep 21 05:43:46 2014
@@ -1,4 +1,5 @@
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
-- SORT_QUERY_RESULTS
Modified: hive/branches/spark/ql/src/test/queries/clientpositive/merge2.q
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/queries/clientpositive/merge2.q?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/queries/clientpositive/merge2.q (original)
+++ hive/branches/spark/ql/src/test/queries/clientpositive/merge2.q Sun Sep 21 05:43:46 2014
@@ -1,5 +1,6 @@
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
+set hive.merge.sparkfiles=true;
set mapred.min.split.size=256;
set mapred.min.split.size.per.node=256;
set mapred.min.split.size.per.rack=256;
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/merge1.q.out Sun Sep 21 05:43:46 2014
@@ -20,9 +20,14 @@ select key, count(1) from src group by k
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -73,6 +78,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -89,6 +103,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table dest1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -473,9 +523,14 @@ insert overwrite table dest1 select key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -500,6 +555,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -516,6 +580,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table dest1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
@@ -538,9 +638,14 @@ insert overwrite table dest1 select key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -565,6 +670,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -581,6 +695,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table dest1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/merge2.q.out Sun Sep 21 05:43:46 2014
@@ -20,9 +20,14 @@ select key, count(1) from src group by k
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -73,6 +78,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -89,6 +103,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table test1
select key, count(1) from src group by key
PREHOOK: type: QUERY
@@ -473,9 +523,14 @@ insert overwrite table test1 select key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -500,6 +555,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -516,6 +580,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table test1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
@@ -538,9 +638,14 @@ insert overwrite table test1 select key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6
+ Stage-5
+ Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4
+ Stage-6
+ Stage-7 depends on stages: Stage-6
STAGE PLANS:
Stage: Stage-1
@@ -565,6 +670,15 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test1
+ Stage: Stage-8
+ Conditional Operator
+
+ Stage: Stage-5
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
Stage: Stage-2
Dependency Collection
@@ -581,6 +695,42 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-6
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.test1
+
+ Stage: Stage-7
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: insert overwrite table test1 select key from test_src
PREHOOK: type: QUERY
PREHOOK: Input: default@test_src
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_10.q.out Sun Sep 21 05:43:46 2014
@@ -201,18 +201,26 @@ STAGE PLANS:
name: default.outputtbl1
Stage: Stage-3
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-5
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-6
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_11.q.out Sun Sep 21 05:43:46 2014
@@ -176,18 +176,26 @@ STAGE PLANS:
name: default.outputtbl1
Stage: Stage-3
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-5
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-6
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_16.q.out Sun Sep 21 05:43:46 2014
@@ -202,18 +202,26 @@ STAGE PLANS:
name: default.outputtbl1
Stage: Stage-3
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-5
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-6
Move Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_4.q.out Sun Sep 21 05:43:46 2014
@@ -190,7 +190,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Merge
+ Spark Merge File Work
Map Operator Tree:
TableScan
File Output Operator
@@ -205,7 +205,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Merge
+ Spark Merge File Work
Map Operator Tree:
TableScan
File Output Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_5.q.out Sun Sep 21 05:43:46 2014
@@ -190,7 +190,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Merge
+ Spark Merge File Work
Map Operator Tree:
TableScan
File Output Operator
@@ -205,7 +205,7 @@ STAGE PLANS:
Spark
#### A masked pattern was here ####
Vertices:
- Merge
+ Spark Merge File Work
Map Operator Tree:
TableScan
File Output Operator
Modified: hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out?rev=1626516&r1=1626515&r2=1626516&view=diff
==============================================================================
--- hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out (original)
+++ hive/branches/spark/ql/src/test/results/clientpositive/spark/union_remove_9.q.out Sun Sep 21 05:43:46 2014
@@ -197,18 +197,26 @@ STAGE PLANS:
name: default.outputtbl1
Stage: Stage-3
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-5
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ Spark
+#### A masked pattern was here ####
+ Vertices:
+ Spark Merge File Work
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
Stage: Stage-6
Move Operator