Posted to commits@hive.apache.org by na...@apache.org on 2012/07/20 05:50:52 UTC
svn commit: r1363639 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/
java/org/apache/hadoop/hive/ql/optimizer/
java/org/apache/hadoop/hive/ql/optimizer/ppr/
java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
test/q...
Author: namit
Date: Fri Jul 20 03:50:52 2012
New Revision: 1363639
URL: http://svn.apache.org/viewvc?rev=1363639&view=rev
Log:
HIVE-3205 Bucketed mapjoin on partitioned table which has no partition throws NPE
(Navis via namit)
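
Context on the failure mode (a minimal sketch, not Hive code): when the small-table
side of a bucketed map join is a partitioned table with zero partitions, its
bucket-file-name mapping is empty, so the lookup in SMBMapJoinOperator (guarded
with a null check in the hunk below) returns null. Whether the NullPointerException
fires at the setter or at a later unboxing depends on the surrounding signatures;
the class and variable names here are illustrative only:

    // Illustrative reproduction of the null flowing out of an empty
    // bucket mapping; "bucketFileNameMapping" stands in for
    // bucketMatcherCxt.getBucketFileNameMapping() in the diff below.
    import java.util.HashMap;
    import java.util.Map;

    public class EmptyBucketMappingNpe {
      public static void main(String[] args) {
        Map<String, Integer> bucketFileNameMapping = new HashMap<String, Integer>();
        // No partitions were loaded, so no bucket files were registered.
        Integer bucketNum = bucketFileNameMapping.get("000000_0"); // returns null
        int fileId = bucketNum; // NullPointerException via auto-unboxing
        System.out.println(fileId);
      }
    }

The fix shape for the files listed next is sketched after the Modified list below.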
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin1.q
hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin9.q
hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out
hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out
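
In outline, the patch makes the empty-partition case first-class instead of
signalling it with nulls: FetchWork gains constructors that always carry a
TableDesc, PrunedPartitionList records its source Table so that TableDesc can be
recovered even when no partitions survive pruning, and FetchOperator derives
emptiness from the plan rather than a settable flag, so
getOutputObjectInspector() never returns null. The derivation, as it appears in
the FetchOperator hunk below:

    // From FetchOperator.java in this revision: emptiness is a pure
    // function of the FetchWork, not mutable operator state.
    public boolean isEmptyTable() {
      return work.getTblDir() == null
          && (work.getPartDir() == null || work.getPartDir().isEmpty());
    }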
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java Fri Jul 20 03:50:52 2012
@@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
-import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -42,6 +41,7 @@ import org.apache.hadoop.hive.ql.plan.Pa
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -64,7 +64,6 @@ public class FetchOperator implements Se
static Log LOG = LogFactory.getLog(FetchOperator.class.getName());
static LogHelper console = new LogHelper(LOG);
- private boolean isEmptyTable;
private boolean isNativeTable;
private FetchWork work;
private int splitNum;
@@ -96,7 +95,7 @@ public class FetchOperator implements Se
this.job = job;
tblDataDone = false;
rowWithPart = new Object[2];
- if (work.getTblDesc() != null) {
+ if (work.getTblDir() != null) {
isNativeTable = !work.getTblDesc().isNonNative();
} else {
isNativeTable = true;
@@ -144,11 +143,7 @@ public class FetchOperator implements Se
}
public boolean isEmptyTable() {
- return isEmptyTable;
- }
-
- public void setEmptyTable(boolean isEmptyTable) {
- this.isEmptyTable = isEmptyTable;
+ return work.getTblDir() == null && (work.getPartDir() == null || work.getPartDir().isEmpty());
}
/**
@@ -171,28 +166,37 @@ public class FetchOperator implements Se
return inputFormats.get(inputFormatClass);
}
- private void setPrtnDesc() throws Exception {
- List<String> partNames = new ArrayList<String>();
- List<String> partValues = new ArrayList<String>();
-
- String pcols = currPart.getTableDesc().getProperties().getProperty(
+ private void setPrtnDesc(TableDesc table, Map<String, String> partSpec) throws Exception {
+ String pcols = table.getProperties().getProperty(
org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_PARTITION_COLUMNS);
- LinkedHashMap<String, String> partSpec = currPart.getPartSpec();
+ String[] partKeys = pcols.trim().split("/");
+ if (partSpec != null) {
+ rowWithPart[1] = createPartValue(partKeys, partSpec);
+ }
+ rowObjectInspector = createRowInspector(partKeys);
+ }
+ private StructObjectInspector createRowInspector(String[] partKeys) throws SerDeException {
+ List<String> partNames = new ArrayList<String>();
List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
- String[] partKeys = pcols.trim().split("/");
for (String key : partKeys) {
partNames.add(key);
- partValues.add(partSpec.get(key));
partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
StructObjectInspector partObjectInspector = ObjectInspectorFactory
.getStandardStructObjectInspector(partNames, partObjectInspectors);
- rowObjectInspector = (StructObjectInspector) serde.getObjectInspector();
+ StructObjectInspector inspector = (StructObjectInspector) serde.getObjectInspector();
+
+ return ObjectInspectorFactory.getUnionStructObjectInspector(
+ Arrays.asList(inspector, partObjectInspector));
+ }
- rowWithPart[1] = partValues;
- rowObjectInspector = ObjectInspectorFactory.getUnionStructObjectInspector(Arrays
- .asList(new StructObjectInspector[] {rowObjectInspector, partObjectInspector}));
+ private List<String> createPartValue(String[] partKeys, Map<String, String> partSpec) {
+ List<String> partValues = new ArrayList<String>();
+ for (String key : partKeys) {
+ partValues.add(partSpec.get(key));
+ }
+ return partValues;
}
private void getNextPath() throws Exception {
@@ -290,7 +294,7 @@ public class FetchOperator implements Se
}
if (currPart != null) {
- setPrtnDesc();
+ setPrtnDesc(currPart.getTableDesc(), currPart.getPartSpec());
}
}
@@ -374,11 +378,14 @@ public class FetchOperator implements Se
} else {
// hack, get the first.
List<PartitionDesc> listParts = work.getPartDesc();
- currPart = listParts.get(0);
+ currPart = listParts.isEmpty() ? null : listParts.get(0);
}
}
}
+ /**
+ * returns output ObjectInspector, never null
+ */
public ObjectInspector getOutputObjectInspector() throws HiveException {
try {
if (work.getTblDir() != null) {
@@ -386,20 +393,23 @@ public class FetchOperator implements Se
Deserializer serde = tbl.getDeserializerClass().newInstance();
serde.initialize(job, tbl.getProperties());
return serde.getObjectInspector();
- } else if (work.getPartDesc() != null) {
- List<PartitionDesc> listParts = work.getPartDesc();
- if(listParts.size() == 0) {
- return null;
- }
- currPart = listParts.get(0);
- serde = currPart.getTableDesc().getDeserializerClass().newInstance();
- serde.initialize(job, currPart.getTableDesc().getProperties());
- setPrtnDesc();
- currPart = null;
- return rowObjectInspector;
+ }
+ TableDesc tbl;
+ Map<String, String> partSpec;
+ List<PartitionDesc> listParts = work.getPartDesc();
+ if (listParts == null || listParts.isEmpty()) {
+ tbl = work.getTblDesc();
+ partSpec = null;
} else {
- return null;
+ currPart = listParts.get(0);
+ tbl = currPart.getTableDesc();
+ partSpec = currPart.getPartSpec();
}
+ serde = tbl.getDeserializerClass().newInstance();
+ serde.initialize(job, tbl.getProperties());
+ setPrtnDesc(tbl, partSpec);
+ currPart = null;
+ return rowObjectInspector;
} catch (Exception e) {
throw new HiveException("Failed with exception " + e.getMessage()
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java Fri Jul 20 03:50:52 2012
@@ -308,7 +308,7 @@ public class MapredLocalTask extends Tas
if (fetchOp.isEmptyTable()) {
//generate empty hashtable for empty table
- this.generateDummyHashTable(alias, bigTableBucket);
+ this.generateDummyHashTable(alias, getFileName(bigTableBucket));
continue;
}
@@ -381,12 +381,8 @@ public class MapredLocalTask extends Tas
}
// initialize the forward operator
ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
- if (objectInspector != null) {
- forwardOp.initialize(jobConf, new ObjectInspector[] {objectInspector});
- l4j.info("fetchoperator for " + entry.getKey() + " initialized");
- } else {
- fetchOp.setEmptyTable(true);
- }
+ forwardOp.initialize(jobConf, new ObjectInspector[] {objectInspector});
+ l4j.info("fetchoperator for " + entry.getKey() + " initialized");
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java Fri Jul 20 03:50:52 2012
@@ -482,7 +482,10 @@ public class SMBMapJoinOperator extends
.getBucketMatcherClass();
BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(
bucketMatcherCls, null);
- this.getExecContext().setFileId(bucketMatcherCxt.getBucketFileNameMapping().get(currentInputFile));
+ Integer bucketNum = bucketMatcherCxt.getBucketFileNameMapping().get(currentInputFile);
+ if (bucketNum != null) {
+ this.getExecContext().setFileId(bucketNum);
+ }
LOG.info("set task id: " + this.getExecContext().getFileId());
bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java Fri Jul 20 03:50:52 2012
@@ -238,6 +238,11 @@ public class BucketMapJoinOptimizer impl
aliasToPartitionBucketNumberMapping.put(alias, buckets);
aliasToPartitionBucketFileNamesMapping.put(alias, files);
}
+ } else {
+ if (!alias.equals(baseBigAlias)) {
+ aliasToPartitionBucketNumberMapping.put(alias, Arrays.<Integer>asList());
+ aliasToPartitionBucketFileNamesMapping.put(alias, new ArrayList<List<String>>());
+ }
}
} else {
if (!checkBucketColumns(tbl.getBucketCols(), mjDecs, index)) {
@@ -278,7 +283,7 @@ public class BucketMapJoinOptimizer impl
// in the big table to bucket file names in small tables.
for (int j = 0; j < joinAliases.size(); j++) {
String alias = joinAliases.get(j);
- if(alias.equals(baseBigAlias)) {
+ if (alias.equals(baseBigAlias)) {
continue;
}
for (List<String> names : aliasToPartitionBucketFileNamesMapping.get(alias)) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRTableScan1.java Fri Jul 20 03:50:52 2012
@@ -32,6 +32,7 @@ import org.apache.hadoop.hive.ql.lib.Nod
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.GenMRProcContext.GenMapRedCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
@@ -103,7 +104,9 @@ public class GenMRTableScan1 implements
confirmedPartns.addAll(tblSpec.partitions);
}
if (confirmedPartns.size() > 0) {
- PrunedPartitionList partList = new PrunedPartitionList(confirmedPartns, new HashSet<Partition>(), null);
+ Table source = parseCtx.getQB().getMetaData().getTableForAlias(alias);
+ PrunedPartitionList partList = new PrunedPartitionList(source, confirmedPartns,
+ new HashSet<Partition>(), null);
GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx, partList);
} else { // non-partitioned table
GenMapRedUtils.setTaskPlan(currAliasId, currTopOp, currWork, false, ctx);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java Fri Jul 20 03:50:52 2012
@@ -815,9 +815,10 @@ public final class GenMapRedUtils {
assert localPlan.getAliasToFetchWork().get(alias_id) == null;
localPlan.getAliasToWork().put(alias_id, topOp);
if (tblDir == null) {
+ tblDesc = Utilities.getTableDesc(partsList.getSourceTable());
localPlan.getAliasToFetchWork().put(
alias_id,
- new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc));
+ new FetchWork(FetchWork.convertPathToStringArray(partDir), partDesc, tblDesc));
} else {
localPlan.getAliasToFetchWork().put(alias_id,
new FetchWork(tblDir.toString(), tblDesc));
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java Fri Jul 20 03:50:52 2012
@@ -195,7 +195,8 @@ public class MapJoinProcessor implements
}
// create fetchwork for partitioned table
if (fetchWork == null) {
- fetchWork = new FetchWork(partDir, partDesc);
+ TableDesc table = newWork.getAliasToPartnInfo().get(alias).getTableDesc();
+ fetchWork = new FetchWork(partDir, partDesc, table);
}
// set alias to fetch work
newLocalWork.getAliasToFetchWork().put(alias, fetchWork);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Fri Jul 20 03:50:52 2012
@@ -244,7 +244,7 @@ public class PartitionPruner implements
}
// Now return the set of partitions
- ret = new PrunedPartitionList(true_parts, unkn_parts, denied_parts);
+ ret = new PrunedPartitionList(tab, true_parts, unkn_parts, denied_parts);
prunedPartitionsMap.put(key, ret);
return ret;
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java Fri Jul 20 03:50:52 2012
@@ -23,11 +23,16 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
/**
* The list of pruned partitions.
*/
public class PrunedPartitionList {
+
+ // source table
+ private Table source;
+
// confirmed partitions - satisfy the partition criteria
private Set<Partition> confirmedPartns;
@@ -43,13 +48,18 @@ public class PrunedPartitionList {
* @param unknownPartns
* unknown partitions
*/
- public PrunedPartitionList(Set<Partition> confirmedPartns,
+ public PrunedPartitionList(Table source, Set<Partition> confirmedPartns,
Set<Partition> unknownPartns, Set<Partition> deniedPartns) {
+ this.source = source;
this.confirmedPartns = confirmedPartns;
this.unknownPartns = unknownPartns;
this.deniedPartns = deniedPartns;
}
+ public Table getSourceTable() {
+ return source;
+ }
+
/**
* get confirmed partitions.
*
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Jul 20 03:50:52 2012
@@ -7051,7 +7051,8 @@ public class SemanticAnalyzer extends Ba
inputs.add(new ReadEntity(part));
}
- fetch = new FetchWork(listP, partP, qb.getParseInfo()
+ TableDesc table = Utilities.getTableDesc(partsList.getSourceTable());
+ fetch = new FetchWork(listP, partP, table, qb.getParseInfo()
.getOuterQueryLimit());
noMapRed = true;
}
@@ -7060,18 +7061,7 @@ public class SemanticAnalyzer extends Ba
}
if (noMapRed) {
- if (fetch.getTblDesc() != null) {
- PlanUtils.configureInputJobPropertiesForStorageHandler(
- fetch.getTblDesc());
- } else if ( (fetch.getPartDesc() != null) && (!fetch.getPartDesc().isEmpty())){
- PartitionDesc pd0 = fetch.getPartDesc().get(0);
- TableDesc td = pd0.getTableDesc();
- if ((td != null)&&(td.getProperties() != null)
- && td.getProperties().containsKey(
- org.apache.hadoop.hive.metastore.api.Constants.META_TABLE_STORAGE)){
- PlanUtils.configureInputJobPropertiesForStorageHandler(td);
- }
- }
+ PlanUtils.configureInputJobPropertiesForStorageHandler(fetch.getTblDesc());
fetchTask = (FetchTask) TaskFactory.get(fetch, conf);
setFetchTask(fetchTask);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/FetchWork.java Fri Jul 20 03:50:52 2012
@@ -59,11 +59,13 @@ public class FetchWork implements Serial
this.limit = limit;
}
- public FetchWork(List<String> partDir, List<PartitionDesc> partDesc) {
- this(partDir, partDesc, -1);
+ public FetchWork(List<String> partDir, List<PartitionDesc> partDesc, TableDesc tblDesc) {
+ this(partDir, partDesc, tblDesc, -1);
}
- public FetchWork(List<String> partDir, List<PartitionDesc> partDesc, int limit) {
+ public FetchWork(List<String> partDir, List<PartitionDesc> partDesc,
+ TableDesc tblDesc, int limit) {
+ this.tblDesc = tblDesc;
this.partDir = new ArrayList<String>(partDir);
this.partDesc = new ArrayList<PartitionDesc>(partDesc);
this.limit = limit;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java Fri Jul 20 03:50:52 2012
@@ -126,9 +126,6 @@ public class MapredLocalWork implements
bucketMapjoinContext.deriveBucketMapJoinMapping();
}
for (FetchWork fetchWork : aliasToFetchWork.values()) {
- if (fetchWork.getTblDesc() == null) {
- continue;
- }
PlanUtils.configureInputJobPropertiesForStorageHandler(
fetchWork.getTblDesc());
}
Modified: hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin1.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin1.q?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin1.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/bucketmapjoin1.q Fri Jul 20 03:50:52 2012
@@ -1,14 +1,38 @@
CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+
+set hive.optimize.bucketmapjoin = true;
+
+-- empty partitions (HIVE-3205)
+explain extended
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08";
+
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08";
+
+explain extended
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08";
+
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08";
+
load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin;
load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin;
-CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
-CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08');
Modified: hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin9.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin9.q?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin9.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/smb_mapjoin9.q Fri Jul 20 03:50:52 2012
@@ -1,17 +1,40 @@
create table hive_test_smb_bucket1 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets;
create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets;
+set hive.optimize.bucketmapjoin = true;
+set hive.optimize.bucketmapjoin.sortedmerge = true;
+set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+
+-- empty partitions (HIVE-3205)
+explain extended
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL;
+
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL;
+
+explain extended
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL;
+
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL;
+
set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;
insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src;
insert overwrite table hive_test_smb_bucket2 partition (ds='2010-10-15') select key, value from src;
-set hive.optimize.bucketmapjoin = true;
-set hive.optimize.bucketmapjoin.sortedmerge = true;
-set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
-
-explain
+explain
create table smb_mapjoin9_results as
SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
FROM hive_test_smb_bucket1 a JOIN
Modified: hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketmapjoin1.q.out Fri Jul 20 03:50:52 2012
@@ -3,6 +3,268 @@ PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
+PREHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+PREHOOK: type: QUERY
+POSTHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ b
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (ds = '2008-04-08')
+ type: boolean
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 0
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ b {}
+ Alias Bucket File Name Mapping:
+ b {}
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6, _col7
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types int:string:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+#### A masked pattern was here ####
+PREHOOK: query: explain extended
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part) a) (TOK_TABREF (TOK_TABNAME srcbucket_mapjoin_part_2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-1 depends on stages: Stage-3
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ a
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ HashTable Sink Operator
+ condition expressions:
+ 0 {key} {value}
+ 1 {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ Position of Big Table: 1
+ Bucket Mapjoin Context:
+ Alias Bucket Base File Name Mapping:
+ a {}
+ Alias Bucket File Name Mapping:
+ a {}
+
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: (ds = '2008-04-08')
+ type: boolean
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {value}
+ 1 {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col1, _col6, _col7
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col1, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col1
+ type: string
+ expr: _col6
+ type: string
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types int:string:string
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Local Work:
+ Map Reduce Local Work
+ Needs Tagging: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
+on a.key=b.key where b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+#### A masked pattern was here ####
PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin
PREHOOK: type: LOAD
PREHOOK: Output: default@srcbucket_mapjoin
@@ -15,11 +277,6 @@ PREHOOK: Output: default@srcbucket_mapjo
POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin
POSTHOOK: type: LOAD
POSTHOOK: Output: default@srcbucket_mapjoin
-PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@srcbucket_mapjoin_part
PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
PREHOOK: type: LOAD
PREHOOK: Output: default@srcbucket_mapjoin_part
@@ -45,11 +302,6 @@ PREHOOK: Output: default@srcbucket_mapjo
POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08')
POSTHOOK: type: LOAD
POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08
-PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
-PREHOOK: type: CREATETABLE
-POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: default@srcbucket_mapjoin_part_2
PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08')
PREHOOK: type: LOAD
PREHOOK: Output: default@srcbucket_mapjoin_part_2
Modified: hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out?rev=1363639&r1=1363638&r2=1363639&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin9.q.out Fri Jul 20 03:50:52 2012
@@ -8,6 +8,216 @@ PREHOOK: type: CREATETABLE
POSTHOOK: query: create table hive_test_smb_bucket2 (key int, value string) partitioned by (ds string) clustered by (key) sorted by (key) into 2 buckets
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@hive_test_smb_bucket2
+PREHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: -- empty partitions (HIVE-3205)
+explain extended
+SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket1) a) (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ds)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k2)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2010-10-15') (= (. (TOK_TABLE_OR_COL b) ds) '2010-10-15')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) key))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ a
+ TableScan
+ alias: a
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((ds = '2010-10-15') and key is not null)
+ type: boolean
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {ds}
+ 1 {key} {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Position of Big Table: 0
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col5
+ type: int
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ expr: _col0
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:int
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+#### A masked pattern was here ####
+PREHOOK: query: explain extended
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket1) a) (TOK_TABREF (TOK_TABNAME hive_test_smb_bucket2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) ds)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key) k2)) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2010-10-15') (= (. (TOK_TABLE_OR_COL b) ds) '2010-10-15')) (TOK_FUNCTION TOK_ISNOTNULL (. (TOK_TABLE_OR_COL b) key))))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ b
+ TableScan
+ alias: b
+ GatherStats: false
+ Filter Operator
+ isSamplingPred: false
+ predicate:
+ expr: ((ds = '2010-10-15') and key is not null)
+ type: boolean
+ Sorted Merge Bucket Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {ds}
+ 1 {key} {value} {ds}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Position of Big Table: 1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: int
+ expr: _col2
+ type: string
+ expr: _col5
+ type: int
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Select Operator
+ expressions:
+ expr: _col5
+ type: int
+ expr: _col6
+ type: string
+ expr: _col7
+ type: string
+ expr: _col0
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ properties:
+ columns _col0,_col1,_col2,_col3
+ columns.types int:string:string:int
+ escape.delim \
+ serialization.format 1
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+ Needs Tagging: false
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+PREHOOK: type: QUERY
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT /* + MAPJOIN(a) */ b.key as k1, b.value, b.ds, a.key as k2
+FROM hive_test_smb_bucket1 a JOIN
+hive_test_smb_bucket2 b
+ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
+POSTHOOK: type: QUERY
+#### A masked pattern was here ####
PREHOOK: query: insert overwrite table hive_test_smb_bucket1 partition (ds='2010-10-15') select key, value from src
PREHOOK: type: QUERY
PREHOOK: Input: default@src
@@ -30,14 +240,14 @@ POSTHOOK: Lineage: hive_test_smb_bucket1
POSTHOOK: Lineage: hive_test_smb_bucket1 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: hive_test_smb_bucket2 PARTITION(ds=2010-10-15).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-PREHOOK: query: explain
+PREHOOK: query: explain
create table smb_mapjoin9_results as
SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
FROM hive_test_smb_bucket1 a JOIN
hive_test_smb_bucket2 b
ON a.key = b.key WHERE a.ds = '2010-10-15' and b.ds='2010-10-15' and b.key IS NOT NULL
PREHOOK: type: CREATETABLE_AS_SELECT
-POSTHOOK: query: explain
+POSTHOOK: query: explain
create table smb_mapjoin9_results as
SELECT /* + MAPJOIN(b) */ b.key as k1, b.value, b.ds, a.key as k2
FROM hive_test_smb_bucket1 a JOIN