Posted to commits@hive.apache.org by pr...@apache.org on 2014/10/02 03:46:48 UTC
svn commit: r1628876 [1/5] - in /hive/trunk:
common/src/java/org/apache/hadoop/hive/conf/ data/files/
itests/src/test/resources/ ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/mr/
ql/src/java/org/apache/hadoop/hi...
Author: prasanthj
Date: Thu Oct 2 01:46:46 2014
New Revision: 1628876
URL: http://svn.apache.org/r1628876
Log:
HIVE-8151: Dynamic partition sort optimization inserts record wrongly to partition when used with GroupBy (Prasanth J reviewed by Gunther Hagleitner)
Added:
hive/trunk/data/files/dynpart_test.txt
hive/trunk/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization2.q.out
Modified:
hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hive/trunk/itests/src/test/resources/testconfiguration.properties
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/queries/clientpositive/alter_partition_change_col.q
hive/trunk/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
hive/trunk/ql/src/test/results/clientpositive/auto_sortmerge_join_16.q.out
hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
hive/trunk/ql/src/test/results/clientpositive/constprog_dp.q.out
hive/trunk/ql/src/test/results/clientpositive/delete_all_partitioned.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
hive/trunk/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby4_map.q.out
hive/trunk/ql/src/test/results/clientpositive/groupby4_map_skew.q.out
hive/trunk/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
hive/trunk/ql/src/test/results/clientpositive/input30.q.out
hive/trunk/ql/src/test/results/clientpositive/input32.q.out
hive/trunk/ql/src/test/results/clientpositive/insert_into6.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part1.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part10.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part14.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part3.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part4.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part5.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part8.q.out
hive/trunk/ql/src/test/results/clientpositive/load_dyn_part9.q.out
hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
hive/trunk/ql/src/test/results/clientpositive/merge4.q.out
hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out
hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
hive/trunk/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out
hive/trunk/ql/src/test/results/clientpositive/orc_analyze.q.out
hive/trunk/ql/src/test/results/clientpositive/orc_merge2.q.out
hive/trunk/ql/src/test/results/clientpositive/stats2.q.out
hive/trunk/ql/src/test/results/clientpositive/stats4.q.out
hive/trunk/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/auto_sortmerge_join_16.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/delete_all_partitioned.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/load_dyn_part1.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/load_dyn_part3.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/orc_analyze.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/orc_merge2.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/tez_dml.q.out
hive/trunk/ql/src/test/results/clientpositive/tez/update_all_partitioned.q.out
hive/trunk/ql/src/test/results/clientpositive/union10.q.out
hive/trunk/ql/src/test/results/clientpositive/union12.q.out
hive/trunk/ql/src/test/results/clientpositive/union4.q.out
hive/trunk/ql/src/test/results/clientpositive/union_remove_17.q.out
hive/trunk/ql/src/test/results/clientpositive/update_all_partitioned.q.out
Modified: hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Thu Oct 2 01:46:46 2014
@@ -1044,7 +1044,7 @@ public class HiveConf extends Configurat
"That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.\n" +
"The optimization will be automatically disabled if number of reducers would be less than specified value."),
- HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", true,
+ HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false,
"When enabled dynamic partitioning column will be globally sorted.\n" +
"This way we can keep only one record writer open for each partition value\n" +
"in the reducer thereby reducing the memory pressure on reducers."),
Added: hive/trunk/data/files/dynpart_test.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/dynpart_test.txt?rev=1628876&view=auto
==============================================================================
--- hive/trunk/data/files/dynpart_test.txt (added)
+++ hive/trunk/data/files/dynpart_test.txt Thu Oct 2 01:46:46 2014
@@ -0,0 +1,24 @@
+24526172.99-11.32
+245261710022.633952.8
+24526172.1-2026.3
+2452617552.96-1363.84
+24526171765.07-4648.8
+2452617879.07-2185.76
+24526177412.832071.68
+245261785.825.61
+2452617565.92196.48
+24526175362.01-600.28
+24526173423.95-3164.07
+24526384133.98-775.72
+245263810171.1660.48
+2452638317.87-3775.38
+2452638156.67-4626.56
+24526381327.0857.97
+24526381971.35-488.25
+2452638181.03-207.24
+2452638267.01-3266.36
+24526380.15-241.22
+24526381524.33494.37
+2452638150.39-162.12
+24526381413.19178.08
+24526384329.49-4000.51
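
The rows above appear to run together because the test table is created without an explicit ROW FORMAT, so the file presumably uses Hive's default non-printing field delimiter (\u0001), which a plain-text view cannot display. A hedged sketch of how one such row would split under that assumption:

    public class DelimiterSketch {
      public static void main(String[] args) {
        // Assumption: fields are joined by Hive's default delimiter \u0001.
        String row = "2452617" + '\u0001' + "2.99" + '\u0001' + "-11.32";
        String[] fields = row.split("\u0001");
        // Prints: 2452617 | 2.99 | -11.32
        // (ss_sold_date_sk, ss_net_paid_inc_tax, ss_net_profit)
        System.out.println(fields[0] + " | " + fields[1] + " | " + fields[2]);
      }
    }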
Modified: hive/trunk/itests/src/test/resources/testconfiguration.properties
URL: http://svn.apache.org/viewvc/hive/trunk/itests/src/test/resources/testconfiguration.properties?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/itests/src/test/resources/testconfiguration.properties (original)
+++ hive/trunk/itests/src/test/resources/testconfiguration.properties Thu Oct 2 01:46:46 2014
@@ -75,6 +75,7 @@ minitez.query.files.shared=alter_merge_2
disable_merge_for_bucketing.q,\
dynpart_sort_opt_vectorization.q,\
dynpart_sort_optimization.q,\
+ dynpart_sort_optimization2.q,\
enforce_order.q,\
filter_join_breaktask.q,\
filter_join_breaktask2.q,\
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java Thu Oct 2 01:46:46 2014
@@ -29,6 +29,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -38,13 +40,13 @@ import org.apache.hadoop.hive.common.Sta
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.RecordUpdater;
-import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.HivePartitioner;
import org.apache.hadoop.hive.ql.io.HivePassThroughOutputFormat;
+import org.apache.hadoop.hive.ql.io.RecordUpdater;
+import org.apache.hadoop.hive.ql.io.StatsProvidingRecordWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveFatalException;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
@@ -72,14 +74,16 @@ import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;
-import com.google.common.collect.Lists;
-
/**
* File Sink operator implementation.
**/
public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
Serializable {
+ public static final Log LOG = LogFactory.getLog(FileSinkOperator.class);
+ private static final boolean isInfoEnabled = LOG.isInfoEnabled();
+ private static final boolean isDebugEnabled = LOG.isDebugEnabled();
+
protected transient HashMap<String, FSPaths> valToPaths;
protected transient int numDynParts;
protected transient List<String> dpColNames;
@@ -101,10 +105,6 @@ public class FileSinkOperator extends Te
protected transient boolean isCollectRWStats;
private transient FSPaths prevFsp;
private transient FSPaths fpaths;
- private transient ObjectInspector keyOI;
- private transient List<Object> keyWritables;
- private transient List<String> keys;
- private transient int numKeyColToRead;
private StructField recIdField; // field to find record identifier in
private StructField bucketField; // field bucket is in in record id
private StructObjectInspector recIdInspector; // OI for inspecting record id
@@ -131,9 +131,6 @@ public class FileSinkOperator extends Te
int acidLastBucket = -1;
int acidFileOffset = -1;
- public FSPaths() {
- }
-
public FSPaths(Path specPath) {
tmpPath = Utilities.toTempPath(specPath);
taskOutputTempPath = Utilities.toTaskTempPath(specPath);
@@ -141,7 +138,9 @@ public class FileSinkOperator extends Te
finalPaths = new Path[numFiles];
outWriters = new RecordWriter[numFiles];
updaters = new RecordUpdater[numFiles];
- LOG.debug("Created slots for " + numFiles);
+ if (isDebugEnabled) {
+ LOG.debug("Created slots for " + numFiles);
+ }
stat = new Stat();
}
@@ -326,7 +325,6 @@ public class FileSinkOperator extends Te
parent = Utilities.toTempPath(conf.getDirName());
statsCollectRawDataSize = conf.isStatsCollectRawDataSize();
statsFromRecordWriter = new boolean[numFiles];
-
serializer = (Serializer) conf.getTableInfo().getDeserializerClass().newInstance();
serializer.initialize(null, conf.getTableInfo().getProperties());
outputClass = serializer.getSerializedClass();
@@ -363,20 +361,6 @@ public class FileSinkOperator extends Te
lbSetup();
}
- int numPart = 0;
- int numBuck = 0;
- if (conf.getPartitionCols() != null && !conf.getPartitionCols().isEmpty()) {
- numPart = conf.getPartitionCols().size();
- }
-
- // bucket number will exists only in PARTITION_BUCKET_SORTED mode
- if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
- numBuck = 1;
- }
- numKeyColToRead = numPart + numBuck;
- keys = Lists.newArrayListWithCapacity(numKeyColToRead);
- keyWritables = Lists.newArrayListWithCapacity(numKeyColToRead);
-
if (!bDynParts) {
fsp = new FSPaths(specPath);
@@ -423,7 +407,8 @@ public class FileSinkOperator extends Te
this.dpColNames = dpCtx.getDPColNames();
this.maxPartitions = dpCtx.getMaxPartitionsPerNode();
- assert numDynParts == dpColNames.size() : "number of dynamic paritions should be the same as the size of DP mapping";
+ assert numDynParts == dpColNames.size()
+ : "number of dynamic paritions should be the same as the size of DP mapping";
if (dpColNames != null && dpColNames.size() > 0) {
this.bDynParts = true;
@@ -441,6 +426,9 @@ public class FileSinkOperator extends Te
newFieldsOI.add(sf.getFieldObjectInspector());
newFieldsName.add(sf.getFieldName());
this.dpStartCol++;
+ } else {
+ // once we found the start column for partition column we are done
+ break;
}
}
assert newFieldsOI.size() > 0 : "new Fields ObjectInspector is empty";
@@ -457,11 +445,15 @@ public class FileSinkOperator extends Te
Set<Integer> seenBuckets = new HashSet<Integer>();
for (int idx = 0; idx < totalFiles; idx++) {
if (this.getExecContext() != null && this.getExecContext().getFileId() != null) {
- LOG.info("replace taskId from execContext ");
+ if (isInfoEnabled) {
+ LOG.info("replace taskId from execContext ");
+ }
taskId = Utilities.replaceTaskIdFromFilename(taskId, this.getExecContext().getFileId());
- LOG.info("new taskId: FS " + taskId);
+ if (isInfoEnabled) {
+ LOG.info("new taskId: FS " + taskId);
+ }
assert !multiFileSpray;
assert totalFiles == 1;
@@ -515,9 +507,13 @@ public class FileSinkOperator extends Te
try {
if (isNativeTable) {
fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, null);
- LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]);
+ if (isInfoEnabled) {
+ LOG.info("Final Path: FS " + fsp.finalPaths[filesIdx]);
+ }
fsp.outPaths[filesIdx] = fsp.getTaskOutPath(taskId);
- LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]);
+ if (isInfoEnabled) {
+ LOG.info("Writing to temp file: FS " + fsp.outPaths[filesIdx]);
+ }
} else {
fsp.finalPaths[filesIdx] = fsp.outPaths[filesIdx] = specPath;
}
@@ -532,7 +528,9 @@ public class FileSinkOperator extends Te
fsp.finalPaths[filesIdx] = fsp.getFinalPath(taskId, fsp.tmpPath, extension);
}
- LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]);
+ if (isInfoEnabled) {
+ LOG.info("New Final Path: FS " + fsp.finalPaths[filesIdx]);
+ }
if (isNativeTable) {
// in recent hadoop versions, use deleteOnExit to clean tmp files.
@@ -604,14 +602,22 @@ public class FileSinkOperator extends Te
updateProgress();
// if DP is enabled, get the final output writers and prepare the real output row
- assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT : "input object inspector is not struct";
+ assert inputObjInspectors[0].getCategory() == ObjectInspector.Category.STRUCT
+ : "input object inspector is not struct";
if (bDynParts) {
+
+ // we need to read bucket number which is the last column in value (after partition columns)
+ if (conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
+ numDynParts += 1;
+ }
+
// copy the DP column values from the input row to dpVals
dpVals.clear();
dpWritables.clear();
- ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol, numDynParts,
- (StructObjectInspector) inputObjInspectors[0], ObjectInspectorCopyOption.WRITABLE);
+ ObjectInspectorUtils.partialCopyToStandardObject(dpWritables, row, dpStartCol,numDynParts,
+ (StructObjectInspector) inputObjInspectors[0],ObjectInspectorCopyOption.WRITABLE);
+
// get a set of RecordWriter based on the DP column values
// pass the null value along to the escaping process to determine what the dir should be
for (Object o : dpWritables) {
@@ -621,16 +627,11 @@ public class FileSinkOperator extends Te
dpVals.add(o.toString());
}
}
- // use SubStructObjectInspector to serialize the non-partitioning columns in the input row
- recordValue = serializer.serialize(row, subSetOI);
- // when dynamic partition sorting is not used, the DPSortState will be NONE
- // in which we will fall back to old method of file system path creation
- // i.e, having as many record writers as distinct values in partition column
- if (conf.getDpSortState().equals(DPSortState.NONE)) {
- fpaths = getDynOutPaths(dpVals, lbDirName);
- }
+ fpaths = getDynOutPaths(dpVals, lbDirName);
+ // use SubStructObjectInspector to serialize the non-partitioning columns in the input row
+ recordValue = serializer.serialize(row, subSetOI);
} else {
if (lbDirName != null) {
fpaths = lookupListBucketingPaths(lbDirName);
@@ -686,8 +687,10 @@ public class FileSinkOperator extends Te
fpaths.updaters[++fpaths.acidFileOffset] = HiveFileFormatUtils.getAcidRecordUpdater(
jc, conf.getTableInfo(), bucketNum, conf, fpaths.outPaths[fpaths.acidFileOffset],
rowInspector, reporter, 0);
- LOG.debug("Created updater for bucket number " + bucketNum + " using file " +
- fpaths.outPaths[fpaths.acidFileOffset]);
+ if (isDebugEnabled) {
+ LOG.debug("Created updater for bucket number " + bucketNum + " using file " +
+ fpaths.outPaths[fpaths.acidFileOffset]);
+ }
}
if (conf.getWriteType() == AcidUtils.Operation.UPDATE) {
@@ -834,10 +837,8 @@ public class FileSinkOperator extends Te
if (dpDir != null) {
dpDir = appendToSource(lbDirName, dpDir);
pathKey = dpDir;
- int numericBucketNum = 0;
if(conf.getDpSortState().equals(DPSortState.PARTITION_BUCKET_SORTED)) {
String buckNum = row.get(row.size() - 1);
- numericBucketNum = Integer.valueOf(buckNum);
taskId = Utilities.replaceTaskIdFromFilename(Utilities.getTaskId(hconf), buckNum);
pathKey = appendToSource(taskId, dpDir);
}
@@ -918,26 +919,6 @@ public class FileSinkOperator extends Te
}
@Override
- public void startGroup() throws HiveException {
- if (!conf.getDpSortState().equals(DPSortState.NONE)) {
- keyOI = getGroupKeyObjectInspector();
- keys.clear();
- keyWritables.clear();
- ObjectInspectorUtils.partialCopyToStandardObject(keyWritables, getGroupKeyObject(), 0,
- numKeyColToRead, (StructObjectInspector) keyOI, ObjectInspectorCopyOption.WRITABLE);
-
- for (Object o : keyWritables) {
- if (o == null || o.toString().length() == 0) {
- keys.add(dpCtx.getDefaultPartitionName());
- } else {
- keys.add(o.toString());
- }
- }
- fpaths = getDynOutPaths(keys, null);
- }
- }
-
- @Override
public void closeOp(boolean abort) throws HiveException {
if (!bDynParts && !filesCreated) {
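
A recurring pattern in this file is caching LOG.isInfoEnabled()/isDebugEnabled() once in a static final and guarding each log call, so per-row string concatenation is skipped when the level is off. A standalone sketch of the idiom, assuming commons-logging as imported above:

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;

    public class GuardedLoggingSketch {
      private static final Log LOG = LogFactory.getLog(GuardedLoggingSketch.class);
      // Evaluated once at class load; trades runtime level changes for speed.
      private static final boolean isDebugEnabled = LOG.isDebugEnabled();

      void processRow(Object row) {
        if (isDebugEnabled) {
          // Concatenation happens only when debug logging is actually on.
          LOG.debug("processing row " + row);
        }
      }
    }

Note the trade-off: because the flag is captured in a static final, a log level changed after class initialization is not picked up.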
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Thu Oct 2 01:46:46 2014
@@ -18,7 +18,22 @@
package org.apache.hadoop.hive.ql.exec;
+import java.io.Serializable;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.reflect.Field;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
import javolution.util.FastBitSet;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -54,20 +69,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
-import java.io.Serializable;
-import java.lang.management.ManagementFactory;
-import java.lang.management.MemoryMXBean;
-import java.lang.reflect.Field;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
/**
* GroupBy operator implementation.
*/
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java Thu Oct 2 01:46:46 2014
@@ -498,8 +498,6 @@ public abstract class Operator<T extends
LOG.debug("Starting group for children:");
for (Operator<? extends OperatorDesc> op : childOperators) {
- op.setGroupKeyObjectInspector(groupKeyOI);
- op.setGroupKeyObject(groupKeyObject);
op.startGroup();
}
@@ -970,7 +968,6 @@ public abstract class Operator<T extends
}
protected transient Object groupKeyObject;
- protected transient ObjectInspector groupKeyOI;
public String getOperatorId() {
return operatorId;
@@ -1287,14 +1284,6 @@ public abstract class Operator<T extends
}
}
- public void setGroupKeyObjectInspector(ObjectInspector keyObjectInspector) {
- this.groupKeyOI = keyObjectInspector;
- }
-
- public ObjectInspector getGroupKeyObjectInspector() {
- return groupKeyOI;
- }
-
public static Operator createDummy() {
return new DummyOperator();
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Thu Oct 2 01:46:46 2014
@@ -50,7 +50,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
@@ -67,6 +66,9 @@ public class ReduceSinkOperator extends
}
private static final Log LOG = LogFactory.getLog(ReduceSinkOperator.class.getName());
+ private static final boolean isInfoEnabled = LOG.isInfoEnabled();
+ private static final boolean isDebugEnabled = LOG.isDebugEnabled();
+ private static final boolean isTraceEnabled = LOG.isTraceEnabled();
private static final long serialVersionUID = 1L;
private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance();
@@ -117,6 +119,8 @@ public class ReduceSinkOperator extends
protected transient Object[] cachedValues;
protected transient List<List<Integer>> distinctColIndices;
protected transient Random random;
+ protected transient int bucketNumber;
+
/**
* This two dimensional array holds key data and a corresponding Union object
* which contains the tag identifying the aggregate expression for distinct columns.
@@ -144,8 +148,14 @@ public class ReduceSinkOperator extends
protected void initializeOp(Configuration hconf) throws HiveException {
try {
List<ExprNodeDesc> keys = conf.getKeyCols();
- LOG.debug("keys size is " + keys.size());
- for (ExprNodeDesc k : keys) LOG.debug("Key exprNodeDesc " + k.getExprString());
+
+ if (isDebugEnabled) {
+ LOG.debug("keys size is " + keys.size());
+ for (ExprNodeDesc k : keys) {
+ LOG.debug("Key exprNodeDesc " + k.getExprString());
+ }
+ }
+
keyEval = new ExprNodeEvaluator[keys.size()];
int i = 0;
for (ExprNodeDesc e : keys) {
@@ -184,7 +194,9 @@ public class ReduceSinkOperator extends
tag = conf.getTag();
tagByte[0] = (byte) tag;
skipTag = conf.getSkipTag();
- LOG.info("Using tag = " + tag);
+ if (isInfoEnabled) {
+ LOG.info("Using tag = " + tag);
+ }
TableDesc keyTableDesc = conf.getKeySerializeInfo();
keySerializer = (Serializer) keyTableDesc.getDeserializerClass()
@@ -284,7 +296,10 @@ public class ReduceSinkOperator extends
bucketInspector = (IntObjectInspector)bucketField.getFieldObjectInspector();
}
- LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " + conf.getNumDistributionKeys());
+ if (isInfoEnabled) {
+ LOG.info("keys are " + conf.getOutputKeyColumnNames() + " num distributions: " +
+ conf.getNumDistributionKeys());
+ }
keyObjectInspector = initEvaluatorsAndReturnStruct(keyEval,
distinctColIndices,
conf.getOutputKeyColumnNames(), numDistributionKeys, rowInspector);
@@ -304,15 +319,14 @@ public class ReduceSinkOperator extends
populateCachedDistributionKeys(row, 0);
// replace bucketing columns with hashcode % numBuckets
- int buckNum = -1;
if (bucketEval != null) {
- buckNum = computeBucketNumber(row, conf.getNumBuckets());
- cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum);
+ bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
+ cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber));
} else if (conf.getWriteType() == AcidUtils.Operation.UPDATE ||
conf.getWriteType() == AcidUtils.Operation.DELETE) {
// In the non-partitioned case we still want to compute the bucket number for updates and
// deletes.
- buckNum = computeBucketNumber(row, conf.getNumBuckets());
+ bucketNumber = computeBucketNumber(row, conf.getNumBuckets());
}
HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
@@ -328,7 +342,7 @@ public class ReduceSinkOperator extends
if (autoParallel && partitionEval.length > 0) {
hashCode = computeMurmurHash(firstKey);
} else {
- hashCode = computeHashCode(row, buckNum);
+ hashCode = computeHashCode(row);
}
firstKey.setHashCode(hashCode);
@@ -377,7 +391,9 @@ public class ReduceSinkOperator extends
// column directly.
Object recIdValue = acidRowInspector.getStructFieldData(row, recIdField);
buckNum = bucketInspector.get(recIdInspector.getStructFieldData(recIdValue, bucketField));
- LOG.debug("Acid choosing bucket number " + buckNum);
+ if (isTraceEnabled) {
+ LOG.trace("Acid choosing bucket number " + buckNum);
+ }
} else {
for (int i = 0; i < bucketEval.length; i++) {
Object o = bucketEval[i].evaluate(row);
@@ -422,7 +438,7 @@ public class ReduceSinkOperator extends
return hash.hash(firstKey.getBytes(), firstKey.getDistKeyLength(), 0);
}
- private int computeHashCode(Object row, int buckNum) throws HiveException {
+ private int computeHashCode(Object row) throws HiveException {
// Evaluate the HashCode
int keyHashCode = 0;
if (partitionEval.length == 0) {
@@ -446,8 +462,10 @@ public class ReduceSinkOperator extends
+ ObjectInspectorUtils.hashCode(o, partitionObjectInspectors[i]);
}
}
- LOG.debug("Going to return hash code " + (keyHashCode * 31 + buckNum));
- return buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum;
+ if (isTraceEnabled) {
+ LOG.trace("Going to return hash code " + (keyHashCode * 31 + bucketNumber));
+ }
+ return bucketNumber < 0 ? keyHashCode : keyHashCode * 31 + bucketNumber;
}
private boolean partitionKeysAreNull(Object row) throws HiveException {
@@ -493,10 +511,19 @@ public class ReduceSinkOperator extends
}
private BytesWritable makeValueWritable(Object row) throws Exception {
+ int length = valueEval.length;
+
+ // in case of bucketed table, insert the bucket number as the last column in value
+ if (bucketEval != null) {
+ length -= 1;
+ cachedValues[length] = new Text(String.valueOf(bucketNumber));
+ }
+
// Evaluate the value
- for (int i = 0; i < valueEval.length; i++) {
+ for (int i = 0; i < length; i++) {
cachedValues[i] = valueEval[i].evaluate(row);
}
+
// Serialize the value
return (BytesWritable) valueSerializer.serialize(cachedValues, valueObjectInspector);
}
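
The makeValueWritable() change above reserves the last value column for the bucket number, written as Text rather than IntWritable so the reducer-side FileSinkOperator can read it back as an ordinary string column (see the numDynParts += 1 adjustment there). A hypothetical standalone illustration of that value layout, not the operator code itself:

    import org.apache.hadoop.io.Text;

    public class ValueLayoutSketch {
      // evaluated: the already-evaluated value columns; when the table is
      // bucketed, the final slot is not filled by an ordinary expression.
      public static Object[] buildValueRow(Object[] evaluated, int bucketNumber,
          boolean bucketed) {
        Object[] cachedValues = evaluated.clone();
        if (bucketed) {
          // Bucket number rides along as the trailing column, serialized as text.
          cachedValues[cachedValues.length - 1] = new Text(String.valueOf(bucketNumber));
        }
        return cachedValues;
      }
    }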
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Thu Oct 2 01:46:46 2014
@@ -66,6 +66,8 @@ import org.apache.hadoop.util.StringUtil
public class ExecReducer extends MapReduceBase implements Reducer {
private static final Log LOG = LogFactory.getLog("ExecReducer");
+ private static final boolean isInfoEnabled = LOG.isInfoEnabled();
+ private static final boolean isTraceEnabled = LOG.isTraceEnabled();
private static final String PLAN_KEY = "__REDUCE_PLAN__";
// used to log memory usage periodically
@@ -75,7 +77,6 @@ public class ExecReducer extends MapRedu
private final Deserializer[] inputValueDeserializer = new Deserializer[Byte.MAX_VALUE];
private final Object[] valueObject = new Object[Byte.MAX_VALUE];
private final List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
- private final boolean isLogInfoEnabled = LOG.isInfoEnabled();
// TODO: move to DynamicSerDe when it's ready
private Deserializer inputKeyDeserializer;
@@ -101,16 +102,18 @@ public class ExecReducer extends MapRedu
ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector keyObjectInspector;
- LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
+ if (isInfoEnabled) {
+ LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
- try {
- LOG.info("conf classpath = "
- + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
- LOG.info("thread classpath = "
- + Arrays.asList(((URLClassLoader) Thread.currentThread()
- .getContextClassLoader()).getURLs()));
- } catch (Exception e) {
- LOG.info("cannot get classpath: " + e.getMessage());
+ try {
+ LOG.info("conf classpath = "
+ + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
+ LOG.info("thread classpath = "
+ + Arrays.asList(((URLClassLoader) Thread.currentThread()
+ .getContextClassLoader()).getURLs()));
+ } catch (Exception e) {
+ LOG.info("cannot get classpath: " + e.getMessage());
+ }
}
jc = job;
@@ -147,7 +150,6 @@ public class ExecReducer extends MapRedu
ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
ois.add(keyObjectInspector);
ois.add(valueObjectInspector[tag]);
- reducer.setGroupKeyObjectInspector(keyObjectInspector);
rowObjectInspector[tag] = ObjectInspectorFactory
.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
}
@@ -202,7 +204,9 @@ public class ExecReducer extends MapRedu
groupKey = new BytesWritable();
} else {
// If a operator wants to do some work at the end of a group
- LOG.trace("End Group");
+ if (isTraceEnabled) {
+ LOG.trace("End Group");
+ }
reducer.endGroup();
}
@@ -217,9 +221,11 @@ public class ExecReducer extends MapRedu
}
groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
- LOG.trace("Start Group");
- reducer.setGroupKeyObject(keyObject);
+ if (isTraceEnabled) {
+ LOG.trace("Start Group");
+ }
reducer.startGroup();
+ reducer.setGroupKeyObject(keyObject);
}
// System.err.print(keyObject.toString());
while (values.hasNext()) {
@@ -239,12 +245,14 @@ public class ExecReducer extends MapRedu
row.clear();
row.add(keyObject);
row.add(valueObject[tag]);
- if (isLogInfoEnabled) {
+ if (isInfoEnabled) {
cntr++;
if (cntr == nextCntr) {
long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed();
- LOG.info("ExecReducer: processing " + cntr
- + " rows: used memory = " + used_memory);
+ if (isInfoEnabled) {
+ LOG.info("ExecReducer: processing " + cntr
+ + " rows: used memory = " + used_memory);
+ }
nextCntr = getNextCntr(cntr);
}
}
@@ -290,17 +298,19 @@ public class ExecReducer extends MapRedu
public void close() {
// No row was processed
- if (oc == null) {
+ if (oc == null && isTraceEnabled) {
LOG.trace("Close called without any rows processed");
}
try {
if (groupKey != null) {
// If a operator wants to do some work at the end of a group
- LOG.trace("End Group");
+ if (isTraceEnabled) {
+ LOG.trace("End Group");
+ }
reducer.endGroup();
}
- if (isLogInfoEnabled) {
+ if (isInfoEnabled) {
LOG.info("ExecReducer: processed " + cntr + " rows: used memory = "
+ memoryMXBean.getHeapMemoryUsage().getUsed());
}
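
Both here and in the Tez ReduceRecordSource below, the fix swaps the order at a group boundary: startGroup() now runs before the new key object is published via setGroupKeyObject(), so no operator can act on the incoming key during group setup. A minimal sketch of the reordered protocol, wrapping the Operator calls used in this diff (the wrapper class itself is hypothetical):

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.metadata.HiveException;

    public class GroupBoundarySketch {
      private final Operator<?> reducer;
      private boolean hasOpenGroup;

      GroupBoundarySketch(Operator<?> reducer) {
        this.reducer = reducer;
      }

      // Called whenever the reduce key changes.
      void onNewKey(Object keyObject) throws HiveException {
        if (hasOpenGroup) {
          reducer.endGroup();                   // flush the previous group first
        }
        reducer.startGroup();                   // setup runs before the key is visible
        reducer.setGroupKeyObject(keyObject);   // key published only after startGroup()
        hasOpenGroup = true;
      }
    }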
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java Thu Oct 2 01:46:46 2014
@@ -131,7 +131,6 @@ public class ReduceRecordSource implemen
.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeyDeserializer.getObjectInspector();
- reducer.setGroupKeyObjectInspector(keyObjectInspector);
if(vectorized) {
keyStructInspector = (StructObjectInspector) keyObjectInspector;
@@ -240,8 +239,8 @@ public class ReduceRecordSource implemen
}
groupKey.set(keyWritable.getBytes(), 0, keyWritable.getLength());
- reducer.setGroupKeyObject(keyObject);
reducer.startGroup();
+ reducer.setGroupKeyObject(keyObject);
}
/* this.keyObject passed via reference */
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorReduceSinkOperator.java Thu Oct 2 01:46:46 2014
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
// import org.apache.hadoop.util.StringUtils;
@@ -270,10 +269,9 @@ public class VectorReduceSinkOperator ex
populatedCachedDistributionKeys(vrg, rowIndex, 0);
// replace bucketing columns with hashcode % numBuckets
- int buckNum = -1;
if (bucketEval != null) {
- buckNum = computeBucketNumber(vrg, rowIndex, conf.getNumBuckets());
- cachedKeys[0][buckColIdxInKey] = new IntWritable(buckNum);
+ bucketNumber = computeBucketNumber(vrg, rowIndex, conf.getNumBuckets());
+ cachedKeys[0][buckColIdxInKey] = new Text(String.valueOf(bucketNumber));
}
HiveKey firstKey = toHiveKey(cachedKeys[0], tag, null);
int distKeyLength = firstKey.getDistKeyLength();
@@ -289,7 +287,7 @@ public class VectorReduceSinkOperator ex
if (autoParallel && partitionEval.length > 0) {
hashCode = computeMurmurHash(firstKey);
} else {
- hashCode = computeHashCode(vrg, rowIndex, buckNum);
+ hashCode = computeHashCode(vrg, rowIndex);
}
firstKey.setHashCode(hashCode);
@@ -417,7 +415,15 @@ public class VectorReduceSinkOperator ex
private BytesWritable makeValueWritable(VectorizedRowBatch vrg, int rowIndex)
throws HiveException, SerDeException {
- for (int i = 0; i < valueEval.length; i++) {
+ int length = valueEval.length;
+
+ // in case of bucketed table, insert the bucket number as the last column in value
+ if (bucketEval != null) {
+ length -= 1;
+ cachedValues[length] = new Text(String.valueOf(bucketNumber));
+ }
+
+ for (int i = 0; i < length; i++) {
int batchColumn = valueEval[i].getOutputColumn();
ColumnVector vectorColumn = vrg.cols[batchColumn];
cachedValues[i] = valueWriters[i].writeValue(vectorColumn, rowIndex);
@@ -426,7 +432,7 @@ public class VectorReduceSinkOperator ex
return (BytesWritable)valueSerializer.serialize(cachedValues, valueObjectInspector);
}
- private int computeHashCode(VectorizedRowBatch vrg, int rowIndex, int buckNum) throws HiveException {
+ private int computeHashCode(VectorizedRowBatch vrg, int rowIndex) throws HiveException {
// Evaluate the HashCode
int keyHashCode = 0;
if (partitionEval.length == 0) {
@@ -449,7 +455,7 @@ public class VectorReduceSinkOperator ex
partitionWriters[p].getObjectInspector());
}
}
- return buckNum < 0 ? keyHashCode : keyHashCode * 31 + buckNum;
+ return bucketNumber < 0 ? keyHashCode : keyHashCode * 31 + bucketNumber;
}
private boolean partitionKeysAreNull(VectorizedRowBatch vrg, int rowIndex)
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java Thu Oct 2 01:46:46 2014
@@ -71,7 +71,6 @@ import org.apache.hadoop.hive.ql.plan.Pl
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.io.IntWritable;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -85,6 +84,7 @@ import com.google.common.collect.Maps;
*/
public class SortedDynPartitionOptimizer implements Transform {
+ private static final String BUCKET_NUMBER_COL_NAME = "_bucket_number";
@Override
public ParseContext transform(ParseContext pCtx) throws SemanticException {
@@ -216,6 +216,13 @@ public class SortedDynPartitionOptimizer
ReduceSinkDesc rsConf = getReduceSinkDesc(partitionPositions, sortPositions, sortOrder,
newValueCols, bucketColumns, numBuckets, fsParent, fsOp.getConf().getWriteType());
+ if (!bucketColumns.isEmpty()) {
+ String tableAlias = outRR.getColumnInfos().get(0).getTabAlias();
+ ColumnInfo ci = new ColumnInfo(BUCKET_NUMBER_COL_NAME, TypeInfoFactory.stringTypeInfo,
+ tableAlias, true, true);
+ outRR.put(tableAlias, BUCKET_NUMBER_COL_NAME, ci);
+ }
+
// Create ReduceSink operator
ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
OperatorFactory.getAndMakeChild(rsConf, new RowSchema(outRR.getColumnInfos()), fsParent),
@@ -380,8 +387,11 @@ public class SortedDynPartitionOptimizer
// corresponding with bucket number and hence their OIs
for (Integer idx : keyColsPosInVal) {
if (idx < 0) {
- newKeyCols.add(new ExprNodeConstantDesc(TypeInfoFactory
- .getPrimitiveTypeInfoFromPrimitiveWritable(IntWritable.class), -1));
+ // add bucket number column to both key and value
+ ExprNodeConstantDesc encd = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo,
+ BUCKET_NUMBER_COL_NAME);
+ newKeyCols.add(encd);
+ newValueCols.add(encd);
} else {
newKeyCols.add(newValueCols.get(idx).clone());
}
@@ -418,6 +428,9 @@ public class SortedDynPartitionOptimizer
List<String> outCols = Utilities.getInternalColumnNamesFromSignature(parent.getSchema()
.getSignature());
ArrayList<String> outValColNames = Lists.newArrayList(outCols);
+ if (!bucketColumns.isEmpty()) {
+ outValColNames.add(BUCKET_NUMBER_COL_NAME);
+ }
List<FieldSchema> valFields = PlanUtils.getFieldSchemasFromColumnList(newValueCols,
outValColNames, 0, "");
TableDesc valueTable = PlanUtils.getReduceValueTableDesc(valFields);
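
The "_bucket_number" marker above replaces the old IntWritable(-1) placeholder: it is carried as a string constant expression appended to both the reduce key and the reduce value, keeping the column aligned from the optimizer through ReduceSinkOperator to FileSinkOperator. A sketch of just that construction, using the classes imported in this file:

    import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class BucketNumberColSketch {
      private static final String BUCKET_NUMBER_COL_NAME = "_bucket_number";

      // The same constant desc is added to newKeyCols and newValueCols in the
      // diff above; this helper only illustrates how it is built.
      public static ExprNodeConstantDesc bucketNumberMarker() {
        return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo,
            BUCKET_NUMBER_COL_NAME);
      }
    }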
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Thu Oct 2 01:46:46 2014
@@ -6478,6 +6478,7 @@ public class SemanticAnalyzer extends Ba
int columnNumber = tableFields.size();
ArrayList<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(
columnNumber);
+
// MetadataTypedColumnsetSerDe does not need type conversions because it
// does the conversion to String by itself.
boolean isMetaDataSerDe = table_desc.getDeserializerClass().equals(
@@ -6545,17 +6546,19 @@ public class SemanticAnalyzer extends Ba
if (converted) {
// add the select operator
RowResolver rowResolver = new RowResolver();
- ArrayList<String> colName = new ArrayList<String>();
+ ArrayList<String> colNames = new ArrayList<String>();
+ Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
for (int i = 0; i < expressions.size(); i++) {
String name = getColumnInternalName(i);
rowResolver.put("", name, new ColumnInfo(name, expressions.get(i)
.getTypeInfo(), "", false));
- colName.add(name);
+ colNames.add(name);
+ colExprMap.put(name, expressions.get(i));
}
Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
- new SelectDesc(expressions, colName), new RowSchema(rowResolver
+ new SelectDesc(expressions, colNames), new RowSchema(rowResolver
.getColumnInfos()), input), rowResolver);
-
+ output.setColumnExprMap(colExprMap);
return output;
} else {
// not converted
Modified: hive/trunk/ql/src/test/queries/clientpositive/alter_partition_change_col.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/alter_partition_change_col.q?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/alter_partition_change_col.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/alter_partition_change_col.q Thu Oct 2 01:46:46 2014
@@ -1,6 +1,8 @@
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
+-- SORT_QUERY_RESULTS
+
create table alter_partition_change_col0 (c1 string, c2 string);
load data local inpath '../../data/files/dec.txt' overwrite into table alter_partition_change_col0;
Added: hive/trunk/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q?rev=1628876&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/dynpart_sort_optimization2.q Thu Oct 2 01:46:46 2014
@@ -0,0 +1,246 @@
+set hive.optimize.sort.dynamic.partition=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.max.dynamic.partitions=1000;
+set hive.exec.max.dynamic.partitions.pernode=1000;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.enforce.bucketing=false;
+set hive.enforce.sorting=false;
+set hive.exec.submitviachild=true;
+set hive.exec.submit.local.task.via.child=true;
+
+drop table ss;
+drop table ss_orc;
+drop table ss_part;
+drop table ss_part_orc;
+
+create table ss (
+ss_sold_date_sk int,
+ss_net_paid_inc_tax float,
+ss_net_profit float);
+
+create table ss_part (
+ss_net_paid_inc_tax float,
+ss_net_profit float)
+partitioned by (ss_sold_date_sk int);
+
+load data local inpath '../../data/files/dynpart_test.txt' overwrite into table ss;
+
+explain insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452617);
+select * from ss_part where ss_sold_date_sk=2452617;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452638);
+select * from ss_part where ss_sold_date_sk=2452638;
+
+explain insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452617);
+select * from ss_part where ss_sold_date_sk=2452617;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452638);
+select * from ss_part where ss_sold_date_sk=2452638;
+
+set hive.optimize.sort.dynamic.partition=false;
+-- SORT DYNAMIC PARTITION DISABLED
+
+explain insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452617);
+select * from ss_part where ss_sold_date_sk=2452617;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452638);
+select * from ss_part where ss_sold_date_sk=2452638;
+
+explain insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452617);
+select * from ss_part where ss_sold_date_sk=2452617;
+
+desc formatted ss_part partition(ss_sold_date_sk=2452638);
+select * from ss_part where ss_sold_date_sk=2452638;
+
+set hive.vectorized.execution.enabled=true;
+-- VECTORIZATION IS ENABLED
+
+create table ss_orc (
+ss_sold_date_sk int,
+ss_net_paid_inc_tax float,
+ss_net_profit float) stored as orc;
+
+create table ss_part_orc (
+ss_net_paid_inc_tax float,
+ss_net_profit float)
+partitioned by (ss_sold_date_sk int) stored as orc;
+
+insert overwrite table ss_orc select * from ss;
+
+drop table ss;
+drop table ss_part;
+
+explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss_orc
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part_orc partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss_orc
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ group by ss_sold_date_sk,
+ ss_net_paid_inc_tax,
+ ss_net_profit
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part_orc partition(ss_sold_date_sk=2452617);
+select * from ss_part_orc where ss_sold_date_sk=2452617;
+
+desc formatted ss_part_orc partition(ss_sold_date_sk=2452638);
+select * from ss_part_orc where ss_sold_date_sk=2452638;
+
+explain insert overwrite table ss_part_orc partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss_orc
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+insert overwrite table ss_part_orc partition (ss_sold_date_sk)
+select ss_net_paid_inc_tax,
+ ss_net_profit,
+ ss_sold_date_sk
+ from ss_orc
+ where ss_sold_date_sk>=2452617 and ss_sold_date_sk<=2452638
+ distribute by ss_sold_date_sk;
+
+desc formatted ss_part_orc partition(ss_sold_date_sk=2452617);
+select * from ss_part_orc where ss_sold_date_sk=2452617;
+
+desc formatted ss_part_orc partition(ss_sold_date_sk=2452638);
+select * from ss_part_orc where ss_sold_date_sk=2452638;
+
+drop table ss_orc;
+drop table ss_part_orc;
+
+drop table if exists hive13_dp1;
+create table if not exists hive13_dp1 (
+ k1 int,
+ k2 int
+)
+PARTITIONED BY(`day` string)
+STORED AS ORC;
+
+set hive.optimize.sort.dynamic.partition=false;
+explain insert overwrite table `hive13_dp1` partition(`day`)
+select
+ key k1,
+ count(value) k2,
+ "day" `day`
+from src
+group by "day", key;
+
+insert overwrite table `hive13_dp1` partition(`day`)
+select
+ key k1,
+ count(value) k2,
+ "day" `day`
+from src
+group by "day", key;
+select * from hive13_dp1 limit 5;
+
+set hive.optimize.sort.dynamic.partition=true;
+explain insert overwrite table `hive13_dp1` partition(`day`)
+select
+ key k1,
+ count(value) k2,
+ "day" `day`
+from src
+group by "day", key;
+
+insert overwrite table `hive13_dp1` partition(`day`)
+select
+ key k1,
+ count(value) k2,
+ "day" `day`
+from src
+group by "day", key;
+select * from hive13_dp1 limit 5;
+
+drop table hive13_dp1;
Modified: hive/trunk/ql/src/test/results/clientpositive/alter_partition_change_col.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/alter_partition_change_col.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/alter_partition_change_col.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/alter_partition_change_col.q.out Thu Oct 2 01:46:46 2014
@@ -1,8 +1,12 @@
-PREHOOK: query: create table alter_partition_change_col0 (c1 string, c2 string)
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+create table alter_partition_change_col0 (c1 string, c2 string)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@alter_partition_change_col0
-POSTHOOK: query: create table alter_partition_change_col0 (c1 string, c2 string)
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+create table alter_partition_change_col0 (c1 string, c2 string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@alter_partition_change_col0
@@ -61,26 +65,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 234.79 __HIVE_DEFAULT_PARTITION__
-Cluck 5.96 __HIVE_DEFAULT_PARTITION__
-Tom 19.00 __HIVE_DEFAULT_PARTITION__
-Mary 4.329 __HIVE_DEFAULT_PARTITION__
Beck 0.0 __HIVE_DEFAULT_PARTITION__
-Snow 55.71 __HIVE_DEFAULT_PARTITION__
-Mary 33.33 __HIVE_DEFAULT_PARTITION__
+Beck 0.0 abc
Beck 77.341 __HIVE_DEFAULT_PARTITION__
+Beck 77.341 abc
Beck 79.9 __HIVE_DEFAULT_PARTITION__
-Tom -12.25 __HIVE_DEFAULT_PARTITION__
Beck 79.9 abc
-Beck 0.0 abc
-Tom 19.00 abc
+Cluck 5.96 __HIVE_DEFAULT_PARTITION__
+Cluck 5.96 abc
+Mary 33.33 __HIVE_DEFAULT_PARTITION__
Mary 33.33 abc
-Tom -12.25 abc
+Mary 4.329 __HIVE_DEFAULT_PARTITION__
Mary 4.329 abc
+Snow 55.71 __HIVE_DEFAULT_PARTITION__
Snow 55.71 abc
-Beck 77.341 abc
+Tom -12.25 __HIVE_DEFAULT_PARTITION__
+Tom -12.25 abc
+Tom 19.00 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 abc
+Tom 234.79 __HIVE_DEFAULT_PARTITION__
Tom 234.79 abc
-Cluck 5.96 abc
PREHOOK: query: -- Change c2 to decimal(10,0)
alter table alter_partition_change_col1 change c2 c2 decimal(10,0)
PREHOOK: type: ALTERTABLE_RENAMECOL
@@ -121,26 +125,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 235 __HIVE_DEFAULT_PARTITION__
-Cluck 6 __HIVE_DEFAULT_PARTITION__
-Tom 19 __HIVE_DEFAULT_PARTITION__
-Mary 4 __HIVE_DEFAULT_PARTITION__
Beck 0 __HIVE_DEFAULT_PARTITION__
-Snow 56 __HIVE_DEFAULT_PARTITION__
-Mary 33 __HIVE_DEFAULT_PARTITION__
+Beck 0 abc
Beck 77 __HIVE_DEFAULT_PARTITION__
+Beck 77 abc
Beck 80 __HIVE_DEFAULT_PARTITION__
-Tom -12 __HIVE_DEFAULT_PARTITION__
Beck 80 abc
-Beck 0 abc
-Tom 19 abc
+Cluck 6 __HIVE_DEFAULT_PARTITION__
+Cluck 6 abc
+Mary 33 __HIVE_DEFAULT_PARTITION__
Mary 33 abc
-Tom -12 abc
+Mary 4 __HIVE_DEFAULT_PARTITION__
Mary 4 abc
+Snow 56 __HIVE_DEFAULT_PARTITION__
Snow 56 abc
-Beck 77 abc
+Tom -12 __HIVE_DEFAULT_PARTITION__
+Tom -12 abc
+Tom 19 __HIVE_DEFAULT_PARTITION__
+Tom 19 abc
+Tom 235 __HIVE_DEFAULT_PARTITION__
Tom 235 abc
-Cluck 6 abc
PREHOOK: query: -- Change the column type at the table level. Table-level describe shows the new type, but the existing partition does not.
alter table alter_partition_change_col1 change c2 c2 decimal(14,4)
PREHOOK: type: ALTERTABLE_RENAMECOL
@@ -191,26 +195,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 235 __HIVE_DEFAULT_PARTITION__
-Cluck 6 __HIVE_DEFAULT_PARTITION__
-Tom 19 __HIVE_DEFAULT_PARTITION__
-Mary 4 __HIVE_DEFAULT_PARTITION__
Beck 0 __HIVE_DEFAULT_PARTITION__
-Snow 56 __HIVE_DEFAULT_PARTITION__
-Mary 33 __HIVE_DEFAULT_PARTITION__
+Beck 0 abc
Beck 77 __HIVE_DEFAULT_PARTITION__
+Beck 77 abc
Beck 80 __HIVE_DEFAULT_PARTITION__
-Tom -12 __HIVE_DEFAULT_PARTITION__
Beck 80 abc
-Beck 0 abc
-Tom 19 abc
+Cluck 6 __HIVE_DEFAULT_PARTITION__
+Cluck 6 abc
+Mary 33 __HIVE_DEFAULT_PARTITION__
Mary 33 abc
-Tom -12 abc
+Mary 4 __HIVE_DEFAULT_PARTITION__
Mary 4 abc
+Snow 56 __HIVE_DEFAULT_PARTITION__
Snow 56 abc
-Beck 77 abc
+Tom -12 __HIVE_DEFAULT_PARTITION__
+Tom -12 abc
+Tom 19 __HIVE_DEFAULT_PARTITION__
+Tom 19 abc
+Tom 235 __HIVE_DEFAULT_PARTITION__
Tom 235 abc
-Cluck 6 abc
PREHOOK: query: -- now change the column type of the existing partition
alter table alter_partition_change_col1 partition (p1='abc') change c2 c2 decimal(14,4)
PREHOOK: type: ALTERTABLE_RENAMECOL
@@ -248,26 +252,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 235 __HIVE_DEFAULT_PARTITION__
-Cluck 6 __HIVE_DEFAULT_PARTITION__
-Tom 19 __HIVE_DEFAULT_PARTITION__
-Mary 4 __HIVE_DEFAULT_PARTITION__
Beck 0 __HIVE_DEFAULT_PARTITION__
-Snow 56 __HIVE_DEFAULT_PARTITION__
-Mary 33 __HIVE_DEFAULT_PARTITION__
+Beck 0.0 abc
Beck 77 __HIVE_DEFAULT_PARTITION__
-Beck 80 __HIVE_DEFAULT_PARTITION__
-Tom -12 __HIVE_DEFAULT_PARTITION__
+Beck 77.341 abc
Beck 79.9 abc
-Beck 0.0 abc
-Tom 19.00 abc
+Beck 80 __HIVE_DEFAULT_PARTITION__
+Cluck 5.96 abc
+Cluck 6 __HIVE_DEFAULT_PARTITION__
+Mary 33 __HIVE_DEFAULT_PARTITION__
Mary 33.33 abc
-Tom -12.25 abc
+Mary 4 __HIVE_DEFAULT_PARTITION__
Mary 4.329 abc
Snow 55.71 abc
-Beck 77.341 abc
+Snow 56 __HIVE_DEFAULT_PARTITION__
+Tom -12 __HIVE_DEFAULT_PARTITION__
+Tom -12.25 abc
+Tom 19 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 abc
Tom 234.79 abc
-Cluck 5.96 abc
+Tom 235 __HIVE_DEFAULT_PARTITION__
PREHOOK: query: -- change column for default partition value
alter table alter_partition_change_col1 partition (p1='__HIVE_DEFAULT_PARTITION__') change c2 c2 decimal(14,4)
PREHOOK: type: ALTERTABLE_RENAMECOL
@@ -305,26 +309,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 234.79 __HIVE_DEFAULT_PARTITION__
-Cluck 5.96 __HIVE_DEFAULT_PARTITION__
-Tom 19.00 __HIVE_DEFAULT_PARTITION__
-Mary 4.329 __HIVE_DEFAULT_PARTITION__
Beck 0.0 __HIVE_DEFAULT_PARTITION__
-Snow 55.71 __HIVE_DEFAULT_PARTITION__
-Mary 33.33 __HIVE_DEFAULT_PARTITION__
+Beck 0.0 abc
Beck 77.341 __HIVE_DEFAULT_PARTITION__
+Beck 77.341 abc
Beck 79.9 __HIVE_DEFAULT_PARTITION__
-Tom -12.25 __HIVE_DEFAULT_PARTITION__
Beck 79.9 abc
-Beck 0.0 abc
-Tom 19.00 abc
+Cluck 5.96 __HIVE_DEFAULT_PARTITION__
+Cluck 5.96 abc
+Mary 33.33 __HIVE_DEFAULT_PARTITION__
Mary 33.33 abc
-Tom -12.25 abc
+Mary 4.329 __HIVE_DEFAULT_PARTITION__
Mary 4.329 abc
+Snow 55.71 __HIVE_DEFAULT_PARTITION__
Snow 55.71 abc
-Beck 77.341 abc
+Tom -12.25 __HIVE_DEFAULT_PARTITION__
+Tom -12.25 abc
+Tom 19.00 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 abc
+Tom 234.79 __HIVE_DEFAULT_PARTITION__
Tom 234.79 abc
-Cluck 5.96 abc
PREHOOK: query: -- Try out replace columns
alter table alter_partition_change_col1 partition (p1='abc') replace columns (c1 string)
PREHOOK: type: ALTERTABLE_REPLACECOLS
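
Replacing the p1='abc' partition's columns with just (c1 string) drops c2 from that partition's schema, so the table-level read in the next hunk returns NULL for c2 on every 'abc' row while the default partition's decimals survive:

  alter table alter_partition_change_col1 partition (p1='abc')
    replace columns (c1 string);
  select * from alter_partition_change_col1;  -- 'abc' rows: c2 is NULL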
@@ -375,26 +379,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 234.79 __HIVE_DEFAULT_PARTITION__
-Cluck 5.96 __HIVE_DEFAULT_PARTITION__
-Tom 19.00 __HIVE_DEFAULT_PARTITION__
-Mary 4.329 __HIVE_DEFAULT_PARTITION__
Beck 0.0 __HIVE_DEFAULT_PARTITION__
-Snow 55.71 __HIVE_DEFAULT_PARTITION__
-Mary 33.33 __HIVE_DEFAULT_PARTITION__
Beck 77.341 __HIVE_DEFAULT_PARTITION__
Beck 79.9 __HIVE_DEFAULT_PARTITION__
-Tom -12.25 __HIVE_DEFAULT_PARTITION__
Beck NULL abc
Beck NULL abc
-Tom NULL abc
+Beck NULL abc
+Cluck 5.96 __HIVE_DEFAULT_PARTITION__
+Cluck NULL abc
+Mary 33.33 __HIVE_DEFAULT_PARTITION__
+Mary 4.329 __HIVE_DEFAULT_PARTITION__
Mary NULL abc
-Tom NULL abc
Mary NULL abc
+Snow 55.71 __HIVE_DEFAULT_PARTITION__
Snow NULL abc
-Beck NULL abc
+Tom -12.25 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 __HIVE_DEFAULT_PARTITION__
+Tom 234.79 __HIVE_DEFAULT_PARTITION__
+Tom NULL abc
+Tom NULL abc
Tom NULL abc
-Cluck NULL abc
PREHOOK: query: alter table alter_partition_change_col1 replace columns (c1 string)
PREHOOK: type: ALTERTABLE_REPLACECOLS
PREHOOK: Input: default@alter_partition_change_col1
@@ -428,26 +432,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom __HIVE_DEFAULT_PARTITION__
-Cluck __HIVE_DEFAULT_PARTITION__
-Tom __HIVE_DEFAULT_PARTITION__
-Mary __HIVE_DEFAULT_PARTITION__
Beck __HIVE_DEFAULT_PARTITION__
-Snow __HIVE_DEFAULT_PARTITION__
-Mary __HIVE_DEFAULT_PARTITION__
Beck __HIVE_DEFAULT_PARTITION__
Beck __HIVE_DEFAULT_PARTITION__
-Tom __HIVE_DEFAULT_PARTITION__
Beck abc
Beck abc
-Tom abc
+Beck abc
+Cluck __HIVE_DEFAULT_PARTITION__
+Cluck abc
+Mary __HIVE_DEFAULT_PARTITION__
+Mary __HIVE_DEFAULT_PARTITION__
Mary abc
-Tom abc
Mary abc
+Snow __HIVE_DEFAULT_PARTITION__
Snow abc
-Beck abc
+Tom __HIVE_DEFAULT_PARTITION__
+Tom __HIVE_DEFAULT_PARTITION__
+Tom __HIVE_DEFAULT_PARTITION__
+Tom abc
+Tom abc
Tom abc
-Cluck abc
PREHOOK: query: -- Try add columns
alter table alter_partition_change_col1 add columns (c2 decimal(14,4))
PREHOOK: type: ALTERTABLE_ADDCOLS
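
Adding c2 back at the table level restores it to the table schema, but the next hunk shows the 'abc' partition, whose own schema was replaced above, still reading NULL until the column is also added to the partition:

  alter table alter_partition_change_col1 add columns (c2 decimal(14,4));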
@@ -497,26 +501,26 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 234.79 __HIVE_DEFAULT_PARTITION__
-Cluck 5.96 __HIVE_DEFAULT_PARTITION__
-Tom 19.00 __HIVE_DEFAULT_PARTITION__
-Mary 4.329 __HIVE_DEFAULT_PARTITION__
Beck 0.0 __HIVE_DEFAULT_PARTITION__
-Snow 55.71 __HIVE_DEFAULT_PARTITION__
-Mary 33.33 __HIVE_DEFAULT_PARTITION__
Beck 77.341 __HIVE_DEFAULT_PARTITION__
Beck 79.9 __HIVE_DEFAULT_PARTITION__
-Tom -12.25 __HIVE_DEFAULT_PARTITION__
Beck NULL abc
Beck NULL abc
-Tom NULL abc
+Beck NULL abc
+Cluck 5.96 __HIVE_DEFAULT_PARTITION__
+Cluck NULL abc
+Mary 33.33 __HIVE_DEFAULT_PARTITION__
+Mary 4.329 __HIVE_DEFAULT_PARTITION__
Mary NULL abc
-Tom NULL abc
Mary NULL abc
+Snow 55.71 __HIVE_DEFAULT_PARTITION__
Snow NULL abc
-Beck NULL abc
+Tom -12.25 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 __HIVE_DEFAULT_PARTITION__
+Tom 234.79 __HIVE_DEFAULT_PARTITION__
+Tom NULL abc
+Tom NULL abc
Tom NULL abc
-Cluck NULL abc
PREHOOK: query: alter table alter_partition_change_col1 partition (p1='abc') add columns (c2 decimal(14,4))
PREHOOK: type: ALTERTABLE_ADDCOLS
PREHOOK: Input: default@alter_partition_change_col1
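
With the column added to the partition itself, the existing data files become readable again and the 'abc' rows regain their decimal values in the final hunk of this file:

  alter table alter_partition_change_col1 partition (p1='abc')
    add columns (c2 decimal(14,4));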
@@ -552,23 +556,23 @@ POSTHOOK: Input: default@alter_partition
POSTHOOK: Input: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__
POSTHOOK: Input: default@alter_partition_change_col1@p1=abc
#### A masked pattern was here ####
-Tom 234.79 __HIVE_DEFAULT_PARTITION__
-Cluck 5.96 __HIVE_DEFAULT_PARTITION__
-Tom 19.00 __HIVE_DEFAULT_PARTITION__
-Mary 4.329 __HIVE_DEFAULT_PARTITION__
Beck 0.0 __HIVE_DEFAULT_PARTITION__
-Snow 55.71 __HIVE_DEFAULT_PARTITION__
-Mary 33.33 __HIVE_DEFAULT_PARTITION__
+Beck 0.0 abc
Beck 77.341 __HIVE_DEFAULT_PARTITION__
+Beck 77.341 abc
Beck 79.9 __HIVE_DEFAULT_PARTITION__
-Tom -12.25 __HIVE_DEFAULT_PARTITION__
Beck 79.9 abc
-Beck 0.0 abc
-Tom 19.00 abc
+Cluck 5.96 __HIVE_DEFAULT_PARTITION__
+Cluck 5.96 abc
+Mary 33.33 __HIVE_DEFAULT_PARTITION__
Mary 33.33 abc
-Tom -12.25 abc
+Mary 4.329 __HIVE_DEFAULT_PARTITION__
Mary 4.329 abc
+Snow 55.71 __HIVE_DEFAULT_PARTITION__
Snow 55.71 abc
-Beck 77.341 abc
+Tom -12.25 __HIVE_DEFAULT_PARTITION__
+Tom -12.25 abc
+Tom 19.00 __HIVE_DEFAULT_PARTITION__
+Tom 19.00 abc
+Tom 234.79 __HIVE_DEFAULT_PARTITION__
Tom 234.79 abc
-Cluck 5.96 abc
Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Thu Oct 2 01:46:46 2014
@@ -98,11 +98,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 5 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 5 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 5 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level analyze statistics for specific partition
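
The Num rows / Data size figures in these plans come from partition-level ANALYZE statements; a sketch of the shape they take (the partition key and value are assumed from the test):

  analyze table loc_orc partition (year='2001') compute statistics;
  analyze table loc_orc partition (year='2001') compute statistics for columns state, locid;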
@@ -158,11 +158,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 9 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 9 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 9 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -181,11 +181,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- partition level analyze statistics for all partitions
@@ -245,11 +245,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -268,11 +268,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- both partitions will be pruned
@@ -331,11 +331,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: zip (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: NONE
ListSink
PREHOOK: query: -- basicStatState: COMPLETE colStatState: PARTIAL
@@ -354,7 +354,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string)
outputColumnNames: _col0
@@ -377,7 +377,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: year (type: string)
outputColumnNames: _col0
@@ -402,7 +402,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
@@ -425,7 +425,7 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
@@ -471,11 +471,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 722 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 723 Basic stats: COMPLETE Column stats: PARTIAL
ListSink
PREHOOK: query: -- This is to test filter expression evaluation on partition column
@@ -496,7 +496,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -532,7 +532,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
@@ -568,7 +568,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 7 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 7 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (locid > 0) (type: boolean)
Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
Modified: hive/trunk/ql/src/test/results/clientpositive/auto_sortmerge_join_16.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_sortmerge_join_16.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/auto_sortmerge_join_16.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/auto_sortmerge_join_16.q.out Thu Oct 2 01:46:46 2014
@@ -232,6 +232,16 @@ POSTHOOK: Input: default@bucket_small@pr
0 val_0 val_0 day1 1
0 val_0 val_0 day1 1
0 val_0 val_0 day1 1
+103 val_103 val_103 day1 1
+103 val_103 val_103 day1 1
+103 val_103 val_103 day1 1
+103 val_103 val_103 day1 1
+374 val_374 val_374 day1 1
+374 val_374 val_374 day1 1
+172 val_172 val_172 day1 1
+172 val_172 val_172 day1 1
+172 val_172 val_172 day1 1
+172 val_172 val_172 day1 1
169 val_169 val_169 day1 1
169 val_169 val_169 day1 1
169 val_169 val_169 day1 1
@@ -240,13 +250,3 @@ POSTHOOK: Input: default@bucket_small@pr
169 val_169 val_169 day1 1
169 val_169 val_169 day1 1
169 val_169 val_169 day1 1
-374 val_374 val_374 day1 1
-374 val_374 val_374 day1 1
-172 val_172 val_172 day1 1
-172 val_172 val_172 day1 1
-172 val_172 val_172 day1 1
-172 val_172 val_172 day1 1
-103 val_103 val_103 day1 1
-103 val_103 val_103 day1 1
-103 val_103 val_103 day1 1
-103 val_103 val_103 day1 1
Modified: hive/trunk/ql/src/test/results/clientpositive/combine2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/combine2.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/combine2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/combine2.q.out Thu Oct 2 01:46:46 2014
@@ -263,7 +263,7 @@ STAGE PLANS:
columns.types string
#### A masked pattern was here ####
name default.combine2
- numFiles 1
+ numFiles 3
numRows 3
partition_columns value
partition_columns.types string
@@ -398,7 +398,7 @@ STAGE PLANS:
columns.types string
#### A masked pattern was here ####
name default.combine2
- numFiles 1
+ numFiles 3
numRows 3
partition_columns value
partition_columns.types string
Modified: hive/trunk/ql/src/test/results/clientpositive/constprog_dp.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/constprog_dp.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/constprog_dp.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/constprog_dp.q.out Thu Oct 2 01:46:46 2014
@@ -16,8 +16,13 @@ insert overwrite table dest partition (d
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-3
+ Stage-5
+ Stage-6 depends on stages: Stage-5
STAGE PLANS:
Stage: Stage-1
@@ -30,23 +35,23 @@ STAGE PLANS:
expressions: key (type: string), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col2 (type: string)
- sort order: +
- Map-reduce partition columns: _col2 (type: string)
+ File Output Operator
+ compressed: false
Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
- Reduce Operator Tree:
- Extract
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.dest
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest
+
+ Stage: Stage-7
+ Conditional Operator
+
+ Stage: Stage-4
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
Stage: Stage-0
Move Operator
@@ -63,6 +68,36 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest
+
+ Stage: Stage-6
+ Move Operator
+ files:
+ hdfs directory: true
+#### A masked pattern was here ####
+
PREHOOK: query: from srcpart
insert overwrite table dest partition (ds) select key, value, ds where ds='2008-04-08'
PREHOOK: type: QUERY
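
The reworked plan above writes from the map side and merges through the conditional Stage-7 (choosing among Stage-4, Stage-3 and Stage-5) instead of shuffling through a reducer, consistent with the sorted dynamic-partition optimization no longer firing for this query. The optimization is controlled by a config flag; a sketch using the test's own query:

  set hive.optimize.sort.dynamic.partition=true;
  explain
  from srcpart
  insert overwrite table dest partition (ds) select key, value, ds where ds='2008-04-08';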
Modified: hive/trunk/ql/src/test/results/clientpositive/delete_all_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/delete_all_partitioned.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/delete_all_partitioned.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/delete_all_partitioned.q.out Thu Oct 2 01:46:46 2014
@@ -84,3 +84,5 @@ POSTHOOK: Input: default@acid_dap
POSTHOOK: Input: default@acid_dap@ds=today
POSTHOOK: Input: default@acid_dap@ds=tomorrow
#### A masked pattern was here ####
+-1071480828 aw724t8c5558x2xneC624 today
+-1072076362 2uLyD28144vklju213J1mr today
Modified: hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out?rev=1628876&r1=1628875&r2=1628876&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out Thu Oct 2 01:46:46 2014
@@ -342,11 +342,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), -1 (type: int)
+ key expressions: _col4 (type: tinyint), '_bucket_number' (type: string)
sort order: ++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
Execution mode: vectorized
Reduce Operator Tree:
Extract
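
In these hunks the placeholder -1 int in the ReduceSink key gives way to the '_bucket_number' virtual column, carried as a string through both the key and the value expressions (the second hunk's extra _col3 float key is the sort column). A sketch of a bucketed, dynamically partitioned insert that exercises this path, with table names and schema assumed from the test's over1k-style data:

  set hive.optimize.sort.dynamic.partition=true;
  create table over1k_part_buck (si smallint, i int, b bigint, f float)
    partitioned by (t tinyint) clustered by (si) into 4 buckets;
  insert overwrite table over1k_part_buck partition (t)
    select si, i, b, f, t from over1k;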
@@ -399,11 +399,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), -1 (type: int), _col3 (type: float)
+ key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
Execution mode: vectorized
Reduce Operator Tree:
Extract
@@ -691,11 +691,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), -1 (type: int)
+ key expressions: _col4 (type: tinyint), '_bucket_number' (type: string)
sort order: ++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
Execution mode: vectorized
Reduce Operator Tree:
Extract
@@ -748,11 +748,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), -1 (type: int), _col3 (type: float)
+ key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
Execution mode: vectorized
Reduce Operator Tree:
Extract
@@ -2063,11 +2063,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col4 (type: tinyint), -1 (type: int), _col3 (type: float)
+ key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
sort order: +++
Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+ value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint), '_bucket_number' (type: string)
Execution mode: vectorized
Reduce Operator Tree:
Extract