You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by we...@apache.org on 2016/07/14 22:09:02 UTC
hive git commit: HIVE-13040 : Handle empty bucket creations more efficiently (Ashutosh Chauhan, reviewed by Prasanth Jayachandran)
Repository: hive
Updated Branches:
refs/heads/branch-1 8f500f8ad -> 3e51861a2
HIVE-13040 : Handle empty bucket creations more efficiently (Ashutosh Chauhan, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e51861a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e51861a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e51861a
Branch: refs/heads/branch-1
Commit: 3e51861a215f62e842489f584a87b5be96316a41
Parents: 8f500f8
Author: Wei Zheng <we...@apache.org>
Authored: Thu Jul 14 15:09:48 2016 -0700
Committer: Wei Zheng <we...@apache.org>
Committed: Thu Jul 14 15:09:48 2016 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/StatsNoJobTask.java | 67 ++++++++++---------
.../apache/hadoop/hive/ql/exec/Utilities.java | 5 +-
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 17 +++--
.../apache/hadoop/hive/ql/io/orc/OrcFile.java | 8 +++
.../hadoop/hive/ql/io/orc/OrcInputFormat.java | 16 +++--
.../hadoop/hive/ql/io/orc/OrcOutputFormat.java | 17 ++---
.../hive/ql/txn/compactor/CompactorMR.java | 2 +-
.../hadoop/hive/ql/txn/compactor/Initiator.java | 2 +-
.../hive/ql/io/orc/TestInputOutputFormat.java | 10 +--
.../dynpart_sort_opt_vectorization.q.out | 4 +-
.../tez/dynpart_sort_opt_vectorization.q.out | 8 +--
.../tez/dynpart_sort_optimization.q.out | 70 +++++++++-----------
.../apache/hadoop/hive/shims/Hadoop23Shims.java | 2 +-
13 files changed, 121 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
index 0d99cbc..fe49e15 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/StatsNoJobTask.java
@@ -123,7 +123,7 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
class StatsCollection implements Runnable {
- private Partition partn;
+ private final Partition partn;
public StatsCollection(Partition part) {
this.partn = part;
@@ -148,7 +148,7 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
boolean statsAvailable = false;
for(FileStatus file: fileList) {
if (!file.isDir()) {
- InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtil.newInstance(
+ InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance(
partn.getInputFormatClass(), jc);
InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0,
new String[] { partn.getLocation() });
@@ -193,7 +193,7 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
"Failed with exception " + e.getMessage() + "\n" + StringUtils.stringifyException(e));
// Before updating the partition params, if any partition params is null
- // and if statsReliable is true then updatePartition() function will fail
+ // and if statsReliable is true then updatePartition() function will fail
// the task by returning 1
if (work.isStatsReliable()) {
partUpdates.put(tPart.getSd().getLocation(), null);
@@ -244,40 +244,45 @@ public class StatsNoJobTask extends Task<StatsNoJobWork> implements Serializable
boolean statsAvailable = false;
for(FileStatus file: fileList) {
if (!file.isDir()) {
- InputFormat<?, ?> inputFormat = (InputFormat<?, ?>) ReflectionUtil.newInstance(
+ InputFormat<?, ?> inputFormat = ReflectionUtil.newInstance(
table.getInputFormatClass(), jc);
- InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[] { table
- .getDataLocation().toString() });
- org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
- inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
- StatsProvidingRecordReader statsRR;
- if (recordReader instanceof StatsProvidingRecordReader) {
- statsRR = (StatsProvidingRecordReader) recordReader;
- numRows += statsRR.getStats().getRowCount();
- rawDataSize += statsRR.getStats().getRawDataSize();
- fileSize += file.getLen();
+ InputSplit dummySplit = new FileSplit(file.getPath(), 0, 0, new String[]{table
+ .getDataLocation().toString()});
+ if (file.getLen() == 0) {
numFiles += 1;
statsAvailable = true;
+ } else {
+ org.apache.hadoop.mapred.RecordReader<?, ?> recordReader =
+ inputFormat.getRecordReader(dummySplit, jc, Reporter.NULL);
+ StatsProvidingRecordReader statsRR;
+ if (recordReader instanceof StatsProvidingRecordReader) {
+ statsRR = (StatsProvidingRecordReader) recordReader;
+ numRows += statsRR.getStats().getRowCount();
+ rawDataSize += statsRR.getStats().getRawDataSize();
+ fileSize += file.getLen();
+ numFiles += 1;
+ statsAvailable = true;
+ }
+ recordReader.close();
}
- recordReader.close();
}
- }
- if (statsAvailable) {
- parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
- parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
- parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
- parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
- parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
-
- db.alterTable(tableFullName, new Table(tTable));
-
- String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
- LOG.debug(msg);
- console.printInfo(msg);
- } else {
- String msg = "Table " + tableFullName + " does not provide stats.";
- LOG.debug(msg);
+ if (statsAvailable) {
+ parameters.put(StatsSetupConst.ROW_COUNT, String.valueOf(numRows));
+ parameters.put(StatsSetupConst.RAW_DATA_SIZE, String.valueOf(rawDataSize));
+ parameters.put(StatsSetupConst.TOTAL_SIZE, String.valueOf(fileSize));
+ parameters.put(StatsSetupConst.NUM_FILES, String.valueOf(numFiles));
+ parameters.put(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, StatsSetupConst.TRUE);
+
+ db.alterTable(tableFullName, new Table(tTable));
+
+ String msg = "Table " + tableFullName + " stats: [" + toString(parameters) + ']';
+ LOG.debug(msg);
+ console.printInfo(msg);
+ } else {
+ String msg = "Table " + tableFullName + " does not provide stats.";
+ LOG.debug(msg);
+ }
}
} catch (Exception e) {
console.printInfo("[Warning] could not update stats for " + tableFullName + ".",
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 4093134..0a32e6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -2135,7 +2135,7 @@ public final class Utilities {
taskIDToFile = removeTempOrDuplicateFiles(items, fs);
// if the table is bucketed and enforce bucketing, we should check and generate all buckets
- if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null) {
+ if (dpCtx.getNumBuckets() > 0 && taskIDToFile != null && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) {
// refresh the file list
items = fs.listStatus(parts[i].getPath());
// get the missing buckets and generate empty buckets
@@ -2155,8 +2155,7 @@ public final class Utilities {
FileStatus[] items = fs.listStatus(path);
taskIDToFile = removeTempOrDuplicateFiles(items, fs);
if(taskIDToFile != null && taskIDToFile.size() > 0 && conf != null && conf.getTable() != null
- && (conf.getTable().getNumBuckets() > taskIDToFile.size())
- && (HiveConf.getBoolVar(hconf, HiveConf.ConfVars.HIVEENFORCEBUCKETING))) {
+ && (conf.getTable().getNumBuckets() > taskIDToFile.size()) && !"tez".equalsIgnoreCase(hconf.get(ConfVars.HIVE_EXECUTION_ENGINE.varname))) {
// get the missing buckets and generate empty buckets for non-dynamic partition
String taskID1 = taskIDToFile.keySet().iterator().next();
Path bucketPath = taskIDToFile.values().iterator().next().getPath();
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index 8bcf6d7..7d1517d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -102,6 +102,7 @@ public class AcidUtils {
Pattern.compile("[0-9]+_[0-9]+");
public static final PathFilter hiddenFileFilter = new PathFilter(){
+ @Override
public boolean accept(Path p){
String name = p.getName();
return !name.startsWith("_") && !name.startsWith(".");
@@ -460,7 +461,14 @@ public class AcidUtils {
return false;
}
- /**
+ public static Directory getAcidState(Path directory,
+ Configuration conf,
+ ValidTxnList txnList
+ ) throws IOException {
+ return getAcidState(directory, conf, txnList, false);
+ }
+
+ /**
* Get the ACID state of the given directory. It finds the minimal set of
* base and diff directories. Note that because major compactions don't
* preserve the history, we can't use a base directory that includes a
@@ -473,7 +481,8 @@ public class AcidUtils {
*/
public static Directory getAcidState(Path directory,
Configuration conf,
- ValidTxnList txnList
+ ValidTxnList txnList,
+ boolean ignoreEmptyFiles
) throws IOException {
FileSystem fs = directory.getFileSystem(conf);
FileStatus bestBase = null;
@@ -513,7 +522,7 @@ public class AcidUtils {
// it is possible that the cleaner is running and removing these original files,
// in which case recursing through them could cause us to get an error.
originalDirectories.add(child);
- } else {
+ } else if (!ignoreEmptyFiles || child.getLen() != 0) {
original.add(child);
}
}
@@ -590,7 +599,7 @@ public class AcidUtils {
}
};
}
-
+
/**
* Find the original files (non-ACID layout) recursively under the partition
* directory.
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
index 906eb6b..dc00e38 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
@@ -491,6 +491,14 @@ public final class OrcFile {
return this;
}
+ public FileSystem getFileSystem() {
+ return fileSystemValue;
+ }
+
+ public Configuration getConfiguration() {
+ return configuration;
+ }
+
public int getBufferSize() {
return bufferSizeValue;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 35469d1..94b5461 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -633,12 +633,14 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
public List<OrcSplit> getSplits() throws IOException {
List<OrcSplit> splits = Lists.newArrayList();
for (FileStatus fileStatus : fileStatuses) {
- TreeMap<Long, BlockLocation> blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus);
- for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
- OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(),
- entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
- deltas, -1, fileStatus.getLen());
- splits.add(orcSplit);
+ if (fileStatus.getLen() != 0) {
+ TreeMap<Long, BlockLocation> blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus);
+ for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) {
+ OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), entry.getKey(),
+ entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true,
+ deltas, -1, fileStatus.getLen());
+ splits.add(orcSplit);
+ }
}
}
@@ -710,7 +712,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
public SplitStrategy call() throws IOException {
final SplitStrategy splitStrategy;
AcidUtils.Directory dirInfo = AcidUtils.getAcidState(dir,
- context.conf, context.transactionList);
+ context.conf, context.transactionList, true);
List<DeltaMetaData> deltas = AcidUtils.serializeDeltas(dirInfo.getCurrentDirectories());
Path base = dirInfo.getBaseDirectory();
List<FileStatus> original = dirInfo.getOriginalFiles();
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
index bc55677..7d1b994 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.java
@@ -93,23 +93,20 @@ public class OrcOutputFormat extends FileOutputFormat<NullWritable, OrcSerdeRow>
@Override
public void close(boolean b) throws IOException {
- // if we haven't written any rows, we need to create a file with a
- // generic schema.
if (writer == null) {
- // a row with no columns
- ObjectInspector inspector = ObjectInspectorFactory.
- getStandardStructObjectInspector(new ArrayList<String>(),
- new ArrayList<ObjectInspector>());
- options.inspector(inspector);
- writer = OrcFile.createWriter(path, options);
+ // we are closing a file without writing any data in it
+ FileSystem fs = options.getFileSystem() == null ?
+ path.getFileSystem(options.getConfiguration()) : options.getFileSystem();
+ fs.createNewFile(path);
+ return;
}
writer.close();
}
@Override
public SerDeStats getStats() {
- stats.setRawDataSize(writer.getRawDataSize());
- stats.setRowCount(writer.getNumberOfRows());
+ stats.setRawDataSize(null == writer ? 0 : writer.getRawDataSize());
+ stats.setRowCount(null == writer ? 0 : writer.getNumberOfRows());
return stats;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index e7ea70f..d99bbd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -200,7 +200,7 @@ public class CompactorMR {
// and discovering that in getSplits is too late as we then have no way to pass it to our
// mapper.
- AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns);
+ AcidUtils.Directory dir = AcidUtils.getAcidState(new Path(sd.getLocation()), conf, txns, true);
List<AcidUtils.ParsedDelta> parsedDeltas = dir.getCurrentDirectories();
int maxDeltastoHandle = conf.getIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA);
if(parsedDeltas.size() > maxDeltastoHandle) {
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
index 1a63f99..d654b76 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
@@ -253,7 +253,7 @@ public class Initiator extends CompactorThread {
boolean noBase = false;
Path location = new Path(sd.getLocation());
FileSystem fs = location.getFileSystem(conf);
- AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns);
+ AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns, false);
Path base = dir.getBaseDirectory();
long baseSize = 0;
FileStatus stat = null;
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index 31d561b..47abc74 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -537,11 +537,11 @@ public class TestInputOutputFormat {
public void testFileGenerator() throws Exception {
OrcInputFormat.Context context = new OrcInputFormat.Context(conf);
MockFileSystem fs = new MockFileSystem(conf,
- new MockFile("mock:/a/b/part-00", 1000, new byte[0]),
- new MockFile("mock:/a/b/part-01", 1000, new byte[0]),
- new MockFile("mock:/a/b/_part-02", 1000, new byte[0]),
- new MockFile("mock:/a/b/.part-03", 1000, new byte[0]),
- new MockFile("mock:/a/b/part-04", 1000, new byte[0]));
+ new MockFile("mock:/a/b/part-00", 1000, new byte[1]),
+ new MockFile("mock:/a/b/part-01", 1000, new byte[1]),
+ new MockFile("mock:/a/b/_part-02", 1000, new byte[1]),
+ new MockFile("mock:/a/b/.part-03", 1000, new byte[1]),
+ new MockFile("mock:/a/b/part-04", 1000, new byte[1]));
OrcInputFormat.FileGenerator gen =
new OrcInputFormat.FileGenerator(context, fs,
new MockPath(fs, "mock:/a/b"));
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
index 4c8ddd3..87f19ab 100644
--- a/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
@@ -1114,7 +1114,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2016
#### A masked pattern was here ####
# Storage Information
@@ -1200,7 +1200,7 @@ Partition Parameters:
numFiles 8
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2016
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
index 22afcbf..6967548 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
@@ -1163,10 +1163,10 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
- numFiles 8
+ numFiles 4
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2016
#### A masked pattern was here ####
# Storage Information
@@ -1249,10 +1249,10 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
- numFiles 8
+ numFiles 4
numRows 6
rawDataSize 120
- totalSize 2212
+ totalSize 2016
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
index 6df203a..3032b21 100644
--- a/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
+++ b/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
@@ -1076,7 +1076,7 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
- numFiles 8
+ numFiles 4
numRows 6
rawDataSize 156
totalSize 162
@@ -1162,7 +1162,7 @@ Protect Mode: None
#### A masked pattern was here ####
Partition Parameters:
COLUMN_STATS_ACCURATE true
- numFiles 8
+ numFiles 4
numRows 6
rawDataSize 156
totalSize 162
@@ -2435,7 +2435,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2449,15 +2448,15 @@ STAGE PLANS:
predicate: (s = 'foo') (type: boolean)
Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), t (type: tinyint), i (type: int)
- outputColumnNames: _col0, _col1, _col2, _col4, _col5
+ expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), i (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 1974 Data size: 53304 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), _col5 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
@@ -2506,7 +2505,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2520,15 +2518,15 @@ STAGE PLANS:
predicate: (t = 27) (type: boolean)
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), i (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3, _col5
+ expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27 (type: tinyint), i (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col3 (type: string), 27 (type: tinyint), _col5 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), _col5 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), _col5 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
@@ -2577,7 +2575,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2591,15 +2588,15 @@ STAGE PLANS:
predicate: (i = 100) (type: boolean)
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), t (type: tinyint), 100 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col3 (type: string), _col4 (type: tinyint), 100 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), 100 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 429 Data size: 53255 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), 100 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
@@ -2648,7 +2645,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2662,15 +2658,15 @@ STAGE PLANS:
predicate: ((i = 100) and (t = 27)) (type: boolean)
Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
+ expressions: si (type: smallint), b (type: bigint), f (type: float), s (type: string), 27 (type: tinyint), 100 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col3 (type: string), 27 (type: tinyint), 100 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: _col3 (type: string), 27 (type: tinyint), 100 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 214 Data size: 26565 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), 27 (type: tinyint), 100 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
@@ -2719,7 +2715,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2733,15 +2728,15 @@ STAGE PLANS:
predicate: ((i = 100) and (s = 'foo')) (type: boolean)
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), t (type: tinyint)
- outputColumnNames: _col0, _col1, _col2, _col4
+ expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), t (type: tinyint), 100 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: 'foo' (type: string), _col4 (type: tinyint), 100 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: 'foo' (type: string), _col4 (type: tinyint), 100 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), _col4 (type: tinyint), 100 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
@@ -2790,7 +2785,6 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-1
Tez
-#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
@@ -2804,15 +2798,15 @@ STAGE PLANS:
predicate: ((t = 27) and (s = 'foo')) (type: boolean)
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: si (type: smallint), b (type: bigint), f (type: float), i (type: int)
- outputColumnNames: _col0, _col1, _col2, _col5
+ expressions: si (type: smallint), b (type: bigint), f (type: float), 'foo' (type: string), 27 (type: tinyint), i (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: 'foo' (type: string), 27 (type: tinyint), _col5 (type: int)
+ key expressions: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
sort order: +++
- Map-reduce partition columns: 'foo' (type: string), 27 (type: tinyint), _col5 (type: int)
+ Map-reduce partition columns: _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Statistics: Num rows: 987 Data size: 26652 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), 'foo' (type: string), 27 (type: tinyint), _col5 (type: int)
+ value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int)
Reducer 2
Reduce Operator Tree:
Select Operator
http://git-wip-us.apache.org/repos/asf/hive/blob/3e51861a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
----------------------------------------------------------------------
diff --git a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
index 2e09882..5f78481 100644
--- a/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
+++ b/shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java
@@ -163,7 +163,7 @@ public class Hadoop23Shims extends HadoopShimsSecure {
Iterator<FileStatus> it = result.iterator();
while (it.hasNext()) {
FileStatus stat = it.next();
- if (!stat.isFile()) {
+ if (!stat.isFile() || (stat.getLen() == 0 && !stat.getPath().toUri().getScheme().equals("nullscan"))) {
it.remove();
}
}