You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2018/06/26 05:12:44 UTC
hive git commit: HIVE-19980: GenericUDTFGetSplits fails when order by
query returns 0 rows (Prasanth Jayachandran reviewed by Jason Dere)
Repository: hive
Updated Branches:
refs/heads/master b5160e744 -> bd5d2b70c
HIVE-19980: GenericUDTFGetSplits fails when order by query returns 0 rows (Prasanth Jayachandran reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bd5d2b70
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bd5d2b70
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bd5d2b70
Branch: refs/heads/master
Commit: bd5d2b70c2be194cada5c168421e22eb918fc684
Parents: b5160e7
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Mon Jun 25 22:09:40 2018 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Mon Jun 25 22:09:40 2018 -0700
----------------------------------------------------------------------
.../hive/jdbc/TestJdbcGenericUDTFGetSplits.java | 3 ++
.../hive/ql/exec/tez/HiveSplitGenerator.java | 40 ++++++++++++--------
2 files changed, 27 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bd5d2b70/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
index c8a428c..b94868b 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
@@ -124,6 +124,9 @@ public class TestJdbcGenericUDTFGetSplits {
query = "select get_splits(" + "'select value from " + tableName + " order by under_col', 5)";
runQuery(query, getConfigs(), 1);
+ query = "select get_splits(" + "'select value from " + tableName + " order by under_col limit 0', 5)";
+ runQuery(query, getConfigs(), 0);
+
query = "select get_splits(" +
"'select `value` from (select value from " + tableName + " where value is not null order by value) as t', 5)";
runQuery(query, getConfigs(), 1);
http://git-wip-us.apache.org/repos/asf/hive/blob/bd5d2b70/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
index 6daa8df..15c14c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
@@ -210,26 +210,34 @@ public class HiveSplitGenerator extends InputInitializer {
if (generateSingleSplit &&
conf.get(HiveConf.ConfVars.HIVETEZINPUTFORMAT.varname).equals(HiveInputFormat.class.getName())) {
MapWork mapWork = Utilities.getMapWork(jobConf);
- splits = new InputSplit[1];
List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
FileSystem fs = paths.get(0).getFileSystem(jobConf);
FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
- FileStatus fileStatus = fileStatuses[0];
- Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 &&
- mapWork.getAliasToPartnInfo().size() == 1,
- "Requested to generate single split. Paths and fileStatuses are expected to be 1. " +
- "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
- BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
- Set<String> hostsSet = new HashSet<>();
- for (BlockLocation location : locations) {
- hostsSet.addAll(Lists.newArrayList(location.getHosts()));
+ if (fileStatuses.length == 0) {
+ // generating a single split typically happens when reading data out of order by queries.
+ // if an order by query returns no rows, no files will exist in the input path
+ splits = new InputSplit[0];
+ } else {
+ // if files exist in the input path then there has to be exactly 1, as this code path gets triggered only
+ // for order by queries, which are expected to write only one file (written by one reducer)
+ Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 &&
+ mapWork.getAliasToPartnInfo().size() == 1,
+ "Requested to generate single split. Paths and fileStatuses are expected to be 1. " +
+ "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
+ splits = new InputSplit[1];
+ FileStatus fileStatus = fileStatuses[0];
+ BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
+ Set<String> hostsSet = new HashSet<>();
+ for (BlockLocation location : locations) {
+ hostsSet.addAll(Lists.newArrayList(location.getHosts()));
+ }
+ String[] hosts = hostsSet.toArray(new String[0]);
+ FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
+ String alias = mapWork.getAliases().get(0);
+ PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
+ String partIF = partDesc.getInputFileFormatClassName();
+ splits[0] = new HiveInputFormat.HiveInputSplit(fileSplit, partIF);
}
- String[] hosts = hostsSet.toArray(new String[0]);
- FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
- String alias = mapWork.getAliases().get(0);
- PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
- String partIF = partDesc.getInputFileFormatClassName();
- splits[0] = new HiveInputFormat.HiveInputSplit(fileSplit, partIF);
} else {
// Raw splits
splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves));