Posted to commits@hive.apache.org by pr...@apache.org on 2018/06/26 05:12:44 UTC

hive git commit: HIVE-19980: GenericUDTFGetSplits fails when order by query returns 0 rows (Prasanth Jayachandran reviewed by Jason Dere)

Repository: hive
Updated Branches:
  refs/heads/master b5160e744 -> bd5d2b70c


HIVE-19980: GenericUDTFGetSplits fails when order by query returns 0 rows (Prasanth Jayachandran reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bd5d2b70
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bd5d2b70
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bd5d2b70

Branch: refs/heads/master
Commit: bd5d2b70c2be194cada5c168421e22eb918fc684
Parents: b5160e7
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Mon Jun 25 22:09:40 2018 -0700
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Mon Jun 25 22:09:40 2018 -0700

----------------------------------------------------------------------
 .../hive/jdbc/TestJdbcGenericUDTFGetSplits.java |  3 ++
 .../hive/ql/exec/tez/HiveSplitGenerator.java    | 40 ++++++++++++--------
 2 files changed, 27 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
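For context on the fix below: the single-split path in HiveSplitGenerator previously read
fileStatuses[0] straight from the listing of the query's output directory. An order by query
that returns 0 rows writes no output file, so the listing is empty and indexing it fails
(indexing an empty Java array throws ArrayIndexOutOfBoundsException). A minimal sketch of the
pre-fix access pattern, taken from the lines removed in the second hunk (fs and paths are the
same variables as in the committed code):

    // before HIVE-19980: assumed the order-by output directory always contains exactly one file
    FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
    FileStatus fileStatus = fileStatuses[0]; // fails when the order by query returned 0 rows

The diff below adds the missing empty-listing check and a regression test.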


http://git-wip-us.apache.org/repos/asf/hive/blob/bd5d2b70/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
index c8a428c..b94868b 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcGenericUDTFGetSplits.java
@@ -124,6 +124,9 @@ public class TestJdbcGenericUDTFGetSplits {
     query = "select get_splits(" + "'select value from " + tableName + " order by under_col', 5)";
     runQuery(query, getConfigs(), 1);
 
+    query = "select get_splits(" + "'select value from " + tableName + " order by under_col limit 0', 5)";
+    runQuery(query, getConfigs(), 0);
+
     query = "select get_splits(" +
       "'select `value` from (select value from " + tableName + " where value is not null order by value) as t', 5)";
     runQuery(query, getConfigs(), 1);
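The new test case (the '+' lines above) covers the previously failing scenario: get_splits() over
an order by query with limit 0, which produces no rows and should now yield zero splits rather than
an error. A sketch of the pattern, assuming the runQuery() and getConfigs() helpers of this test
class; the table name is illustrative, and the last runQuery() argument is taken to be the expected
number of returned splits:

    // ORDER BY with LIMIT 0 returns no rows, so get_splits() is expected to report 0 splits
    String query = "select get_splits('select value from testtab order by under_col limit 0', 5)";
    runQuery(query, getConfigs(), 0);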

http://git-wip-us.apache.org/repos/asf/hive/blob/bd5d2b70/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
index 6daa8df..15c14c9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
@@ -210,26 +210,34 @@ public class HiveSplitGenerator extends InputInitializer {
         if (generateSingleSplit &&
           conf.get(HiveConf.ConfVars.HIVETEZINPUTFORMAT.varname).equals(HiveInputFormat.class.getName())) {
           MapWork mapWork = Utilities.getMapWork(jobConf);
-          splits = new InputSplit[1];
           List<Path> paths = Utilities.getInputPathsTez(jobConf, mapWork);
           FileSystem fs = paths.get(0).getFileSystem(jobConf);
           FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
-          FileStatus fileStatus = fileStatuses[0];
-          Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 &&
-              mapWork.getAliasToPartnInfo().size() == 1,
-            "Requested to generate single split. Paths and fileStatuses are expected to be 1. " +
-              "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
-          BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
-          Set<String> hostsSet = new HashSet<>();
-          for (BlockLocation location : locations) {
-            hostsSet.addAll(Lists.newArrayList(location.getHosts()));
+          if (fileStatuses.length == 0) {
+            // the single-split path is typically taken when reading the output of order by queries.
+            // if the order by query returned no rows, no output files will exist in the input path
+            splits = new InputSplit[0];
+          } else {
+            // if any file exists in the input path there must be exactly one, because this code path is
+            // triggered only for order by queries, which are expected to write a single file (one reducer)
+            Preconditions.checkState(paths.size() == 1 && fileStatuses.length == 1 &&
+                mapWork.getAliasToPartnInfo().size() == 1,
+              "Requested to generate single split. Paths and fileStatuses are expected to be 1. " +
+                "Got paths: " + paths.size() + " fileStatuses: " + fileStatuses.length);
+            splits = new InputSplit[1];
+            FileStatus fileStatus = fileStatuses[0];
+            BlockLocation[] locations = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
+            Set<String> hostsSet = new HashSet<>();
+            for (BlockLocation location : locations) {
+              hostsSet.addAll(Lists.newArrayList(location.getHosts()));
+            }
+            String[] hosts = hostsSet.toArray(new String[0]);
+            FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
+            String alias = mapWork.getAliases().get(0);
+            PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
+            String partIF = partDesc.getInputFileFormatClassName();
+            splits[0] = new HiveInputFormat.HiveInputSplit(fileSplit, partIF);
           }
-          String[] hosts = hostsSet.toArray(new String[0]);
-          FileSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), hosts);
-          String alias = mapWork.getAliases().get(0);
-          PartitionDesc partDesc = mapWork.getAliasToPartnInfo().get(alias);
-          String partIF = partDesc.getInputFileFormatClassName();
-          splits[0] = new HiveInputFormat.HiveInputSplit(fileSplit, partIF);
         } else {
           // Raw splits
           splits = inputFormat.getSplits(jobConf, (int) (availableSlots * waves));
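Taken together, the second hunk makes the single-split path degrade gracefully instead of assuming
an output file is always present. Condensed, the resulting control flow is (a sketch of the hunk
above, with the single-split construction elided):

    FileStatus[] fileStatuses = fs.listStatus(paths.get(0));
    if (fileStatuses.length == 0) {
      // the order by query wrote no output file (e.g. it returned 0 rows), so report no splits
      splits = new InputSplit[0];
    } else {
      // exactly one file is expected here (order by output is written by a single reducer);
      // the hunk above asserts that invariant before building the split
      splits = new InputSplit[1];
      // ... build HiveInputFormat.HiveInputSplit from fileStatuses[0] as shown in the hunk ...
    }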