You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/03/16 02:50:18 UTC

svn commit: r1301310 - /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Author: cws
Date: Fri Mar 16 01:50:17 2012
New Revision: 1301310

URL: http://svn.apache.org/viewvc?rev=1301310&view=rev
Log:
HIVE-2778 [jira] Fail on table sampling
(Navis Ryu via Carl Steinbach)

Summary:
HIVE-2778 fix NPE on table sampling

Trying table sampling on any non-empty table throws an NPE. This does not occur
when testing on mini-MR.

select count(*) from emp tablesample (0.1 percent);

Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
java.lang.NullPointerException
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.sampleSplits(CombineHiveInputFormat.java:450)
    at org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:403)
    at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:971)
    at org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:963)
    at org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:880)
    at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:833)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1127)
    at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:833)
    at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:807)
    at org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:432)
    at org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136)
    at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:134)
    at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57)
    at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332)
    at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123)
    at org.apache.hadoop.hive.ql.Driver.run(Driver.java:931)
    at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255)
    at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:212)
    at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
    at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:671)
    at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:554)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
    at java.lang.reflect.Method.invoke(Method.java:597)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:186)
Job Submission failed with exception 'java.lang.NullPointerException(null)'
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MapRedTask

Test Plan: EMPTY

Reviewers: JIRA, cwsteinbach

Reviewed By: cwsteinbach

Differential Revision: https://reviews.facebook.net/D1593

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1301310&r1=1301309&r2=1301310&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Fri Mar 16 01:50:17 2012
@@ -435,14 +435,16 @@ public class CombineHiveInputFormat<K ex
     List<InputSplitShim> retLists = new ArrayList<InputSplitShim>();
     Map<String, ArrayList<InputSplitShim>> aliasToSplitList = new HashMap<String, ArrayList<InputSplitShim>>();
     Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+    Map<String, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);
 
     // Populate list of exclusive splits for every sampled alias
     //
     for (InputSplitShim split : splits) {
       String alias = null;
       for (Path path : split.getPaths()) {
+        boolean schemeless = path.toUri().getScheme() == null;
         List<String> l = HiveFileFormatUtils.doGetAliasesFromPath(
-            pathToAliases, path);
+            schemeless ? pathToAliasesNoScheme : pathToAliases, path);
         // a path for a split unqualified the split from being sampled if:
         // 1. it serves more than one alias
         // 2. the alias it serves is not sampled
@@ -500,6 +502,15 @@ public class CombineHiveInputFormat<K ex
     return retLists;
   }
 
+  Map<String, ArrayList<String>> removeScheme(Map<String, ArrayList<String>> pathToAliases) {
+    Map<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
+    for (Map.Entry <String, ArrayList<String>> entry : pathToAliases.entrySet()) {
+      String newKey = new Path(entry.getKey()).toUri().getPath();
+      result.put(newKey, entry.getValue());
+    }
+    return result;
+  }
+
   /**
    * Create a generic Hive RecordReader than can iterate over all chunks in a
    * CombinedFileSplit.