You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by cw...@apache.org on 2012/03/16 02:50:18 UTC
svn commit: r1301310 -
/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Author: cws
Date: Fri Mar 16 01:50:17 2012
New Revision: 1301310
URL: http://svn.apache.org/viewvc?rev=1301310&view=rev
Log:
HIVE-2778 [jira] Fail on table sampling
(Navis Ryu via Carl Steinbach)
Summary:
HIVE-2778 fix NPE on table sampling
Trying table sampling on any non-empty table throws an NPE. This does not occur
when tested on mini-MR. <div class="preformatted panel" style="border-width:
1px;"><div class="preformattedContent panelContent"> <pre>select count(*) from
emp tablesample (0.1 percent); Total MapReduce jobs = 1 Launching Job 1 out
of 1 Number of reduce tasks determined at compile time: 1 In order to change the
average load for a reducer (in bytes): set
hive.exec.reducers.bytes.per.reducer=<number> In order to limit the maximum
number of reducers: set hive.exec.reducers.max=<number> In order to set a
constant number of reducers: set mapred.reduce.tasks=<number>
java.lang.NullPointerException at
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.sampleSplits(CombineHiveInputFormat.java:450)
at
org.apache.hadoop.hive.ql.io.CombineHiveInputFormat.getSplits(CombineHiveInputFormat.java:403)
at org.apache.hadoop.mapred.JobClient.writeOldSplits(JobClient.java:971) at
org.apache.hadoop.mapred.JobClient.writeSplits(JobClient.java:963) at
org.apache.hadoop.mapred.JobClient.access$500(JobClient.java:170) at
org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:880) at
org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:833) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:396) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1127)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:833) at
org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:807) at
org.apache.hadoop.hive.ql.exec.ExecDriver.execute(ExecDriver.java:432) at
org.apache.hadoop.hive.ql.exec.MapRedTask.execute(MapRedTask.java:136) at
org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:134) at
org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:57) at
org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1332) at
org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1123) at
org.apache.hadoop.hive.ql.Driver.run(Driver.java:931) at
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:255) at
org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:212) at
org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) at
org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:671) at
org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:554) at
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597) at
org.apache.hadoop.util.RunJar.main(RunJar.java:186) Job Submission failed with
exception 'java.lang.NullPointerException(null)' FAILED: Execution Error, return
code 1 from org.apache.hadoop.hive.ql.exec.MapRedTask </pre> </div></div>
Test Plan: EMPTY
Reviewers: JIRA, cwsteinbach
Reviewed By: cwsteinbach
Differential Revision: https://reviews.facebook.net/D1593
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1301310&r1=1301309&r2=1301310&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Fri Mar 16 01:50:17 2012
@@ -435,14 +435,16 @@ public class CombineHiveInputFormat<K ex
List<InputSplitShim> retLists = new ArrayList<InputSplitShim>();
Map<String, ArrayList<InputSplitShim>> aliasToSplitList = new HashMap<String, ArrayList<InputSplitShim>>();
Map<String, ArrayList<String>> pathToAliases = mrwork.getPathToAliases();
+ Map<String, ArrayList<String>> pathToAliasesNoScheme = removeScheme(pathToAliases);
// Populate list of exclusive splits for every sampled alias
//
for (InputSplitShim split : splits) {
String alias = null;
for (Path path : split.getPaths()) {
+ boolean schemeless = path.toUri().getScheme() == null;
List<String> l = HiveFileFormatUtils.doGetAliasesFromPath(
- pathToAliases, path);
+ schemeless ? pathToAliasesNoScheme : pathToAliases, path);
// a path for a split unqualified the split from being sampled if:
// 1. it serves more than one alias
// 2. the alias it serves is not sampled
@@ -500,6 +502,15 @@ public class CombineHiveInputFormat<K ex
return retLists;
}
+ Map<String, ArrayList<String>> removeScheme(Map<String, ArrayList<String>> pathToAliases) {
+ Map<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
+ for (Map.Entry <String, ArrayList<String>> entry : pathToAliases.entrySet()) {
+ String newKey = new Path(entry.getKey()).toUri().getPath();
+ result.put(newKey, entry.getValue());
+ }
+ return result;
+ }
+
/**
* Create a generic Hive RecordReader than can iterate over all chunks in a
* CombinedFileSplit.