You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2020/10/13 16:11:21 UTC

[iceberg] branch master updated: MR: Fix NPE when InputSplit.getLocations is called on mappers (#1582)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new 1d5f52b  MR: Fix NPE when InputSplit.getLocations is called on mappers (#1582)
1d5f52b is described below

commit 1d5f52b712dd7eba5edcb85fec4e3f6bd2574f72
Author: Shardul Mahadik <sm...@linkedin.com>
AuthorDate: Tue Oct 13 09:11:12 2020 -0700

    MR: Fix NPE when InputSplit.getLocations is called on mappers (#1582)
---
 mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java b/mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java
index 7c95fbe..0d6c440 100644
--- a/mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java
+++ b/mr/src/main/java/org/apache/iceberg/mr/mapreduce/IcebergSplit.java
@@ -74,9 +74,13 @@ public class IcebergSplit extends InputSplit implements org.apache.hadoop.mapred
 
   @Override
   public String[] getLocations() {
-    if (locations == null) {
+    // The implementation of getLocations() is only meant to be used during split computation.
+    // getLocations() won't be accurate when called on worker nodes and will always return "*".
+    if (locations == null && conf != null) {
       boolean localityPreferred = conf.getBoolean(InputFormatConfig.LOCALITY, false);
       locations = localityPreferred ? Util.blockLocations(task, conf) : ANYWHERE;
+    } else {
+      locations = ANYWHERE;
     }
 
     return locations;