You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/10/30 22:52:52 UTC

[3/3] hive git commit: HIVE-12294 : log line "Duplicate ID <number> in column ID list" appears in the logs (Sergey Shelukhin, reviewed by Vikram Dixit K)

HIVE-12294 : log line "Duplicate ID <number> in column ID list" appears in the logs (Sergey Shelukhin, reviewed by Vikram Dixit K)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/894a499f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/894a499f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/894a499f

Branch: refs/heads/master
Commit: 894a499f131b44fd039627fb9550f24dc7226124
Parents: 19c42ac
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Oct 30 14:24:49 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Oct 30 14:46:34 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java     | 7 ++++++-
 .../org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java | 8 ++++----
 2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 73037ea..af40137 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -282,7 +282,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     assert idStrs == null || knownNames.length == idStrs.length;
     HashMap<String, Integer> nameIdMap = new HashMap<>();
     for (int i = 0; i < knownNames.length; ++i) {
-      nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
+      Integer newId = (idStrs != null) ? Integer.parseInt(idStrs[i]) : i;
+      Integer oldId = nameIdMap.put(knownNames[i], newId);
+      if (oldId != null && oldId.intValue() != newId.intValue()) {
+        throw new RuntimeException("Multiple IDs for " + knownNames[i] + " in column strings: ["
+            + idStr + "], [" + nameStr + "]");
+      }
     }
     List<PredicateLeaf> leaves = sarg.getLeaves();
     for (int i = 0; i < leaves.size(); ++i) {

http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index fc0a4b7..0c7ac30 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -148,14 +148,14 @@ public final class ColumnProjectionUtils {
     List<Integer> result = new ArrayList<Integer>(list.length);
     for (String element : list) {
       // it may contain duplicates, remove duplicates
-      // TODO: WTF? This would break many assumptions elsewhere if it did.
-      //       Column names' and column ids' lists are supposed to be correlated.
       Integer toAdd = Integer.parseInt(element);
       if (!result.contains(toAdd)) {
         result.add(toAdd);
-      } else if (LOG.isInfoEnabled()) {
-        LOG.info("Duplicate ID " + toAdd + " in column ID list");
       }
+      // NOTE: some code uses this list to correlate with column names, and yet these lists may
+      //       contain duplicates, which this call will remove and the other won't. As far as I can
+      //       tell, no code will actually use these two methods together; all is good if the code
+      //       gets the ID list without relying on this method. Or maybe it just works by magic.
     }
     return result;
   }