You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2015/10/30 22:52:52 UTC
[3/3] hive git commit: HIVE-12294 : log line "Duplicate ID <number>
in column ID list" appears in the logs (Sergey Shelukhin,
reviewed by Vikram Dixit K)
HIVE-12294 : log line "Duplicate ID <number> in column ID list" appears in the logs (Sergey Shelukhin, reviewed by Vikram Dixit K)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/894a499f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/894a499f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/894a499f
Branch: refs/heads/master
Commit: 894a499f131b44fd039627fb9550f24dc7226124
Parents: 19c42ac
Author: Sergey Shelukhin <se...@apache.org>
Authored: Fri Oct 30 14:24:49 2015 -0700
Committer: Sergey Shelukhin <se...@apache.org>
Committed: Fri Oct 30 14:46:34 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 ++++++-
.../org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java | 8 ++++----
2 files changed, 10 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 73037ea..af40137 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -282,7 +282,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
assert idStrs == null || knownNames.length == idStrs.length;
HashMap<String, Integer> nameIdMap = new HashMap<>();
for (int i = 0; i < knownNames.length; ++i) {
- nameIdMap.put(knownNames[i], idStrs != null ? Integer.parseInt(idStrs[i]) : i);
+ Integer newId = (idStrs != null) ? Integer.parseInt(idStrs[i]) : i;
+ Integer oldId = nameIdMap.put(knownNames[i], newId);
+ if (oldId != null && oldId.intValue() != newId.intValue()) {
+ throw new RuntimeException("Multiple IDs for " + knownNames[i] + " in column strings: ["
+ + idStr + "], [" + nameStr + "]");
+ }
}
List<PredicateLeaf> leaves = sarg.getLeaves();
for (int i = 0; i < leaves.size(); ++i) {
http://git-wip-us.apache.org/repos/asf/hive/blob/894a499f/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
index fc0a4b7..0c7ac30 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/ColumnProjectionUtils.java
@@ -148,14 +148,14 @@ public final class ColumnProjectionUtils {
List<Integer> result = new ArrayList<Integer>(list.length);
for (String element : list) {
// it may contain duplicates, remove duplicates
- // TODO: WTF? This would break many assumptions elsewhere if it did.
- // Column names' and column ids' lists are supposed to be correlated.
Integer toAdd = Integer.parseInt(element);
if (!result.contains(toAdd)) {
result.add(toAdd);
- } else if (LOG.isInfoEnabled()) {
- LOG.info("Duplicate ID " + toAdd + " in column ID list");
}
+ // NOTE: some code uses this list to correlate with column names, and yet these lists may
+ // contain duplicates, which this call will remove and the other won't. As far as I can
+ // tell, no code will actually use these two methods together; all is good if the code
+ // gets the ID list without relying on this method. Or maybe it just works by magic.
}
return result;
}