You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by na...@apache.org on 2019/11/01 04:54:35 UTC
[incubator-hudi] 01/02: [HUDI-313] Fix select count star error when
querying a realtime table
This is an automated email from the ASF dual-hosted git repository.
nagarwal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
commit 3251d62bd3c740b25139029a1913d1cf5a57173f
Author: Wenning Ding <we...@amazon.com>
AuthorDate: Wed Oct 23 13:53:57 2019 -0700
[HUDI-313] Fix select count star error when querying a realtime table
---
.../realtime/HoodieParquetRealtimeInputFormat.java | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index d37ae2a..3e42724 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -197,10 +197,27 @@ public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat i
return configuration;
}
+ /**
+ * Hive appends the read columns' ids to the old columns' ids during getRecordReader. In some cases, e.g. SELECT
+ * COUNT(*), the read columns' id is an empty string, so combining it with the Hoodie required projection ids yields
+ * e.g. ",2,0,3" and causes an error. Removes that leading comma in place; a missing (null) id list is left untouched.
+ */
+ private static synchronized Configuration cleanProjectionColumnIds(Configuration conf) {
+ String columnIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+ if (columnIds != null && columnIds.startsWith(",")) {
+ conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, columnIds.substring(1));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("The projection Ids: {" + columnIds + "} start with ','. First comma is removed");
+ }
+ }
+ return conf;
+ }
+
@Override
public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSplit split, final JobConf job,
final Reporter reporter) throws IOException {
+ this.conf = cleanProjectionColumnIds(job);
LOG.info("Before adding Hoodie columns, Projections :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)
+ ", Ids :" + job.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));