You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by na...@apache.org on 2019/11/01 04:54:35 UTC

[incubator-hudi] 01/02: [HUDI-313] Fix select count star error when querying a realtime table

This is an automated email from the ASF dual-hosted git repository.

nagarwal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git

commit 3251d62bd3c740b25139029a1913d1cf5a57173f
Author: Wenning Ding <we...@amazon.com>
AuthorDate: Wed Oct 23 13:53:57 2019 -0700

    [HUDI-313] Fix select count star error when querying a realtime table
---
 .../realtime/HoodieParquetRealtimeInputFormat.java      | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index d37ae2a..3e42724 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -197,10 +197,27 @@ public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat i
     return configuration;
   }
 
+  /**
+   * Removes a leading ',' from the read column ids in conf. Hive appends read column ids to the old ids during
+   * getRecordReader; for e.g. SELECT COUNT(*) the read ids are an empty string, so combining them with Hoodie's
+   * required projection ids yields e.g. ",2,0,3" and causes an error. An unset (null) property is left untouched.
+   */
+  private static synchronized Configuration cleanProjectionColumnIds(Configuration conf) {
+    String columnIds = conf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR);
+    if (columnIds != null && !columnIds.isEmpty() && columnIds.charAt(0) == ',') {
+      conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, columnIds.substring(1));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("The projection Ids: {" + columnIds + "} start with ','. First comma is removed");
+      }
+    }
+    return conf;
+  }
+
   @Override
   public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSplit split, final JobConf job,
       final Reporter reporter) throws IOException {
 
+    this.conf = cleanProjectionColumnIds(job);
     LOG.info("Before adding Hoodie columns, Projections :" + job.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR)
         + ", Ids :" + job.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR));