You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jd...@apache.org on 2020/03/05 09:32:24 UTC
[hive] branch master updated: HIVE-22973: Handle 0 length batches in LlapArrowRowRecordReader (Shubham Chaurasia, reviewed by Jason Dere)

This is an automated email from the ASF dual-hosted git repository.

jdere pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 20e50f2  HIVE-22973: Handle 0 length batches in LlapArrowRowRecordReader (Shubham Chaurasia, reviewed by Jason Dere)
20e50f2 is described below

commit 20e50f2dc33c6a980d53d220354fd3c2ee0341fe
Author: Shubham Chaurasia <sc...@cloudera.com>
AuthorDate: Thu Mar 5 01:31:34 2020 -0800

    HIVE-22973: Handle 0 length batches in LlapArrowRowRecordReader (Shubham Chaurasia, reviewed by Jason Dere)
---
 .../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 45 ++++++++++++++++++++++
 .../hadoop/hive/llap/LlapArrowRowRecordReader.java | 22 +++++++----
 2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
index 0fac1e4..3c0532c 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java
@@ -73,6 +73,7 @@ public abstract class BaseJdbcWithMiniLlap {
   private static String dataFileDir;
   private static Path kvDataFilePath;
   private static Path dataTypesFilePath;
+  private static Path over10KFilePath;
 
   protected static MiniHS2 miniHS2 = null;
   protected static HiveConf conf = null;
@@ -86,6 +87,7 @@ public abstract class BaseJdbcWithMiniLlap {
     dataFileDir = conf.get("test.data.files").replace('\\', '/').replace("c:", "");
     kvDataFilePath = new Path(dataFileDir, "kv1.txt");
     dataTypesFilePath = new Path(dataFileDir, "datatypes.txt");
+    over10KFilePath = new Path(dataFileDir, "over10k");
     Map<String, String> confOverlay = new HashMap<String, String>();
     miniHS2.start(confOverlay);
     miniHS2.getDFS().getFileSystem().mkdirs(new Path("/apps_staging_dir/anonymous"));
@@ -185,6 +187,21 @@ public abstract class BaseJdbcWithMiniLlap {
     stmt.close();
   }
 
+  protected void createOver10KTable(String tableName) throws Exception {
+    try (Statement stmt = hs2Conn.createStatement()) {
+
+      String createQuery =
+          "create table " + tableName + " (t tinyint, si smallint, i int, b bigint, f float, d double, bo boolean, "
+              + "s string, ts timestamp, `dec` decimal(4,2), bin binary) row format delimited fields terminated by '|'";
+
+      // create table
+      stmt.execute("DROP TABLE IF EXISTS " + tableName);
+      stmt.execute(createQuery);
+      // load data
+      stmt.execute("load data local inpath '" + over10KFilePath.toString() + "' into table " + tableName);
+    }
+  }
+
   @Test(timeout = 60000)
   public void testLlapInputFormatEndToEnd() throws Exception {
     createTestTable("testtab1");
@@ -206,6 +223,34 @@ public abstract class BaseJdbcWithMiniLlap {
     assertEquals(0, rowCount);
   }
 
+  @Test(timeout = 300000)
+  public void testLlapInputFormatEndToEndWithMultipleBatches() throws Exception {
+    String tableName = "over10k_table";
+
+    createOver10KTable(tableName);
+
+    int rowCount;
+
+    // Try with more than one batch
+    RowCollector rowCollector = new RowCollector();
+    String query = "select * from " + tableName;
+    rowCount = processQuery(query, 1, rowCollector);
+    assertEquals(9999, rowCount);
+
+    // Try with less than one batch
+    rowCollector.rows.clear();
+    query = "select * from " + tableName + " where s = 'rachel brown'";
+    rowCount = processQuery(query, 1, rowCollector);
+    assertEquals(17, rowCount);
+
+    // Try empty rows query
+    rowCollector.rows.clear();
+    query = "select * from " + tableName + " where false";
+    rowCount = processQuery(query, 1, rowCollector);
+    assertEquals(0, rowCount);
+  }
+
+
   @Test(timeout = 60000)
   public void testNonAsciiStrings() throws Exception {
     createTestTable("testtab_nonascii");
diff --git a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapArrowRowRecordReader.java b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapArrowRowRecordReader.java
index d4179d5..24a82c7 100644
--- a/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapArrowRowRecordReader.java
+++ b/llap-ext-client/src/java/org/apache/hadoop/hive/llap/LlapArrowRowRecordReader.java
@@ -61,16 +61,22 @@ public class LlapArrowRowRecordReader extends LlapRowRecordReader {
       //This is either the first batch or we've used up the current batch buffer
       batchSize = 0;
       rowIndex = 0;
-      hasNext = reader.next(key, data);
-      if(hasNext) {
+
+      // since HIVE-22856, a zero length batch doesn't mean that we won't have any more batches
+      // we can have more batches with data even after a zero length batch
+      // we should keep trying until we get a batch with some data or reader.next() returns false
+      while (batchSize == 0 && (hasNext = reader.next(key, data))) {
+        List<FieldVector> vectors = batchData.getVectorSchemaRoot().getFieldVectors();
+        //hasNext implies there is some column in the batch
+        Preconditions.checkState(vectors.size() > 0);
+        //All the vectors have the same length,
+        //we can get the number of rows from the first vector
+        batchSize = vectors.get(0).getValueCount();
+      }
+
+      if (hasNext) {
         //There is another batch to buffer
         try {
-          List<FieldVector> vectors = batchData.getVectorSchemaRoot().getFieldVectors();
-          //hasNext implies there is some column in the batch
-          Preconditions.checkState(vectors.size() > 0);
-          //All the vectors have the same length,
-          //we can get the number of rows from the first vector
-          batchSize = vectors.get(0).getValueCount();
           ArrowWrapperWritable wrapper = new ArrowWrapperWritable(batchData.getVectorSchemaRoot());
           currentBatch = (Object[][]) serde.deserialize(wrapper);
           StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();