You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2021/12/08 20:28:37 UTC

[drill] branch master updated: DRILL-8069: remove use of excel sheet getLastRowNum (#2396)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new f60817d  DRILL-8069: remove use of excel sheet getLastRowNum (#2396)
f60817d is described below

commit f60817decc5a1290b1114165d1a0c9dfb77e0d58
Author: PJ Fanning <pj...@users.noreply.github.com>
AuthorDate: Wed Dec 8 21:28:26 2021 +0100

    DRILL-8069: remove use of excel sheet getLastRowNum (#2396)
    
    * DRILL-8069: remove use of excel sheet getLastRowNum
    
    * reintroduce logic to short-circuit when the sheet has no rows
    
    * Update ExcelBatchReader.java
    
    * Update ExcelBatchReader.java
    
    * Update ExcelBatchReader.java
    
    * upgrade excel-streaming-reader
    
    * remove '= 0' initializer from variable
---
 contrib/format-excel/pom.xml                       |  2 +-
 .../drill/exec/store/excel/ExcelBatchReader.java   | 28 +++++++++++++++-------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml
index be414a1..b618889 100644
--- a/contrib/format-excel/pom.xml
+++ b/contrib/format-excel/pom.xml
@@ -52,7 +52,7 @@
     <dependency>
       <groupId>com.github.pjfanning</groupId>
       <artifactId>excel-streaming-reader</artifactId>
-      <version>3.2.3</version>
+      <version>3.2.4</version>
     </dependency>
     <dependency>
       <groupId>org.apache.logging.log4j</groupId>
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
index 132da1f..ce39783 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
@@ -257,15 +257,18 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
   }
 
   private void buildColumnWritersFromProvidedSchema(TupleMetadata finalSchema) {
+    if (rowIterator == null) {
+      rowIterator = sheet.rowIterator();
+    }
+
     // Case for empty sheet
-    if (sheet.getLastRowNum() == 0) {
+    if (rowIterator == null || !rowIterator.hasNext()) {
       return;
     }
 
     columnWriters = new ArrayList<>();
     metadataColumnWriters = new ArrayList<>();
     cellWriterArray = new ArrayList<>();
-    rowIterator = sheet.iterator();
 
     // Get the number of columns.
     // This method also advances the row reader to the location of the first row of data
@@ -281,8 +284,12 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
   }
 
   private void getColumnHeaders(SchemaBuilder builder) {
+    if (rowIterator == null) {
+      rowIterator = sheet.rowIterator();
+    }
+
     // Case for empty sheet
-    if (sheet.getLastRowNum() == 0) {
+    if (rowIterator == null || !rowIterator.hasNext()) {
       builder.buildSchema();
       return;
     }
@@ -290,8 +297,6 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
     columnWriters = new ArrayList<>();
     metadataColumnWriters = new ArrayList<>();
 
-    rowIterator = sheet.iterator();
-
     // Get the number of columns.
     // This method also advances the row reader to the location of the first row of data
     setFirstRow();
@@ -435,10 +440,15 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
   }
 
   private boolean nextLine(RowSetLoader rowWriter) {
-    if (sheet.getLastRowNum() == 0) {
-      // Case for empty sheet
-      return false;
-    } else if (recordCount >= readerConfig.lastRow) {
+    if (rowIterator == null) {
+      rowIterator = sheet.rowIterator();
+    }
+
+    if (currentRow == null && rowIterator != null && rowIterator.hasNext()) {
+      currentRow = rowIterator.next();
+    }
+
+    if (currentRow == null || recordCount >= readerConfig.lastRow) {
       return false;
     }