You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2021/12/08 20:28:37 UTC
[drill] branch master updated: DRILL-8069: remove use of excel sheet getLastRowNum (#2396)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new f60817d DRILL-8069: remove use of excel sheet getLastRowNum (#2396)
f60817d is described below
commit f60817decc5a1290b1114165d1a0c9dfb77e0d58
Author: PJ Fanning <pj...@users.noreply.github.com>
AuthorDate: Wed Dec 8 21:28:26 2021 +0100
DRILL-8069: remove use of excel sheet getLastRowNum (#2396)
* DRILL-8069: remove use of excel sheet getLastRowNum
* reintroduce logic to return early if the sheet has no rows
* Update ExcelBatchReader.java
* Update ExcelBatchReader.java
* Update ExcelBatchReader.java
* upgrade excel-streaming-reader
* remove '= 0' on variable
---
contrib/format-excel/pom.xml | 2 +-
.../drill/exec/store/excel/ExcelBatchReader.java | 28 +++++++++++++++-------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml
index be414a1..b618889 100644
--- a/contrib/format-excel/pom.xml
+++ b/contrib/format-excel/pom.xml
@@ -52,7 +52,7 @@
<dependency>
<groupId>com.github.pjfanning</groupId>
<artifactId>excel-streaming-reader</artifactId>
- <version>3.2.3</version>
+ <version>3.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
index 132da1f..ce39783 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
@@ -257,15 +257,18 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
}
private void buildColumnWritersFromProvidedSchema(TupleMetadata finalSchema) {
+ if (rowIterator == null) {
+ rowIterator = sheet.rowIterator();
+ }
+
// Case for empty sheet
- if (sheet.getLastRowNum() == 0) {
+ if (rowIterator == null || !rowIterator.hasNext()) {
return;
}
columnWriters = new ArrayList<>();
metadataColumnWriters = new ArrayList<>();
cellWriterArray = new ArrayList<>();
- rowIterator = sheet.iterator();
// Get the number of columns.
// This method also advances the row reader to the location of the first row of data
@@ -281,8 +284,12 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
}
private void getColumnHeaders(SchemaBuilder builder) {
+ if (rowIterator == null) {
+ rowIterator = sheet.rowIterator();
+ }
+
// Case for empty sheet
- if (sheet.getLastRowNum() == 0) {
+ if (rowIterator == null || !rowIterator.hasNext()) {
builder.buildSchema();
return;
}
@@ -290,8 +297,6 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
columnWriters = new ArrayList<>();
metadataColumnWriters = new ArrayList<>();
- rowIterator = sheet.iterator();
-
// Get the number of columns.
// This method also advances the row reader to the location of the first row of data
setFirstRow();
@@ -435,10 +440,15 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
}
private boolean nextLine(RowSetLoader rowWriter) {
- if (sheet.getLastRowNum() == 0) {
- // Case for empty sheet
- return false;
- } else if (recordCount >= readerConfig.lastRow) {
+ if (rowIterator == null) {
+ rowIterator = sheet.rowIterator();
+ }
+
+ if (currentRow == null && rowIterator != null && rowIterator.hasNext()) {
+ currentRow = rowIterator.next();
+ }
+
+ if (currentRow == null || recordCount >= readerConfig.lastRow) {
return false;
}