You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2023/04/18 15:26:36 UTC

[drill] 08/15: DRILL-8411: GoogleSheets Reader Will Not Read More than 1K Rows (#2774)

This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch 1.21
in repository https://gitbox.apache.org/repos/asf/drill.git

commit fd227154adfb5cd66d7e01f10c4401bbe776e978
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Wed Mar 15 13:42:38 2023 -0400

    DRILL-8411: GoogleSheets Reader Will Not Read More than 1K Rows (#2774)
---
 .../java/org/apache/drill/common/Typifier.java     |  4 ++--
 .../googlesheets/GoogleSheetsBatchReader.java      | 23 +++++++++++-----------
 .../columns/GoogleSheetsColumnWriter.java          |  6 ++++--
 .../utils/GoogleSheetsRangeBuilder.java            |  9 ++++++++-
 .../drill/exec/fn/impl/TestAggregateFunctions.java |  2 +-
 5 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/common/src/main/java/org/apache/drill/common/Typifier.java b/common/src/main/java/org/apache/drill/common/Typifier.java
index 1c4b5701f2..5a416fd828 100644
--- a/common/src/main/java/org/apache/drill/common/Typifier.java
+++ b/common/src/main/java/org/apache/drill/common/Typifier.java
@@ -272,7 +272,7 @@ public class Typifier {
    * @param date Input date string
    * @return LocalDateTime representation of the input String.
    */
-  private static LocalDateTime stringAsDateTime(String date) {
+  public static LocalDateTime stringAsDateTime(String date) {
     for (DateTimeFormatter format : formats) {
       try {
         return LocalDateTime.parse(date, format);
@@ -289,7 +289,7 @@ public class Typifier {
    * @param date Input date string
    * @return LocalDateTime representation of the input String.
    */
-  private static LocalDate stringAsDate(String date) {
+  public static LocalDate stringAsDate(String date) {
     for (DateTimeFormatter format : dateFormats) {
       try {
         return LocalDate.parse(date, format);
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
index db518fdb6a..90eebe8507 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/GoogleSheetsBatchReader.java
@@ -62,7 +62,7 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator>
   // The default batch size is 1k rows.  It appears that Google sets the maximum batch size at 1000
   // rows. There is conflicting information about this online, but during testing, ranges with more than
   // 1000 rows would throw invalid request errors.
-  private static final int BATCH_SIZE = 1000;
+  protected static final int BATCH_SIZE = 1000;
   private static final String SHEET_COLUMN_NAME = "_sheets";
   private static final String TITLE_COLUMN_NAME = "_title";
 
@@ -225,12 +225,7 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator>
   @Override
   public boolean next() {
     logger.debug("Processing batch.");
-    while (!rowWriter.isFull()) {
-      if (!processRow()) {
-        return false;
-      }
-    }
-    return true;
+    return processRow();
   }
 
   private boolean processRow() {
@@ -240,12 +235,16 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator>
         // Get next range
         String range = rangeBuilder.next();
         if (range == null) {
+          rangeBuilder.lastBatch();
           return false;
         }
         data = GoogleSheetsUtils.getDataFromRange(service, sheetID, range);
       } else {
         List<String> batches = rangeBuilder.nextBatch();
-        if (!batches.isEmpty()) {
+        if (batches == null) {
+          rangeBuilder.lastBatch();
+          return false;
+        } else if (!batches.isEmpty()) {
           data = GoogleSheetsUtils.getBatchData(service, sheetID, batches);
         } else {
           data = Collections.emptyList();
@@ -293,12 +292,14 @@ public class GoogleSheetsBatchReader implements ManagedReader<SchemaNegotiator>
       rowWriter.save();
     }
 
-    // If the results contained less than the batch size, stop iterating.
-    if (rowWriter.rowCount() < BATCH_SIZE) {
+    // If there is another batch, return true
+    if (rowWriter.rowCount() + BATCH_SIZE < rangeBuilder.getRowCount()) {
+      return true;
+    } else {
+      // If the results contained less than the batch size, stop iterating.
       rangeBuilder.lastBatch();
       return false;
     }
-    return true;
   }
 
   private void projectMetadata() {
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
index 471106c61d..37aa8cdfb6 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/columns/GoogleSheetsColumnWriter.java
@@ -19,6 +19,7 @@
 package org.apache.drill.exec.store.googlesheets.columns;
 
 import org.apache.commons.lang3.StringUtils;
+import org.apache.drill.common.Typifier;
 import org.apache.drill.exec.physical.resultSet.RowSetLoader;
 import org.apache.drill.exec.vector.accessor.ScalarWriter;
 import org.slf4j.Logger;
@@ -27,6 +28,7 @@ import org.slf4j.LoggerFactory;
 import java.time.Instant;
 import java.time.LocalDate;
 import java.time.LocalTime;
+import java.time.ZoneOffset;
 
 public abstract class GoogleSheetsColumnWriter {
   protected static final Logger logger = LoggerFactory.getLogger(GoogleSheetsColumnWriter.class);
@@ -106,7 +108,7 @@ public abstract class GoogleSheetsColumnWriter {
       if (StringUtils.isNotEmpty(stringValue)) {
         LocalDate finalValue;
         try {
-          finalValue = LocalDate.parse(stringValue);
+          finalValue = Typifier.stringAsDate(stringValue);
         } catch (NumberFormatException e) {
           finalValue = null;
         }
@@ -211,7 +213,7 @@ public abstract class GoogleSheetsColumnWriter {
       if (StringUtils.isNotEmpty(stringValue)) {
         Instant finalValue;
         try {
-          finalValue = Instant.parse(stringValue);
+          finalValue = Typifier.stringAsDateTime(stringValue).toInstant(ZoneOffset.UTC);
         } catch (NumberFormatException e) {
           finalValue = null;
         }
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
index f5e8417db5..eb03193bad 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/utils/GoogleSheetsRangeBuilder.java
@@ -146,7 +146,7 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> {
     if (!hasMore) {
       return null;
     } else if (getStartIndex() > getEndIndex()) {
-      hasMore = false;
+      lastBatch();
       return null;
     }
 
@@ -197,6 +197,9 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> {
   private List<String> buildBatchList() {
     if (isStarQuery) {
       return null;
+    } else if (getStartIndex() > getEndIndex()) {
+      hasMore = false;
+      return null;
     }
 
     List<String> batchList = new ArrayList<>();
@@ -238,6 +241,10 @@ public class GoogleSheetsRangeBuilder implements Iterator<String> {
     return buildBatchList();
   }
 
+  public int getRowCount() {
+    return rowCount;
+  }
+
   @Override
   public String toString() {
     return new PlanStringBuilder(this)
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
index edf619074b..804aea35d1 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestAggregateFunctions.java
@@ -1275,7 +1275,7 @@ public class TestAggregateFunctions extends ClusterTest {
   @Test
   public void testAggregateWithPivot() throws Exception {
     String query = "SELECT * FROM (\n" +
-        "SELECT education_level, salary, marital_status, extract(year from age(birth_date)) age\n" +
+        "SELECT education_level, salary, marital_status, extract(year from age('2023-02-23', birth_date)) age\n" +
         "FROM cp.`employee.json`)\n" +
         "PIVOT (avg(salary) avg_salary, avg(age) avg_age FOR marital_status IN ('M' married, 'S' single))";
     testBuilder()