You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/10/21 15:58:37 UTC

[drill] branch master updated: DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 76c9e7390f DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)
76c9e7390f is described below

commit 76c9e7390f3a9740d13341672faa14f914f0c541
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Fri Oct 21 11:58:30 2022 -0400

    DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)
    
    * Initial commit
    
    * Final Commit
---
 contrib/storage-googlesheets/README.md             |   9 ++
 .../exec/store/googlesheets/DrillDataStore.java    |   7 +-
 .../schema/GoogleSheetsDrillSchema.java            |  39 +++++-
 .../googlesheets/TestGoogleSheetsQueries.java      | 131 +++++++++++++++++++++
 4 files changed, 183 insertions(+), 3 deletions(-)

diff --git a/contrib/storage-googlesheets/README.md b/contrib/storage-googlesheets/README.md
index 4bba995140..ca27ff69f4 100644
--- a/contrib/storage-googlesheets/README.md
+++ b/contrib/storage-googlesheets/README.md
@@ -92,6 +92,15 @@ FROM <plugin name>.<sheet ID>.<tab name>
 ```
 Note that you must specify the tab name to successfully query Google Sheets.
 
+### Accessing Tabs by Index
+If you don't know the names of the available tabs in your GoogleSheets document, you can query the sheets by index using the `tab[n]` format.  Indexing starts at zero and every Sheets document must have at least one sheet.  Note that this must be enclosed in backticks.
+
+```sql
+SELECT * 
+FROM googlesheets.<sheet id>.`tab[0]`
+```
+
+
 ### Metadata
 You can obtain a list of available sheets by querying the `INFORMATION_SCHEMA` as shown below.  Assuming that you have a connection to Google Sheets called `googlesheets`:
 
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
index 5ef932e494..8dc184cf4c 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
@@ -60,7 +60,12 @@ public class DrillDataStore<V extends Serializable> extends AbstractMemoryDataSt
     logger.debug("Saving credentials to token table");
     String accessToken = new String(keyValueMap.get(tokenTable.ACCESS_TOKEN_KEY), StandardCharsets.UTF_8);
     String refreshToken = new String(keyValueMap.get(tokenTable.REFRESH_TOKEN_KEY), StandardCharsets.UTF_8);
-    String expiresIn = new String(keyValueMap.get(tokenTable.EXPIRES_IN_KEY), StandardCharsets.UTF_8);
+
+    String expiresIn = "";
+    // Avoids an NPE if the expires_in field is not set.
+    if (keyValueMap.containsKey(tokenTable.EXPIRES_IN_KEY)) {
+      expiresIn = new String(keyValueMap.get(tokenTable.EXPIRES_IN_KEY), StandardCharsets.UTF_8);
+    }
     tokenTable.setAccessToken(accessToken);
     tokenTable.setRefreshToken(refreshToken);
     tokenTable.setExpiresIn(expiresIn);
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
index d84132f654..07814dd6f2 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
@@ -39,18 +39,24 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * This class represents the actual tab within a GoogleSheets document.
  */
 public class GoogleSheetsDrillSchema extends AbstractSchema {
   private static final Logger logger = LoggerFactory.getLogger(GoogleSheetsDrillSchema.class);
+  private static final Pattern TAB_PATTERN = Pattern.compile("^tab\\[(\\d+)\\]$");
 
   private final Map<String, DynamicDrillTable> activeTables = CaseInsensitiveMap.newHashMap();
+  private final List<DynamicDrillTable> tableList;
+
   private final GoogleSheetsStoragePlugin plugin;
   private final Sheets sheetsService;
   private final SchemaConfig schemaConfig;
@@ -67,6 +73,7 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
     this.fileToken = fileToken;
     this.parent = (GoogleSheetsRootSchema) parent;
     this.sheetsService = sheetsService;
+    this.tableList = new ArrayList<>();
   }
 
   @Override
@@ -81,6 +88,18 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
       populateActiveTables();
     }
 
+    // If the user provides the index of a tab, return the table at that index.
+    int tabIndex = getTabIndex(tableName);
+    if (tabIndex > -1) {
+      if (tabIndex > tableList.size()) {
+        throw UserException.dataReadError()
+          .message("Tab not found at index " + tabIndex)
+          .build(logger);
+      }
+      return tableList.get(tabIndex);
+    }
+
+    // Otherwise, retrieve the table from the active tables list.
     logger.debug("Getting table: {}", tableName);
     DynamicDrillTable table = activeTables.computeIfAbsent(tableName, this::getDrillTable);
     if (table != null) {
@@ -92,6 +111,22 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
     return table;
   }
 
+  private int getTabIndex(String tableName) {
+    Matcher matcher = TAB_PATTERN.matcher(tableName);
+    if (matcher.find()) {
+      int tabIndex = Integer.parseInt(matcher.group(1));
+      if (tabIndex < 0) {
+        throw UserException.internalError()
+          .message("Google Sheets tab index must be greater than zero.")
+          .build(logger);
+      }
+
+      return tabIndex;
+    } else {
+      return -1;
+    }
+  }
+
   private DynamicDrillTable getDrillTable(String tableName) {
     logger.debug("Getting Drill Table {}", tableName);
     return activeTables.get(tableName);
@@ -155,8 +190,8 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
     };
   }
 
-  private DynamicDrillTable registerTable(String name, DynamicDrillTable table) {
+  private void registerTable(String name, DynamicDrillTable table) {
     activeTables.put(name, table);
-    return table;
+    tableList.add(table);
   }
 }
diff --git a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
index 1f1d8be142..24eab41040 100644
--- a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
+++ b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
@@ -134,6 +134,38 @@ public class TestGoogleSheetsQueries extends ClusterTest {
     new RowSetComparison(expected).verifyAndClearAll(results);
   }
 
+  @Test
+  public void testStarQueryWithTabs() throws Exception {
+    try {
+      initializeTokens("googlesheets");
+    } catch (PluginException e) {
+      fail(e.getMessage());
+    }
+
+    String sql = String.format("SELECT * FROM googlesheets.`%s`.`tab[1]` WHERE `Col2` < 6.0", sheetID);
+    RowSet results = queryBuilder().sql(sql).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .addNullable("Col1", MinorType.VARCHAR)
+      .addNullable("Col2", MinorType.FLOAT8)
+      .addNullable("Col3", MinorType.DATE)
+      .buildSchema();
+
+    RowSet expected = client.rowSetBuilder(expectedSchema)
+      .addRow("Rosaline  Thales", 1.0, null)
+      .addRow("Abdolhossein  Detlev", 2.0001, LocalDate.parse("2020-04-30"))
+      .addRow(null, 4.0, LocalDate.parse("2020-06-30"))
+      .addRow("Yunus  Elena", 3.5, LocalDate.parse("2021-01-15"))
+      .addRow("Swaran  Ohiyesa", -63.8, LocalDate.parse("2021-04-08"))
+      .addRow("Kalani  Godabert", 0.0, LocalDate.parse("2021-06-28"))
+      .addRow("Caishen  Origenes", 5.0E-7, LocalDate.parse("2021-07-09"))
+      .addRow("Toufik  Gurgen", 2.0, LocalDate.parse("2021-11-05"))
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
+
   @Test
   public void testSchemataInformationSchema() throws Exception {
     try {
@@ -185,6 +217,30 @@ public class TestGoogleSheetsQueries extends ClusterTest {
     new RowSetComparison(expected).verifyAndClearAll(results);
   }
 
+  @Test
+  public void testImplicitFieldsWithTabs() throws Exception {
+    // Tests special case of only implicit metadata fields being projected.
+    try {
+      initializeTokens("googlesheets");
+    } catch (PluginException e) {
+      fail(e.getMessage());
+    }
+
+    String sql = String.format("SELECT _sheets FROM googlesheets.`%s`.`tab[1]` LIMIT 1", sheetID);
+    RowSet results = queryBuilder().sql(sql).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .addArray("_sheets", MinorType.VARCHAR)
+      .buildSchema();
+
+    RowSet expected = client.rowSetBuilder(expectedSchema)
+      .addRow((Object) strArray("TestSheet1", "MixedSheet"))
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
+
   @Ignore("Implicit columns have some projection issues. See DRILL-7080.  Once this is resolved, re-enable this test.")
   @Test
   public void testStarAndImplicitFields() throws Exception {
@@ -284,6 +340,53 @@ public class TestGoogleSheetsQueries extends ClusterTest {
     new RowSetComparison(expected).verifyAndClearAll(results);
   }
 
+  @Test
+  public void testInvalidTab() throws Exception {
+    try {
+      initializeTokens("googlesheets");
+    } catch (PluginException e) {
+      fail(e.getMessage());
+    }
+
+    try {
+      String sql = String.format("SELECT * FROM googlesheets.`%s`.`tab[5]` WHERE `Col2` < 6.0", sheetID);
+      queryBuilder().sql(sql).run();
+      fail();
+    } catch (Exception e) {
+      assertTrue(e.getMessage().contains("Tab not found at index 5"));
+    }
+  }
+
+  @Test
+  public void testWithExplicitColumnsWithTab() throws Exception {
+    try {
+      initializeTokens("googlesheets");
+    } catch (PluginException e) {
+      fail(e.getMessage());
+    }
+
+    String sql = String.format("SELECT Col1, Col3 FROM googlesheets.`%s`.`tab[1]` WHERE `Col2` < 6.0", sheetID);
+    RowSet results = queryBuilder().sql(sql).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .addNullable("Col1", MinorType.VARCHAR)
+      .addNullable("Col3", MinorType.DATE)
+      .buildSchema();
+
+    RowSet expected = client.rowSetBuilder(expectedSchema)
+      .addRow("Rosaline  Thales", null)
+      .addRow("Abdolhossein  Detlev", LocalDate.parse("2020-04-30"))
+      .addRow(null, LocalDate.parse("2020-06-30"))
+      .addRow("Yunus  Elena", LocalDate.parse("2021-01-15"))
+      .addRow("Swaran  Ohiyesa", LocalDate.parse("2021-04-08"))
+      .addRow("Kalani  Godabert",LocalDate.parse("2021-06-28"))
+      .addRow("Caishen  Origenes", LocalDate.parse("2021-07-09"))
+      .addRow("Toufik  Gurgen", LocalDate.parse("2021-11-05"))
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
   @Test
   public void testWithExplicitColumnsInDifferentOrder() throws Exception {
     try {
@@ -425,6 +528,34 @@ public class TestGoogleSheetsQueries extends ClusterTest {
     new RowSetComparison(expected).verifyAndClearAll(results);
   }
 
+  @Test
+  public void testSchemaProvisioningWithTab() throws Exception {
+    try {
+      initializeTokens("googlesheets");
+    } catch (PluginException e) {
+      fail(e.getMessage());
+    }
+
+    String sql = String.format("SELECT * FROM table(`googlesheets`.`%s`.`tab[1]` (schema => 'inline=(`Col1` VARCHAR, `Col2` INTEGER, `Col3` VARCHAR)')) LIMIT 5", sheetID);
+    RowSet results = queryBuilder().sql(sql).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .addNullable("Col1", MinorType.VARCHAR)
+      .addNullable("Col2", MinorType.INT)
+      .addNullable("Col3", MinorType.VARCHAR)
+      .buildSchema();
+
+    RowSet expected = client.rowSetBuilder(expectedSchema)
+      .addRow("Rosaline  Thales", 1, null)
+      .addRow("Abdolhossein  Detlev", 2, "2020-04-30")
+      .addRow("Yosuke  Simon", null, "2020-05-22")
+      .addRow(null, 4, "2020-06-30")
+      .addRow("Avitus  Stribog", 500000, "2020-07-27")
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
   /**
    * This function is used for testing only.  It initializes a {@link PersistentTokenTable} and populates it
    * with a valid access and refresh token.