You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/10/21 15:58:37 UTC
[drill] branch master updated: DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 76c9e7390f DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)
76c9e7390f is described below
commit 76c9e7390f3a9740d13341672faa14f914f0c541
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Fri Oct 21 11:58:30 2022 -0400
DRILL-8335: Add Ability to Query GoogleSheets Tabs by Index (#2680)
* Initial commit
* Final Commit
---
contrib/storage-googlesheets/README.md | 9 ++
.../exec/store/googlesheets/DrillDataStore.java | 7 +-
.../schema/GoogleSheetsDrillSchema.java | 39 +++++-
.../googlesheets/TestGoogleSheetsQueries.java | 131 +++++++++++++++++++++
4 files changed, 183 insertions(+), 3 deletions(-)
diff --git a/contrib/storage-googlesheets/README.md b/contrib/storage-googlesheets/README.md
index 4bba995140..ca27ff69f4 100644
--- a/contrib/storage-googlesheets/README.md
+++ b/contrib/storage-googlesheets/README.md
@@ -92,6 +92,15 @@ FROM <plugin name>.<sheet ID>.<tab name>
```
Note that you must specify the tab name to successfully query Google Sheets.
+### Accessing Tabs by Index
+If you don't know the names of the available tabs in your GoogleSheets document, you can query the sheets by index using the `tab[n]` format. Indexing starts at zero and every Sheets document must have at least one sheet. Note that this must be enclosed in backticks.
+
+```sql
+SELECT *
+FROM googlesheets.<sheet id>.`tab[0]`
+```
+
+
### Metadata
You can obtain a list of available sheets by querying the `INFORMATION_SCHEMA` as shown below. Assuming that you have a connection to Google Sheets called `googlesheets`:
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
index 5ef932e494..8dc184cf4c 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/DrillDataStore.java
@@ -60,7 +60,12 @@ public class DrillDataStore<V extends Serializable> extends AbstractMemoryDataSt
logger.debug("Saving credentials to token table");
String accessToken = new String(keyValueMap.get(tokenTable.ACCESS_TOKEN_KEY), StandardCharsets.UTF_8);
String refreshToken = new String(keyValueMap.get(tokenTable.REFRESH_TOKEN_KEY), StandardCharsets.UTF_8);
- String expiresIn = new String(keyValueMap.get(tokenTable.EXPIRES_IN_KEY), StandardCharsets.UTF_8);
+
+ String expiresIn = "";
+ // Avoids an NPE if the expires_in field is not set.
+ if (keyValueMap.containsKey(tokenTable.EXPIRES_IN_KEY)) {
+ expiresIn = new String(keyValueMap.get(tokenTable.EXPIRES_IN_KEY), StandardCharsets.UTF_8);
+ }
tokenTable.setAccessToken(accessToken);
tokenTable.setRefreshToken(refreshToken);
tokenTable.setExpiresIn(expiresIn);
diff --git a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
index d84132f654..07814dd6f2 100644
--- a/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
+++ b/contrib/storage-googlesheets/src/main/java/org/apache/drill/exec/store/googlesheets/schema/GoogleSheetsDrillSchema.java
@@ -39,18 +39,24 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
/**
* This class represents the actual tab within a GoogleSheets document.
*/
public class GoogleSheetsDrillSchema extends AbstractSchema {
private static final Logger logger = LoggerFactory.getLogger(GoogleSheetsDrillSchema.class);
+ private static final Pattern TAB_PATTERN = Pattern.compile("^tab\\[(\\d+)\\]$");
private final Map<String, DynamicDrillTable> activeTables = CaseInsensitiveMap.newHashMap();
+ private final List<DynamicDrillTable> tableList;
+
private final GoogleSheetsStoragePlugin plugin;
private final Sheets sheetsService;
private final SchemaConfig schemaConfig;
@@ -67,6 +73,7 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
this.fileToken = fileToken;
this.parent = (GoogleSheetsRootSchema) parent;
this.sheetsService = sheetsService;
+ this.tableList = new ArrayList<>();
}
@Override
@@ -81,6 +88,18 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
populateActiveTables();
}
+ // If the user provides the index of a tab, return the table at that index.
+ int tabIndex = getTabIndex(tableName);
+ if (tabIndex > -1) {
+ if (tabIndex > tableList.size()) {
+ throw UserException.dataReadError()
+ .message("Tab not found at index " + tabIndex)
+ .build(logger);
+ }
+ return tableList.get(tabIndex);
+ }
+
+ // Otherwise, retrieve the table from the active tables list.
logger.debug("Getting table: {}", tableName);
DynamicDrillTable table = activeTables.computeIfAbsent(tableName, this::getDrillTable);
if (table != null) {
@@ -92,6 +111,22 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
return table;
}
+ private int getTabIndex(String tableName) {
+ Matcher matcher = TAB_PATTERN.matcher(tableName);
+ if (matcher.find()) {
+ int tabIndex = Integer.parseInt(matcher.group(1));
+ if (tabIndex < 0) {
+ throw UserException.internalError()
+ .message("Google Sheets tab index must be greater than zero.")
+ .build(logger);
+ }
+
+ return tabIndex;
+ } else {
+ return -1;
+ }
+ }
+
private DynamicDrillTable getDrillTable(String tableName) {
logger.debug("Getting Drill Table {}", tableName);
return activeTables.get(tableName);
@@ -155,8 +190,8 @@ public class GoogleSheetsDrillSchema extends AbstractSchema {
};
}
- private DynamicDrillTable registerTable(String name, DynamicDrillTable table) {
+ private void registerTable(String name, DynamicDrillTable table) {
activeTables.put(name, table);
- return table;
+ tableList.add(table);
}
}
diff --git a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
index 1f1d8be142..24eab41040 100644
--- a/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
+++ b/contrib/storage-googlesheets/src/test/java/org/apache/drill/exec/store/googlesheets/TestGoogleSheetsQueries.java
@@ -134,6 +134,38 @@ public class TestGoogleSheetsQueries extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testStarQueryWithTabs() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ String sql = String.format("SELECT * FROM googlesheets.`%s`.`tab[1]` WHERE `Col2` < 6.0", sheetID);
+ RowSet results = queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("Col1", MinorType.VARCHAR)
+ .addNullable("Col2", MinorType.FLOAT8)
+ .addNullable("Col3", MinorType.DATE)
+ .buildSchema();
+
+ RowSet expected = client.rowSetBuilder(expectedSchema)
+ .addRow("Rosaline Thales", 1.0, null)
+ .addRow("Abdolhossein Detlev", 2.0001, LocalDate.parse("2020-04-30"))
+ .addRow(null, 4.0, LocalDate.parse("2020-06-30"))
+ .addRow("Yunus Elena", 3.5, LocalDate.parse("2021-01-15"))
+ .addRow("Swaran Ohiyesa", -63.8, LocalDate.parse("2021-04-08"))
+ .addRow("Kalani Godabert", 0.0, LocalDate.parse("2021-06-28"))
+ .addRow("Caishen Origenes", 5.0E-7, LocalDate.parse("2021-07-09"))
+ .addRow("Toufik Gurgen", 2.0, LocalDate.parse("2021-11-05"))
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
+
@Test
public void testSchemataInformationSchema() throws Exception {
try {
@@ -185,6 +217,30 @@ public class TestGoogleSheetsQueries extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testImplicitFieldsWithTabs() throws Exception {
+ // Tests special case of only implicit metadata fields being projected.
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ String sql = String.format("SELECT _sheets FROM googlesheets.`%s`.`tab[1]` LIMIT 1", sheetID);
+ RowSet results = queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addArray("_sheets", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = client.rowSetBuilder(expectedSchema)
+ .addRow((Object) strArray("TestSheet1", "MixedSheet"))
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
+
@Ignore("Implicit columns have some projection issues. See DRILL-7080. Once this is resolved, re-enable this test.")
@Test
public void testStarAndImplicitFields() throws Exception {
@@ -284,6 +340,53 @@ public class TestGoogleSheetsQueries extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testInvalidTab() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ try {
+ String sql = String.format("SELECT * FROM googlesheets.`%s`.`tab[5]` WHERE `Col2` < 6.0", sheetID);
+ queryBuilder().sql(sql).run();
+ fail();
+ } catch (Exception e) {
+ assertTrue(e.getMessage().contains("Tab not found at index 5"));
+ }
+ }
+
+ @Test
+ public void testWithExplicitColumnsWithTab() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ String sql = String.format("SELECT Col1, Col3 FROM googlesheets.`%s`.`tab[1]` WHERE `Col2` < 6.0", sheetID);
+ RowSet results = queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("Col1", MinorType.VARCHAR)
+ .addNullable("Col3", MinorType.DATE)
+ .buildSchema();
+
+ RowSet expected = client.rowSetBuilder(expectedSchema)
+ .addRow("Rosaline Thales", null)
+ .addRow("Abdolhossein Detlev", LocalDate.parse("2020-04-30"))
+ .addRow(null, LocalDate.parse("2020-06-30"))
+ .addRow("Yunus Elena", LocalDate.parse("2021-01-15"))
+ .addRow("Swaran Ohiyesa", LocalDate.parse("2021-04-08"))
+ .addRow("Kalani Godabert",LocalDate.parse("2021-06-28"))
+ .addRow("Caishen Origenes", LocalDate.parse("2021-07-09"))
+ .addRow("Toufik Gurgen", LocalDate.parse("2021-11-05"))
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
@Test
public void testWithExplicitColumnsInDifferentOrder() throws Exception {
try {
@@ -425,6 +528,34 @@ public class TestGoogleSheetsQueries extends ClusterTest {
new RowSetComparison(expected).verifyAndClearAll(results);
}
+ @Test
+ public void testSchemaProvisioningWithTab() throws Exception {
+ try {
+ initializeTokens("googlesheets");
+ } catch (PluginException e) {
+ fail(e.getMessage());
+ }
+
+ String sql = String.format("SELECT * FROM table(`googlesheets`.`%s`.`tab[1]` (schema => 'inline=(`Col1` VARCHAR, `Col2` INTEGER, `Col3` VARCHAR)')) LIMIT 5", sheetID);
+ RowSet results = queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("Col1", MinorType.VARCHAR)
+ .addNullable("Col2", MinorType.INT)
+ .addNullable("Col3", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = client.rowSetBuilder(expectedSchema)
+ .addRow("Rosaline Thales", 1, null)
+ .addRow("Abdolhossein Detlev", 2, "2020-04-30")
+ .addRow("Yosuke Simon", null, "2020-05-22")
+ .addRow(null, 4, "2020-06-30")
+ .addRow("Avitus Stribog", 500000, "2020-07-27")
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
/**
* This function is used for testing only. It initializes a {@link PersistentTokenTable} and populates it
* with a valid access and refresh token.