You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2023/11/17 22:08:23 UTC
(arrow-adbc) branch main updated: perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 888bec92 perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)
888bec92 is described below
commit 888bec925d744a4db1199105b256ba52e62628c3
Author: Ryan Syed <sy...@icloud.com>
AuthorDate: Fri Nov 17 14:08:17 2023 -0800
perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)
## Replaced some cursor calls with static calls and filtered early when
possible
If a share isn't associated with a database then SELECT DATABASE_NAME
FROM INFORMATION_SCHEMA.DATABASES call will not list it and therefore it
doesn't seem necessary to call SHOW SHARES LIKE '%database_name%' to get
a list and check if a DB isn't created for it. Therefore, those checks
were removed.
### Comparison of performance improvements:
I have created another PR:
https://github.com/apache/arrow-adbc/pull/1299 with additional tests for
`GetObjects`.
| Test | Before | After | % Improvement |
|-----|---------------------|------------------|-------------------|
| CanGetObjectsAll | 17.6 s | 3 s | 82.5 |
| CanGetObjectsCatalogs | 503 ms | 333 ms | 33.80 |
| CanGetObjectsCatalogsWithPattern | 421 ms | 369 ms | 12.35 |
| CanGetObjectsDbSchemas | 4.4 s | 694 ms | 84.36 |
| CanGetObjectsDbSchemasWithPattern | 4 s | 807 ms | 79.825 |
| CanGetObjectsTables | 18 s | 2.8 s | 84.44 |
| CanGetObjectsTablesWithPattern | 17.6 s | 2.9 s | 83.52 |
---
go/adbc/driver/snowflake/connection.go | 174 +++++++++++++--------------------
1 file changed, 66 insertions(+), 108 deletions(-)
diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go
index 8dc77392..69160203 100644
--- a/go/adbc/driver/snowflake/connection.go
+++ b/go/adbc/driver/snowflake/connection.go
@@ -282,60 +282,20 @@ func (c *cnxn) getObjectsDbSchemas(ctx context.Context, depth adbc.ObjectDepth,
conditions := make([]string, 0)
if catalog != nil && *catalog != "" {
- conditions = append(conditions, ` CATALOG_NAME LIKE \'`+*catalog+`\'`)
+ conditions = append(conditions, ` CATALOG_NAME LIKE '`+*catalog+`'`)
}
if dbSchema != nil && *dbSchema != "" {
- conditions = append(conditions, ` SCHEMA_NAME LIKE \'`+*dbSchema+`\'`)
+ conditions = append(conditions, ` SCHEMA_NAME LIKE '`+*dbSchema+`'`)
}
cond := strings.Join(conditions, " AND ")
- if cond != "" {
- cond = `statement := 'SELECT * FROM (' || statement || ') WHERE ` + cond + `';`
- }
result = make(map[string][]string)
- const queryPrefix = `DECLARE
- c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
- res RESULTSET;
- counter INTEGER DEFAULT 0;
- statement VARCHAR DEFAULT '';
- BEGIN
- FOR rec IN c1 DO
- LET sharelist RESULTSET := (EXECUTE IMMEDIATE 'SHOW SHARES LIKE \'%' || rec.database_name || '%\'');
- LET cnt RESULTSET := (SELECT COUNT(*) FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())));
- LET cnt_cur CURSOR for cnt;
- LET share_cnt INTEGER DEFAULT 0;
- OPEN cnt_cur;
- FETCH cnt_cur INTO share_cnt;
- CLOSE cnt_cur;
-
- IF (share_cnt > 0) THEN
- LET c2 CURSOR for sharelist;
- LET created_on TIMESTAMP;
- LET kind VARCHAR DEFAULT '';
- LET share_name VARCHAR DEFAULT '';
- LET dbname VARCHAR DEFAULT '';
- OPEN c2;
- FETCH c2 INTO created_on, kind, share_name, dbname;
- CLOSE c2;
- IF (dbname = '') THEN
- CONTINUE;
- END IF;
- END IF;
- IF (counter > 0) THEN
- statement := statement || ' UNION ALL ';
- END IF;
- statement := statement || ' SELECT CATALOG_NAME, SCHEMA_NAME FROM ' || rec.database_name || '.INFORMATION_SCHEMA.SCHEMATA';
- counter := counter + 1;
- END FOR;
- `
- const querySuffix = `
- res := (EXECUTE IMMEDIATE :statement);
- RETURN TABLE (res);
- END;`
-
- query := queryPrefix + cond + querySuffix
+ query := `SELECT CATALOG_NAME, SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA`
+ if cond != "" {
+ query += " WHERE " + cond
+ }
var rows *sql.Rows
rows, err = c.sqldb.QueryContext(ctx, query)
if err != nil {
@@ -520,85 +480,29 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
conditions := make([]string, 0)
if catalog != nil && *catalog != "" {
- conditions = append(conditions, ` TABLE_CATALOG ILIKE \'`+*catalog+`\'`)
+ conditions = append(conditions, ` TABLE_CATALOG ILIKE '`+*catalog+`'`)
}
if dbSchema != nil && *dbSchema != "" {
- conditions = append(conditions, ` TABLE_SCHEMA ILIKE \'`+*dbSchema+`\'`)
+ conditions = append(conditions, ` TABLE_SCHEMA ILIKE '`+*dbSchema+`'`)
}
if tableName != nil && *tableName != "" {
- conditions = append(conditions, ` TABLE_NAME ILIKE \'`+*tableName+`\'`)
+ conditions = append(conditions, ` TABLE_NAME ILIKE '`+*tableName+`'`)
}
- const queryPrefix = `DECLARE
- c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
- res RESULTSET;
- counter INTEGER DEFAULT 0;
- statement VARCHAR DEFAULT '';
- BEGIN
- FOR rec IN c1 DO
- LET sharelist RESULTSET := (EXECUTE IMMEDIATE 'SHOW SHARES LIKE \'%' || rec.database_name || '%\'');
- LET cnt RESULTSET := (SELECT COUNT(*) FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())));
- LET cnt_cur CURSOR for cnt;
- LET share_cnt INTEGER DEFAULT 0;
- OPEN cnt_cur;
- FETCH cnt_cur INTO share_cnt;
- CLOSE cnt_cur;
-
- IF (share_cnt > 0) THEN
- LET c2 CURSOR for sharelist;
- LET created_on TIMESTAMP;
- LET kind VARCHAR DEFAULT '';
- LET share_name VARCHAR DEFAULT '';
- LET dbname VARCHAR DEFAULT '';
- OPEN c2;
- FETCH c2 INTO created_on, kind, share_name, dbname;
- CLOSE c2;
- IF (dbname = '') THEN
- CONTINUE;
- END IF;
- END IF;
- IF (counter > 0) THEN
- statement := statement || ' UNION ALL ';
- END IF;
- `
-
- const noSchema = `statement := statement || ' SELECT table_catalog, table_schema, table_name, table_type FROM ' || rec.database_name || '.INFORMATION_SCHEMA.TABLES';
- counter := counter + 1;
- END FOR;
- `
-
- const getSchema = `statement := statement ||
- ' SELECT
- table_catalog, table_schema, table_name, column_name,
- ordinal_position, is_nullable::boolean, data_type, numeric_precision,
- numeric_precision_radix, numeric_scale, is_identity::boolean,
- identity_generation, identity_increment,
- character_maximum_length, character_octet_length, datetime_precision, comment
- FROM ' || rec.database_name || '.INFORMATION_SCHEMA.COLUMNS';
-
- counter := counter + 1;
- END FOR;
- `
-
- const querySuffix = `
- res := (EXECUTE IMMEDIATE :statement);
- RETURN TABLE (res);
- END;`
-
// first populate the tables and table types
var rows *sql.Rows
var tblConditions []string
if len(tableType) > 0 {
- tblConditions = append(conditions, ` TABLE_TYPE IN (\'`+strings.Join(tableType, `\',\'`)+`\')`)
+ tblConditions = append(conditions, ` TABLE_TYPE IN ('`+strings.Join(tableType, `','`)+`')`)
} else {
tblConditions = conditions
}
cond := strings.Join(tblConditions, " AND ")
+ query := "SELECT table_catalog, table_schema, table_name, table_type FROM INFORMATION_SCHEMA.TABLES"
if cond != "" {
- cond = `statement := 'SELECT * FROM (' || statement || ') WHERE ` + cond + `';`
+ query += " WHERE " + cond
}
- query := queryPrefix + noSchema + cond + querySuffix
rows, err = c.sqldb.QueryContext(ctx, query)
if err != nil {
err = errToAdbcErr(adbc.StatusIO, err)
@@ -622,6 +526,16 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
}
if includeSchema {
+ conditions := make([]string, 0)
+ if catalog != nil && *catalog != "" {
+ conditions = append(conditions, ` TABLE_CATALOG ILIKE \'`+*catalog+`\'`)
+ }
+ if dbSchema != nil && *dbSchema != "" {
+ conditions = append(conditions, ` TABLE_SCHEMA ILIKE \'`+*dbSchema+`\'`)
+ }
+ if tableName != nil && *tableName != "" {
+ conditions = append(conditions, ` TABLE_NAME ILIKE \'`+*tableName+`\'`)
+ }
// if we need to include the schemas of the tables, make another fetch
// to fetch the columns and column info
if columnName != nil && *columnName != "" {
@@ -633,6 +547,50 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
}
cond = `statement := 'SELECT * FROM (' || statement || ')` + cond +
` ORDER BY table_catalog, table_schema, table_name, ordinal_position';`
+
+ var queryPrefix = `DECLARE
+ c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
+ res RESULTSET;
+ counter INTEGER DEFAULT 0;
+ statement VARCHAR DEFAULT '';
+ BEGIN
+ FOR rec IN c1 DO
+ IF (counter > 0) THEN
+ statement := statement || ' UNION ALL ';
+ END IF;
+ `
+
+ const getSchema = `statement := statement ||
+ ' SELECT
+ table_catalog, table_schema, table_name, column_name,
+ ordinal_position, is_nullable::boolean, data_type, numeric_precision,
+ numeric_precision_radix, numeric_scale, is_identity::boolean,
+ identity_generation, identity_increment,
+ character_maximum_length, character_octet_length, datetime_precision, comment
+ FROM ' || rec.database_name || '.INFORMATION_SCHEMA.COLUMNS';
+
+ counter := counter + 1;
+ END FOR;
+ `
+
+ const querySuffix = `
+ res := (EXECUTE IMMEDIATE :statement);
+ RETURN TABLE (res);
+ END;`
+
+ if catalog != nil && *catalog != "" {
+ queryPrefix = `DECLARE
+ c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES WHERE DATABASE_NAME ILIKE '` + *catalog + `';` +
+ `res RESULTSET;
+ counter INTEGER DEFAULT 0;
+ statement VARCHAR DEFAULT '';
+ BEGIN
+ FOR rec IN c1 DO
+ IF (counter > 0) THEN
+ statement := statement || ' UNION ALL ';
+ END IF;
+ `
+ }
query = queryPrefix + getSchema + cond + querySuffix
rows, err = c.sqldb.QueryContext(ctx, query)
if err != nil {