You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2023/11/17 22:08:23 UTC

(arrow-adbc) branch main updated: perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)

This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 888bec92 perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)
888bec92 is described below

commit 888bec925d744a4db1199105b256ba52e62628c3
Author: Ryan Syed <sy...@icloud.com>
AuthorDate: Fri Nov 17 14:08:17 2023 -0800

    perf(go/adbc/driver/snowflake): GetObjects call is slow even when filters are provided (#1285)
    
    ## Replaced some cursor calls with static calls and filtered early when
    possible
    
    If a share isn't associated with a database then SELECT DATABASE_NAME
    FROM INFORMATION_SCHEMA.DATABASES call will not list it and therefore it
    doesn't seem necessary to call SHOW SHARES LIKE '%database_name%' to get
    a list and check if a DB isn't created for it. Therefore, those checks
    were removed.
    
    ### Comparison of performance improvements:
    
    I have created another PR:
    https://github.com/apache/arrow-adbc/pull/1299 with additional tests for
    `GetObjects`.
    
    | Test | Before | After | % Improvement |
    |-----|---------------------|------------------|-------------------|
    | CanGetObjectsAll | 17.6 s | 3 s | 82.5  |
    | CanGetObjectsCatalogs | 503 ms | 333 ms | 33.80 |
    | CanGetObjectsCatalogsWithPattern | 421 ms | 369 ms | 12.35 |
    | CanGetObjectsDbSchemas | 4.4 s | 694 ms | 84.36 |
    | CanGetObjectsDbSchemasWithPattern | 4 s | 807 ms | 79.825 |
    | CanGetObjectsTables | 18 s | 2.8 s | 84.44 |
    | CanGetObjectsTablesWithPattern | 17.6 s | 2.9 s | 83.52 |
---
 go/adbc/driver/snowflake/connection.go | 174 +++++++++++++--------------------
 1 file changed, 66 insertions(+), 108 deletions(-)

diff --git a/go/adbc/driver/snowflake/connection.go b/go/adbc/driver/snowflake/connection.go
index 8dc77392..69160203 100644
--- a/go/adbc/driver/snowflake/connection.go
+++ b/go/adbc/driver/snowflake/connection.go
@@ -282,60 +282,20 @@ func (c *cnxn) getObjectsDbSchemas(ctx context.Context, depth adbc.ObjectDepth,
 
 	conditions := make([]string, 0)
 	if catalog != nil && *catalog != "" {
-		conditions = append(conditions, ` CATALOG_NAME LIKE \'`+*catalog+`\'`)
+		conditions = append(conditions, ` CATALOG_NAME LIKE '`+*catalog+`'`)
 	}
 	if dbSchema != nil && *dbSchema != "" {
-		conditions = append(conditions, ` SCHEMA_NAME LIKE \'`+*dbSchema+`\'`)
+		conditions = append(conditions, ` SCHEMA_NAME LIKE '`+*dbSchema+`'`)
 	}
 
 	cond := strings.Join(conditions, " AND ")
-	if cond != "" {
-		cond = `statement := 'SELECT * FROM (' || statement || ') WHERE ` + cond + `';`
-	}
 
 	result = make(map[string][]string)
-	const queryPrefix = `DECLARE
-	    c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
-			res RESULTSET;
-			counter INTEGER DEFAULT 0;
-			statement VARCHAR DEFAULT '';
-		BEGIN
-		  FOR rec IN c1 DO
-				LET sharelist RESULTSET := (EXECUTE IMMEDIATE 'SHOW SHARES LIKE \'%' || rec.database_name || '%\'');
-				LET cnt RESULTSET := (SELECT COUNT(*) FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())));
-				LET cnt_cur CURSOR for cnt;
-				LET share_cnt INTEGER DEFAULT 0;
-				OPEN cnt_cur;
-				FETCH cnt_cur INTO share_cnt;
-				CLOSE cnt_cur;
-
-				IF (share_cnt > 0) THEN
-					LET c2 CURSOR for sharelist;
-					LET created_on TIMESTAMP;
-					LET kind VARCHAR DEFAULT '';
-					LET share_name VARCHAR DEFAULT '';
-					LET dbname VARCHAR DEFAULT '';
-					OPEN c2;
-					FETCH c2 INTO created_on, kind, share_name, dbname;
-					CLOSE c2;
-					IF (dbname = '') THEN
-						CONTINUE;
-					END IF;
-				END IF;
 
-				IF (counter > 0) THEN
-				  statement := statement || ' UNION ALL ';
-				END IF;
-				statement := statement || ' SELECT CATALOG_NAME, SCHEMA_NAME FROM ' || rec.database_name || '.INFORMATION_SCHEMA.SCHEMATA';
-				counter := counter + 1;
-			END FOR;
-		  `
-	const querySuffix = `
-	    res := (EXECUTE IMMEDIATE :statement);
-			RETURN TABLE (res);
-		END;`
-
-	query := queryPrefix + cond + querySuffix
+	query := `SELECT CATALOG_NAME, SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA`
+	if cond != "" {
+		query += " WHERE " + cond
+	}
 	var rows *sql.Rows
 	rows, err = c.sqldb.QueryContext(ctx, query)
 	if err != nil {
@@ -520,85 +480,29 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
 
 	conditions := make([]string, 0)
 	if catalog != nil && *catalog != "" {
-		conditions = append(conditions, ` TABLE_CATALOG ILIKE \'`+*catalog+`\'`)
+		conditions = append(conditions, ` TABLE_CATALOG ILIKE '`+*catalog+`'`)
 	}
 	if dbSchema != nil && *dbSchema != "" {
-		conditions = append(conditions, ` TABLE_SCHEMA ILIKE \'`+*dbSchema+`\'`)
+		conditions = append(conditions, ` TABLE_SCHEMA ILIKE '`+*dbSchema+`'`)
 	}
 	if tableName != nil && *tableName != "" {
-		conditions = append(conditions, ` TABLE_NAME ILIKE \'`+*tableName+`\'`)
+		conditions = append(conditions, ` TABLE_NAME ILIKE '`+*tableName+`'`)
 	}
 
-	const queryPrefix = `DECLARE
-		c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
-		res RESULTSET;
-		counter INTEGER DEFAULT 0;
-		statement VARCHAR DEFAULT '';
-	BEGIN
-		FOR rec IN c1 DO
-			LET sharelist RESULTSET := (EXECUTE IMMEDIATE 'SHOW SHARES LIKE \'%' || rec.database_name || '%\'');
-			LET cnt RESULTSET := (SELECT COUNT(*) FROM TABLE(RESULT_SCAN(LAST_QUERY_ID())));
-			LET cnt_cur CURSOR for cnt;
-			LET share_cnt INTEGER DEFAULT 0;
-			OPEN cnt_cur;
-			FETCH cnt_cur INTO share_cnt;
-			CLOSE cnt_cur;
-
-			IF (share_cnt > 0) THEN
-				LET c2 CURSOR for sharelist;
-				LET created_on TIMESTAMP;
-				LET kind VARCHAR DEFAULT '';
-				LET share_name VARCHAR DEFAULT '';
-				LET dbname VARCHAR DEFAULT '';
-				OPEN c2;
-				FETCH c2 INTO created_on, kind, share_name, dbname;
-				CLOSE c2;
-				IF (dbname = '') THEN
-					CONTINUE;
-				END IF;
-			END IF;
-			IF (counter > 0) THEN
-				statement := statement || ' UNION ALL ';
-			END IF;
-			`
-
-	const noSchema = `statement := statement || ' SELECT table_catalog, table_schema, table_name, table_type FROM ' || rec.database_name || '.INFORMATION_SCHEMA.TABLES';
-			counter := counter + 1;
-		END FOR;
-		`
-
-	const getSchema = `statement := statement ||
-		' SELECT
-				table_catalog, table_schema, table_name, column_name,
-				ordinal_position, is_nullable::boolean, data_type, numeric_precision,
-				numeric_precision_radix, numeric_scale, is_identity::boolean,
-				identity_generation, identity_increment,
-				character_maximum_length, character_octet_length, datetime_precision, comment
-		FROM ' || rec.database_name || '.INFORMATION_SCHEMA.COLUMNS';
-
-		  counter := counter + 1;
-		END FOR;
-	  `
-
-	const querySuffix = `
-		res := (EXECUTE IMMEDIATE :statement);
-		RETURN TABLE (res);
-	END;`
-
 	// first populate the tables and table types
 	var rows *sql.Rows
 	var tblConditions []string
 	if len(tableType) > 0 {
-		tblConditions = append(conditions, ` TABLE_TYPE IN (\'`+strings.Join(tableType, `\',\'`)+`\')`)
+		tblConditions = append(conditions, ` TABLE_TYPE IN ('`+strings.Join(tableType, `','`)+`')`)
 	} else {
 		tblConditions = conditions
 	}
 
 	cond := strings.Join(tblConditions, " AND ")
+	query := "SELECT table_catalog, table_schema, table_name, table_type FROM INFORMATION_SCHEMA.TABLES"
 	if cond != "" {
-		cond = `statement := 'SELECT * FROM (' || statement || ') WHERE ` + cond + `';`
+		query += " WHERE " + cond
 	}
-	query := queryPrefix + noSchema + cond + querySuffix
 	rows, err = c.sqldb.QueryContext(ctx, query)
 	if err != nil {
 		err = errToAdbcErr(adbc.StatusIO, err)
@@ -622,6 +526,16 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
 	}
 
 	if includeSchema {
+		conditions := make([]string, 0)
+		if catalog != nil && *catalog != "" {
+			conditions = append(conditions, ` TABLE_CATALOG ILIKE \'`+*catalog+`\'`)
+		}
+		if dbSchema != nil && *dbSchema != "" {
+			conditions = append(conditions, ` TABLE_SCHEMA ILIKE \'`+*dbSchema+`\'`)
+		}
+		if tableName != nil && *tableName != "" {
+			conditions = append(conditions, ` TABLE_NAME ILIKE \'`+*tableName+`\'`)
+		}
 		// if we need to include the schemas of the tables, make another fetch
 		// to fetch the columns and column info
 		if columnName != nil && *columnName != "" {
@@ -633,6 +547,50 @@ func (c *cnxn) getObjectsTables(ctx context.Context, depth adbc.ObjectDepth, cat
 		}
 		cond = `statement := 'SELECT * FROM (' || statement || ')` + cond +
 			` ORDER BY table_catalog, table_schema, table_name, ordinal_position';`
+
+		var queryPrefix = `DECLARE
+			c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES;
+			res RESULTSET;
+			counter INTEGER DEFAULT 0;
+			statement VARCHAR DEFAULT '';
+		BEGIN
+			FOR rec IN c1 DO
+				IF (counter > 0) THEN
+					statement := statement || ' UNION ALL ';
+				END IF;
+				`
+
+		const getSchema = `statement := statement ||
+			' SELECT
+					table_catalog, table_schema, table_name, column_name,
+					ordinal_position, is_nullable::boolean, data_type, numeric_precision,
+					numeric_precision_radix, numeric_scale, is_identity::boolean,
+					identity_generation, identity_increment,
+					character_maximum_length, character_octet_length, datetime_precision, comment
+			FROM ' || rec.database_name || '.INFORMATION_SCHEMA.COLUMNS';
+
+			  counter := counter + 1;
+			END FOR;
+		  `
+
+		const querySuffix = `
+			res := (EXECUTE IMMEDIATE :statement);
+			RETURN TABLE (res);
+		END;`
+
+		if catalog != nil && *catalog != "" {
+			queryPrefix = `DECLARE
+				c1 CURSOR FOR SELECT DATABASE_NAME FROM INFORMATION_SCHEMA.DATABASES WHERE DATABASE_NAME ILIKE '` + *catalog + `';` +
+				`res RESULTSET;
+				counter INTEGER DEFAULT 0;
+				statement VARCHAR DEFAULT '';
+			BEGIN
+				FOR rec IN c1 DO
+					IF (counter > 0) THEN
+						statement := statement || ' UNION ALL ';
+					END IF;
+					`
+		}
 		query = queryPrefix + getSchema + cond + querySuffix
 		rows, err = c.sqldb.QueryContext(ctx, query)
 		if err != nil {