You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "WillAyd (via GitHub)" <gi...@apache.org> on 2023/05/11 23:54:32 UTC

[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #679: feat:(c/driver/postgres): Implement GetObjectsDbSchemas for Postgres

WillAyd commented on code in PR #679:
URL: https://github.com/apache/arrow-adbc/pull/679#discussion_r1191784402


##########
c/driver/postgresql/connection.cc:
##########
@@ -145,6 +145,40 @@ AdbcStatusCode PostgresConnection::GetInfo(struct AdbcConnection* connection,
   return BatchToArrayStream(&array, &schema, out, error);
 }
 
+AdbcStatusCode PostgresConnectionGetSchemasImpl(PGconn* conn, int depth,
+                                                struct ArrowArray* db_schemas_col,
+                                                struct AdbcError* error) {
+  struct ArrowArray* db_schema_items = db_schemas_col->children[0];
+  struct ArrowArray* schema_name_col = db_schema_items->children[0];
+  struct ArrowArray* schema_tables_col = db_schema_items->children[1];
+
+  const char* stmt =
+      "SELECT nspname FROM pg_catalog.pg_namespace WHERE "
+      "nspname !~ '^pg_' AND nspname <> 'information_schema'";
+  auto result_helper = PqResultHelper(conn, stmt);
+
+  pg_result* result = result_helper.Execute();
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {

Review Comment:
   This pg looping is getting pretty repetitive. Feels like there is a way to refactor into the existing ResultHelper class and create an iterator over the results



##########
c/driver/postgresql/connection.cc:
##########
@@ -182,7 +216,12 @@ AdbcStatusCode PostgresConnectionGetObjectsImpl(
         if (depth == ADBC_OBJECT_DEPTH_CATALOGS) {
           CHECK_NA(INTERNAL, ArrowArrayAppendNull(catalog_db_schemas_col, 1), error);
         } else {
-          return ADBC_STATUS_NOT_IMPLEMENTED;
+          if (depth >= ADBC_OBJECT_DEPTH_DB_SCHEMAS) {
+            RAISE_ADBC(PostgresConnectionGetSchemasImpl(conn, depth,

Review Comment:
   Here we are grabbing the associated schemas in a loop, but this brings up a structural question. I assume from the adbc.h header that GetInfo should maintain a hierarchy from catalog -> schema -> table -> constraints, etc...
   
   However, the current implementation (shared with sqlite) creates the overall array structure in a call to `AdbcInitConnectionObjectsSchema`, but at face value I don't see how this preserves the hierarchy between elements. With this happening in a loop, it ends up writing the schema name for as many databases as exist. I could certainly move it out of the loop to fix that, but figured out ask about the hierarchical intent of the result before moving too much around



##########
c/driver/postgresql/postgresql_test.cc:
##########
@@ -204,6 +203,53 @@ TEST_F(PostgresConnectionTest, GetObjectsGetCatalogs) {
   EXPECT_TRUE(seen_tempalte1_db) << "template1 database does not exist";
 }
 
+TEST_F(PostgresConnectionTest, GetObjectsGetDbSchemas) {
+  ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error));
+  ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error));
+
+  if (!quirks()->supports_get_objects()) {
+    GTEST_SKIP();
+  }
+
+  adbc_validation::StreamReader reader;
+  ASSERT_THAT(AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, nullptr,
+                                       nullptr, nullptr, nullptr, nullptr,
+                                       &reader.stream.value, &error),
+              IsOkStatus(&error));
+  ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
+  ASSERT_NO_FATAL_FAILURE(reader.Next());
+  ASSERT_NE(nullptr, reader.array->release);
+  ASSERT_GT(reader.array->length, 0);
+
+  bool seen_public = false;
+
+  do {
+    for (int64_t row = 0; row < reader.array->length; row++) {
+      ArrowStringView val =
+          ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], row);
+      auto val_str = std::string(val.data, val.size_bytes);
+      if (val_str == "postgres") {
+        // ASSERT_EQ(reader.array->children[1]->children[0]->length, 1);

Review Comment:
   This commented assertion and the one on L244 relate to the comment about whether or not a hierarchy should exist between the elements inspected by GetInfo



##########
c/driver/postgresql/postgresql_test.cc:
##########
@@ -204,6 +203,53 @@ TEST_F(PostgresConnectionTest, GetObjectsGetCatalogs) {
   EXPECT_TRUE(seen_tempalte1_db) << "template1 database does not exist";
 }
 
+TEST_F(PostgresConnectionTest, GetObjectsGetDbSchemas) {
+  ASSERT_THAT(AdbcConnectionNew(&connection, &error), IsOkStatus(&error));
+  ASSERT_THAT(AdbcConnectionInit(&connection, &database, &error), IsOkStatus(&error));
+
+  if (!quirks()->supports_get_objects()) {
+    GTEST_SKIP();
+  }
+
+  adbc_validation::StreamReader reader;
+  ASSERT_THAT(AdbcConnectionGetObjects(&connection, ADBC_OBJECT_DEPTH_DB_SCHEMAS, nullptr,
+                                       nullptr, nullptr, nullptr, nullptr,
+                                       &reader.stream.value, &error),
+              IsOkStatus(&error));
+  ASSERT_NO_FATAL_FAILURE(reader.GetSchema());
+  ASSERT_NO_FATAL_FAILURE(reader.Next());
+  ASSERT_NE(nullptr, reader.array->release);
+  ASSERT_GT(reader.array->length, 0);
+
+  bool seen_public = false;
+
+  do {
+    for (int64_t row = 0; row < reader.array->length; row++) {
+      ArrowStringView val =
+          ArrowArrayViewGetStringUnsafe(reader.array_view->children[0], row);
+      auto val_str = std::string(val.data, val.size_bytes);
+      if (val_str == "postgres") {
+        // ASSERT_EQ(reader.array->children[1]->children[0]->length, 1);

Review Comment:
   This commented assertion and the one on L244 relate to the question about whether or not a hierarchy should exist between the elements inspected by GetInfo



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org