You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "WillAyd (via GitHub)" <gi...@apache.org> on 2023/04/07 19:29:19 UTC

[GitHub] [arrow-adbc] WillAyd opened a new pull request, #577: feat(c/driver/postgres): Implement GetTableSchema

WillAyd opened a new pull request, #577:
URL: https://github.com/apache/arrow-adbc/pull/577

   Definitely a few things need to be ironed out and potentially done as pre-cursors, but I think this is far along enough to review


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] paleolimbot commented on pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "paleolimbot (via GitHub)" <gi...@apache.org>.
paleolimbot commented on PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#issuecomment-1501016990

   Just a note that #573 is also related to the postgres type system! (In particular, it factors out the PostgresType -> ArrowSchema step that, as you noted here, is needed in more than one place).


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] lidavidm merged pull request #577: feat(c/driver/postgresql): Implement GetTableSchema

Posted by "lidavidm (via GitHub)" <gi...@apache.org>.
lidavidm merged PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "lidavidm (via GitHub)" <gi...@apache.org>.
lidavidm commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1161364187


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +48,97 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  // TODO: sqlite uses a StringBuilder class that seems roughly equivalent

Review Comment:
   See #92 and #314. I sort of wanted to avoid having a single mega cmake project (like Arrow) but it does make it a little harder to share common snippets.
   
   It may be time to pull out a `c/driver/utils` that can (for now) be referenced in multiple drivers (the same way `c/vendor/nanoarrow` is)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "lidavidm (via GitHub)" <gi...@apache.org>.
lidavidm commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1184916709


##########
c/driver/postgresql/connection.cc:
##########
@@ -17,15 +17,39 @@
 
 #include "connection.h"
 
+#include <cinttypes>
 #include <cstring>
 #include <memory>
+#include <string>
 
 #include <adbc.h>
+#include <libpq-fe.h>
 
 #include "database.h"
 #include "utils.h"
 
 namespace adbcpq {
+
+class PqResultHelper {

Review Comment:
   nit: wrap in anonymous namespace



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +71,76 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  PqResultHelper result_helper = PqResultHelper{conn_, query.buffer};
+  StringBuilderReset(&query);
+  pg_result* result = result_helper.Execute();
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);

Review Comment:
   This would leave a half-initialized schema in the output argument if we error below which may trip up callers; maybe use a nanoarrow::UniqueSchema and move the schema at the end?



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;

Review Comment:
   I suppose it's just here twice so it's OK for now



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1185541860


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +71,76 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  PqResultHelper result_helper = PqResultHelper{conn_, query.buffer};
+  StringBuilderReset(&query);
+  pg_result* result = result_helper.Execute();
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);

Review Comment:
   Cool thanks for the tip. Think I did what you are asking - if so then probably want to do the same thing in statement.cc:InferSchema



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#issuecomment-1528081187

   After #628 (whether accepted or not) will come back to this and clean up the string handling. I think the general structure is in place though, leveraging the great work @paleolimbot did on the type mapping


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1160917606


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +48,97 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  // TODO: sqlite uses a StringBuilder class that seems roughly equivalent
+  // to a similar tool in arrow/util/string_builder.h but wasn't clear on
+  // structure and how to use, so relying on simplistic appends for now
+  AdbcStatusCode final_status;
+
+  std::string query =
+      "SELECT attname, atttypid "
+      "FROM pg_catalog.pg_class AS cls "
+      "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+      "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+      "WHERE attr.attnum >= 0 AND cls.oid = '";
+  if (db_schema != nullptr) {
+    query.append(db_schema);
+    query.append(".");
+  }
+  query.append(table_name);
+  query.append("'::regclass::oid");
+
+  // char* stmt = PQescapeLiteral(conn_, query.c_str(), query.length());
+  // if (stmt == nullptr) {
+  //   SetError(error, "Failed to get table schema: ", PQerrorMessage(conn_));
+  //  return ADBC_STATUS_INVALID_ARGUMENT;
+  // }
+
+  pg_result* result = PQexec(conn_, query.c_str());
+  // PQfreemem(stmt);
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);
+    CHECK_NA_ADBC(ArrowSchemaSetTypeStruct(schema, num_rows), error);
+
+    // TODO: much of this code is copied from statement.cc InferSchema

Review Comment:
   I _think_ this should be moved out of statement.cc and probably put into type.cc



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +48,97 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  // TODO: sqlite uses a StringBuilder class that seems roughly equivalent

Review Comment:
   To clarify this comment a bit further, knowing that the sqlite connector was just of put together as a POC I wasn't sure if we wanted to pull the StringBuilder out of that and make it shared across all drivers or if I should provide a separate implementation for postgres (seems like that might have been started?)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1183072135


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));

Review Comment:
   A bit tangential to this but I don't see the current test suite as validating that we are safe against SQL injection attacks. Probably a reasonable follow up to set those types of test in place



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1183868618


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));

Review Comment:
   See https://github.com/apache/arrow-adbc/issues/643



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1183886626


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  pg_result* result = PQexec(conn_, query.buffer);
+  StringBuilderReset(&query);
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);
+    CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, num_rows), error);
+
+    ArrowError na_error;
+    for (int row = 0; row < num_rows; row++) {
+      const char* colname = PQgetvalue(result, row, 0);
+      const Oid pg_oid = static_cast<uint32_t>(
+          std::strtol(PQgetvalue(result, row, 1), /*str_end=*/nullptr, /*base=*/10));
+
+      PostgresType pg_type;
+      if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) {

Review Comment:
   Have to excuse my poor C++, but I'm guessing this is as simple as a `PgResultHelper` class with an `execute` method that has `PQclear` in its destructor?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "lidavidm (via GitHub)" <gi...@apache.org>.
lidavidm commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1183605071


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;

Review Comment:
   Might be worth factoring out something?



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  pg_result* result = PQexec(conn_, query.buffer);
+  StringBuilderReset(&query);
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);
+    CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, num_rows), error);
+
+    ArrowError na_error;
+    for (int row = 0; row < num_rows; row++) {
+      const char* colname = PQgetvalue(result, row, 0);
+      const Oid pg_oid = static_cast<uint32_t>(
+          std::strtol(PQgetvalue(result, row, 1), /*str_end=*/nullptr, /*base=*/10));
+
+      PostgresType pg_type;
+      if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) {

Review Comment:
   Do we need to clear the result set + disconnect in this branch? Maybe we can use final_status instead of the early return?
   
   (...maybe we want an RAII helper over pg_result)



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  pg_result* result = PQexec(conn_, query.buffer);
+  StringBuilderReset(&query);
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);
+    CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(schema, num_rows), error);
+
+    ArrowError na_error;
+    for (int row = 0; row < num_rows; row++) {
+      const char* colname = PQgetvalue(result, row, 0);
+      const Oid pg_oid = static_cast<uint32_t>(
+          std::strtol(PQgetvalue(result, row, 1), /*str_end=*/nullptr, /*base=*/10));
+
+      PostgresType pg_type;
+      if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) {
+        SetError(error, "%s%d%s%s%s%" PRIu32, "Column #", row + 1, " (\"", colname,
+                 "\") has unknown type code ", pg_oid);
+        return ADBC_STATUS_NOT_IMPLEMENTED;
+      }
+
+      CHECK_NA(INTERNAL, pg_type.WithFieldName(colname).SetSchema(schema->children[row]),
+               error);
+    }
+  } else {
+    SetError(error, "%s%s", "Failed to get table schema: ", PQerrorMessage(conn_));
+    final_status = ADBC_STATUS_IO;
+  }
+  PQclear(result);
+
+  // Disconnect since PostgreSQL connections can be heavy.
+  {
+    AdbcStatusCode status = database_->Disconnect(&conn_, error);
+    if (status != ADBC_STATUS_OK) final_status = status;
+  }

Review Comment:
   We don't connect in this method, so we probably shouldn't disconnect if I'm not mistaken?



##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));

Review Comment:
   Agreed, do you want to file something?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] WillAyd commented on a diff in pull request #577: feat(c/driver/postgres): Implement GetTableSchema

Posted by "WillAyd (via GitHub)" <gi...@apache.org>.
WillAyd commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1183883042


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +49,80 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;

Review Comment:
   Think its worth creating a private method to resolve the schema / table name? I can see that being used a few places. May also be worth creating in postgres_utils 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [arrow-adbc] lidavidm commented on a diff in pull request #577: feat(c/driver/postgresql): Implement GetTableSchema

Posted by "lidavidm (via GitHub)" <gi...@apache.org>.
lidavidm commented on code in PR #577:
URL: https://github.com/apache/arrow-adbc/pull/577#discussion_r1186013361


##########
c/driver/postgresql/connection.cc:
##########
@@ -47,7 +71,76 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
                                                   const char* table_name,
                                                   struct ArrowSchema* schema,
                                                   struct AdbcError* error) {
-  return ADBC_STATUS_NOT_IMPLEMENTED;
+  AdbcStatusCode final_status = ADBC_STATUS_OK;
+  struct StringBuilder query = {0};
+  if (StringBuilderInit(&query, /*initial_size=*/256) != 0) return ADBC_STATUS_INTERNAL;
+
+  if (StringBuilderAppend(
+          &query, "%s",
+          "SELECT attname, atttypid "
+          "FROM pg_catalog.pg_class AS cls "
+          "INNER JOIN pg_catalog.pg_attribute AS attr ON cls.oid = attr.attrelid "
+          "INNER JOIN pg_catalog.pg_type AS typ ON attr.atttypid = typ.oid "
+          "WHERE attr.attnum >= 0 AND cls.oid = '") != 0)
+    return ADBC_STATUS_INTERNAL;
+
+  if (db_schema != nullptr) {
+    char* schema = PQescapeIdentifier(conn_, db_schema, strlen(db_schema));
+    if (schema == NULL) {
+      SetError(error, "%s%s", "Faled to escape schema: ", PQerrorMessage(conn_));
+      return ADBC_STATUS_INVALID_ARGUMENT;
+    }
+
+    int ret = StringBuilderAppend(&query, "%s%s", schema, ".");
+    PQfreemem(schema);
+
+    if (ret != 0) return ADBC_STATUS_INTERNAL;
+  }
+
+  char* table = PQescapeIdentifier(conn_, table_name, strlen(table_name));
+  if (table == NULL) {
+    SetError(error, "%s%s", "Failed to escape table: ", PQerrorMessage(conn_));
+    return ADBC_STATUS_INVALID_ARGUMENT;
+  }
+
+  int ret = StringBuilderAppend(&query, "%s%s", table_name, "'::regclass::oid");
+  PQfreemem(table);
+
+  if (ret != 0) return ADBC_STATUS_INTERNAL;
+
+  PqResultHelper result_helper = PqResultHelper{conn_, query.buffer};
+  StringBuilderReset(&query);
+  pg_result* result = result_helper.Execute();
+
+  ExecStatusType pq_status = PQresultStatus(result);
+  if (pq_status == PGRES_TUPLES_OK) {
+    int num_rows = PQntuples(result);
+    ArrowSchemaInit(schema);

Review Comment:
   Ah, yeah. Though that got removed by the other refactor anyways.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org