You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2023/06/14 19:15:14 UTC

[arrow-adbc] branch main updated: docs: add a C++ quickstart (#794)

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 489a84e3 docs: add a C++ quickstart (#794)
489a84e3 is described below

commit 489a84e301092cc7f438bf33c2c41ab15a7856f4
Author: David Li <li...@gmail.com>
AuthorDate: Wed Jun 14 15:15:08 2023 -0400

    docs: add a C++ quickstart (#794)
    
    Fixes #250.
---
 docs/source/cpp/index.rst                     |   1 +
 docs/source/cpp/{index.rst => quickstart.rst} |  14 +-
 docs/source/cpp/recipe/CMakeLists.txt         |  38 +++++
 docs/source/cpp/recipe/quickstart.cc          | 211 ++++++++++++++++++++++++++
 docs/source/ext/adbc_cookbook.py              |   8 +-
 5 files changed, 260 insertions(+), 12 deletions(-)

diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/index.rst
index 295d8733..add0e29e 100644
--- a/docs/source/cpp/index.rst
+++ b/docs/source/cpp/index.rst
@@ -22,6 +22,7 @@ C and C++
 .. toctree::
    :maxdepth: 2
 
+   quickstart
    driver_manager
    concurrency
    api/index
diff --git a/docs/source/cpp/index.rst b/docs/source/cpp/quickstart.rst
similarity index 88%
copy from docs/source/cpp/index.rst
copy to docs/source/cpp/quickstart.rst
index 295d8733..a382f387 100644
--- a/docs/source/cpp/index.rst
+++ b/docs/source/cpp/quickstart.rst
@@ -15,13 +15,9 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-=========
-C and C++
-=========
+==========
+Quickstart
+==========
 
-.. toctree::
-   :maxdepth: 2
-
-   driver_manager
-   concurrency
-   api/index
+.. recipe:: recipe/quickstart.cc
+   :language: cpp
diff --git a/docs/source/cpp/recipe/CMakeLists.txt b/docs/source/cpp/recipe/CMakeLists.txt
new file mode 100644
index 00000000..80899d16
--- /dev/null
+++ b/docs/source/cpp/recipe/CMakeLists.txt
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+cmake_minimum_required(VERSION 3.18)
+include(FetchContent)
+
+project(adbc_cookbook_recipes
+        VERSION "1.0.0"
+        LANGUAGES CXX)
+
+# The recipes use C++17 language features (e.g. if statements with
+# initializers), so fail at configure time instead of silently falling back
+# to an older standard.
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+find_package(AdbcDriverManager REQUIRED)
+
+# Nanoarrow is pinned to an exact commit.  GIT_SHALLOW is deliberately not
+# used: per the CMake documentation it only works when GIT_TAG is a branch or
+# tag name, not a commit hash.
+fetchcontent_declare(nanoarrow
+                     GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
+                     GIT_TAG 75dc80774a2f8c2c66b50fccf5d3e5fa13fab796)
+fetchcontent_makeavailable(nanoarrow)
+
+add_executable(quickstart quickstart.cc)
+# nanoarrow.h is picked up from the dist/ directory of the fetched sources.
+target_include_directories(quickstart SYSTEM PRIVATE ${nanoarrow_SOURCE_DIR}/dist)
+target_link_libraries(quickstart PRIVATE AdbcDriverManager::adbc_driver_manager_shared
+                                         nanoarrow)
diff --git a/docs/source/cpp/recipe/quickstart.cc b/docs/source/cpp/recipe/quickstart.cc
new file mode 100644
index 00000000..459a6345
--- /dev/null
+++ b/docs/source/cpp/recipe/quickstart.cc
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// RECIPE STARTS HERE
+
+/// Here we'll briefly tour basic features of ADBC with the SQLite
+/// driver in C++17.
+
+/// Installation
+/// ============
+///
+/// This quickstart is actually a literate C++ file.  You can clone
+/// the repository, build the sample, and follow along.
+///
+/// We'll assume you're using conda-forge_ for dependencies.  CMake, a
+/// C++17 compiler, and the ADBC libraries are required.  They can be
+/// installed as follows:
+///
+/// .. code-block:: shell
+///
+///    mamba install cmake compilers libadbc-driver-manager libadbc-driver-sqlite
+///
+/// .. _conda-forge: https://conda-forge.org/
+
+/// Building
+/// ========
+///
+/// We'll use CMake_ here.  From a source checkout of the ADBC repository:
+///
+/// .. code-block:: shell
+///
+///    cd docs/source/cpp/recipe
+///    cmake .
+///    cmake --build . --target quickstart
+///    ./quickstart
+///
+/// .. _CMake: https://cmake.org/
+
+/// Using ADBC
+/// ==========
+///
+/// Let's start with some includes:
+
+// For EXIT_SUCCESS
+#include <cstdlib>
+// For strerror
+#include <cstring>
+#include <iostream>
+
+#include <adbc.h>
+#include <nanoarrow.h>
+
+/// Then we'll add some (very basic) error-checking helpers.
+
+// Error-checking helper for ADBC calls.
+// Assumes that there is an AdbcError named `error` in scope.
+#define CHECK_ADBC(EXPR)                                          \
+  if (AdbcStatusCode status = (EXPR); status != ADBC_STATUS_OK) { \
+    if (error.message != nullptr) {                               \
+      std::cerr << error.message << std::endl;                    \
+    }                                                             \
+    return EXIT_FAILURE;                                          \
+  }
+
+// Error-checking helper for ArrowArrayStream.
+#define CHECK_STREAM(STREAM, EXPR)                            \
+  if (int status = (EXPR); status != 0) {                     \
+    std::cerr << "(" << std::strerror(status) << "): ";       \
+    const char* message = (STREAM).get_last_error(&(STREAM)); \
+    if (message != nullptr) {                                 \
+      std::cerr << message << std::endl;                      \
+    } else {                                                  \
+      std::cerr << "(no error message)" << std::endl;         \
+    }                                                         \
+    return EXIT_FAILURE;                                      \
+  }
+
+// Error-checking helper for Nanoarrow.
+#define CHECK_NANOARROW(EXPR)                                              \
+  if (int status = (EXPR); status != 0) {                                  \
+    std::cerr << "(" << std::strerror(status) << "): failed" << std::endl; \
+    return EXIT_FAILURE;                                                   \
+  }
+
+// Runs the quickstart: loads the SQLite driver through the driver manager,
+// executes a single query, prints the schema and rows of the result, and
+// releases the resources it acquired.  The CHECK_* helpers make main return
+// EXIT_FAILURE at the first failing call; otherwise it returns EXIT_SUCCESS.
+int main() {
+  /// Loading the Driver
+  /// ------------------
+  ///
+  /// We'll load the SQLite driver using the driver manager.  We don't
+  /// have to explicitly link to the driver this way.
+
+  AdbcError error = {};
+
+  AdbcDatabase database = {};
+  CHECK_ADBC(AdbcDatabaseNew(&database, &error));
+  /// The way the driver manager knows what driver we want is via the
+  /// ``driver`` option.
+  CHECK_ADBC(AdbcDatabaseSetOption(&database, "driver", "adbc_driver_sqlite", &error));
+  CHECK_ADBC(AdbcDatabaseInit(&database, &error));
+
+  /// Creating a Connection
+  /// ---------------------
+  ///
+  /// ADBC distinguishes between "databases", "connections", and
+  /// "statements".  A "database" holds shared state across multiple
+  /// connections.  For example, in the SQLite driver, it holds the
+  /// actual instance of SQLite.  A "connection" is one connection to
+  /// the database.
+
+  AdbcConnection connection = {};
+  CHECK_ADBC(AdbcConnectionNew(&connection, &error));
+  CHECK_ADBC(AdbcConnectionInit(&connection, &database, &error));
+
+  /// Creating a Statement
+  /// --------------------
+  ///
+  /// Statements let us execute queries.  They are used for both
+  /// prepared and non-prepared ("ad-hoc") queries.
+
+  AdbcStatement statement = {};
+  CHECK_ADBC(AdbcStatementNew(&connection, &statement, &error));
+
+  /// Executing a Query
+  /// -----------------
+  ///
+  /// We execute a query by setting the query on the statement, then
+  /// calling :cpp:func:`AdbcStatementExecuteQuery`.  The results come
+  /// back through the `Arrow C Data Interface`_.
+  ///
+  /// .. _Arrow C Data Interface: https://arrow.apache.org/docs/format/CDataInterface.html
+
+  struct ArrowArrayStream stream = {};
+  int64_t rows_affected = -1;
+
+  CHECK_ADBC(AdbcStatementSetSqlQuery(&statement, "SELECT 42 AS THEANSWER", &error));
+  CHECK_ADBC(AdbcStatementExecuteQuery(&statement, &stream, &rows_affected, &error));
+
+  /// While the API gives us the number of rows, the SQLite driver
+  /// can't know how many rows there are in the result set ahead of
+  /// time, so this value will just be ``-1`` to indicate that the
+  /// value is not known.
+  std::cout << "Got " << rows_affected << " rows" << std::endl;
+
+  /// We need an Arrow implementation to read the actual results.  We
+  /// can use `Arrow C++`_ or `Nanoarrow`_ for that.  For simplicity,
+  /// we'll use Nanoarrow here.  (The CMake configuration for this
+  /// example downloads and builds Nanoarrow from source as part of
+  /// the build.)
+  ///
+  /// .. _Arrow C++: https://arrow.apache.org/docs/cpp/index.html
+  /// .. _Nanoarrow: https://github.com/apache/arrow-nanoarrow
+
+  /// First we'll get the schema of the data:
+  ArrowSchema schema = {};
+  CHECK_STREAM(stream, stream.get_schema(&stream, &schema));
+
+  /// Then we can use Nanoarrow to print it:
+  char buf[1024] = {};
+  ArrowSchemaToString(&schema, buf, sizeof(buf), /*recursive=*/1);
+  std::cout << buf << std::endl;
+
+  /// Now we can read the data.  The data comes as a stream of Arrow
+  /// record batches.
+  while (true) {
+    ArrowArray batch = {};
+    CHECK_STREAM(stream, stream.get_next(&stream, &batch));
+
+    if (batch.release == nullptr) {
+      // Stream has ended
+      break;
+    }
+
+    /// We can use Nanoarrow to print out the data, too.
+    ArrowArrayView view = {};
+    CHECK_NANOARROW(ArrowArrayViewInitFromSchema(&view, &schema, nullptr));
+    CHECK_NANOARROW(ArrowArrayViewSetArray(&view, &batch, nullptr));
+    std::cout << "Got a batch with " << batch.length << " rows" << std::endl;
+    for (int64_t i = 0; i < batch.length; i++) {
+      std::cout << "THEANSWER[" << i
+                << "] = " << view.children[0]->buffer_views[1].data.as_int64[i]
+                << std::endl;
+    }
+    ArrowArrayViewReset(&view);
+    // The batch returned by get_next is owned by the caller, so release it
+    // once the view that pointed into it has been reset.
+    batch.release(&batch);
+  }
+
+  std::cout << "Finished reading result set" << std::endl;
+  // The schema returned by get_schema is owned by the caller as well.
+  schema.release(&schema);
+  stream.release(&stream);
+
+  /// Cleanup
+  /// -------
+  ///
+  /// At the end, we must release all our resources.
+
+  CHECK_ADBC(AdbcStatementRelease(&statement, &error));
+  CHECK_ADBC(AdbcConnectionRelease(&connection, &error));
+  CHECK_ADBC(AdbcDatabaseRelease(&database, &error));
+  return EXIT_SUCCESS;
+}
diff --git a/docs/source/ext/adbc_cookbook.py b/docs/source/ext/adbc_cookbook.py
index 93533e77..3b00cbc4 100644
--- a/docs/source/ext/adbc_cookbook.py
+++ b/docs/source/ext/adbc_cookbook.py
@@ -24,6 +24,7 @@ import docutils
 from docutils.parsers.rst import directives
 from docutils.statemachine import StringList
 from sphinx.util.docutils import SphinxDirective
+from sphinx.util.nodes import nested_parse_with_titles
 from sphinx.util.typing import OptionSpec
 
 
@@ -98,7 +99,7 @@ class RecipeDirective(SphinxDirective):
                 if line.strip().startswith(prefix):
                     line_type = "prose"
                     # Remove prefix and next whitespace
-                    line = line[len(prefix) + 1 :]
+                    line = line.lstrip()[len(prefix) + 1 :]
                 else:
                     line_type = "code"
 
@@ -124,9 +125,10 @@ class RecipeDirective(SphinxDirective):
         for fragment in fragments:
             parsed = docutils.nodes.Element()
             if fragment.kind == "prose":
-                self.state.nested_parse(
+                # TODO: this doesn't seem to handle title hierarchy right
+                nested_parse_with_titles(
+                    self.state,
                     StringList([line.content for line in fragment.lines], source=""),
-                    self.content_offset,
                     parsed,
                 )
             elif fragment.kind == "code":