You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by gi...@apache.org on 2021/12/03 18:56:50 UTC

[arrow-site] branch asf-site updated: Updating built site (build 82364da1112734fc547763b9646d978e2c189d09)

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 20a3800  Updating built site (build 82364da1112734fc547763b9646d978e2c189d09)
20a3800 is described below

commit 20a3800bf645cd72bf651080c6cb3b6a0a6c3272
Author: Jonathan Keane <jk...@gmail.com>
AuthorDate: Fri Dec 3 18:54:01 2021 +0000

    Updating built site (build 82364da1112734fc547763b9646d978e2c189d09)
---
 blog/2021/12/03/arrow-duckdb/index.html | 670 ++++++++++++++++++++++++++++++++
 blog/index.html                         |  15 +
 docs/c_glib/index.html                  |   4 +-
 feed.xml                                | 458 +++++++++++++++++++---
 release/0.1.0.html                      |   4 +-
 release/0.10.0.html                     |   4 +-
 release/0.11.0.html                     |   4 +-
 release/0.11.1.html                     |   4 +-
 release/0.12.0.html                     |   4 +-
 release/0.13.0.html                     |   4 +-
 release/0.14.0.html                     |   4 +-
 release/0.14.1.html                     |   4 +-
 release/0.15.0.html                     |   4 +-
 release/0.15.1.html                     |   4 +-
 release/0.16.0.html                     |   4 +-
 release/0.17.0.html                     |   4 +-
 release/0.17.1.html                     |   4 +-
 release/0.2.0.html                      |   4 +-
 release/0.3.0.html                      |   4 +-
 release/0.4.0.html                      |   4 +-
 release/0.4.1.html                      |   4 +-
 release/0.5.0.html                      |   4 +-
 release/0.6.0.html                      |   4 +-
 release/0.7.0.html                      |   4 +-
 release/0.7.1.html                      |   4 +-
 release/0.8.0.html                      |   4 +-
 release/0.9.0.html                      |   4 +-
 release/1.0.0.html                      |   4 +-
 release/1.0.1.html                      |   4 +-
 release/2.0.0.html                      |   4 +-
 release/3.0.0.html                      |   4 +-
 release/4.0.0.html                      |   4 +-
 release/4.0.1.html                      |   4 +-
 release/5.0.0.html                      |   4 +-
 release/6.0.0.html                      |   4 +-
 release/6.0.1.html                      |   4 +-
 release/index.html                      |   4 +-
 37 files changed, 1158 insertions(+), 121 deletions(-)

diff --git a/blog/2021/12/03/arrow-duckdb/index.html b/blog/2021/12/03/arrow-duckdb/index.html
new file mode 100644
index 0000000..2d612d3
--- /dev/null
+++ b/blog/2021/12/03/arrow-duckdb/index.html
@@ -0,0 +1,670 @@
+<!DOCTYPE html>
+<html lang="en-US">
+  <head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    
+    <title>DuckDB quacks Arrow: A zero-copy data integration between Arrow and DuckDB | Apache Arrow</title>
+    
+
+    <!-- Begin Jekyll SEO tag v2.7.1 -->
+<meta name="generator" content="Jekyll v4.2.0" />
+<meta property="og:title" content="DuckDB quacks Arrow: A zero-copy data integration between Arrow and DuckDB" />
+<meta name="author" content="Pedro Holanda, Jonathan Keane" />
+<meta property="og:locale" content="en_US" />
+<meta name="description" content="TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs. This post is a collaboration with and cross-posted on the DuckDB blog. Part of Apache Arrow is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, it uses a columnar data model. But the Arrow project contains more than just the format: The Arrow [...]
+<meta property="og:description" content="TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs. This post is a collaboration with and cross-posted on the DuckDB blog. Part of Apache Arrow is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, it uses a columnar data model. But the Arrow project contains more than just the format: Th [...]
+<link rel="canonical" href="https://arrow.apache.org/blog/2021/12/03/arrow-duckdb/" />
+<meta property="og:url" content="https://arrow.apache.org/blog/2021/12/03/arrow-duckdb/" />
+<meta property="og:site_name" content="Apache Arrow" />
+<meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
+<meta property="og:type" content="article" />
+<meta property="article:published_time" content="2021-12-03T00:00:00-05:00" />
+<meta name="twitter:card" content="summary_large_image" />
+<meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
+<meta property="twitter:title" content="DuckDB quacks Arrow: A zero-copy data integration between Arrow and DuckDB" />
+<meta name="twitter:site" content="@ApacheArrow" />
+<meta name="twitter:creator" content="@Pedro Holanda, Jonathan Keane" />
+<script type="application/ld+json">
+{"description":"TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs. This post is a collaboration with and cross-posted on the DuckDB blog. Part of Apache Arrow is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, it uses a columnar data model. But the Arrow project contains more than just the format: The Arrow C++ library, whic [...]
+<!-- End Jekyll SEO tag -->
+
+
+    <!-- favicons -->
+    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png" id="light1">
+    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png" id="light2">
+    <link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon.png" id="light3">
+    <link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120.png" id="light4">
+    <link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76.png" id="light5">
+    <link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60.png" id="light6">
+    <!-- dark mode favicons -->
+    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16-dark.png" id="dark1">
+    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32-dark.png" id="dark2">
+    <link rel="apple-touch-icon" type="image/png" sizes="180x180" href="/img/apple-touch-icon-dark.png" id="dark3">
+    <link rel="apple-touch-icon" type="image/png" sizes="120x120" href="/img/apple-touch-icon-120x120-dark.png" id="dark4">
+    <link rel="apple-touch-icon" type="image/png" sizes="76x76" href="/img/apple-touch-icon-76x76-dark.png" id="dark5">
+    <link rel="apple-touch-icon" type="image/png" sizes="60x60" href="/img/apple-touch-icon-60x60-dark.png" id="dark6">
+
+    <script>
+      // Switch to the dark-mode favicons if prefers-color-scheme: dark
+      function onUpdate() {
+        light1 = document.querySelector('link#light1');
+        light2 = document.querySelector('link#light2');
+        light3 = document.querySelector('link#light3');
+        light4 = document.querySelector('link#light4');
+        light5 = document.querySelector('link#light5');
+        light6 = document.querySelector('link#light6');
+
+        dark1 = document.querySelector('link#dark1');
+        dark2 = document.querySelector('link#dark2');
+        dark3 = document.querySelector('link#dark3');
+        dark4 = document.querySelector('link#dark4');
+        dark5 = document.querySelector('link#dark5');
+        dark6 = document.querySelector('link#dark6');
+
+        if (matcher.matches) {
+          light1.remove();
+          light2.remove();
+          light3.remove();
+          light4.remove();
+          light5.remove();
+          light6.remove();
+          document.head.append(dark1);
+          document.head.append(dark2);
+          document.head.append(dark3);
+          document.head.append(dark4);
+          document.head.append(dark5);
+          document.head.append(dark6);
+        } else {
+          dark1.remove();
+          dark2.remove();
+          dark3.remove();
+          dark4.remove();
+          dark5.remove();
+          dark6.remove();
+          document.head.append(light1);
+          document.head.append(light2);
+          document.head.append(light3);
+          document.head.append(light4);
+          document.head.append(light5);
+          document.head.append(light6);
+        }
+      }
+      matcher = window.matchMedia('(prefers-color-scheme: dark)');
+      matcher.addListener(onUpdate);
+      onUpdate();
+    </script>
+
+    <link rel="stylesheet" href="//fonts.googleapis.com/css?family=Lato:300,300italic,400,400italic,700,700italic,900">
+
+    <link href="/css/main.css" rel="stylesheet">
+    <link href="/css/syntax.css" rel="stylesheet">
+    <script src="/javascript/main.js"></script>
+    
+    <!-- Global Site Tag (gtag.js) - Google Analytics -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=UA-107500873-1"></script>
+<script>
+  window.dataLayer = window.dataLayer || []; // reuse an existing queue, otherwise start an empty one
+  function gtag(){dataLayer.push(arguments)}; // queues the raw `arguments` object itself, not a copy
+  gtag('js', new Date());
+
+  gtag('config', 'UA-107500873-1');
+</script>
+
+    
+  </head>
+
+
+<body class="wrap">
+  <header>
+    <nav class="navbar navbar-expand-md navbar-dark bg-dark">
+  
+  <a class="navbar-brand no-padding" href="/"><img src="/img/arrow-inverse-300px.png" alt="Apache Arrow" height="40"></a>
+  
+   <button class="navbar-toggler ml-auto" type="button" data-toggle="collapse" data-target="#arrow-navbar" aria-controls="arrow-navbar" aria-expanded="false" aria-label="Toggle navigation">
+    <span class="navbar-toggler-icon"></span>
+  </button>
+
+    <!-- Collect the nav links, forms, and other content for toggling -->
+    <div class="collapse navbar-collapse justify-content-end" id="arrow-navbar">
+      <ul class="nav navbar-nav">
+        <li class="nav-item"><a class="nav-link" href="/overview/" role="button" aria-haspopup="true" aria-expanded="false">Overview</a></li>
+        <li class="nav-item"><a class="nav-link" href="/faq/" role="button" aria-haspopup="true" aria-expanded="false">FAQ</a></li>
+        <li class="nav-item"><a class="nav-link" href="/blog" role="button" aria-haspopup="true" aria-expanded="false">Blog</a></li>
+        <li class="nav-item dropdown">
+          <a class="nav-link dropdown-toggle" href="#"
+             id="navbarDropdownGetArrow" role="button" data-toggle="dropdown"
+             aria-haspopup="true" aria-expanded="false">
+             Get Arrow
+          </a>
+          <div class="dropdown-menu" aria-labelledby="navbarDropdownGetArrow">
+            <a class="dropdown-item" href="/install/">Install</a>
+            <a class="dropdown-item" href="/release/">Releases</a>
+            <a class="dropdown-item" href="https://github.com/apache/arrow">Source Code</a>
+          </div>
+        </li>
+        <li class="nav-item dropdown">
+          <a class="nav-link dropdown-toggle" href="#"
+             id="navbarDropdownDocumentation" role="button" data-toggle="dropdown"
+             aria-haspopup="true" aria-expanded="false">
+             Documentation
+          </a>
+          <div class="dropdown-menu" aria-labelledby="navbarDropdownDocumentation">
+            <a class="dropdown-item" href="/docs">Project Docs</a>
+            <a class="dropdown-item" href="/docs/format/Columnar.html">Format</a>
+            <hr/>
+            <a class="dropdown-item" href="/docs/c_glib">C GLib</a>
+            <a class="dropdown-item" href="/docs/cpp">C++</a>
+            <a class="dropdown-item" href="https://github.com/apache/arrow/blob/master/csharp/README.md">C#</a>
+            <a class="dropdown-item" href="https://godoc.org/github.com/apache/arrow/go/arrow">Go</a>
+            <a class="dropdown-item" href="/docs/java">Java</a>
+            <a class="dropdown-item" href="/docs/js">JavaScript</a>
+            <a class="dropdown-item" href="https://arrow.juliadata.org/stable/">Julia</a>
+            <a class="dropdown-item" href="https://github.com/apache/arrow/blob/master/matlab/README.md">MATLAB</a>
+            <a class="dropdown-item" href="/docs/python">Python</a>
+            <a class="dropdown-item" href="/docs/r">R</a>
+            <a class="dropdown-item" href="https://github.com/apache/arrow/blob/master/ruby/README.md">Ruby</a>
+            <a class="dropdown-item" href="https://docs.rs/crate/arrow/">Rust</a>
+          </div>
+        </li>
+        <li class="nav-item dropdown">
+          <a class="nav-link dropdown-toggle" href="#"
+             id="navbarDropdownSubprojects" role="button" data-toggle="dropdown"
+             aria-haspopup="true" aria-expanded="false">
+             Subprojects
+          </a>
+          <div class="dropdown-menu" aria-labelledby="navbarDropdownSubprojects">
+            <a class="dropdown-item" href="/datafusion">DataFusion</a>
+          </div>
+        </li>
+        <li class="nav-item dropdown">
+          <a class="nav-link dropdown-toggle" href="#"
+             id="navbarDropdownCommunity" role="button" data-toggle="dropdown"
+             aria-haspopup="true" aria-expanded="false">
+             Community
+          </a>
+          <div class="dropdown-menu" aria-labelledby="navbarDropdownCommunity">
+            <a class="dropdown-item" href="/community/">Communication</a>
+            <a class="dropdown-item" href="/docs/developers/contributing.html">Contributing</a>
+            <a class="dropdown-item" href="https://issues.apache.org/jira/browse/ARROW">Issue Tracker</a>
+            <a class="dropdown-item" href="/committers/">Governance</a>
+            <a class="dropdown-item" href="/use_cases/">Use Cases</a>
+            <a class="dropdown-item" href="/powered_by/">Powered By</a>
+            <a class="dropdown-item" href="/security/">Security</a>
+            <a class="dropdown-item" href="https://www.apache.org/foundation/policies/conduct.html">Code of Conduct</a>
+          </div>
+        </li>
+        <li class="nav-item dropdown">
+          <a class="nav-link dropdown-toggle" href="#"
+             id="navbarDropdownASF" role="button" data-toggle="dropdown"
+             aria-haspopup="true" aria-expanded="false">
+             ASF Links
+          </a>
+          <div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdownASF">
+            <a class="dropdown-item" href="http://www.apache.org/">ASF Website</a>
+            <a class="dropdown-item" href="http://www.apache.org/licenses/">License</a>
+            <a class="dropdown-item" href="http://www.apache.org/foundation/sponsorship.html">Donate</a>
+            <a class="dropdown-item" href="http://www.apache.org/foundation/thanks.html">Thanks</a>
+            <a class="dropdown-item" href="http://www.apache.org/security/">Security</a>
+          </div>
+        </li>
+      </ul>
+    </div><!-- /.navbar-collapse -->
+  </nav>
+
+  </header>
+
+  <div class="container p-4 pt-5">
+    <div class="col-md-8 mx-auto">
+      <main role="main" class="pb-5">
+        
+<h1>
+  DuckDB quacks Arrow: A zero-copy data integration between Arrow and DuckDB
+</h1>
+<hr class="mt-4 mb-3">
+
+
+
+<p class="mb-4 pb-1">
+  <span class="badge badge-secondary">Published</span>
+  <span class="published mr-3">
+    03 Dec 2021
+  </span>
+  <br />
+  <span class="badge badge-secondary">By</span>
+  
+    Pedro Holanda, Jonathan Keane
+  
+
+  
+</p>
+
+
+        <!--
+
+-->
+
+<p><em>TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs.</em></p>
+
+<p>This post is a collaboration with and cross-posted on <a href="https://duckdb.org/2021/12/03/duck-arrow.html">the DuckDB blog</a>.</p>
+
+<p>Part of <a href="https://arrow.apache.org">Apache Arrow</a> is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, it uses a columnar data model. But the Arrow project contains more than just the format: The Arrow C++ library, which is accessible in Python, R, and Ruby via bindings, has additional features that allow you to compute efficiently on datasets. These additional features are on top of the implementation of the in-memory format describe [...]
+
+<p><a href="https://www.duckdb.org">DuckDB</a> is a new analytical data management system that is designed to run complex SQL queries within other processes. DuckDB has bindings for R and Python, among others. DuckDB can query Arrow datasets directly and stream query results back to Arrow. This integration allows users to query Arrow data using DuckDB’s SQL Interface and API, while taking advantage of DuckDB’s parallel vectorized execution engine, without requiring any extra data copying [...]
+
+<p>This integration is unique because it uses zero-copy streaming of data between DuckDB and Arrow and vice versa so that you can compose a query using both together. This results in three main benefits:</p>
+
+<ol>
+  <li><strong>Larger Than Memory Analysis:</strong> Since both libraries support streaming query results, we are capable of executing on data without fully loading it from disk. Instead, we can execute one batch at a time. This allows us to execute queries on data that is bigger than memory.</li>
+  <li><strong>Complex Data Types:</strong> DuckDB can efficiently process complex data types that can be stored in Arrow vectors, including arbitrarily nested structs, lists, and maps.</li>
+  <li><strong>Advanced Optimizer:</strong> DuckDB’s state-of-the-art optimizer can push down filters and projections directly into Arrow scans. As a result, only relevant columns and partitions will be read, allowing the system to e.g., take advantage of partition elimination in Parquet files. This significantly accelerates query execution.</li>
+</ol>
+
+<p>If you are only interested in benchmarks, you can jump ahead to the <a href="#Benchmark Comparison">benchmark section below</a>.</p>
+
+<h2 id="quick-tour">Quick Tour</h2>
+<p>Before diving into the details of the integration, this section provides a quick motivating example of how powerful and simple to use the DuckDB-Arrow integration is. With a few lines of code, you can already start querying Arrow datasets. Say you want to analyze the infamous <a href="https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page">NYC Taxi Dataset</a> and figure out if groups tip more or less than single riders.</p>
+
+<h3 id="r">R</h3>
+<p>Both Arrow and DuckDB support dplyr pipelines for people more comfortable with using dplyr for their data analysis. The Arrow package includes two helper functions that allow us to pass data back and forth between Arrow and DuckDB (<code class="language-plaintext highlighter-rouge">to_duckdb()</code> and <code class="language-plaintext highlighter-rouge">to_arrow()</code>).
+This is especially useful in cases where something is supported in one of Arrow or DuckDB but not the other. For example, if you find a complex dplyr pipeline where the SQL translation doesn’t work with DuckDB, use <code class="language-plaintext highlighter-rouge">to_arrow()</code> before the pipeline to use the Arrow engine. Or, if you have a function (e.g., windowed aggregates) that aren’t yet implemented in Arrow, use <code class="language-plaintext highlighter-rouge">to_duckdb()</co [...]
+
+<div class="language-R highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">library</span><span class="p">(</span><span class="n">duckdb</span><span class="p">)</span><span class="w">
+</span><span class="n">library</span><span class="p">(</span><span class="n">arrow</span><span class="p">)</span><span class="w">
+</span><span class="n">library</span><span class="p">(</span><span class="n">dplyr</span><span class="p">)</span><span class="w">
+
+</span><span class="c1"># Open dataset using year,month folder partition</span><span class="w">
+</span><span class="n">ds</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">open_dataset</span><span class="p">(</span><span class="s2">"nyc-taxi"</span><span class="p">,</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nf">c</span><span class="p">(</span><span class="s2">"year"</span><spa [...]
+
+</span><span class="n">ds</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="c1"># Look only at 2015 on, where the number of passenger is positive, the trip distance is</span><span class="w">
+  </span><span class="c1"># greater than a quarter mile, and where the fare amount is positive</span><span class="w">
+  </span><span class="n">filter</span><span class="p">(</span><span class="n">year</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="m">2014</span><span class="w"> </span><span class="o">&amp;</span><span class="w"> </span><span class="n">passenger_count</span><span class="w"> </span><span class="o">&gt;</span><span class="w"> </span><span class="m">0</span><span class="w"> </span><span class="o">&amp;</span><span class="w"> </span><span class= [...]
+  </span><span class="c1"># Pass off to DuckDB</span><span class="w">
+  </span><span class="n">to_duckdb</span><span class="p">()</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="n">group_by</span><span class="p">(</span><span class="n">passenger_count</span><span class="p">)</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="n">mutate</span><span class="p">(</span><span class="n">tip_pct</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">tip_amount</span><span class="w"> </span><span class="o">/</span><span class="w"> </span><span class="n">fare_amount</span><span class="p">)</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="n">summarise</span><span class="p">(</span><span class="w">
+    </span><span class="n">fare_amount</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mean</span><span class="p">(</span><span class="n">fare_amount</span><span class="p">,</span><span class="w"> </span><span class="n">na.rm</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">TRUE</span><span class="p">),</span><span class="w">
+    </span><span class="n">tip_amount</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mean</span><span class="p">(</span><span class="n">tip_amount</span><span class="p">,</span><span class="w"> </span><span class="n">na.rm</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">TRUE</span><span class="p">),</span><span class="w">
+    </span><span class="n">tip_pct</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">mean</span><span class="p">(</span><span class="n">tip_pct</span><span class="p">,</span><span class="w"> </span><span class="n">na.rm</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">TRUE</span><span class="p">)</span><span class="w">
+  </span><span class="p">)</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="n">arrange</span><span class="p">(</span><span class="n">passenger_count</span><span class="p">)</span><span class="w"> </span><span class="o">%&gt;%</span><span class="w">
+  </span><span class="n">collect</span><span class="p">()</span><span class="w">
+</span></code></pre></div></div>
+
+<h3 id="python">Python</h3>
+<p>The workflow in Python is as simple as it is in R. In this example we use DuckDB’s Relational API.</p>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="kn">import</span> <span class="nn">duckdb</span>
+<span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="n">pa</span>
+<span class="kn">import</span> <span class="nn">pyarrow.dataset</span> <span class="k">as</span> <span class="n">ds</span>
+
+<span class="c1"># Open dataset using year,month folder partition
+</span><span class="n">nyc</span> <span class="o">=</span> <span class="n">ds</span><span class="p">.</span><span class="n">dataset</span><span class="p">(</span><span class="s">'nyc-taxi/'</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="p">[</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">])</span>
+
+<span class="c1"># We transform the nyc dataset into a DuckDB relation
+</span><span class="n">nyc</span> <span class="o">=</span> <span class="n">duckdb</span><span class="p">.</span><span class="n">arrow</span><span class="p">(</span><span class="n">nyc</span><span class="p">)</span>
+
+<span class="c1"># Run same query again
+</span><span class="n">nyc</span><span class="p">.</span><span class="nb">filter</span><span class="p">(</span><span class="s">"year &gt; 2014 &amp; passenger_count &gt; 0 &amp; trip_distance &gt; 0.25 &amp; fare_amount &gt; 0"</span><span class="p">)</span>
+    <span class="p">.</span><span class="n">aggregate</span><span class="p">(</span><span class="s">"SELECT AVG(fare_amount), AVG(tip_amount), AVG(tip_amount / fare_amount) as tip_pct"</span><span class="p">,</span><span class="s">"passenger_count"</span><span class="p">).</span><span class="n">arrow</span><span class="p">()</span>
+</code></pre></div></div>
+
+<h2 id="duckdb-and-arrow-the-basics">DuckDB and Arrow: The Basics</h2>
+
+<p>In this section, we will look at some basic examples of the code needed to read and output Arrow tables in both Python and R.</p>
+
+<h4 id="setup">Setup</h4>
+
+<p>First we need to install DuckDB and Arrow. The installation process for both libraries in Python and R is shown below.</p>
+<div class="language-bash highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c"># Python Install</span>
+pip <span class="nb">install </span>duckdb
+pip <span class="nb">install </span>pyarrow
+</code></pre></div></div>
+
+<div class="language-R highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># R Install</span><span class="w">
+</span><span class="n">install.packages</span><span class="p">(</span><span class="s2">"duckdb"</span><span class="p">)</span><span class="w">
+</span><span class="n">install.packages</span><span class="p">(</span><span class="s2">"arrow"</span><span class="p">)</span><span class="w">
+</span></code></pre></div></div>
+
+<p>To run the examples in this section, we need to download the following custom Parquet files:</p>
+<ul>
+  <li>https://github.com/duckdb/duckdb-web/blob/master/_posts/data/integers.parquet?raw=true</li>
+  <li>https://github.com/cwida/duckdb-data/releases/download/v1.0/lineitemsf1.snappy.parquet</li>
+</ul>
+
+<h4 id="python-1">Python</h4>
+
+<p>There are two ways in Python of querying data from Arrow:</p>
+<ol>
+  <li>Through the Relational API:
+<div class="language-py highlighter-rouge"><div class="highlight"><pre class="highlight"><code># Reads Parquet File to an Arrow Table
+arrow_table = pq.read_table('integers.parquet')
+
+# Transforms Arrow Table -&gt; DuckDB Relation
+rel_from_arrow = duckdb.arrow(arrow_table)
+
+# we can run a SQL query on this and print the result
+print(rel_from_arrow.query('arrow_table', 'SELECT SUM(data) FROM arrow_table WHERE data &gt; 50').fetchone())
+
+# Transforms DuckDB Relation -&gt; Arrow Table
+arrow_table_from_duckdb = rel_from_arrow.arrow()
+</code></pre></div></div>
+  </li>
+
+  <li>By using replacement scans and querying the object directly with SQL:
+<div class="language-py highlighter-rouge"><div class="highlight"><pre class="highlight"><code># Reads Parquet File to an Arrow Table
+arrow_table = pq.read_table('integers.parquet')
+
+# Gets Database Connection
+con = duckdb.connect()
+
+# we can run a SQL query on this and print the result
+print(con.execute('SELECT SUM(data) FROM arrow_table WHERE data &gt; 50').fetchone())
+
+# Transforms Query Result from DuckDB to Arrow Table
+# We can directly read the arrow object through DuckDB's replacement scans.
+con.execute("SELECT * FROM arrow_table").fetch_arrow_table()
+</code></pre></div></div>
+  </li>
+</ol>
+
+<p>It is possible to transform both DuckDB Relations and Query Results back to Arrow.</p>
+
+<h4 id="r-1">R</h4>
+
+<p>In R, you can interact with Arrow data in DuckDB by registering the table as a view (an alternative is to use dplyr as shown above).</p>
+<div class="language-r highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">library</span><span class="p">(</span><span class="n">duckdb</span><span class="p">)</span><span class="w">
+</span><span class="n">library</span><span class="p">(</span><span class="n">arrow</span><span class="p">)</span><span class="w">
+</span><span class="n">library</span><span class="p">(</span><span class="n">dplyr</span><span class="p">)</span><span class="w">
+
+</span><span class="c1"># Reads Parquet File to an Arrow Table</span><span class="w">
+</span><span class="n">arrow_table</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">arrow</span><span class="o">::</span><span class="n">read_parquet</span><span class="p">(</span><span class="s2">"integers.parquet"</span><span class="p">,</span><span class="w"> </span><span class="n">as_data_frame</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">FALSE</span><span class="p">)</span><span class="w">
+
+</span><span class="c1"># Gets Database Connection</span><span class="w">
+</span><span class="n">con</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">dbConnect</span><span class="p">(</span><span class="n">duckdb</span><span class="o">::</span><span class="n">duckdb</span><span class="p">())</span><span class="w">
+
+</span><span class="c1"># Registers arrow table as a DuckDB view</span><span class="w">
+</span><span class="n">arrow</span><span class="o">::</span><span class="n">to_duckdb</span><span class="p">(</span><span class="n">arrow_table</span><span class="p">,</span><span class="w"> </span><span class="n">table_name</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"arrow_table"</span><span class="p">,</span><span class="w"> </span><span class="n">con</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span  [...]
+
+</span><span class="c1"># we can run a SQL query on this and print the result</span><span class="w">
+</span><span class="n">print</span><span class="p">(</span><span class="n">dbGetQuery</span><span class="p">(</span><span class="n">con</span><span class="p">,</span><span class="w"> </span><span class="s2">"SELECT SUM(data) FROM arrow_table WHERE data &gt; 50"</span><span class="p">))</span><span class="w">
+
+</span><span class="c1"># Transforms Query Result from DuckDB to Arrow Table</span><span class="w">
+</span><span class="n">result</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">dbSendQuery</span><span class="p">(</span><span class="n">con</span><span class="p">,</span><span class="w"> </span><span class="s2">"SELECT * FROM arrow_table"</span><span class="p">)</span><span class="w">
+</span></code></pre></div></div>
+
+<h3 id="streaming-data-fromto-arrow">Streaming Data from/to Arrow</h3>
+<p>In the previous section, we depicted how to interact with Arrow tables. However, Arrow also allows users to interact with the data in a streaming fashion, either consuming it (e.g., from an Arrow Dataset) or producing it (e.g., returning a RecordBatchReader). And of course, DuckDB is able to consume Datasets and produce RecordBatchReaders. This example uses the NYC Taxi Dataset, stored in Parquet files partitioned by year and month, which we can download through the Arrow R package:</p>
+<div class="language-R highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="n">arrow</span><span class="o">::</span><span class="n">copy_files</span><span class="p">(</span><span class="s2">"s3://ursa-labs-taxi-data"</span><span class="p">,</span><span class="w"> </span><span class="s2">"nyc-taxi"</span><span class="p">)</span><span class="w">
+</span></code></pre></div></div>
+
+<h4 id="python-2">Python</h4>
+<div class="language-py highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Reads dataset partitioning it in year/month folder
+</span><span class="n">nyc_dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="p">.</span><span class="n">dataset</span><span class="p">(</span><span class="s">'nyc-taxi/'</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="p">[</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">])</span>
+
+<span class="c1"># Gets Database Connection
+</span><span class="n">con</span> <span class="o">=</span> <span class="n">duckdb</span><span class="p">.</span><span class="n">connect</span><span class="p">()</span>
+
+<span class="n">query</span> <span class="o">=</span> <span class="n">con</span><span class="p">.</span><span class="n">execute</span><span class="p">(</span><span class="s">"SELECT * FROM nyc_dataset"</span><span class="p">)</span>
+<span class="c1"># DuckDB's queries can now produce a Record Batch Reader
+</span><span class="n">record_batch_reader</span> <span class="o">=</span> <span class="n">query</span><span class="p">.</span><span class="n">fetch_record_batch</span><span class="p">()</span>
+<span class="c1"># Which means we can stream the whole query per batch.
+# This retrieves the first batch
+</span><span class="n">chunk</span> <span class="o">=</span> <span class="n">record_batch_reader</span><span class="p">.</span><span class="n">read_next_batch</span><span class="p">()</span>
+</code></pre></div></div>
+<h4 id="r-2">R</h4>
+<div class="language-r highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Reads dataset partitioning it in year/month folder</span><span class="w">
+</span><span class="n">nyc_dataset</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">open_dataset</span><span class="p">(</span><span class="s2">"nyc-taxi/"</span><span class="p">,</span><span class="w"> </span><span class="n">partitioning</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="nf">c</span><span class="p">(</span><span class="s2">"year"</span><span class="p">,</span><span class="w"> </span><sp [...]
+
+</span><span class="c1"># Gets Database Connection</span><span class="w">
+</span><span class="n">con</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">dbConnect</span><span class="p">(</span><span class="n">duckdb</span><span class="o">::</span><span class="n">duckdb</span><span class="p">())</span><span class="w">
+
+</span><span class="c1"># We can use the same function as before to register our arrow dataset</span><span class="w">
+</span><span class="n">duckdb</span><span class="o">::</span><span class="n">duckdb_register_arrow</span><span class="p">(</span><span class="n">con</span><span class="p">,</span><span class="w"> </span><span class="s2">"nyc"</span><span class="p">,</span><span class="w"> </span><span class="n">nyc_dataset</span><span class="p">)</span><span class="w">
+
+</span><span class="n">res</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">dbSendQuery</span><span class="p">(</span><span class="n">con</span><span class="p">,</span><span class="w"> </span><span class="s2">"SELECT * FROM nyc"</span><span class="p">,</span><span class="w"> </span><span class="n">arrow</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="kc">TRUE</span><span class="p">)</span><span cl [...]
+</span><span class="c1"># DuckDB's queries can now produce a Record Batch Reader</span><span class="w">
+</span><span class="n">record_batch_reader</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">duckdb</span><span class="o">::</span><span class="n">duckdb_fetch_record_batch</span><span class="p">(</span><span class="n">res</span><span class="p">)</span><span class="w">
+
+</span><span class="c1"># Which means we can stream the whole query per batch.</span><span class="w">
+</span><span class="c1"># This retrieves the first batch</span><span class="w">
+</span><span class="n">cur_batch</span><span class="w"> </span><span class="o">&lt;-</span><span class="w"> </span><span class="n">record_batch_reader</span><span class="o">$</span><span class="n">read_next_batch</span><span class="p">()</span><span class="w">
+</span></code></pre></div></div>
+
+<p>The preceding R code shows in low-level detail how the data is streaming. We provide the helper <code class="language-plaintext highlighter-rouge">to_arrow()</code> in the Arrow package which is a wrapper around this that makes it easy to incorporate this streaming into a dplyr pipeline. <sup id="fnref:1" role="doc-noteref"><a href="#fn:1" class="footnote" rel="footnote">1</a></sup></p>
+
+<h2 id="benchmark-comparison">Benchmark Comparison</h2>
+
+<p>Here we demonstrate in a simple benchmark the performance difference between querying Arrow datasets with DuckDB and querying Arrow datasets with Pandas.
+For both the Projection and Filter pushdown comparison, we will use Arrow tables. That is due to Pandas not being capable of consuming Arrow stream objects.</p>
+
+<p>For the NYC Taxi benchmarks, we used the <a href="https://www.monetdb.org/wiki/Scilens-configuration-standard">scilens diamonds configuration</a> and for the TPC-H benchmarks, we used an m1 MacBook Pro. In both cases, parallelism in DuckDB was used (which is now on by default).</p>
+
+<p>For the comparison with Pandas, note that DuckDB runs in parallel, while pandas only support single-threaded execution. Besides that, one should note that we are comparing automatic optimizations. DuckDB’s query optimizer can automatically push down filters and projections. This automatic optimization is not supported in pandas, but it is possible for users to manually perform some of these predicate and filter pushdowns by manually specifying them them in the <code class="language-pl [...]
+
+<h3 id="projection-pushdown">Projection Pushdown</h3>
+
+<p>In this example we run a simple aggregation on two columns of our lineitem table.</p>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># DuckDB
+</span><span class="n">lineitem</span> <span class="o">=</span> <span class="n">pq</span><span class="p">.</span><span class="n">read_table</span><span class="p">(</span><span class="s">'lineitemsf1.snappy.parquet'</span><span class="p">)</span>
+<span class="n">con</span> <span class="o">=</span> <span class="n">duckdb</span><span class="p">.</span><span class="n">connect</span><span class="p">()</span>
+
+<span class="c1"># Transforms Query Result from DuckDB to Arrow Table
+</span><span class="n">con</span><span class="p">.</span><span class="n">execute</span><span class="p">(</span><span class="s">"""SELECT sum(l_extendedprice * l_discount) AS revenue
+                FROM
+                lineitem;"""</span><span class="p">).</span><span class="n">fetch_arrow_table</span><span class="p">()</span>
+
+</code></pre></div></div>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Pandas
+</span><span class="n">arrow_table</span> <span class="o">=</span> <span class="n">pq</span><span class="p">.</span><span class="n">read_table</span><span class="p">(</span><span class="s">'lineitemsf1.snappy.parquet'</span><span class="p">)</span>
+
+<span class="c1"># Converts an Arrow table to a Dataframe
+</span><span class="n">df</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="p">.</span><span class="n">to_pandas</span><span class="p">()</span>
+
+<span class="c1"># Runs aggregation
+</span><span class="n">res</span> <span class="o">=</span>  <span class="n">pd</span><span class="p">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s">'sum'</span><span class="p">:</span> <span class="p">[(</span><span class="n">df</span><span class="p">.</span><span class="n">l_extendedprice</span> <span class="o">*</span> <span class="n">df</span><span class="p">.</span><span class="n">l_discount</span><span class="p">).</span><span class="nb">sum</span>< [...]
+
+<span class="c1"># Creates an Arrow Table from a Dataframe
+</span><span class="n">new_table</span> <span class="o">=</span> <span class="n">pa</span><span class="p">.</span><span class="n">Table</span><span class="p">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">res</span><span class="p">)</span>
+
+</code></pre></div></div>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th style="text-align: right">Time (s)</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>DuckDB</td>
+      <td style="text-align: right">0.19</td>
+    </tr>
+    <tr>
+      <td>Pandas</td>
+      <td style="text-align: right">2.13</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>The lineitem table is composed of 16 columns; however, to execute this query only two columns, <code class="language-plaintext highlighter-rouge">l_extendedprice</code> and <code class="language-plaintext highlighter-rouge">l_discount</code>, are necessary. Since DuckDB can push down the projection of these columns, it is capable of executing this query about one order of magnitude faster than Pandas.</p>
+
+<h3 id="filter-pushdown">Filter Pushdown</h3>
+
+<p>For our filter pushdown we repeat the same aggregation used in the previous section, but add filters on 4 more columns.</p>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># DuckDB
+</span><span class="n">lineitem</span> <span class="o">=</span> <span class="n">pq</span><span class="p">.</span><span class="n">read_table</span><span class="p">(</span><span class="s">'lineitemsf1.snappy.parquet'</span><span class="p">)</span>
+
+<span class="c1"># Get database connection
+</span><span class="n">con</span> <span class="o">=</span> <span class="n">duckdb</span><span class="p">.</span><span class="n">connect</span><span class="p">()</span>
+
+<span class="c1"># Transforms Query Result from DuckDB to Arrow Table
+</span><span class="n">con</span><span class="p">.</span><span class="n">execute</span><span class="p">(</span><span class="s">"""SELECT sum(l_extendedprice * l_discount) AS revenue
+        FROM
+            lineitem
+        WHERE
+            l_shipdate &gt;= CAST('1994-01-01' AS date)
+            AND l_shipdate &lt; CAST('1995-01-01' AS date)
+            AND l_discount BETWEEN 0.05
+            AND 0.07
+            AND l_quantity &lt; 24; """</span><span class="p">).</span><span class="n">fetch_arrow_table</span><span class="p">()</span>
+
+</code></pre></div></div>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Pandas
+</span><span class="n">arrow_table</span> <span class="o">=</span> <span class="n">pq</span><span class="p">.</span><span class="n">read_table</span><span class="p">(</span><span class="s">'lineitemsf1.snappy.parquet'</span><span class="p">)</span>
+
+<span class="n">df</span> <span class="o">=</span> <span class="n">arrow_table</span><span class="p">.</span><span class="n">to_pandas</span><span class="p">()</span>
+<span class="n">filtered_df</span> <span class="o">=</span> <span class="n">lineitem</span><span class="p">[</span>
+        <span class="p">(</span><span class="n">lineitem</span><span class="p">.</span><span class="n">l_shipdate</span> <span class="o">&gt;=</span> <span class="s">"1994-01-01"</span><span class="p">)</span> <span class="o">&amp;</span>
+        <span class="p">(</span><span class="n">lineitem</span><span class="p">.</span><span class="n">l_shipdate</span> <span class="o">&lt;</span> <span class="s">"1995-01-01"</span><span class="p">)</span> <span class="o">&amp;</span>
+        <span class="p">(</span><span class="n">lineitem</span><span class="p">.</span><span class="n">l_discount</span> <span class="o">&gt;=</span> <span class="mf">0.05</span><span class="p">)</span> <span class="o">&amp;</span>
+        <span class="p">(</span><span class="n">lineitem</span><span class="p">.</span><span class="n">l_discount</span> <span class="o">&lt;=</span> <span class="mf">0.07</span><span class="p">)</span> <span class="o">&amp;</span>
+        <span class="p">(</span><span class="n">lineitem</span><span class="p">.</span><span class="n">l_quantity</span> <span class="o">&lt;</span> <span class="mi">24</span><span class="p">)]</span>
+
+<span class="n">res</span> <span class="o">=</span>  <span class="n">pd</span><span class="p">.</span><span class="n">DataFrame</span><span class="p">({</span><span class="s">'sum'</span><span class="p">:</span> <span class="p">[(</span><span class="n">filtered_df</span><span class="p">.</span><span class="n">l_extendedprice</span> <span class="o">*</span> <span class="n">filtered_df</span><span class="p">.</span><span class="n">l_discount</span><span class="p">).</span><span class="nb"> [...]
+<span class="n">new_table</span> <span class="o">=</span> <span class="n">pa</span><span class="p">.</span><span class="n">Table</span><span class="p">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">res</span><span class="p">)</span>
+</code></pre></div></div>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Time (s)</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>DuckDB</td>
+      <td>0.04</td>
+    </tr>
+    <tr>
+      <td>Pandas</td>
+      <td>2.29</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>The difference between DuckDB and Pandas is now more drastic, with DuckDB being two orders of magnitude faster than Pandas. Again, since both the filter and projection are pushed down to Arrow, DuckDB reads less data than Pandas, which can’t automatically perform this optimization.</p>
+
+<h3 id="streaming">Streaming</h3>
+
+<p>As demonstrated before, DuckDB is capable of consuming and producing Arrow data in a streaming fashion. In this section we run a simple benchmark to showcase the benefits in speed and memory usage when comparing it to full materialization and Pandas. This example uses the full NYC taxi dataset, which you can download as shown above.</p>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># DuckDB
+# Open dataset using year,month folder partition
+</span><span class="n">nyc</span> <span class="o">=</span> <span class="n">ds</span><span class="p">.</span><span class="n">dataset</span><span class="p">(</span><span class="s">'nyc-taxi/'</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="p">[</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">])</span>
+
+<span class="c1"># Get database connection
+</span><span class="n">con</span> <span class="o">=</span> <span class="n">duckdb</span><span class="p">.</span><span class="n">connect</span><span class="p">()</span>
+
+<span class="c1"># Run query that selects part of the data
+</span><span class="n">query</span> <span class="o">=</span> <span class="n">con</span><span class="p">.</span><span class="n">execute</span><span class="p">(</span><span class="s">"SELECT total_amount, passenger_count,year FROM nyc where total_amount &gt; 100 and year &gt; 2014"</span><span class="p">)</span>
+
+<span class="c1"># Create Record Batch Reader from Query Result.
+# "fetch_record_batch()" also accepts an extra parameter related to the desired produced chunk size.
+</span><span class="n">record_batch_reader</span> <span class="o">=</span> <span class="n">query</span><span class="p">.</span><span class="n">fetch_record_batch</span><span class="p">()</span>
+
+<span class="c1"># Retrieve all batch chunks
+</span><span class="n">chunk</span> <span class="o">=</span> <span class="n">record_batch_reader</span><span class="p">.</span><span class="n">read_next_batch</span><span class="p">()</span>
+<span class="k">while</span> <span class="nb">len</span><span class="p">(</span><span class="n">chunk</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
+    <span class="n">chunk</span> <span class="o">=</span> <span class="n">record_batch_reader</span><span class="p">.</span><span class="n">read_next_batch</span><span class="p">()</span>
+</code></pre></div></div>
+
+<div class="language-python highlighter-rouge"><div class="highlight"><pre class="highlight"><code><span class="c1"># Pandas
+# We must exclude one of the columns of the NYC dataset due to an unimplemented cast in Arrow.
+</span><span class="n">working_columns</span> <span class="o">=</span> <span class="p">[</span><span class="s">"vendor_id"</span><span class="p">,</span><span class="s">"pickup_at"</span><span class="p">,</span><span class="s">"dropoff_at"</span><span class="p">,</span><span class="s">"passenger_count"</span><span class="p">,</span><span class="s">"trip_distance"</span><span class="p">,</span><span class="s">"pickup_longitude"</span><span class="p">,</span>
+    <span class="s">"pickup_latitude"</span><span class="p">,</span><span class="s">"store_and_fwd_flag"</span><span class="p">,</span><span class="s">"dropoff_longitude"</span><span class="p">,</span><span class="s">"dropoff_latitude"</span><span class="p">,</span><span class="s">"payment_type"</span><span class="p">,</span>
+    <span class="s">"fare_amount"</span><span class="p">,</span><span class="s">"extra"</span><span class="p">,</span><span class="s">"mta_tax"</span><span class="p">,</span><span class="s">"tip_amount"</span><span class="p">,</span><span class="s">"tolls_amount"</span><span class="p">,</span><span class="s">"total_amount"</span><span class="p">,</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">]</span>
+
+<span class="c1"># Open dataset using year,month folder partition
+</span><span class="n">nyc_dataset</span> <span class="o">=</span> <span class="n">ds</span><span class="p">.</span><span class="n">dataset</span><span class="p">(</span><span class="nb">dir</span><span class="p">,</span> <span class="n">partitioning</span><span class="o">=</span><span class="p">[</span><span class="s">"year"</span><span class="p">,</span> <span class="s">"month"</span><span class="p">])</span>
+<span class="c1"># Generate a scanner to skip problematic column
+</span><span class="n">dataset_scanner</span> <span class="o">=</span> <span class="n">nyc_dataset</span><span class="p">.</span><span class="n">scanner</span><span class="p">(</span><span class="n">columns</span><span class="o">=</span><span class="n">working_columns</span><span class="p">)</span>
+
+<span class="c1"># Materialize dataset to an Arrow Table
+</span><span class="n">nyc_table</span> <span class="o">=</span> <span class="n">dataset_scanner</span><span class="p">.</span><span class="n">to_table</span><span class="p">()</span>
+
+<span class="c1"># Generate Dataframe from Arrow Table
+</span><span class="n">nyc_df</span> <span class="o">=</span> <span class="n">nyc_table</span><span class="p">.</span><span class="n">to_pandas</span><span class="p">()</span>
+
+<span class="c1"># Apply Filter
+</span><span class="n">filtered_df</span> <span class="o">=</span> <span class="n">nyc_df</span><span class="p">[</span>
+    <span class="p">(</span><span class="n">nyc_df</span><span class="p">.</span><span class="n">total_amount</span> <span class="o">&gt;</span> <span class="mi">100</span><span class="p">)</span> <span class="o">&amp;</span>
+    <span class="p">(</span><span class="n">nyc_df</span><span class="p">.</span><span class="n">year</span> <span class="o">&gt;</span><span class="mi">2014</span><span class="p">)]</span>
+
+<span class="c1"># Apply Projection
+</span><span class="n">res</span> <span class="o">=</span> <span class="n">filtered_df</span><span class="p">[[</span><span class="s">"total_amount"</span><span class="p">,</span> <span class="s">"passenger_count"</span><span class="p">,</span><span class="s">"year"</span><span class="p">]]</span>
+
+<span class="c1"># Transform Result back to an Arrow Table
+</span><span class="n">new_table</span> <span class="o">=</span> <span class="n">pa</span><span class="p">.</span><span class="n">Table</span><span class="p">.</span><span class="n">from_pandas</span><span class="p">(</span><span class="n">res</span><span class="p">)</span>
+</code></pre></div></div>
+
+<table>
+  <thead>
+    <tr>
+      <th>Name</th>
+      <th>Time (s)</th>
+      <th>Peak Memory Usage (GBs)</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>DuckDB</td>
+      <td>0.05</td>
+      <td>0.3</td>
+    </tr>
+    <tr>
+      <td>Pandas</td>
+      <td>146.91</td>
+      <td>248</td>
+    </tr>
+  </tbody>
+</table>
+
+<p>The difference in times between DuckDB and Pandas is a combination of all the integration benefits we explored in this article. In DuckDB the filter pushdown is applied to perform partition elimination (i.e., we skip reading the Parquet files where the year is &lt;= 2014). The filter pushdown is also used to eliminate unrelated row_groups (i.e., row groups where the total amount is always &lt;= 100). Due to our projection pushdown, Arrow only has to read the columns of interest from t [...]
+
+<p>In the table above, we also depict the comparison of peak memory usage between DuckDB (Streaming) and Pandas (Fully-Materializing).  In DuckDB, we only need to load the row-group of interest into memory. Hence our memory usage is low. We also have constant memory usage since we only have to keep one of these row groups in-memory at a time. Pandas, on the other hand, has to fully materialize all Parquet files when executing the query. Because of this, we see a constant steep increase i [...]
+
+<h2 id="conclusion-and-feedback">Conclusion and Feedback</h2>
+<p>In this blog post, we mainly showcased how to execute queries on Arrow datasets with DuckDB. There are additional libraries that can also consume the Arrow format but they have different purposes and capabilities. As always, we are happy to hear if you want to see benchmarks with different tools for a post in the future! Feel free to drop us an <a href="mailto:pedro@duckdblabs.com,jon@voltrondata.com">email</a> or share your thoughts directly in the Hacker News post.</p>
+
+<p>Last but not least, if you encounter any problems when using our integration, please open an issue in either <a href="https://github.com/duckdb/duckdb/issues">DuckDB’s - issue tracker</a> or <a href="https://issues.apache.org/jira/projects/ARROW/">Arrow’s - issue tracker</a>, depending on which library has a problem.</p>
+
+<div class="footnotes" role="doc-endnotes">
+  <ol>
+    <li id="fn:1" role="doc-endnote">
+      <p>In Arrow 6.0.0, <code class="language-plaintext highlighter-rouge">to_arrow()</code> currently returns the full table, but will allow full streaming in our upcoming 7.0.0 release. <a href="#fnref:1" class="reversefootnote" role="doc-backlink">&#8617;</a></p>
+    </li>
+  </ol>
+</div>
+
+      </main>
+    </div>
+
+    <hr/>
+<footer class="footer">
+  <div class="row">
+    <div class="col-md-9">
+      <p>Apache Arrow, Arrow, Apache, the Apache feather logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.</p>
+      <p>&copy; 2016-2021 The Apache Software Foundation</p>
+    </div>
+    <div class="col-md-3">
+      <a class="d-sm-none d-md-inline pr-2" href="https://www.apache.org/events/current-event.html">
+        <img src="https://www.apache.org/events/current-event-234x60.png"/>
+      </a>
+    </div>
+  </div>
+</footer>
+
+  </div>
+</body>
+</html>
diff --git a/blog/index.html b/blog/index.html
index ff3ce12..b1d07a7 100644
--- a/blog/index.html
+++ b/blog/index.html
@@ -225,6 +225,21 @@
   
   <p>
     <h3>
+      <a href="/blog/2021/12/03/arrow-duckdb/">DuckDB quacks Arrow: A zero-copy data integration between Arrow and DuckDB</a>
+    </h3>
+    
+    <p>
+    <span class="blog-list-date">
+      3 December 2021
+    </span>
+    </p>
+    TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs. This post is a collaboration with and cross-posted on the DuckDB blog. Part of Apache Arrow is an in-memory data format optimized...
+  </p>
+  
+
+  
+  <p>
+    <h3>
       <a href="/blog/2021/11/19/datafusion-6.0.0/">Apache Arrow DataFusion 6.0.0 Release</a>
     </h3>
     
diff --git a/docs/c_glib/index.html b/docs/c_glib/index.html
index 9f4ac16..e8a1da4 100644
--- a/docs/c_glib/index.html
+++ b/docs/c_glib/index.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow GLib (C)" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow GLib (C) Apache Arrow GLib is a wrapper library for Apache Arrow C++. Apache Arrow GLib provides C API. Apache Arrow GLib supports GObject Introspection. It means that you can create language bindings at runtime or compile time automatically. API reference manuals Apache Arrow GLib Apache Parquet GLib Gandiva GLib Plasma GLib","url":"https://arrow.apache.org/docs/c_glib/","headline":"Apache Arrow GLib (C)","dateModified":"2021-11-24T15:50:19-05:00","datePubli [...]
+{"description":"Apache Arrow GLib (C) Apache Arrow GLib is a wrapper library for Apache Arrow C++. Apache Arrow GLib provides C API. Apache Arrow GLib supports GObject Introspection. It means that you can create language bindings at runtime or compile time automatically. API reference manuals Apache Arrow GLib Apache Parquet GLib Gandiva GLib Plasma GLib","url":"https://arrow.apache.org/docs/c_glib/","headline":"Apache Arrow GLib (C)","dateModified":"2021-12-03T13:53:36-05:00","datePubli [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/feed.xml b/feed.xml
index 753d3d8..cbfabe9 100644
--- a/feed.xml
+++ b/feed.xml
@@ -1,4 +1,407 @@
-<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.2.0">Jekyll</generator><link href="https://arrow.apache.org/feed.xml" rel="self" type="application/atom+xml" /><link href="https://arrow.apache.org/" rel="alternate" type="text/html" /><updated>2021-11-24T15:50:19-05:00</updated><id>https://arrow.apache.org/feed.xml</id><title type="html">Apache Arrow</title><subtitle>Apache Arrow is a cross-language developm [...]
+<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.2.0">Jekyll</generator><link href="https://arrow.apache.org/feed.xml" rel="self" type="application/atom+xml" /><link href="https://arrow.apache.org/" rel="alternate" type="text/html" /><updated>2021-12-03T13:53:36-05:00</updated><id>https://arrow.apache.org/feed.xml</id><title type="html">Apache Arrow</title><subtitle>Apache Arrow is a cross-language developm [...]
+
+--&gt;
+
+&lt;p&gt;&lt;em&gt;TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs.&lt;/em&gt;&lt;/p&gt;
+
+&lt;p&gt;This post is a collaboration with and cross-posted on &lt;a href=&quot;https://duckdb.org/2021/12/03/duck-arrow.html&quot;&gt;the DuckDB blog&lt;/a&gt;.&lt;/p&gt;
+
+&lt;p&gt;Part of &lt;a href=&quot;https://arrow.apache.org&quot;&gt;Apache Arrow&lt;/a&gt; is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, it uses a columnar data model. But the Arrow project contains more than just the format: The Arrow C++ library, which is accessible in Python, R, and Ruby via bindings, has additional features that allow you to compute efficiently on datasets. These additional features are on top of the implementation of t [...]
+
+&lt;p&gt;&lt;a href=&quot;https://www.duckdb.org&quot;&gt;DuckDB&lt;/a&gt; is a new analytical data management system that is designed to run complex SQL queries within other processes. DuckDB has bindings for R and Python, among others. DuckDB can query Arrow datasets directly and stream query results back to Arrow. This integration allows users to query Arrow data using DuckDB’s SQL Interface and API, while taking advantage of DuckDB’s parallel vectorized execution engine, without requ [...]
+
+&lt;p&gt;This integration is unique because it uses zero-copy streaming of data between DuckDB and Arrow and vice versa so that you can compose a query using both together. This results in three main benefits:&lt;/p&gt;
+
+&lt;ol&gt;
+  &lt;li&gt;&lt;strong&gt;Larger Than Memory Analysis:&lt;/strong&gt; Since both libraries support streaming query results, we are capable of executing on data without fully loading it from disk. Instead, we can execute one batch at a time. This allows us to execute queries on data that is bigger than memory.&lt;/li&gt;
+  &lt;li&gt;&lt;strong&gt;Complex Data Types:&lt;/strong&gt; DuckDB can efficiently process complex data types that can be stored in Arrow vectors, including arbitrarily nested structs, lists, and maps.&lt;/li&gt;
+  &lt;li&gt;&lt;strong&gt;Advanced Optimizer:&lt;/strong&gt; DuckDB’s state-of-the-art optimizer can push down filters and projections directly into Arrow scans. As a result, only relevant columns and partitions will be read, allowing the system to e.g., take advantage of partition elimination in Parquet files. This significantly accelerates query execution.&lt;/li&gt;
+&lt;/ol&gt;
+
+&lt;p&gt;For those who are just interested in benchmarks, you can jump ahead to the &lt;a href=&quot;#benchmark-comparison&quot;&gt;benchmark section below&lt;/a&gt;.&lt;/p&gt;
+
+&lt;h2 id=&quot;quick-tour&quot;&gt;Quick Tour&lt;/h2&gt;
+&lt;p&gt;Before diving into the details of the integration, in this section we provide a quick motivating example of how powerful and simple to use the DuckDB-Arrow integration is. With a few lines of code, you can already start querying Arrow datasets. Say you want to analyze the infamous &lt;a href=&quot;https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page&quot;&gt;NYC Taxi Dataset&lt;/a&gt; and figure out if groups tip more or less than single riders.&lt;/p&gt;
+
+&lt;h3 id=&quot;r&quot;&gt;R&lt;/h3&gt;
+&lt;p&gt;Both Arrow and DuckDB support dplyr pipelines for people more comfortable with using dplyr for their data analysis. The Arrow package includes two helper functions that allow us to pass data back and forth between Arrow and DuckDB (&lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;to_duckdb()&lt;/code&gt; and &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;to_arrow()&lt;/code&gt;).
+This is especially useful in cases where something is supported in one of Arrow or DuckDB but not the other. For example, if you find a complex dplyr pipeline where the SQL translation doesn’t work with DuckDB, use &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;to_arrow()&lt;/code&gt; before the pipeline to use the Arrow engine. Or, if you have a function (e.g., windowed aggregates) that isn’t yet implemented in Arrow, use &lt;code class=&quot;language-plaintext high [...]
+
+&lt;div class=&quot;language-R highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dplyr&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Open dataset using year,month folder partition&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;open_dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;nyc-taxi&quot;&lt;/span&gt;&lt;span class=&quot [...]
+
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;%&amp;gt;%&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Look only at 2015 on, where the number of passenger is positive, the trip distance is&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# greater than a quarter mile, and where the fare amount is positive&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;filter&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;year&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;gt;&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;m&quot;&gt;2014&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt; [...]
+  &lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Pass off to DuckDB&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;%&amp;gt;%&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;group_by&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;passenger_count&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;%&amp;gt;%&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;mutate&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_pct&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_amount&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;/&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span& [...]
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;summarise&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+    &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fare_amount&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;mean&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fare_amount&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;na.rm& [...]
+    &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_amount&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;mean&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_amount&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;na.rm&lt [...]
+    &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_pct&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;mean&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;tip_pct&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;na.rm&lt;/span [...]
+  &lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;%&amp;gt;%&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrange&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;passenger_count&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;%&amp;gt;%&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+  &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;collect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;h3 id=&quot;python&quot;&gt;Python&lt;/h3&gt;
+&lt;p&gt;The workflow in Python is as simple as it is in R. In this example we use DuckDB’s Relational API.&lt;/p&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;kn&quot;&gt;import&lt;/span&gt; &lt;span class=&quot;nn&quot;&gt;duckdb&lt;/span&gt;
+&lt;span class=&quot;kn&quot;&gt;import&lt;/span&gt; &lt;span class=&quot;nn&quot;&gt;pyarrow&lt;/span&gt; &lt;span class=&quot;k&quot;&gt;as&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pa&lt;/span&gt;
+&lt;span class=&quot;kn&quot;&gt;import&lt;/span&gt; &lt;span class=&quot;nn&quot;&gt;pyarrow.dataset&lt;/span&gt; &lt;span class=&quot;k&quot;&gt;as&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Open dataset using year,month folder partition
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;nyc-taxi/&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;partitioning&lt;/span&gt;&lt;span class=&quot;o [...]
+
+&lt;span class=&quot;c1&quot;&gt;# We transform the nyc dataset into a DuckDB relation
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Run same query again
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;nb&quot;&gt;filter&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;year &amp;gt; 2014 &amp;amp; passenger_count &amp;gt; 0 &amp;amp; trip_distance &amp;gt; 0.25 &amp;amp; fare_amount &amp;gt; 0&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+    &lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;aggregate&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;SELECT AVG(fare_amount), AVG(tip_amount), AVG(tip_amount / fare_amount) as tip_pct&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;passenger_count&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;).&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow& [...]
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;h2 id=&quot;duckdb-and-arrow-the-basics&quot;&gt;DuckDB and Arrow: The Basics&lt;/h2&gt;
+
+&lt;p&gt;In this section, we will look at some basic examples of the code needed to read and output Arrow tables in both Python and R.&lt;/p&gt;
+
+&lt;h4 id=&quot;setup&quot;&gt;Setup&lt;/h4&gt;
+
+&lt;p&gt;First we need to install DuckDB and Arrow. The installation process for both libraries in Python and R is shown below.&lt;/p&gt;
+&lt;div class=&quot;language-bash highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c&quot;&gt;# Python Install&lt;/span&gt;
+pip &lt;span class=&quot;nb&quot;&gt;install &lt;/span&gt;duckdb
+pip &lt;span class=&quot;nb&quot;&gt;install &lt;/span&gt;pyarrow
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;div class=&quot;language-R highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# R Install&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;install.packages&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;duckdb&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;install.packages&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;arrow&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;p&gt;To execute the examples in this section, we need to download the following custom Parquet files:&lt;/p&gt;
+&lt;ul&gt;
+  &lt;li&gt;https://github.com/duckdb/duckdb-web/blob/master/_posts/data/integers.parquet?raw=true&lt;/li&gt;
+  &lt;li&gt;https://github.com/cwida/duckdb-data/releases/download/v1.0/lineitemsf1.snappy.parquet&lt;/li&gt;
+&lt;/ul&gt;
+
+&lt;h4 id=&quot;python-1&quot;&gt;Python&lt;/h4&gt;
+
+&lt;p&gt;There are two ways in Python of querying data from Arrow:&lt;/p&gt;
+&lt;ol&gt;
+  &lt;li&gt;Through the Relational API
+```py
+    &lt;h1 id=&quot;reads-parquet-file-to-an-arrow-table&quot;&gt;Reads Parquet File to an Arrow Table&lt;/h1&gt;
+    &lt;p&gt;arrow_table = pq.read_table(‘integers.parquet’)&lt;/p&gt;
+  &lt;/li&gt;
+&lt;/ol&gt;
+
+&lt;h1 id=&quot;transforms-arrow-table---duckdb-relation&quot;&gt;Transforms Arrow Table -&amp;gt; DuckDB Relation&lt;/h1&gt;
+&lt;p&gt;rel_from_arrow = duckdb.arrow(arrow_table)&lt;/p&gt;
+
+&lt;h1 id=&quot;we-can-run-a-sql-query-on-this-and-print-the-result&quot;&gt;we can run a SQL query on this and print the result&lt;/h1&gt;
+&lt;p&gt;print(rel_from_arrow.query(‘arrow_table’, ‘SELECT SUM(data) FROM arrow_table WHERE data &amp;gt; 50’).fetchone())&lt;/p&gt;
+
+&lt;h1 id=&quot;transforms-duckdb-relation---arrow-table&quot;&gt;Transforms DuckDB Relation -&amp;gt; Arrow Table&lt;/h1&gt;
+&lt;p&gt;arrow_table_from_duckdb = rel_from_arrow.arrow()&lt;/p&gt;
+&lt;div class=&quot;language-plaintext highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;
+2. By using replacement scans and querying the object directly with SQL:
+```py
+# Reads Parquet File to an Arrow Table
+arrow_table = pq.read_table(&apos;integers.parquet&apos;)
+
+# Gets Database Connection
+con = duckdb.connect()
+
+# we can run a SQL query on this and print the result
+print(con.execute(&apos;SELECT SUM(data) FROM arrow_table WHERE data &amp;gt; 50&apos;).fetchone())
+
+# Transforms Query Result from DuckDB to Arrow Table
+# We can directly read the arrow object through DuckDB&apos;s replacement scans.
+con.execute(&quot;SELECT * FROM arrow_table&quot;).fetch_arrow_table()
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;p&gt;It is possible to transform both DuckDB Relations and Query Results back to Arrow.&lt;/p&gt;
+
+&lt;h4 id=&quot;r-1&quot;&gt;R&lt;/h4&gt;
+
+&lt;p&gt;In R, you can interact with Arrow data in DuckDB by registering the table as a view (an alternative is to use dplyr as shown above).&lt;/p&gt;
+&lt;div class=&quot;language-r highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;library&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dplyr&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Reads Parquet File to an Arrow Table&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_parquet&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;integers.parquet&quot;&lt;/span&gt;&lt [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Gets Database Connection&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dbConnect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;())& [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Registers arrow table as a DuckDB view&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;table_name&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;= [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# we can run a SQL query on this and print the result&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;print&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dbGetQuery&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;SELECT SUM(data) FROM arrow_table WHERE data &amp;gt; 50&quot;&lt;/span&gt;&lt;span class=&quot;p&quot; [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Transforms Query Result from DuckDB to Arrow Table&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;result&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dbSendQuery&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;S [...]
+&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;h3 id=&quot;streaming-data-fromto-arrow&quot;&gt;Streaming Data from/to Arrow&lt;/h3&gt;
+&lt;p&gt;In the previous section, we depicted how to interact with Arrow tables. However, Arrow also allows users to interact with the data in a streaming fashion. Either consuming it (e.g., from an Arrow Dataset) or producing it (e.g., returning a RecordBatchReader). And of course, DuckDB is able to consume Datasets and produce RecordBatchReaders. This example uses the NYC Taxi Dataset, stored in Parquet files partitioned by year and month, which we can download through the Arrow R pack [...]
+&lt;div class=&quot;language-R highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;n&quot;&gt;arrow&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;copy_files&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;s3://ursa-labs-taxi-data&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&qu [...]
+&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;h4 id=&quot;python-2&quot;&gt;Python&lt;/h4&gt;
+&lt;div class=&quot;language-py highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# Reads dataset partitioning it in year/month folder
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_dataset&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;nyc-taxi/&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;partitioning&lt;/span&gt;&lt;span class [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Gets Database Connection
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;connect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;n&quot;&gt;query&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;execute&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;SELECT * FROM nyc_dataset&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+&lt;span class=&quot;c1&quot;&gt;# DuckDB&apos;s queries can now produce a Record Batch Reader
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;query&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fetch_record_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+&lt;span class=&quot;c1&quot;&gt;# Which means we can stream the whole query per batch.
+# This retrieves the first batch
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;chunk&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_next_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+&lt;h4 id=&quot;r-2&quot;&gt;R&lt;/h4&gt;
+&lt;div class=&quot;language-r highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# Reads dataset partitioning it in year/month folder&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_dataset&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;open_dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;nyc-taxi/&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n& [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Gets Database Connection&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dbConnect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;())& [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# We can use the same function as before to register our arrow dataset&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb_register_arrow&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;nyc&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w [...]
+
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dbSendQuery&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;s2&quot;&gt;&quot;SELE [...]
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# DuckDB&apos;s queries can now produce a Record Batch Reader&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;::&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;duckdb_fetch_record_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt;&lt;spa [...]
+
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# Which means we can stream the whole query per batch.&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;c1&quot;&gt;# This retrieves the first batch&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;cur_batch&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;o&quot;&gt;&amp;lt;-&lt;/span&gt;&lt;span class=&quot;w&quot;&gt; &lt;/span&gt;&lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;$&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_next_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;&lt;span class=&quot;w&quot;&gt;
+&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;p&gt;The preceding R code shows in low-level detail how the data is streaming. We provide the helper &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;to_arrow()&lt;/code&gt; in the Arrow package which is a wrapper around this that makes it easy to incorporate this streaming into a dplyr pipeline. &lt;sup id=&quot;fnref:1&quot; role=&quot;doc-noteref&quot;&gt;&lt;a href=&quot;#fn:1&quot; class=&quot;footnote&quot; rel=&quot;footnote&quot;&gt;1&lt;/a&gt;&lt;/sup&gt;&l [...]
+
+&lt;h2 id=&quot;benchmark-comparison&quot;&gt;Benchmark Comparison&lt;/h2&gt;
+
+&lt;p&gt;Here we demonstrate in a simple benchmark the performance difference between querying Arrow datasets with DuckDB and querying Arrow datasets with Pandas.
+For both the Projection and Filter pushdown comparison, we will use Arrow tables. That is due to Pandas not being capable of consuming Arrow stream objects.&lt;/p&gt;
+
+&lt;p&gt;For the NYC Taxi benchmarks, we used the &lt;a href=&quot;https://www.monetdb.org/wiki/Scilens-configuration-standard&quot;&gt;scilens diamonds configuration&lt;/a&gt; and for the TPC-H benchmarks, we used an M1 MacBook Pro. In both cases, parallelism in DuckDB was used (which is now on by default).&lt;/p&gt;
+
+&lt;p&gt;For the comparison with Pandas, note that DuckDB runs in parallel, while Pandas only supports single-threaded execution. Besides that, one should note that we are comparing automatic optimizations. DuckDB’s query optimizer can automatically push down filters and projections. This automatic optimization is not supported in Pandas, but it is possible for users to manually perform some of these predicate and filter pushdowns by manually specifying them in the &lt;code class=&qu [...]
+
+&lt;h3 id=&quot;projection-pushdown&quot;&gt;Projection Pushdown&lt;/h3&gt;
+
+&lt;p&gt;In this example we run a simple aggregation on two columns of our lineitem table.&lt;/p&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# DuckDB
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pq&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;lineitemsf1.snappy.parquet&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;connect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Transforms Query Result from DuckDB to Arrow Table
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;execute&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;&quot;&quot;SELECT sum(l_extendedprice * l_discount) AS revenue
+                FROM
+                lineitem;&quot;&quot;&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;).&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fetch_arrow_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# Pandas
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pq&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;lineitemsf1.snappy.parquet&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Converts an Arrow table to a Dataframe
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;df&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Runs aggregation
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;  &lt;span class=&quot;n&quot;&gt;pd&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;DataFrame&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;({&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;sum&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;:&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;[(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;df [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Creates an Arrow Table from a Dataframe
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;new_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pa&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;Table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;from_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/ [...]
+
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;table&gt;
+  &lt;thead&gt;
+    &lt;tr&gt;
+      &lt;th&gt;Name&lt;/th&gt;
+      &lt;th style=&quot;text-align: right&quot;&gt;Time (s)&lt;/th&gt;
+    &lt;/tr&gt;
+  &lt;/thead&gt;
+  &lt;tbody&gt;
+    &lt;tr&gt;
+      &lt;td&gt;DuckDB&lt;/td&gt;
+      &lt;td style=&quot;text-align: right&quot;&gt;0.19&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;Pandas&lt;/td&gt;
+      &lt;td style=&quot;text-align: right&quot;&gt;2.13&lt;/td&gt;
+    &lt;/tr&gt;
+  &lt;/tbody&gt;
+&lt;/table&gt;
+
+&lt;p&gt;The lineitem table is composed of 16 columns; however, to execute this query only two columns, &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;l_extendedprice&lt;/code&gt; and &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;l_discount&lt;/code&gt;, are necessary. Since DuckDB can push down the projection of these columns, it is capable of executing this query about one order of magnitude faster than Pandas.&lt;/p&gt;
+
+&lt;h3 id=&quot;filter-pushdown&quot;&gt;Filter Pushdown&lt;/h3&gt;
+
+&lt;p&gt;For our filter pushdown we repeat the same aggregation used in the previous section, but add filters on 4 more columns.&lt;/p&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# DuckDB
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pq&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;lineitemsf1.snappy.parquet&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Get database connection
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;connect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Transforms Query Result from DuckDB to Arrow Table
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;execute&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;&quot;&quot;SELECT sum(l_extendedprice * l_discount) AS revenue
+        FROM
+            lineitem
+        WHERE
+            l_shipdate &amp;gt;= CAST(&apos;1994-01-01&apos; AS date)
+            AND l_shipdate &amp;lt; CAST(&apos;1995-01-01&apos; AS date)
+            AND l_discount BETWEEN 0.05
+            AND 0.07
+            AND l_quantity &amp;lt; 24; &quot;&quot;&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;).&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fetch_arrow_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# Pandas
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pq&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;lineitemsf1.snappy.parquet&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+
+&lt;span class=&quot;n&quot;&gt;df&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;arrow_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+&lt;span class=&quot;n&quot;&gt;filtered_df&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;[&lt;/span&gt;
+        &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;l_shipdate&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;gt;=&lt;/span&gt; &lt;span class=&quot;s&quot;&gt;&quot;1994-01-01&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;
+        &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;l_shipdate&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;lt;&lt;/span&gt; &lt;span class=&quot;s&quot;&gt;&quot;1995-01-01&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;
+        &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;l_discount&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;gt;=&lt;/span&gt; &lt;span class=&quot;mf&quot;&gt;0.05&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;
+        &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;l_discount&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;lt;=&lt;/span&gt; &lt;span class=&quot;mf&quot;&gt;0.07&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;
+        &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;lineitem&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;l_quantity&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;lt;&lt;/span&gt; &lt;span class=&quot;mi&quot;&gt;24&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)]&lt;/span&gt;
+
+&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;  &lt;span class=&quot;n&quot;&gt;pd&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;DataFrame&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;({&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;sum&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;:&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;[(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;filtered_df&lt; [...]
+&lt;span class=&quot;n&quot;&gt;new_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pa&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;Table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;from_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt;
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;table&gt;
+  &lt;thead&gt;
+    &lt;tr&gt;
+      &lt;th&gt;Name&lt;/th&gt;
+      &lt;th&gt;Time (s)&lt;/th&gt;
+    &lt;/tr&gt;
+  &lt;/thead&gt;
+  &lt;tbody&gt;
+    &lt;tr&gt;
+      &lt;td&gt;DuckDB&lt;/td&gt;
+      &lt;td&gt;0.04&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;Pandas&lt;/td&gt;
+      &lt;td&gt;2.29&lt;/td&gt;
+    &lt;/tr&gt;
+  &lt;/tbody&gt;
+&lt;/table&gt;
+
+&lt;p&gt;The difference now between DuckDB and Pandas is more drastic, with DuckDB being two orders of magnitude faster than Pandas. Again, since both the filter and projection are pushed down to Arrow, DuckDB reads less data than Pandas, which can’t automatically perform this optimization.&lt;/p&gt;
+
+&lt;h3 id=&quot;streaming&quot;&gt;Streaming&lt;/h3&gt;
+
+&lt;p&gt;As demonstrated before, DuckDB is capable of consuming and producing Arrow data in a streaming fashion. In this section we run a simple benchmark to showcase the benefits in speed and memory usage when comparing it to full materialization and Pandas. This example uses the full NYC taxi dataset, which you can download.&lt;/p&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# DuckDB
+# Open dataset using year,month folder partition
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&apos;nyc-taxi/&apos;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;partitioning&lt;/span&gt;&lt;span class=&quot;o [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Get database connection
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;con&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;duckdb&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;connect&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Run query that selects part of the data
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;query&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;con&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;execute&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;SELECT total_amount, passenger_count,year FROM nyc where total_amount &amp;gt; 100 and year &amp;gt; 2014&quot;&lt;/span&gt;&lt;span class=&quot;p&quot [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Create Record Batch Reader from Query Result.
+# &quot;fetch_record_batch()&quot; also accepts an extra parameter related to the desired produced chunk size.
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;query&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;fetch_record_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Retrieve all batch chunks
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;chunk&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_next_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+&lt;span class=&quot;k&quot;&gt;while&lt;/span&gt; &lt;span class=&quot;nb&quot;&gt;len&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;chunk&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;gt;&lt;/span&gt; &lt;span class=&quot;mi&quot;&gt;0&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;:&lt;/span&gt;
+    &lt;span class=&quot;n&quot;&gt;chunk&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;record_batch_reader&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;read_next_batch&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;div class=&quot;language-python highlighter-rouge&quot;&gt;&lt;div class=&quot;highlight&quot;&gt;&lt;pre class=&quot;highlight&quot;&gt;&lt;code&gt;&lt;span class=&quot;c1&quot;&gt;# Pandas
+# We must exclude one of the columns of the NYC dataset due to an unimplemented cast in Arrow.
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;working_columns&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;p&quot;&gt;[&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;vendor_id&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;pickup_at&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;dropoff_at&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,& [...]
+    &lt;span class=&quot;s&quot;&gt;&quot;pickup_latitude&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;store_and_fwd_flag&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;dropoff_longitude&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;dropoff_latitude&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span c [...]
+    &lt;span class=&quot;s&quot;&gt;&quot;fare_amount&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;extra&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;mta_tax&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;tip_amount&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;tolls [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Open dataset using year,month folder partition
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_dataset&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;ds&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;nb&quot;&gt;dir&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;partitioning&lt;/span&gt;&lt;span class=&quot;o&quot;&gt [...]
+&lt;span class=&quot;c1&quot;&gt;# Generate a scanner to skip problematic column
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;dataset_scanner&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;nyc_dataset&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;scanner&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;columns&lt;/span&gt;&lt;span class=&quot;o&quot;&gt;=&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;working_columns&lt;/span&gt;&lt;span clas [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Materialize dataset to an Arrow Table
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;dataset_scanner&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Generate Dataframe from Arrow Table
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_df&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;nyc_table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;to_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;()&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Apply Filter
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;filtered_df&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;nyc_df&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;[&lt;/span&gt;
+    &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_df&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;total_amount&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;gt;&lt;/span&gt; &lt;span class=&quot;mi&quot;&gt;100&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;amp;&lt;/span&gt;
+    &lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;nyc_df&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;year&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;&amp;gt;&lt;/span&gt;&lt;span class=&quot;mi&quot;&gt;2014&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)]&lt;/span&gt;
+
+&lt;span class=&quot;c1&quot;&gt;# Apply Projection
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;filtered_df&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;[[&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;total_amount&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt; &lt;span class=&quot;s&quot;&gt;&quot;passenger_count&quot;&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;,&lt;/span&gt;&lt;span class=&quot;s&quot;&gt;&quot;year&quo [...]
+
+&lt;span class=&quot;c1&quot;&gt;# Transform Result back to an Arrow Table
+&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;new_table&lt;/span&gt; &lt;span class=&quot;o&quot;&gt;=&lt;/span&gt; &lt;span class=&quot;n&quot;&gt;pa&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;Table&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;.&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;from_pandas&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;(&lt;/span&gt;&lt;span class=&quot;n&quot;&gt;res&lt;/span&gt;&lt;span class=&quot;p&quot;&gt;)&lt;/ [...]
+&lt;/code&gt;&lt;/pre&gt;&lt;/div&gt;&lt;/div&gt;
+
+&lt;table&gt;
+  &lt;thead&gt;
+    &lt;tr&gt;
+      &lt;th&gt;Name&lt;/th&gt;
+      &lt;th&gt;Time (s)&lt;/th&gt;
+      &lt;th&gt;Peak Memory Usage (GBs)&lt;/th&gt;
+    &lt;/tr&gt;
+  &lt;/thead&gt;
+  &lt;tbody&gt;
+    &lt;tr&gt;
+      &lt;td&gt;DuckDB&lt;/td&gt;
+      &lt;td&gt;0.05&lt;/td&gt;
+      &lt;td&gt;0.3&lt;/td&gt;
+    &lt;/tr&gt;
+    &lt;tr&gt;
+      &lt;td&gt;Pandas&lt;/td&gt;
+      &lt;td&gt;146.91&lt;/td&gt;
+      &lt;td&gt;248&lt;/td&gt;
+    &lt;/tr&gt;
+  &lt;/tbody&gt;
+&lt;/table&gt;
+
+&lt;p&gt;The difference in times between DuckDB and Pandas is a combination of all the integration benefits we explored in this article. In DuckDB the filter pushdown is applied to perform partition elimination (i.e., we skip reading the Parquet files where the year is &amp;lt;= 2014). The filter pushdown is also used to eliminate unrelated row_groups (i.e., row groups where the total amount is always &amp;lt;= 100). Due to our projection pushdown, Arrow only has to read the columns of i [...]
+
+&lt;p&gt;In the table above, we also depict the comparison of peak memory usage between DuckDB (Streaming) and Pandas (Fully-Materializing).  In DuckDB, we only need to load the row-group of interest into memory. Hence our memory usage is low. We also have constant memory usage since we only have to keep one of these row groups in-memory at a time. Pandas, on the other hand, has to fully materialize all Parquet files when executing the query. Because of this, we see a constant steep incr [...]
+
+&lt;h2 id=&quot;conclusion-and-feedback&quot;&gt;Conclusion and Feedback&lt;/h2&gt;
+&lt;p&gt;In this blog post, we mainly showcased how to execute queries on Arrow datasets with DuckDB. There are additional libraries that can also consume the Arrow format but they have different purposes and capabilities. As always, we are happy to hear if you want to see benchmarks with different tools for a post in the future! Feel free to drop us an &lt;a href=&quot;mailto:pedro@duckdblabs.com;jon@voltrondata.com&quot;&gt;email&lt;/a&gt; or share your thoughts directly in the Hacker  [...]
+
+&lt;p&gt;Last but not least, if you encounter any problems when using our integration, please open an issue in either &lt;a href=&quot;https://github.com/duckdb/duckdb/issues&quot;&gt;DuckDB’s - issue tracker&lt;/a&gt; or &lt;a href=&quot;https://issues.apache.org/jira/projects/ARROW/&quot;&gt;Arrow’s - issue tracker&lt;/a&gt;, depending on which library has a problem.&lt;/p&gt;
+
+&lt;div class=&quot;footnotes&quot; role=&quot;doc-endnotes&quot;&gt;
+  &lt;ol&gt;
+    &lt;li id=&quot;fn:1&quot; role=&quot;doc-endnote&quot;&gt;
+      &lt;p&gt;In Arrow 6.0.0, &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;to_arrow()&lt;/code&gt; currently returns the full table, but will allow full streaming in our upcoming 7.0.0 release. &lt;a href=&quot;#fnref:1&quot; class=&quot;reversefootnote&quot; role=&quot;doc-backlink&quot;&gt;&amp;#8617;&lt;/a&gt;&lt;/p&gt;
+    &lt;/li&gt;
+  &lt;/ol&gt;
+&lt;/div&gt;</content><author><name>Pedro Holanda, Jonathan Keane</name></author><category term="application" /><summary type="html">TLDR: The zero-copy integration between DuckDB and Apache Arrow allows for rapid analysis of larger than memory datasets in Python and R using either SQL or relational APIs. This post is a collaboration with and cross-posted on the DuckDB blog. Part of Apache Arrow is an in-memory data format optimized for analytical libraries. Like Pandas and R Dataframes, [...]
 
 --&gt;
 
@@ -1296,55 +1699,4 @@ C++, R, Python and JavaScript.
 The list is available &lt;a href=&quot;https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20status%20%3D%20Resolved%20AND%20fixVersion%20%3D%204.0.1&quot;&gt;here&lt;/a&gt;, with the list of contributors &lt;a href=&quot;/release/4.0.1.html#contributors&quot;&gt;here&lt;/a&gt;
 and changelog &lt;a href=&quot;/release/4.0.1.html&quot;&gt;here&lt;/a&gt;.&lt;/p&gt;
 
-&lt;p&gt;As usual, see &lt;a href=&quot;/install/&quot;&gt;the install page&lt;/a&gt; for instructions on how to install it.&lt;/p&gt;</content><author><name>pmc</name></author><category term="release" /><summary type="html">The Apache Arrow team is pleased to announce the 4.0.1 release. This release covers general bug fixes on the different implementations, notably C++, R, Python and JavaScript. The list is available here, with the list of contributors here and changelog here. As usual, [...]
-
---&gt;
-
-&lt;p&gt;The Apache Arrow Rust community is excited to announce that its migration to a new development workflow is now complete! If you’re considering Rust as a language for working with columnar data, read on and see how your use case might benefit from our new and improved project setup.&lt;/p&gt;
-
-&lt;p&gt;In recent months, members of the community have been working closely with Arrow’s &lt;a href=&quot;https://arrow.apache.org/committers/&quot;&gt;Project Management Committee&lt;/a&gt; and other contributors to expand the set of available workflows for Arrow implementations. The goal was to define a new development process that ultimately:&lt;/p&gt;
-&lt;ul&gt;
-  &lt;li&gt;Enables a faster release cadence that adheres to &lt;a href=&quot;https://semver.org/&quot;&gt;SemVer&lt;/a&gt; where appropriate&lt;/li&gt;
-  &lt;li&gt;Encourages maximum participation from the wider community with unified tooling&lt;/li&gt;
-  &lt;li&gt;Ensures that we continue to uphold the tenets of &lt;a href=&quot;https://www.apache.org/theapacheway/&quot;&gt;The Apache Way&lt;/a&gt;&lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;p&gt;If you’re just here for the highlights, the major outcomes of these discussions are as follows:&lt;/p&gt;
-&lt;ul&gt;
-  &lt;li&gt;The Rust projects have moved to separate repositories, outside the main Arrow &lt;a href=&quot;https://en.wikipedia.org/wiki/Monorepo&quot;&gt;monorepo&lt;/a&gt;
-    &lt;ul&gt;
-      &lt;li&gt;&lt;a href=&quot;https://github.com/apache/arrow-rs&quot;&gt;arrow-rs&lt;/a&gt; for the core Arrow, Arrow Flight, and Parquet implementations in Rust&lt;/li&gt;
-      &lt;li&gt;&lt;a href=&quot;https://github.com/apache/arrow-datafusion&quot;&gt;arrow-datafusion&lt;/a&gt; for DataFusion and Ballista (more on these projects below!)&lt;/li&gt;
-    &lt;/ul&gt;
-  &lt;/li&gt;
-  &lt;li&gt;The Rust community will use GitHub Issues for tracking feature development and issues, replacing the Jira instance maintained by the Apache Software Foundation (ASF)&lt;/li&gt;
-  &lt;li&gt;DataFusion and Ballista will follow a new release cycle, independent of the main Arrow releases&lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;p&gt;But why, as a community, have we decided to change our processes? Let’s take a slightly more in-depth look at the Rust implementation’s needs.&lt;/p&gt;
-
-&lt;h2 id=&quot;project-structure&quot;&gt;Project Structure&lt;/h2&gt;
-&lt;p&gt;The Rust implementation of Arrow actually consists of several distinct projects, or in Rust parlance, &lt;a href=&quot;https://doc.rust-lang.org/book/ch07-01-packages-and-crates.html&quot;&gt;“crates”&lt;/a&gt;. In addition to the core crates, namely &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;arrow&lt;/code&gt;, &lt;code class=&quot;language-plaintext highlighter-rouge&quot;&gt;arrow-flight&lt;/code&gt;, and &lt;code class=&quot;language-plaintext highlig [...]
-&lt;ul&gt;
-  &lt;li&gt;&lt;a href=&quot;https://github.com/apache/arrow-datafusion/datafusion&quot;&gt;DataFusion&lt;/a&gt;: an extensible in-memory query execution engine using Arrow as its format&lt;/li&gt;
-  &lt;li&gt;&lt;a href=&quot;https://github.com/apache/arrow-datafusion/ballista&quot;&gt;Ballista&lt;/a&gt;: a distributed compute platform, powered by Apache Arrow and DataFusion&lt;/li&gt;
-&lt;/ul&gt;
-
-&lt;p&gt;Whilst these projects are all closely related, with many shared contributors, they’re very much at different stages in their respective lifecycles. The core Arrow crate, as an implementation of a spec, has strict compatibility requirements with other versions of Arrow, and this is tested via rigorous cross-language integration tests.&lt;/p&gt;
-
-&lt;p&gt;However, at the other end of the spectrum, DataFusion and Ballista are still nascent projects in their own right that undergo frequent backwards-incompatible changes. In the old workflow, DataFusion was released in lockstep with Arrow; because DataFusion users often need newly-contributed features or bugfixes on a tighter schedule than Arrow releases, we observed that many people in the community simply resorted to referencing our GitHub repository directly, rather than properly [...]
-
-&lt;p&gt;Ultimately, the decision was made to split the Rust crates into two separate repositories: &lt;a href=&quot;https://github.com/apache/arrow-rs&quot;&gt;arrow-rs&lt;/a&gt; for the core Arrow functionality, and &lt;a href=&quot;https://github.com/apache/arrow-datafusion&quot;&gt;arrow-datafusion&lt;/a&gt; for DataFusion and Ballista. There’s still work to be done on determining the exact release workflows for the latter, but this leaves us in a much better position to meet the bro [...]
-
-&lt;h2 id=&quot;community-participation&quot;&gt;Community Participation&lt;/h2&gt;
-&lt;p&gt;All Apache projects are built on volunteer contribution; it’s a core principle of both the ASF and open-source software development more broadly. One point of friction that was observed in the previous workflow for the Rust community in particular was the requirement for issues to be logged in Arrow’s Jira project. This step required would-be contributors to first register an account, and then receive a permissions grant to manage tickets.&lt;/p&gt;
-
-&lt;p&gt;To streamline this process for new community members, we’ve taken the decision to migrate to GitHub Issues for tracking both new development work and known bugs that need addressing, and bootstrapped our new repositories by importing their respective tickets from Jira. Creating issues to track non-trivial proposed features and enhancements is still required; this creates an opportunity for community review and helps ensure that feedback is delivered as early in the process as po [...]
-
-&lt;h2 id=&quot;get-involved&quot;&gt;Get Involved&lt;/h2&gt;
-&lt;p&gt;To further improve the onboarding flow for new Arrow contributors, we have started the process of labeling select issues as “good first issue” in &lt;a href=&quot;https://github.com/apache/arrow-rs/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22&quot;&gt;arrow-rs&lt;/a&gt; and &lt;a href=&quot;https://github.com/apache/arrow-datafusion/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22&quot;&gt;arrow-datafusion&lt;/a&gt;. These issues are small in scope bu [...]
-
-&lt;p&gt;Not quite sure where to start with a particular issue, or curious about the status of one of our projects? Join the Arrow &lt;a href=&quot;https://arrow.apache.org/community&quot;&gt;mailing lists&lt;/a&gt; or the #arrow-rust channel on the &lt;a href=&quot;https://s.apache.org/slack-invite&quot;&gt;ASF Slack&lt;/a&gt; server.&lt;/p&gt;
-
-&lt;h2 id=&quot;in-closing&quot;&gt;In Closing&lt;/h2&gt;
-&lt;p&gt;As a final note: nothing here is intended as prescriptive advice. As a community, we’ve decided that these processes are the best fit for the current status of our projects, but this may change over time! There is, after all, &lt;a href=&quot;https://en.wikipedia.org/wiki/No_Silver_Bullet&quot;&gt;no silver bullet&lt;/a&gt; for software engineering.&lt;/p&gt;</content><author><name>ruanpa</name></author><category term="application" /><summary type="html">The Apache Arrow Rust co [...]
\ No newline at end of file
+&lt;p&gt;As usual, see &lt;a href=&quot;/install/&quot;&gt;the install page&lt;/a&gt; for instructions on how to install it.&lt;/p&gt;</content><author><name>pmc</name></author><category term="release" /><summary type="html">The Apache Arrow team is pleased to announce the 4.0.1 release. This release covers general bug fixes on the different implementations, notably C++, R, Python and JavaScript. The list is available here, with the list of contributors here and changelog here. As usual, [...]
\ No newline at end of file
diff --git a/release/0.1.0.html b/release/0.1.0.html
index 40040ec..623d131 100644
--- a/release/0.1.0.html
+++ b/release/0.1.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="0.1.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.1.0 (10 October 2016) Download Source Release: [apache-arrow-0.1.0.tar.gz][6] Verification: [md5][3], [asc][7] Changelog Contributors $ git shortlog -sn d5aa7c46..apache-arrow-0.1.0 49 Wes McKinney 27 Uwe L. Korn 25 Julien Le Dem 13 Micah Kornfield 11 Steven Phillips 6 Jihoon Son 5 Laurent Goujon 5 adeneche 4 Dan Robinson 4 proflin 2 Jacques Nadeau 1 Christopher C. Aycock 1 Edmon Begoli 1 Kai Zheng 1 MechCoder 1 Minji Kim 1 Philipp Moritz 1 Smyatkin Maxim 1 [...]
+{"description":"Apache Arrow 0.1.0 (10 October 2016) Download Source Release: [apache-arrow-0.1.0.tar.gz][6] Verification: [md5][3], [asc][7] Changelog Contributors $ git shortlog -sn d5aa7c46..apache-arrow-0.1.0 49 Wes McKinney 27 Uwe L. Korn 25 Julien Le Dem 13 Micah Kornfield 11 Steven Phillips 6 Jihoon Son 5 Laurent Goujon 5 adeneche 4 Dan Robinson 4 proflin 2 Jacques Nadeau 1 Christopher C. Aycock 1 Edmon Begoli 1 Kai Zheng 1 MechCoder 1 Minji Kim 1 Philipp Moritz 1 Smyatkin Maxim 1 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.10.0.html b/release/0.10.0.html
index 6bf13be..4b30aeb 100644
--- a/release/0.10.0.html
+++ b/release/0.10.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.10.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.10.0 (6 August 2018) This is a major release. Download Source Artifacts Binary Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.9.0..apache-arrow-0.10.0 70 Antoine Pitrou 49 Kouhei Sutou 40 Korn, Uwe 37 Wes McKinney 32 Krisztián Szűcs 30 Andy Grove 20 Philipp Moritz 13 Phillip Cloud 11 Bryan Cutler 11 yosuke shiro 7 Dimitri Vorona 6 Zhijun Fu 5 Bruce Mitchener 5 Joshua Storck 5 Robert Nishihara 5 ptaylor 4 Maximilian Roos 4 Sebastien Binet 3 [...]
+{"description":"Apache Arrow 0.10.0 (6 August 2018) This is a major release. Download Source Artifacts Binary Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.9.0..apache-arrow-0.10.0 70 Antoine Pitrou 49 Kouhei Sutou 40 Korn, Uwe 37 Wes McKinney 32 Krisztián Szűcs 30 Andy Grove 20 Philipp Moritz 13 Phillip Cloud 11 Bryan Cutler 11 yosuke shiro 7 Dimitri Vorona 6 Zhijun Fu 5 Bruce Mitchener 5 Joshua Storck 5 Robert Nishihara 5 ptaylor 4 Maximilian Roos 4 Sebastien Binet 3 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.11.0.html b/release/0.11.0.html
index 5c0ae09..5c62bf3 100644
--- a/release/0.11.0.html
+++ b/release/0.11.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.11.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.11.0 (8 October 2018) This is a major release. Download Source Artifacts Binary Artifacts Git tag Contributors This includes patches from Apache Parquet that were merged. $ git shortlog -sn apache-arrow-0.10.0..apache-arrow-0.11.0 166 Wes McKinney 59 Uwe L. Korn 57 Deepak Majeti 54 Kouhei Sutou 50 Krisztián Szűcs 48 Antoine Pitrou 38 Korn, Uwe 29 Pindikura Ravindra 21 praveenbingo 9 Vivekanand Vellanki 8 Philipp Moritz 8 Sebastien Binet 7 Paddy Horan 7 Phil [...]
+{"description":"Apache Arrow 0.11.0 (8 October 2018) This is a major release. Download Source Artifacts Binary Artifacts Git tag Contributors This includes patches from Apache Parquet that were merged. $ git shortlog -sn apache-arrow-0.10.0..apache-arrow-0.11.0 166 Wes McKinney 59 Uwe L. Korn 57 Deepak Majeti 54 Kouhei Sutou 50 Krisztián Szűcs 48 Antoine Pitrou 38 Korn, Uwe 29 Pindikura Ravindra 21 praveenbingo 9 Vivekanand Vellanki 8 Philipp Moritz 8 Sebastien Binet 7 Paddy Horan 7 Phil [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.11.1.html b/release/0.11.1.html
index 62704b5..aa66e4e 100644
--- a/release/0.11.1.html
+++ b/release/0.11.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.11.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.11.1 (19 October 2018) This is a bugfix release to address a Python packaging issue with zlib that resulted in bug ARROW-3514. Download Source Artifacts Binary Artifacts Git tag Changelog New Features and Improvements ARROW-3353 - [Packaging] Build python 3.7 wheels ARROW-3534 - [Python] Update zlib library in manylinux1 image ARROW-3546 - [Python] Provide testing setup to verify wheel binaries work in one or more common Linux distributions ARROW-3565 - [Py [...]
+{"description":"Apache Arrow 0.11.1 (19 October 2018) This is a bugfix release to address a Python packaging issue with zlib that resulted in bug ARROW-3514. Download Source Artifacts Binary Artifacts Git tag Changelog New Features and Improvements ARROW-3353 - [Packaging] Build python 3.7 wheels ARROW-3534 - [Python] Update zlib library in manylinux1 image ARROW-3546 - [Python] Provide testing setup to verify wheel binaries work in one or more common Linux distributions ARROW-3565 - [Py [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.12.0.html b/release/0.12.0.html
index 2d35590..9351616 100644
--- a/release/0.12.0.html
+++ b/release/0.12.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.12.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.12.0 (20 January 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts Git tag 8ca41384b5324bfd0ef3d3ed3f728e1d10ed73f0 Contributors This release includes 601 commits from 77 distinct contributors. $ git shortlog -sn apache-arrow-0.11.0..apache-arrow-0.12.0 94 Kouhei Sutou 76 Wes McKinney 61 Antoine Pitrou 60 Krisztián Szűcs 25 Pindikura Ravindra 25 Yosuke Shiro 21 Romain Francois 17 Chao Sun 16 [...]
+{"description":"Apache Arrow 0.12.0 (20 January 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts Git tag 8ca41384b5324bfd0ef3d3ed3f728e1d10ed73f0 Contributors This release includes 601 commits from 77 distinct contributors. $ git shortlog -sn apache-arrow-0.11.0..apache-arrow-0.12.0 94 Kouhei Sutou 76 Wes McKinney 61 Antoine Pitrou 60 Krisztián Szűcs 25 Pindikura Ravindra 25 Yosuke Shiro 21 Romain Francois 17 Chao Sun 16 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.13.0.html b/release/0.13.0.html
index dfaf71e..3efff24 100644
--- a/release/0.13.0.html
+++ b/release/0.13.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.13.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.13.0 (1 April 2019) This is a major release covering more than 2 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 550 commits from 81 distinct contributors. $ git shortlog -sn apache-arrow-0.12.0..apache-arrow-0.13.0 56 Wes McKinney 55 Kouhei Sutou 31 Antoine Pitrou 31 François Saint-Jacques 31 Korn, Uwe 30 Krisztián Szűcs 28 Andy Grove 27 Uwe L. Korn 21  [...]
+{"description":"Apache Arrow 0.13.0 (1 April 2019) This is a major release covering more than 2 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 550 commits from 81 distinct contributors. $ git shortlog -sn apache-arrow-0.12.0..apache-arrow-0.13.0 56 Wes McKinney 55 Kouhei Sutou 31 Antoine Pitrou 31 François Saint-Jacques 31 Korn, Uwe 30 Krisztián Szűcs 28 Andy Grove 27 Uwe L. Korn 21  [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.14.0.html b/release/0.14.0.html
index d6a2c91..d10d47d 100644
--- a/release/0.14.0.html
+++ b/release/0.14.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.14.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.14.0 (4 July 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 570 commits from 78 distinct contributors. $ git shortlog -sn apache-arrow-0.13.0..apache-arrow-0.14.0 67 Antoine Pitrou 62 Wes McKinney 37 Sebastien Binet 34 Sutou Kouhei 25 Kouhei Sutou 24 Neal Richardson 22 Romain Francois 21 Joris Van den B [...]
+{"description":"Apache Arrow 0.14.0 (4 July 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 570 commits from 78 distinct contributors. $ git shortlog -sn apache-arrow-0.13.0..apache-arrow-0.14.0 67 Antoine Pitrou 62 Wes McKinney 37 Sebastien Binet 34 Sutou Kouhei 25 Kouhei Sutou 24 Neal Richardson 22 Romain Francois 21 Joris Van den B [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.14.1.html b/release/0.14.1.html
index 4197757..71fcd65 100644
--- a/release/0.14.1.html
+++ b/release/0.14.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.14.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.14.1 (22 July 2019) This is a bugfix release to address a Python wheel packaging issues and Parquet forward compatibility problems. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 52 commits from 16 distinct contributors. $ git shortlog -sn apache-arrow-0.14.0..apache-arrow-0.14.1 15 Sutou Kouhei 10 Krisztián Szűcs 5 Antoine Pitrou 5 Wes McKinney 2 David Li 2 Eric Erhardt 2 Jo [...]
+{"description":"Apache Arrow 0.14.1 (22 July 2019) This is a bugfix release to address a Python wheel packaging issues and Parquet forward compatibility problems. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 52 commits from 16 distinct contributors. $ git shortlog -sn apache-arrow-0.14.0..apache-arrow-0.14.1 15 Sutou Kouhei 10 Krisztián Szűcs 5 Antoine Pitrou 5 Wes McKinney 2 David Li 2 Eric Erhardt 2 Jo [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.15.0.html b/release/0.15.0.html
index 0644864..9025d52 100644
--- a/release/0.15.0.html
+++ b/release/0.15.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.15.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.15.0 (5 October 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 672 commits from 80 distinct contributors. $ git shortlog -sn apache-arrow-0.14.0..apache-arrow-0.15.0 96 Wes McKinney 63 Antoine Pitrou 59 tianchen 55 Sutou Kouhei 46 liyafan82 38 Neal Richardson 34 Joris Van den Bossche 29 Krisztián Szűcs  [...]
+{"description":"Apache Arrow 0.15.0 (5 October 2019) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 672 commits from 80 distinct contributors. $ git shortlog -sn apache-arrow-0.14.0..apache-arrow-0.15.0 96 Wes McKinney 63 Antoine Pitrou 59 tianchen 55 Sutou Kouhei 46 liyafan82 38 Neal Richardson 34 Joris Van den Bossche 29 Krisztián Szűcs  [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.15.1.html b/release/0.15.1.html
index d9c0e58..65662b3 100644
--- a/release/0.15.1.html
+++ b/release/0.15.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.15.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.15.1 (1 November 2019) This is a major release covering more than 1 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 41 commits from 13 distinct contributors. $ git shortlog -sn apache-arrow-0.15.0..apache-arrow-0.15.1 10 Wes McKinney 10 Antoine Pitrou 7 Krisztián Szűcs 2 tianchen92 2 Joris Van den Bossche 2 Sutou Kouhei 2 Anthony Abate 1 Uwe L. Korn 1 Pr [...]
+{"description":"Apache Arrow 0.15.1 (1 November 2019) This is a major release covering more than 1 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 41 commits from 13 distinct contributors. $ git shortlog -sn apache-arrow-0.15.0..apache-arrow-0.15.1 10 Wes McKinney 10 Antoine Pitrou 7 Krisztián Szűcs 2 tianchen92 2 Joris Van den Bossche 2 Sutou Kouhei 2 Anthony Abate 1 Uwe L. Korn 1 Pr [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.16.0.html b/release/0.16.0.html
index adcbb79..d93ab4a 100644
--- a/release/0.16.0.html
+++ b/release/0.16.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.16.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.16.0 (7 February 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 709 commits from 99 distinct contributors. $ git shortlog -sn apache-arrow-0.15.1..apache-arrow-0.16.0 84 Antoine Pitrou 80 Sutou Kouhei 50 Neal Richardson 47 Krisztián Szűcs 44 Joris Van den Bossche 41 Wes McKinney 32 François Saint-Jacque [...]
+{"description":"Apache Arrow 0.16.0 (7 February 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 709 commits from 99 distinct contributors. $ git shortlog -sn apache-arrow-0.15.1..apache-arrow-0.16.0 84 Antoine Pitrou 80 Sutou Kouhei 50 Neal Richardson 47 Krisztián Szűcs 44 Joris Van den Bossche 41 Wes McKinney 32 François Saint-Jacque [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.17.0.html b/release/0.17.0.html
index 17a777f..91d3617 100644
--- a/release/0.17.0.html
+++ b/release/0.17.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.17.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.17.0 (20 April 2020) This is a major release covering more than 2 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 569 commits from 79 distinct contributors. $ git shortlog -sn apache-arrow-0.16.0..apache-arrow-0.17.0 80 Antoine Pitrou 78 Krisztián Szűcs 58 Wes McKinney 55 Neal Richardson 39 Sutou Kouhei 30 Benjamin Kietzman 26 Joris Van den Bossche 20 An [...]
+{"description":"Apache Arrow 0.17.0 (20 April 2020) This is a major release covering more than 2 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 569 commits from 79 distinct contributors. $ git shortlog -sn apache-arrow-0.16.0..apache-arrow-0.17.0 80 Antoine Pitrou 78 Krisztián Szűcs 58 Wes McKinney 55 Neal Richardson 39 Sutou Kouhei 30 Benjamin Kietzman 26 Joris Van den Bossche 20 An [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.17.1.html b/release/0.17.1.html
index f351f7c..5da21cf 100644
--- a/release/0.17.1.html
+++ b/release/0.17.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.17.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.17.1 (18 May 2020) This is a patch release fixing bugs and regressions listed in the changelog below. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 27 commits from 11 distinct contributors. $ git shortlog -sn apache-arrow-0.17.0..apache-arrow-0.17.1 9 Krisztián Szűcs 4 Sutou Kouhei 3 Neal Richardson 3 Wes McKinney 2 Antoine Pitrou 1 Eric Erhardt 1 Joris Van den Bossche 1 Mic [...]
+{"description":"Apache Arrow 0.17.1 (18 May 2020) This is a patch release fixing bugs and regressions listed in the changelog below. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 27 commits from 11 distinct contributors. $ git shortlog -sn apache-arrow-0.17.0..apache-arrow-0.17.1 9 Krisztián Szűcs 4 Sutou Kouhei 3 Neal Richardson 3 Wes McKinney 2 Antoine Pitrou 1 Eric Erhardt 1 Joris Van den Bossche 1 Mic [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.2.0.html b/release/0.2.0.html
index 68574d1..e5318e6 100644
--- a/release/0.2.0.html
+++ b/release/0.2.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="0.2.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.2.0 (18 February 2017) Download Source Artifacts Git tag Changelog Contributors $ git shortlog -sn apache-arrow-0.1.0..apache-arrow-0.2.0 73 Wes McKinney 55 Uwe L. Korn 16 Julien Le Dem 4 Bryan Cutler 4 Nong Li 2 Christopher C. Aycock 2 Jingyuan Wang 2 Kouhei Sutou 2 Laurent Goujon 2 Leif Walsh 1 Emilio Lahr-Vivaz 1 Holden Karau 1 Li Jin 1 Mohamed Zenadi 1 Peter Hoffmann 1 Steven Phillips 1 adeneche 1 ahnj 1 vkorukanti New Features and Improvements ARROW-10 [...]
+{"description":"Apache Arrow 0.2.0 (18 February 2017) Download Source Artifacts Git tag Changelog Contributors $ git shortlog -sn apache-arrow-0.1.0..apache-arrow-0.2.0 73 Wes McKinney 55 Uwe L. Korn 16 Julien Le Dem 4 Bryan Cutler 4 Nong Li 2 Christopher C. Aycock 2 Jingyuan Wang 2 Kouhei Sutou 2 Laurent Goujon 2 Leif Walsh 1 Emilio Lahr-Vivaz 1 Holden Karau 1 Li Jin 1 Mohamed Zenadi 1 Peter Hoffmann 1 Steven Phillips 1 adeneche 1 ahnj 1 vkorukanti New Features and Improvements ARROW-10 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.3.0.html b/release/0.3.0.html
index ad8040f..7778ee2 100644
--- a/release/0.3.0.html
+++ b/release/0.3.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="0.3.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.3.0 (5 May 2017) Read more in the release blog post Download Source Artifacts Git tag d8db8f8 Changelog Contributors $ git shortlog -sn apache-arrow-0.2.0..apache-arrow-0.3.0 119 Wes McKinney 55 Kouhei Sutou 18 Uwe L. Korn 17 Julien Le Dem 9 Phillip Cloud 6 Bryan Cutler 5 Emilio Lahr-Vivaz 5 Philipp Moritz 4 Jeff Knupp 4 Johan Mabille 4 Max Risuhin 3 Miki Tebeka 3 Steven Phillips 2 Brian Hulette 2 Jeff Reback 2 Leif Walsh 1 Deepak Majeti 1 Holden Karau 1 It [...]
+{"description":"Apache Arrow 0.3.0 (5 May 2017) Read more in the release blog post Download Source Artifacts Git tag d8db8f8 Changelog Contributors $ git shortlog -sn apache-arrow-0.2.0..apache-arrow-0.3.0 119 Wes McKinney 55 Kouhei Sutou 18 Uwe L. Korn 17 Julien Le Dem 9 Phillip Cloud 6 Bryan Cutler 5 Emilio Lahr-Vivaz 5 Philipp Moritz 4 Jeff Knupp 4 Johan Mabille 4 Max Risuhin 3 Miki Tebeka 3 Steven Phillips 2 Brian Hulette 2 Jeff Reback 2 Leif Walsh 1 Deepak Majeti 1 Holden Karau 1 It [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.4.0.html b/release/0.4.0.html
index 5de8c8b..642e5ce 100644
--- a/release/0.4.0.html
+++ b/release/0.4.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="0.4.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.4.0 (22 May 2017) Read more in the release blog post Download Source Artifacts Git tag a8f8ba0 Changelog Contributors $ git shortlog -sn apache-arrow-0.3.0..apache-arrow-0.4.0 28 Wes McKinney 18 Kouhei Sutou 9 Uwe L. Korn 3 Brian Hulette 3 Emilio Lahr-Vivaz 3 Philipp Moritz 3 Phillip Cloud 2 Julien Le Dem 1 Bryan Cutler 1 Jeff Reback 1 Max Risuhin 1 Wenchen Fan 1 bgosztonyi New Features and Improvements ARROW-1000 - [GLib] Move install document to Website A [...]
+{"description":"Apache Arrow 0.4.0 (22 May 2017) Read more in the release blog post Download Source Artifacts Git tag a8f8ba0 Changelog Contributors $ git shortlog -sn apache-arrow-0.3.0..apache-arrow-0.4.0 28 Wes McKinney 18 Kouhei Sutou 9 Uwe L. Korn 3 Brian Hulette 3 Emilio Lahr-Vivaz 3 Philipp Moritz 3 Phillip Cloud 2 Julien Le Dem 1 Bryan Cutler 1 Jeff Reback 1 Max Risuhin 1 Wenchen Fan 1 bgosztonyi New Features and Improvements ARROW-1000 - [GLib] Move install document to Website A [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.4.1.html b/release/0.4.1.html
index 07f68b4..249e45e 100644
--- a/release/0.4.1.html
+++ b/release/0.4.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="0.4.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.4.1 (9 June 2017) This is primarily a bug fix release, but also includes some packaging and documentation improvements. Read more in the release blog post. Download Source Artifacts Git tag 46315431 Changelog New Features and Improvements ARROW-1020 - [Format] Add additional language to Schema.fbs to clarify naive vs. localized Timestamp values ARROW-1034 - [Python] Enable creation of binary wheels on Windows / MSVC ARROW-1049 - [java] vector template clean [...]
+{"description":"Apache Arrow 0.4.1 (9 June 2017) This is primarily a bug fix release, but also includes some packaging and documentation improvements. Read more in the release blog post. Download Source Artifacts Git tag 46315431 Changelog New Features and Improvements ARROW-1020 - [Format] Add additional language to Schema.fbs to clarify naive vs. localized Timestamp values ARROW-1034 - [Python] Enable creation of binary wheels on Windows / MSVC ARROW-1049 - [java] vector template clean [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.5.0.html b/release/0.5.0.html
index 47050cd..c2f4dbf 100644
--- a/release/0.5.0.html
+++ b/release/0.5.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.5.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.5.0 (23 July 2017) This is a major release, with expanded features in the supported languages and additional integration test coverage between Java and C++. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.4.1..apache-arrow-0.5.0 42 Wes McKinney 22 Uwe L. Korn 12 Kouhei Sutou 9 Max Risuhin 9 Phillip Cloud 6 Philipp Moritz 5 Steven Phillips 3 Julien Le Dem 2 Bryan Cutler 2 Kengo Seki 2 Max R [...]
+{"description":"Apache Arrow 0.5.0 (23 July 2017) This is a major release, with expanded features in the supported languages and additional integration test coverage between Java and C++. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.4.1..apache-arrow-0.5.0 42 Wes McKinney 22 Uwe L. Korn 12 Kouhei Sutou 9 Max Risuhin 9 Phillip Cloud 6 Philipp Moritz 5 Steven Phillips 3 Julien Le Dem 2 Bryan Cutler 2 Kengo Seki 2 Max R [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.6.0.html b/release/0.6.0.html
index e5b03e0..4fcec62 100644
--- a/release/0.6.0.html
+++ b/release/0.6.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.6.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.6.0 (14 August 2017) This is a major release. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.5.0..apache-arrow-0.6.0 48 Wes McKinney 7 siddharth 5 Matt Darwin 5 Max Risuhin 5 Philipp Moritz 4 Kouhei Sutou 3 Bryan Cutler 2 Emilio Lahr-Vivaz 2 Li Jin 2 Robert Nishihara 1 Antony Mayi 1 Marco Neumann 1 Stepan Kadlec 1 Steven Phillips 1 Yeolar 1 fjetter 1 rendel Changelog New Features and Impr [...]
+{"description":"Apache Arrow 0.6.0 (14 August 2017) This is a major release. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.5.0..apache-arrow-0.6.0 48 Wes McKinney 7 siddharth 5 Matt Darwin 5 Max Risuhin 5 Philipp Moritz 4 Kouhei Sutou 3 Bryan Cutler 2 Emilio Lahr-Vivaz 2 Li Jin 2 Robert Nishihara 1 Antony Mayi 1 Marco Neumann 1 Stepan Kadlec 1 Steven Phillips 1 Yeolar 1 fjetter 1 rendel Changelog New Features and Impr [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.7.0.html b/release/0.7.0.html
index d51db78..f859ae5 100644
--- a/release/0.7.0.html
+++ b/release/0.7.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.7.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.7.0 (17 September 2017) This is a major release. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.6.0..apache-arrow-0.7.0 58 Wes McKinney 14 Kouhei Sutou 11 Philipp Moritz 7 Phillip Cloud 6 siddharth 5 Uwe L. Korn 2 Bryan Cutler 2 HorimotoYasuhiro 2 Laurent Goujon 2 Li Jin 2 Max Risuhin 2 fjetter 1 Antony Mayi 1 Brecht Machiels 1 Fritz Obermeyer 1 Gonzalo Ortiz 1 Jeff Reback 1 Kyle Kelley 1 [...]
+{"description":"Apache Arrow 0.7.0 (17 September 2017) This is a major release. Read more in the release blog post. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.6.0..apache-arrow-0.7.0 58 Wes McKinney 14 Kouhei Sutou 11 Philipp Moritz 7 Phillip Cloud 6 siddharth 5 Uwe L. Korn 2 Bryan Cutler 2 HorimotoYasuhiro 2 Laurent Goujon 2 Li Jin 2 Max Risuhin 2 fjetter 1 Antony Mayi 1 Brecht Machiels 1 Fritz Obermeyer 1 Gonzalo Ortiz 1 Jeff Reback 1 Kyle Kelley 1 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.7.1.html b/release/0.7.1.html
index 4a5a026..4845faf 100644
--- a/release/0.7.1.html
+++ b/release/0.7.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.7.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.7.1 (1 October 2017) This is a minor bug release. It was motivated by ARROW-1601, but see the complete changelog. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.7.0..apache-arrow-0.7.1 14 Wes McKinney 6 Kouhei Sutou 3 siddharth 2 Paul Taylor 2 Uwe L. Korn 1 Amir Malekpour 1 Bryan Cutler 1 Deepak Majeti 1 Kentaro Hayashi 1 Korn, Uwe 1 Li Jin 1 Rene Sugar 1 Tom Augspurger 1 Wataru Shimizu 1 m-nakamura145 1 rvernica Changelog  [...]
+{"description":"Apache Arrow 0.7.1 (1 October 2017) This is a minor bug release. It was motivated by ARROW-1601, but see the complete changelog. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.7.0..apache-arrow-0.7.1 14 Wes McKinney 6 Kouhei Sutou 3 siddharth 2 Paul Taylor 2 Uwe L. Korn 1 Amir Malekpour 1 Bryan Cutler 1 Deepak Majeti 1 Kentaro Hayashi 1 Korn, Uwe 1 Li Jin 1 Rene Sugar 1 Tom Augspurger 1 Wataru Shimizu 1 m-nakamura145 1 rvernica Changelog  [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.8.0.html b/release/0.8.0.html
index c427d12..21a2346 100644
--- a/release/0.8.0.html
+++ b/release/0.8.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.8.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.8.0 (18 December 2017) This is a major release. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.7.1..apache-arrow-0.8.0 90 Wes McKinney 23 Phillip Cloud 21 Kouhei Sutou 13 Licht-T 12 Korn, Uwe 12 Philipp Moritz 12 Uwe L. Korn 10 Bryan Cutler 5 Li Jin 5 Robert Nishihara 4 Paul Taylor 4 siddharth 3 Max Risuhin 3 Stephanie 2 Rene Sugar 2 Heimir Sverrisson 2 Brian Hulette 2 Yuliya Feldman 2 dhirschf 2 Matthias Vallentin 1 vkoruk [...]
+{"description":"Apache Arrow 0.8.0 (18 December 2017) This is a major release. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.7.1..apache-arrow-0.8.0 90 Wes McKinney 23 Phillip Cloud 21 Kouhei Sutou 13 Licht-T 12 Korn, Uwe 12 Philipp Moritz 12 Uwe L. Korn 10 Bryan Cutler 5 Li Jin 5 Robert Nishihara 4 Paul Taylor 4 siddharth 3 Max Risuhin 3 Stephanie 2 Rene Sugar 2 Heimir Sverrisson 2 Brian Hulette 2 Yuliya Feldman 2 dhirschf 2 Matthias Vallentin 1 vkoruk [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/0.9.0.html b/release/0.9.0.html
index d6c31c3..1e2f791 100644
--- a/release/0.9.0.html
+++ b/release/0.9.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 0.9.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 0.9.0 (21 March 2018) This is a major release. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.8.0..apache-arrow-0.9.0 52 Wes McKinney 52 Antoine Pitrou 25 Uwe L. Korn 14 Paul Taylor 13 Kouhei Sutou 13 Phillip Cloud 9 Robert Nishihara 9 Korn, Uwe 9 Jim Crist 8 Brian Hulette 7 Philipp Moritz 6 Panchen Xue 6 yosuke shiro 5 Mitar 5 Bryan Cutler 4 siddharth 3 Adam Seibert 3 Licht-T 3 moriyoshi 2 rvernica 2 Sidd 2 Albert Shieh 1 Ma [...]
+{"description":"Apache Arrow 0.9.0 (21 March 2018) This is a major release. Download Source Artifacts Git tag Contributors $ git shortlog -sn apache-arrow-0.8.0..apache-arrow-0.9.0 52 Wes McKinney 52 Antoine Pitrou 25 Uwe L. Korn 14 Paul Taylor 13 Kouhei Sutou 13 Phillip Cloud 9 Robert Nishihara 9 Korn, Uwe 9 Jim Crist 8 Brian Hulette 7 Philipp Moritz 6 Panchen Xue 6 yosuke shiro 5 Mitar 5 Bryan Cutler 4 siddharth 3 Adam Seibert 3 Licht-T 3 moriyoshi 2 rvernica 2 Sidd 2 Albert Shieh 1 Ma [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/1.0.0.html b/release/1.0.0.html
index 34cbf27..4ceff69 100644
--- a/release/1.0.0.html
+++ b/release/1.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 1.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 1.0.0 (24 July 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 771 commits from 100 distinct contributors. $ git shortlog -sn apache-arrow-0.17.0..apache-arrow-1.0.0 125 Wes McKinney 72 Neal Richardson 61 Antoine Pitrou 56 Sutou Kouhei 55 Krisztián Szűcs 30 Joris Van den Bossche 24 Benjamin Kietzman 23 Dav [...]
+{"description":"Apache Arrow 1.0.0 (24 July 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 771 commits from 100 distinct contributors. $ git shortlog -sn apache-arrow-0.17.0..apache-arrow-1.0.0 125 Wes McKinney 72 Neal Richardson 61 Antoine Pitrou 56 Sutou Kouhei 55 Krisztián Szűcs 30 Joris Van den Bossche 24 Benjamin Kietzman 23 Dav [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/1.0.1.html b/release/1.0.1.html
index cd6c8b8..deebaaf 100644
--- a/release/1.0.1.html
+++ b/release/1.0.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 1.0.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 1.0.1 (21 August 2020) This is a patch release addressing bugs in the 1.0.0 release. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 39 commits from 15 distinct contributors. $ git shortlog -sn apache-arrow-1.0.0..apache-arrow-1.0.1 9 Krisztián Szűcs 8 Neal Richardson 4 Benjamin Kietzman 3 Andy Grove 3 Antoine Pitrou 3 Uwe L. Korn 1 Jorge C. Leitao 1 Joris Van den Bossche 1 Mahm [...]
+{"description":"Apache Arrow 1.0.1 (21 August 2020) This is a patch release addressing bugs in the 1.0.0 release. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 39 commits from 15 distinct contributors. $ git shortlog -sn apache-arrow-1.0.0..apache-arrow-1.0.1 9 Krisztián Szűcs 8 Neal Richardson 4 Benjamin Kietzman 3 Andy Grove 3 Antoine Pitrou 3 Uwe L. Korn 1 Jorge C. Leitao 1 Joris Van den Bossche 1 Mahm [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/2.0.0.html b/release/2.0.0.html
index c124190..e229f23 100644
--- a/release/2.0.0.html
+++ b/release/2.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 2.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 2.0.0 (19 October 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 511 commits from 81 distinct contributors. $ git shortlog -sn apache-arrow-1.0.0..apache-arrow-2.0.0 68 Jorge C. Leitao 48 Antoine Pitrou 40 Krisztián Szűcs 34 alamb 33 Neal Richardson 30 Andy Grove 25 Benjamin Kietzman 25 Joris Van den Boss [...]
+{"description":"Apache Arrow 2.0.0 (19 October 2020) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 511 commits from 81 distinct contributors. $ git shortlog -sn apache-arrow-1.0.0..apache-arrow-2.0.0 68 Jorge C. Leitao 48 Antoine Pitrou 40 Krisztián Szűcs 34 alamb 33 Neal Richardson 30 Andy Grove 25 Benjamin Kietzman 25 Joris Van den Boss [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/3.0.0.html b/release/3.0.0.html
index 11e3497..943b2e4 100644
--- a/release/3.0.0.html
+++ b/release/3.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 3.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 3.0.0 (26 January 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 648 commits from 106 distinct contributors. $ git shortlog -sn apache-arrow-2.0.0..apache-arrow-3.0.0 71 Jorge C. Leitao 64 Sutou Kouhei 48 Antoine Pitrou 48 Heres, Daniel 27 Andy Grove 27 Neville Dipale 24 Joris Van den Bossche 19 Neal Rich [...]
+{"description":"Apache Arrow 3.0.0 (26 January 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 648 commits from 106 distinct contributors. $ git shortlog -sn apache-arrow-2.0.0..apache-arrow-3.0.0 71 Jorge C. Leitao 64 Sutou Kouhei 48 Antoine Pitrou 48 Heres, Daniel 27 Andy Grove 27 Neville Dipale 24 Joris Van den Bossche 19 Neal Rich [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/4.0.0.html b/release/4.0.0.html
index 67d0329..bd02b91 100644
--- a/release/4.0.0.html
+++ b/release/4.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 4.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 4.0.0 (26 April 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 719 commits from 114 distinct contributors. $ git shortlog -sn apache-arrow-3.0.0..apache-arrow-4.0.0 65 Antoine Pitrou 47 Andrew Lamb 41 Heres, Daniel 40 David Li 37 Sutou Kouhei 33 Neal Richardson 30 Weston Pace 28 Jorge C. Leitao 26 Kriszti [...]
+{"description":"Apache Arrow 4.0.0 (26 April 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 719 commits from 114 distinct contributors. $ git shortlog -sn apache-arrow-3.0.0..apache-arrow-4.0.0 65 Antoine Pitrou 47 Andrew Lamb 41 Heres, Daniel 40 David Li 37 Sutou Kouhei 33 Neal Richardson 30 Weston Pace 28 Jorge C. Leitao 26 Kriszti [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/4.0.1.html b/release/4.0.1.html
index 71a88fb..5f8371e 100644
--- a/release/4.0.1.html
+++ b/release/4.0.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 4.0.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 4.0.1 (26 May 2021) This is a patch release covering a month of development and addressing small but important bugs in the different implementations. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 21 commits from 13 distinct contributors. $ git shortlog -sn apache-arrow-4.0.0..apache-arrow-4.0.1 3 Jorge C. Leitao 3 Joris Van den Bossche 2 Krisztián Szűcs 2 Ian Cook 2 David Li 2 [...]
+{"description":"Apache Arrow 4.0.1 (26 May 2021) This is a patch release covering a month of development and addressing small but important bugs in the different implementations. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 21 commits from 13 distinct contributors. $ git shortlog -sn apache-arrow-4.0.0..apache-arrow-4.0.1 3 Jorge C. Leitao 3 Joris Van den Bossche 2 Krisztián Szűcs 2 Ian Cook 2 David Li 2 [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/5.0.0.html b/release/5.0.0.html
index 804dac2..14ee476 100644
--- a/release/5.0.0.html
+++ b/release/5.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 5.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 5.0.0 (29 July 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 684 commits from 99 distinct contributors in 2 Arrow repositories. 77 David Li 43 Krisztián Szűcs 42 Antoine Pitrou 35 Dominik Moritz 33 Nic Crane 27 Weston Pace 27 Sutou Kouhei 27 Andrew Lamb 23 Jonathan Keane 21 Joris Van den Bossche 17 Jorge [...]
+{"description":"Apache Arrow 5.0.0 (29 July 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 684 commits from 99 distinct contributors in 2 Arrow repositories. 77 David Li 43 Krisztián Szűcs 42 Antoine Pitrou 35 Dominik Moritz 33 Nic Crane 27 Weston Pace 27 Sutou Kouhei 27 Andrew Lamb 23 Jonathan Keane 21 Joris Van den Bossche 17 Jorge [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/6.0.0.html b/release/6.0.0.html
index a08378e..a53bf4f 100644
--- a/release/6.0.0.html
+++ b/release/6.0.0.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 6.0.0 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 6.0.0 (26 October 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 592 commits from 88 distinct contributors. 58 David Li 56 Antoine Pitrou 46 Neal Richardson 42 Sutou Kouhei 38 Jonathan Keane 34 Krisztián Szűcs 27 Matthew Topol 26 Nic Crane 23 Andrew Lamb 22 Joris Van den Bossche 21 Weston Pace 16 Alessand [...]
+{"description":"Apache Arrow 6.0.0 (26 October 2021) This is a major release covering more than 3 months of development. Download Source Artifacts Binary Artifacts For CentOS For Debian For Python For Ubuntu Git tag Contributors This release includes 592 commits from 88 distinct contributors. 58 David Li 56 Antoine Pitrou 46 Neal Richardson 42 Sutou Kouhei 38 Jonathan Keane 34 Krisztián Szűcs 27 Matthew Topol 26 Nic Crane 23 Andrew Lamb 22 Joris Van den Bossche 21 Weston Pace 16 Alessand [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/6.0.1.html b/release/6.0.1.html
index 4977566..abcbdb4 100644
--- a/release/6.0.1.html
+++ b/release/6.0.1.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Apache Arrow 6.0.1 Release" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow 6.0.1 (18 November 2021) This is a patch release covering more than 0 months of development. Download Source Artifacts Binary Artifacts For AlmaLinux For Amazon Linux For CentOS For C# For Debian For Python For Ubuntu Git tag Contributors This release includes 34 commits from 16 distinct contributors. $ git shortlog -sn apache-arrow-6.0.0..apache-arrow-6.0.1 7 Sutou Kouhei 4 Joris Van den Bossche 3 Antoine Pitrou 3 David Li 3 Krisztián Szűcs 3 Weston Pace 2 N [...]
+{"description":"Apache Arrow 6.0.1 (18 November 2021) This is a patch release covering more than 0 months of development. Download Source Artifacts Binary Artifacts For AlmaLinux For Amazon Linux For CentOS For C# For Debian For Python For Ubuntu Git tag Contributors This release includes 34 commits from 16 distinct contributors. $ git shortlog -sn apache-arrow-6.0.0..apache-arrow-6.0.1 7 Sutou Kouhei 4 Joris Van den Bossche 3 Antoine Pitrou 3 David Li 3 Krisztián Szűcs 3 Weston Pace 2 N [...]
 <!-- End Jekyll SEO tag -->
 
 
diff --git a/release/index.html b/release/index.html
index d041d34..74bf1f9 100644
--- a/release/index.html
+++ b/release/index.html
@@ -20,13 +20,13 @@
 <meta property="og:site_name" content="Apache Arrow" />
 <meta property="og:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="og:type" content="article" />
-<meta property="article:published_time" content="2021-11-24T15:50:19-05:00" />
+<meta property="article:published_time" content="2021-12-03T13:53:36-05:00" />
 <meta name="twitter:card" content="summary_large_image" />
 <meta property="twitter:image" content="https://arrow.apache.org/img/arrow.png" />
 <meta property="twitter:title" content="Releases" />
 <meta name="twitter:site" content="@ApacheArrow" />
 <script type="application/ld+json">
-{"description":"Apache Arrow Releases Navigate to the release page for downloads and the changelog. 6.0.1 (18 November 2021) 6.0.0 (26 October 2021) 5.0.0 (29 July 2021) 4.0.1 (26 May 2021) 4.0.0 (26 April 2021) 3.0.0 (26 January 2021) 2.0.0 (19 October 2020) 1.0.1 (21 August 2020) 1.0.0 (24 July 2020) 0.17.1 (18 May 2020) 0.17.0 (20 April 2020) 0.16.0 (7 February 2020) 0.15.1 (1 November 2019) 0.15.0 (5 October 2019) 0.14.1 (22 July 2019) 0.14.0 (4 July 2019) 0.13.0 (1 April 2019) 0.12. [...]
+{"description":"Apache Arrow Releases Navigate to the release page for downloads and the changelog. 6.0.1 (18 November 2021) 6.0.0 (26 October 2021) 5.0.0 (29 July 2021) 4.0.1 (26 May 2021) 4.0.0 (26 April 2021) 3.0.0 (26 January 2021) 2.0.0 (19 October 2020) 1.0.1 (21 August 2020) 1.0.0 (24 July 2020) 0.17.1 (18 May 2020) 0.17.0 (20 April 2020) 0.16.0 (7 February 2020) 0.15.1 (1 November 2019) 0.15.0 (5 October 2019) 0.14.1 (22 July 2019) 0.14.0 (4 July 2019) 0.13.0 (1 April 2019) 0.12. [...]
 <!-- End Jekyll SEO tag -->