You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ho...@apache.org on 2021/11/19 16:37:37 UTC

[arrow-site] branch asf-site updated: update datafusion website (#162)

This is an automated email from the ASF dual-hosted git repository.

houqp pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new e57e14e  update datafusion website (#162)
e57e14e is described below

commit e57e14e785e9f76c4d7a2ce6c8854fe06f560e8c
Author: QP Hou <qp...@scribd.com>
AuthorDate: Fri Nov 19 08:37:22 2021 -0800

    update datafusion website (#162)
---
 .../_modules/{index.html => datafusion.html}       | 120 +++++++++++++++++++--
 datafusion/_modules/index.html                     |   5 +-
 datafusion/_sources/cli/index.rst.txt              |  18 ++++
 datafusion/_sources/python/index.rst.txt           |  13 ++-
 datafusion/_sources/specification/roadmap.md.txt   |  27 ++++-
 datafusion/cli/index.html                          |  21 ++++
 datafusion/genindex.html                           |  32 ++++--
 datafusion/index.html                              |   2 +-
 datafusion/objects.inv                             | Bin 1694 -> 1741 bytes
 .../python/generated/datafusion.DataFrame.html     |  41 +++----
 .../generated/datafusion.ExecutionContext.html     |  21 +++-
 .../python/generated/datafusion.Expression.html    |  30 +++++-
 datafusion/python/index.html                       |  23 ++--
 datafusion/searchindex.js                          |   2 +-
 datafusion/specification/roadmap.html              |  54 ++++++++--
 .../distributed/deployment/docker-compose.html     |  10 ++
 .../user-guide/distributed/deployment/index.html   |  10 ++
 17 files changed, 355 insertions(+), 74 deletions(-)

diff --git a/datafusion/_modules/index.html b/datafusion/_modules/datafusion.html
similarity index 55%
copy from datafusion/_modules/index.html
copy to datafusion/_modules/datafusion.html
index 0233d14..795ad2b 100644
--- a/datafusion/_modules/index.html
+++ b/datafusion/_modules/datafusion.html
@@ -4,7 +4,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml">
   <head>
     <meta charset="utf-8" />
-    <title>Overview: module code &#8212; Arrow Datafusion  documentation</title>
+    <title>datafusion &#8212; Arrow Datafusion  documentation</title>
     
   <link href="../_static/css/theme.css" rel="stylesheet" />
   <link href="../_static/css/index.c5995385ac14fb8791e8eb36b4908be2.css" rel="stylesheet" />
@@ -395,11 +395,119 @@
               
               <div>
                 
-  <h1>All modules for which code is available</h1>
-<ul><li><a href="builtins.html">builtins</a></li>
-<li><a href="datafusion/functions.html">datafusion.functions</a></li>
-<li><a href="functions.html">functions</a></li>
-</ul>
+  <h1>Source code for datafusion</h1><div class="highlight"><pre>
+<span></span><span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span>
+<span class="c1"># or more contributor license agreements.  See the NOTICE file</span>
+<span class="c1"># distributed with this work for additional information</span>
+<span class="c1"># regarding copyright ownership.  The ASF licenses this file</span>
+<span class="c1"># to you under the Apache License, Version 2.0 (the</span>
+<span class="c1"># &quot;License&quot;); you may not use this file except in compliance</span>
+<span class="c1"># with the License.  You may obtain a copy of the License at</span>
+<span class="c1">#</span>
+<span class="c1">#   http://www.apache.org/licenses/LICENSE-2.0</span>
+<span class="c1">#</span>
+<span class="c1"># Unless required by applicable law or agreed to in writing,</span>
+<span class="c1"># software distributed under the License is distributed on an</span>
+<span class="c1"># &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
+<span class="c1"># KIND, either express or implied.  See the License for the</span>
+<span class="c1"># specific language governing permissions and limitations</span>
+<span class="c1"># under the License.</span>
+
+<span class="kn">from</span> <span class="nn">abc</span> <span class="kn">import</span> <span class="n">ABCMeta</span><span class="p">,</span> <span class="n">abstractmethod</span>
+<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span>
+
+<span class="kn">import</span> <span class="nn">pyarrow</span> <span class="k">as</span> <span class="nn">pa</span>
+
+<span class="kn">from</span> <span class="nn">._internal</span> <span class="kn">import</span> <span class="p">(</span>
+    <span class="n">AggregateUDF</span><span class="p">,</span>
+    <span class="n">DataFrame</span><span class="p">,</span>
+    <span class="n">ExecutionContext</span><span class="p">,</span>
+    <span class="n">Expression</span><span class="p">,</span>
+    <span class="n">ScalarUDF</span><span class="p">,</span>
+<span class="p">)</span>
+
+
+<span class="n">__all__</span> <span class="o">=</span> <span class="p">[</span>
+    <span class="s2">&quot;DataFrame&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;ExecutionContext&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;Expression&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;AggregateUDF&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;ScalarUDF&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;column&quot;</span><span class="p">,</span>
+    <span class="s2">&quot;literal&quot;</span><span class="p">,</span>
+<span class="p">]</span>
+
+
+<span class="k">class</span> <span class="nc">Accumulator</span><span class="p">(</span><span class="n">metaclass</span><span class="o">=</span><span class="n">ABCMeta</span><span class="p">):</span>
+    <span class="nd">@abstractmethod</span>
+    <span class="k">def</span> <span class="nf">state</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">List</span><span class="p">[</span><span class="n">pa</span><span class="o">.</span><span class="n">Scalar</span><span class="p">]:</span>
+        <span class="k">pass</span>
+
+    <span class="nd">@abstractmethod</span>
+    <span class="k">def</span> <span class="nf">update</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">values</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
+        <span class="k">pass</span>
+
+    <span class="nd">@abstractmethod</span>
+    <span class="k">def</span> <span class="nf">merge</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">states</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
+        <span class="k">pass</span>
+
+    <span class="nd">@abstractmethod</span>
+    <span class="k">def</span> <span class="nf">evaluate</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Scalar</span><span class="p">:</span>
+        <span class="k">pass</span>
+
+
+<span class="k">def</span> <span class="nf">column</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
+    <span class="k">return</span> <span class="n">Expression</span><span class="o">.</span><span class="n">column</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+
+<span class="n">col</span> <span class="o">=</span> <span class="n">column</span>
+
+
+<span class="k">def</span> <span class="nf">literal</span><span class="p">(</span><span class="n">value</span><span class="p">):</span>
+    <span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">value</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Scalar</span><span class="p">):</span>
+        <span class="n">value</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">scalar</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+    <span class="k">return</span> <span class="n">Expression</span><span class="o">.</span><span class="n">literal</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
+
+
+<span class="n">lit</span> <span class="o">=</span> <span class="n">literal</span>
+
+
+<span class="k">def</span> <span class="nf">udf</span><span class="p">(</span><span class="n">func</span><span class="p">,</span> <span class="n">input_types</span><span class="p">,</span> <span class="n">return_type</span><span class="p">,</span> <span class="n">volatility</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Create a new User Defined Function</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="k">if</span> <span class="ow">not</span> <span class="n">callable</span><span class="p">(</span><span class="n">func</span><span class="p">):</span>
+        <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span><span class="s2">&quot;`func` argument must be callable&quot;</span><span class="p">)</span>
+    <span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+        <span class="n">name</span> <span class="o">=</span> <span class="n">func</span><span class="o">.</span><span class="vm">__qualname__</span>
+    <span class="k">return</span> <span class="n">ScalarUDF</span><span class="p">(</span>
+        <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
+        <span class="n">func</span><span class="o">=</span><span class="n">func</span><span class="p">,</span>
+        <span class="n">input_types</span><span class="o">=</span><span class="n">input_types</span><span class="p">,</span>
+        <span class="n">return_type</span><span class="o">=</span><span class="n">return_type</span><span class="p">,</span>
+        <span class="n">volatility</span><span class="o">=</span><span class="n">volatility</span><span class="p">,</span>
+    <span class="p">)</span>
+
+
+<span class="k">def</span> <span class="nf">udaf</span><span class="p">(</span><span class="n">accum</span><span class="p">,</span> <span class="n">input_type</span><span class="p">,</span> <span class="n">return_type</span><span class="p">,</span> <span class="n">state_type</span><span class="p">,</span> <span class="n">volatility</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
+    <span class="sd">&quot;&quot;&quot;</span>
+<span class="sd">    Create a new User Defined Aggregate Function</span>
+<span class="sd">    &quot;&quot;&quot;</span>
+    <span class="k">if</span> <span class="ow">not</span> <span class="nb">issubclass</span><span class="p">(</span><span class="n">accum</span><span class="p">,</span> <span class="n">Accumulator</span><span class="p">):</span>
+        <span class="k">raise</span> <span class="ne">TypeError</span><span class="p">(</span>
+            <span class="s2">&quot;`accum` must implement the abstract base class Accumulator&quot;</span>
+        <span class="p">)</span>
+    <span class="k">if</span> <span class="n">name</span> <span class="ow">is</span> <span class="kc">None</span><span class="p">:</span>
+        <span class="n">name</span> <span class="o">=</span> <span class="n">accum</span><span class="o">.</span><span class="vm">__qualname__</span>
+    <span class="k">return</span> <span class="n">AggregateUDF</span><span class="p">(</span>
+        <span class="n">name</span><span class="o">=</span><span class="n">name</span><span class="p">,</span>
+        <span class="n">accumulator</span><span class="o">=</span><span class="n">accum</span><span class="p">,</span>
+        <span class="n">input_type</span><span class="o">=</span><span class="n">input_type</span><span class="p">,</span>
+        <span class="n">return_type</span><span class="o">=</span><span class="n">return_type</span><span class="p">,</span>
+        <span class="n">state_type</span><span class="o">=</span><span class="n">state_type</span><span class="p">,</span>
+        <span class="n">volatility</span><span class="o">=</span><span class="n">volatility</span><span class="p">,</span>
+    <span class="p">)</span>
+</pre></div>
 
               </div>
               
diff --git a/datafusion/_modules/index.html b/datafusion/_modules/index.html
index 0233d14..4f74b34 100644
--- a/datafusion/_modules/index.html
+++ b/datafusion/_modules/index.html
@@ -397,8 +397,9 @@
                 
   <h1>All modules for which code is available</h1>
 <ul><li><a href="builtins.html">builtins</a></li>
-<li><a href="datafusion/functions.html">datafusion.functions</a></li>
-<li><a href="functions.html">functions</a></li>
+<li><a href="datafusion.html">datafusion</a>
+<ul><li><a href="datafusion/functions.html">datafusion.functions</a></li>
+</ul></li><li><a href="functions.html">functions</a></li>
 </ul>
 
               </div>
diff --git a/datafusion/_sources/cli/index.rst.txt b/datafusion/_sources/cli/index.rst.txt
index 2b91430..89f7f5d 100644
--- a/datafusion/_sources/cli/index.rst.txt
+++ b/datafusion/_sources/cli/index.rst.txt
@@ -23,6 +23,24 @@ The Arrow DataFusion CLI is a command-line interactive SQL utility that allows
 queries to be executed against CSV and Parquet files. It is a convenient way to
 try DataFusion out with your own data sources.
 
+Install and run using Homebrew (on macOS)
+=========================================
+
+The easiest way to give DataFusion CLI a spin is via Homebrew (on macOS). Install it like any other pre-built software:
+
+.. code-block:: bash
+
+    brew install datafusion
+    # ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/manifests/5.0.0
+    # ######################################################################## 100.0%
+    # ==> Downloading https://ghcr.io/v2/homebrew/core/datafusion/blobs/sha256:9ecc8a01be47ceb9a53b39976696afa87c0a8
+    # ==> Downloading from https://pkg-containers.githubusercontent.com/ghcr1/blobs/sha256:9ecc8a01be47ceb9a53b39976
+    # ######################################################################## 100.0%
+    # ==> Pouring datafusion--5.0.0.big_sur.bottle.tar.gz
+    # 🍺  /usr/local/Cellar/datafusion/5.0.0: 9 files, 17.4MB
+
+    datafusion-cli
+
 Run using Cargo
 ===============
 
diff --git a/datafusion/_sources/python/index.rst.txt b/datafusion/_sources/python/index.rst.txt
index 56f9097..57ab8d1 100644
--- a/datafusion/_sources/python/index.rst.txt
+++ b/datafusion/_sources/python/index.rst.txt
@@ -39,11 +39,10 @@ Simple usage:
 .. code-block:: python
 
    import datafusion
+   from datafusion import functions as f
+   from datafusion import col
    import pyarrow
 
-   # an alias
-   f = datafusion.functions
-
    # create a context
    ctx = datafusion.ExecutionContext()
 
@@ -56,8 +55,8 @@ Simple usage:
 
    # create a new statement
    df = df.select(
-       f.col("a") + f.col("b"),
-       f.col("a") - f.col("b"),
+       col("a") + col("b"),
+       col("a") - col("b"),
    )
 
    # execute and collect the first (and only) batch
@@ -77,7 +76,7 @@ UDFs
 
    udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
 
-   df = df.select(udf(f.col("a")))
+   df = df.select(udf(col("a")))
 
 
 UDAF
@@ -117,7 +116,7 @@ UDAF
 
    df = df.aggregate(
        [],
-       [udaf(f.col("a"))]
+       [udaf(col("a"))]
    )
 
 
diff --git a/datafusion/_sources/specification/roadmap.md.txt b/datafusion/_sources/specification/roadmap.md.txt
index 520815b..09f636f 100644
--- a/datafusion/_sources/specification/roadmap.md.txt
+++ b/datafusion/_sources/specification/roadmap.md.txt
@@ -61,6 +61,7 @@ to provide:
 - Additional constant folding / partial evaluation [#1070](https://github.com/apache/arrow-datafusion/issues/1070)
 - More sophisticated cost based optimizer for join ordering
 - Implement advanced query optimization framework (Tokomak) #440
+- Finer optimizations for group by and aggregate functions
 
 ## Datasources
 
@@ -92,8 +93,28 @@ Note: There are some additional thoughts on a datafusion-cli vision on [#1096](h
 - publishing to apt, brew, and possible NuGet registry so that people can use it more easily
 - adopt a shorter name, like dfcli?
 
-## Ballista
+# Ballista
 
-# Vision
+Ballista is a distributed compute platform based on Apache Arrow and DataFusion. It provides a query scheduler that
+breaks a physical plan into stages and tasks and then schedules tasks for execution across the available executors
+in the cluster.
 
-TBD
+Having Ballista as part of the DataFusion codebase helps ensure that DataFusion remains suitable for distributed
+compute. For example, it helps ensure that physical query plans can be serialized to protobuf format and that they
+remain language-agnostic so that executors can be built in languages other than Rust.
+
+## Ballista Roadmap
+
+## Move query scheduler into DataFusion
+
+The Ballista scheduler has some advantages over DataFusion query execution because it doesn't try to eagerly execute
+the entire query at once but breaks it down into a directed acyclic graph (DAG) of stages and executes a
+configurable number of stages and tasks concurrently. It should be possible to push some of this logic down to
+DataFusion so that the same scheduler can be used to scale across cores in-process and across nodes in a cluster.
+
+## Implement execution-time cost-based optimizations based on statistics
+
+After the execution of a query stage, accurate statistics are available for the resulting data. These statistics
+could be leveraged by the scheduler to optimize the query during execution. For example, when performing a hash join
+it is desirable to load the smaller side of the join into memory and in some cases we cannot predict which side will
+be smaller until execution time.
diff --git a/datafusion/cli/index.html b/datafusion/cli/index.html
index 0715064..63b832a 100644
--- a/datafusion/cli/index.html
+++ b/datafusion/cli/index.html
@@ -396,6 +396,11 @@
 <nav id="bd-toc-nav">
     <ul class="visible nav section-nav flex-column">
  <li class="toc-h2 nav-item toc-entry">
+  <a class="reference internal nav-link" href="#install-and-run-using-homebrew-on-macos">
+   Install and run using Homebrew (on macOS)
+  </a>
+ </li>
+ <li class="toc-h2 nav-item toc-entry">
   <a class="reference internal nav-link" href="#run-using-cargo">
    Run using Cargo
   </a>
@@ -453,6 +458,22 @@
 <p>The Arrow DataFusion CLI is a command-line interactive SQL utility that allows
 queries to be executed against CSV and Parquet files. It is a convenient way to
 try DataFusion out with your own data sources.</p>
+<div class="section" id="install-and-run-using-homebrew-on-macos">
+<h2>Install and run using Homebrew (on macOS)<a class="headerlink" href="#install-and-run-using-homebrew-on-macos" title="Permalink to this headline">¶</a></h2>
+<p>The easiest way to give DataFusion CLI a spin is via Homebrew (on macOS). Install it like any other pre-built software:</p>
+<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>brew install datafusion
+<span class="c1"># ==&gt; Downloading https://ghcr.io/v2/homebrew/core/datafusion/manifests/5.0.0</span>
+<span class="c1"># ######################################################################## 100.0%</span>
+<span class="c1"># ==&gt; Downloading https://ghcr.io/v2/homebrew/core/datafusion/blobs/sha256:9ecc8a01be47ceb9a53b39976696afa87c0a8</span>
+<span class="c1"># ==&gt; Downloading from https://pkg-containers.githubusercontent.com/ghcr1/blobs/sha256:9ecc8a01be47ceb9a53b39976</span>
+<span class="c1"># ######################################################################## 100.0%</span>
+<span class="c1"># ==&gt; Pouring datafusion--5.0.0.big_sur.bottle.tar.gz</span>
+<span class="c1"># 🍺  /usr/local/Cellar/datafusion/5.0.0: 9 files, 17.4MB</span>
+
+datafusion-cli
+</pre></div>
+</div>
+</div>
 <div class="section" id="run-using-cargo">
 <h2>Run using Cargo<a class="headerlink" href="#run-using-cargo" title="Permalink to this headline">¶</a></h2>
 <p>Use the following commands to clone this repository and run the CLI. This will require the Rust toolchain to be installed. Rust can be installed from <a class="reference external" href="https://rustup.rs/">https://rustup.rs</a>.</p>
diff --git a/datafusion/genindex.html b/datafusion/genindex.html
index fe789e5..60ae287 100644
--- a/datafusion/genindex.html
+++ b/datafusion/genindex.html
@@ -477,6 +477,10 @@
 <h2 id="C">C</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.Expression.html#datafusion.Expression.cast">cast() (datafusion.Expression method)</a>
+</li>
+      <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext.catalog">catalog() (datafusion.ExecutionContext method)</a>
+</li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.ceil">ceil() (in module datafusion.functions)</a>
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.character_length">character_length() (in module datafusion.functions)</a>
@@ -485,10 +489,12 @@
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.col">col() (in module datafusion.functions)</a>
 </li>
-      <li><a href="python/generated/datafusion.DataFrame.html#datafusion.DataFrame.collect">collect() (datafusion.DataFrame method)</a>
-</li>
   </ul></td>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.DataFrame.html#datafusion.DataFrame.collect">collect() (datafusion.DataFrame method)</a>
+</li>
+      <li><a href="python/generated/datafusion.Expression.html#datafusion.Expression.column">column() (datafusion.Expression static method)</a>
+</li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.concat">concat() (in module datafusion.functions)</a>
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.concat_ws">concat_ws() (in module datafusion.functions)</a>
@@ -519,6 +525,8 @@
 <h2 id="E">E</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext.empty_table">empty_table() (datafusion.ExecutionContext method)</a>
+</li>
       <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext">ExecutionContext (class in datafusion)</a>
 </li>
   </ul></td>
@@ -547,12 +555,14 @@
   <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.Volatility.immutable">immutable() (datafusion.functions.Volatility static method)</a>
 </li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.in_list">in_list() (in module datafusion.functions)</a>
 </li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.initcap">initcap() (in module datafusion.functions)</a>
 </li>
+      <li><a href="python/generated/datafusion.Expression.html#datafusion.Expression.is_null">is_null() (datafusion.Expression method)</a>
+</li>
   </ul></td>
 </tr></table>
 
@@ -573,6 +583,8 @@
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.lit">lit() (in module datafusion.functions)</a>
 </li>
+      <li><a href="python/generated/datafusion.Expression.html#datafusion.Expression.literal">literal() (datafusion.Expression static method)</a>
+</li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.ln">ln() (in module datafusion.functions)</a>
 </li>
   </ul></td>
@@ -657,6 +669,8 @@
 <h2 id="S">S</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.DataFrame.html#datafusion.DataFrame.schema">schema() (datafusion.DataFrame method)</a>
+</li>
       <li><a href="python/generated/datafusion.DataFrame.html#datafusion.DataFrame.select">select() (datafusion.DataFrame method)</a>
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.sha224">sha224() (in module datafusion.functions)</a>
@@ -673,14 +687,14 @@
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.sin">sin() (in module datafusion.functions)</a>
 </li>
+  </ul></td>
+  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="python/generated/datafusion.DataFrame.html#datafusion.DataFrame.sort">sort() (datafusion.DataFrame method)</a>
 
       <ul>
         <li><a href="python/generated/datafusion.Expression.html#datafusion.Expression.sort">(datafusion.Expression method)</a>
 </li>
       </ul></li>
-  </ul></td>
-  <td style="width: 33%; vertical-align: top;"><ul>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.split_part">split_part() (in module datafusion.functions)</a>
 </li>
       <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext.sql">sql() (datafusion.ExecutionContext method)</a>
@@ -703,14 +717,16 @@
 <h2 id="T">T</h2>
 <table style="width: 100%" class="indextable genindextable"><tr>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext.table">table() (datafusion.ExecutionContext method)</a>
+</li>
       <li><a href="python/generated/datafusion.ExecutionContext.html#datafusion.ExecutionContext.tables">tables() (datafusion.ExecutionContext method)</a>
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.tan">tan() (in module datafusion.functions)</a>
 </li>
-      <li><a href="python/generated/datafusion.functions.html#datafusion.functions.to_hex">to_hex() (in module datafusion.functions)</a>
-</li>
   </ul></td>
   <td style="width: 33%; vertical-align: top;"><ul>
+      <li><a href="python/generated/datafusion.functions.html#datafusion.functions.to_hex">to_hex() (in module datafusion.functions)</a>
+</li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.translate">translate() (in module datafusion.functions)</a>
 </li>
       <li><a href="python/generated/datafusion.functions.html#datafusion.functions.trim">trim() (in module datafusion.functions)</a>
diff --git a/datafusion/index.html b/datafusion/index.html
index 8a69e67..1a7fb43 100644
--- a/datafusion/index.html
+++ b/datafusion/index.html
@@ -458,7 +458,7 @@
 <ul>
 <li class="toctree-l1"><a class="reference internal" href="specification/roadmap.html">Roadmap</a></li>
 <li class="toctree-l1"><a class="reference internal" href="specification/roadmap.html#datafusion">DataFusion</a></li>
-<li class="toctree-l1"><a class="reference internal" href="specification/roadmap.html#vision">Vision</a></li>
+<li class="toctree-l1"><a class="reference internal" href="specification/roadmap.html#ballista">Ballista</a></li>
 <li class="toctree-l1"><a class="reference internal" href="specification/invariants.html">DataFusion’s Invariants</a></li>
 <li class="toctree-l1"><a class="reference internal" href="specification/output-field-name-semantic.html">Datafusion output field name semantic</a></li>
 </ul>
diff --git a/datafusion/objects.inv b/datafusion/objects.inv
index 630ef29..842645e 100644
Binary files a/datafusion/objects.inv and b/datafusion/objects.inv differ
diff --git a/datafusion/python/generated/datafusion.DataFrame.html b/datafusion/python/generated/datafusion.DataFrame.html
index e03283a..cf6fc81 100644
--- a/datafusion/python/generated/datafusion.DataFrame.html
+++ b/datafusion/python/generated/datafusion.DataFrame.html
@@ -376,7 +376,7 @@
 <dt id="datafusion.DataFrame">
 <em class="property">class </em><code class="sig-prename descclassname">datafusion.</code><code class="sig-name descname">DataFrame</code><a class="headerlink" href="#datafusion.DataFrame" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-<p>A DataFrame is a representation of a logical plan and an API to compose statements.
+<p>A PyDataFrame is a representation of a logical plan and an API to compose statements.
 Use it to build a plan and <cite>.collect()</cite> to execute the plan and collect the result.
 The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment.</p>
 <dl class="method">
@@ -396,67 +396,69 @@ The actual execution of a plan runs natively on Rust and Arrow on a multi-thread
 <td><p>Initialize self.</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.DataFrame.aggregate" title="datafusion.DataFrame.aggregate"><code class="xref py py-obj docutils literal notranslate"><span class="pre">aggregate</span></code></a></p></td>
-<td><p>Aggregates using expressions</p></td>
+<td><p></p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.DataFrame.collect" title="datafusion.DataFrame.collect"><code class="xref py py-obj docutils literal notranslate"><span class="pre">collect</span></code></a></p></td>
-<td><p>Executes the plan, returning a list of <a href="#id1"><span class="problematic" id="id2">`</span></a>RecordBatch`es.Unless some order is specified in the plan, there is no guarantee of the order of the result.</p></td>
+<td><p>Executes the plan, returning a list of <a href="#id1"><span class="problematic" id="id2">`</span></a>RecordBatch`es. Unless some order is specified in the plan, there is no guarantee of the order of the result.</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.DataFrame.filter" title="datafusion.DataFrame.filter"><code class="xref py py-obj docutils literal notranslate"><span class="pre">filter</span></code></a></p></td>
-<td><p>Filter according to the <cite>predicate</cite> expression</p></td>
+<td><p></p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.DataFrame.join" title="datafusion.DataFrame.join"><code class="xref py py-obj docutils literal notranslate"><span class="pre">join</span></code></a></p></td>
-<td><p>Returns the join of two DataFrames <cite>on</cite>.</p></td>
+<td><p></p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.DataFrame.limit" title="datafusion.DataFrame.limit"><code class="xref py py-obj docutils literal notranslate"><span class="pre">limit</span></code></a></p></td>
-<td><p>Limits the plan to return at most <cite>count</cite> rows</p></td>
+<td><p></p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.DataFrame.select" title="datafusion.DataFrame.select"><code class="xref py py-obj docutils literal notranslate"><span class="pre">select</span></code></a></p></td>
-<td><p>Select <cite>expressions</cite> from the existing DataFrame.</p></td>
+<td><p></p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.DataFrame.show" title="datafusion.DataFrame.show"><code class="xref py py-obj docutils literal notranslate"><span class="pre">show</span></code></a></p></td>
 <td><p>Print the result, 20 lines by default</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.DataFrame.sort" title="datafusion.DataFrame.sort"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sort</span></code></a></p></td>
-<td><p>Sort by specified sorting expressions</p></td>
+<td><p></p></td>
 </tr>
 </tbody>
 </table>
 <dl class="method">
 <dt id="datafusion.DataFrame.aggregate">
 <code class="sig-name descname">aggregate</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.aggregate" title="Permalink to this definition">¶</a></dt>
-<dd><p>Aggregates using expressions</p>
-</dd></dl>
+<dd></dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.collect">
 <code class="sig-name descname">collect</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.collect" title="Permalink to this definition">¶</a></dt>
 <dd><p>Executes the plan, returning a list of <a href="#id3"><span class="problematic" id="id4">`</span></a>RecordBatch`es.
-Unless some order is specified in the plan, there is no guarantee of the order of the result</p>
+Unless some order is specified in the plan, there is no
+guarantee of the order of the result.</p>
 </dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.filter">
 <code class="sig-name descname">filter</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.filter" title="Permalink to this definition">¶</a></dt>
-<dd><p>Filter according to the <cite>predicate</cite> expression</p>
-</dd></dl>
+<dd></dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.join">
 <code class="sig-name descname">join</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.join" title="Permalink to this definition">¶</a></dt>
-<dd><p>Returns the join of two DataFrames <cite>on</cite>.</p>
-</dd></dl>
+<dd></dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.limit">
 <code class="sig-name descname">limit</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.limit" title="Permalink to this definition">¶</a></dt>
-<dd><p>Limits the plan to return at most <cite>count</cite> rows</p>
+<dd></dd></dl>
+
+<dl class="method">
+<dt id="datafusion.DataFrame.schema">
+<code class="sig-name descname">schema</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.schema" title="Permalink to this definition">¶</a></dt>
+<dd><p>Returns the schema from the logical plan</p>
 </dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.select">
 <code class="sig-name descname">select</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.select" title="Permalink to this definition">¶</a></dt>
-<dd><p>Select <cite>expressions</cite> from the existing DataFrame.</p>
-</dd></dl>
+<dd></dd></dl>
 
 <dl class="method">
 <dt id="datafusion.DataFrame.show">
@@ -467,8 +469,7 @@ Unless some order is specified in the plan, there is no guarantee of the order o
 <dl class="method">
 <dt id="datafusion.DataFrame.sort">
 <code class="sig-name descname">sort</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.DataFrame.sort" title="Permalink to this definition">¶</a></dt>
-<dd><p>Sort by specified sorting expressions</p>
-</dd></dl>
+<dd></dd></dl>
 
 </dd></dl>
 
diff --git a/datafusion/python/generated/datafusion.ExecutionContext.html b/datafusion/python/generated/datafusion.ExecutionContext.html
index 0b4078c..a69e7f7 100644
--- a/datafusion/python/generated/datafusion.ExecutionContext.html
+++ b/datafusion/python/generated/datafusion.ExecutionContext.html
@@ -376,7 +376,7 @@
 <dt id="datafusion.ExecutionContext">
 <em class="property">class </em><code class="sig-prename descclassname">datafusion.</code><code class="sig-name descname">ExecutionContext</code><a class="headerlink" href="#datafusion.ExecutionContext" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-<p><cite>ExecutionContext</cite> is able to plan and execute DataFusion plans.
+<p><cite>PyExecutionContext</cite> is able to plan and execute DataFusion plans.
 It has a powerful optimizer, a physical planner for local execution, and a
 multi-threaded execution engine to perform the execution.</p>
 <dl class="method">
@@ -408,7 +408,7 @@ multi-threaded execution engine to perform the execution.</p>
 <td><p></p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.ExecutionContext.sql" title="datafusion.ExecutionContext.sql"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sql</span></code></a></p></td>
-<td><p>Returns a DataFrame whose plan corresponds to the SQL statement.</p></td>
+<td><p>Returns a PyDataFrame whose plan corresponds to the SQL statement.</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.ExecutionContext.tables" title="datafusion.ExecutionContext.tables"><code class="xref py py-obj docutils literal notranslate"><span class="pre">tables</span></code></a></p></td>
 <td><p></p></td>
@@ -416,11 +416,21 @@ multi-threaded execution engine to perform the execution.</p>
 </tbody>
 </table>
 <dl class="method">
+<dt id="datafusion.ExecutionContext.catalog">
+<code class="sig-name descname">catalog</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.catalog" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
 <dt id="datafusion.ExecutionContext.create_dataframe">
 <code class="sig-name descname">create_dataframe</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.create_dataframe" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
 
 <dl class="method">
+<dt id="datafusion.ExecutionContext.empty_table">
+<code class="sig-name descname">empty_table</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.empty_table" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
 <dt id="datafusion.ExecutionContext.register_csv">
 <code class="sig-name descname">register_csv</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.register_csv" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
@@ -443,10 +453,15 @@ multi-threaded execution engine to perform the execution.</p>
 <dl class="method">
 <dt id="datafusion.ExecutionContext.sql">
 <code class="sig-name descname">sql</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.sql" title="Permalink to this definition">¶</a></dt>
-<dd><p>Returns a DataFrame whose plan corresponds to the SQL statement.</p>
+<dd><p>Returns a PyDataFrame whose plan corresponds to the SQL statement.</p>
 </dd></dl>
 
 <dl class="method">
+<dt id="datafusion.ExecutionContext.table">
+<code class="sig-name descname">table</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.table" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
 <dt id="datafusion.ExecutionContext.tables">
 <code class="sig-name descname">tables</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.ExecutionContext.tables" title="Permalink to this definition">¶</a></dt>
 <dd></dd></dl>
diff --git a/datafusion/python/generated/datafusion.Expression.html b/datafusion/python/generated/datafusion.Expression.html
index 1809823..e980adf 100644
--- a/datafusion/python/generated/datafusion.Expression.html
+++ b/datafusion/python/generated/datafusion.Expression.html
@@ -376,7 +376,7 @@
 <dt id="datafusion.Expression">
 <em class="property">class </em><code class="sig-prename descclassname">datafusion.</code><code class="sig-name descname">Expression</code><a class="headerlink" href="#datafusion.Expression" title="Permalink to this definition">¶</a></dt>
 <dd><p>Bases: <code class="xref py py-class docutils literal notranslate"><span class="pre">object</span></code></p>
-<p>An expression that can be used on a DataFrame</p>
+<p>A PyExpr that can be used on a DataFrame</p>
 <dl class="method">
 <dt id="datafusion.Expression.__init__">
 <code class="sig-name descname">__init__</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.__init__" title="Permalink to this definition">¶</a></dt>
@@ -394,23 +394,43 @@
 <td><p>Initialize self.</p></td>
 </tr>
 <tr class="row-even"><td><p><a class="reference internal" href="#datafusion.Expression.alias" title="datafusion.Expression.alias"><code class="xref py py-obj docutils literal notranslate"><span class="pre">alias</span></code></a></p></td>
-<td><p>assign a name to the expression</p></td>
+<td><p>assign a name to the PyExpr</p></td>
 </tr>
 <tr class="row-odd"><td><p><a class="reference internal" href="#datafusion.Expression.sort" title="datafusion.Expression.sort"><code class="xref py py-obj docutils literal notranslate"><span class="pre">sort</span></code></a></p></td>
-<td><p>Create a sort expression from an existing expression.</p></td>
+<td><p>Create a sort PyExpr from an existing PyExpr.</p></td>
 </tr>
 </tbody>
 </table>
 <dl class="method">
 <dt id="datafusion.Expression.alias">
 <code class="sig-name descname">alias</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.alias" title="Permalink to this definition">¶</a></dt>
-<dd><p>assign a name to the expression</p>
+<dd><p>assign a name to the PyExpr</p>
 </dd></dl>
 
 <dl class="method">
+<dt id="datafusion.Expression.cast">
+<code class="sig-name descname">cast</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.cast" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
+<dt id="datafusion.Expression.column">
+<em class="property">static </em><code class="sig-name descname">column</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.column" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
+<dt id="datafusion.Expression.is_null">
+<code class="sig-name descname">is_null</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.is_null" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
+<dt id="datafusion.Expression.literal">
+<em class="property">static </em><code class="sig-name descname">literal</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.literal" title="Permalink to this definition">¶</a></dt>
+<dd></dd></dl>
+
+<dl class="method">
 <dt id="datafusion.Expression.sort">
 <code class="sig-name descname">sort</code><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#datafusion.Expression.sort" title="Permalink to this definition">¶</a></dt>
-<dd><p>Create a sort expression from an existing expression.</p>
+<dd><p>Create a sort PyExpr from an existing PyExpr.</p>
 </dd></dl>
 
 </dd></dl>
diff --git a/datafusion/python/index.html b/datafusion/python/index.html
index 0f1751a..51ba5d6 100644
--- a/datafusion/python/index.html
+++ b/datafusion/python/index.html
@@ -321,6 +321,11 @@
 </p>
 <ul class="nav bd-sidenav">
  <li class="toctree-l1">
+  <a class="reference internal" href="../specification/roadmap.html">
+   Roadmap
+  </a>
+ </li>
+ <li class="toctree-l1">
   <a class="reference internal" href="../specification/invariants.html">
    DataFusion’s Invariants
   </a>
@@ -364,6 +369,11 @@
    Issue tracker
   </a>
  </li>
+ <li class="toctree-l1">
+  <a class="reference external" href="https://github.com/apache/arrow-datafusion/blob/master/CODE_OF_CONDUCT.md">
+   Code of conduct
+  </a>
+ </li>
 </ul>
 
     
@@ -462,11 +472,10 @@
 <h2>How to use it<a class="headerlink" href="#how-to-use-it" title="Permalink to this headline">¶</a></h2>
 <p>Simple usage:</p>
 <div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span> <span class="nn">datafusion</span>
+<span class="kn">from</span> <span class="nn">datafusion</span> <span class="kn">import</span> <span class="n">functions</span> <span class="k">as</span> <span class="n">f</span>
+<span class="kn">from</span> <span class="nn">datafusion</span> <span class="kn">import</span> <span class="n">col</span>
 <span class="kn">import</span> <span class="nn">pyarrow</span>
 
-<span class="c1"># an alias</span>
-<span class="n">f</span> <span class="o">=</span> <span class="n">datafusion</span><span class="o">.</span><span class="n">functions</span>
-
 <span class="c1"># create a context</span>
 <span class="n">ctx</span> <span class="o">=</span> <span class="n">datafusion</span><span class="o">.</span><span class="n">ExecutionContext</span><span class="p">()</span>
 
@@ -479,8 +488,8 @@
 
 <span class="c1"># create a new statement</span>
 <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span>
-    <span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">+</span> <span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span>
-    <span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">-</span> <span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span>
+    <span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">+</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span>
+    <span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)</span> <span class="o">-</span> <span class="n">col</span><span class="p">(</span><span class="s2">&quot;b&quot;</span><span class="p">),</span>
 <span class="p">)</span>
 
 <span class="c1"># execute and collect the first (and only) batch</span>
@@ -497,7 +506,7 @@
 
 <span class="n">udf</span> <span class="o">=</span> <span class="n">f</span><span class="o">.</span><span class="n">udf</span><span class="p">(</span><span class="n">is_null</span><span class="p">,</span> <span class="p">[</span><span class="n">pyarrow</span><span class="o">.</span><span class="n">int64</span><span class="p">()],</span> <span class="n">pyarrow</span><span class="o">.</span><span class="n">bool_</span><span class="p">())</span>
 
-<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">udf</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)))</span>
+<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="n">udf</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">)))</span>
 </pre></div>
 </div>
 </div>
@@ -535,7 +544,7 @@
 
 <span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">aggregate</span><span class="p">(</span>
     <span class="p">[],</span>
-    <span class="p">[</span><span class="n">udaf</span><span class="p">(</span><span class="n">f</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">))]</span>
+    <span class="p">[</span><span class="n">udaf</span><span class="p">(</span><span class="n">col</span><span class="p">(</span><span class="s2">&quot;a&quot;</span><span class="p">))]</span>
 <span class="p">)</span>
 </pre></div>
 </div>
diff --git a/datafusion/searchindex.js b/datafusion/searchindex.js
index 58a3f9c..e871585 100644
--- a/datafusion/searchindex.js
+++ b/datafusion/searchindex.js
@@ -1 +1 @@
-Search.setIndex({docnames:["cli/index","community/communication","index","python/api","python/api/dataframe","python/api/execution_context","python/api/expression","python/api/functions","python/generated/datafusion.DataFrame","python/generated/datafusion.ExecutionContext","python/generated/datafusion.Expression","python/generated/datafusion.functions","python/index","specification/invariants","specification/output-field-name-semantic","specification/roadmap","user-guide/cli","user-guide [...]
\ No newline at end of file
+Search.setIndex({docnames:["cli/index","community/communication","index","python/api","python/api/dataframe","python/api/execution_context","python/api/expression","python/api/functions","python/generated/datafusion.DataFrame","python/generated/datafusion.ExecutionContext","python/generated/datafusion.Expression","python/generated/datafusion.functions","python/index","specification/invariants","specification/output-field-name-semantic","specification/roadmap","user-guide/cli","user-guide [...]
\ No newline at end of file
diff --git a/datafusion/specification/roadmap.html b/datafusion/specification/roadmap.html
index a8178ef..568e919 100644
--- a/datafusion/specification/roadmap.html
+++ b/datafusion/specification/roadmap.html
@@ -446,17 +446,29 @@
      )
     </a>
    </li>
-   <li class="toc-h2 nav-item toc-entry">
-    <a class="reference internal nav-link" href="#ballista">
-     Ballista
-    </a>
-   </li>
   </ul>
  </li>
  <li class="toc-h1 nav-item toc-entry">
-  <a class="reference internal nav-link" href="#vision">
-   Vision
+  <a class="reference internal nav-link" href="#ballista">
+   Ballista
   </a>
+  <ul class="visible nav section-nav flex-column">
+   <li class="toc-h2 nav-item toc-entry">
+    <a class="reference internal nav-link" href="#ballista-roadmap">
+     Ballista Roadmap
+    </a>
+   </li>
+   <li class="toc-h2 nav-item toc-entry">
+    <a class="reference internal nav-link" href="#move-query-scheduler-into-datafusion">
+     Move query scheduler into DataFusion
+    </a>
+   </li>
+   <li class="toc-h2 nav-item toc-entry">
+    <a class="reference internal nav-link" href="#implement-execution-time-cost-based-optimizations-based-on-statistics">
+     Implement execution-time cost-based optimizations based on statistics
+    </a>
+   </li>
+  </ul>
  </li>
 </ul>
 
@@ -550,6 +562,7 @@ to provide:</p>
 <li><p>Additional constant folding / partial evaluation <a class="reference external" href="https://github.com/apache/arrow-datafusion/issues/1070">#1070</a></p></li>
 <li><p>More sophisticated cost based optimizer for join ordering</p></li>
 <li><p>Implement advanced query optimization framework (Tokomak) #440</p></li>
+<li><p>Finer optimizations for group by and aggregate functions</p></li>
 </ul>
 </div>
 <div class="section" id="datasources">
@@ -589,13 +602,32 @@ to provide:</p>
 <li><p>adopt a shorter name, like dfcli?</p></li>
 </ul>
 </div>
+</div>
 <div class="section" id="ballista">
-<h2>Ballista<a class="headerlink" href="#ballista" title="Permalink to this headline">¶</a></h2>
+<h1>Ballista<a class="headerlink" href="#ballista" title="Permalink to this headline">¶</a></h1>
+<p>Ballista is a distributed compute platform based on Apache Arrow and DataFusion. It provides a query scheduler that
+breaks a physical plan into stages and tasks and then schedules tasks for execution across the available executors
+in the cluster.</p>
+<p>Having Ballista as part of the DataFusion codebase helps ensure that DataFusion remains suitable for distributed
+compute. For example, it helps ensure that physical query plans can be serialized to protobuf format and that they
+remain language-agnostic so that executors can be built in languages other than Rust.</p>
+<div class="section" id="ballista-roadmap">
+<h2>Ballista Roadmap<a class="headerlink" href="#ballista-roadmap" title="Permalink to this headline">¶</a></h2>
 </div>
+<div class="section" id="move-query-scheduler-into-datafusion">
+<h2>Move query scheduler into DataFusion<a class="headerlink" href="#move-query-scheduler-into-datafusion" title="Permalink to this headline">¶</a></h2>
+<p>The Ballista scheduler has some advantages over DataFusion query execution because it doesn’t try to eagerly execute
+the entire query at once but breaks it down into a directed acyclic graph (DAG) of stages and executes a
+configurable number of stages and tasks concurrently. It should be possible to push some of this logic down to
+DataFusion so that the same scheduler can be used to scale across cores in-process and across nodes in a cluster.</p>
+</div>
+<div class="section" id="implement-execution-time-cost-based-optimizations-based-on-statistics">
+<h2>Implement execution-time cost-based optimizations based on statistics<a class="headerlink" href="#implement-execution-time-cost-based-optimizations-based-on-statistics" title="Permalink to this headline">¶</a></h2>
+<p>After the execution of a query stage, accurate statistics are available for the resulting data. These statistics
+could be leveraged by the scheduler to optimize the query during execution. For example, when performing a hash join
+it is desirable to load the smaller side of the join into memory and in some cases we cannot predict which side will
+be smaller until execution time.</p>
 </div>
-<div class="section" id="vision">
-<h1>Vision<a class="headerlink" href="#vision" title="Permalink to this headline">¶</a></h1>
-<p>TBD</p>
 </div>
 
 
diff --git a/datafusion/user-guide/distributed/deployment/docker-compose.html b/datafusion/user-guide/distributed/deployment/docker-compose.html
index bf8140e..125754e 100644
--- a/datafusion/user-guide/distributed/deployment/docker-compose.html
+++ b/datafusion/user-guide/distributed/deployment/docker-compose.html
@@ -321,6 +321,11 @@
 </p>
 <ul class="nav bd-sidenav">
  <li class="toctree-l1">
+  <a class="reference internal" href="../../../specification/roadmap.html">
+   Roadmap
+  </a>
+ </li>
+ <li class="toctree-l1">
   <a class="reference internal" href="../../../specification/invariants.html">
    DataFusion’s Invariants
   </a>
@@ -364,6 +369,11 @@
    Issue tracker
   </a>
  </li>
+ <li class="toctree-l1">
+  <a class="reference external" href="https://github.com/apache/arrow-datafusion/blob/master/CODE_OF_CONDUCT.md">
+   Code of conduct
+  </a>
+ </li>
 </ul>
 
     
diff --git a/datafusion/user-guide/distributed/deployment/index.html b/datafusion/user-guide/distributed/deployment/index.html
index 5c69647..4b17652 100644
--- a/datafusion/user-guide/distributed/deployment/index.html
+++ b/datafusion/user-guide/distributed/deployment/index.html
@@ -321,6 +321,11 @@
 </p>
 <ul class="nav bd-sidenav">
  <li class="toctree-l1">
+  <a class="reference internal" href="../../../specification/roadmap.html">
+   Roadmap
+  </a>
+ </li>
+ <li class="toctree-l1">
   <a class="reference internal" href="../../../specification/invariants.html">
    DataFusion’s Invariants
   </a>
@@ -364,6 +369,11 @@
    Issue tracker
   </a>
  </li>
+ <li class="toctree-l1">
+  <a class="reference external" href="https://github.com/apache/arrow-datafusion/blob/master/CODE_OF_CONDUCT.md">
+   Code of conduct
+  </a>
+ </li>
 </ul>