You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/14 18:44:25 UTC

[arrow-datafusion-python] branch master updated: [DOCS] - Fix sample code and python api docs (#71)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/master by this push:
     new f0d5659  [DOCS] - Fix sample code and python api docs (#71)
f0d5659 is described below

commit f0d565912cd1cb86e5f268ff41bf1118e9743690
Author: Francis Du <me...@francis.run>
AuthorDate: Tue Nov 15 02:44:20 2022 +0800

    [DOCS] - Fix sample code and python api docs (#71)
    
    * fix: fix sample code and python api docs
    
    * fix: readme code sample
    
    * fix: python lint
    
    * fix: repo name in docs & url link
    
    * fix: repo name in docs & url link
    
    * fix: remove useless dependency
    
    * fix: remove .DS_Store
---
 .DS_Store                                          | Bin 0 -> 6148 bytes
 .gitignore                                         |   3 +
 README.md                                          |  23 ++-
 docs/Makefile                                      |  38 +++++
 docs/README.md                                     |  64 ++++++++
 docs/build.sh                                      |  28 ++++
 docs/make.bat                                      |  52 +++++++
 docs/requirements.txt                              |  22 +++
 .../images/DataFusion-Logo-Background-White.png    | Bin 0 -> 12401 bytes
 .../images/DataFusion-Logo-Background-White.svg    |   1 +
 .../source/_static/images/DataFusion-Logo-Dark.png | Bin 0 -> 20134 bytes
 .../source/_static/images/DataFusion-Logo-Dark.svg |   1 +
 .../_static/images/DataFusion-Logo-Light.png       | Bin 0 -> 19102 bytes
 .../_static/images/DataFusion-Logo-Light.svg       |   1 +
 docs/source/_static/theme_overrides.css            |  93 +++++++++++
 docs/source/_templates/docs-sidebar.html           |  19 +++
 docs/source/_templates/layout.html                 |   5 +
 docs/source/{python => }/api.rst                   |   2 +
 .../{python/api/functions.rst => api/config.rst}   |   6 +-
 docs/source/{python => }/api/dataframe.rst         |   0
 docs/source/{python => }/api/execution_context.rst |   0
 docs/source/{python => }/api/expression.rst        |   0
 docs/source/{python => }/api/functions.rst         |   2 +-
 .../api/functions.rst => api/object_store.rst}     |   8 +-
 docs/source/conf.py                                | 115 ++++++++++++++
 docs/source/{python => }/index.rst                 | 172 +++++++++++++--------
 .../python/generated/datafusion.DataFrame.rst      |  50 ------
 .../python/generated/datafusion.Expression.rst     |  45 ------
 .../python/generated/datafusion.SessionContext.rst |  52 -------
 .../python/generated/datafusion.functions.rst      |  40 -----
 pyproject.toml                                     |   5 +-
 31 files changed, 582 insertions(+), 265 deletions(-)

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..4d1b949
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index 5b6cf36..57431b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,9 @@ target
 Cargo.lock
 /venv
 .idea
+/docs/temp
+/docs/build
+.DS_Store
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/README.md b/README.md
index 1a0daef..35186a9 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,6 @@ Simple usage:
 
 ```python
 import datafusion
-from datafusion import functions as f
 from datafusion import col
 import pyarrow
 
@@ -70,6 +69,7 @@ assert result.column(1) == pyarrow.array([-3, -3, -3])
 ### UDFs
 
 ```python
+import pyarrow
 from datafusion import udf
 
 def is_null(array: pyarrow.Array) -> pyarrow.Array:
@@ -77,9 +77,19 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array:
 
 is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
 
+# create a context
+ctx = datafusion.SessionContext()
+
+# create a RecordBatch and a new DataFrame from it
+batch = pyarrow.RecordBatch.from_arrays(
+    [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+    names=["a", "b"],
+)
+df = ctx.create_dataframe([[batch]])
+
 df = df.select(is_null_arr(col("a")))
 
-result = df.collect()
+result = df.collect()[0]
 
 assert result.column(0) == pyarrow.array([False] * 3)
 ```
@@ -89,7 +99,9 @@ assert result.column(0) == pyarrow.array([False] * 3)
 ```python
 import pyarrow
 import pyarrow.compute
+import datafusion
 from datafusion import udaf, Accumulator
+from datafusion import col
 
 
 class MyAccumulator(Accumulator):
@@ -113,7 +125,14 @@ class MyAccumulator(Accumulator):
     def evaluate(self) -> pyarrow.Scalar:
         return self._sum
 
+# create a context
+ctx = datafusion.SessionContext()
 
+# create a RecordBatch and a new DataFrame from it
+batch = pyarrow.RecordBatch.from_arrays(
+    [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+    names=["a", "b"],
+)
 df = ctx.create_dataframe([[batch]])
 
 my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable')
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..e65c8e2
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..a6f4998
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,64 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# DataFusion Documentation
+
+This folder contains the source content of the [Python API](./source/api) documentation.
+It is published to https://arrow.apache.org/datafusion/
+as part of the release process.
+
+## Dependencies
+
+It's recommended to install build dependencies and build the documentation
+inside a Python virtualenv.
+
+- Python
+- `pip install -r requirements.txt`
+
+## Build & Preview
+
+Run the provided script to build the HTML pages.
+
+```bash
+./build.sh
+```
+
+The HTML will be generated into a `build` directory.
+
+Preview the site on Linux by running this command.
+
+```bash
+firefox build/html/index.html
+```
+
+## Release Process
+
+The documentation is served through the
+[arrow-site](https://github.com/apache/arrow-site/) repo. To release a new
+version of the docs, follow these steps:
+
+1. Run `./build.sh` inside the `docs` folder to generate the docs website in the `build/html` folder.
+2. Clone the arrow-site repo.
+3. Check out the `asf-site` branch (NOT `master`).
+4. Copy build artifacts into `arrow-site` repo's `datafusion` folder with a command such as
+
+- `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac)
+- `rsync -avzr ./build/html/ ../../arrow-site/datafusion/`
+
+5. Commit changes in `arrow-site` and send a PR.
\ No newline at end of file
diff --git a/docs/build.sh b/docs/build.sh
new file mode 100644
index 0000000..3f24f8e
--- /dev/null
+++ b/docs/build.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+rm -rf build 2> /dev/null
+rm -rf temp 2> /dev/null
+mkdir temp
+cp -rf source/* temp/
+# replace relative URLs with absolute URLs
+#sed -i 's/\.\.\/\.\.\/\.\.\//https:\/\/github.com\/apache\/arrow-datafusion\/blob\/master\//g' temp/contributor-guide/index.md
+make SOURCEDIR=`pwd`/temp html
\ No newline at end of file
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..1ba6802
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,52 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements.  See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership.  The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License.  You may obtain a copy of the License at
+@rem
+@rem   http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied.  See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..2af5635
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+sphinx
+pydata-sphinx-theme==0.8.0
+myst-parser
+maturin
+jinja2
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Background-White.png b/docs/source/_static/images/DataFusion-Logo-Background-White.png
new file mode 100644
index 0000000..023c237
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Background-White.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Background-White.svg b/docs/source/_static/images/DataFusion-Logo-Background-White.svg
new file mode 100644
index 0000000..b3bb47c
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Background-White.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 350 170"><rect width="100%" height="105%" fill="white"/><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#f3971f;}.cls-3{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Dark</title><g id="Layer_2" data-name="Layer 2" transform="translate(10 10)"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="c [...]
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Dark.png b/docs/source/_static/images/DataFusion-Logo-Dark.png
new file mode 100644
index 0000000..cc60f12
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Dark.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Dark.svg b/docs/source/_static/images/DataFusion-Logo-Dark.svg
new file mode 100644
index 0000000..e16f244
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Dark.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#f3971f;}.cls-3{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Dark</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43,23"/><path class="c [...]
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Light.png b/docs/source/_static/images/DataFusion-Logo-Light.png
new file mode 100644
index 0000000..8992213
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Light.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Light.svg b/docs/source/_static/images/DataFusion-Logo-Light.svg
new file mode 100644
index 0000000..b3bef21
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Light.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#fff;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#fff;}.cls-3{fill:#f3971f;}.cls-4{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Light</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43, [...]
\ No newline at end of file
diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css
new file mode 100644
index 0000000..1e972cc
--- /dev/null
+++ b/docs/source/_static/theme_overrides.css
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+/* Customizing with theme CSS variables */
+
+:root {
+  --pst-color-active-navigation: 215, 70, 51;
+  --pst-color-link-hover: 215, 70, 51;
+  --pst-color-headerlink: 215, 70, 51;
+  /* Use normal text color (like h3, ..) instead of primary color */
+  --pst-color-h1: var(--color-text-base);
+  --pst-color-h2: var(--color-text-base);
+  /* Use softer blue from bootstrap's default info color */
+  --pst-color-info: 23, 162, 184;
+  --pst-header-height: 0px;
+}
+
+code {
+  color: rgb(215, 70, 51);
+}
+
+.footer {
+  text-align: center;
+}
+
+/* Ensure the logo is properly displayed */
+
+.navbar-brand {
+  height: auto;
+  width: auto;
+}
+
+a.navbar-brand img {
+  height: auto;
+  width: auto;
+  max-height: 15vh;
+  max-width: 100%;
+}
+
+
+/* This is the bootstrap CSS style for "table-striped". Since the theme does
+not yet provide an easy way to configure this globally, it is easier to simply
+include this snippet here than to update each table in all rst files to
+add ":class: table-striped" */
+
+.table tbody tr:nth-of-type(odd) {
+  background-color: rgba(0, 0, 0, 0.05);
+}
+
+
+/* Limit the max height of the sidebar navigation section. In our
+customized template there is more content above the navigation (i.e. a
+larger logo), so if we don't decrease the max-height, it will overlap with
+the footer.
+Details: min(15vh, 110px) for the logo size, 8rem for the search box etc. */
+
+@media (min-width:720px) {
+  @supports (position:-webkit-sticky) or (position:sticky) {
+    .bd-links {
+      max-height: calc(100vh - min(15vh, 110px) - 8rem)
+    }
+  }
+}
+
+
+/* Fix table text wrapping in RTD theme,
+ * see https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html
+ */
+
+@media screen {
+    table.docutils td {
+        /* !important prevents the common CSS stylesheets from overriding
+          this as on RTD they are loaded after this stylesheet */
+        white-space: normal !important;
+    }
+}
diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html
new file mode 100644
index 0000000..bc2bf00
--- /dev/null
+++ b/docs/source/_templates/docs-sidebar.html
@@ -0,0 +1,19 @@
+
+<a class="navbar-brand" href="{{ pathto(master_doc) }}">
+  <img src="{{ pathto('_static/images/' + logo, 1) }}" class="logo" alt="logo">
+</a>
+
+<form class="bd-search d-flex align-items-center" action="{{ pathto('search') }}" method="get">
+  <i class="icon fas fa-search"></i>
+  <input type="search" class="form-control" name="q" id="search-input" placeholder="{{ theme_search_bar_text }}" aria-label="{{ theme_search_bar_text }}" autocomplete="off" >
+</form>
+
+<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
+  <div class="bd-toc-item active">
+    {% if "python/api" in pagename or "python/generated" in pagename %}
+    {{ generate_nav_html("sidebar", startdepth=0, maxdepth=3, collapse=False, includehidden=True, titles_only=True) }}
+    {% else %}
+    {{ generate_nav_html("sidebar", startdepth=0, maxdepth=4, collapse=False, includehidden=True, titles_only=True) }}
+    {% endif %}
+  </div>
+</nav>
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
new file mode 100644
index 0000000..a9d0f30
--- /dev/null
+++ b/docs/source/_templates/layout.html
@@ -0,0 +1,5 @@
+{% extends "pydata_sphinx_theme/layout.html" %}
+
+{# Silence the navbar #}
+{% block docs_navbar %}
+{% endblock %}
diff --git a/docs/source/python/api.rst b/docs/source/api.rst
similarity index 96%
rename from docs/source/python/api.rst
rename to docs/source/api.rst
index f81753e..a5d6543 100644
--- a/docs/source/python/api.rst
+++ b/docs/source/api.rst
@@ -24,7 +24,9 @@ API Reference
 .. toctree::
    :maxdepth: 2
 
+   api/config
    api/dataframe
    api/execution_context
    api/expression
    api/functions
+   api/object_store
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/config.rst
similarity index 95%
copy from docs/source/python/api/functions.rst
copy to docs/source/api/config.rst
index 6f10d82..df244ae 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/config.rst
@@ -15,13 +15,13 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-.. _api.functions:
+.. _api.config:
 .. currentmodule:: datafusion
 
-Functions
+Config
 =========
 
 .. autosummary::
    :toctree: ../generated/
 
-   functions
+   Config
diff --git a/docs/source/python/api/dataframe.rst b/docs/source/api/dataframe.rst
similarity index 100%
rename from docs/source/python/api/dataframe.rst
rename to docs/source/api/dataframe.rst
diff --git a/docs/source/python/api/execution_context.rst b/docs/source/api/execution_context.rst
similarity index 100%
rename from docs/source/python/api/execution_context.rst
rename to docs/source/api/execution_context.rst
diff --git a/docs/source/python/api/expression.rst b/docs/source/api/expression.rst
similarity index 100%
rename from docs/source/python/api/expression.rst
rename to docs/source/api/expression.rst
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/functions.rst
similarity index 97%
copy from docs/source/python/api/functions.rst
copy to docs/source/api/functions.rst
index 6f10d82..958606d 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/functions.rst
@@ -24,4 +24,4 @@ Functions
 .. autosummary::
    :toctree: ../generated/
 
-   functions
+   functions.functions
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/object_store.rst
similarity index 90%
rename from docs/source/python/api/functions.rst
rename to docs/source/api/object_store.rst
index 6f10d82..eeb6c43 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/object_store.rst
@@ -15,13 +15,13 @@
 .. specific language governing permissions and limitations
 .. under the License.
 
-.. _api.functions:
-.. currentmodule:: datafusion
+.. _api.object_store:
+.. currentmodule:: datafusion.object_store
 
-Functions
+ObjectStore
 =========
 
 .. autosummary::
    :toctree: ../generated/
 
-   functions
+   object_store
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..ce3d3c7
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+
+project = "Arrow DataFusion"
+copyright = "2022, Apache Software Foundation"
+author = "Arrow DataFusion Authors"
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.doctest",
+    "sphinx.ext.ifconfig",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.napoleon",
+    "myst_parser",
+]
+
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# Show members for classes in .. autosummary
+autodoc_default_options = {
+    "members": None,
+    "undoc-members": None,
+    "show-inheritance": None,
+    "inherited-members": None,
+}
+
+autosummary_generate = True
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "pydata_sphinx_theme"
+
+html_theme_options = {
+    "use_edit_page_button": True,
+}
+
+html_context = {
+    "github_user": "apache",
+    "github_repo": "arrow-datafusion-python",
+    "github_version": "master",
+    "doc_path": "docs/source",
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_logo = "_static/images/DataFusion-Logo-Background-White.png"
+
+html_css_files = ["theme_overrides.css"]
+
+html_sidebars = {
+    "**": ["docs-sidebar.html"],
+}
+
+# tell myst_parser to auto-generate anchor links for headers h1, h2, h3
+myst_heading_anchors = 3
+
+# enable nice rendering of checkboxes for the task lists
+myst_enable_extensions = ["tasklist"]
diff --git a/docs/source/python/index.rst b/docs/source/index.rst
similarity index 52%
rename from docs/source/python/index.rst
rename to docs/source/index.rst
index 167e66b..78f44ea 100644
--- a/docs/source/python/index.rst
+++ b/docs/source/index.rst
@@ -38,32 +38,31 @@ Simple usage:
 
 .. code-block:: python
 
-   import datafusion
-   from datafusion import functions as f
-   from datafusion import col
-   import pyarrow
+    import datafusion
+    from datafusion import col
+    import pyarrow
 
-   # create a context
-   ctx = datafusion.SessionContext()
+    # create a context
+    ctx = datafusion.SessionContext()
 
-   # create a RecordBatch and a new DataFrame from it
-   batch = pyarrow.RecordBatch.from_arrays(
-       [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
-       names=["a", "b"],
-   )
-   df = ctx.create_dataframe([[batch]])
+    # create a RecordBatch and a new DataFrame from it
+    batch = pyarrow.RecordBatch.from_arrays(
+        [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+        names=["a", "b"],
+    )
+    df = ctx.create_dataframe([[batch]])
 
-   # create a new statement
-   df = df.select(
-       col("a") + col("b"),
-       col("a") - col("b"),
-   )
+    # create a new statement
+    df = df.select(
+        col("a") + col("b"),
+        col("a") - col("b"),
+    )
 
-   # execute and collect the first (and only) batch
-   result = df.collect()[0]
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
 
-   assert result.column(0) == pyarrow.array([5, 7, 9])
-   assert result.column(1) == pyarrow.array([-3, -3, -3])
+    assert result.column(0) == pyarrow.array([5, 7, 9])
+    assert result.column(1) == pyarrow.array([-3, -3, -3])
 
 
 We can also execute a query against data stored in CSV 
@@ -76,7 +75,6 @@ We can also execute a query against data stored in CSV
 .. code-block:: python
 
     import datafusion
-    from datafusion import functions as f
     from datafusion import col
     import pyarrow
 
@@ -105,7 +103,6 @@ And how to execute a query against a CSV using SQL:
 .. code-block:: python
 
     import datafusion
-    from datafusion import functions as f
     from datafusion import col
     import pyarrow
 
@@ -131,12 +128,29 @@ UDFs
 
 .. code-block:: python
 
-   def is_null(array: pyarrow.Array) -> pyarrow.Array:
-       return array.is_null()
+    import pyarrow
+    from datafusion import udf
+
+    def is_null(array: pyarrow.Array) -> pyarrow.Array:
+        return array.is_null()
+
+    is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
+
+    # create a context
+    ctx = datafusion.SessionContext()
+
+    # create a RecordBatch and a new DataFrame from it
+    batch = pyarrow.RecordBatch.from_arrays(
+        [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+        names=["a", "b"],
+    )
+    df = ctx.create_dataframe([[batch]])
 
-   udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
+    df = df.select(is_null_arr(col("a")))
 
-   df = df.select(udf(col("a")))
+    result = df.collect()[0]
+
+    assert result.column(0) == pyarrow.array([False] * 3)
 
 
 UDAF
@@ -144,41 +158,54 @@ UDAF
 
 .. code-block:: python
 
-   import pyarrow
-   import pyarrow.compute
+    import pyarrow
+    import pyarrow.compute
+    import datafusion
+    from datafusion import udaf, Accumulator
+    from datafusion import col
+
 
+    class MyAccumulator(Accumulator):
+        """
+        Example user-defined accumulator that keeps a running sum of its input values.
+        """
+        def __init__(self):
+            self._sum = pyarrow.scalar(0.0)
 
-   class Accumulator:
-       """
-       Interface of a user-defined accumulation.
-       """
-       def __init__(self):
-           self._sum = pyarrow.scalar(0.0)
+        def update(self, values: pyarrow.Array) -> None:
+            # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+            self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
 
-       def to_scalars(self) -> [pyarrow.Scalar]:
-           return [self._sum]
+        def merge(self, states: pyarrow.Array) -> None:
+            # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+            self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
 
-       def update(self, values: pyarrow.Array) -> None:
-           # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
-           self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
+        def state(self) -> pyarrow.Array:
+            return pyarrow.array([self._sum.as_py()])
 
-       def merge(self, states: pyarrow.Array) -> None:
-           # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
-           self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
+        def evaluate(self) -> pyarrow.Scalar:
+            return self._sum
 
-       def evaluate(self) -> pyarrow.Scalar:
-           return self._sum
+    # create a context
+    ctx = datafusion.SessionContext()
 
+    # create a RecordBatch and a new DataFrame from it
+    batch = pyarrow.RecordBatch.from_arrays(
+        [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+        names=["a", "b"],
+    )
+    df = ctx.create_dataframe([[batch]])
 
-   df = ...
+    my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable')
 
-   udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()])
+    df = df.aggregate(
+        [],
+        [my_udaf(col("a"))]
+    )
 
-   df = df.aggregate(
-       [],
-       [udaf(col("a"))]
-   )
+    result = df.collect()[0]
 
+    assert result.column(0) == pyarrow.array([6.0])
 
 How to install (from pip)
 =========================
@@ -187,6 +214,14 @@ How to install (from pip)
 
    pip install datafusion
 
+You can verify the installation by running:
+
+.. code-block:: python
+
+    >>> import datafusion
+    >>> datafusion.__version__
+    '0.6.0'
+
 
 How to develop
 ==============
@@ -197,16 +232,23 @@ Bootstrap:
 
 .. code-block:: shell
 
-   # fetch this repo
-   git clone git@github.com:apache/arrow-datafusion.git
+    # fetch this repo
+    git clone git@github.com:apache/arrow-datafusion-python.git
+    # prepare development environment (used to build wheel / install in development)
+    python3 -m venv venv
+    # activate the venv
+    source venv/bin/activate
+    # update pip itself if necessary
+    python -m pip install -U pip
+    # install dependencies (for Python 3.8+)
+    python -m pip install -r requirements-310.txt
 
-   cd arrow-datafusion/python
+The tests rely on test data in git submodules.
 
-   # prepare development environment (used to build wheel / install in development)
-   python3 -m venv venv
-   # activate the venv
-   source venv/bin/activate
-   pip install -r requirements.txt
+.. code-block:: shell
+
+    git submodule init
+    git submodule update
 
 
 Whenever rust code changes (your changes or via `git pull`):
@@ -225,18 +267,16 @@ To change test dependencies, change the `requirements.in` and run
 
 .. code-block:: shell
 
-   # install pip-tools (this can be done only once), also consider running in venv
-   pip install pip-tools
-
-   # change requirements.in and then run
-   pip-compile --generate-hashes
+    # install pip-tools (this can be done only once), also consider running in venv
+    python -m pip install pip-tools
+    python -m piptools compile --generate-hashes -o requirements-310.txt
 
 
-To update dependencies, run
+To update dependencies, run with `-U`
 
 .. code-block:: shell
 
-   pip-compile update
+    python -m piptools compile -U --generate-hashes -o requirements-310.txt
 
 
 More details about pip-tools `here <https://github.com/jazzband/pip-tools>`_
diff --git a/docs/source/python/generated/datafusion.DataFrame.rst b/docs/source/python/generated/datafusion.DataFrame.rst
deleted file mode 100644
index ffee788..0000000
--- a/docs/source/python/generated/datafusion.DataFrame.rst
+++ /dev/null
@@ -1,50 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements.  See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership.  The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License.  You may obtain a copy of the License at
-
-..   http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied.  See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.DataFrame
-====================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: DataFrame
-
-   
-   .. automethod:: __init__
-
-   
-   .. rubric:: Methods
-
-   .. autosummary::
-   
-      ~DataFrame.__init__
-      ~DataFrame.aggregate
-      ~DataFrame.collect
-      ~DataFrame.explain
-      ~DataFrame.filter
-      ~DataFrame.join
-      ~DataFrame.limit
-      ~DataFrame.schema
-      ~DataFrame.select
-      ~DataFrame.select_columns
-      ~DataFrame.show
-      ~DataFrame.sort
-   
-   
-
-   
-   
-   
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.Expression.rst b/docs/source/python/generated/datafusion.Expression.rst
deleted file mode 100644
index 58a5d04..0000000
--- a/docs/source/python/generated/datafusion.Expression.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements.  See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership.  The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License.  You may obtain a copy of the License at
-
-..   http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied.  See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.Expression
-=====================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: Expression
-
-   
-   .. automethod:: __init__
-
-   
-   .. rubric:: Methods
-
-   .. autosummary::
-   
-      ~Expression.__init__
-      ~Expression.alias
-      ~Expression.cast
-      ~Expression.column
-      ~Expression.is_null
-      ~Expression.literal
-      ~Expression.sort
-   
-   
-
-   
-   
-   
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.SessionContext.rst b/docs/source/python/generated/datafusion.SessionContext.rst
deleted file mode 100644
index 3975325..0000000
--- a/docs/source/python/generated/datafusion.SessionContext.rst
+++ /dev/null
@@ -1,52 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements.  See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership.  The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License.  You may obtain a copy of the License at
-
-..   http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied.  See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.SessionContext
-=========================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: SessionContext
-
-   
-   .. automethod:: __init__
-
-   
-   .. rubric:: Methods
-
-   .. autosummary::
-   
-      ~SessionContext.__init__
-      ~SessionContext.catalog
-      ~SessionContext.create_dataframe
-      ~SessionContext.deregister_table
-      ~SessionContext.empty_table
-      ~SessionContext.register_csv
-      ~SessionContext.register_object_store
-      ~SessionContext.register_parquet
-      ~SessionContext.register_record_batches
-      ~SessionContext.register_table
-      ~SessionContext.register_udf
-      ~SessionContext.sql
-      ~SessionContext.table
-      ~SessionContext.tables
-   
-   
-
-   
-   
-   
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.functions.rst b/docs/source/python/generated/datafusion.functions.rst
deleted file mode 100644
index d00e2b4..0000000
--- a/docs/source/python/generated/datafusion.functions.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements.  See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership.  The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License.  You may obtain a copy of the License at
-
-..   http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied.  See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.functions
-====================
-
-.. automodule:: datafusion.functions
-
-   
-   
-   
-
-   
-   
-   
-
-   
-   
-   
-
-   
-   
-   
-
-
-
diff --git a/pyproject.toml b/pyproject.toml
index 6658a68..0dd0ce2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,8 +47,9 @@ dependencies = [
 ]
 
 [project.urls]
-documentation = "https://arrow.apache.org/datafusion/python"
-repository = "https://github.com/apache/arrow-datafusion"
+homepage = "https://arrow.apache.org/datafusion"
+documentation = "https://arrow.apache.org/datafusion"
+repository = "https://github.com/apache/arrow-datafusion-python"
 
 [tool.isort]
 profile = "black"