You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/11/14 18:44:25 UTC
[arrow-datafusion-python] branch master updated: [DOCS] - Fix sample code and python api docs (#71)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new f0d5659 [DOCS] - Fix sample code and python api docs (#71)
f0d5659 is described below
commit f0d565912cd1cb86e5f268ff41bf1118e9743690
Author: Francis Du <me...@francis.run>
AuthorDate: Tue Nov 15 02:44:20 2022 +0800
[DOCS] - Fix sample code and python api docs (#71)
* fix: fix sample code and python api docs
* fix: readme code sample
* fix: python lint
* fix: repo name in docs & url link
* fix: repo name in docs & url link
* fix: remove useless dependency
* fix: remove .DS_Store
---
.DS_Store | Bin 0 -> 6148 bytes
.gitignore | 3 +
README.md | 23 ++-
docs/Makefile | 38 +++++
docs/README.md | 64 ++++++++
docs/build.sh | 28 ++++
docs/make.bat | 52 +++++++
docs/requirements.txt | 22 +++
.../images/DataFusion-Logo-Background-White.png | Bin 0 -> 12401 bytes
.../images/DataFusion-Logo-Background-White.svg | 1 +
.../source/_static/images/DataFusion-Logo-Dark.png | Bin 0 -> 20134 bytes
.../source/_static/images/DataFusion-Logo-Dark.svg | 1 +
.../_static/images/DataFusion-Logo-Light.png | Bin 0 -> 19102 bytes
.../_static/images/DataFusion-Logo-Light.svg | 1 +
docs/source/_static/theme_overrides.css | 93 +++++++++++
docs/source/_templates/docs-sidebar.html | 19 +++
docs/source/_templates/layout.html | 5 +
docs/source/{python => }/api.rst | 2 +
.../{python/api/functions.rst => api/config.rst} | 6 +-
docs/source/{python => }/api/dataframe.rst | 0
docs/source/{python => }/api/execution_context.rst | 0
docs/source/{python => }/api/expression.rst | 0
docs/source/{python => }/api/functions.rst | 2 +-
.../api/functions.rst => api/object_store.rst} | 8 +-
docs/source/conf.py | 115 ++++++++++++++
docs/source/{python => }/index.rst | 172 +++++++++++++--------
.../python/generated/datafusion.DataFrame.rst | 50 ------
.../python/generated/datafusion.Expression.rst | 45 ------
.../python/generated/datafusion.SessionContext.rst | 52 -------
.../python/generated/datafusion.functions.rst | 40 -----
pyproject.toml | 5 +-
31 files changed, 582 insertions(+), 265 deletions(-)
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..4d1b949
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
index 5b6cf36..57431b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,9 @@ target
Cargo.lock
/venv
.idea
+/docs/temp
+/docs/build
+.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/README.md b/README.md
index 1a0daef..35186a9 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,6 @@ Simple usage:
```python
import datafusion
-from datafusion import functions as f
from datafusion import col
import pyarrow
@@ -70,6 +69,7 @@ assert result.column(1) == pyarrow.array([-3, -3, -3])
### UDFs
```python
+import pyarrow
from datafusion import udf
def is_null(array: pyarrow.Array) -> pyarrow.Array:
@@ -77,9 +77,19 @@ def is_null(array: pyarrow.Array) -> pyarrow.Array:
is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
+# create a context
+ctx = datafusion.SessionContext()
+
+# create a RecordBatch and a new DataFrame from it
+batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+)
+df = ctx.create_dataframe([[batch]])
+
df = df.select(is_null_arr(col("a")))
-result = df.collect()
+result = df.collect()[0]
assert result.column(0) == pyarrow.array([False] * 3)
```
@@ -89,7 +99,9 @@ assert result.column(0) == pyarrow.array([False] * 3)
```python
import pyarrow
import pyarrow.compute
+import datafusion
from datafusion import udaf, Accumulator
+from datafusion import col
class MyAccumulator(Accumulator):
@@ -113,7 +125,14 @@ class MyAccumulator(Accumulator):
def evaluate(self) -> pyarrow.Scalar:
return self._sum
+# create a context
+ctx = datafusion.SessionContext()
+# create a RecordBatch and a new DataFrame from it
+batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+)
df = ctx.create_dataframe([[batch]])
my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable')
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..e65c8e2
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS ?=
+SPHINXBUILD ?= sphinx-build
+SOURCEDIR = source
+BUILDDIR = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 0000000..a6f4998
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,64 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# DataFusion Documentation
+
+This folder contains the source content of the [Python API](./source/api) documentation.
+It is published to https://arrow.apache.org/datafusion/
+as part of the release process.
+
+## Dependencies
+
+It's recommended to install build dependencies and build the documentation
+inside a Python virtualenv.
+
+- Python
+- `pip install -r requirements.txt`
+
+## Build & Preview
+
+Run the provided script to build the HTML pages.
+
+```bash
+./build.sh
+```
+
+The HTML will be generated into a `build` directory.
+
+Preview the site on Linux by running this command.
+
+```bash
+firefox build/html/index.html
+```
+
+## Release Process
+
+The documentation is served through the
+[arrow-site](https://github.com/apache/arrow-site/) repo. To release a new
+version of the docs, follow these steps:
+
+1. Run `./build.sh` inside the `docs` folder to generate the docs website inside the `build/html` folder.
+2. Clone the arrow-site repo
+3. Check out the `asf-site` branch (NOT `master`)
+4. Copy build artifacts into `arrow-site` repo's `datafusion` folder with a command such as
+
+- `cp -rT ./build/html/ ../../arrow-site/datafusion/` (doesn't work on mac)
+- `rsync -avzr ./build/html/ ../../arrow-site/datafusion/`
+
+5. Commit changes in `arrow-site` and send a PR.
\ No newline at end of file
diff --git a/docs/build.sh b/docs/build.sh
new file mode 100644
index 0000000..3f24f8e
--- /dev/null
+++ b/docs/build.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+set -e
+rm -rf build 2> /dev/null
+rm -rf temp 2> /dev/null
+mkdir temp
+cp -rf source/* temp/
+# replace relative URLs with absolute URLs
+#sed -i 's/\.\.\/\.\.\/\.\.\//https:\/\/github.com\/apache\/arrow-datafusion\/blob\/master\//g' temp/contributor-guide/index.md
+make SOURCEDIR=`pwd`/temp html
\ No newline at end of file
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..1ba6802
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,52 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
\ No newline at end of file
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..2af5635
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+sphinx
+pydata-sphinx-theme==0.8.0
+myst-parser
+maturin
+jinja2
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Background-White.png b/docs/source/_static/images/DataFusion-Logo-Background-White.png
new file mode 100644
index 0000000..023c237
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Background-White.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Background-White.svg b/docs/source/_static/images/DataFusion-Logo-Background-White.svg
new file mode 100644
index 0000000..b3bb47c
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Background-White.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 350 170"><rect width="100%" height="105%" fill="white"/><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#f3971f;}.cls-3{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Dark</title><g id="Layer_2" data-name="Layer 2" transform="translate(10 10)"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="c [...]
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Dark.png b/docs/source/_static/images/DataFusion-Logo-Dark.png
new file mode 100644
index 0000000..cc60f12
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Dark.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Dark.svg b/docs/source/_static/images/DataFusion-Logo-Dark.svg
new file mode 100644
index 0000000..e16f244
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Dark.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#000;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#f3971f;}.cls-3{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Dark</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43,23"/><path class="c [...]
\ No newline at end of file
diff --git a/docs/source/_static/images/DataFusion-Logo-Light.png b/docs/source/_static/images/DataFusion-Logo-Light.png
new file mode 100644
index 0000000..8992213
Binary files /dev/null and b/docs/source/_static/images/DataFusion-Logo-Light.png differ
diff --git a/docs/source/_static/images/DataFusion-Logo-Light.svg b/docs/source/_static/images/DataFusion-Logo-Light.svg
new file mode 100644
index 0000000..b3bef21
--- /dev/null
+++ b/docs/source/_static/images/DataFusion-Logo-Light.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 329.21 164.93"><defs><style>.cls-1{fill:none;stroke:#fff;stroke-linecap:round;stroke-miterlimit:10;stroke-width:0.75px;stroke-dasharray:0.75 3;}.cls-2{fill:#fff;}.cls-3{fill:#f3971f;}.cls-4{fill:#f29720;}</style></defs><title>DataFUSION-Logo-Light</title><g id="Layer_2" data-name="Layer 2"><g id="logo"><path class="cls-1" d="M257.26,112.82c16,20.72,25.14,36.57,22,39.34"/><path class="cls-1" d="M184.24,37.13c6.55,4.41,15.83,12.47,26.43, [...]
\ No newline at end of file
diff --git a/docs/source/_static/theme_overrides.css b/docs/source/_static/theme_overrides.css
new file mode 100644
index 0000000..1e972cc
--- /dev/null
+++ b/docs/source/_static/theme_overrides.css
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+/* Customizing with theme CSS variables */
+
+:root {
+ --pst-color-active-navigation: 215, 70, 51;
+ --pst-color-link-hover: 215, 70, 51;
+ --pst-color-headerlink: 215, 70, 51;
+ /* Use normal text color (like h3, ..) instead of primary color */
+ --pst-color-h1: var(--color-text-base);
+ --pst-color-h2: var(--color-text-base);
+ /* Use softer blue from bootstrap's default info color */
+ --pst-color-info: 23, 162, 184;
+ --pst-header-height: 0px;
+}
+
+code {
+ color: rgb(215, 70, 51);
+}
+
+.footer {
+ text-align: center;
+}
+
+/* Ensure the logo is properly displayed */
+
+.navbar-brand {
+ height: auto;
+ width: auto;
+}
+
+a.navbar-brand img {
+ height: auto;
+ width: auto;
+ max-height: 15vh;
+ max-width: 100%;
+}
+
+
+/* This is the bootstrap CSS style for "table-striped". Since the theme does
+not yet provide an easy way to configure this globally, it is easier to simply
+include this snippet here than updating each table in all rst files to
+add ":class: table-striped" */
+
+.table tbody tr:nth-of-type(odd) {
+ background-color: rgba(0, 0, 0, 0.05);
+}
+
+
+/* Limit the max height of the sidebar navigation section. Because in our
+customized template, there is more content above the navigation, i.e.
+larger logo: if we don't decrease the max-height, it will overlap with
+the footer.
+Details: min(15vh, 110px) for the logo size, 8rem for search box etc*/
+
+@media (min-width:720px) {
+ @supports (position:-webkit-sticky) or (position:sticky) {
+ .bd-links {
+ max-height: calc(100vh - min(15vh, 110px) - 8rem)
+ }
+ }
+}
+
+
+/* Fix table text wrapping in RTD theme,
+ * see https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html
+ */
+
+@media screen {
+ table.docutils td {
+ /* !important prevents the common CSS stylesheets from overriding
+ this as on RTD they are loaded after this stylesheet */
+ white-space: normal !important;
+ }
+}
diff --git a/docs/source/_templates/docs-sidebar.html b/docs/source/_templates/docs-sidebar.html
new file mode 100644
index 0000000..bc2bf00
--- /dev/null
+++ b/docs/source/_templates/docs-sidebar.html
@@ -0,0 +1,19 @@
+
+<a class="navbar-brand" href="{{ pathto(master_doc) }}">
+ <img src="{{ pathto('_static/images/' + logo, 1) }}" class="logo" alt="logo">
+</a>
+
+<form class="bd-search d-flex align-items-center" action="{{ pathto('search') }}" method="get">
+ <i class="icon fas fa-search"></i>
+ <input type="search" class="form-control" name="q" id="search-input" placeholder="{{ theme_search_bar_text }}" aria-label="{{ theme_search_bar_text }}" autocomplete="off" >
+</form>
+
+<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
+ <div class="bd-toc-item active">
+ {% if "python/api" in pagename or "python/generated" in pagename %}
+ {{ generate_nav_html("sidebar", startdepth=0, maxdepth=3, collapse=False, includehidden=True, titles_only=True) }}
+ {% else %}
+ {{ generate_nav_html("sidebar", startdepth=0, maxdepth=4, collapse=False, includehidden=True, titles_only=True) }}
+ {% endif %}
+ </div>
+</nav>
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
new file mode 100644
index 0000000..a9d0f30
--- /dev/null
+++ b/docs/source/_templates/layout.html
@@ -0,0 +1,5 @@
+{% extends "pydata_sphinx_theme/layout.html" %}
+
+{# Silence the navbar #}
+{% block docs_navbar %}
+{% endblock %}
diff --git a/docs/source/python/api.rst b/docs/source/api.rst
similarity index 96%
rename from docs/source/python/api.rst
rename to docs/source/api.rst
index f81753e..a5d6543 100644
--- a/docs/source/python/api.rst
+++ b/docs/source/api.rst
@@ -24,7 +24,9 @@ API Reference
.. toctree::
:maxdepth: 2
+ api/config
api/dataframe
api/execution_context
api/expression
api/functions
+ api/object_store
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/config.rst
similarity index 95%
copy from docs/source/python/api/functions.rst
copy to docs/source/api/config.rst
index 6f10d82..df244ae 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/config.rst
@@ -15,13 +15,13 @@
.. specific language governing permissions and limitations
.. under the License.
-.. _api.functions:
+.. _api.config:
.. currentmodule:: datafusion
-Functions
+Config
=========
.. autosummary::
:toctree: ../generated/
- functions
+ Config
diff --git a/docs/source/python/api/dataframe.rst b/docs/source/api/dataframe.rst
similarity index 100%
rename from docs/source/python/api/dataframe.rst
rename to docs/source/api/dataframe.rst
diff --git a/docs/source/python/api/execution_context.rst b/docs/source/api/execution_context.rst
similarity index 100%
rename from docs/source/python/api/execution_context.rst
rename to docs/source/api/execution_context.rst
diff --git a/docs/source/python/api/expression.rst b/docs/source/api/expression.rst
similarity index 100%
rename from docs/source/python/api/expression.rst
rename to docs/source/api/expression.rst
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/functions.rst
similarity index 97%
copy from docs/source/python/api/functions.rst
copy to docs/source/api/functions.rst
index 6f10d82..958606d 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/functions.rst
@@ -24,4 +24,4 @@ Functions
.. autosummary::
:toctree: ../generated/
- functions
+ functions.functions
diff --git a/docs/source/python/api/functions.rst b/docs/source/api/object_store.rst
similarity index 90%
rename from docs/source/python/api/functions.rst
rename to docs/source/api/object_store.rst
index 6f10d82..eeb6c43 100644
--- a/docs/source/python/api/functions.rst
+++ b/docs/source/api/object_store.rst
@@ -15,13 +15,13 @@
.. specific language governing permissions and limitations
.. under the License.
-.. _api.functions:
-.. currentmodule:: datafusion
+.. _api.object_store:
+.. currentmodule:: datafusion.object_store
-Functions
+ObjectStore
=========
.. autosummary::
:toctree: ../generated/
- functions
+ object_store
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..ce3d3c7
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- Project information -----------------------------------------------------
+
+project = "Arrow DataFusion"
+copyright = "2022, Apache Software Foundation"
+author = "Arrow DataFusion Authors"
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.doctest",
+ "sphinx.ext.ifconfig",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.napoleon",
+ "myst_parser",
+]
+
+source_suffix = {
+ ".rst": "restructuredtext",
+ ".md": "markdown",
+}
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = []
+
+# Show members for classes in .. autosummary
+autodoc_default_options = {
+ "members": None,
+ "undoc-members": None,
+ "show-inheritance": None,
+ "inherited-members": None,
+}
+
+autosummary_generate = True
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "pydata_sphinx_theme"
+
+html_theme_options = {
+ "use_edit_page_button": True,
+}
+
+html_context = {
+ "github_user": "apache",
+ "github_repo": "arrow-datafusion-python",
+ "github_version": "master",
+ "doc_path": "docs/source",
+}
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_logo = "_static/images/DataFusion-Logo-Background-White.png"
+
+html_css_files = ["theme_overrides.css"]
+
+html_sidebars = {
+ "**": ["docs-sidebar.html"],
+}
+
+# tell myst_parser to auto-generate anchor links for headers h1, h2, h3
+myst_heading_anchors = 3
+
+# enable nice rendering of checkboxes for the task lists
+myst_enable_extensions = ["tasklist"]
diff --git a/docs/source/python/index.rst b/docs/source/index.rst
similarity index 52%
rename from docs/source/python/index.rst
rename to docs/source/index.rst
index 167e66b..78f44ea 100644
--- a/docs/source/python/index.rst
+++ b/docs/source/index.rst
@@ -38,32 +38,31 @@ Simple usage:
.. code-block:: python
- import datafusion
- from datafusion import functions as f
- from datafusion import col
- import pyarrow
+ import datafusion
+ from datafusion import col
+ import pyarrow
- # create a context
- ctx = datafusion.SessionContext()
+ # create a context
+ ctx = datafusion.SessionContext()
- # create a RecordBatch and a new DataFrame from it
- batch = pyarrow.RecordBatch.from_arrays(
- [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
- names=["a", "b"],
- )
- df = ctx.create_dataframe([[batch]])
+ # create a RecordBatch and a new DataFrame from it
+ batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+ )
+ df = ctx.create_dataframe([[batch]])
- # create a new statement
- df = df.select(
- col("a") + col("b"),
- col("a") - col("b"),
- )
+ # create a new statement
+ df = df.select(
+ col("a") + col("b"),
+ col("a") - col("b"),
+ )
- # execute and collect the first (and only) batch
- result = df.collect()[0]
+ # execute and collect the first (and only) batch
+ result = df.collect()[0]
- assert result.column(0) == pyarrow.array([5, 7, 9])
- assert result.column(1) == pyarrow.array([-3, -3, -3])
+ assert result.column(0) == pyarrow.array([5, 7, 9])
+ assert result.column(1) == pyarrow.array([-3, -3, -3])
We can also execute a query against data stored in CSV
@@ -76,7 +75,6 @@ We can also execute a query against data stored in CSV
.. code-block:: python
import datafusion
- from datafusion import functions as f
from datafusion import col
import pyarrow
@@ -105,7 +103,6 @@ And how to execute a query against a CSV using SQL:
.. code-block:: python
import datafusion
- from datafusion import functions as f
from datafusion import col
import pyarrow
@@ -131,12 +128,29 @@ UDFs
.. code-block:: python
- def is_null(array: pyarrow.Array) -> pyarrow.Array:
- return array.is_null()
+ import pyarrow
+ from datafusion import udf
+
+ def is_null(array: pyarrow.Array) -> pyarrow.Array:
+ return array.is_null()
+
+ is_null_arr = udf(is_null, [pyarrow.int64()], pyarrow.bool_(), 'stable')
+
+ # create a context
+ ctx = datafusion.SessionContext()
+
+ # create a RecordBatch and a new DataFrame from it
+ batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+ )
+ df = ctx.create_dataframe([[batch]])
- udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
+ df = df.select(is_null_arr(col("a")))
- df = df.select(udf(col("a")))
+ result = df.collect()[0]
+
+ assert result.column(0) == pyarrow.array([False] * 3)
UDAF
@@ -144,41 +158,54 @@ UDAF
.. code-block:: python
- import pyarrow
- import pyarrow.compute
+ import pyarrow
+ import pyarrow.compute
+ import datafusion
+ from datafusion import udaf, Accumulator
+ from datafusion import col
+
+ class MyAccumulator(Accumulator):
+ """
+ Interface of a user-defined accumulation.
+ """
+ def __init__(self):
+ self._sum = pyarrow.scalar(0.0)
- class Accumulator:
- """
- Interface of a user-defined accumulation.
- """
- def __init__(self):
- self._sum = pyarrow.scalar(0.0)
+ def update(self, values: pyarrow.Array) -> None:
+ # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+ self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
- def to_scalars(self) -> [pyarrow.Scalar]:
- return [self._sum]
+ def merge(self, states: pyarrow.Array) -> None:
+ # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+ self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
- def update(self, values: pyarrow.Array) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
+ def state(self) -> pyarrow.Array:
+ return pyarrow.array([self._sum.as_py()])
- def merge(self, states: pyarrow.Array) -> None:
- # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
- self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
+ def evaluate(self) -> pyarrow.Scalar:
+ return self._sum
- def evaluate(self) -> pyarrow.Scalar:
- return self._sum
+ # create a context
+ ctx = datafusion.SessionContext()
+ # create a RecordBatch and a new DataFrame from it
+ batch = pyarrow.RecordBatch.from_arrays(
+ [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+ names=["a", "b"],
+ )
+ df = ctx.create_dataframe([[batch]])
- df = ...
+ my_udaf = udaf(MyAccumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()], 'stable')
- udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()])
+ df = df.aggregate(
+ [],
+ [my_udaf(col("a"))]
+ )
- df = df.aggregate(
- [],
- [udaf(col("a"))]
- )
+ result = df.collect()[0]
+ assert result.column(0) == pyarrow.array([6.0])
How to install (from pip)
=========================
@@ -187,6 +214,14 @@ How to install (from pip)
pip install datafusion
+You can verify the installation by running:
+
+.. code-block:: python
+
+ >>> import datafusion
+ >>> datafusion.__version__
+ '0.6.0'
+
How to develop
==============
@@ -197,16 +232,23 @@ Bootstrap:
.. code-block:: shell
- # fetch this repo
- git clone git@github.com:apache/arrow-datafusion.git
+ # fetch this repo
+ git clone git@github.com:apache/arrow-datafusion-python.git
+ # prepare development environment (used to build wheel / install in development)
+ python3 -m venv venv
+ # activate the venv
+ source venv/bin/activate
+ # update pip itself if necessary
+ python -m pip install -U pip
+ # install dependencies (for Python 3.8+)
+ python -m pip install -r requirements-310.txt
- cd arrow-datafusion/python
+The tests rely on test data in git submodules.
- # prepare development environment (used to build wheel / install in development)
- python3 -m venv venv
- # activate the venv
- source venv/bin/activate
- pip install -r requirements.txt
+.. code-block:: shell
+
+ git submodule init
+ git submodule update
Whenever rust code changes (your changes or via `git pull`):
@@ -225,18 +267,16 @@ To change test dependencies, change the `requirements.in` and run
.. code-block:: shell
- # install pip-tools (this can be done only once), also consider running in venv
- pip install pip-tools
-
- # change requirements.in and then run
- pip-compile --generate-hashes
+ # install pip-tools (this can be done only once), also consider running in venv
+ python -m pip install pip-tools
+ python -m piptools compile --generate-hashes -o requirements-310.txt
-To update dependencies, run
+To update dependencies, run with `-U`
.. code-block:: shell
- pip-compile update
+ python -m piptools compile -U --generate-hashes -o requirements-310.txt
More details about pip-tools `here <https://github.com/jazzband/pip-tools>`_
diff --git a/docs/source/python/generated/datafusion.DataFrame.rst b/docs/source/python/generated/datafusion.DataFrame.rst
deleted file mode 100644
index ffee788..0000000
--- a/docs/source/python/generated/datafusion.DataFrame.rst
+++ /dev/null
@@ -1,50 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-
-.. http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.DataFrame
-====================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: DataFrame
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
-
- ~DataFrame.__init__
- ~DataFrame.aggregate
- ~DataFrame.collect
- ~DataFrame.explain
- ~DataFrame.filter
- ~DataFrame.join
- ~DataFrame.limit
- ~DataFrame.schema
- ~DataFrame.select
- ~DataFrame.select_columns
- ~DataFrame.show
- ~DataFrame.sort
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.Expression.rst b/docs/source/python/generated/datafusion.Expression.rst
deleted file mode 100644
index 58a5d04..0000000
--- a/docs/source/python/generated/datafusion.Expression.rst
+++ /dev/null
@@ -1,45 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-
-.. http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.Expression
-=====================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: Expression
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
-
- ~Expression.__init__
- ~Expression.alias
- ~Expression.cast
- ~Expression.column
- ~Expression.is_null
- ~Expression.literal
- ~Expression.sort
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.SessionContext.rst b/docs/source/python/generated/datafusion.SessionContext.rst
deleted file mode 100644
index 3975325..0000000
--- a/docs/source/python/generated/datafusion.SessionContext.rst
+++ /dev/null
@@ -1,52 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-
-.. http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.SessionContext
-=========================
-
-.. currentmodule:: datafusion
-
-.. autoclass:: SessionContext
-
-
- .. automethod:: __init__
-
-
- .. rubric:: Methods
-
- .. autosummary::
-
- ~SessionContext.__init__
- ~SessionContext.catalog
- ~SessionContext.create_dataframe
- ~SessionContext.deregister_table
- ~SessionContext.empty_table
- ~SessionContext.register_csv
- ~SessionContext.register_object_store
- ~SessionContext.register_parquet
- ~SessionContext.register_record_batches
- ~SessionContext.register_table
- ~SessionContext.register_udf
- ~SessionContext.sql
- ~SessionContext.table
- ~SessionContext.tables
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/source/python/generated/datafusion.functions.rst b/docs/source/python/generated/datafusion.functions.rst
deleted file mode 100644
index d00e2b4..0000000
--- a/docs/source/python/generated/datafusion.functions.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-.. Licensed to the Apache Software Foundation (ASF) under one
-.. or more contributor license agreements. See the NOTICE file
-.. distributed with this work for additional information
-.. regarding copyright ownership. The ASF licenses this file
-.. to you under the Apache License, Version 2.0 (the
-.. "License"); you may not use this file except in compliance
-.. with the License. You may obtain a copy of the License at
-
-.. http://www.apache.org/licenses/LICENSE-2.0
-
-.. Unless required by applicable law or agreed to in writing,
-.. software distributed under the License is distributed on an
-.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-.. KIND, either express or implied. See the License for the
-.. specific language governing permissions and limitations
-.. under the License.
-
-datafusion.functions
-====================
-
-.. automodule:: datafusion.functions
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/pyproject.toml b/pyproject.toml
index 6658a68..0dd0ce2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -47,8 +47,9 @@ dependencies = [
]
[project.urls]
-documentation = "https://arrow.apache.org/datafusion/python"
-repository = "https://github.com/apache/arrow-datafusion"
+homepage = "https://arrow.apache.org/datafusion"
+documentation = "https://arrow.apache.org/datafusion"
+repository = "https://github.com/apache/arrow-datafusion-python"
[tool.isort]
profile = "black"