You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2020/11/02 13:18:52 UTC
[airflow] branch master updated: Checks if all the libraries in
setup.py are listed in installation.rst file (#12023)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/master by this push:
new 2354bd2 Checks if all the libraries in setup.py are listed in installation.rst file (#12023)
2354bd2 is described below
commit 2354bd2be381bcfe6db132990af1ac34df52b9b4
Author: SZN <sz...@nieradka.net>
AuthorDate: Mon Nov 2 14:17:41 2020 +0100
Checks if all the libraries in setup.py are listed in installation.rst file (#12023)
---
.pre-commit-config.yaml | 6 ++
BREEZE.rst | 6 +-
CONTRIBUTING.rst | 16 +--
INSTALL | 16 +--
STATIC_CODE_CHECKS.rst | 2 +
breeze-complete | 1 +
docs/installation.rst | 22 +++--
docs/spelling_wordlist.txt | 5 +-
.../pre_commit_check_setup_installation.py | 108 +++++++++++++++++++++
setup.py | 2 +
10 files changed, 155 insertions(+), 29 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 037a600..2c2047d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -241,6 +241,12 @@ repos:
files: ^setup.py$
pass_filenames: false
entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py
+ - id: setup-installation
+ name: Checks if all the libraries in setup.py are listed in installation.rst file
+ language: python
+ files: ^setup.py$|^docs/installation.rst$
+ pass_filenames: false
+ entry: ./scripts/ci/pre_commit/pre_commit_check_setup_installation.py
- id: update-breeze-file
name: Update output of breeze command in BREEZE.rst
entry: "./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.sh"
diff --git a/BREEZE.rst b/BREEZE.rst
index 4a39f2b..d0d5b1c 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -2006,9 +2006,9 @@ This is the current syntax for `./breeze <./breeze>`_:
lint-dockerfile lint-openapi mermaid mixed-line-ending mypy mypy-helm
no-relative-imports pre-commit-descriptions provide-create-sessions pydevd
pydocstyle pylint pylint-tests python-no-log-warn restrict-start_date rst-backticks
- setup-order shellcheck sort-in-the-wild stylelint trailing-whitespace
- update-breeze-file update-extras update-local-yml-file update-setup-cfg-file
- yamllint
+ setup-order setup-installation shellcheck sort-in-the-wild stylelint
+ trailing-whitespace update-breeze-file update-extras update-local-yml-file
+ update-setup-cfg-file yamllint
You can pass extra arguments including options to the pre-commit framework as
<EXTRA_ARGS> passed after --. For example:
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f2e4490..cfd1c70 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -541,14 +541,14 @@ This is the full list of those extras:
.. START EXTRAS HERE
all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
-apache.hive, apache.kylin, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra,
-celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc,
-docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google,
-google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password,
-pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry,
-singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm,
-yandexcloud, all, devel_ci
+apache.hive, apache.kylin, apache.pinot, apache.presto, apache.spark, apache.webhdfs, async, atlas,
+aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel,
+devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
+google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
+microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
+papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
+segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
+virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
.. END EXTRAS HERE
diff --git a/INSTALL b/INSTALL
index c9ea969..da6d6ad 100644
--- a/INSTALL
+++ b/INSTALL
@@ -64,14 +64,14 @@ pip install -e . \
# START EXTRAS HERE
all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
-apache.hive, apache.kylin, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra,
-celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc,
-docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google,
-google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password,
-pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry,
-singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm,
-yandexcloud, all, devel_ci
+apache.hive, apache.kylin, apache.pinot, apache.presto, apache.spark, apache.webhdfs, async, atlas,
+aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel,
+devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
+google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
+microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
+papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
+segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
+virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
# END EXTRAS HERE
diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst
index d3fb5c7..990852c 100644
--- a/STATIC_CODE_CHECKS.rst
+++ b/STATIC_CODE_CHECKS.rst
@@ -138,6 +138,8 @@ require Breeze Docker images to be installed locally:
----------------------------------- ---------------------------------------------------------------- ------------
``setup-order`` Checks for an order of dependencies in setup.py
----------------------------------- ---------------------------------------------------------------- ------------
+``setup-installation`` Checks if all the libraries in setup.py are listed in docs
+----------------------------------- ---------------------------------------------------------------- ------------
``shellcheck`` Checks shell files with shellcheck.
----------------------------------- ---------------------------------------------------------------- ------------
``sort-in-the-wild`` Sort INTHEWILD.md alphabetically.
diff --git a/breeze-complete b/breeze-complete
index 6f0cdb4..33f4847 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -112,6 +112,7 @@ python-no-log-warn
restrict-start_date
rst-backticks
setup-order
+setup-installation
shellcheck
sort-in-the-wild
stylelint
diff --git a/docs/installation.rst b/docs/installation.rst
index 1654d65..2070391 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -120,8 +120,6 @@ Here's the list of the subpackages and what they enable:
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| all_dbs | ``pip install 'apache-airflow[all_dbs]'`` | All databases integrations |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| crypto | ``pip install 'apache-airflow[crypto]'`` | Encrypt connection passwords in metadata db |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| devel | ``pip install 'apache-airflow[devel]'`` | Minimum dev tools requirements |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| devel_hadoop | ``pip install 'apache-airflow[devel_hadoop]'`` | Airflow + dependencies on the Hadoop stack |
@@ -151,16 +149,10 @@ Here's the list of the subpackages and what they enable:
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| kylin | ``pip install 'apache-airflow[apache.kylin]'`` | All Kylin related operators & hooks |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| livy | ``pip install 'apache-airflow[apache.livy]'`` | All Livy related operators & hooks |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| pig | ``pip install 'apache-airflow[apache.pig]'`` | All Pig related operators & hooks |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| presto | ``pip install 'apache-airflow[apache.presto]'`` | All Presto related operators & hooks |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| spark | ``pip install 'apache-airflow[apache.spark]'`` | All Spark related operators & hooks |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| sqoop | ``pip install 'apache-airflow[apache.sqoop]'`` | All Sqoop related operators & hooks |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| webhdfs | ``pip install 'apache-airflow[webhdfs]'`` | HDFS hooks and operators |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
@@ -192,6 +184,10 @@ Here's the list of the subpackages and what they enable:
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| jira | ``pip install 'apache-airflow[jira]'`` | Jira hooks and operators |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| pagerduty | ``pip install 'apache-airflow[pagerduty]'`` | |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| plexus | ``pip install 'apache-airflow[plexus]'`` | |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| qds | ``pip install 'apache-airflow[qds]'`` | Enable QDS (Qubole Data Service) support |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| salesforce | ``pip install 'apache-airflow[salesforce]'`` | Salesforce hook |
@@ -200,12 +196,16 @@ Here's the list of the subpackages and what they enable:
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| segment | ``pip install 'apache-airflow[segment]'`` | Segment hooks and sensors |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| sentry | ``pip install 'apache-airflow[sentry]'`` | |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| slack | ``pip install 'apache-airflow[slack]'`` | :class:`airflow.providers.slack.operators.slack.SlackAPIOperator` |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| snowflake | ``pip install 'apache-airflow[snowflake]'`` | Snowflake hooks and operators |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
| vertica | ``pip install 'apache-airflow[vertica]'`` | Vertica hook support as an Airflow backend |
+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| yandexcloud | ``pip install 'apache-airflow[yandexcloud]'`` | |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
**Software:**
@@ -257,8 +257,14 @@ Here's the list of the subpackages and what they enable:
+---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
| samba | ``pip install 'apache-airflow[samba]'`` | :class:`airflow.providers.apache.hive.transfers.hive_to_samba.HiveToSambaOperator` |
+---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| singularity | ``pip install 'apache-airflow[singularity]'`` | |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
| statsd | ``pip install 'apache-airflow[statsd]'`` | Needed by StatsD metrics |
+---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| tableau | ``pip install 'apache-airflow[tableau]'`` | |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| virtualenv | ``pip install 'apache-airflow[virtualenv]'`` | |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
**Other:**
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index ff01a53..8c0fe82 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -196,7 +196,6 @@ JobRunning
JobTrigger
Json
Jupyter
-jupytercmd
KYLIN
Kalibrr
Kamil
@@ -868,6 +867,7 @@ js
json
jthomas
jupyter
+jupytercmd
kaxilnaik
keepalive
keepalives
@@ -999,6 +999,7 @@ os
ot
overridable
oversubscription
+pagerduty
pageviews
paginator
papermill
@@ -1109,8 +1110,8 @@ repos
reqs
resetdb
resourceVersion
-resumable
resultset
+resumable
retransmits
rfc
ricard
diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_installation.py b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py
new file mode 100755
index 0000000..7c30683
--- /dev/null
+++ b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Checks if all the libraries in setup.py are listed in installation.rst file
+"""
+
+import os
+import re
+from os.path import dirname
+from typing import Dict, List
+
+AIRFLOW_SOURCES_DIR = os.path.join(dirname(__file__), os.pardir, os.pardir, os.pardir)
+SETUP_PY_FILE = 'setup.py'
+DOCS_FILE = 'installation.rst'
+PY_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_\.]*'
+
+
+def get_file_content(*path_elements: str) -> str:
+ file_path = os.path.join(AIRFLOW_SOURCES_DIR, *path_elements)
+ with open(file_path) as file_to_read:
+ return file_to_read.read()
+
+
+def get_extras_from_setup() -> Dict[str, List[str]]:
+ """
+ Returns an array EXTRAS_REQUIREMENTS with aliases from setup.py file in format:
+ {'package name': ['alias1', 'alias2'], ...}
+ """
+ setup_content = get_file_content(SETUP_PY_FILE)
+
+ extras_section_regex = re.compile(
+ r'^EXTRAS_REQUIREMENTS: Dict[^{]+{([^}]+)}', re.MULTILINE)
+ extras_section = extras_section_regex.findall(setup_content)[0]
+
+ extras_regex = re.compile(
+ rf'^\s+[\"\']({PY_IDENTIFIER})[\"\']:\s*({PY_IDENTIFIER})[^#\n]*(#\s*TODO.*)?$', re.MULTILINE)
+
+ extras_dict: Dict[str, List[str]] = {}
+ for extras in extras_regex.findall(extras_section):
+ package = extras[1]
+ alias = extras[0]
+ if not extras_dict.get(package):
+ extras_dict[package] = []
+ extras_dict[package].append(alias)
+ return extras_dict
+
+
+def get_extras_from_docs() -> List[str]:
+ """
+ Returns an array of install packages names from installation.rst.
+ """
+ docs_content = get_file_content('docs', DOCS_FILE)
+
+ extras_section_regex = re.compile(rf'^\|[^|]+\|.*pip install .apache-airflow\[({PY_IDENTIFIER})\].',
+ re.MULTILINE)
+ extras = extras_section_regex.findall(docs_content)
+
+ extras = list(filter(lambda entry: entry != 'all', extras))
+ return extras
+
+
+if __name__ == '__main__':
+ setup_packages = get_extras_from_setup()
+ docs_packages = get_extras_from_docs()
+
+ output_table = ""
+
+ for extras in sorted(setup_packages.keys()):
+ if not set(setup_packages[extras]).intersection(docs_packages):
+ output_table += "| {:20} | {:^10} | {:^10} |\n".format(extras, "V", "")
+
+ setup_packages_str = str(setup_packages)
+ for extras in sorted(docs_packages):
+ if f"'{extras}'" not in setup_packages_str:
+ output_table += "| {:20} | {:^10} | {:^10} |\n".format(extras, "", "V")
+
+ if(output_table == ""):
+ exit(0)
+
+ print(f"""
+ERROR
+
+"EXTRAS_REQUIREMENTS" section in {SETUP_PY_FILE} should be synchronized
+with "Extra Packages" section in documentation file docs/{DOCS_FILE}.
+
+Here is a list of packages that are used but not documented, or
+documented but not used.
+ """)
+ print(".{:_^22}.{:_^12}.{:_^12}.".format("NAME", "SETUP", "INSTALLATION"))
+ print(output_table)
+
+ exit(1)
diff --git a/setup.py b/setup.py
index 7e1a0cc..43a71b1 100644
--- a/setup.py
+++ b/setup.py
@@ -564,6 +564,8 @@ EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
"apache.hive": hive,
"apache.kylin": kylin,
"apache.pinot": pinot,
+ "apache.presto": presto,
+ "apache.spark": spark,
"apache.webhdfs": webhdfs,
'async': async_packages,
'atlas': atlas, # TODO: remove this in Airflow 2.1