You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2020/11/02 13:18:52 UTC

[airflow] branch master updated: Checks if all the libraries in setup.py are listed in installation.rst file (#12023)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2354bd2  Checks if all the libraries in setup.py are listed in installation.rst file (#12023)
2354bd2 is described below

commit 2354bd2be381bcfe6db132990af1ac34df52b9b4
Author: SZN <sz...@nieradka.net>
AuthorDate: Mon Nov 2 14:17:41 2020 +0100

    Checks if all the libraries in setup.py are listed in installation.rst file (#12023)
---
 .pre-commit-config.yaml                            |   6 ++
 BREEZE.rst                                         |   6 +-
 CONTRIBUTING.rst                                   |  16 +--
 INSTALL                                            |  16 +--
 STATIC_CODE_CHECKS.rst                             |   2 +
 breeze-complete                                    |   1 +
 docs/installation.rst                              |  22 +++--
 docs/spelling_wordlist.txt                         |   5 +-
 .../pre_commit_check_setup_installation.py         | 108 +++++++++++++++++++++
 setup.py                                           |   2 +
 10 files changed, 155 insertions(+), 29 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 037a600..2c2047d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -241,6 +241,12 @@ repos:
         files: ^setup.py$
         pass_filenames: false
         entry: ./scripts/ci/pre_commit/pre_commit_check_order_setup.py
+      - id: setup-installation
+        name: Checks if all the libraries in setup.py are listed in installation.rst file
+        language: python
+        files: ^setup.py$|^docs/installation.rst$
+        pass_filenames: false
+        entry: ./scripts/ci/pre_commit/pre_commit_check_setup_installation.py
       - id: update-breeze-file
         name: Update output of breeze command in BREEZE.rst
         entry: "./scripts/ci/pre_commit/pre_commit_breeze_cmd_line.sh"
diff --git a/BREEZE.rst b/BREEZE.rst
index 4a39f2b..d0d5b1c 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -2006,9 +2006,9 @@ This is the current syntax for  `./breeze <./breeze>`_:
                  lint-dockerfile lint-openapi mermaid mixed-line-ending mypy mypy-helm
                  no-relative-imports pre-commit-descriptions provide-create-sessions pydevd
                  pydocstyle pylint pylint-tests python-no-log-warn restrict-start_date rst-backticks
-                 setup-order shellcheck sort-in-the-wild stylelint trailing-whitespace
-                 update-breeze-file update-extras update-local-yml-file update-setup-cfg-file
-                 yamllint
+                 setup-order setup-installation shellcheck sort-in-the-wild stylelint
+                 trailing-whitespace update-breeze-file update-extras update-local-yml-file
+                 update-setup-cfg-file yamllint
 
         You can pass extra arguments including options to the pre-commit framework as
         <EXTRA_ARGS> passed after --. For example:
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f2e4490..cfd1c70 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -541,14 +541,14 @@ This is the full list of those extras:
   .. START EXTRAS HERE
 
 all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
-apache.hive, apache.kylin, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra,
-celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc,
-docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google,
-google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password,
-pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry,
-singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm,
-yandexcloud, all, devel_ci
+apache.hive, apache.kylin, apache.pinot, apache.presto, apache.spark, apache.webhdfs, async, atlas,
+aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel,
+devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
+google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
+microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
+papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
+segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
+virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
 
   .. END EXTRAS HERE
 
diff --git a/INSTALL b/INSTALL
index c9ea969..da6d6ad 100644
--- a/INSTALL
+++ b/INSTALL
@@ -64,14 +64,14 @@ pip install -e . \
 # START EXTRAS HERE
 
 all_dbs, amazon, apache.atlas, apache.beam, apache.cassandra, apache.druid, apache.hdfs,
-apache.hive, apache.kylin, apache.pinot, apache.webhdfs, async, atlas, aws, azure, cassandra,
-celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel, devel_hadoop, doc,
-docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise, google,
-google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap, microsoft.azure,
-microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty, papermill, password,
-pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba, segment, sendgrid, sentry,
-singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica, virtualenv, webhdfs, winrm,
-yandexcloud, all, devel_ci
+apache.hive, apache.kylin, apache.pinot, apache.presto, apache.spark, apache.webhdfs, async, atlas,
+aws, azure, cassandra, celery, cgroups, cloudant, cncf.kubernetes, dask, databricks, datadog, devel,
+devel_hadoop, doc, docker, druid, elasticsearch, exasol, facebook, gcp, gcp_api, github_enterprise,
+google, google_auth, grpc, hashicorp, hdfs, hive, jdbc, jira, kerberos, kubernetes, ldap,
+microsoft.azure, microsoft.mssql, microsoft.winrm, mongo, mssql, mysql, odbc, oracle, pagerduty,
+papermill, password, pinot, plexus, postgres, presto, qds, rabbitmq, redis, salesforce, samba,
+segment, sendgrid, sentry, singularity, slack, snowflake, spark, ssh, statsd, tableau, vertica,
+virtualenv, webhdfs, winrm, yandexcloud, all, devel_ci
 
 # END EXTRAS HERE
 
diff --git a/STATIC_CODE_CHECKS.rst b/STATIC_CODE_CHECKS.rst
index d3fb5c7..990852c 100644
--- a/STATIC_CODE_CHECKS.rst
+++ b/STATIC_CODE_CHECKS.rst
@@ -138,6 +138,8 @@ require Breeze Docker images to be installed locally:
 ----------------------------------- ---------------------------------------------------------------- ------------
 ``setup-order``                       Checks for an order of dependencies in setup.py
 ----------------------------------- ---------------------------------------------------------------- ------------
+``setup-installation``                Checks if all the libraries in setup.py are listed in docs
+----------------------------------- ---------------------------------------------------------------- ------------
 ``shellcheck``                        Checks shell files with shellcheck.
 ----------------------------------- ---------------------------------------------------------------- ------------
 ``sort-in-the-wild``                  Sort INTHEWILD.md alphabetically.
diff --git a/breeze-complete b/breeze-complete
index 6f0cdb4..33f4847 100644
--- a/breeze-complete
+++ b/breeze-complete
@@ -112,6 +112,7 @@ python-no-log-warn
 restrict-start_date
 rst-backticks
 setup-order
+setup-installation
 shellcheck
 sort-in-the-wild
 stylelint
diff --git a/docs/installation.rst b/docs/installation.rst
index 1654d65..2070391 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -120,8 +120,6 @@ Here's the list of the subpackages and what they enable:
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | all_dbs             | ``pip install 'apache-airflow[all_dbs]'``           | All databases integrations                                           |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| crypto              | ``pip install 'apache-airflow[crypto]'``            | Encrypt connection passwords in metadata db                          |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | devel               | ``pip install 'apache-airflow[devel]'``             | Minimum dev tools requirements                                       |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | devel_hadoop        | ``pip install 'apache-airflow[devel_hadoop]'``      | Airflow + dependencies on the Hadoop stack                           |
@@ -151,16 +149,10 @@ Here's the list of the subpackages and what they enable:
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | kylin               | ``pip install 'apache-airflow[apache.kylin]'``      | All Kylin related operators & hooks                                  |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| livy                | ``pip install 'apache-airflow[apache.livy]'``       | All Livy related operators & hooks                                   |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| pig                 | ``pip install 'apache-airflow[apache.pig]'``        | All Pig related operators & hooks                                    |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | presto              | ``pip install 'apache-airflow[apache.presto]'``     | All Presto related operators & hooks                                 |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | spark               | ``pip install 'apache-airflow[apache.spark]'``      | All Spark related operators & hooks                                  |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
-| sqoop               | ``pip install 'apache-airflow[apache.sqoop]'``      | All Sqoop related operators & hooks                                  |
-+---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | webhdfs             | ``pip install 'apache-airflow[webhdfs]'``           | HDFS hooks and operators                                             |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 
@@ -192,6 +184,10 @@ Here's the list of the subpackages and what they enable:
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | jira                | ``pip install 'apache-airflow[jira]'``              | Jira hooks and operators                                             |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| pagerduty           | ``pip install 'apache-airflow[pagerduty]'``         |                                                                      |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| plexus              | ``pip install 'apache-airflow[plexus]'``            |                                                                      |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | qds                 | ``pip install 'apache-airflow[qds]'``               | Enable QDS (Qubole Data Service) support                             |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | salesforce          | ``pip install 'apache-airflow[salesforce]'``        | Salesforce hook                                                      |
@@ -200,12 +196,16 @@ Here's the list of the subpackages and what they enable:
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | segment             | ``pip install 'apache-airflow[segment]'``           | Segment hooks and sensors                                            |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| sentry              | ``pip install 'apache-airflow[sentry]'``            |                                                                      |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | slack               | ``pip install 'apache-airflow[slack]'``             | :class:`airflow.providers.slack.operators.slack.SlackAPIOperator`    |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | snowflake           | ``pip install 'apache-airflow[snowflake]'``         | Snowflake hooks and operators                                        |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 | vertica             | ``pip install 'apache-airflow[vertica]'``           | Vertica hook support as an Airflow backend                           |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
+| yandexcloud         | ``pip install 'apache-airflow[yandexcloud]'``       |                                                                      |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------+
 
 
 **Software:**
@@ -257,8 +257,14 @@ Here's the list of the subpackages and what they enable:
 +---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
 | samba               | ``pip install 'apache-airflow[samba]'``             | :class:`airflow.providers.apache.hive.transfers.hive_to_samba.HiveToSambaOperator` |
 +---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| singularity         | ``pip install 'apache-airflow[singularity]'``       |                                                                                    |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
 | statsd              | ``pip install 'apache-airflow[statsd]'``            | Needed by StatsD metrics                                                           |
 +---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| tableau             | ``pip install 'apache-airflow[tableau]'``           |                                                                                    |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
+| virtualenv          | ``pip install 'apache-airflow[virtualenv]'``        |                                                                                    |
++---------------------+-----------------------------------------------------+------------------------------------------------------------------------------------+
 
 
 **Other:**
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index ff01a53..8c0fe82 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -196,7 +196,6 @@ JobRunning
 JobTrigger
 Json
 Jupyter
-jupytercmd
 KYLIN
 Kalibrr
 Kamil
@@ -868,6 +867,7 @@ js
 json
 jthomas
 jupyter
+jupytercmd
 kaxilnaik
 keepalive
 keepalives
@@ -999,6 +999,7 @@ os
 ot
 overridable
 oversubscription
+pagerduty
 pageviews
 paginator
 papermill
@@ -1109,8 +1110,8 @@ repos
 reqs
 resetdb
 resourceVersion
-resumable
 resultset
+resumable
 retransmits
 rfc
 ricard
diff --git a/scripts/ci/pre_commit/pre_commit_check_setup_installation.py b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py
new file mode 100755
index 0000000..7c30683
--- /dev/null
+++ b/scripts/ci/pre_commit/pre_commit_check_setup_installation.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Checks if all the libraries in setup.py are listed in installation.rst file
+"""
+
+import os
+import re
+from os.path import dirname
+from typing import Dict, List
+
+AIRFLOW_SOURCES_DIR = os.path.join(dirname(__file__), os.pardir, os.pardir, os.pardir)
+SETUP_PY_FILE = 'setup.py'
+DOCS_FILE = 'installation.rst'
+PY_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_\.]*'
+
+
+def get_file_content(*path_elements: str) -> str:
+    file_path = os.path.join(AIRFLOW_SOURCES_DIR, *path_elements)
+    with open(file_path) as file_to_read:
+        return file_to_read.read()
+
+
+def get_extras_from_setup() -> Dict[str, List[str]]:
+    """
+    Returns a dict built from EXTRAS_REQUIREMENTS in the setup.py file, in the format:
+    {'package name': ['alias1', 'alias2'], ...}
+    """
+    setup_content = get_file_content(SETUP_PY_FILE)
+
+    extras_section_regex = re.compile(
+        r'^EXTRAS_REQUIREMENTS: Dict[^{]+{([^}]+)}', re.MULTILINE)
+    extras_section = extras_section_regex.findall(setup_content)[0]
+
+    extras_regex = re.compile(
+        rf'^\s+[\"\']({PY_IDENTIFIER})[\"\']:\s*({PY_IDENTIFIER})[^#\n]*(#\s*TODO.*)?$', re.MULTILINE)
+
+    extras_dict: Dict[str, List[str]] = {}
+    for extras in extras_regex.findall(extras_section):
+        package = extras[1]
+        alias = extras[0]
+        if not extras_dict.get(package):
+            extras_dict[package] = []
+        extras_dict[package].append(alias)
+    return extras_dict
+
+
+def get_extras_from_docs() -> List[str]:
+    """
+    Returns a list of the installable package names found in installation.rst.
+    """
+    docs_content = get_file_content('docs', DOCS_FILE)
+
+    extras_section_regex = re.compile(rf'^\|[^|]+\|.*pip install .apache-airflow\[({PY_IDENTIFIER})\].',
+                                      re.MULTILINE)
+    extras = extras_section_regex.findall(docs_content)
+
+    extras = list(filter(lambda entry: entry != 'all', extras))
+    return extras
+
+
+if __name__ == '__main__':
+    setup_packages = get_extras_from_setup()
+    docs_packages = get_extras_from_docs()
+
+    output_table = ""
+
+    for extras in sorted(setup_packages.keys()):
+        if not set(setup_packages[extras]).intersection(docs_packages):
+            output_table += "| {:20} | {:^10} | {:^10} |\n".format(extras, "V", "")
+
+    setup_packages_str = str(setup_packages)
+    for extras in sorted(docs_packages):
+        if f"'{extras}'" not in setup_packages_str:
+            output_table += "| {:20} | {:^10} | {:^10} |\n".format(extras, "", "V")
+
+    if(output_table == ""):
+        exit(0)
+
+    print(f"""
+ERROR
+
+"EXTRAS_REQUIREMENTS" section in {SETUP_PY_FILE} should be synchronized
+with "Extra Packages" section in documentation file docs/{DOCS_FILE}.
+
+here is a list of packages that are used but are not documented, or
+documented although not used.
+    """)
+    print(".{:_^22}.{:_^12}.{:_^12}.".format("NAME", "SETUP", "INSTALLATION"))
+    print(output_table)
+
+    exit(1)
diff --git a/setup.py b/setup.py
index 7e1a0cc..43a71b1 100644
--- a/setup.py
+++ b/setup.py
@@ -564,6 +564,8 @@ EXTRAS_REQUIREMENTS: Dict[str, Iterable[str]] = {
     "apache.hive": hive,
     "apache.kylin": kylin,
     "apache.pinot": pinot,
+    "apache.presto": presto,
+    "apache.spark": spark,
     "apache.webhdfs": webhdfs,
     'async': async_packages,
     'atlas': atlas,  # TODO: remove this in Airflow 2.1