You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/12/16 13:37:26 UTC

[airflow] branch main updated: Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2c80aaa  Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)
2c80aaa is described below

commit 2c80aaab4f486688fa4b8e252e1147a5dfabee54
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Thu Dec 16 14:36:53 2021 +0100

    Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)
---
 .github/workflows/ci.yml                           |   2 +-
 .gitignore                                         |   3 +
 .pre-commit-config.yaml                            |   2 +-
 Breeze2                                            |  46 ++++-
 .../adr/0003-bootstraping-virtual-environment.md   | 207 +++++++++++++++++++++
 dev/breeze/src/airflow_breeze/breeze.py            |  46 ++++-
 dev/breeze/src/airflow_breeze/visuals/__init__.py  |   2 +-
 dev/breeze/tests/test_find_airflow_directory.py    |  51 +++++
 8 files changed, 342 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e7f4c97..fa42cd4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -254,7 +254,7 @@ jobs:
         with:
           python-version: '3.7'
           cache: 'pip'
-      - run: pip install .
+      - run: pip install -e .
       - run: python3 -m pytest -n auto --color=yes
 
   tests-ui:
diff --git a/.gitignore b/.gitignore
index 5f37105..f6a605a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -220,3 +220,6 @@ pip-wheel-metadata
 
 # Generated UI licenses
 licenses/LICENSES-ui.txt
+
+# Packaged Breeze2 on Windows
+/Breeze2.exe
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ca983cc..b310fd6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -408,7 +408,7 @@ repos:
         language: pygrep
         name: Only capitalized Breeze used in Breeze2.
         description: Please use capitalized "Breeze" in the new Breeze docs
-        entry: "breeze"
+        entry: ([\W\s\n\t\r]|^)breeze([\W\s\n\t\r]|$)
         pass_filenames: true
         files: ^dev/breeze/doc
       - id: base-operator
diff --git a/Breeze2 b/Breeze2
index 60cf940..9591f6b 100755
--- a/Breeze2
+++ b/Breeze2
@@ -4,6 +4,8 @@ import os
 import sys
 
 # Python <3.4 does not have pathlib
+from venv import EnvBuilder
+
 if sys.version_info.major != 3 or sys.version_info.minor < 7:
     print("ERROR! Make sure you use Python 3.7+ !!")
     sys.exit(1)
@@ -12,13 +14,19 @@ import subprocess
 from os import execv
 from pathlib import Path
 
-AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
+if getattr(sys, 'frozen', False):
+    # If the application is run as a bundle, the PyInstaller bootloader
+    # extends the sys module by a flag frozen=True and sets the temporary app
+    # path into variable _MEIPASS, and sys.executable is Breeze's executable path.
+    AIRFLOW_SOURCES_DIR = Path(sys.executable).parent.resolve()
+else:
+    AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
 BUILD_DIR = AIRFLOW_SOURCES_DIR / ".build"
 BUILD_BREEZE_DIR = BUILD_DIR / "breeze2"
 BUILD_BREEZE_CFG_SAVED = BUILD_BREEZE_DIR / "setup.cfg.saved"
 BUILD_BREEZE_VENV_DIR = BUILD_BREEZE_DIR / "venv"
-BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / "bin"
-BUILD_BREEZE_VENV_PIP = BUILD_BREEZE_VENV_BIN_DIR / "pip"
+BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / ("Scripts" if os.name == 'nt' else "bin")
+BUILD_BREEZE_VENV_PYTHON = BUILD_BREEZE_VENV_BIN_DIR / "python"
 BUILD_BREEZE_VENV_BREEZE = BUILD_BREEZE_VENV_BIN_DIR / "Breeze2"
 
 BREEZE_SOURCE_PATH = AIRFLOW_SOURCES_DIR / "dev" / "breeze"
@@ -41,15 +49,35 @@ def save_config():
 
 if needs_installation():
     print(f"(Re)Installing Breeze's virtualenv in {BUILD_BREEZE_VENV_DIR}")
-    BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
-    subprocess.run([sys.executable, "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
-    subprocess.run(
-        [f"{BUILD_BREEZE_VENV_PIP}", "install", "--upgrade", "-e", "."], cwd=BREEZE_SOURCE_PATH, check=True
-    )
+    try:
+        EnvBuilder(system_site_packages=False, upgrade=True, with_pip=True, prompt="breeze").create(
+            str(BUILD_BREEZE_VENV_DIR)
+        )
+    except Exception as e:
+        # In some cases (mis-configured Python) the venv creation might not work via the API
+        # (ensurepip missing). This is the case for the default MacOS Python and for the Python
+        # bundled in the Windows executable. In this case we fall back to running venv as a tool using
+        # the default python3 found on the path (in case of the Windows bundled exe, you don't even have
+        # a current interpreter executable available, because the Python interpreter is executed through
+        # a library and sys.executable points to the bundled exe file).
+        BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
+        subprocess.run(["python3", "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
+    if os.name == 'nt':
+        subprocess.run(
+            [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
+            cwd=BREEZE_SOURCE_PATH,
+            check=True,
+        )
+    else:
+        subprocess.run(
+            [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
+            cwd=BREEZE_SOURCE_PATH,
+            check=True,
+        )
     save_config()
 
 if os.name == 'nt':
     # This is the best way of running it on Windows, though it leaves the original process hanging around
-    subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}.exe"] + sys.argv[1:], check=True)
+    subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:], check=True)
 else:
     execv(f"{BUILD_BREEZE_VENV_BREEZE}", [f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:])
diff --git a/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md
new file mode 100644
index 0000000..64717a7
--- /dev/null
+++ b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md
@@ -0,0 +1,207 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.  See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ -->
+
+<!-- START doctoc generated TOC please keep comment here to allow auto update -->
+<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
+**Table of Contents**  *generated with [DocToc](https://github.com/thlorenz/doctoc)*
+
+- [3. Bootstraping the virtual environment](#3-bootstraping-the-virtual-environment)
+  - [Status](#status)
+  - [Context](#context)
+  - [Decision](#decision)
+  - [Alternatives considered](#alternatives-considered)
+  - [Consequences](#consequences)
+
+<!-- END doctoc generated TOC please keep comment here to allow auto update -->
+
+# 3. Bootstraping the virtual environment
+
+Date: 2021-12-06
+
+## Status
+
+Draft
+
+## Context
+
+Since Breeze is written in Python, it needs to be run in its own virtual environment.
+This virtual environment is different from Airflow virtualenv as it contains only a
+small set of tools (for example rich) that are not present in the standard Python
+library. We want to keep the virtualenv separated, because setting up Airflow
+virtualenv is hard (especially if you consider cross-platform use). The virtualenv
+is needed mainly to run the script that will actually manage airflow installation
+and dependencies, in the form of Docker images which are part of Breeze.
+
+This virtualenv needs to be easy to set up and it should support the "live" nature
+of Breeze. The idea is that the user of Breeze does not have to take any action
+to update to the latest version of the virtualenv when new dependencies are
+added. Also, when new Breeze functionalities are added, they should be automatically
+available for the user after the repository is updated to the latest version.
+
+The user should not have to think about installing and upgrading Breeze separately from
+switching to a different Airflow tag or branch - moreover, the Breeze environment
+should automatically adapt to the version and branch the user checked out. By its
+nature Airflow Breeze (at least for quite a while) will be evolving together with
+Airflow and it will live in the same repository and new features and behaviours
+will be added continuously.
+
+The workflow that needs to be supported should tap into the regular workflow
+of the user who is developing Airflow.
+
+* git checkout branch
+
+./Breeze should use the version of Breeze that is available in this version
+
+* git rebase --onto apache/main
+
+./Breeze should be automatically updated to the latest version available
+in main (including dependencies)
+
+Also if someone develops Breeze itself, the experience should be seamlessly
+integrated - modification of Breeze code locally should be automatically
+reflected in the Breeze environment of the user who is modifying Breeze.
+
+The user should not have to re-install/update Breeze to automatically use
+the modified Breeze source code when running Breeze commands and testing
+them with Airflow.
+
+Breeze is also used as part of CI - common Python functions and libraries
+are used across both the Breeze development environment and the Continuous
+Integration we run. It is an established practice that the logic
+of the CI is stored in the same repository as the source code of the
+application it tests, and part of the Breeze functions are shared with CI.
+
+In the future, when Breeze2 stabilizes and its update cadence becomes
+much slower (which is likely, as it happened with the Breeze predecessor),
+there could be an option that Breeze is installed as a separate package and
+the same released Breeze version could be used to manage multiple Airflow
+versions. For that we might want to release Breeze as a separate package
+in PyPI. However, since there is the CI integration, the source code
+version of Breeze will remain part of Airflow's source code.
+
+
+## Decision
+
+The decision is to implement Breeze in a subfolder (`dev/breeze/`) of
+Apache Airflow as a Python project following the standard setuptools-enabled
+project layout. The project contains setup.py and dependencies described
+in setup.cfg and contains both source code and tests for Breeze code.
+
+The sub-project could be used in the future to produce a PyPI package
+(we reserved such a package in PyPI), however its main purpose is
+to install Breeze in a separate virtualenv bootstrapped
+automatically in editable mode.
+
+There are two ways you will be able to install `Breeze2` - locally in the
+repository using the `./Breeze2` bootstrapping script, and using `pipx`.
+
+The bootstrapping Python script (`Breeze2` in the main repository
+of Airflow) performs the following tasks:
+
+* when run for the first time it creates the `.build/breeze2/venv` virtual
+  environment (Python 3.7+ based) - with the locally installed `dev/breeze`
+  project in editable mode (`pip install -e .`) - this makes sure
+  that the users of Breeze will use the latest version of Breeze
+  available in their version of the repository
+* when run subsequently, it will check if setup files changed for
+  Breeze (dependencies changed) and if they did it will automatically
+  reinstall the environment, adding missing dependencies
+* after managing the venv, the Breeze2 script will simply execute
+  the actual Breeze2 script in the `.build/breeze2/venv` passing the
+  parameters to the script. For the user, the effect will be the same
+  as activating the virtualenv and executing the ./Breeze2 from
+  there (but it will happen automatically and invisibly for the
+  user)
+* In a Windows environment, where there is no easy/popular equivalent
+  of running scripts with a shebang (#!) as in Posix
+  environments, users will be able to locally build (using
+  `pyinstaller`) a `Breeze2.exe` frozen Python script that will
+  essentially do the same. They could also use the `python Breeze2`
+  command or switch to Git Bash to utilize the shebang feature
+  (Git Bash comes together with Git when installed on Windows)
+* The second option is to use `pipx` to install Breeze2.
+  The `pipx` tool is almost equivalent to what the bootstrapping does
+  and many users might actually choose to install Breeze this
+  way - so we will add it as an option. To install Breeze
+  with pipx, `pipx install -e <BREEZE FOLDER>` is the right
+  installation instruction. The installation can be updated
+  by `pipx install --force -e <BREEZE FOLDER>`.
+  The benefit of using `pipx` is that Breeze becomes
+  available on the path when you install it this way, also
+  it provides out-of-the-box Windows support. The drawback is
+  that when new dependencies are added, they will not be
+  automatically installed and that you need to manually force
+  re-installation if new dependencies are used - which is not
+  as seamlessly integrated in the regular development
+  environment, and it might create some confusion for the
+  users who would have to learn `pipx` and its commands.
+  Another drawback of `pipx` is that it installs one global
+  version of Breeze2 for all projects, whereas it is quite
+  possible that someone has two different versions of
+  the Airflow repository checked out, and the bootstrapping
+  script provides this capability.
+
+The bootstrapping script is a temporary measure, until the
+dependencies of Breeze stabilize enough that the need
+to recreate the virtual environment by `pipx` will be
+very infrequent. In this case `pipx` provides a better
+user experience, and we might even decide to remove the
+bootstrapping script and switch fully to `pipx`.
+
+## Alternatives considered
+
+The alternatives considered were:
+
+* `nox` - this is a tool to manage virtualenvs for testing. While
+  it has some built-in virtualenv capabilities, it is an
+  additional tool that needs to be installed and it lacks
+  the automation of checking and recreation of the virtualenv
+  when needed (you need to manually run nox to update the environment).
+  Also, it is targeted at building multiple virtualenvs
+  for tests - it has nice pytest integration for example, but it
+  has lacked support for managing editable installs for a long time.
+
+* `pyenv` - this is the de-facto standard for maintenance of
+  virtualenvs. It has the capability of creating and switching
+  between virtualenvs easily. Together with some of its plugins
+  (pyenv-virtualenv and auto-activation) it could serve the
+  purpose quite well. However the problem is that if you
+  also use `pyenv` to manage your `airflow` virtualenv this might
+  be a source of confusion. Should I activate airflow virtualenv
+  or Breeze2 venv to run tests? Part of Breeze experience is
+  to activate local Airflow virtualenv for IDE integration and
+  since this is different than simple Breeze virtualenv, using
+  pytest and autoactivation in this case might lead to a lot
+  of confusion. Keeping the Breeze virtualenv "hidden" and
+  mostly "used" but not deliberately activated is a better
+  choice - especially that most users will simply "use" Breeze2
+  as an app rather than activate the environment deliberately.
+  Also choosing `pyenv` and its virtualenv plugin would
+  add extra, unnecessary steps and prerequisites for Breeze.
+
+
+## Consequences
+
+Using Breeze for new users will be much simpler, without
+having to install any prerequisites. The virtualenv used by
+Breeze2 will be hidden from the user, and used behind the
+scenes - and the dependencies used will be automatically
+installed when needed. This will allow us to seamlessly
+integrate the Breeze tool in the development experience without
+having to worry about extra maintenance needed.
diff --git a/dev/breeze/src/airflow_breeze/breeze.py b/dev/breeze/src/airflow_breeze/breeze.py
index 9969de7..c854d32 100755
--- a/dev/breeze/src/airflow_breeze/breeze.py
+++ b/dev/breeze/src/airflow_breeze/breeze.py
@@ -15,6 +15,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import os
+from pathlib import Path
+from typing import Optional
 
 import click
 from click import ClickException
@@ -25,15 +28,47 @@ from airflow_breeze.visuals import ASCIIART, ASCIIART_STYLE
 NAME = "Breeze2"
 VERSION = "0.0.1"
 
+__AIRFLOW_SOURCES_ROOT = Path.cwd()
 
-@click.group()
-def main():
-    pass
-
+__AIRFLOW_CFG_FILE = "setup.cfg"
 
 console = Console(force_terminal=True, color_system="standard", width=180)
 
 
+def get_airflow_sources_root():
+    return __AIRFLOW_SOURCES_ROOT
+
+
+def search_upwards_for_airflow_sources_root(start_from: Path) -> Optional[Path]:
+    root = Path(start_from.root)
+    d = start_from
+    while d != root:
+        attempt = d / __AIRFLOW_CFG_FILE
+        if attempt.exists() and "name = apache-airflow\n" in attempt.read_text():
+            return attempt.parent
+        d = d.parent
+    return None
+
+
+def find_airflow_sources_root():
+    # Try to find airflow sources in current working dir
+    airflow_sources_root = search_upwards_for_airflow_sources_root(Path.cwd())
+    if not airflow_sources_root:
+        # Or if it fails, find it in parents of the directory where the ./breeze.py is.
+        airflow_sources_root = search_upwards_for_airflow_sources_root(Path(__file__).resolve().parent)
+    global __AIRFLOW_SOURCES_ROOT
+    if airflow_sources_root:
+        __AIRFLOW_SOURCES_ROOT = airflow_sources_root
+    else:
+        console.print(f"\n[yellow]Could not find Airflow sources location. Assuming {__AIRFLOW_SOURCES_ROOT}")
+    os.chdir(__AIRFLOW_SOURCES_ROOT)
+
+
+@click.group()
+def main():
+    find_airflow_sources_root()
+
+
 option_verbose = click.option(
     "--verbose",
     is_flag=True,
@@ -54,6 +89,7 @@ def shell(verbose: bool):
     """Enters breeze.py environment. this is the default command use when no other is selected."""
     if verbose:
         console.print("\n[green]Welcome to breeze.py[/]\n")
+        console.print(f"\n[green]Root of Airflow Sources = {__AIRFLOW_SOURCES_ROOT}[/]\n")
     console.print(ASCIIART, style=ASCIIART_STYLE)
     raise ClickException("\nPlease implement entering breeze.py\n")
 
@@ -63,7 +99,7 @@ def shell(verbose: bool):
 def build_ci_image(verbose: bool):
     """Builds breeze.ci image for breeze.py."""
     if verbose:
-        console.print("\n[blue]Building image[/]\n")
+        console.print(f"\n[blue]Building image of airflow from {__AIRFLOW_SOURCES_ROOT}[/]\n")
     raise ClickException("\nPlease implement building the CI image\n")
 
 
diff --git a/dev/breeze/src/airflow_breeze/visuals/__init__.py b/dev/breeze/src/airflow_breeze/visuals/__init__.py
index 2df7911..018b1ad 100644
--- a/dev/breeze/src/airflow_breeze/visuals/__init__.py
+++ b/dev/breeze/src/airflow_breeze/visuals/__init__.py
@@ -59,4 +59,4 @@ ASCIIART = """
 
 """
 
-ASCIIART_STYLE = "blue"
+ASCIIART_STYLE = "white"
diff --git a/dev/breeze/tests/test_find_airflow_directory.py b/dev/breeze/tests/test_find_airflow_directory.py
new file mode 100644
index 0000000..a506644
--- /dev/null
+++ b/dev/breeze/tests/test_find_airflow_directory.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+from unittest import mock
+
+from airflow_breeze.breeze import find_airflow_sources_root, get_airflow_sources_root
+
+ACTUAL_AIRFLOW_SOURCES = Path(__file__).parent.parent.parent.parent
+ROOT_PATH = Path(Path(__file__).root)
+
+
+def test_find_airflow_root_upwards_from_cwd(capsys):
+    os.chdir(Path(__file__).parent)
+    find_airflow_sources_root()
+    assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root()
+    output = str(capsys.readouterr().out)
+    assert output == ''
+
+
+def test_find_airflow_root_upwards_from_file(capsys):
+    os.chdir(Path(__file__).root)
+    find_airflow_sources_root()
+    assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root()
+    output = str(capsys.readouterr().out)
+    assert output == ''
+
+
+@mock.patch('airflow_breeze.breeze.__AIRFLOW_SOURCES_ROOT', ROOT_PATH)
+@mock.patch('airflow_breeze.breeze.__AIRFLOW_CFG_FILE', "bad_name.cfg")
+def test_fallback_find_airflow_root(capsys):
+    os.chdir(ROOT_PATH)
+    find_airflow_sources_root()
+    assert ROOT_PATH == get_airflow_sources_root()
+    output = str(capsys.readouterr().out)
+    assert "Could not find Airflow sources" in output