You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2021/12/16 13:37:26 UTC
[airflow] branch main updated: Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 2c80aaa Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)
2c80aaa is described below
commit 2c80aaab4f486688fa4b8e252e1147a5dfabee54
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Thu Dec 16 14:36:53 2021 +0100
Enable local Breeze script and `pipx` to be used for breeze bootstrap (#19992)
---
.github/workflows/ci.yml | 2 +-
.gitignore | 3 +
.pre-commit-config.yaml | 2 +-
Breeze2 | 46 ++++-
.../adr/0003-bootstraping-virtual-environment.md | 207 +++++++++++++++++++++
dev/breeze/src/airflow_breeze/breeze.py | 46 ++++-
dev/breeze/src/airflow_breeze/visuals/__init__.py | 2 +-
dev/breeze/tests/test_find_airflow_directory.py | 51 +++++
8 files changed, 342 insertions(+), 17 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e7f4c97..fa42cd4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -254,7 +254,7 @@ jobs:
with:
python-version: '3.7'
cache: 'pip'
- - run: pip install .
+ - run: pip install -e .
- run: python3 -m pytest -n auto --color=yes
tests-ui:
diff --git a/.gitignore b/.gitignore
index 5f37105..f6a605a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -220,3 +220,6 @@ pip-wheel-metadata
# Generated UI licenses
licenses/LICENSES-ui.txt
+
+# Packaged Breeze2 on Windows
+/Breeze2.exe
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ca983cc..b310fd6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -408,7 +408,7 @@ repos:
language: pygrep
name: Only capitalized Breeze used in Breeze2.
description: Please use capitalized "Breeze" in the new Breeze docs
- entry: "breeze"
+ entry: ([\W\s\n\t\r]|^)breeze([\W\s\n\t\r]|$)
pass_filenames: true
files: ^dev/breeze/doc
- id: base-operator
diff --git a/Breeze2 b/Breeze2
index 60cf940..9591f6b 100755
--- a/Breeze2
+++ b/Breeze2
@@ -4,6 +4,8 @@ import os
import sys
# Python <3.4 does not have pathlib
+from venv import EnvBuilder
+
if sys.version_info.major != 3 or sys.version_info.minor < 7:
print("ERROR! Make sure you use Python 3.7+ !!")
sys.exit(1)
@@ -12,13 +14,19 @@ import subprocess
from os import execv
from pathlib import Path
-AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
+if getattr(sys, 'frozen', False):
+ # If the application is run as a bundle, the PyInstaller bootloader
+ # extends the sys module with a flag frozen=True and stores the temporary app
+ # path in sys._MEIPASS; sys.executable is then the Breeze2 executable's path.
+ AIRFLOW_SOURCES_DIR = Path(sys.executable).parent.resolve()
+else:
+ AIRFLOW_SOURCES_DIR = Path(__file__).parent.resolve()
BUILD_DIR = AIRFLOW_SOURCES_DIR / ".build"
BUILD_BREEZE_DIR = BUILD_DIR / "breeze2"
BUILD_BREEZE_CFG_SAVED = BUILD_BREEZE_DIR / "setup.cfg.saved"
BUILD_BREEZE_VENV_DIR = BUILD_BREEZE_DIR / "venv"
-BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / "bin"
-BUILD_BREEZE_VENV_PIP = BUILD_BREEZE_VENV_BIN_DIR / "pip"
+BUILD_BREEZE_VENV_BIN_DIR = BUILD_BREEZE_VENV_DIR / ("Scripts" if os.name == 'nt' else "bin")
+BUILD_BREEZE_VENV_PYTHON = BUILD_BREEZE_VENV_BIN_DIR / "python"
BUILD_BREEZE_VENV_BREEZE = BUILD_BREEZE_VENV_BIN_DIR / "Breeze2"
BREEZE_SOURCE_PATH = AIRFLOW_SOURCES_DIR / "dev" / "breeze"
@@ -41,15 +49,35 @@ def save_config():
if needs_installation():
print(f"(Re)Installing Breeze's virtualenv in {BUILD_BREEZE_VENV_DIR}")
- BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
- subprocess.run([sys.executable, "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
- subprocess.run(
- [f"{BUILD_BREEZE_VENV_PIP}", "install", "--upgrade", "-e", "."], cwd=BREEZE_SOURCE_PATH, check=True
- )
+ try:
+ EnvBuilder(system_site_packages=False, upgrade=True, with_pip=True, prompt="breeze").create(
+ str(BUILD_BREEZE_VENV_DIR)
+ )
+ except Exception as e:
+ # In some cases (mis-configured Python) the venv creation might not work via the API
+ # (ensurepip missing). This happens with the default macOS Python and with the Python
+ # bundled in the Windows executable. In such cases we fall back to running venv as a tool using
+ # the default python3 found on the PATH (with the bundled Windows exe there is not even a
+ # current interpreter executable available, because the Python interpreter runs as a library
+ # and sys.executable points to the bundled exe file).
+ BUILD_BREEZE_VENV_DIR.mkdir(parents=True, exist_ok=True)
+ subprocess.run(["python3", "-m", "venv", f"{BUILD_BREEZE_VENV_DIR}"], check=True)
+ if os.name == 'nt':
+ subprocess.run(
+ [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
+ cwd=BREEZE_SOURCE_PATH,
+ check=True,
+ )
+ else:
+ subprocess.run(
+ [f"{BUILD_BREEZE_VENV_PYTHON}", "-m", "pip", "install", "--upgrade", "-e", "."],
+ cwd=BREEZE_SOURCE_PATH,
+ check=True,
+ )
save_config()
if os.name == 'nt':
# This is the best way of running it on Windows, though it leaves the original process hanging around
- subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}.exe"] + sys.argv[1:], check=True)
+ subprocess.run([f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:], check=True)
else:
execv(f"{BUILD_BREEZE_VENV_BREEZE}", [f"{BUILD_BREEZE_VENV_BREEZE}"] + sys.argv[1:])
diff --git a/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md
new file mode 100644
index 0000000..64717a7
--- /dev/null
+++ b/dev/breeze/doc/adr/0003-bootstraping-virtual-environment.md
@@ -0,0 +1,207 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ -->
+
+<!-- START doctoc generated TOC please keep comment here to allow auto update -->
+<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
+**Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)*
+
+- [3. Bootstraping the virtual environment](#3-bootstraping-the-virtual-environment)
+ - [Status](#status)
+ - [Context](#context)
+ - [Decision](#decision)
+ - [Alternatives considered](#alternatives-considered)
+ - [Consequences](#consequences)
+
+<!-- END doctoc generated TOC please keep comment here to allow auto update -->
+
+# 3. Bootstraping the virtual environment
+
+Date: 2021-12-06
+
+## Status
+
+Draft
+
+## Context
+
+Since Breeze is written in Python, it needs to be run in its own virtual environment.
+This virtual environment is different from Airflow virtualenv as it contains only a
+small set of tools (for example rich) that are not present in the standard Python
+library. We want to keep the virtualenv separated, because setting up Airflow
+virtualenv is hard (especially if you consider cross-platform use). The virtualenv
+is needed mainly to run the script that will actually manage airflow installation
+and dependencies, in the form of Docker images which are part of Breeze.
+
+This virtualenv needs to be easy to set up and it should support the "live" nature
+of Breeze. The idea is that the user of Breeze does not have to take any action
+to update the virtualenv to the latest version when new dependencies are
+added. Also, when new Breeze functionalities are added, they should be automatically
+available to the user after the repository is updated to the latest version.
+
+The user should not have to think about installing and upgrading Breeze separately from
+switching to a different Airflow tag or branch - moreover, the Breeze environment
+should automatically adapt to the version and branch the user checked out. By its
+nature Airflow Breeze (at least for quite a while) will be evolving together with
+Airflow: it will live in the same repository, and new features and behaviours
+will be added continuously.
+
+The workflow that needs to be supported should tap into the regular workflow
+of the user who is developing Airflow.
+
+* git checkout branch
+
+./Breeze should use the version of Breeze that is available in this version
+
+* git rebase --onto apache/main
+
+./Breeze should be automatically updated to the latest version available
+in main (including dependencies)
+
+Also if someone develops Breeze itself, the experience should be seamlessly
+integrated - modification of Breeze code locally should be automatically
+reflected in the Breeze environment of the user who is modifying Breeze.
+
+The user should not have to re-install/update Breeze to automatically use
+the modified Breeze source code when running Breeze commands and testing
+them with Airflow.
+
+Breeze is also used as part of CI - common Python functions and libraries
+are used across both the Breeze development environment and the Continuous
+Integration we run. It is an established practice that the logic
+of the CI is stored in the same repository as the source code of the
+application it tests, and part of the Breeze functions are shared with CI.
+
+In the future, when Breeze2 stabilizes and its update cadence becomes
+much slower (which is likely, as it happened with the Breeze predecessor),
+there could be an option to install Breeze as a separate package, so that the
+same released Breeze version could be used to manage multiple Airflow
+versions. For that we might want to release Breeze as a separate package
+in PyPI. However, since there is the CI integration, the source code
+version of Breeze will remain part of Airflow's source code.
+
+
+## Decision
+
+The decision is to implement Breeze in a subfolder (`dev/breeze/`) of
+Apache Airflow as a Python project following the standard layout of a
+setuptools-enabled project. The project contains setup.py, with dependencies
+described in setup.cfg, and contains both source code and tests for Breeze.
+
+The sub-project could be used in the future to produce a PyPI package
+(we reserved such package in PyPI), however its main purpose is
+to install Breeze in a separate virtualenv bootstrapped
+automatically in editable mode.
+
+There are two ways you will be able to install `Breeze2` - locally in
+repository using ./Breeze2 bootstrapping script and using `pipx`.
+
+The bootstrapping Python script (`Breeze2` in the main repository
+of Airflow) performs the following tasks:
+
+* when run for the first time it creates the `.build/breeze2/venv` virtual
+ environment (Python 3.7+ based) - with the local `dev/breeze`
+ project installed in editable mode (`pip install -e .`) - this makes sure
+ that the users of Breeze will use the latest version of Breeze
+ available in their version of the repository
+* when run subsequently, it will check if setup files changed for
+ Breeze (dependencies changed) and if they did it will automatically
+ reinstall the environment, adding missing dependencies
+* after managing the venv, the Breeze2 script will simply execute
+ the actual Breeze2 script in `.build/breeze2/venv`, passing the
+ parameters to the script. For the user, the effect will be the same
+ as activating the virtualenv and executing ./Breeze2 from
+ there (but it will happen automatically and invisibly for the
+ user)
+* In a Windows environment, where there is no easy/popular equivalent
+ of running scripts with a shebang (#!) as in Posix
+ environments, users will be able to locally build (using
+ `pyinstaller`) a `Breeze2.exe` frozen Python script that will
+ essentially do the same. They could also use the `python Breeze2`
+ command or switch to Git Bash to utilize the shebang feature
+ (Git Bash comes together with Git when installed on Windows)
+* The second option is to use `pipx` to install Breeze2.
+ What `pipx` does is almost equivalent to what the bootstrapping does
+ and many users might actually choose to install Breeze this
+ way - so we will add it as an option to install Breeze.
+ The command `pipx install -e <BREEZE FOLDER>` performs the
+ installation. The installation can be updated
+ by `pipx install --force -e <BREEZE FOLDER>`.
+ The benefit of using `pipx` is that Breeze becomes
+ available on the path when you install it this way; it also
+ provides out-of-the-box Windows support. The drawback is
+ that when new dependencies are added, they will not be
+ automatically installed and you need to manually force
+ re-installation if new dependencies are used - which is not
+ as seamlessly integrated into the regular development
+ environment, and it might create some confusion for the
+ users who would have to learn `pipx` and its commands.
+ Another drawback of `pipx` is that it installs one global
+ version of Breeze2 for all projects, whereas it is quite
+ possible that someone has two different versions of the
+ Airflow repository checked out - and the bootstrapping
+ script supports that case.
+
+The bootstrapping script is a temporary measure, until the
+dependencies of Breeze stabilize enough that the need
+to recreate the virtual environment by `pipx` will be
+very infrequent. At that point `pipx` will provide a better
+user experience, and we might even decide to remove the
+bootstrapping script and switch fully to `pipx`.
+
+## Alternatives considered
+
+The alternatives considered were:
+
+* `nox` - this is a tool to manage virtualenvs for testing. While
+ it has some built-in virtualenv capabilities, it is an
+ additional tool that needs to be installed and it lacks
+ the automation of checking and recreation of the virtualenv
+ when needed (you need to manually run nox to update the environment).
+ Also it is targeted at building multiple virtualenvs
+ for tests - it has nice pytest integration for example, but it
+ lacks support for managing editable installs for a long time.
+
+* `pyenv` - this is the de-facto standard for maintenance of
+ virtualenvs. It has the capability of creating and switching
+ between virtualenvs easily. Together with some of its plugins
+ (pyenv-virtualenv and auto-activation) it could serve the
+ purpose quite well. However the problem is that if you
+ also use `pyenv` to manage your `airflow` virtualenv this might
+ be source of confusion. Should I activate airflow virtualenv
+ or Breeze2 venv to run tests? Part of Breeze experience is
+ to activate local Airflow virtualenv for IDE integration and
+ since this is different than simple Breeze virtualenv, using
+ pytest and autoactivation in this case might lead to a lot
+ of confusion. Keeping the Breeze virtualenv "hidden" and
+ mostly "used" but not deliberately activated is a better
+ choice - especially that most users will simply "use" Breeze2
+ as an app rather than activate the environment deliberately.
+ Also choosing `pyenv` and its virtualenv plugin would
+ add extra, unnecessary steps and prerequisites for Breeze.
+
+
+## Consequences
+
+Using Breeze for new users will be much simpler, without
+having to install any prerequisites. The virtualenv used by
+Breeze2 will be hidden from the user, and used behind the
+scenes - and the dependencies used will be automatically
+installed when needed. This will allow the Breeze tool to be
+seamlessly integrated into the development experience without
+having to worry about extra maintenance needed.
diff --git a/dev/breeze/src/airflow_breeze/breeze.py b/dev/breeze/src/airflow_breeze/breeze.py
index 9969de7..c854d32 100755
--- a/dev/breeze/src/airflow_breeze/breeze.py
+++ b/dev/breeze/src/airflow_breeze/breeze.py
@@ -15,6 +15,9 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+import os
+from pathlib import Path
+from typing import Optional
import click
from click import ClickException
@@ -25,15 +28,47 @@ from airflow_breeze.visuals import ASCIIART, ASCIIART_STYLE
NAME = "Breeze2"
VERSION = "0.0.1"
+__AIRFLOW_SOURCES_ROOT = Path.cwd()
-@click.group()
-def main():
- pass
-
+__AIRFLOW_CFG_FILE = "setup.cfg"
console = Console(force_terminal=True, color_system="standard", width=180)
+def get_airflow_sources_root():
+ return __AIRFLOW_SOURCES_ROOT
+
+
+def search_upwards_for_airflow_sources_root(start_from: Path) -> Optional[Path]:
+ root = Path(start_from.root)
+ d = start_from
+ while d != root:
+ attempt = d / __AIRFLOW_CFG_FILE
+ if attempt.exists() and "name = apache-airflow\n" in attempt.read_text():
+ return attempt.parent
+ d = d.parent
+ return None
+
+
+def find_airflow_sources_root():
+ # Try to find airflow sources in current working dir
+ airflow_sources_root = search_upwards_for_airflow_sources_root(Path.cwd())
+ if not airflow_sources_root:
+ # Or if it fails, find it in parents of the directory where the ./breeze.py is.
+ airflow_sources_root = search_upwards_for_airflow_sources_root(Path(__file__).resolve().parent)
+ global __AIRFLOW_SOURCES_ROOT
+ if airflow_sources_root:
+ __AIRFLOW_SOURCES_ROOT = airflow_sources_root
+ else:
+ console.print(f"\n[yellow]Could not find Airflow sources location. Assuming {__AIRFLOW_SOURCES_ROOT}")
+ os.chdir(__AIRFLOW_SOURCES_ROOT)
+
+
+@click.group()
+def main():
+ find_airflow_sources_root()
+
+
option_verbose = click.option(
"--verbose",
is_flag=True,
@@ -54,6 +89,7 @@ def shell(verbose: bool):
"""Enters breeze.py environment. this is the default command use when no other is selected."""
if verbose:
console.print("\n[green]Welcome to breeze.py[/]\n")
+ console.print(f"\n[green]Root of Airflow Sources = {__AIRFLOW_SOURCES_ROOT}[/]\n")
console.print(ASCIIART, style=ASCIIART_STYLE)
raise ClickException("\nPlease implement entering breeze.py\n")
@@ -63,7 +99,7 @@ def shell(verbose: bool):
def build_ci_image(verbose: bool):
"""Builds breeze.ci image for breeze.py."""
if verbose:
- console.print("\n[blue]Building image[/]\n")
+ console.print(f"\n[blue]Building image of airflow from {__AIRFLOW_SOURCES_ROOT}[/]\n")
raise ClickException("\nPlease implement building the CI image\n")
diff --git a/dev/breeze/src/airflow_breeze/visuals/__init__.py b/dev/breeze/src/airflow_breeze/visuals/__init__.py
index 2df7911..018b1ad 100644
--- a/dev/breeze/src/airflow_breeze/visuals/__init__.py
+++ b/dev/breeze/src/airflow_breeze/visuals/__init__.py
@@ -59,4 +59,4 @@ ASCIIART = """
"""
-ASCIIART_STYLE = "blue"
+ASCIIART_STYLE = "white"
diff --git a/dev/breeze/tests/test_find_airflow_directory.py b/dev/breeze/tests/test_find_airflow_directory.py
new file mode 100644
index 0000000..a506644
--- /dev/null
+++ b/dev/breeze/tests/test_find_airflow_directory.py
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+from pathlib import Path
+from unittest import mock
+
+from airflow_breeze.breeze import find_airflow_sources_root, get_airflow_sources_root
+
+ACTUAL_AIRFLOW_SOURCES = Path(__file__).parent.parent.parent.parent
+ROOT_PATH = Path(Path(__file__).root)
+
+
+def test_find_airflow_root_upwards_from_cwd(capsys):
+ os.chdir(Path(__file__).parent)
+ find_airflow_sources_root()
+ assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root()
+ output = str(capsys.readouterr().out)
+ assert output == ''
+
+
+def test_find_airflow_root_upwards_from_file(capsys):
+ os.chdir(Path(__file__).root)
+ find_airflow_sources_root()
+ assert ACTUAL_AIRFLOW_SOURCES == get_airflow_sources_root()
+ output = str(capsys.readouterr().out)
+ assert output == ''
+
+
+@mock.patch('airflow_breeze.breeze.__AIRFLOW_SOURCES_ROOT', ROOT_PATH)
+@mock.patch('airflow_breeze.breeze.__AIRFLOW_CFG_FILE', "bad_name.cfg")
+def test_fallback_find_airflow_root(capsys):
+ os.chdir(ROOT_PATH)
+ find_airflow_sources_root()
+ assert ROOT_PATH == get_airflow_sources_root()
+ output = str(capsys.readouterr().out)
+ assert "Could not find Airflow sources" in output