You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ep...@apache.org on 2023/03/08 13:18:02 UTC

[airflow] 07/12: Improve "other" test category selection (#28630)

This is an automated email from the ASF dual-hosted git repository.

ephraimanierobi pushed a commit to branch v2-5-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 6f2544886826880ccce80e48e5b864b6c11eb7dd
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Wed Dec 28 23:09:16 2022 +0100

    Improve "other" test category selection (#28630)
    
    The "Other" test category automatically selects all tests that
    are not included in any of the regular categories. That is to
    make sure that we do not forget to add any directory that
    has been added. However this led to a long directory selection
    for "Other" category including system tests that have been
    automatically added there. However those tests are always skipped
    in regular tests and collecting those tests during "Other"
    execution is not needed and slows it down.
    
    Similarly System tests changes were treated as "Other change"
    for incoming PRs. This means that any change to system tests
    would trigger "all tests" rather than selective subset of
    those - the same as any core change.
    
    However System tests are also part of the documentation,
    so any change in system tests should trigger docs
    builds.
    
    This change improves it in a few ways:
    
    * Other tests now do not include "System Tests" - they are
      treated the same way as other test categories (but not
      included in test category selection for now - until we get
      a good way of breeze-integration for System Tests
    
    * They are also excluded from treating them as "other" change
      when considering which tests to run. Changes to system tests
      will not trigger "all" tests, just those that accompanying
      changes would trigger.
    
    * The changes to system tests only, however, triggers docs build
      because those are triggered by any source change.a
    
    * The __pycache__ directories are removed from the list of
      "Other" packages to run.
    
    * In order to make sure system tests are pytest-collectable,
      we perform pytest collection for all tests right after downloading
      the CI images and verifying them. This makes sure that the
      tests are collectible before we even attempt to run them - this way
      we avoid unnecessary machine spin-up and breze installation for the
      multiple jobs that run the tests. This slows down feedback time
      a litle, but should increase overall robustness of the test suite.
    
    * an old, unused nosetest collection script doing the same in the past
      has been removed (it was discovered during implementation)
    
    Noticed in: #28319
    
    (cherry picked from commit e8657ce5596575a0408d97f6b4a72e7d185edc7f)
---
 .github/workflows/ci.yml                           |  6 +++--
 Dockerfile.ci                                      |  4 +++-
 .../src/airflow_breeze/utils/selective_checks.py   | 10 ++++++--
 dev/breeze/tests/test_selective_checks.py          | 25 +++++++++++++++-----
 scripts/docker/entrypoint_ci.sh                    |  4 +++-
 scripts/in_container/run_extract_tests.sh          | 25 --------------------
 ...est_collection.py => test_pytest_collection.py} | 27 ++++++++++++++++------
 7 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4c8bc96151..ce5004c7d9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -540,6 +540,8 @@ jobs:
         env:
           PYTHON_VERSIONS: ${{ needs.build-info.outputs.python-versions-list-as-string }}
           DEBUG_RESOURCES: ${{needs.build-info.outputs.debug-resources}}
+      - name: "Tests Pytest collection: ${{matrix.python-version}}"
+        run: breeze shell "python /opt/airflow/scripts/in_container/test_pytest_collection.py"
       - name: "Fix ownership"
         run: breeze ci fix-ownership
         if: always()
@@ -867,7 +869,7 @@ jobs:
       - name: "Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.test-types}}"
         run: breeze testing tests --run-in-parallel
       - name: "Tests ARM Pytest collection: ${{matrix.python-version}}"
-        run: breeze shell "python /opt/airflow/scripts/in_container/test_arm_pytest_collection.py"
+        run: breeze shell "python /opt/airflow/scripts/in_container/test_pytest_collection.py" arm
       - name: "Post Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.test-types}}"
         uses: ./.github/actions/post_tests
 
@@ -992,7 +994,7 @@ jobs:
       - name: "Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.test-types}}"
         run: breeze testing tests --run-in-parallel
       - name: "Tests ARM Pytest collection: ${{matrix.python-version}}"
-        run: breeze shell "python /opt/airflow/scripts/in_container/test_arm_pytest_collection.py"
+        run: breeze shell "python /opt/airflow/scripts/in_container/test_pytest_collection.py" arm
       - name: "Post Tests: ${{matrix.python-version}}:${{needs.build-info.outputs.test-types}}"
         uses: ./.github/actions/post_tests
 
diff --git a/Dockerfile.ci b/Dockerfile.ci
index 72b01bb098..369faab2ab 100644
--- a/Dockerfile.ci
+++ b/Dockerfile.ci
@@ -881,7 +881,7 @@ declare -a SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TES
 
 function find_all_other_tests() {
     local all_tests_dirs
-    all_tests_dirs=$(find "tests" -type d)
+    all_tests_dirs=$(find "tests" -type d ! -name '__pycache__')
     all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests$/d" )
     all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests\/dags/d" )
     local path
@@ -915,6 +915,7 @@ else
     WWW_TESTS=("tests/www")
     HELM_CHART_TESTS=("tests/charts")
     INTEGRATION_TESTS=("tests/integration")
+    SYSTEM_TESTS=("tests/system")
     ALL_TESTS=("tests")
     ALL_PRESELECTED_TESTS=(
         "${CLI_TESTS[@]}"
@@ -925,6 +926,7 @@ else
         "${CORE_TESTS[@]}"
         "${ALWAYS_TESTS[@]}"
         "${WWW_TESTS[@]}"
+        "${SYSTEM_TESTS[@]}"
     )
 
     NO_PROVIDERS_INTEGRATION_TESTS=(
diff --git a/dev/breeze/src/airflow_breeze/utils/selective_checks.py b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
index 2aee00491b..75fb5f0faa 100644
--- a/dev/breeze/src/airflow_breeze/utils/selective_checks.py
+++ b/dev/breeze/src/airflow_breeze/utils/selective_checks.py
@@ -70,6 +70,7 @@ class FileGroupForCi(Enum):
     SETUP_FILES = "setup_files"
     DOC_FILES = "doc_files"
     WWW_FILES = "www_files"
+    SYSTEM_TEST_FILES = "system_tests"
     KUBERNETES_FILES = "kubernetes_files"
     ALL_PYTHON_FILES = "all_python_files"
     ALL_SOURCE_FILES = "all_sources_for_tests"
@@ -159,6 +160,9 @@ CI_FILE_GROUP_MATCHES = HashableDict(
             "^tests",
             "^kubernetes_tests",
         ],
+        FileGroupForCi.SYSTEM_TEST_FILES: [
+            "^tests/system/",
+        ],
     }
 )
 
@@ -178,7 +182,6 @@ TEST_TYPE_MATCHES = HashableDict(
         SelectiveUnitTestTypes.PROVIDERS: [
             "^airflow/providers/",
             "^tests/providers/",
-            "^tests/system/",
         ],
         SelectiveUnitTestTypes.WWW: ["^airflow/www", "^tests/www"],
     }
@@ -523,9 +526,12 @@ class SelectiveChecks:
         )
 
         kubernetes_files = self._matching_files(FileGroupForCi.KUBERNETES_FILES, CI_FILE_GROUP_MATCHES)
+        system_test_files = self._matching_files(FileGroupForCi.SYSTEM_TEST_FILES, CI_FILE_GROUP_MATCHES)
         all_source_files = self._matching_files(FileGroupForCi.ALL_SOURCE_FILES, CI_FILE_GROUP_MATCHES)
 
-        remaining_files = set(all_source_files) - set(matched_files) - set(kubernetes_files)
+        remaining_files = (
+            set(all_source_files) - set(matched_files) - set(kubernetes_files) - set(system_test_files)
+        )
         count_remaining_files = len(remaining_files)
         if count_remaining_files > 0:
             get_console().print(
diff --git a/dev/breeze/tests/test_selective_checks.py b/dev/breeze/tests/test_selective_checks.py
index 5bf2ac41c3..05fcfa3567 100644
--- a/dev/breeze/tests/test_selective_checks.py
+++ b/dev/breeze/tests/test_selective_checks.py
@@ -177,7 +177,7 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                 (
                     "INTHEWILD.md",
                     "chart/aaaa.txt",
-                    "tests/system/providers/airbyte/file.py",
+                    "tests/providers/airbyte/file.py",
                 ),
                 {
                     "all-python-versions": "['3.7']",
@@ -214,10 +214,9 @@ def assert_outputs_are_printed(expected_outputs: dict[str, str], stderr: str):
                     "docs-build": "true",
                     "run-kubernetes-tests": "true",
                     "upgrade-to-newer-dependencies": "false",
-                    "test-types": "Always Providers",
+                    "test-types": "Always",
                 },
-                id="Helm tests, all providers as common util system file changed, kubernetes tests and "
-                "docs should run even if unimportant files were added",
+                id="Docs should run even if unimportant files were added",
             )
         ),
         (
@@ -465,7 +464,7 @@ def test_expected_output_full_tests_needed(
                 "skip-provider-tests": "true",
                 "test-types": "API Always CLI Core Other WWW",
             },
-            id="All tests except Providers and should run if core file changed in non-main branch",
+            id="All tests except Providers should run if core file changed in non-main branch",
         ),
     ],
 )
@@ -501,6 +500,20 @@ def test_expected_output_pull_request_v2_3(
             },
             id="Nothing should run if only non-important files changed",
         ),
+        pytest.param(
+            ("tests/system/any_file.py",),
+            {
+                "all-python-versions": "['3.7']",
+                "all-python-versions-list-as-string": "3.7",
+                "image-build": "true",
+                "needs-helm-tests": "false",
+                "run-tests": "true",
+                "docs-build": "true",
+                "upgrade-to-newer-dependencies": "false",
+                "test-types": "Always",
+            },
+            id="Only Always and docs build should run if only system tests changed",
+        ),
         pytest.param(
             (
                 "airflow/cli/test.py",
@@ -538,7 +551,7 @@ def test_expected_output_pull_request_v2_3(
                 "skip-provider-tests": "false",
                 "test-types": "API Always CLI Core Other Providers WWW",
             },
-            id="All tests except should run if core file changed",
+            id="All tests should run if core file changed",
         ),
     ],
 )
diff --git a/scripts/docker/entrypoint_ci.sh b/scripts/docker/entrypoint_ci.sh
index 3f19ce4bae..9454626d6c 100755
--- a/scripts/docker/entrypoint_ci.sh
+++ b/scripts/docker/entrypoint_ci.sh
@@ -333,7 +333,7 @@ declare -a SELECTED_TESTS CLI_TESTS API_TESTS PROVIDERS_TESTS CORE_TESTS WWW_TES
 # - so that we do not skip any in the future if new directories are added
 function find_all_other_tests() {
     local all_tests_dirs
-    all_tests_dirs=$(find "tests" -type d)
+    all_tests_dirs=$(find "tests" -type d ! -name '__pycache__')
     all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests$/d" )
     all_tests_dirs=$(echo "${all_tests_dirs}" | sed "/tests\/dags/d" )
     local path
@@ -367,6 +367,7 @@ else
     WWW_TESTS=("tests/www")
     HELM_CHART_TESTS=("tests/charts")
     INTEGRATION_TESTS=("tests/integration")
+    SYSTEM_TESTS=("tests/system")
     ALL_TESTS=("tests")
     ALL_PRESELECTED_TESTS=(
         "${CLI_TESTS[@]}"
@@ -377,6 +378,7 @@ else
         "${CORE_TESTS[@]}"
         "${ALWAYS_TESTS[@]}"
         "${WWW_TESTS[@]}"
+        "${SYSTEM_TESTS[@]}"
     )
 
     NO_PROVIDERS_INTEGRATION_TESTS=(
diff --git a/scripts/in_container/run_extract_tests.sh b/scripts/in_container/run_extract_tests.sh
deleted file mode 100755
index dc7b972a76..0000000000
--- a/scripts/in_container/run_extract_tests.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# shellcheck source=scripts/in_container/_in_container_script_init.sh
-. "$( dirname "${BASH_SOURCE[0]}" )/_in_container_script_init.sh"
-
-TMP_FILE=$(mktemp)
-
-nosetests --collect-only --with-xunit --xunit-file="${TMP_FILE}"
-
-python "${AIRFLOW_SOURCES}/tests/test_utils/get_all_tests.py" "${TMP_FILE}" | sort >> "${HOME}/all_tests.txt"
diff --git a/scripts/in_container/test_arm_pytest_collection.py b/scripts/in_container/test_pytest_collection.py
similarity index 73%
rename from scripts/in_container/test_arm_pytest_collection.py
rename to scripts/in_container/test_pytest_collection.py
index 43277c5562..e5c9d44407 100755
--- a/scripts/in_container/test_arm_pytest_collection.py
+++ b/scripts/in_container/test_pytest_collection.py
@@ -20,15 +20,18 @@ from __future__ import annotations
 import json
 import re
 import subprocess
+import sys
 from pathlib import Path
 
 from rich.console import Console
 
 AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2].resolve()
 
-if __name__ == "__main__":
-    console = Console(width=400, color_system="standard")
+console = Console(width=400, color_system="standard")
+
 
+def remove_packages_missing_on_arm():
+    console.print("[bright_blue]Removing packages missing on ARM.")
     provider_dependencies = json.loads(
         (AIRFLOW_SOURCES_ROOT / "generated" / "provider_dependencies.json").read_text()
     )
@@ -43,11 +46,21 @@ if __name__ == "__main__":
         + "\n"
     )
     subprocess.run(["pip", "uninstall", "-y"] + all_dependencies_to_remove)
+
+
+if __name__ == "__main__":
+    arm = False
+    if len(sys.argv) > 1 and sys.argv[1].lower() == "arm":
+        arm = True
+        remove_packages_missing_on_arm()
     result = subprocess.run(["pytest", "--collect-only", "-qqqq", "--disable-warnings", "tests"], check=False)
     if result.returncode != 0:
-        console.print("\n[red]Test collection in ARM environment failed.")
-        console.print(
-            "[yellow]You should wrap the failing imports in try/except/skip clauses\n"
-            "See similar examples as skipped tests right above.\n"
-        )
+        console.print("\n[red]Test collection failed.")
+        if arm:
+            console.print(
+                "[yellow]You should wrap the failing imports in try/except/skip clauses\n"
+                "See similar examples as skipped tests right above.\n"
+            )
+        else:
+            console.print("[yellow]Please add missing packages\n")
         exit(result.returncode)