You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2023/01/10 22:31:34 UTC

[airflow] branch main updated: Validate Sphinx Inventory file header (#28838)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 04306f18b0 Validate Sphinx Inventory file header (#28838)
04306f18b0 is described below

commit 04306f18b0643dfed3ed97863bbcf24dc50a8973
Author: Andrey Anshin <An...@taragol.is>
AuthorDate: Wed Jan 11 02:31:26 2023 +0400

    Validate Sphinx Inventory file header (#28838)
---
 docs/exts/docs_build/fetch_inventories.py | 38 +++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 9 deletions(-)

diff --git a/docs/exts/docs_build/fetch_inventories.py b/docs/exts/docs_build/fetch_inventories.py
index a104cadf77..1cd3e94c1c 100644
--- a/docs/exts/docs_build/fetch_inventories.py
+++ b/docs/exts/docs_build/fetch_inventories.py
@@ -24,11 +24,13 @@ import shutil
 import sys
 import traceback
 from itertools import repeat
+from tempfile import NamedTemporaryFile
 from typing import Iterator
 
 import requests
 import urllib3.exceptions
 from requests.adapters import DEFAULT_POOLSIZE
+from sphinx.util.inventory import InventoryFileReader
 
 from airflow.utils.helpers import partition
 from docs.exts.docs_build.docs_builder import get_available_providers_packages
@@ -47,25 +49,41 @@ S3_DOC_URL_NON_VERSIONED = S3_DOC_URL + "/docs/{package_name}/objects.inv"
 
 def _fetch_file(session: requests.Session, package_name: str, url: str, path: str) -> tuple[str, bool]:
     """
-    Download a file and returns status information as a tuple with package
+    Download a file, validate Sphinx Inventory headers and return status information as a tuple with package
     name and success status(bool value).
     """
     try:
         response = session.get(url, allow_redirects=True, stream=True)
     except (requests.RequestException, urllib3.exceptions.HTTPError):
-        print(f"Failed to fetch inventory: {url}")
+        print(f"{package_name}: Failed to fetch inventory: {url}")
         traceback.print_exc(file=sys.stderr)
         return package_name, False
     if not response.ok:
-        print(f"Failed to fetch inventory: {url}")
-        print(f"Failed with status: {response.status_code}", file=sys.stderr)
+        print(f"{package_name}: Failed to fetch inventory: {url}")
+        print(f"{package_name}: Failed with status: {response.status_code}", file=sys.stderr)
         return package_name, False
 
-    os.makedirs(os.path.dirname(path), exist_ok=True)
-    with open(path, "wb") as f:
-        response.raw.decode_content = True
-        shutil.copyfileobj(response.raw, f)
-    print(f"Fetched inventory: {url}")
+    if response.url != url:
+        print(f"{package_name}: {url} redirected to {response.url}")
+
+    with NamedTemporaryFile(suffix=package_name, mode="wb+") as tf:
+        for chunk in response.iter_content(chunk_size=4096):
+            tf.write(chunk)
+
+        tf.flush()
+        tf.seek(0, 0)
+
+        line = InventoryFileReader(tf).readline()
+        if not line.startswith("# Sphinx inventory version"):
+            print(f"{package_name}: Response contain unexpected Sphinx Inventory header: {line!r}.")
+            return package_name, False
+
+        tf.seek(0, 0)
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, "wb") as f:
+            shutil.copyfileobj(tf, f)
+
+    print(f"{package_name}: Fetched inventory: {response.url}")
     return package_name, True
 
 
@@ -136,5 +154,7 @@ def fetch_inventories():
         print("Failed packages:")
         for pkg_no, (pkg_name, _) in enumerate(failed, start=1):
             print(f"{pkg_no}. {pkg_name}")
+        print("Terminate execution.")
+        raise SystemExit(1)
 
     return [pkg_name for pkg_name, status in failed]