You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2023/07/09 18:44:52 UTC

[airflow-site] 01/01: Adds generation of the new changelog and security links

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch add-changelog-backreference-generation
in repository https://gitbox.apache.org/repos/asf/airflow-site.git

commit 4c74e5bc505b18c5e88f9450230fd4d4db57d813
Author: Jarek Potiuk <ja...@potiuk.com>
AuthorDate: Sun Jul 9 20:06:14 2023 +0200

    Adds generation of the new changelog and security links
    
    Add automated generation of the security and changelog links.
    
    * Added a few renames of variables and types to make it easier
      to understand what's going
    * Converted logs into rich's print with colours to better distinguish
      info, warnings, changes
    * Converted dict into tuple, because we could have two new pages point
      to the same old page (and it was not possible when old page was
      key in a dictionary
    * Hard-coded changelog and security links because they are new and
      have no redirects - that also required to process all versions of
      all providers, because they should be processed even without
      redirects.
---
 post-docs/README.md              |  3 ++
 post-docs/add-back-references.py | 62 +++++++++++++++++++++-------------------
 post-docs/requirements.txt       |  1 +
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/post-docs/README.md b/post-docs/README.md
index 12aa941fd6..c8a004f61d 100644
--- a/post-docs/README.md
+++ b/post-docs/README.md
@@ -7,3 +7,6 @@ To generate these back referencing (refresh HTMLs), run the script in the follow
 ```commandline
 python add-back-reference.py [airflow | providers | helm]
 ```
+
+Before running the script, make sure to install requirements from `requirements.txt` file into
+your virtual environment.
diff --git a/post-docs/add-back-references.py b/post-docs/add-back-references.py
index 73493ec99c..9377bc7bfd 100644
--- a/post-docs/add-back-references.py
+++ b/post-docs/add-back-references.py
@@ -14,16 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from __future__ import annotations
 import enum
-import logging
 import os
 import sys
 import tempfile
 from pathlib import Path
 from urllib.error import URLError
 from urllib.request import urlopen
-
-log = logging.getLogger(__name__)
+from rich import print
 
 airflow_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
 helm_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
@@ -50,12 +49,15 @@ def download_file(url):
             f.write(data)
         return True, file_name
     except URLError as e:
-        log.warning(e)
+        if e.reason == 'Not Found':
+            print(f"[blue]The {url} does not exist. Skipping.")
+        else:
+            print(f"[yellow]Could not download file {url}: {e}")
         return False, "no-file"
 
 
-def construct_mapping(file_name):
-    old_to_new_map = dict()
+def construct_old_to_new_tuple_mapping(file_name: Path) -> list[tuple[str, str]]:
+    old_to_new_tuples: list[tuple[str, str]] = list()
     with open(file_name) as f:
         file_content = []
         lines = f.readlines()
@@ -76,8 +78,8 @@ def construct_mapping(file_name):
             old_path = old_path.replace(".rst", ".html")
             new_path = new_path.replace(".rst", ".html")
 
-            old_to_new_map[old_path] = new_path
-    return old_to_new_map
+            old_to_new_tuples.append((old_path,new_path))
+    return old_to_new_tuples
 
 
 def get_redirect_content(url: str):
@@ -96,35 +98,37 @@ def create_back_reference_html(back_ref_url, path):
     content = get_redirect_content(back_ref_url)
 
     if Path(path).exists():
-        logging.warning(f'skipping file:{path}, redirects already exist', path)
+        print(f'Skipping file:{path}, redirects already exist')
         return
 
     # creating a back reference html file
     with open(path, "w") as f:
         f.write(content)
+    print(f"[green]Created back reference redirect: {path}")
 
 
-def generate_back_references(link, base_path):
+def generate_back_references(link: str, base_path: str):
     is_downloaded, file_name = download_file(link)
     if not is_downloaded:
-        log.warning('skipping generating back references')
-        return
-    old_to_new = construct_mapping(file_name)
+        old_to_new: list[tuple[str, str]] = []
+    else:
+        print(f"Constructs old to new mapping from redirects.txt for {base_path}")
+        old_to_new = construct_old_to_new_tuple_mapping(file_name)
+    old_to_new.append(("index.html", "changelog.html"))
+    old_to_new.append(("index.html", "security.html"))
 
     versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]
 
     for version in versions:
-        r = base_path + "/" + version
-
-        for p in old_to_new:
-            old = p
-            new = old_to_new[p]
+        print(f"Processing {base_path}, version: {version}")
+        versioned_provider_path = base_path + "/" + version
 
+        for old, new in old_to_new:
             # only if old file exists, add the back reference
-            if os.path.exists(r + "/" + p):
-                d = old_to_new[p].split("/")
-                file_name = old_to_new[p].split("/")[-1]
-                dest_dir = r + "/" + "/".join(d[: len(d) - 1])
+            if os.path.exists(versioned_provider_path + "/" + old):
+                split_new_path = new.split("/")
+                file_name = new.split("/")[-1]
+                dest_dir = versioned_provider_path + "/" + "/".join(split_new_path[: len(split_new_path) - 1])
 
                 # finds relative path of old file with respect to new and handles case of different file names also
                 relative_path = os.path.relpath(old, new)
@@ -138,8 +142,9 @@ def generate_back_references(link, base_path):
 
 n = len(sys.argv)
 if n != 2:
-    log.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
-              "helm]")
+    print("[red]missing required arguments[/]"
+          "syntax: python add-back-references.py [airflow | providers | helm]")
+    sys.exit(1)
 
 gen_type = GenerationType[sys.argv[1]]
 if gen_type == GenerationType.airflow:
@@ -149,9 +154,8 @@ elif gen_type == GenerationType.helm:
 elif gen_type == GenerationType.providers:
     all_providers = [f.path.split("/")[-1] for f in os.scandir(docs_archive_path)
                      if f.is_dir() and "providers" in f.name]
-    for p in all_providers:
-        log.info("processing airflow provider: %s", p)
-        generate_back_references(get_github_redirects_url(p), get_provider_docs_path(p))
+    for provider in all_providers:
+        print(f"Processing airflow provider: {p}")
+        generate_back_references(get_github_redirects_url(provider), get_provider_docs_path(provider))
 else:
-    log.error("invalid type of doc generation required. Pass one of [airflow | providers | "
-              "helm]")
+    print("[red]Invalid type of doc generation required[/]. Pass one of [airflow | providers | helm]")
diff --git a/post-docs/requirements.txt b/post-docs/requirements.txt
new file mode 100644
index 0000000000..3f382dd0d6
--- /dev/null
+++ b/post-docs/requirements.txt
@@ -0,0 +1 @@
+rich