You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2023/06/13 10:42:16 UTC

[airflow-site] branch main updated: Extending the back reference script to support providers (#799)

This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-site.git


The following commit(s) were added to refs/heads/main by this push:
     new bcdd36030a Extending the back reference script to support providers (#799)
bcdd36030a is described below

commit bcdd36030ae2b91fba1f23eb6f34e12f81e7bfd2
Author: Amogh Desai <am...@gmail.com>
AuthorDate: Tue Jun 13 16:12:10 2023 +0530

    Extending the back reference script to support providers (#799)
    
    
    
    ---------
    
    Co-authored-by: Amogh <ad...@cloudera.com>
---
 post-docs/add-back-references.py | 62 +++++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/post-docs/add-back-references.py b/post-docs/add-back-references.py
index d89d0a2b58..f409b36f27 100644
--- a/post-docs/add-back-references.py
+++ b/post-docs/add-back-references.py
@@ -18,18 +18,20 @@ import enum
 import logging
 import os
 import sys
+import tempfile
 from pathlib import Path
+from urllib.error import URLError
 from urllib.request import urlopen
 import semver
 
+log = logging.getLogger(__name__)
+
 airflow_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
 helm_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
-providers_redirect_link = "populate-this"
 
 docs_archive_path = "../docs-archive"
 airflow_docs_path = docs_archive_path + "/apache-airflow"
 helm_docs_path = docs_archive_path + "/helm-chart"
-providers_docs_path = docs_archive_path + "/apache-airflow-providers"
 
 
 # types of generations supported
@@ -40,16 +42,22 @@ class GenerationType(enum.Enum):
 
 
 def download_file(url):
-    filedata = urlopen(url)
-    datatowrite = filedata.read()
-
-    with open('redirects.txt', 'wb') as f:
-        f.write(datatowrite)
-
-
-def construct_mapping():
+    try:
+        temp_dir = Path(tempfile.mkdtemp(prefix="temp_dir", suffix=""))
+        file_name = temp_dir / "redirects.txt"
+        filedata = urlopen(url)
+        data = filedata.read()
+        with open(file_name, 'wb') as f:
+            f.write(data)
+        return True, file_name
+    except URLError as e:
+        log.warning(e)
+        return False, "no-file"
+
+
+def construct_mapping(file_name):
     old_to_new_map = dict()
-    with open('redirects.txt') as f:
+    with open(file_name) as f:
         file_content = []
         lines = f.readlines()
         # Skip empty line
@@ -81,11 +89,19 @@ def get_redirect_content(url: str):
     return f'<html><head><meta http-equiv="refresh" content="0; url={url}"/></head></html>'
 
 
+def get_github_redirects_url(provider_name: str):
+    return f'https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt'
+
+
+def get_provider_docs_path(provider_name: str):
+    return docs_archive_path + "/" + provider_name
+
+
 def create_back_reference_html(back_ref_url, path):
     content = get_redirect_content(back_ref_url)
 
     if Path(path).exists():
-        logging.error(f'skipping file:{path}, redirects already exist', path)
+        logging.warning(f'skipping file:{path}, redirects already exist', path)
         return
 
     # Creating an HTML file
@@ -94,8 +110,11 @@ def create_back_reference_html(back_ref_url, path):
 
 
 def generate_back_references(link, base_path):
-    download_file(link)
-    old_to_new = construct_mapping()
+    is_downloaded, file_name = download_file(link)
+    if not is_downloaded:
+        log.warning('skipping generating back references')
+        return
+    old_to_new = construct_mapping(file_name)
 
     versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]
 
@@ -125,8 +144,8 @@ def generate_back_references(link, base_path):
 # total arguments
 n = len(sys.argv)
 if n != 2:
-    logging.Logger.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
-                         "helm]")
+    log.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
+              "helm]")
 
 gen_type = GenerationType[sys.argv[1]]
 if gen_type == GenerationType.airflow:
@@ -134,8 +153,11 @@ if gen_type == GenerationType.airflow:
 elif gen_type == GenerationType.helm:
     generate_back_references(helm_redirects_link, helm_docs_path)
 elif gen_type == GenerationType.providers:
-    # solve this properly for different providers
-    generate_back_references(providers_redirect_link, providers_docs_path)
+    all_providers = [f.path.split("/")[-1] for f in os.scandir(docs_archive_path)
+                     if f.is_dir() and "providers" in f.name]
+    for p in all_providers:
+        log.info("processing provider: %s", p)
+        generate_back_references(get_github_redirects_url(p), get_provider_docs_path(p))
 else:
-    logging.Logger.error("invalid type of doc generation required. Pass one of [airflow | providers | "
-                         "helm]")
+    log.error("invalid type of doc generation required. Pass one of [airflow | providers | "
+              "helm]")