You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by po...@apache.org on 2023/06/13 10:42:16 UTC
[airflow-site] branch main updated: Extending the back reference script to support providers (#799)
This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-site.git
The following commit(s) were added to refs/heads/main by this push:
new bcdd36030a Extending the back reference script to support providers (#799)
bcdd36030a is described below
commit bcdd36030ae2b91fba1f23eb6f34e12f81e7bfd2
Author: Amogh Desai <am...@gmail.com>
AuthorDate: Tue Jun 13 16:12:10 2023 +0530
Extending the back reference script to support providers (#799)
---------
Co-authored-by: Amogh <ad...@cloudera.com>
---
post-docs/add-back-references.py | 62 +++++++++++++++++++++++++++-------------
1 file changed, 42 insertions(+), 20 deletions(-)
diff --git a/post-docs/add-back-references.py b/post-docs/add-back-references.py
index d89d0a2b58..f409b36f27 100644
--- a/post-docs/add-back-references.py
+++ b/post-docs/add-back-references.py
@@ -18,18 +18,20 @@ import enum
import logging
import os
import sys
+import tempfile
from pathlib import Path
+from urllib.error import URLError
from urllib.request import urlopen
import semver
+log = logging.getLogger(__name__)
+
airflow_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/apache-airflow/redirects.txt"
helm_redirects_link = "https://raw.githubusercontent.com/apache/airflow/main/docs/helm-chart/redirects.txt"
-providers_redirect_link = "populate-this"
docs_archive_path = "../docs-archive"
airflow_docs_path = docs_archive_path + "/apache-airflow"
helm_docs_path = docs_archive_path + "/helm-chart"
-providers_docs_path = docs_archive_path + "/apache-airflow-providers"
# types of generations supported
@@ -40,16 +42,22 @@ class GenerationType(enum.Enum):
def download_file(url):
- filedata = urlopen(url)
- datatowrite = filedata.read()
-
- with open('redirects.txt', 'wb') as f:
- f.write(datatowrite)
-
-
-def construct_mapping():
+ try:
+ temp_dir = Path(tempfile.mkdtemp(prefix="temp_dir", suffix=""))
+ file_name = temp_dir / "redirects.txt"
+ filedata = urlopen(url)
+ data = filedata.read()
+ with open(file_name, 'wb') as f:
+ f.write(data)
+ return True, file_name
+ except URLError as e:
+ log.warning(e)
+ return False, "no-file"
+
+
+def construct_mapping(file_name):
old_to_new_map = dict()
- with open('redirects.txt') as f:
+ with open(file_name) as f:
file_content = []
lines = f.readlines()
# Skip empty line
@@ -81,11 +89,19 @@ def get_redirect_content(url: str):
return f'<html><head><meta http-equiv="refresh" content="0; url={url}"/></head></html>'
+def get_github_redirects_url(provider_name: str):
+ return f'https://raw.githubusercontent.com/apache/airflow/main/docs/{provider_name}/redirects.txt'
+
+
+def get_provider_docs_path(provider_name: str):
+ return docs_archive_path + "/" + provider_name
+
+
def create_back_reference_html(back_ref_url, path):
content = get_redirect_content(back_ref_url)
if Path(path).exists():
- logging.error(f'skipping file:{path}, redirects already exist', path)
+ logging.warning(f'skipping file:{path}, redirects already exist', path)
return
# Creating an HTML file
@@ -94,8 +110,11 @@ def create_back_reference_html(back_ref_url, path):
def generate_back_references(link, base_path):
- download_file(link)
- old_to_new = construct_mapping()
+ is_downloaded, file_name = download_file(link)
+ if not is_downloaded:
+ log.warning('skipping generating back references')
+ return
+ old_to_new = construct_mapping(file_name)
versions = [f.path.split("/")[-1] for f in os.scandir(base_path) if f.is_dir()]
@@ -125,8 +144,8 @@ def generate_back_references(link, base_path):
# total arguments
n = len(sys.argv)
if n != 2:
- logging.Logger.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
- "helm]")
+ log.error("missing required arguments, syntax: python add-back-references.py [airflow | providers | "
+ "helm]")
gen_type = GenerationType[sys.argv[1]]
if gen_type == GenerationType.airflow:
@@ -134,8 +153,11 @@ if gen_type == GenerationType.airflow:
elif gen_type == GenerationType.helm:
generate_back_references(helm_redirects_link, helm_docs_path)
elif gen_type == GenerationType.providers:
- # solve this properly for different providers
- generate_back_references(providers_redirect_link, providers_docs_path)
+ all_providers = [f.path.split("/")[-1] for f in os.scandir(docs_archive_path)
+ if f.is_dir() and "providers" in f.name]
+ for p in all_providers:
+ log.info("processing provider: %s", p)
+ generate_back_references(get_github_redirects_url(p), get_provider_docs_path(p))
else:
- logging.Logger.error("invalid type of doc generation required. Pass one of [airflow | providers | "
- "helm]")
+ log.error("invalid type of doc generation required. Pass one of [airflow | providers | "
+ "helm]")