You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@solr.apache.org by GitBox <gi...@apache.org> on 2022/02/07 17:36:31 UTC

[GitHub] [solr] janhoy commented on a change in pull request #596: SOLR-15557 Script to generate refguide page .htaccess redirects

janhoy commented on a change in pull request #596:
URL: https://github.com/apache/solr/pull/596#discussion_r800898706



##########
File path: dev-tools/scripts/refguide/gen-refguide-redirects.py
##########
@@ -0,0 +1,134 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Simple script that converts old refguide page names as of 8.11.1 to the new Antora URLs from 9.0
+See input files in folder gen-refguide-redirects/
+
+The old-guide.txt is the plain .adoc names from an 'ls | grep adoc' in old ref-guide src folder
+The new-guide.txt is the output from this command from the new repo in the 'modules' folder:
+    find . | grep adoc | sed 's/\/pages//g' | sed 's/^.\///g'
+The mappings.csv comes from the explicit page renamings sourced from spreadsheet
+  https://docs.google.com/spreadsheets/d/1mwxSpn5Ky7-P4DLFrJGel2h7Il4muTlHmAA-AuRY1rs/edit#gid=982988701
+"""
+
+import os
+import sys
+from pprint import pprint
+sys.path.append(os.path.dirname(__file__))
+import argparse
+
+
+def read_config():  # Parse the command-line options and return the resulting argparse namespace.
+    parser = argparse.ArgumentParser(description='Convert old refguide page names to new')
+    parser.add_argument('--old', required=True, help='Old pagenames file, one .adoc filename per line')
+    parser.add_argument('--new', required=True, help='New pagenames file, one .adoc filename per line')
+    parser.add_argument('--mapping', required=True, help='Semicolon separated from-to file names (adoc)')
+    parser.add_argument('--htaccess', action='store_true', default=False, help='Output as htaccess rules')  # optional flag; False unless given
+    newconf = parser.parse_args()  # argparse exits with a usage error if a required option is missing
+    return newconf
+
+
+def out(text):  # Print a progress message, suppressed when --htaccess output was requested.
+    global conf  # module-level config namespace assigned in main()
+    if not conf.htaccess:  # presumably keeps stdout limited to the generated rules in htaccess mode
+        print(text)
+
+
+def lines_from_file(filename):  # Return the file's non-blank, non-comment lines with ".adoc" rewritten to ".html".
+    with open(filename, 'r') as fp:
+        lines = []
+        for line in fp.readlines():
+            if line.startswith("#") or len(line.strip()) == 0:  # skip blank lines and lines whose first character is '#'
+                continue
+            lines.append(line.replace(".adoc", ".html").strip())  # replaces every ".adoc" occurrence, then trims surrounding whitespace
+        return lines
+
+
+def main():
+    global conf
+    conf = read_config()
+
+    new = {}
+    name_map = {}
+
+    out("Reading config")
+    old = lines_from_file(conf.old)
+    for line in lines_from_file(conf.new):
+        (path, file) = line.split("/")
+        new[file] = line
+    for line in lines_from_file(conf.mapping):
+        (frm, to) = line.split(";")
+        name_map[frm] = to
+
+    # Files in src/old-pages as of 2022-02-04
+    old_pages = ["configuration-apis.html", "configuration-guide.html", "controlling-results.html", "deployment-guide.html", "enhancing-queries.html", "field-types.html", "fields-and-schema-design.html", "getting-started.html", "indexing-data-operations.html", "installation-deployment.html", "monitoring-solr.html", "query-guide.html", "scaling-solr.html", "schema-indexing-guide.html", "solr-concepts.html", "solr-schema.html", "solrcloud-clusters.html", "user-managed-clusters.html"]
+
+    result = {}
+    old_guide = []
+    failed = {}
+    regex_new = {}
+    out("Converting...")
+    for frm in old:
+        if frm in new:
+            (subpath, name) = new[frm].split("/")
+            if subpath not in regex_new:
+                regex_new[subpath] = []
+            regex_new[subpath].append(name.split(".html")[0])
+        elif frm in name_map:
+            new_name = name_map[frm]
+            if new_name in new:
+                result[frm] = new[new_name]
+            elif new_name.startswith("/guide/"):
+                result[frm] = new_name[7:]
+            elif new_name == "_8_11":
+                old_guide.append(frm.split(".html")[0])
+            else:
+                failed[frm] = "Mapped value %s not in new guide" % new_name
+        elif frm in old_pages:
+            failed[frm] = "Not yet mapped (in src/old-pages)"
+        else:
+            failed[frm] = "404"
+
+    if conf.htaccess:
+        print("# Existing pages moved to sub path")
+        for key in regex_new:
+            print("RedirectMatch ^/guide/(%s)\.html /guide/%s/$1.html" % ("|".join(regex_new[key]), key))

Review comment:
       Ok, will add the `/solr/` level, and `latest` version.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@solr.apache.org
For additional commands, e-mail: issues-help@solr.apache.org