You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by to...@apache.org on 2022/07/06 06:55:51 UTC

[lucene-jira-archive] branch split-up-updating-script created (now b05badc0)

This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a change to branch split-up-updating-script
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git


      at b05badc0 update README

This branch includes the following new commits:

     new 7c3c528b split up updating script into sub-scripts.
     new b05badc0 update README

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[lucene-jira-archive] 01/02: split up updating script into sub-scripts.

Posted by to...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch split-up-updating-script
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git

commit 7c3c528bf33a0b89503a4423ea3a685de051902f
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Wed Jul 6 15:29:44 2022 +0900

    split up updating script into sub-scripts.
---
 migration/src/common.py                            |   9 ++
 migration/src/download_jira.py                     |   2 +-
 migration/src/jira2github_import.py                |   2 +-
 ...e_issue_links.py => remap_cross_issue_links.py} |  47 +++++----
 migration/src/update_issue_links.py                |   2 +
 migration/src/update_issues.py                     | 111 +++++++++++++++++++++
 6 files changed, 153 insertions(+), 20 deletions(-)

diff --git a/migration/src/common.py b/migration/src/common.py
index 93a6a02f..70324894 100644
--- a/migration/src/common.py
+++ b/migration/src/common.py
@@ -11,6 +11,7 @@ LOG_DIRNAME = "log"
 JIRA_DUMP_DIRNAME = "jira-dump"
 JIRA_ATTACHMENTS_DIRPATH = os.getenv("ATTACHMENTS_DL_DIR", str(Path(tempfile.gettempdir()).joinpath("attachments")))
 GITHUB_IMPORT_DATA_DIRNAME = "github-import-data"
+GITHUB_REMAPPED_DATA_DIRNAME = "github-remapped-data"
 MAPPINGS_DATA_DIRNAME = "mappings-data"
 
 ISSUE_MAPPING_FILENAME = "issue-map.csv"
@@ -60,6 +61,14 @@ def github_data_file(data_dir: Path, issue_number: int) -> Path:
     return data_dir.joinpath(f"GH-{issue_id}.json")
 
 
+def github_remapped_issue_data_file(data_dir: Path, issue_number: int) -> Path:
+    return data_dir.joinpath(f"ISSUE-{issue_number}.json")
+
+
+def github_remapped_comment_data_file(data_dir: Path, comment_id: int) -> Path:
+    return data_dir.joinpath(f"COMMENT-{comment_id}.json")
+
+
 def make_github_title(summary: str, jira_id: str) -> str:
     return f"{summary} [{jira_id}]"
 
diff --git a/migration/src/download_jira.py b/migration/src/download_jira.py
index 92bebc32..3db3e803 100644
--- a/migration/src/download_jira.py
+++ b/migration/src/download_jira.py
@@ -1,7 +1,7 @@
 #
 # Create local dump of Jira issues 
 # Usage:
-#   python src/download_jira.py --issues <issue number list>
+#   python src/download_jira.py --issues <jira issue number list>
 #   python src/download_jira.py --min <min issue number> --max <max issue number>
 #
 
diff --git a/migration/src/jira2github_import.py b/migration/src/jira2github_import.py
index 654e0de3..4399f601 100644
--- a/migration/src/jira2github_import.py
+++ b/migration/src/jira2github_import.py
@@ -1,7 +1,7 @@
 #
 # Convert Jira issues to GitHub issues for Import Issues API (https://gist.github.com/jonmagic/5282384165e0f86ef105)
 # Usage:
-#   python src/jira2github_import.py --issues <issue number list>
+#   python src/jira2github_import.py --issues <jira issue number list>
 #   python src/jira2github_import.py --min <min issue number> --max <max issue number>
 #
 
diff --git a/migration/src/update_issue_links.py b/migration/src/remap_cross_issue_links.py
similarity index 53%
copy from migration/src/update_issue_links.py
copy to migration/src/remap_cross_issue_links.py
index 46254807..11d3bc5f 100644
--- a/migration/src/update_issue_links.py
+++ b/migration/src/remap_cross_issue_links.py
@@ -1,39 +1,42 @@
 #
-# Update GitHub issues/comments to map Jira key to GitHub issue number
+# Remap Jira key to GitHub issue number
 # Usage:
-#   python src/update_issue_links.py --issues <issue number list>
-#   python src/update_issue_links.py
+#   python src/remap_cross_issue_links.py --issues <github issue number list>
+#   python src/remap_cross_issue_links.py
 #
 
 import argparse
 from pathlib import Path
 import sys
 import os
+import json
 
-from common import LOG_DIRNAME, MAPPINGS_DATA_DIRNAME, ISSUE_MAPPING_FILENAME, MaxRetryLimitExceedException, logging_setup, read_issue_id_map, retry_upto
+from common import LOG_DIRNAME, MAPPINGS_DATA_DIRNAME, ISSUE_MAPPING_FILENAME, GITHUB_REMAPPED_DATA_DIRNAME, MaxRetryLimitExceedException, logging_setup, read_issue_id_map, retry_upto, github_remapped_issue_data_file, github_remapped_comment_data_file
 from github_issues_util import *
 from jira_util import embed_gh_issue_link
 
 
 log_dir = Path(__file__).resolve().parent.parent.joinpath(LOG_DIRNAME)
-logger = logging_setup(log_dir, "update_issue_links")
+logger = logging_setup(log_dir, "remap_cross_issue_links")
 
 
 @retry_upto(3, 1.0, logger)
-def update_issue_link_in_issue_body(issue_number: int, issue_id_map: dict[str, str], token: str, repo: str):
+def remap_issue_link_in_issue_body(issue_number: int, issue_id_map: dict[str, str], data_dir: Path, token: str, repo: str):
     body = get_issue_body(token, repo, issue_number, logger)
     if body:
         updated_body = embed_gh_issue_link(body, issue_id_map)
         if updated_body == body:
             logger.debug(f"Issue {issue_number} does not contain any cross-issue links; nothing to do.")
             return
-        if update_issue_body(token, repo, issue_number, updated_body, logger):
-            logger.debug(f"Issue {issue_number} was successfully updated.")
-            
+        data = {"issue_number": issue_number, "body": updated_body}
+        data_file = github_remapped_issue_data_file(data_dir, issue_number)
+        with open(data_file, "w") as fp:
+            json.dump(data, fp=fp, indent=2)
+            logger.debug(f"Updated issue body for issue_number={issue_number} was saved to {data_file}.")
 
 
 @retry_upto(3, 1.0, logger)
-def update_issue_link_in_comments(issue_number: int, issue_id_map: dict[str, str], token: str, repo: str):
+def remap_issue_link_in_comments(issue_number: int, issue_id_map: dict[str, str], data_dir: Path, token: str, repo: str):
     comments = get_issue_comments(token, repo, issue_number, logger)
     if not comments:
         return
@@ -45,8 +48,11 @@ def update_issue_link_in_comments(issue_number: int, issue_id_map: dict[str, str
         if updated_body == body:
             logger.debug(f"Comment {id} does not contain any cross-issue links; nothing to do.")
             continue
-        if update_comment_body(token, repo, id, updated_body, logger):
-            logger.debug(f"Comment {id} was successfully updated.")
+        data = {"comment_id": id, "body": updated_body}
+        data_file = github_remapped_comment_data_file(data_dir, id)
+        with open(data_file, "w") as fp:
+            json.dump(data, fp=fp, indent=2)
+            logger.debug(f"Updated comment body for comment_id={id} was saved to {data_file}.")
 
 
 if __name__ == "__main__":
@@ -62,7 +68,7 @@ if __name__ == "__main__":
     check_authentication(github_token)
 
     parser = argparse.ArgumentParser()
-    parser.add_argument('--issues', type=int, required=False, nargs='*', help='Jira issue number list to be downloaded')
+    parser.add_argument('--issues', type=int, required=False, nargs='*', help='GitHub issue number list to be downloaded')
     args = parser.parse_args()
     
     mapping_data_dir = Path(__file__).resolve().parent.parent.joinpath(MAPPINGS_DATA_DIRNAME)
@@ -71,6 +77,11 @@ if __name__ == "__main__":
         logger.error(f"Jira-GitHub issue id mapping file not found. {issue_mapping_file}")
         sys.exit(1)
     issue_id_map = read_issue_id_map(issue_mapping_file)
+
+    remapped_data_dir = Path(__file__).resolve().parent.parent.joinpath(GITHUB_REMAPPED_DATA_DIRNAME)
+    if not remapped_data_dir.exists():
+        remapped_data_dir.mkdir()
+    assert remapped_data_dir.exists()
     
     issues = []
     if args.issues:
@@ -78,17 +89,17 @@ if __name__ == "__main__":
     else:
         issues = list(issue_id_map.values())
     
-    logger.info(f"Updating GitHub issues")
+    logger.info(f"Remapping cross-issue links")
     for num in issues:
         try:
-            update_issue_link_in_issue_body(num, issue_id_map, github_token, github_repo)
+            remap_issue_link_in_issue_body(num, issue_id_map, remapped_data_dir, github_token, github_repo)
         except MaxRetryLimitExceedException:
-            logger.error(f"Failed to update issue body. Skipped issue {num}")
+            logger.error(f"Failed to export/convert issue body. Skipped issue {num}")
             continue
         try:
-            update_issue_link_in_comments(num, issue_id_map, github_token, github_repo)
+            remap_issue_link_in_comments(num, issue_id_map, remapped_data_dir, github_token, github_repo)
         except MaxRetryLimitExceedException:
-            logger.error(f"Failed to update issue comments. Skipped issue {num}")
+            logger.error(f"Failed to export/convert issue comments. Skipped issue {num}")
             continue
 
     logger.info("Done.")
\ No newline at end of file
diff --git a/migration/src/update_issue_links.py b/migration/src/update_issue_links.py
index 46254807..3309f7dc 100644
--- a/migration/src/update_issue_links.py
+++ b/migration/src/update_issue_links.py
@@ -1,4 +1,6 @@
 #
+# Deprecated.
+#
 # Update GitHub issues/comments to map Jira key to GitHub issue number
 # Usage:
 #   python src/update_issue_links.py --issues <issue number list>
diff --git a/migration/src/update_issues.py b/migration/src/update_issues.py
new file mode 100644
index 00000000..fc1b298d
--- /dev/null
+++ b/migration/src/update_issues.py
@@ -0,0 +1,111 @@
+#
+# Update GitHub issues/comments with re-mapped issue links.
+# Usage:
+#   python src/update_issues.py --issues <github issue number list>
+#   python src/update_issues.py --comments <github comment list>
+#   python src/update_issues.py
+#
+
+import argparse
+from pathlib import Path
+import sys
+import os
+import json
+
+from common import LOG_DIRNAME, GITHUB_REMAPPED_DATA_DIRNAME, MaxRetryLimitExceedException, logging_setup, retry_upto, github_remapped_issue_data_file, github_remapped_comment_data_file
+from github_issues_util import *
+
+
+log_dir = Path(__file__).resolve().parent.parent.joinpath(LOG_DIRNAME)
+logger = logging_setup(log_dir, "update_issues")
+
+
+def update_issue_by_number(issue_number: int, data_dir: Path, token: str, repo: str):
+    data_file = github_remapped_issue_data_file(data_dir, issue_number)
+    update_issue(data_file, token, repo)
+
+
+@retry_upto(3, 1.0, logger)
+def update_issue(data_file: Path, token: str, repo: str):
+    with open(data_file) as fp:
+        o = json.load(fp)
+        issue_number = o["issue_number"]
+        body = o["body"]
+        if update_issue_body(token, repo, issue_number, body, logger):
+            logger.debug(f"Issue {issue_number} was successfully updated.")
+
+
+def update_comment_by_id(comment_id: int, data_dir: Path, token: str, repo: str):
+    data_file = github_remapped_comment_data_file(data_dir, comment_id)
+    update_comment(data_file, token, repo)
+
+
+@retry_upto(3, 1.0, logger)
+def update_comment(data_file: Path, token: str, repo: str):
+    with open(data_file) as fp:
+        o = json.load(fp)
+        comment_id = o["comment_id"]
+        body = o["body"]
+        if update_comment_body(token, repo, comment_id, body, logger):
+            logger.debug(f"Comment {comment_id} was successfully updated.")
+
+
+if __name__ == "__main__":
+    github_token = os.getenv("GITHUB_PAT")
+    if not github_token:
+        print("Please set your GitHub token to GITHUB_PAT environment variable.")
+        sys.exit(1)
+    github_repo = os.getenv("GITHUB_REPO")
+    if not github_repo:
+        print("Please set GitHub repo location to GITHUB_REPO environment varialbe.")
+        sys.exit(1)
+
+    check_authentication(github_token)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--issues', type=int, required=False, nargs='*', help='GitHub issue number list to be updated')
+    parser.add_argument('--comments', type=int, required=False, nargs='*', help='GitHub comment id list to be updated')
+    args = parser.parse_args()
+
+    remapped_data_dir = Path(__file__).resolve().parent.parent.joinpath(GITHUB_REMAPPED_DATA_DIRNAME)
+    if not remapped_data_dir.exists():
+        remapped_data_dir.mkdir()
+    assert remapped_data_dir.exists()
+    
+    issues = []
+    if args.issues:
+        issues = args.issues
+    comments = []
+    if args.comments:
+        comments = args.comments
+    
+    logger.info(f"Updating issues/comments")
+    
+    if not issues and not comments:
+        for data_file in remapped_data_dir.glob("ISSUE-*.json"):
+            try:
+                update_issue(data_file, github_token, github_repo)
+            except MaxRetryLimitExceedException:
+                logger.error(f"Failed to update issue body. Skipped {data_file}")
+                continue
+        for data_file in remapped_data_dir.glob("COMMENT-*.json"):
+            try:
+                update_comment(data_file, github_token, github_repo)
+            except MaxRetryLimitExceedException:
+                logger.error(f"Failed to update issue comments. Skipped {data_file}")
+                continue
+    else:
+        for num in issues:
+            try:
+                update_issue_by_number(num, remapped_data_dir, github_token, github_repo)
+            except MaxRetryLimitExceedException:
+                logger.error(f"Failed to update issue body. Skipped issue {num}")
+                continue
+        for id in comments:
+            try:
+                update_comment_by_id(id, remapped_data_dir, github_token, github_repo)
+            except MaxRetryLimitExceedException:
+                logger.error(f"Failed to update issue comments. Skipped comment {id}")
+                continue
+
+    logger.info("Done.")


[lucene-jira-archive] 02/02: update README

Posted by to...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch split-up-updating-script
in repository https://gitbox.apache.org/repos/asf/lucene-jira-archive.git

commit b05badc0bbd5926c1490c6e149bd03b6998d7b13
Author: Tomoko Uchida <to...@gmail.com>
AuthorDate: Wed Jul 6 15:55:40 2022 +0900

    update README
---
 migration/README.md | 89 ++++++++++++++++++++++++++---------------------------
 1 file changed, 43 insertions(+), 46 deletions(-)

diff --git a/migration/README.md b/migration/README.md
index 19429abe..593a8879 100644
--- a/migration/README.md
+++ b/migration/README.md
@@ -30,23 +30,27 @@ source .env
 
 ## Usage
 
+All logs are saved in `migration/log`.
+
 ### 1. Download Jira issues
 
 `src/download_jira.py` downloads Jira issues and dumps them as JSON files in `migration/jira-dump`. This also downloads attached files in each issue.
 
 ```
-(.venv) migration $ python src/download_jira.py --min 10500 --max 10600
-[2022-06-26 01:57:02,408] INFO:download_jira: Downloading Jira issues in /mnt/hdd/repo/sandbox-lucene-10557/migration/jira-dump
-[2022-06-26 01:57:17,843] INFO:download_jira: Done.
-
-(.venv) migration $ cat log/jira2github_import_2022-06-26T01\:34\:22.log 
-[2022-06-26 01:34:22,300] INFO:jira2github_import: Converting Jira issues to GitHub issues in /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data
-[2022-06-26 01:34:23,355] DEBUG:jira2github_import: GitHub issue data created: /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data/GH-LUCENE-10500.json
-[2022-06-26 01:34:23,519] DEBUG:jira2github_import: GitHub issue data created: /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data/GH-LUCENE-10501.json
-[2022-06-26 01:34:24,894] DEBUG:jira2github_import: GitHub issue data created: /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data/GH-LUCENE-10502.json
+(.venv) migration $ python src/download_jira.py --min 10500 --max 10510
+[2022-07-06 15:43:00,864] INFO:download_jira: Downloading Jira issues in /mnt/hdd/repo/lucene-jira-archive/migration/jira-dump. Attachments are saved in ..
+[2022-07-06 15:43:16,247] INFO:download_jira: Done.
+
+(.venv) migration $ ls jira-dump/
+LUCENE-10500.json
+LUCENE-10501.json
+LUCENE-10502.json
 ...
 ```
 
+Downloaded attachments should be committed to a dedicated repo/branch for them.
+
+
 ### 2. Convert Jira issues to GitHub issues
 
 `src/jira2github_import.py` converts Jira dumps into GitHub data that are importable to [issue import API](https://gist.github.com/jonmagic/5282384165e0f86ef105). Converted JSON data is saved in `migration/github-import-data`.
@@ -54,14 +58,14 @@ source .env
 Also this resolves all Jira user ID - GitHub account alignment if the account mapping is given in `mapping-data/account-map.csv`. 
 
 ```
-(.venv) migration $ python src/jira2github_import.py --min 10500 --max 10600
-[2022-06-26 01:34:22,300] INFO:jira2github_import: Converting Jira issues to GitHub issues in /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data
-[2022-06-26 01:36:27,739] INFO:jira2github_import: Done.
-
-(.venv) migration $ cat log/jira2github_import_2022-06-26T01\:34\:22.log
-[2022-06-26 01:34:22,300] INFO:jira2github_import: Converting Jira issues to GitHub issues in /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data
-[2022-06-26 01:34:23,355] DEBUG:jira2github_import: GitHub issue data created: /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data/GH-LUCENE-10500.json
-[2022-06-26 01:34:23,519] DEBUG:jira2github_import: GitHub issue data created: /mnt/hdd/repo/sandbox-lucene-10557/migration/github-import-data/GH-LUCENE-10501.json
+(.venv) migration $ python src/jira2github_import.py --min 10500 --max 10510
+[2022-07-06 15:46:38,837] INFO:jira2github_import: Converting Jira issues to GitHub issues in /mnt/hdd/repo/lucene-jira-archive/migration/github-import-data
+[2022-07-06 15:46:48,761] INFO:jira2github_import: Done.
+
+(.venv) migration $ ls github-import-data/
+GH-LUCENE-10500.json
+GH-LUCENE-10501.json
+GH-LUCENE-10502.json
 ...
 ```
 
@@ -72,47 +76,40 @@ First pass: `src/import_github_issues.py` imports GitHub issues and comments via
 We confirmed this script does not trigger any notifications.
 
 ```
-(.venv) migration $ python src/import_github_issues.py --min 10500 --max 10600
-[2022-06-26 01:36:46,749] INFO:import_github_issues: Importing GitHub issues
-[2022-06-26 01:47:35,979] INFO:import_github_issues: Done.
-
-(.venv) migration $ cat log/import_github_issues_2022-06-26T01\:36\:46.log
-[2022-06-26 01:36:46,749] INFO:import_github_issues: Importing GitHub issues
-[2022-06-26 01:36:52,299] DEBUG:import_github_issues: Import GitHub issue https://github.com/mocobeta/migration-test-2/issues/1 was successfully completed.
-[2022-06-26 01:36:57,883] DEBUG:import_github_issues: Import GitHub issue https://github.com/mocobeta/migration-test-2/issues/2 was successfully completed.
-[2022-06-26 01:37:03,405] DEBUG:import_github_issues: Import GitHub issue https://github.com/mocobeta/migration-test-2/issues/3 was successfully completed.
+(.venv) migration $ python src/import_github_issues.py --min 10500 --max 10510
+[2022-07-06 15:47:48,230] INFO:import_github_issues: Importing GitHub issues
+[2022-07-06 15:52:06,314] INFO:import_github_issues: Done.
 ...
 
 (.venv) migration $ cat mappings-data/issue-map.csv
 JiraKey,GitHubUrl,GitHubNumber
-LUCENE-10500,https://github.com/mocobeta/migration-test-2/issues/1,1
-LUCENE-10501,https://github.com/mocobeta/migration-test-2/issues/2,2
-LUCENE-10502,https://github.com/mocobeta/migration-test-2/issues/3,3
+LUCENE-10500,https://github.com/mocobeta/migration-test-3/issues/42,42
+LUCENE-10501,https://github.com/mocobeta/migration-test-3/issues/43,43
+LUCENE-10502,https://github.com/mocobeta/migration-test-3/issues/44,44
 ...
 ```
 
-### 4. Update GitHub issues and comments
+### 4. Re-map cross-issue links on GitHub
 
-Second pass: `src/update_issue_links.py` 1) iterates all imported GitHub issue descriptions and comments; 2) embed correct GitHub issue number next to the corresponding Jira issue key with previously created issue number mapping; 3) updates them if the texts are changed.
+`src/remap_cross_issue_links.py` exports issues and comments from GitHub and saves the updated issue/comment bodies to `migration/github-remapped-data`.
 
-e.g.: if `LUCENE-10500` is mapped to GitHub issue `#100`, then all text fragments `LUCENE-10500`  in issue descriptions and comments will be updated to `LUCENE-10500 (#100)`.
+```
+(.venv) migration $ python src/remap_cross_issue_links.py --issues 40 41
+[2022-07-06 15:32:39,895] INFO:remap_cross_issue_links: Remapping cross-issue links
+[2022-07-06 15:32:47,729] INFO:remap_cross_issue_links: Done.
 
-We confirmed this script does not trigger any notifications.
+(.venv) migration $ ls github-remapped-data/
+COMMENT-1175792003.json  COMMENT-1175792076.json  COMMENT-1175797378.json  COMMENT-1175797444.json  COMMENT-1175797570.json  ISSUE-40.json  ISSUE-41.json
+```
+
+### 5. Update GitHub issues and comments
+
+Second pass: `src/update_issues.py` updates issues and comments with updated issue/comment bodies.
 
 ```
-(.venv) migration $ python src/update_issue_links.py
-[2022-06-26 01:59:43,324] INFO:update_issue_links: Updating GitHub issues
-[2022-06-26 02:17:38,332] INFO:update_issue_links: Done.
-
-(.venv) migration $ cat log/update_issue_links_2022-06-26T01\:59\:43.log
-[2022-06-26 01:59:43,324] INFO:update_issue_links: Updating GitHub issues
-[2022-06-26 01:59:45,586] DEBUG:update_issue_links: Issue 1 does not contain any cross-issue links; nothing to do.
-[2022-06-26 01:59:50,062] DEBUG:update_issue_links: # comments in issue 1 = 3
-[2022-06-26 01:59:52,601] DEBUG:update_issue_links: Comment 1166321470 was successfully updated.
-[2022-06-26 01:59:55,164] DEBUG:update_issue_links: Comment 1166321472 was successfully updated.
-[2022-06-26 01:59:55,165] DEBUG:update_issue_links: Comment 1166321473 does not contain any cross-issue links; nothing to do.
-[2022-06-26 01:59:57,426] DEBUG:update_issue_links: Issue 2 does not contain any cross-issue links; nothing to do.
-...
+(.venv) migration $ python src/update_issues.py --issues 40 41 --comments 1175797570 1175797444
+[2022-07-06 15:34:59,537] INFO:update_issues: Updating issues/comments
+[2022-07-06 15:35:06,532] INFO:update_issues: Done.
 ```
 
 ## Already implemented things