You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devlake.apache.org by wa...@apache.org on 2023/01/21 11:39:21 UTC

[incubator-devlake] branch main updated: fix(jira): optimize collector for remotelink worklog changelog (#4245)

This is an automated email from the ASF dual-hosted git repository.

warren pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 1420f7a2f fix(jira): optimize collector for remotelink worklog changelog (#4245)
1420f7a2f is described below

commit 1420f7a2fa677a8f6c00292cf1f8eba0e78123d0
Author: Warren Chen <yi...@merico.dev>
AuthorDate: Sat Jan 21 19:39:16 2023 +0800

    fix(jira): optimize collector for remotelink worklog changelog (#4245)
    
    * fix(jira): optimize collector for remotelink worklog changelog
    
    * fix(jira): fix  for review
    
    * fix(jira): add comment and optimize sql
---
 backend/helpers/pluginhelper/api/api_extractor.go       |  1 -
 backend/plugins/jira/tasks/issue_changelog_collector.go | 12 +++++++++++-
 backend/plugins/jira/tasks/remotelink_collector.go      | 15 ++++++++++-----
 backend/plugins/jira/tasks/worklog_collector.go         | 12 +++++++++++-
 4 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/backend/helpers/pluginhelper/api/api_extractor.go b/backend/helpers/pluginhelper/api/api_extractor.go
index 2f37d0de4..2b4f86e5d 100644
--- a/backend/helpers/pluginhelper/api/api_extractor.go
+++ b/backend/helpers/pluginhelper/api/api_extractor.go
@@ -127,7 +127,6 @@ func (extractor *ApiExtractor) Execute() errors.Error {
 			if err != nil {
 				return errors.Default.Wrap(err, "error adding result to batch")
 			}
-			extractor.args.Ctx.IncProgress(1)
 		}
 		extractor.args.Ctx.IncProgress(1)
 	}
diff --git a/backend/plugins/jira/tasks/issue_changelog_collector.go b/backend/plugins/jira/tasks/issue_changelog_collector.go
index 46f525373..480d853b1 100644
--- a/backend/plugins/jira/tasks/issue_changelog_collector.go
+++ b/backend/plugins/jira/tasks/issue_changelog_collector.go
@@ -75,7 +75,17 @@ func CollectIssueChangelogs(taskCtx plugin.SubTaskContext) errors.Error {
 	}
 	incremental := collectorWithState.IsIncremental()
 	if incremental {
-		clauses = append(clauses, dal.Having("i.updated > max(c.issue_updated) OR  (max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0)"))
+		clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(c.issue_updated) OR (max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0))", collectorWithState.LatestState.LatestSuccessStart))
+	} else {
+		/*
+			i.updated > max(rl.issue_updated) was deleted because for non-incremental collection,
+			max(rl.issue_updated) will only be one of null, less or equal to i.updated
+			so i.updated > max(rl.issue_updated) is always false.
+			max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0 infers the issue has more than 100 changelogs,
+			because we collected changelogs when collecting issues, and assign changelog.issue_updated if num of changelogs < 100,
+			and max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0 means all changelogs for the issue were not assigned issue_updated
+		*/
+		clauses = append(clauses, dal.Having("max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0"))
 	}
 
 	if logger.IsLevelEnabled(log.LOG_DEBUG) {
diff --git a/backend/plugins/jira/tasks/remotelink_collector.go b/backend/plugins/jira/tasks/remotelink_collector.go
index 52fc9ee99..3ebed4c39 100644
--- a/backend/plugins/jira/tasks/remotelink_collector.go
+++ b/backend/plugins/jira/tasks/remotelink_collector.go
@@ -68,12 +68,17 @@ func CollectRemotelinks(taskCtx plugin.SubTaskContext) errors.Error {
 	}
 	incremental := collectorWithState.IsIncremental()
 	if incremental {
-		if collectorWithState.LatestState.LatestSuccessStart != nil {
-			clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", collectorWithState.LatestState.LatestSuccessStart))
-		} else {
-			clauses = append(clauses, dal.Having("i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL"))
-		}
+		clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", collectorWithState.LatestState.LatestSuccessStart))
 	}
+	/*
+		i.updated > max(rl.issue_updated) was deleted because for non-incremental collection, max(rl.issue_updated) is always null.
+			so i.updated > max(rl.issue_updated) is constantly false
+		also, for the first collection, max(rl.issue_updated) is always null as there is no data in _tool_jira_remotelinks.
+		In conclusion, we don't need the following clause
+	*/
+	//else {
+	// clauses = append(clauses, dal.Having("i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL "))
+	//}
 	cursor, err := db.Cursor(clauses...)
 	if err != nil {
 		logger.Error(err, "collect remotelink error")
diff --git a/backend/plugins/jira/tasks/worklog_collector.go b/backend/plugins/jira/tasks/worklog_collector.go
index 517b88aea..2d9d0c85d 100644
--- a/backend/plugins/jira/tasks/worklog_collector.go
+++ b/backend/plugins/jira/tasks/worklog_collector.go
@@ -67,7 +67,17 @@ func CollectWorklogs(taskCtx plugin.SubTaskContext) errors.Error {
 	}
 	incremental := collectorWithState.IsIncremental()
 	if incremental {
-		clauses = append(clauses, dal.Having("i.updated > max(wl.issue_updated) OR  (max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0)"))
+		clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(wl.issue_updated) OR (max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0))", collectorWithState.LatestState.LatestSuccessStart))
+	} else {
+		/*
+			i.updated > max(rl.issue_updated) was deleted because for non-incremental collection,
+			max(rl.issue_updated) will only be one of null, less or equal to i.updated
+			so i.updated > max(rl.issue_updated) is always false.
+			max(c.issue_updated) IS NULL AND COUNT(c.worklog_id) > 0 infers the issue has more than 100 worklogs,
+			because we collected worklogs when collecting issues, and assign worklog.issue_updated if num of worklogs < 100,
+			and max(c.issue_updated) IS NULL AND COUNT(c.worklog_id) > 0 means all worklogs for the issue were not assigned issue_updated
+		*/
+		clauses = append(clauses, dal.Having("max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0"))
 	}
 
 	// construct the input iterator