You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devlake.apache.org by wa...@apache.org on 2023/01/21 11:39:21 UTC
[incubator-devlake] branch main updated: fix(jira): optimize collector for remotelink worklog changelog (#4245)
This is an automated email from the ASF dual-hosted git repository.
warren pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 1420f7a2f fix(jira): optimize collector for remotelink worklog changelog (#4245)
1420f7a2f is described below
commit 1420f7a2fa677a8f6c00292cf1f8eba0e78123d0
Author: Warren Chen <yi...@merico.dev>
AuthorDate: Sat Jan 21 19:39:16 2023 +0800
fix(jira): optimize collector for remotelink worklog changelog (#4245)
* fix(jira): optimize collector for remotelink worklog changelog
* fix(jira): fix for review
* fix(jira): add comment and optimize sql
---
backend/helpers/pluginhelper/api/api_extractor.go | 1 -
backend/plugins/jira/tasks/issue_changelog_collector.go | 12 +++++++++++-
backend/plugins/jira/tasks/remotelink_collector.go | 15 ++++++++++-----
backend/plugins/jira/tasks/worklog_collector.go | 12 +++++++++++-
4 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/backend/helpers/pluginhelper/api/api_extractor.go b/backend/helpers/pluginhelper/api/api_extractor.go
index 2f37d0de4..2b4f86e5d 100644
--- a/backend/helpers/pluginhelper/api/api_extractor.go
+++ b/backend/helpers/pluginhelper/api/api_extractor.go
@@ -127,7 +127,6 @@ func (extractor *ApiExtractor) Execute() errors.Error {
if err != nil {
return errors.Default.Wrap(err, "error adding result to batch")
}
- extractor.args.Ctx.IncProgress(1)
}
extractor.args.Ctx.IncProgress(1)
}
diff --git a/backend/plugins/jira/tasks/issue_changelog_collector.go b/backend/plugins/jira/tasks/issue_changelog_collector.go
index 46f525373..480d853b1 100644
--- a/backend/plugins/jira/tasks/issue_changelog_collector.go
+++ b/backend/plugins/jira/tasks/issue_changelog_collector.go
@@ -75,7 +75,17 @@ func CollectIssueChangelogs(taskCtx plugin.SubTaskContext) errors.Error {
}
incremental := collectorWithState.IsIncremental()
if incremental {
- clauses = append(clauses, dal.Having("i.updated > max(c.issue_updated) OR (max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0)"))
+ clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(c.issue_updated) OR (max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0))", collectorWithState.LatestState.LatestSuccessStart))
+ } else {
+ /*
+ i.updated > max(rl.issue_updated) was deleted because for non-incremental collection,
+ max(rl.issue_updated) will only be one of null, less or equal to i.updated
+ so i.updated > max(rl.issue_updated) is always false.
+ max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0 infers the issue has more than 100 changelogs,
+ because we collected changelogs when collecting issues, and assign changelog.issue_updated if num of changelogs < 100,
+ and max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0 means all changelogs for the issue were not assigned issue_updated
+ */
+ clauses = append(clauses, dal.Having("max(c.issue_updated) IS NULL AND COUNT(c.changelog_id) > 0"))
}
if logger.IsLevelEnabled(log.LOG_DEBUG) {
diff --git a/backend/plugins/jira/tasks/remotelink_collector.go b/backend/plugins/jira/tasks/remotelink_collector.go
index 52fc9ee99..3ebed4c39 100644
--- a/backend/plugins/jira/tasks/remotelink_collector.go
+++ b/backend/plugins/jira/tasks/remotelink_collector.go
@@ -68,12 +68,17 @@ func CollectRemotelinks(taskCtx plugin.SubTaskContext) errors.Error {
}
incremental := collectorWithState.IsIncremental()
if incremental {
- if collectorWithState.LatestState.LatestSuccessStart != nil {
- clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", collectorWithState.LatestState.LatestSuccessStart))
- } else {
- clauses = append(clauses, dal.Having("i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL"))
- }
+ clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL)", collectorWithState.LatestState.LatestSuccessStart))
}
+ /*
+ i.updated > max(rl.issue_updated) was deleted because for non-incremental collection, max(rl.issue_updated) is always null.
+ so i.updated > max(rl.issue_updated) is constantly false
+ also, for the first collection, max(rl.issue_updated) is always null as there is no data in _tool_jira_remotelinks.
+ In conclusion, we don't need the following clause
+ */
+ //else {
+ // clauses = append(clauses, dal.Having("i.updated > max(rl.issue_updated) OR max(rl.issue_updated) IS NULL "))
+ //}
cursor, err := db.Cursor(clauses...)
if err != nil {
logger.Error(err, "collect remotelink error")
diff --git a/backend/plugins/jira/tasks/worklog_collector.go b/backend/plugins/jira/tasks/worklog_collector.go
index 517b88aea..2d9d0c85d 100644
--- a/backend/plugins/jira/tasks/worklog_collector.go
+++ b/backend/plugins/jira/tasks/worklog_collector.go
@@ -67,7 +67,17 @@ func CollectWorklogs(taskCtx plugin.SubTaskContext) errors.Error {
}
incremental := collectorWithState.IsIncremental()
if incremental {
- clauses = append(clauses, dal.Having("i.updated > max(wl.issue_updated) OR (max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0)"))
+ clauses = append(clauses, dal.Having("i.updated > ? AND (i.updated > max(wl.issue_updated) OR (max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0))", collectorWithState.LatestState.LatestSuccessStart))
+ } else {
+ /*
+ i.updated > max(rl.issue_updated) was deleted because for non-incremental collection,
+ max(rl.issue_updated) will only be one of null, less or equal to i.updated
+ so i.updated > max(rl.issue_updated) is always false.
+ max(c.issue_updated) IS NULL AND COUNT(c.worklog_id) > 0 infers the issue has more than 100 worklogs,
+ because we collected worklogs when collecting issues, and assign worklog.issue_updated if num of worklogs < 100,
+ and max(c.issue_updated) IS NULL AND COUNT(c.worklog_id) > 0 means all worklogs for the issue were not assigned issue_updated
+ */
+ clauses = append(clauses, dal.Having("max(wl.issue_updated) IS NULL AND COUNT(wl.worklog_id) > 0"))
}
// construct the input iterator