You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devlake.apache.org by ka...@apache.org on 2023/03/30 22:22:04 UTC
[incubator-devlake] branch main updated: docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
This is an automated email from the ASF dual-hosted git repository.
ka94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git
The following commit(s) were added to refs/heads/main by this push:
new 40ec1570b docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
40ec1570b is described below
commit 40ec1570b964b1655f0d1756419fbf1af6aad9b1
Author: Klesh Wong <zh...@merico.dev>
AuthorDate: Fri Mar 31 06:22:00 2023 +0800
docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
* docs: thorough godoc for StatefulApiCollectorForFinalizableEntity
* docs: reword finalizable
* docs: update according to review
* docs: clarify BuildInputIterator usage
---------
Co-authored-by: Keon Amini <ke...@merico.dev>
---
.../pluginhelper/api/api_collector_with_state.go | 52 +++++++++++++---------
1 file changed, 32 insertions(+), 20 deletions(-)
diff --git a/backend/helpers/pluginhelper/api/api_collector_with_state.go b/backend/helpers/pluginhelper/api/api_collector_with_state.go
index f35fb0419..f62d6b365 100644
--- a/backend/helpers/pluginhelper/api/api_collector_with_state.go
+++ b/backend/helpers/pluginhelper/api/api_collector_with_state.go
@@ -122,18 +122,26 @@ func (m *ApiCollectorStateManager) Execute() errors.Error {
}
// NewStatefulApiCollectorForFinalizableEntity aims to add timeFilter/diffSync support for
-// APIs that do NOT support filtering data by updated date. However, it comes with the
+// APIs that do NOT support filtering data by the updated date. However, it comes with the
// following constraints:
// 1. The entity is a short-lived object or it is likely to be irrelevant
// a. ci/cd pipelines are short-lived objects
// b. a pull request might take a year to be closed, or never be closed, but it is likely irrelevant
-// 2. The entity must be Finalizable: when it is finalized, no modification forever
+// 2. The entity must be Finalizable, meaning no future modifications will happen to it once it
+// enters some sort of `Closed`/`Finished` status.
// 3. The API must fit one of the following traits:
-// a. it supports filtering by Created Date, in this case, you may specify the `GetTotalPages`
-// option to fetch data with Determined Strategy if possible.
+// a. it supports filtering by Created Date, in this case, you must implement the filtering
+// via the `UrlTemplate`, `Query` or `Header` hook based on the API specification.
// b. or sorting by Created Date in Descending order, in this case, you must use `Concurrency`
// or `GetNextPageCustomData` instead of `GetTotalPages` for Undetermined Strategy since we have
// to stop the process in the middle.
+//
+// Assuming the API fits the bill, the strategies can be categorized into:
+// - Determined Strategy: if the API supports filtering by the Created Date, use the `GetTotalPages` hook
+// - Undetermined Strategy: if the API supports sorting by the Created Date in Descending order and
+// fetching by Page Number, use the `Concurrency` hook
+// - Sequential Strategy: if the API supports sorting by the Created Date in Descending order but
+// the next page can only be fetched by the Cursor/Token from the previous page, use the `GetNextPageCustomData` hook
func NewStatefulApiCollectorForFinalizableEntity(args FinalizableApiCollectorArgs) (plugin.SubTask, errors.Error) {
// create a manager which could execute multiple collector but acts as a single subtask to callers
manager, err := NewStatefulApiCollector(RawDataSubTaskArgs{
@@ -258,27 +266,31 @@ type FinalizableApiCollectorArgs struct {
CollectUnfinishedDetails FinalizableApiCollectorDetailArgs
}
+// FinalizableApiCollectorCommonArgs is the common arguments for both list and detail collectors
+// Note that all request-related arguments would be called or utilized before any response-related arguments
type FinalizableApiCollectorCommonArgs struct {
- UrlTemplate string `comment:"GoTemplate for API url"`
- Query func(reqData *RequestData, createdAfter *time.Time) (url.Values, errors.Error)
- Header func(reqData *RequestData, createdAfter *time.Time) (http.Header, errors.Error)
- MinTickInterval *time.Duration
- ResponseParser func(res *http.Response) ([]json.RawMessage, errors.Error)
- AfterResponse common.ApiClientAfterResponse
- RequestBody func(reqData *RequestData) map[string]interface{}
- Method string
+ UrlTemplate string // required, url path template for the request, e.g. repos/{{ .Params.Name }}/pulls or incident/{{ .Input.Number }} (if using iterators)
+ Method string // optional, request method, e.g. GET(default), POST, PUT, DELETE
+ Query func(reqData *RequestData, createdAfter *time.Time) (url.Values, errors.Error) // optional, build query params for the request
+ Header func(reqData *RequestData, createdAfter *time.Time) (http.Header, errors.Error) // optional, build header for the request
+ RequestBody func(reqData *RequestData) map[string]interface{} // optional, build request body for the request if the Method is set to POST or PUT
+ MinTickInterval *time.Duration // optional, minimum interval between two requests, some endpoints might have a more conservative rate limit than others within the same instance, you can mitigate this by setting a higher MinTickInterval to override the connection level rate limit.
+ AfterResponse common.ApiClientAfterResponse // optional, hook to run after each response, would be called before the ResponseParser
+ ResponseParser func(res *http.Response) ([]json.RawMessage, errors.Error) // required, parse the response body and return a list of entities
}
+
+// FinalizableApiCollectorListArgs is the arguments for the list collector
type FinalizableApiCollectorListArgs struct {
- // optional, leave it be `nil` if API supports filtering by created date (Don't forget to set the Query)
- GetCreated func(item json.RawMessage) (time.Time, errors.Error)
FinalizableApiCollectorCommonArgs
- Concurrency int
- PageSize int
- GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse *http.Response) (interface{}, errors.Error)
- // need to consider the data missing problem: what if new data gets created during collection?
- GetTotalPages func(res *http.Response, args *ApiCollectorArgs) (int, errors.Error)
+ GetCreated func(item json.RawMessage) (time.Time, errors.Error) // optional, to extract the created date from the raw json of a single record, leave it `nil` if the API supports filtering by created date (Don't forget to set the Query)
+ PageSize int // required, number of records per page
+ Concurrency int // required for Undetermined Strategy, number of concurrent requests
+ GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse *http.Response) (interface{}, errors.Error) // required for Sequential Strategy, to extract the next page cursor from the given response
+ GetTotalPages func(res *http.Response, args *ApiCollectorArgs) (int, errors.Error) // required for Determined Strategy, to extract the total number of pages from the given response
}
+
+// FinalizableApiCollectorDetailArgs is the arguments for the detail collector
type FinalizableApiCollectorDetailArgs struct {
FinalizableApiCollectorCommonArgs
- BuildInputIterator func() (Iterator, errors.Error)
+ BuildInputIterator func() (Iterator, errors.Error) // required, create an iterator that iterates through all unfinalized records in the database. These records will be fed as the "Input" (or {{ .Input.* }} in UrlTemplate) argument back into FinalizableApiCollectorCommonArgs which makes the API calls to re-collect their newest states.
}