You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devlake.apache.org by ka...@apache.org on 2023/03/30 22:22:04 UTC

[incubator-devlake] branch main updated: docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)

This is an automated email from the ASF dual-hosted git repository.

ka94 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-devlake.git


The following commit(s) were added to refs/heads/main by this push:
     new 40ec1570b docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
40ec1570b is described below

commit 40ec1570b964b1655f0d1756419fbf1af6aad9b1
Author: Klesh Wong <zh...@merico.dev>
AuthorDate: Fri Mar 31 06:22:00 2023 +0800

    docs: thorough godoc for StatefulApiCollectorForFinalizableEntity (#4626)
    
    * docs: thorough godoc for StatefulApiCollectorForFinalizableEntity
    
    * docs: reword finalizable
    
    * docs: update according to review
    
    * docs: clarify BuildInputIterator usage
    
    ---------
    
    Co-authored-by: Keon Amini <ke...@merico.dev>
---
 .../pluginhelper/api/api_collector_with_state.go   | 52 +++++++++++++---------
 1 file changed, 32 insertions(+), 20 deletions(-)

diff --git a/backend/helpers/pluginhelper/api/api_collector_with_state.go b/backend/helpers/pluginhelper/api/api_collector_with_state.go
index f35fb0419..f62d6b365 100644
--- a/backend/helpers/pluginhelper/api/api_collector_with_state.go
+++ b/backend/helpers/pluginhelper/api/api_collector_with_state.go
@@ -122,18 +122,26 @@ func (m *ApiCollectorStateManager) Execute() errors.Error {
 }
 
 // NewStatefulApiCollectorForFinalizableEntity aims to add timeFilter/diffSync support for
-// APIs that do NOT support filtering data by updated date. However, it comes with the
+// APIs that do NOT support filtering data by the updated date. However, it comes with the
 // following constraints:
 //  1. The entity is a short-lived object or it is likely to be irrelevant
 //     a. ci/id pipelines are short-lived objects
 //     b. pull request might took a year to be closed or never, but it is likely irrelevant
-//  2. The entity must be Finalizable: when it is finalized, no modification forever
+//  2. The entity must be Finalizable, meaning no future modifications will happen to it once it
+//     enters some sort of `Closed`/`Finished` status.
 //  3. The API must fit one of the following traits:
-//     a. it supports filtering by Created Date, in this case, you may specify the `GetTotalPages`
-//     option to fetch data with Determined Strategy if possible.
+//     a. it supports filtering by Created Date, in this case, you must implement the filtering
+//     via the `UrlTemplate`, `Query` or `Header` hook based on the API specification.
 //     b. or sorting by Created Date in Descending order, in this case, you must use `Concurrency`
 //     or `GetNextPageCustomData` instead of `GetTotalPages` for Undetermined Strategy since we have
 //     to stop the process in the middle.
+//
+// Assuming the API fits the bill, the strategies can be categorized into:
+//   - Determined Strategy: if the API supports filtering by the Created Date, use the `GetTotalPages` hook
+//   - Undetermined Strategy: if the API supports sorting by the Created Date in Descending order and
+//     fetching by Page Number, use the `Concurrency` option
+//   - Sequential Strategy: if the API supports sorting by the Created Date in Descending order but
+//     the next page can only be fetched by the Cursor/Token from the previous page, use the `GetNextPageCustomData` hook
 func NewStatefulApiCollectorForFinalizableEntity(args FinalizableApiCollectorArgs) (plugin.SubTask, errors.Error) {
 	// create a manager which could execute multiple collector but acts as a single subtask to callers
 	manager, err := NewStatefulApiCollector(RawDataSubTaskArgs{
@@ -258,27 +266,31 @@ type FinalizableApiCollectorArgs struct {
 	CollectUnfinishedDetails FinalizableApiCollectorDetailArgs
 }
 
+// FinalizableApiCollectorCommonArgs is the common arguments for both list and detail collectors
+// Note that all request-related arguments would be called or utilized before any response-related arguments
 type FinalizableApiCollectorCommonArgs struct {
-	UrlTemplate     string `comment:"GoTemplate for API url"`
-	Query           func(reqData *RequestData, createdAfter *time.Time) (url.Values, errors.Error)
-	Header          func(reqData *RequestData, createdAfter *time.Time) (http.Header, errors.Error)
-	MinTickInterval *time.Duration
-	ResponseParser  func(res *http.Response) ([]json.RawMessage, errors.Error)
-	AfterResponse   common.ApiClientAfterResponse
-	RequestBody     func(reqData *RequestData) map[string]interface{}
-	Method          string
+	UrlTemplate     string                                                                          // required, url path template for the request, e.g. repos/{{ .Params.Name }}/pulls or incident/{{ .Input.Number }} (if using iterators)
+	Method          string                                                                          // optional, request method, e.g. GET(default), POST, PUT, DELETE
+	Query           func(reqData *RequestData, createdAfter *time.Time) (url.Values, errors.Error)  // optional, build query params for the request
+	Header          func(reqData *RequestData, createdAfter *time.Time) (http.Header, errors.Error) // optional, build header for the request
+	RequestBody     func(reqData *RequestData) map[string]interface{}                               // optional, build request body for the request if the Method is set to POST or PUT
+	MinTickInterval *time.Duration                                                                  // optional, minimum interval between two requests, some endpoints might have a more conservative rate limit than others within the same instance, you can mitigate this by setting a higher MinTickInterval to override the connection level rate limit.
+	AfterResponse   common.ApiClientAfterResponse                                                   // optional, hook to run after each response, would be called before the ResponseParser
+	ResponseParser  func(res *http.Response) ([]json.RawMessage, errors.Error)                      // required, parse the response body and return a list of entities
 }
+
+// FinalizableApiCollectorListArgs is the arguments for the list collector
 type FinalizableApiCollectorListArgs struct {
-	// optional, leave it be `nil` if API supports filtering by created date (Don't forget to set the Query)
-	GetCreated func(item json.RawMessage) (time.Time, errors.Error)
 	FinalizableApiCollectorCommonArgs
-	Concurrency           int
-	PageSize              int
-	GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse *http.Response) (interface{}, errors.Error)
-	// need to consider the data missing problem: what if new data gets created during collection?
-	GetTotalPages func(res *http.Response, args *ApiCollectorArgs) (int, errors.Error)
+	GetCreated            func(item json.RawMessage) (time.Time, errors.Error)                                        // optional, to extract the created date from the raw json of a single record, leave it be `nil` if API supports filtering by created date (Don't forget to set the Query)
+	PageSize              int                                                                                         // required, number of records per page
+	Concurrency           int                                                                                         // required for Undetermined Strategy, number of concurrent requests
+	GetNextPageCustomData func(prevReqData *RequestData, prevPageResponse *http.Response) (interface{}, errors.Error) // required for Sequential Strategy, to extract the next page cursor from the given response
+	GetTotalPages         func(res *http.Response, args *ApiCollectorArgs) (int, errors.Error)                        // required for Determined Strategy, to extract the total number of pages from the given response
 }
+
+// FinalizableApiCollectorDetailArgs is the arguments for the detail collector
 type FinalizableApiCollectorDetailArgs struct {
 	FinalizableApiCollectorCommonArgs
-	BuildInputIterator func() (Iterator, errors.Error)
+	BuildInputIterator func() (Iterator, errors.Error) // required, create an iterator that iterates through all unfinalized records in the database. These records will be fed as the "Input" (or {{ .Input.* }} in UrlTemplate) argument back into FinalizableApiCollectorCommonArgs which makes the API calls to re-collect their newest states.
 }