You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@yunikorn.apache.org by GitBox <gi...@apache.org> on 2021/01/29 05:07:27 UTC

[GitHub] [incubator-yunikorn-core] wilfred-s commented on a change in pull request #216: YUNIKORN-414: Add partitions REST API to fetch list of partitions

wilfred-s commented on a change in pull request #216:
URL: https://github.com/apache/incubator-yunikorn-core/pull/216#discussion_r566581972



##########
File path: pkg/webservice/handlers_test.go
##########
@@ -607,3 +627,46 @@ func TestGetNodesUtilJSON(t *testing.T) {
 	assert.Equal(t, subresNon[0].NumOfNodes, int64(-1))
 	assert.Equal(t, subresNon[0].NodeNames[0], "N/A")
 }
+
+func TestPartitions(t *testing.T) {
+	configs.MockSchedulerConfigByData([]byte(configMultiPartitions))
+	var err error
+	schedulerContext, err = scheduler.NewClusterContext(rmID, policyGroup)
+	assert.NilError(t, err, "Error when load clusterInfo from config")
+	assert.Equal(t, 2, len(schedulerContext.GetPartitionMapClone()))
+
+	// Check default partition
+	partitionName := "[" + rmID + "]default"
+	defaultPartition := schedulerContext.GetPartition(partitionName)
+	assert.Equal(t, 0, len(defaultPartition.GetApplications()))
+
+	// add a new app
+	app := newApplication("app-1", partitionName, "root.default", rmID)
+	err = defaultPartition.AddApplication(app)
+	assert.NilError(t, err, "Failed to add Application to Partition.")
+	assert.Equal(t, app.CurrentState(), objects.New.String())
+	assert.Equal(t, 1, len(defaultPartition.GetApplications()))
+
+	NewWebApp(schedulerContext, nil)
+
+	var req *http.Request
+	req, err = http.NewRequest("GET", "/ws/v1/partitions", strings.NewReader(""))
+	assert.NilError(t, err, "App Handler request failed")
+	resp := &MockResponseWriter{}
+	var partitionInfo []dao.PartitionInfo
+	getPartitions(resp, req)
+	err = json.Unmarshal(resp.outputBytes, &partitionInfo)
+	assert.NilError(t, err, "failed to unmarshal applications dao response from response body: %s", string(resp.outputBytes))
+	for _, part := range partitionInfo {
+		if part.Name == partitionName {
+			assert.Equal(t, part.Name, "[rm-123]default")
+			assert.Equal(t, part.NodeSortingPolicy, "fair")
+			assert.Equal(t, part.Applications["total"], 1)

Review comment:
       We should also test the count for the state the application is in.
   ```
   assert.Equal(t, part.Applications["new"], 1)
   ```
   can we also extend this to multiple apps, in different states and make sure they all count correctly and sum up to the total.

##########
File path: pkg/scheduler/partition.go
##########
@@ -264,6 +264,8 @@ func (pc *PartitionContext) isStopped() bool {
 func (pc *PartitionContext) handlePartitionEvent(event objects.ObjectEvent) error {
 	err := pc.stateMachine.Event(event.String(), pc.Name)
 	if err == nil {
+		pc.Lock()
+		defer pc.Unlock()

Review comment:
       Why do we need this locking?
   The event handling makes sure that the events are processed sequentially.
   The only way a partition can change state is via a config update. So there is no way that we can have multiple things happening at the same time here. There is also only one go routine that can make this change.
   
   Unless this has shown as an issue for data race I do not think we need it.

##########
File path: pkg/webservice/dao/partition_info.go
##########
@@ -24,12 +24,29 @@ type PartitionDAOInfo struct {
 	Queues        QueueDAOInfo      `json:"queues"`
 }
 
+type PartitionInfo struct {
+	Name                    string            `json:"name"`
+	Capacity                PartitionCapacity `json:"capacity"`
+	NodeSortingPolicy       string            `json:"nodeSortingPolicy"`
+	Applications            Applications      `json:"applications"`
+	State                   string            `json:"state"`
+	LastStateTransitionTime string            `json:"lastStateTransitionTime"`
+}
+
 type PartitionCapacity struct {
 	Capacity     string `json:"capacity"`
-	UsedCapacity string `json:"usedcapacity"`
+	UsedCapacity string `json:"usedCapacity"`
 }
 
 type NodeInfo struct {
 	NodeID     string `json:"nodeId"`
 	Capability string `json:"capability"`
 }
+
+type Applications struct {
+	Total     int `json:"total"`
+	Running   int `json:"running"`
+	Pending   int `json:"pending"`
+	Completed int `json:"completed"`
+	Failed    int `json:"failed"`

Review comment:
       I do not see this structure being used anywhere we should remove it.

##########
File path: pkg/scheduler/partition.go
##########
@@ -1208,3 +1210,17 @@ func (pc *PartitionContext) removeAllocationAsk(appID string, allocationKey stri
 		}
 	}
 }
+
+func (pc *PartitionContext) GetCurrentState() string {
+	return pc.stateMachine.Current()
+}
+
+func (pc *PartitionContext) GetStateTime() time.Time {
+	pc.RLock()
+	defer pc.RUnlock()
+	return pc.stateTime
+}
+
+func (pc *PartitionContext) GetNodeSortingPolicy() *policies.NodeSortingPolicy {
+	return pc.nodeSortingPolicy

Review comment:
       We need locking around this. The policy could be changed via the config update and we could thus be triggering a data race.
   I would also recommend that we return a String instead of the policy itself. We do not need more than the String 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org