You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@yunikorn.apache.org by "manirajv06 (via GitHub)" <gi...@apache.org> on 2023/06/09 13:57:37 UTC

[GitHub] [yunikorn-core] manirajv06 opened a new pull request, #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

manirajv06 opened a new pull request, #563:
URL: https://github.com/apache/yunikorn-core/pull/563

   ### What is this PR for?
   Enforcement checks 
   
   
   ### What type of PR is it
   * [ ] - Feature
   
   ### Todos
   * [ ] - Task
   
   ### What is the Jira issue?
   https://issues.apache.org/jira/browse/YUNIKORN-1610
   
   ### How should this be tested?
   
   ### Screenshots (if appropriate)
   
   ### Questions:
   * [ ] - The licenses files need update.
   * [ ] - There is breaking changes for older versions.
   * [ ] - It needs documentation.
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] manirajv06 closed pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "manirajv06 (via GitHub)" <gi...@apache.org>.
manirajv06 closed pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota
URL: https://github.com/apache/yunikorn-core/pull/563


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228218951


##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -115,6 +132,74 @@ func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID
 	return removeQT, nil
 }
 
+func (qt *QueueTracker) setMaxApplications(count uint64, queuePath string) error {
+	log.Logger().Debug("Setting max applications",
+		zap.String("queue path", queuePath),
+		zap.Uint64("max applications", count))
+	var childQueuePath, immediateChildQueueName string
+	childQueuePath, immediateChildQueueName = getChildQueuePath(queuePath)
+	childQueueTracker := qt
+	if childQueuePath != "" {
+		for childQueuePath != "" {
+			if childQueueTracker != nil {
+				if len(childQueueTracker.childQueueTrackers) == 0 || childQueueTracker.childQueueTrackers[immediateChildQueueName] == nil {
+					newChildQt := newQueueTracker(immediateChildQueueName)
+					childQueueTracker.childQueueTrackers[immediateChildQueueName] = newChildQt
+					childQueueTracker = newChildQt
+				} else {
+					childQueueTracker = childQueueTracker.childQueueTrackers[immediateChildQueueName]
+				}
+			}
+			childQueuePath, immediateChildQueueName = getChildQueuePath(childQueuePath)
+		}
+	}
+
+	if int(childQueueTracker.maxRunningApps) != 0 && len(childQueueTracker.runningApplications) > int(count) {
+		log.Logger().Warn("Current running applications is greater than config max applications",
+			zap.String("queue path", queuePath),
+			zap.Int("current running applications", len(childQueueTracker.runningApplications)),
+			zap.Uint64("config max applications", count))
+		return fmt.Errorf("current running applications is greater than config max applications for %s", queuePath)
+	} else {
+		childQueueTracker.maxRunningApps = count
+	}
+	return nil
+}
+
+func (qt *QueueTracker) setMaxResources(resource *resources.Resource, queuePath string) error {

Review Comment:
   I have a very strong deja vu... I'm pretty sure I've commented about `setMaxResources()` being very similar to `setMaxApplications()`... Was that a different PR? Anyway, here it is again! :)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228165826


##########
pkg/common/configs/configvalidator.go:
##########
@@ -153,6 +219,56 @@ func checkQueueMaxApplications(cur QueueConfig) error {
 	return nil
 }
 
+func checkLimitMaxApplications(cur QueueConfig, parent *QueueConfig, users map[string]uint64, groups map[string]uint64) error {
+	// Collect user and group limits of parent queue
+	if parent != nil {
+		for _, limit := range parent.Limits {
+			for _, user := range limit.Users {
+				users[user] = limit.MaxApplications
+			}
+			for _, group := range limit.Groups {
+				groups[group] = limit.MaxApplications
+			}
+		}
+	}
+
+	// compare user & group limit setting between the current queue and parent queue
+	for _, limit := range cur.Limits {
+		for _, user := range limit.Users {
+			curM := limit.MaxApplications
+			// Is user limit setting exists?
+			if _, ok := users[user]; ok {
+				if users[user] != 0 && (users[user] < curM || curM == 0) {
+					return fmt.Errorf("user %s max applications %v of queue %s is greater than immediate or ancestor parent max applications %v", user, curM, cur.Name, users[user])
+				}
+				users[user] = uint64(math.Min(float64(curM), float64(users[user])))
+			} else {
+				users[user] = curM
+			}
+		}
+		for _, group := range limit.Groups {
+			curM := limit.MaxApplications
+			// Is group limit setting exists?
+			if _, ok := groups[group]; ok {
+				if groups[group] != 0 && (groups[group] < curM || curM == 0) {
+					return fmt.Errorf("group %s max applications %v of queue %s is greater than immediate or ancestor parent max applications %v", group, curM, cur.Name, groups[group])
+				}
+				groups[group] = uint64(math.Min(float64(curM), float64(groups[group])))

Review Comment:
   `Min()` as Wilfred asked.



##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+func TestUserLimitsWithHierarchicalQueue(t *testing.T) {
+	// Make sure parent queue user max apps should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err := CreateConfig(prepareUserLimitsConfig(50, "{memory: 10000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:10000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:1000 vcore:100000]")
+
+	// Make sure parent queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsConfig(90, "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 90 of queue leaf is greater than immediate or ancestor parent max applications 50")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(90, "{memory: 100000, vcore: 100}", "{memory: 90000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(900, "{memory: 10000, vcore: 1000}", "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 900 of queue leaf is greater than immediate or ancestor parent max applications 500")
+
+	// Special case: make sure wildcard user with hierarchical queue check
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - "*"
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 100000, vcore: 1000}
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 100000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - "*"
+                    maxapplications: 90
+                    maxresources: {memory: 90000, vcore: 100}
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	// validate the config and check after the update
+	_, err = CreateConfig(data)
+	assert.ErrorContains(t, err, "user * max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+}
+
+func prepareGroupLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `

Review Comment:
   Placeholders



##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#issuecomment-1602694086

   @manirajv06 could you rebase this patch? It still has YUNIKORN-1608 in the commit history. Also, squish the older commits at least because now it's 18 commits total. You can keep the last 4-5 but the previous ones are no longer needed.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228203762


##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -48,20 +49,36 @@ func newQueueTracker(queueName string) *QueueTracker {
 		queueName:           queueName,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResourceUsage:    resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}

Review Comment:
   This is something I've been thinking about - do we really need these checks? It's similar to what I observed in YUNIKORN-1807. I'm not sure that being so defensive is needed if everything comes from our code. It's not like a method which is called from all kinds of places. We have unit tests in place which verify this. Calls to increase/decrease go though `Manager`, which is only called by the `Application`. If `usage` somehow becomes `nil` at any point, we already have a pretty nasty bug and we should just bail out and let YK crash. 
   Same for the strings - that does not cause panic, but it's a bug, we don't gain anything by returning it to the caller. 
   Instead of having these kinds of `if` branches, IMO it's much better to ensure proper test coverage. For example, you can add a test to `application_test.go` which makes sure that we call these methods with the right values and it's a valid path.
   
   Another thing is that even if `queuePath` is not empty, it can still be an invalid string that completely messes up `qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(...)` because the map will return a `nil` tracker. So we only defend against a special case to begin with.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228149072


##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228165826


##########
pkg/common/configs/configvalidator.go:
##########
@@ -153,6 +219,56 @@ func checkQueueMaxApplications(cur QueueConfig) error {
 	return nil
 }
 
+func checkLimitMaxApplications(cur QueueConfig, parent *QueueConfig, users map[string]uint64, groups map[string]uint64) error {
+	// Collect user and group limits of parent queue
+	if parent != nil {
+		for _, limit := range parent.Limits {
+			for _, user := range limit.Users {
+				users[user] = limit.MaxApplications
+			}
+			for _, group := range limit.Groups {
+				groups[group] = limit.MaxApplications
+			}
+		}
+	}
+
+	// compare user & group limit setting between the current queue and parent queue
+	for _, limit := range cur.Limits {
+		for _, user := range limit.Users {
+			curM := limit.MaxApplications
+			// Is user limit setting exists?
+			if _, ok := users[user]; ok {
+				if users[user] != 0 && (users[user] < curM || curM == 0) {
+					return fmt.Errorf("user %s max applications %v of queue %s is greater than immediate or ancestor parent max applications %v", user, curM, cur.Name, users[user])
+				}
+				users[user] = uint64(math.Min(float64(curM), float64(users[user])))
+			} else {
+				users[user] = curM
+			}
+		}
+		for _, group := range limit.Groups {
+			curM := limit.MaxApplications
+			// Is group limit setting exists?
+			if _, ok := groups[group]; ok {
+				if groups[group] != 0 && (groups[group] < curM || curM == 0) {
+					return fmt.Errorf("group %s max applications %v of queue %s is greater than immediate or ancestor parent max applications %v", group, curM, cur.Name, groups[group])
+				}
+				groups[group] = uint64(math.Min(float64(curM), float64(groups[group])))

Review Comment:
   `Min()` as Wilfred asked.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228149494


##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] manirajv06 commented on pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "manirajv06 (via GitHub)" <gi...@apache.org>.
manirajv06 commented on PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#issuecomment-1606747809

   > LGTM, flow in `QueueTracker.increaseTrackedResource()` and `QueueTracker.decreaseTrackedResource()` can be simplified but lets do that in a separate jira so we get the base in
   
   Filed https://issues.apache.org/jira/browse/YUNIKORN-1836 for further simplification


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1239602158


##########
pkg/common/constants.go:
##########
@@ -0,0 +1,25 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package common
+
+const (
+	WildCard  = "*"

Review Comment:
   Nit: "wildcard" is a single word, so just `Wildcard` (lowecase "c").



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] codecov[bot] commented on pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "codecov[bot] (via GitHub)" <gi...@apache.org>.
codecov[bot] commented on PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#issuecomment-1585556692

   ## [Codecov](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=h1&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) Report
   > Merging [#563](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) (3b9fbd9) into [master](https://app.codecov.io/gh/apache/yunikorn-core/commit/8febe293312f74c4af5000ba8858fb0821772b2a?el=desc&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) (8febe29) will **decrease** coverage by `0.18%`.
   > The diff coverage is `64.21%`.
   
   ```diff
   @@            Coverage Diff             @@
   ##           master     #563      +/-   ##
   ==========================================
   - Coverage   74.84%   74.67%   -0.18%     
   ==========================================
     Files          71       73       +2     
     Lines       11537    12074     +537     
   ==========================================
   + Hits         8635     9016     +381     
   - Misses       2597     2726     +129     
   - Partials      305      332      +27     
   ```
   
   
   | [Impacted Files](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache) | Coverage Δ | |
   |---|---|---|
   | [pkg/scheduler/objects/queue.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci9vYmplY3RzL3F1ZXVlLmdv) | `76.87% <ø> (-0.02%)` | :arrow_down: |
   | [pkg/webservice/handlers.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3dlYnNlcnZpY2UvaGFuZGxlcnMuZ28=) | `79.79% <ø> (ø)` | |
   | [pkg/scheduler/partition.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci9wYXJ0aXRpb24uZ28=) | `76.60% <25.00%> (-0.64%)` | :arrow_down: |
   | [pkg/scheduler/ugm/manager.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci91Z20vbWFuYWdlci5nbw==) | `55.95% <49.14%> (-2.95%)` | :arrow_down: |
   | [pkg/common/configs/configvalidator.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL2NvbW1vbi9jb25maWdzL2NvbmZpZ3ZhbGlkYXRvci5nbw==) | `88.76% <87.50%> (-0.29%)` | :arrow_down: |
   | [pkg/scheduler/ugm/queue\_tracker.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci91Z20vcXVldWVfdHJhY2tlci5nbw==) | `87.17% <93.67%> (+5.73%)` | :arrow_up: |
   | [pkg/scheduler/objects/application.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci9vYmplY3RzL2FwcGxpY2F0aW9uLmdv) | `65.38% <100.00%> (+0.85%)` | :arrow_up: |
   | [pkg/scheduler/ugm/group\_tracker.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci91Z20vZ3JvdXBfdHJhY2tlci5nbw==) | `100.00% <100.00%> (ø)` | |
   | [pkg/scheduler/ugm/user\_tracker.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci91Z20vdXNlcl90cmFja2VyLmdv) | `100.00% <100.00%> (ø)` | |
   | [pkg/scheduler/ugm/utilities.go](https://app.codecov.io/gh/apache/yunikorn-core/pull/563?src=pr&el=tree&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache#diff-cGtnL3NjaGVkdWxlci91Z20vdXRpbGl0aWVzLmdv) | `100.00% <100.00%> (ø)` | |
   
   ... and [5 files with indirect coverage changes](https://app.codecov.io/gh/apache/yunikorn-core/pull/563/indirect-changes?src=pr&el=tree-more&utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache)
   
   :mega: We’re building smart automated test selection to slash your CI/CD build times. [Learn more](https://about.codecov.io/iterative-testing/?utm_medium=referral&utm_source=github&utm_content=comment&utm_campaign=pr+comments&utm_term=apache)
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228149886


##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+func TestUserLimitsWithHierarchicalQueue(t *testing.T) {
+	// Make sure parent queue user max apps should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err := CreateConfig(prepareUserLimitsConfig(50, "{memory: 10000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:10000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:1000 vcore:100000]")
+
+	// Make sure parent queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsConfig(90, "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 90 of queue leaf is greater than immediate or ancestor parent max applications 50")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(90, "{memory: 100000, vcore: 100}", "{memory: 90000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(900, "{memory: 10000, vcore: 1000}", "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 900 of queue leaf is greater than immediate or ancestor parent max applications 500")
+
+	// Special case: make sure wildcard user with hierarchical queue check
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - "*"
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 100000, vcore: 1000}
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 100000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - "*"
+                    maxapplications: 90
+                    maxresources: {memory: 90000, vcore: 100}
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	// validate the config and check after the update
+	_, err = CreateConfig(data)
+	assert.ErrorContains(t, err, "user * max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+}
+
+func prepareGroupLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `

Review Comment:
   Placeholders



##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+func TestUserLimitsWithHierarchicalQueue(t *testing.T) {
+	// Make sure parent queue user max apps should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err := CreateConfig(prepareUserLimitsConfig(50, "{memory: 10000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:10000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:1000 vcore:100000]")
+
+	// Make sure parent queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsConfig(90, "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 90 of queue leaf is greater than immediate or ancestor parent max applications 50")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(90, "{memory: 100000, vcore: 100}", "{memory: 90000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(900, "{memory: 10000, vcore: 1000}", "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 900 of queue leaf is greater than immediate or ancestor parent max applications 500")
+
+	// Special case: make sure wildcard user with hierarchical queue check
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - "*"
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 100000, vcore: 1000}
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 100000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - "*"
+                    maxapplications: 90
+                    maxresources: {memory: 90000, vcore: 100}
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	// validate the config and check after the update
+	_, err = CreateConfig(data)
+	assert.ErrorContains(t, err, "user * max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+}
+
+func prepareGroupLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareGroupLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResources string, groupMaxResources string) string {
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResources + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResources + `
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + groupMaxResources + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228222851


##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -115,6 +132,74 @@ func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID
 	return removeQT, nil
 }
 
+func (qt *QueueTracker) setMaxApplications(count uint64, queuePath string) error {
+	log.Logger().Debug("Setting max applications",
+		zap.String("queue path", queuePath),
+		zap.Uint64("max applications", count))
+	var childQueuePath, immediateChildQueueName string
+	childQueuePath, immediateChildQueueName = getChildQueuePath(queuePath)
+	childQueueTracker := qt
+	if childQueuePath != "" {
+		for childQueuePath != "" {
+			if childQueueTracker != nil {
+				if len(childQueueTracker.childQueueTrackers) == 0 || childQueueTracker.childQueueTrackers[immediateChildQueueName] == nil {
+					newChildQt := newQueueTracker(immediateChildQueueName)
+					childQueueTracker.childQueueTrackers[immediateChildQueueName] = newChildQt
+					childQueueTracker = newChildQt
+				} else {
+					childQueueTracker = childQueueTracker.childQueueTrackers[immediateChildQueueName]
+				}
+			}
+			childQueuePath, immediateChildQueueName = getChildQueuePath(childQueuePath)
+		}
+	}
+
+	if int(childQueueTracker.maxRunningApps) != 0 && len(childQueueTracker.runningApplications) > int(count) {
+		log.Logger().Warn("Current running applications is greater than config max applications",
+			zap.String("queue path", queuePath),
+			zap.Int("current running applications", len(childQueueTracker.runningApplications)),
+			zap.Uint64("config max applications", count))
+		return fmt.Errorf("current running applications is greater than config max applications for %s", queuePath)
+	} else {
+		childQueueTracker.maxRunningApps = count
+	}
+	return nil
+}
+
+func (qt *QueueTracker) setMaxResources(resource *resources.Resource, queuePath string) error {

Review Comment:
   AH ok, I just checked the commits, this PR contains commits from other PRs... Let's fix those first because I ended up reviewing the same code again.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] wilfred-s commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "wilfred-s (via GitHub)" <gi...@apache.org>.
wilfred-s commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1236305672


##########
pkg/scheduler/ugm/manager.go:
##########
@@ -35,23 +35,24 @@ var m *Manager
 
 const maxresources = "maxresources"
 const maxapplications = "maxapplications"
+const wildcard = "*"
 
 // Manager implements tracker. A User Group Manager to track the usage for both user and groups.
 // Holds object of both user and group trackers
 type Manager struct {
-	userTrackers      map[string]*UserTracker
-	groupTrackers     map[string]*GroupTracker
-	userLimitsConfig  map[string]map[string]map[string]interface{} // Hold limits settings of user * queue path
-	groupLimitsConfig map[string]map[string]map[string]interface{} // Hold limits settings of group * queue path
+	userTrackers              map[string]*UserTracker
+	groupTrackers             map[string]*GroupTracker
+	userWildCardLimitsConfig  map[string]map[string]interface{} // Hold limits settings of user '*'
+	groupWildCardLimitsConfig map[string]map[string]interface{} // Hold limits settings of group '*'

Review Comment:
   Simplify this: introduce a struct type that has two entries. Resource and uint64 to store the wildcard settings in. Store that struct in the map. That safes one indirection and the continuous conversion of the limit object's `map[string]string` to a resource.



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,38 +31,141 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Logger().Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Logger().Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}
+
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+
+	// Are there any settings for specific user/group? If not, try wild card settings
+	if int(qt.maxRunningApps) == 0 && resources.Equals(resources.NewResource(), qt.maxResources) {
+		// Is there any wild card settings? Do we need to apply enforcement checks using wild card limit settings?
+		var config map[string]interface{}
+		if trackType == user {
+			config = m.getUserWildCardLimitsConfig(qt.queuePath)
+		} else if trackType == group {
+			config = m.getGroupWildCardLimitsConfig(qt.queuePath)
+		}
+		if config != nil {
+			var maxResources map[string]string
+			var maxApplications uint64
+			var ok bool
+			if maxApplications, ok = config[maxapplications].(uint64); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max application settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxapplications]))
+			}
+			if maxResources, ok = config[maxresources].(map[string]string); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxresources]))
+			}
+			var maxResource *resources.Resource
+			var err error
+			if maxResource, err = resources.NewResourceFromConf(maxResources); err != nil {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.String("max resources after casting", maxResource.String()),
+					zap.Error(err))
+				return false
+			}
+			wildCardQuotaExceeded = (maxApplications != 0 && len(qt.runningApplications)+1 > int(maxApplications)) || (!resources.Equals(resources.NewResource(), maxResource) && resources.StrictlyGreaterThan(finalResourceUsage, maxResource))
+			log.Logger().Debug("using wild card limit settings as user/group specific limit settings not set",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Uint64("wild card max running apps", maxApplications),
+				zap.String("wild card max resources", maxResource.String()),
+				zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+		}

Review Comment:
   return here if not passed: wildcard has been checked and no specific quota was set.



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -48,20 +49,36 @@ func newQueueTracker(queueName string) *QueueTracker {
 		queueName:           queueName,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResourceUsage:    resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}

Review Comment:
   agree remove the check



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,38 +31,141 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Logger().Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Logger().Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}
+
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+

Review Comment:
   is this a new app to add to the list of running applications?
   ```
   existingApp := runningApplications[appId]
   ```
   Only check maxRunningApps within range if `existingApp == false`



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,38 +31,141 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Logger().Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Logger().Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}
+
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+
+	// Are there any settings for specific user/group? If not, try wild card settings
+	if int(qt.maxRunningApps) == 0 && resources.Equals(resources.NewResource(), qt.maxResources) {
+		// Is there any wild card settings? Do we need to apply enforcement checks using wild card limit settings?
+		var config map[string]interface{}
+		if trackType == user {
+			config = m.getUserWildCardLimitsConfig(qt.queuePath)
+		} else if trackType == group {
+			config = m.getGroupWildCardLimitsConfig(qt.queuePath)
+		}
+		if config != nil {
+			var maxResources map[string]string
+			var maxApplications uint64
+			var ok bool
+			if maxApplications, ok = config[maxapplications].(uint64); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max application settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxapplications]))
+			}
+			if maxResources, ok = config[maxresources].(map[string]string); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxresources]))
+			}
+			var maxResource *resources.Resource
+			var err error
+			if maxResource, err = resources.NewResourceFromConf(maxResources); err != nil {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.String("max resources after casting", maxResource.String()),
+					zap.Error(err))
+				return false
+			}
+			wildCardQuotaExceeded = (maxApplications != 0 && len(qt.runningApplications)+1 > int(maxApplications)) || (!resources.Equals(resources.NewResource(), maxResource) && resources.StrictlyGreaterThan(finalResourceUsage, maxResource))
+			log.Logger().Debug("using wild card limit settings as user/group specific limit settings not set",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Uint64("wild card max running apps", maxApplications),
+				zap.String("wild card max resources", maxResource.String()),
+				zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+		}
+	}
+
+	// apply user/group specific limit settings set if configured, otherwise use wild card limit settings
+	if (int(qt.maxRunningApps) != 0 && !resources.Equals(resources.NewResource(), qt.maxResources)) || wildCardQuotaExceeded {

Review Comment:
   no casting to int just use the uint64



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,38 +31,141 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Logger().Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Logger().Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}
+
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+
+	// Are there any settings for specific user/group? If not, try wild card settings
+	if int(qt.maxRunningApps) == 0 && resources.Equals(resources.NewResource(), qt.maxResources) {
+		// Is there any wild card settings? Do we need to apply enforcement checks using wild card limit settings?
+		var config map[string]interface{}
+		if trackType == user {
+			config = m.getUserWildCardLimitsConfig(qt.queuePath)
+		} else if trackType == group {
+			config = m.getGroupWildCardLimitsConfig(qt.queuePath)
+		}
+		if config != nil {
+			var maxResources map[string]string
+			var maxApplications uint64
+			var ok bool
+			if maxApplications, ok = config[maxapplications].(uint64); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max application settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxapplications]))
+			}
+			if maxResources, ok = config[maxresources].(map[string]string); !ok {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.Int("tracking type", int(trackType)),
+					zap.Any("max applications ", config[maxresources]))
+			}
+			var maxResource *resources.Resource
+			var err error
+			if maxResource, err = resources.NewResourceFromConf(maxResources); err != nil {
+				log.Logger().Debug("Problem in using the wild card limit max resources settings.",
+					zap.String("queue path", queuePath),
+					zap.String("max resources after casting", maxResource.String()),
+					zap.Error(err))
+				return false
+			}
+			wildCardQuotaExceeded = (maxApplications != 0 && len(qt.runningApplications)+1 > int(maxApplications)) || (!resources.Equals(resources.NewResource(), maxResource) && resources.StrictlyGreaterThan(finalResourceUsage, maxResource))
+			log.Logger().Debug("using wild card limit settings as user/group specific limit settings not set",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Uint64("wild card max running apps", maxApplications),
+				zap.String("wild card max resources", maxResource.String()),
+				zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+		}
+	}
+
+	// apply user/group specific limit settings set if configured, otherwise use wild card limit settings
+	if (int(qt.maxRunningApps) != 0 && !resources.Equals(resources.NewResource(), qt.maxResources)) || wildCardQuotaExceeded {
+		log.Logger().Debug("applying enforcement checks using limit settings",
+			zap.Int("tracking type", int(trackType)),
+			zap.String("queue path", queuePath),
+			zap.Uint64("max running apps", qt.maxRunningApps),
+			zap.String("max resources", qt.maxResources.String()),
+			zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+		if (len(qt.runningApplications)+1 > int(qt.maxRunningApps) || resources.StrictlyGreaterThan(finalResourceUsage, qt.maxResources)) || wildCardQuotaExceeded {
+			log.Logger().Warn("Unable to increase resource usage as allowing new application to run would exceed either configured max applications or max resources limit of specific user/group or wild card user/group",
+				zap.String("queue path", queuePath),
+				zap.Int("tracking type", int(trackType)),
+				zap.Int("current running applications", len(qt.runningApplications)),
+				zap.Int("max running applications", int(qt.maxRunningApps)),
+				zap.String("current resource usage", qt.resourceUsage.String()),
+				zap.String("max resource usage", qt.maxResources.String()))
+			return false
+		}
+	}
+
+	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
+	if childQueuePath != "" {
+		if qt.childQueueTrackers[immediateChildQueueName] == nil {
+			qt.childQueueTrackers[immediateChildQueueName] = newQueueTracker(immediateChildQueueName, qt)
+		}
+		result := qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(childQueuePath, applicationID, trackType, usage)
+		if !result {
+			return false
+		}

Review Comment:
   use
   ```
   if allowed := qt....; !allowed {
    return allowed
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228171403


##########
pkg/scheduler/partition_test.go:
##########
@@ -1900,7 +1956,7 @@ func setupPreemptionForRequiredNode(t *testing.T) (*PartitionContext, *objects.A
 	// check if updated (must be after allocate call)
 	assert.Equal(t, 1, len(app.GetReservations()), "ask should have been reserved")
 	assert.Equal(t, 1, len(app.GetAskReservations(allocID2)), "ask should have been reserved")
-	assertUserGroupResource(t, getTestUserGroup(), resources.NewResourceFromMap(map[string]resources.Quantity{"vcore": 8000}))
+	internalAssertUserGroupResource(t, getTestUserGroup(), resources.NewResourceFromMap(map[string]resources.Quantity{"vcore": 8000}), getExpectedQueuesLimitsForPreemptionWithRequiredNode())

Review Comment:
   Will you rebase this based on #543? You already made some changes which are similar. Is this related? (You changed `partition_test.go` in #543).
   
   EDIT: OK, this will change



##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareUserLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string, userMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResource + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResource + `
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + userMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+func TestUserLimitsWithHierarchicalQueue(t *testing.T) {
+	// Make sure parent queue user max apps should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err := CreateConfig(prepareUserLimitsConfig(50, "{memory: 10000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:10000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:1000 vcore:100000]")
+
+	// Make sure parent queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsConfig(90, "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 90 of queue leaf is greater than immediate or ancestor parent max applications 50")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(90, "{memory: 100000, vcore: 100}", "{memory: 90000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+
+	// (More than one level check)
+	// Make sure hierarchical queue user max apps should not less than the child queue max apps
+	// validate the config and check after the update
+	_, err = CreateConfig(prepareUserLimitsWithTwoLevelsConfig(900, "{memory: 10000, vcore: 1000}", "{memory: 1000, vcore: 100}"))
+	assert.ErrorContains(t, err, "user user1 max applications 900 of queue leaf is greater than immediate or ancestor parent max applications 500")
+
+	// Special case: make sure wildcard user with hierarchical queue check
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - "*"
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 100000, vcore: 1000}
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 100000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - "*"
+                    maxapplications: 90
+                    maxresources: {memory: 90000, vcore: 100}
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	// validate the config and check after the update
+	_, err = CreateConfig(data)
+	assert.ErrorContains(t, err, "user * max resource map[memory:90000 vcore:100000] of queue leaf is greater than immediate or ancestor parent maximum resource map[memory:10000 vcore:10000000]")
+}
+
+func prepareGroupLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `
+          - name: level2
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                groups: 
+                - group1
+                maxapplications: 10
+                maxresources: {memory: 1000, vcore: 100}
+`
+	return data
+}
+
+func prepareGroupLimitsWithTwoLevelsConfig(leafQueueMaxApps uint64, leafQueueMaxResources string, groupMaxResources string) string {
+	// (More than one level check)
+	// Make sure hierarchical queue user max resource should not less than the child queue max resource
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            groups: 
+            - group1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 900
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                ` + leafQueueMaxResources + `
+            queues:
+              - name: leaf
+                maxapplications: 900
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    ` + leafQueueMaxResources + `
+                limits:
+                  - limit:
+                    groups: 
+                    - group1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + groupMaxResources + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] wilfred-s commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "wilfred-s (via GitHub)" <gi...@apache.org>.
wilfred-s commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1239308847


##########
pkg/log/logger.go:
##########
@@ -258,7 +258,7 @@ func UpdateLoggingConfig(config map[string]string) {
 // with one that checks the enabled log level first.
 func initLoggingConfig(config map[string]string) {
 	levelMap := make(map[string]zapcore.Level)
-	levelMap[nullLogger] = zapcore.InfoLevel
+	levelMap[nullLogger] = zapcore.DebugLevel

Review Comment:
   unintended change?



##########
pkg/scheduler/objects/application.go:
##########
@@ -1637,24 +1637,22 @@ func (sa *Application) addAllocationInternal(info *Allocation) {
 // Increase user resource usage
 // No locking must be called while holding the lock
 func (sa *Application) incUserResourceUsage(resource *resources.Resource) {
-	if err := ugm.GetUserManager().IncreaseTrackedResource(sa.queuePath, sa.ApplicationID, resource, sa.user); err != nil {
-		log.Log(log.SchedApplication).Error("Unable to track the user resource usage",
+	if allowed := ugm.GetUserManager().IncreaseTrackedResource(sa.queuePath, sa.ApplicationID, resource, sa.user); !allowed {
+		log.Log(log.SchedUGM).Debug("User quota exceeded unexpectedly.",

Review Comment:
   This is logged in the ugm and queueTracker already, choice made to log in queueTracker only



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -475,127 +413,128 @@ func (m *Manager) internalProcessConfig(cur configs.QueueConfig, queuePath strin
 	return nil
 }
 
-func (m *Manager) processUserConfig(user string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempUserMap map[string]map[string]interface{}) error {
+func (m *Manager) processUserConfig(user string, limitConfig *LimitConfig, queuePath string, userLimits map[string]bool) error {
 	if user == "*" {
 		// traverse all tracked users
 		for u, ut := range m.userTrackers {
 			// Is this user already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
+			if ut.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing users",
 					zap.String("user", u),
-					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
-					zap.Uint64("max application", limit.MaxApplications),
-					zap.Any("max resources", limit.MaxResources))
-
-				// creates an entry for the user being processed always as it has been cleaned before
-				if _, ok := m.userLimitsConfig[u]; ok {
-					m.userLimitsConfig[u][queuePath] = tempLimitsMap
-				} else {
-					m.userLimitsConfig[u] = tempUserMap
-				}
-				if err := m.setUserLimits(u, limit, queuePath); err != nil {
+					zap.Uint64("max application", limitConfig.maxApplications),
+					zap.Any("max resources", limitConfig.maxResources))
+				if err := m.setUserLimits(u, limitConfig, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[u] = true
+				userLimits[u] = true
 			}
 		}
-	} else {
-		// creates an entry for the user being processed always as it has been cleaned before
-		if _, ok := m.userLimitsConfig[user]; ok {
-			m.userLimitsConfig[user][queuePath] = tempLimitsMap
-		} else {
-			m.userLimitsConfig[user] = tempUserMap
-		}
-		if err := m.setUserLimits(user, limit, queuePath); err != nil {
+	} else if user != "" {
+		if err := m.setUserLimits(user, limitConfig, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[user] = true
+		userLimits[user] = true
 	}
 	return nil
 }
 
-func (m *Manager) processGroupConfig(group string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempGroupMap map[string]map[string]interface{}) error {
+func (m *Manager) processGroupConfig(group string, limitConfig *LimitConfig, queuePath string, groupLimits map[string]bool) error {
 	if group == "*" {
 		// traverse all tracked groups
 		for g, gt := range m.groupTrackers {
 			// Is this group already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(gt.queueTracker, queuePath) {
+			if gt.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing groups",
 					zap.String("group", g),
-					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
-					zap.Uint64("max application", limit.MaxApplications),
-					zap.Any("max resources", limit.MaxResources))
-				// creates an entry for the group being processed always as it has been cleaned before
-				if _, ok := m.groupLimitsConfig[g]; ok {
-					m.groupLimitsConfig[g][queuePath] = tempLimitsMap
-				} else {
-					m.groupLimitsConfig[g] = tempGroupMap
-				}
-				if err := m.setGroupLimits(g, limit, queuePath); err != nil {
+					zap.Uint64("max application", limitConfig.maxApplications),
+					zap.Any("max resources", limitConfig.maxResources))
+				if err := m.setGroupLimits(g, limitConfig, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[g] = true
+				groupLimits[g] = true
 			}
 		}
-	} else {
-		// creates an entry for the group being processed always as it has been cleaned before
-		if _, ok := m.groupLimitsConfig[group]; ok {
-			m.groupLimitsConfig[group][queuePath] = tempLimitsMap
-		} else {
-			m.groupLimitsConfig[group] = tempGroupMap
-		}
-		if err := m.setGroupLimits(group, limit, queuePath); err != nil {
+	} else if group != "" {
+		if err := m.setGroupLimits(group, limitConfig, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[group] = true
+		groupLimits[group] = true
 	}
 	return nil
 }
 
-func (m *Manager) clearEarlierSetLimits(userGroupLimits map[string]bool, queuePath string) error {
-	// Clear already configured limits of user for which limits have been configured before but not now through #cur
+// clearEarlierSetLimits Clear already configured limits of users and groups for which limits have been configured before but not now
+func (m *Manager) clearEarlierSetLimits(userLimits map[string]bool, groupLimits map[string]bool, queuePath string) error {
+	// Clear already configured limits of user for which limits have been configured before but not now
 	for u, ut := range m.userTrackers {
 		// Is this user already tracked for the queue path?
-		if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
-			if _, ok := userGroupLimits[u]; !ok {
-				err := ut.setMaxResources(resources.NewResource(), queuePath)
-				if err != nil {
-					return err
+		if ut.IsQueuePathTrackedCompletely(queuePath) {
+			// Is there any limit config set for user in the current configuration? If not, then clear those old limit settings
+			if _, ok := userLimits[u]; !ok {
+				log.Log(log.SchedUGM).Debug("Need to clear earlier set configs for user",
+					zap.String("user", u),
+					zap.String("queue path", queuePath))
+				// Is there any running applications in end queue of this queue path? If not, then remove the linkage between end queue and its immediate parent
+				if ut.IsUnlinkRequired(queuePath) {
+					ut.UnlinkQT(queuePath)
+				} else {
+					ut.setLimits(queuePath, resources.NewResource(), 0)
+					log.Log(log.SchedUGM).Debug("Cleared earlier set limit configs for user",
+						zap.String("user", u),
+						zap.String("queue path", queuePath),
+						zap.Int("New max apps", 0),
+						zap.String("New max resource", resources.NewResource().String()))
 				}
-				err = ut.setMaxApplications(0, queuePath)
-				if err != nil {
-					return err
+				// Does "root" queue has any child queue trackers? At some point during this whole traversal, root might
+				// not have any child queue trackers. When the situation comes, remove the linkage between the user and
+				// its root queue tracker
+				if ut.canBeRemoved() {
+					delete(m.userTrackers, ut.userName)
 				}
 			}
 		}
 	}
 
-	// Clear already configured limits of group for which limits have been configured before but not now through #cur
+	// Clear already configured limits of group for which limits have been configured before but not now
 	for g, gt := range m.groupTrackers {
 		// Is this group already tracked for the queue path?
-		if m.IsQueuePathTrackedCompletely(gt.queueTracker, queuePath) {
-			if _, ok := userGroupLimits[g]; !ok {
-				if err := gt.setMaxResources(resources.NewResource(), queuePath); err != nil {
-					return err
+		if gt.IsQueuePathTrackedCompletely(queuePath) {
+			// Is there any limit config set for group in the current configuration? If not, then clear those old limit settings
+			if ok := groupLimits[g]; !ok {
+				log.Log(log.SchedUGM).Debug("Need to clear earlier set configs for group",
+					zap.String("group", g),
+					zap.String("queue path", queuePath))
+				// Is there any running applications in end queue of this queue path? If not, then remove the linkage between end queue and its immediate parent
+				if gt.IsUnlinkRequired(queuePath) {
+					gt.UnlinkQT(queuePath)
+				} else {
+					gt.setLimits(queuePath, resources.NewResource(), 0)
+					log.Log(log.SchedUGM).Debug("Cleared earlier set limit configs for group",
+						zap.String("group", g),
+						zap.String("queue path", queuePath),
+						zap.Int("New max apps", 0),
+						zap.String("New max resource", resources.NewResource().String()))

Review Comment:
   they do not tell us anything new they will always be zeros, remove



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -77,112 +84,69 @@ func (m *Manager) IncreaseTrackedResource(queuePath string, applicationID string
 			zap.String("queue path", queuePath),
 			zap.String("application", applicationID),
 			zap.Stringer("resource", usage))
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s, user: %s",
-			queuePath, applicationID, usage.String(), user.User)
+		return false
 	}
 	m.Lock()
 	defer m.Unlock()
 	var userTracker *UserTracker
 	if m.userTrackers[user.User] == nil {
+		log.Log(log.SchedUGM).Debug("User tracker doesn't exists. Creating user tracker.",
+			zap.String("user", user.User),
+			zap.String("queue path", queuePath),
+			zap.String("application", applicationID),
+			zap.Stringer("resource", usage))
 		userTracker = newUserTracker(user.User)
-
-		// Set the limits for all configured queue paths of the user
-		for configQueuePath, config := range m.userLimitsConfig[user.User] {
-			log.Log(log.SchedUGM).Debug("Setting the limit max applications settings.",
-				zap.String("user", user.User),
-				zap.String("queue path", configQueuePath))
-			maxApps, ok := config[maxapplications].(uint64)
-			if !ok {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max applications settings. Unable to cast the value from interface to uint64",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Uint64("limit max applications", maxApps))
-				return fmt.Errorf("unable to set the max applications. user: %s, queuepath : %s, applicationid: %s",
-					user.User, configQueuePath, applicationID)
-			}
-			err := userTracker.setMaxApplications(maxApps, configQueuePath)
-			if err != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max applications settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Uint64("limit max applications", maxApps),
-					zap.Error(err))
-				return fmt.Errorf("unable to set the max applications. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), err)
-			}
-			maxResources, ok := config[maxresources].(map[string]string)
-			if !ok {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings. Unable to cast the value from interface to resource",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s",
-					user.User, configQueuePath, applicationID)
-			}
-			resource, resourceErr := resources.NewResourceFromConf(maxResources)
-			if resourceErr != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources),
-					zap.Error(resourceErr))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), resourceErr)
-			}
-			setMaxResourcesErr := userTracker.setMaxResources(resource, configQueuePath)
-			if setMaxResourcesErr != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources),
-					zap.Error(setMaxResourcesErr))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), setMaxResourcesErr)
-			}
-		}
 		m.userTrackers[user.User] = userTracker
 	} else {
 		userTracker = m.userTrackers[user.User]
 	}
-	err := userTracker.increaseTrackedResource(queuePath, applicationID, usage)
-	if err != nil {
-		log.Log(log.SchedUGM).Error("Problem in increasing the user resource usage",
+	log.Log(log.SchedUGM).Debug("Increasing resource usage for user",
+		zap.String("user", user.User),
+		zap.String("queue path", queuePath),
+		zap.String("application", applicationID),
+		zap.Stringer("resource", usage))
+	increased := userTracker.increaseTrackedResource(queuePath, applicationID, usage)
+	if !increased {
+		log.Log(log.SchedUGM).Warn("Not allowed to increase the user resource usage",
 			zap.String("user", user.User),
 			zap.String("queue path", queuePath),
 			zap.String("application", applicationID),
-			zap.Stringer("resource", usage),
-			zap.String("err message", err.Error()))
-		return err
+			zap.Stringer("resource", usage))
+		return increased
 	}

Review Comment:
   This is logged in the app and queueTracker already, choice made to log in queueTracker only



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -77,112 +84,69 @@ func (m *Manager) IncreaseTrackedResource(queuePath string, applicationID string
 			zap.String("queue path", queuePath),
 			zap.String("application", applicationID),
 			zap.Stringer("resource", usage))
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s, user: %s",
-			queuePath, applicationID, usage.String(), user.User)
+		return false

Review Comment:
   Change the level of this message to Debug as it shows a bug in our code, not something that should ever happen.



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -77,112 +84,69 @@ func (m *Manager) IncreaseTrackedResource(queuePath string, applicationID string
 			zap.String("queue path", queuePath),
 			zap.String("application", applicationID),
 			zap.Stringer("resource", usage))
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s, user: %s",
-			queuePath, applicationID, usage.String(), user.User)
+		return false
 	}
 	m.Lock()
 	defer m.Unlock()
 	var userTracker *UserTracker
 	if m.userTrackers[user.User] == nil {
+		log.Log(log.SchedUGM).Debug("User tracker doesn't exists. Creating user tracker.",
+			zap.String("user", user.User),
+			zap.String("queue path", queuePath),
+			zap.String("application", applicationID),
+			zap.Stringer("resource", usage))
 		userTracker = newUserTracker(user.User)
-
-		// Set the limits for all configured queue paths of the user
-		for configQueuePath, config := range m.userLimitsConfig[user.User] {
-			log.Log(log.SchedUGM).Debug("Setting the limit max applications settings.",
-				zap.String("user", user.User),
-				zap.String("queue path", configQueuePath))
-			maxApps, ok := config[maxapplications].(uint64)
-			if !ok {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max applications settings. Unable to cast the value from interface to uint64",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Uint64("limit max applications", maxApps))
-				return fmt.Errorf("unable to set the max applications. user: %s, queuepath : %s, applicationid: %s",
-					user.User, configQueuePath, applicationID)
-			}
-			err := userTracker.setMaxApplications(maxApps, configQueuePath)
-			if err != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max applications settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Uint64("limit max applications", maxApps),
-					zap.Error(err))
-				return fmt.Errorf("unable to set the max applications. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), err)
-			}
-			maxResources, ok := config[maxresources].(map[string]string)
-			if !ok {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings. Unable to cast the value from interface to resource",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s",
-					user.User, configQueuePath, applicationID)
-			}
-			resource, resourceErr := resources.NewResourceFromConf(maxResources)
-			if resourceErr != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources),
-					zap.Error(resourceErr))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), resourceErr)
-			}
-			setMaxResourcesErr := userTracker.setMaxResources(resource, configQueuePath)
-			if setMaxResourcesErr != nil {
-				log.Log(log.SchedUGM).Warn("Problem in setting the limit max resources settings.",
-					zap.String("user", user.User),
-					zap.String("queue path", configQueuePath),
-					zap.Any("limit max resources", maxResources),
-					zap.Error(setMaxResourcesErr))
-				return fmt.Errorf("unable to set the max resources. user: %s, queuepath : %s, applicationid: %s, usage: %s, reason: %w",
-					user.User, configQueuePath, applicationID, usage.String(), setMaxResourcesErr)
-			}
-		}
 		m.userTrackers[user.User] = userTracker
 	} else {
 		userTracker = m.userTrackers[user.User]
 	}
-	err := userTracker.increaseTrackedResource(queuePath, applicationID, usage)
-	if err != nil {
-		log.Log(log.SchedUGM).Error("Problem in increasing the user resource usage",
+	log.Log(log.SchedUGM).Debug("Increasing resource usage for user",
+		zap.String("user", user.User),
+		zap.String("queue path", queuePath),
+		zap.String("application", applicationID),
+		zap.Stringer("resource", usage))
+	increased := userTracker.increaseTrackedResource(queuePath, applicationID, usage)
+	if !increased {
+		log.Log(log.SchedUGM).Warn("Not allowed to increase the user resource usage",
 			zap.String("user", user.User),
 			zap.String("queue path", queuePath),
 			zap.String("application", applicationID),
-			zap.Stringer("resource", usage),
-			zap.String("err message", err.Error()))
-		return err
+			zap.Stringer("resource", usage))
+		return increased
 	}
-	if err = m.ensureGroupTrackerForApp(queuePath, applicationID, user); err != nil {
-		return err
+	if err := m.ensureGroupTrackerForApp(queuePath, applicationID, user); err != nil {
+		return false
 	}
 	group, err := m.getGroup(user)
 	if err != nil {
-		return err
+		return false
 	}
 	groupTracker := m.groupTrackers[group]
 	if groupTracker != nil {
-		err = groupTracker.increaseTrackedResource(queuePath, applicationID, usage)
-		if err != nil {
-			log.Log(log.SchedUGM).Error("Problem in increasing the group resource usage",
+		log.Log(log.SchedUGM).Debug("Increasing resource usage for group",
+			zap.String("group", group),
+			zap.String("queue path", queuePath),
+			zap.String("application", applicationID),
+			zap.Stringer("resource", usage))
+		increased := groupTracker.increaseTrackedResource(queuePath, applicationID, usage)
+		if !increased {
+			log.Log(log.SchedUGM).Warn("Not allowed to increase the group resource usage",

Review Comment:
   This is logged in the app and queueTracker already, choice made to log in queueTracker only



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -33,71 +31,163 @@ type QueueTracker struct {
 	queueName           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue)
+	return qt
 }
 
 func newQueueTracker(queueName string) *QueueTracker {

Review Comment:
   new signature: newQueueTracker(queuePath, queueName string) *QueueTracker
   add the queuePath filed with the full path to the object:
   ```
   qt.queuePath := queuePath + "." + queueName
   qt.queueName := queueName
   ```
   the call for the root queue gets an empty path:
   ```
   rootQueue := newQueueTracker("", configs.RootQueue)
   ```
   when we create a new child we use the current path and the childname:
   ```
   qt.childQueueTracker[child] = newQueueTracker(qt.path, childName)
   ```
   Looking up the queue in the wildcard list:
   ```
   wildcardQuota := getUserWildCardLimitsConfig(qt.path)
   ```



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -195,60 +159,77 @@ func (m *Manager) DecreaseTrackedResource(queuePath string, applicationID string
 			zap.String("application", applicationID),
 			zap.Stringer("resource", usage),
 			zap.Bool("removeApp", removeApp))
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s, user: %s",
-			queuePath, applicationID, usage.String(), user.User)
+		return false
 	}
 	m.Lock()
 	defer m.Unlock()
 	userTracker := m.userTrackers[user.User]
-	if userTracker != nil {
-		removeQT, err := userTracker.decreaseTrackedResource(queuePath, applicationID, usage, removeApp)
-		if err != nil {
-			log.Log(log.SchedUGM).Error("Problem in decreasing the user resource usage",
-				zap.String("user", user.User),
-				zap.String("queue path", queuePath),
-				zap.String("application", applicationID),
-				zap.Stringer("resource", usage),
-				zap.Bool("removeApp", removeApp),
-				zap.String("err message", err.Error()))
-			return err
-		}
-		if removeApp && removeQT {
-			delete(m.userTrackers, user.User)
-		}
-	} else {
+	if userTracker == nil {
 		log.Log(log.SchedUGM).Error("user tracker must be available in userTrackers map",
 			zap.String("user", user.User))
-		return fmt.Errorf("user tracker for %s is missing in userTrackers map", user.User)
+		return false
+	}
+	log.Log(log.SchedUGM).Debug("Decreasing resource usage for user",
+		zap.String("user", user.User),
+		zap.String("queue path", queuePath),
+		zap.String("application", applicationID),
+		zap.Stringer("resource", usage),
+		zap.Bool("removeApp", removeApp))
+	removeQT, decreased := userTracker.decreaseTrackedResource(queuePath, applicationID, usage, removeApp)
+	if !decreased {
+		log.Log(log.SchedUGM).Warn("Not allowed to decrease the user resource usage",
+			zap.String("user", user.User),
+			zap.String("queue path", queuePath),
+			zap.String("application", applicationID),
+			zap.Stringer("resource", usage),
+			zap.Bool("removeApp", removeApp))
+		return decreased
+	}
+	if removeApp && removeQT {
+		log.Log(log.SchedUGM).Debug("Removing user from manager",
+			zap.String("user", user.User),
+			zap.String("queue path", queuePath),
+			zap.String("application", applicationID),
+			zap.Bool("removeApp", removeApp))
+		delete(m.userTrackers, user.User)
 	}
 
 	group, err := m.getGroup(user)
 	if err != nil {
-		return err
+		return false
 	}
 	groupTracker := m.groupTrackers[group]
-	if groupTracker != nil {
-		removeQT, err := groupTracker.decreaseTrackedResource(queuePath, applicationID, usage, removeApp)
-		if err != nil {
-			log.Log(log.SchedUGM).Error("Problem in decreasing the group resource usage",
-				zap.String("user", user.User),
-				zap.String("group", group),
-				zap.String("queue path", queuePath),
-				zap.String("application", applicationID),
-				zap.Stringer("resource", usage),
-				zap.Bool("removeApp", removeApp),
-				zap.String("err message", err.Error()))
-			return err
-		}
-		if removeApp && removeQT {
-			delete(m.groupTrackers, group)
-		}
-	} else {
+	if groupTracker == nil {
 		log.Log(log.SchedUGM).Error("appGroupTrackers tracker must be available in groupTrackers map",
 			zap.String("appGroupTrackers", group))
-		return fmt.Errorf("appGroupTrackers tracker for %s is missing in groupTrackers map", group)
+		return false
 	}
-	return nil
+	log.Log(log.SchedUGM).Debug("Decreasing resource usage for group",
+		zap.String("group", group),
+		zap.String("queue path", queuePath),
+		zap.String("application", applicationID),
+		zap.Stringer("resource", usage),
+		zap.Bool("removeApp", removeApp))
+	removeQT, decreased = groupTracker.decreaseTrackedResource(queuePath, applicationID, usage, removeApp)
+	if !decreased {
+		log.Log(log.SchedUGM).Warn("Not allowed to decrease the user resource usage",

Review Comment:
   This is logged in the app and queueTracker already, choice made to log in queueTracker only



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -475,127 +413,128 @@ func (m *Manager) internalProcessConfig(cur configs.QueueConfig, queuePath strin
 	return nil
 }
 
-func (m *Manager) processUserConfig(user string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempUserMap map[string]map[string]interface{}) error {
+func (m *Manager) processUserConfig(user string, limitConfig *LimitConfig, queuePath string, userLimits map[string]bool) error {
 	if user == "*" {
 		// traverse all tracked users
 		for u, ut := range m.userTrackers {
 			// Is this user already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
+			if ut.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing users",
 					zap.String("user", u),
-					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
-					zap.Uint64("max application", limit.MaxApplications),
-					zap.Any("max resources", limit.MaxResources))
-
-				// creates an entry for the user being processed always as it has been cleaned before
-				if _, ok := m.userLimitsConfig[u]; ok {
-					m.userLimitsConfig[u][queuePath] = tempLimitsMap
-				} else {
-					m.userLimitsConfig[u] = tempUserMap
-				}
-				if err := m.setUserLimits(u, limit, queuePath); err != nil {
+					zap.Uint64("max application", limitConfig.maxApplications),
+					zap.Any("max resources", limitConfig.maxResources))
+				if err := m.setUserLimits(u, limitConfig, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[u] = true
+				userLimits[u] = true
 			}
 		}
-	} else {
-		// creates an entry for the user being processed always as it has been cleaned before
-		if _, ok := m.userLimitsConfig[user]; ok {
-			m.userLimitsConfig[user][queuePath] = tempLimitsMap
-		} else {
-			m.userLimitsConfig[user] = tempUserMap
-		}
-		if err := m.setUserLimits(user, limit, queuePath); err != nil {
+	} else if user != "" {
+		if err := m.setUserLimits(user, limitConfig, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[user] = true
+		userLimits[user] = true
 	}
 	return nil
 }
 
-func (m *Manager) processGroupConfig(group string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempGroupMap map[string]map[string]interface{}) error {
+func (m *Manager) processGroupConfig(group string, limitConfig *LimitConfig, queuePath string, groupLimits map[string]bool) error {
 	if group == "*" {
 		// traverse all tracked groups
 		for g, gt := range m.groupTrackers {
 			// Is this group already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(gt.queueTracker, queuePath) {
+			if gt.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing groups",
 					zap.String("group", g),
-					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
-					zap.Uint64("max application", limit.MaxApplications),
-					zap.Any("max resources", limit.MaxResources))
-				// creates an entry for the group being processed always as it has been cleaned before
-				if _, ok := m.groupLimitsConfig[g]; ok {
-					m.groupLimitsConfig[g][queuePath] = tempLimitsMap
-				} else {
-					m.groupLimitsConfig[g] = tempGroupMap
-				}
-				if err := m.setGroupLimits(g, limit, queuePath); err != nil {
+					zap.Uint64("max application", limitConfig.maxApplications),
+					zap.Any("max resources", limitConfig.maxResources))
+				if err := m.setGroupLimits(g, limitConfig, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[g] = true
+				groupLimits[g] = true
 			}
 		}
-	} else {
-		// creates an entry for the group being processed always as it has been cleaned before
-		if _, ok := m.groupLimitsConfig[group]; ok {
-			m.groupLimitsConfig[group][queuePath] = tempLimitsMap
-		} else {
-			m.groupLimitsConfig[group] = tempGroupMap
-		}
-		if err := m.setGroupLimits(group, limit, queuePath); err != nil {
+	} else if group != "" {
+		if err := m.setGroupLimits(group, limitConfig, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[group] = true
+		groupLimits[group] = true
 	}
 	return nil
 }
 
-func (m *Manager) clearEarlierSetLimits(userGroupLimits map[string]bool, queuePath string) error {
-	// Clear already configured limits of user for which limits have been configured before but not now through #cur
+// clearEarlierSetLimits Clear already configured limits of users and groups for which limits have been configured before but not now
+func (m *Manager) clearEarlierSetLimits(userLimits map[string]bool, groupLimits map[string]bool, queuePath string) error {
+	// Clear already configured limits of user for which limits have been configured before but not now
 	for u, ut := range m.userTrackers {
 		// Is this user already tracked for the queue path?
-		if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
-			if _, ok := userGroupLimits[u]; !ok {
-				err := ut.setMaxResources(resources.NewResource(), queuePath)
-				if err != nil {
-					return err
+		if ut.IsQueuePathTrackedCompletely(queuePath) {
+			// Is there any limit config set for user in the current configuration? If not, then clear those old limit settings
+			if _, ok := userLimits[u]; !ok {
+				log.Log(log.SchedUGM).Debug("Need to clear earlier set configs for user",
+					zap.String("user", u),
+					zap.String("queue path", queuePath))
+				// Is there any running applications in end queue of this queue path? If not, then remove the linkage between end queue and its immediate parent
+				if ut.IsUnlinkRequired(queuePath) {
+					ut.UnlinkQT(queuePath)
+				} else {
+					ut.setLimits(queuePath, resources.NewResource(), 0)
+					log.Log(log.SchedUGM).Debug("Cleared earlier set limit configs for user",
+						zap.String("user", u),
+						zap.String("queue path", queuePath),
+						zap.Int("New max apps", 0),
+						zap.String("New max resource", resources.NewResource().String()))

Review Comment:
   they do not tell us anything new they will always be zeros, remove



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228204462


##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -48,20 +49,36 @@ func newQueueTracker(queueName string) *QueueTracker {
 		queueName:           queueName,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResourceUsage:    resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) bool {
 	log.Logger().Debug("Increasing resource usage",
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
 	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+		return false
+	}

Review Comment:
   cc @wilfred-s I vote for removing this and don't return anything.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] manirajv06 commented on pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "manirajv06 (via GitHub)" <gi...@apache.org>.
manirajv06 commented on PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#issuecomment-1590711189

   > I added some comments, but in the meantime I realized I was commenting on stuff that was supposed to be introduced by other PRs... anyway let's get back to this when #543 and #529 are in.
   
   Yes, thats the plan.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228171403


##########
pkg/scheduler/partition_test.go:
##########
@@ -1900,7 +1956,7 @@ func setupPreemptionForRequiredNode(t *testing.T) (*PartitionContext, *objects.A
 	// check if updated (must be after allocate call)
 	assert.Equal(t, 1, len(app.GetReservations()), "ask should have been reserved")
 	assert.Equal(t, 1, len(app.GetAskReservations(allocID2)), "ask should have been reserved")
-	assertUserGroupResource(t, getTestUserGroup(), resources.NewResourceFromMap(map[string]resources.Quantity{"vcore": 8000}))
+	internalAssertUserGroupResource(t, getTestUserGroup(), resources.NewResourceFromMap(map[string]resources.Quantity{"vcore": 8000}), getExpectedQueuesLimitsForPreemptionWithRequiredNode())

Review Comment:
   Will you rebase this based on #543? You already made some changes which are similar. Is this related? (You changed `partition_test.go` in #543).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] pbacsko commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "pbacsko (via GitHub)" <gi...@apache.org>.
pbacsko commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1228149072


##########
pkg/common/configs/config_test.go:
##########
@@ -1721,3 +1722,418 @@ func TestGetConfigurationString(t *testing.T) {
 		})
 	}
 }
+
+func prepareUserLimitsConfig(leafQueueMaxApps uint64, leafQueueMaxResource string) string {
+	data := `
+partitions:
+  - name: default
+    queues:
+      - name: root
+        maxapplications: 3000
+        limits:
+          - limit:
+            users: 
+            - user1
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 500
+          - limit:
+            users:
+            - user2
+            groups:
+            - prod
+            maxresources: {memory: 10000, vcore: 10000}
+            maxapplications: 300
+        queues:
+          - name: level1
+            maxapplications: 100
+            resources:
+              guaranteed:
+                {memory: 1000, vcore: 10}
+              max:
+                {memory: 10000, vcore: 1000}
+            limits:
+              - limit:
+                users: 
+                - user1
+                maxapplications: 50
+                maxresources: {memory: 1000, vcore: 100}
+            queues:
+              - name: leaf
+                maxapplications: 100
+                resources:
+                  guaranteed:
+                    {memory: 1000, vcore: 10}
+                  max:
+                    {memory: 10000, vcore: 1000}
+                limits:
+                  - limit:
+                    users: 
+                    - user1
+                    maxapplications: ` + strconv.Itoa(int(leafQueueMaxApps)) + `
+                    maxresources: ` + leafQueueMaxResource + `

Review Comment:
   Placeholders



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


[GitHub] [yunikorn-core] wilfred-s commented on a diff in pull request #563: [YUNIKORN-1610] Enforcement changes for User Based Quota

Posted by "wilfred-s (via GitHub)" <gi...@apache.org>.
wilfred-s commented on code in PR #563:
URL: https://github.com/apache/yunikorn-core/pull/563#discussion_r1237969497


##########
pkg/scheduler/objects/application.go:
##########
@@ -1631,24 +1631,22 @@ func (sa *Application) addAllocationInternal(info *Allocation) {
 // Increase user resource usage
 // No locking must be called while holding the lock
 func (sa *Application) incUserResourceUsage(resource *resources.Resource) {
-	if err := ugm.GetUserManager().IncreaseTrackedResource(sa.queuePath, sa.ApplicationID, resource, sa.user); err != nil {
-		log.Log(log.SchedApplication).Error("Unable to track the user resource usage",
+	if allowed := ugm.GetUserManager().IncreaseTrackedResource(sa.queuePath, sa.ApplicationID, resource, sa.user); !allowed {
+		log.Log(log.SchedUGM).Error("Not allowed to increase the user resource usage",

Review Comment:
   Not an error, debug at best.
   Also need to be careful that we do not log the same information in multiple places. This same thing is logged in ugm as a WARN. Preferably log in ugm and not the application.



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -33,25 +33,24 @@ import (
 var once sync.Once
 var m *Manager
 
-const maxresources = "maxresources"
-const maxapplications = "maxapplications"
+const wildcard = "*"

Review Comment:
   This should already be defined somewhere I would assume, if not it has to be moved to common constants.



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,102 +29,197 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Log(log.SchedUGM).Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Log(log.SchedUGM).Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Log(log.SchedUGM).Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
-	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+	existingApp := qt.runningApplications[applicationID]
+
+	// apply user/group specific limit settings set if configured, otherwise use wild card limit settings
+	if qt.maxRunningApps != 0 && !resources.Equals(resources.NewResource(), qt.maxResources) {
+		log.Log(log.SchedUGM).Debug("applying enforcement checks using limit settings of specific user/group",
+			zap.Int("tracking type", int(trackType)),
+			zap.String("queue path", queuePath),
+			zap.Bool("existing app", existingApp),
+			zap.Uint64("max running apps", qt.maxRunningApps),
+			zap.String("max resources", qt.maxResources.String()))
+		if (!existingApp && len(qt.runningApplications)+1 > int(qt.maxRunningApps)) ||
+			resources.StrictlyGreaterThan(finalResourceUsage, qt.maxResources) {
+			log.Log(log.SchedUGM).Warn("Unable to increase resource usage as allowing new application to run would exceed either configured max applications or max resources limit of specific user/group",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Bool("existing app", existingApp),
+				zap.Int("current running applications", len(qt.runningApplications)),
+				zap.Uint64("max running applications", qt.maxRunningApps),
+				zap.String("current resource usage", qt.resourceUsage.String()),
+				zap.String("max resource usage", qt.maxResources.String()))
+			return false
+		}
 	}
+
+	// Try wild card settings
+	if qt.maxRunningApps == 0 && resources.Equals(resources.NewResource(), qt.maxResources) {
+		// Is there any wild card settings? Do we need to apply enforcement checks using wild card limit settings?
+		var config *LimitConfig
+		if trackType == user {
+			config = m.getUserWildCardLimitsConfig(qt.queuePath)
+		} else if trackType == group {
+			config = m.getGroupWildCardLimitsConfig(qt.queuePath)
+		}
+		if config != nil {
+			log.Log(log.SchedUGM).Debug("applying enforcement checks using limit settings of wild card user/group",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Bool("existing app", existingApp),
+				zap.Uint64("wild card max running apps", config.maxApplications),
+				zap.String("wild card max resources", config.maxResources.String()),
+				zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+			wildCardQuotaExceeded = (config.maxApplications != 0 && !existingApp && len(qt.runningApplications)+1 > int(config.maxApplications)) ||
+				(!resources.Equals(resources.NewResource(), config.maxResources) && resources.StrictlyGreaterThan(finalResourceUsage, config.maxResources))
+			if wildCardQuotaExceeded {
+				log.Log(log.SchedUGM).Warn("Unable to increase resource usage as allowing new application to run would exceed either configured max applications or max resources limit of wild card user/group",
+					zap.Int("tracking type", int(trackType)),
+					zap.String("queue path", queuePath),
+					zap.Bool("existing app", existingApp),
+					zap.Int("current running applications", len(qt.runningApplications)),
+					zap.Uint64("max running applications", config.maxApplications),
+					zap.String("current resource usage", qt.resourceUsage.String()),
+					zap.String("max resource usage", config.maxResources.String()))
+				return false
+			}
+		}
+	}
+
+	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
+	if childQueuePath != "" {
+		if qt.childQueueTrackers[immediateChildQueueName] == nil {
+			qt.childQueueTrackers[immediateChildQueueName] = newQueueTracker(immediateChildQueueName, qt)
+		}
+		allowed := qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(childQueuePath, applicationID, trackType, usage)
+		if !allowed {
+			return allowed
+		}
+	}
+
 	qt.resourceUsage.AddTo(usage)
 	qt.runningApplications[applicationID] = true
 
 	log.Log(log.SchedUGM).Debug("Successfully increased resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
+		zap.Bool("existing app", existingApp),
 		zap.Stringer("resource", usage),
 		zap.Stringer("total resource after increasing", qt.resourceUsage),
 		zap.Int("total applications after increasing", len(qt.runningApplications)))
-
-	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
-	if childQueuePath != "" {
-		if qt.childQueueTrackers[immediateChildQueueName] == nil {
-			qt.childQueueTrackers[immediateChildQueueName] = newQueueTracker(immediateChildQueueName)
-		}
-		err := qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(childQueuePath, applicationID, usage)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
+	return true
 }
 
-func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource, removeApp bool) (bool, error) {
+func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource, removeApp bool) (bool, bool) {
 	log.Log(log.SchedUGM).Debug("Decreasing resource usage",
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage),
 		zap.Bool("removeApp", removeApp))
-	if queuePath == "" || usage == nil {
-		return false, fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
-	}
-	qt.resourceUsage.SubFrom(usage)
-	if removeApp {
-		delete(qt.runningApplications, applicationID)
-	}
-	log.Log(log.SchedUGM).Debug("Successfully decreased resource usage",
-		zap.String("queue path", queuePath),
-		zap.String("application", applicationID),
-		zap.Stringer("resource", usage),
-		zap.Stringer("total resource after decreasing", qt.resourceUsage),
-		zap.Int("total applications after decreasing", len(qt.runningApplications)))
-
 	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
 	if childQueuePath != "" {
 		if qt.childQueueTrackers[immediateChildQueueName] != nil {
-			removeQT, err := qt.childQueueTrackers[immediateChildQueueName].decreaseTrackedResource(childQueuePath, applicationID, usage, removeApp)
-			if err != nil {
-				return false, err
+			removeQT, decreased := qt.childQueueTrackers[immediateChildQueueName].decreaseTrackedResource(childQueuePath, applicationID, usage, removeApp)
+			if !decreased {
+				return false, decreased
 			}
 			if removeQT {
+				log.Log(log.SchedUGM).Debug("Removed queue tracker linkage from its parent",
+					zap.String("queue path ", queuePath),
+					zap.String("removed queue name", immediateChildQueueName),
+					zap.String("parent queue", qt.queueName))
 				delete(qt.childQueueTrackers, immediateChildQueueName)
 			}
 		} else {
 			log.Log(log.SchedUGM).Error("Child queueTracker tracker must be available in child queues map",
 				zap.String("child queueTracker name", immediateChildQueueName))
-			return false, fmt.Errorf("child queueTracker tracker for %s is missing in child queues map", immediateChildQueueName)
+			return false, false
 		}
 	}
 
+	qt.resourceUsage.SubFrom(usage)
+	if removeApp {
+		log.Log(log.SchedUGM).Debug("Removed application from running applications",
+			zap.String("application", applicationID),
+			zap.String("queue path", queuePath),
+			zap.String("queue name", qt.queueName))
+		delete(qt.runningApplications, applicationID)
+	}
+	log.Log(log.SchedUGM).Debug("Successfully decreased resource usage",
+		zap.String("queue path", queuePath),
+		zap.String("application", applicationID),
+		zap.Stringer("resource", usage),
+		zap.Stringer("total resource after decreasing", qt.resourceUsage),
+		zap.Int("total applications after decreasing", len(qt.runningApplications)))
+
 	// Determine if the queue tracker should be removed
-	removeQT := len(qt.childQueueTrackers) == 0 && len(qt.runningApplications) == 0 && resources.IsZero(qt.resourceUsage)
-	return removeQT, nil
+	removeQT := len(qt.childQueueTrackers) == 0 && len(qt.runningApplications) == 0 && resources.IsZero(qt.resourceUsage) &&
+		qt.maxRunningApps == 0 && resources.Equals(resources.NewResource(), qt.maxResources)
+	log.Log(log.SchedUGM).Debug("Can queue tracker be removed?",
+		zap.String("queue path ", queuePath),
+		zap.Int("no. of child queue trackers", len(qt.childQueueTrackers)),
+		zap.Int("no. of running applications", len(qt.runningApplications)),
+		zap.Bool("current resource usage", resources.IsZero(qt.resourceUsage)),
+		zap.Uint64("max running applications", qt.maxRunningApps),
+		zap.String("max resources", qt.maxResources.String()),
+		zap.Bool("remove QT", removeQT))

Review Comment:
   Lot of overhead: we know exactly when we need to remove,
   Create a single DEBUG log line:
   ```
   log.Log(log.SchedUGM).Debug("Remove queue tracker",
   		zap.String("queue path ", queuePath),
   		zap.Bool("remove ", removeQT))
   ```



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -195,60 +159,79 @@ func (m *Manager) DecreaseTrackedResource(queuePath string, applicationID string
 			zap.String("application", applicationID),
 			zap.Stringer("resource", usage),
 			zap.Bool("removeApp", removeApp))
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s, user: %s",
-			queuePath, applicationID, usage.String(), user.User)
+		return false
 	}
 	m.Lock()
 	defer m.Unlock()
 	userTracker := m.userTrackers[user.User]
 	if userTracker != nil {
-		removeQT, err := userTracker.decreaseTrackedResource(queuePath, applicationID, usage, removeApp)
-		if err != nil {
-			log.Log(log.SchedUGM).Error("Problem in decreasing the user resource usage",
+		log.Log(log.SchedUGM).Debug("Decreasing resource usage for user",

Review Comment:
   turn if around



##########
pkg/scheduler/partition_test.go:
##########
@@ -346,7 +346,7 @@ func TestRemoveNodeWithAllocations(t *testing.T) {
 	assert.Equal(t, 1, len(released), "node did not release correct allocation")
 	assert.Equal(t, 0, len(confirmed), "node did not confirm correct allocation")
 	assert.Equal(t, released[0].GetUUID(), allocUUID, "uuid returned by release not the same as on allocation")
-	assertLimits(t, getTestUserGroup(), nil)
+	assertLimits(t, getTestUserGroup(), resources.Multiply(appRes, 0))

Review Comment:
   Why is this not a `resource.Zero` ?



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -419,30 +348,40 @@ func (m *Manager) UpdateConfig(config configs.QueueConfig, queuePath string) err
 	m.Lock()
 	defer m.Unlock()
 
-	// clear the local limit config maps before processing the limit config
-	m.userLimitsConfig = make(map[string]map[string]map[string]interface{})
-	m.groupLimitsConfig = make(map[string]map[string]map[string]interface{})
+	m.userWildCardLimitsConfig = make(map[string]*LimitConfig)
+	m.groupWildCardLimitsConfig = make(map[string]*LimitConfig)
 	return m.internalProcessConfig(config, queuePath)
 }
 
 func (m *Manager) internalProcessConfig(cur configs.QueueConfig, queuePath string) error {
 	// Holds user and group for which limits have been configured with specific queue path
-	userGroupLimits := make(map[string]bool)
+	userLimits := make(map[string]bool)
+	groupLimits := make(map[string]bool)
 	// Traverse limits of specific queue path
 	for _, limit := range cur.Limits {
-		tempLimitsMap := make(map[string]interface{})
-		tempLimitsMap[maxresources] = limit.MaxResources
-		tempLimitsMap[maxapplications] = limit.MaxApplications
+		var maxResource *resources.Resource
+		var err error
+		if maxResource, err = resources.NewResourceFromConf(limit.MaxResources); err != nil {
+			log.Log(log.SchedUGM).Debug("Problem in using the wild card limit max resources settings.",

Review Comment:
   Assume all resources can be parsed: the config must have passed the  validation before we get here



##########
pkg/scheduler/ugm/manager.go:
##########
@@ -475,113 +416,119 @@ func (m *Manager) internalProcessConfig(cur configs.QueueConfig, queuePath strin
 	return nil
 }
 
-func (m *Manager) processUserConfig(user string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempUserMap map[string]map[string]interface{}) error {
+func (m *Manager) processUserConfig(user string, limit configs.Limit, queuePath string, userLimits map[string]bool) error {
 	if user == "*" {
 		// traverse all tracked users
 		for u, ut := range m.userTrackers {
 			// Is this user already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
+			if ut.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing users",
 					zap.String("user", u),
 					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
 					zap.Uint64("max application", limit.MaxApplications),
 					zap.Any("max resources", limit.MaxResources))
-
-				// creates an entry for the user being processed always as it has been cleaned before
-				if _, ok := m.userLimitsConfig[u]; ok {
-					m.userLimitsConfig[u][queuePath] = tempLimitsMap
-				} else {
-					m.userLimitsConfig[u] = tempUserMap
-				}
 				if err := m.setUserLimits(u, limit, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[u] = true
+				userLimits[u] = true
 			}
 		}
-	} else {
-		// creates an entry for the user being processed always as it has been cleaned before
-		if _, ok := m.userLimitsConfig[user]; ok {
-			m.userLimitsConfig[user][queuePath] = tempLimitsMap
-		} else {
-			m.userLimitsConfig[user] = tempUserMap
-		}
+	} else if user != "" {
 		if err := m.setUserLimits(user, limit, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[user] = true
+		userLimits[user] = true
 	}
 	return nil
 }
 
-func (m *Manager) processGroupConfig(group string, limit configs.Limit, queuePath string, userGroupLimits map[string]bool, tempLimitsMap map[string]interface{}, tempGroupMap map[string]map[string]interface{}) error {
+func (m *Manager) processGroupConfig(group string, limit configs.Limit, queuePath string, groupLimits map[string]bool) error {
 	if group == "*" {
 		// traverse all tracked groups
 		for g, gt := range m.groupTrackers {
 			// Is this group already tracked for the queue path?
-			if m.IsQueuePathTrackedCompletely(gt.queueTracker, queuePath) {
+			if gt.IsQueuePathTrackedCompletely(queuePath) {
 				log.Log(log.SchedUGM).Debug("Processing wild card user limits configuration for all existing groups",
 					zap.String("group", g),
 					zap.String("limit", limit.Limit),
 					zap.String("queue path", queuePath),
 					zap.Uint64("max application", limit.MaxApplications),
 					zap.Any("max resources", limit.MaxResources))
-				// creates an entry for the group being processed always as it has been cleaned before
-				if _, ok := m.groupLimitsConfig[g]; ok {
-					m.groupLimitsConfig[g][queuePath] = tempLimitsMap
-				} else {
-					m.groupLimitsConfig[g] = tempGroupMap
-				}
 				if err := m.setGroupLimits(g, limit, queuePath); err != nil {
 					return err
 				}
-				userGroupLimits[g] = true
+				groupLimits[g] = true
 			}
 		}
-	} else {
-		// creates an entry for the group being processed always as it has been cleaned before
-		if _, ok := m.groupLimitsConfig[group]; ok {
-			m.groupLimitsConfig[group][queuePath] = tempLimitsMap
-		} else {
-			m.groupLimitsConfig[group] = tempGroupMap
-		}
+	} else if group != "" {
 		if err := m.setGroupLimits(group, limit, queuePath); err != nil {
 			return err
 		}
-		userGroupLimits[group] = true
+		groupLimits[group] = true
 	}
 	return nil
 }
 
-func (m *Manager) clearEarlierSetLimits(userGroupLimits map[string]bool, queuePath string) error {
-	// Clear already configured limits of user for which limits have been configured before but not now through #cur
+// clearEarlierSetLimits Clear already configured limits of users and groups for which limits have been configured before but not now
+func (m *Manager) clearEarlierSetLimits(userLimits map[string]bool, groupLimits map[string]bool, queuePath string) error {
+	// Clear already configured limits of user for which limits have been configured before but not now
 	for u, ut := range m.userTrackers {
 		// Is this user already tracked for the queue path?
-		if m.IsQueuePathTrackedCompletely(ut.queueTracker, queuePath) {
-			if _, ok := userGroupLimits[u]; !ok {
-				err := ut.setMaxResources(resources.NewResource(), queuePath)
-				if err != nil {
-					return err
+		if ut.IsQueuePathTrackedCompletely(queuePath) {
+			// Is there any limit config set for user in the current configuration? If not, then clear those old limit settings
+			if _, ok := userLimits[u]; !ok {
+				log.Log(log.SchedUGM).Debug("Need to clear earlier set configs for user",
+					zap.String("user", u),
+					zap.String("queue path", queuePath))
+				// Is there any running applications in end queue of this queue path? If not, then remove the linkage between end queue and its immediate parent
+				if ut.IsUnlinkRequired(queuePath) {
+					ut.UnlinkQT(queuePath)
+				} else {
+					ut.setMaxResources(resources.NewResource(), queuePath)
+					ut.setMaxApplications(0, queuePath)

Review Comment:
   Use one function to set both, propagate that into all objects



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -31,102 +29,197 @@ import (
 
 type QueueTracker struct {
 	queueName           string
+	parent              *QueueTracker
+	queuePath           string
 	resourceUsage       *resources.Resource
 	runningApplications map[string]bool
-	maxResourceUsage    *resources.Resource
+	maxResources        *resources.Resource
 	maxRunningApps      uint64
 	childQueueTrackers  map[string]*QueueTracker
 }
 
 func newRootQueueTracker() *QueueTracker {
-	return newQueueTracker(configs.RootQueue)
+	qt := newQueueTracker(configs.RootQueue, nil)
+	return qt
 }
 
-func newQueueTracker(queueName string) *QueueTracker {
-	log.Log(log.SchedUGM).Debug("Creating queue tracker object for queue",
-		zap.String("queue", queueName))
+func newQueueTracker(queueName string, parent *QueueTracker) *QueueTracker {
 	queueTracker := &QueueTracker{
 		queueName:           queueName,
+		parent:              parent,
 		resourceUsage:       resources.NewResource(),
 		runningApplications: make(map[string]bool),
+		maxResources:        resources.NewResource(),
+		maxRunningApps:      0,
 		childQueueTrackers:  make(map[string]*QueueTracker),
 	}
+	parentQueueName := ""
+	if parent != nil {
+		queueTracker.queuePath = queueTracker.parent.queuePath + configs.DOT + queueName
+		parentQueueName = parent.queueName
+	} else if queueName == configs.RootQueue {
+		queueTracker.queuePath = configs.RootQueue
+	}
+	log.Log(log.SchedUGM).Debug("Created queue tracker object for queue",
+		zap.String("queue", queueName),
+		zap.String("parent", parentQueueName),
+		zap.String("queue path ", queueTracker.queuePath))
 	return queueTracker
 }
 
-func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource) error {
+type trackingType int
+
+const (
+	none trackingType = iota
+	user
+	group
+)
+
+func (qt *QueueTracker) increaseTrackedResource(queuePath string, applicationID string, trackType trackingType, usage *resources.Resource) bool {
 	log.Log(log.SchedUGM).Debug("Increasing resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage))
-	if queuePath == "" || applicationID == "" || usage == nil {
-		return fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
+	finalResourceUsage := qt.resourceUsage.Clone()
+	finalResourceUsage.AddTo(usage)
+	wildCardQuotaExceeded := false
+	existingApp := qt.runningApplications[applicationID]
+
+	// apply user/group specific limit settings set if configured, otherwise use wild card limit settings
+	if qt.maxRunningApps != 0 && !resources.Equals(resources.NewResource(), qt.maxResources) {
+		log.Log(log.SchedUGM).Debug("applying enforcement checks using limit settings of specific user/group",
+			zap.Int("tracking type", int(trackType)),
+			zap.String("queue path", queuePath),
+			zap.Bool("existing app", existingApp),
+			zap.Uint64("max running apps", qt.maxRunningApps),
+			zap.String("max resources", qt.maxResources.String()))
+		if (!existingApp && len(qt.runningApplications)+1 > int(qt.maxRunningApps)) ||
+			resources.StrictlyGreaterThan(finalResourceUsage, qt.maxResources) {
+			log.Log(log.SchedUGM).Warn("Unable to increase resource usage as allowing new application to run would exceed either configured max applications or max resources limit of specific user/group",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Bool("existing app", existingApp),
+				zap.Int("current running applications", len(qt.runningApplications)),
+				zap.Uint64("max running applications", qt.maxRunningApps),
+				zap.String("current resource usage", qt.resourceUsage.String()),
+				zap.String("max resource usage", qt.maxResources.String()))
+			return false
+		}
 	}
+
+	// Try wild card settings
+	if qt.maxRunningApps == 0 && resources.Equals(resources.NewResource(), qt.maxResources) {
+		// Is there any wild card settings? Do we need to apply enforcement checks using wild card limit settings?
+		var config *LimitConfig
+		if trackType == user {
+			config = m.getUserWildCardLimitsConfig(qt.queuePath)
+		} else if trackType == group {
+			config = m.getGroupWildCardLimitsConfig(qt.queuePath)
+		}
+		if config != nil {
+			log.Log(log.SchedUGM).Debug("applying enforcement checks using limit settings of wild card user/group",
+				zap.Int("tracking type", int(trackType)),
+				zap.String("queue path", queuePath),
+				zap.Bool("existing app", existingApp),
+				zap.Uint64("wild card max running apps", config.maxApplications),
+				zap.String("wild card max resources", config.maxResources.String()),
+				zap.Bool("wild card quota exceeded", wildCardQuotaExceeded))
+			wildCardQuotaExceeded = (config.maxApplications != 0 && !existingApp && len(qt.runningApplications)+1 > int(config.maxApplications)) ||
+				(!resources.Equals(resources.NewResource(), config.maxResources) && resources.StrictlyGreaterThan(finalResourceUsage, config.maxResources))
+			if wildCardQuotaExceeded {
+				log.Log(log.SchedUGM).Warn("Unable to increase resource usage as allowing new application to run would exceed either configured max applications or max resources limit of wild card user/group",
+					zap.Int("tracking type", int(trackType)),
+					zap.String("queue path", queuePath),
+					zap.Bool("existing app", existingApp),
+					zap.Int("current running applications", len(qt.runningApplications)),
+					zap.Uint64("max running applications", config.maxApplications),
+					zap.String("current resource usage", qt.resourceUsage.String()),
+					zap.String("max resource usage", config.maxResources.String()))
+				return false
+			}
+		}
+	}
+
+	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
+	if childQueuePath != "" {
+		if qt.childQueueTrackers[immediateChildQueueName] == nil {
+			qt.childQueueTrackers[immediateChildQueueName] = newQueueTracker(immediateChildQueueName, qt)
+		}
+		allowed := qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(childQueuePath, applicationID, trackType, usage)
+		if !allowed {
+			return allowed
+		}
+	}
+
 	qt.resourceUsage.AddTo(usage)
 	qt.runningApplications[applicationID] = true
 
 	log.Log(log.SchedUGM).Debug("Successfully increased resource usage",
+		zap.Int("tracking type", int(trackType)),
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
+		zap.Bool("existing app", existingApp),
 		zap.Stringer("resource", usage),
 		zap.Stringer("total resource after increasing", qt.resourceUsage),
 		zap.Int("total applications after increasing", len(qt.runningApplications)))
-
-	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
-	if childQueuePath != "" {
-		if qt.childQueueTrackers[immediateChildQueueName] == nil {
-			qt.childQueueTrackers[immediateChildQueueName] = newQueueTracker(immediateChildQueueName)
-		}
-		err := qt.childQueueTrackers[immediateChildQueueName].increaseTrackedResource(childQueuePath, applicationID, usage)
-		if err != nil {
-			return err
-		}
-	}
-	return nil
+	return true
 }
 
-func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource, removeApp bool) (bool, error) {
+func (qt *QueueTracker) decreaseTrackedResource(queuePath string, applicationID string, usage *resources.Resource, removeApp bool) (bool, bool) {
 	log.Log(log.SchedUGM).Debug("Decreasing resource usage",
 		zap.String("queue path", queuePath),
 		zap.String("application", applicationID),
 		zap.Stringer("resource", usage),
 		zap.Bool("removeApp", removeApp))
-	if queuePath == "" || usage == nil {
-		return false, fmt.Errorf("mandatory parameters are missing. queuepath: %s, application id: %s, resource usage: %s",
-			queuePath, applicationID, usage.String())
-	}
-	qt.resourceUsage.SubFrom(usage)
-	if removeApp {
-		delete(qt.runningApplications, applicationID)
-	}
-	log.Log(log.SchedUGM).Debug("Successfully decreased resource usage",
-		zap.String("queue path", queuePath),
-		zap.String("application", applicationID),
-		zap.Stringer("resource", usage),
-		zap.Stringer("total resource after decreasing", qt.resourceUsage),
-		zap.Int("total applications after decreasing", len(qt.runningApplications)))
-
 	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
 	if childQueuePath != "" {
 		if qt.childQueueTrackers[immediateChildQueueName] != nil {
-			removeQT, err := qt.childQueueTrackers[immediateChildQueueName].decreaseTrackedResource(childQueuePath, applicationID, usage, removeApp)
-			if err != nil {
-				return false, err
+			removeQT, decreased := qt.childQueueTrackers[immediateChildQueueName].decreaseTrackedResource(childQueuePath, applicationID, usage, removeApp)

Review Comment:
   Change to == nil and directly return logging the message



##########
pkg/scheduler/ugm/queue_tracker.go:
##########
@@ -201,11 +274,75 @@ func (qt *QueueTracker) getResourceUsageDAOInfo(parentQueuePath string) *dao.Res
 	for app := range qt.runningApplications {
 		usage.RunningApplications = append(usage.RunningApplications, app)
 	}
-	usage.MaxResources = qt.maxResourceUsage
+	usage.MaxResources = qt.maxResources
 	usage.MaxApplications = qt.maxRunningApps
 	for _, cqt := range qt.childQueueTrackers {
 		childUsage := cqt.getResourceUsageDAOInfo(fullQueuePath)
 		usage.Children = append(usage.Children, childUsage)
 	}
 	return usage
 }
+
+// IsQueuePathTrackedCompletely Traverse queue path upto the end queue through its linkage
+// to confirm entire queuePath has been tracked completely or not
+func (qt *QueueTracker) IsQueuePathTrackedCompletely(queuePath string) bool {
+	if queuePath == configs.RootQueue || queuePath == qt.queueName {
+		return true
+	}
+	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
+	if immediateChildQueueName != "" {
+		if childUt, ok := qt.childQueueTrackers[immediateChildQueueName]; ok {
+			return childUt.IsQueuePathTrackedCompletely(childQueuePath)
+		}
+	}
+	return false
+}
+
+// IsUnlinkRequired Traverse queue path upto the leaf queue and decide whether
+// linkage needs to be removed or not based on the running applications.
+// If there are any running applications in end leaf queue, we should remove the linkage between
+// the leaf and its parent queue using UnlinkQT method. Otherwise, we should leave as it is.
+func (qt *QueueTracker) IsUnlinkRequired(queuePath string) bool {
+	childQueuePath, immediateChildQueueName := getChildQueuePath(queuePath)
+	if immediateChildQueueName != "" {
+		if childUt, ok := qt.childQueueTrackers[immediateChildQueueName]; ok {
+			return childUt.IsUnlinkRequired(childQueuePath)
+		}
+	}
+	if queuePath == configs.RootQueue || queuePath == qt.queueName {
+		if len(qt.runningApplications) == 0 {
+			log.Log(log.SchedUGM).Debug("Is Unlink Required?",
+				zap.String("queue path", queuePath),
+				zap.Int("no. of applications", len(qt.runningApplications)))
+			return true
+		}
+	}
+	return false
+}
+
+// UnlinkQT Traverse queue path upto the end queue. If end queue has any more child queue trackers,
+// then goes upto each child queue and removes the linkage with its immediate parent
+func (qt *QueueTracker) UnlinkQT(queuePath string) bool {

Review Comment:
   Parent not needed if we use recursive checks.
   
   root.parent.**child**.leaf
    -> slice of [root, parent, child, leaf]
   ```
   manager -> user.rootQT.unlinkChild(path []slice)
   root.unlinkChild([parent, child, leaf])
     root -> find parent
   parent.unlinkChild([child.leaf])
      parent -> find child
   child.unlinkQT([leaf])
   if slice.len ==1 leaf can be removed?
     delete(qt.childQueueTrackers, leaf.queueName)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org