You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@yunikorn.apache.org by GitBox <gi...@apache.org> on 2021/08/11 03:14:28 UTC

[GitHub] [incubator-yunikorn-core] wilfred-s commented on a change in pull request #296: YUNIKORN-193 Add option to allow parent to set unmanaged child queue …

wilfred-s commented on a change in pull request #296:
URL: https://github.com/apache/incubator-yunikorn-core/pull/296#discussion_r686417588



##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {

Review comment:
       We access the parent unlocked which will trigger race conditions. This also seems to indicate implicit inheritance. (see comment above).
   A cheaper solution would be to set the template when we add the queue (set the pointer to the same template as the parent has)
   
   NIT: typo in the comment

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -142,15 +144,26 @@ func NewDynamicQueue(name string, leaf bool, parent *Queue) (*Queue, error) {
 	if err != nil {
 		return nil, fmt.Errorf("dynamic queue creation failed: %s", err)
 	}
-	// pull the properties from the parent that should be set on the child
-	sq.setTemplateProperties(parent.getProperties())
+
+	sq.applyTemplate(lookupTemplate(parent))
 	sq.UpdateSortType()
 	log.Logger().Info("dynamic queue added to scheduler",
 		zap.String("queueName", sq.QueuePath))
 
 	return sq, nil
 }
 
+// use input template to initialize properties, maxResource, and guaranteedResource
+// only leaf queue can use template as the values from template is meaningful to leaf only.

Review comment:
       Design question: do we inherit the whole template parent to parent?

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {
+	if sq == nil {
+		return nil
 	}
-	// for a leaf queue pull out all values from the template and set each of them
-	// See YUNIKORN-193: for now just copy one attr from parent
-	if sq.isLeaf {
-		if parent[configs.ApplicationSortPolicy] != "" {
-			sq.properties[configs.ApplicationSortPolicy] = parent[configs.ApplicationSortPolicy]
-		}
+	if current := sq.template; current != nil {
+		return current
 	}
-	// for a parent queue we just copy the template from its parent (no need to be recursive)
-	// this stops at the first managed queue
-	// See YUNIKORN-193
+	return lookupTemplate(sq.parent)
 }
 
-func (sq *Queue) SetQueueConfig(conf configs.QueueConfig) error {
+func (sq *Queue) ApplyConf(conf configs.QueueConfig) error {
 	sq.Lock()
 	defer sq.Unlock()
-	return sq.setQueueConfig(conf)
+	return sq.applyConf(conf)
+}

Review comment:
       Layout comment: keep the exported locked version with the unlocked version

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -142,15 +144,26 @@ func NewDynamicQueue(name string, leaf bool, parent *Queue) (*Queue, error) {
 	if err != nil {
 		return nil, fmt.Errorf("dynamic queue creation failed: %s", err)
 	}
-	// pull the properties from the parent that should be set on the child
-	sq.setTemplateProperties(parent.getProperties())
+
+	sq.applyTemplate(lookupTemplate(parent))

Review comment:
       This should be processed as part of the `addChildQueue()` call that we do just before this point now that we have a proper template to set. Removes taking a lock and copying data.

##########
File path: pkg/scheduler/objects/queue_test.go
##########
@@ -1166,8 +1169,7 @@ func TestQueueProps(t *testing.T) {
 	leaf, err = createDynamicQueue(parent, "leaf", false)
 	assert.NilError(t, err, "failed to create leaf queue")
 	assert.Assert(t, leaf.isLeaf && !leaf.isManaged, "leaf queue is not marked as unmanaged leaf")
-	assert.Equal(t, len(leaf.properties), 1, "leaf queue properties size incorrect")
-	assert.Equal(t, leaf.properties[configs.ApplicationSortPolicy], "stateaware", "leaf queue property value not as expected")
+	assert.Equal(t, len(leaf.properties), 0, "leaf queue properties size incorrect")

Review comment:
       The fact that this test breaks means we have regressed somewhere. This test should not break by default unless we specifically want it to break.

##########
File path: pkg/scheduler/objects/queue_test.go
##########
@@ -1338,10 +1340,203 @@ func TestSupportTaskGroup(t *testing.T) {
 	assert.Assert(t, !leaf.SupportTaskGroup(), "leaf queue (FAIR policy) should not support task group")
 }
 
-func TestGetPartitionQueues(t *testing.T) {
+func TestGetPartitionQueueDAOInfo(t *testing.T) {
 	root, err := createRootQueue(nil)
 	assert.NilError(t, err, "failed to create basic root queue: %v", err)
-	root.properties = make(map[string]string)
-	root.properties["key"] = "value"
-	assert.Equal(t, "value", root.GetPartitionQueues().Properties["key"])
+
+	// test properties
+	root.properties = getProperties()
+	assert.Assert(t, reflect.DeepEqual(root.properties, root.GetPartitionQueueDAOInfo().Properties))
+
+	// test template
+	root.template = template.NewTemplate(getProperties(), getResource(t), getResource(t))
+	assert.Assert(t, reflect.DeepEqual(root.template.GetProperties(), root.GetPartitionQueueDAOInfo().TemplateInfo.Properties))
+	assert.Equal(t, root.template.GetMaxResource().DAOString(), root.GetPartitionQueueDAOInfo().TemplateInfo.MaxResource)

Review comment:
       This is dangerous: resources are a map you cannot never rely on the fact that the resources will be returned in a fixed order. Two calls to DAOString() on the same resource might return different string values.

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {
+	if sq == nil {
+		return nil
 	}
-	// for a leaf queue pull out all values from the template and set each of them
-	// See YUNIKORN-193: for now just copy one attr from parent
-	if sq.isLeaf {
-		if parent[configs.ApplicationSortPolicy] != "" {
-			sq.properties[configs.ApplicationSortPolicy] = parent[configs.ApplicationSortPolicy]
-		}

Review comment:
       This will regress existing setups. We need to handle this gracefully. I do not see this attribute in the template either. This is an important one to add

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {
+	if sq == nil {
+		return nil
 	}
-	// for a leaf queue pull out all values from the template and set each of them
-	// See YUNIKORN-193: for now just copy one attr from parent
-	if sq.isLeaf {
-		if parent[configs.ApplicationSortPolicy] != "" {
-			sq.properties[configs.ApplicationSortPolicy] = parent[configs.ApplicationSortPolicy]
-		}
+	if current := sq.template; current != nil {
+		return current
 	}
-	// for a parent queue we just copy the template from its parent (no need to be recursive)
-	// this stops at the first managed queue
-	// See YUNIKORN-193
+	return lookupTemplate(sq.parent)
 }
 
-func (sq *Queue) SetQueueConfig(conf configs.QueueConfig) error {
+func (sq *Queue) ApplyConf(conf configs.QueueConfig) error {
 	sq.Lock()
 	defer sq.Unlock()
-	return sq.setQueueConfig(conf)
+	return sq.applyConf(conf)
+}
+
+// set inner resource only if input resource is valid
+func (sq *Queue) checkAndSetResources(maxResource, guaranteedResource *resources.Resource) {
+	isInvalidResource := func(resource *resources.Resource) bool {
+		return resource == nil || len(resource.Resources) == 0 || resources.IsZero(resource)

Review comment:
       This is already available as a call to `StrictlyGreaterThanZero(res)` using that call would be the proper solution and would fix a possible lingering bug in setting max and guaranteed resources. 

##########
File path: pkg/common/configs/config.go
##########
@@ -74,10 +74,16 @@ type QueueConfig struct {
 	Properties      map[string]string `yaml:",omitempty" json:",omitempty"`
 	AdminACL        string            `yaml:",omitempty" json:",omitempty"`
 	SubmitACL       string            `yaml:",omitempty" json:",omitempty"`
+	ChildTemplate   ChildTemplate     `yaml:",omitempty" json:",omitempty"`
 	Queues          []QueueConfig     `yaml:",omitempty" json:",omitempty"`
 	Limits          []Limit           `yaml:",omitempty" json:",omitempty"`
 }
 
+type ChildTemplate struct {
+	Properties map[string]string `yaml:",omitempty" json:",omitempty"`

Review comment:
       We should also add the MaxApplications in this structure

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {
+	if sq == nil {
+		return nil
 	}
-	// for a leaf queue pull out all values from the template and set each of them
-	// See YUNIKORN-193: for now just copy one attr from parent
-	if sq.isLeaf {
-		if parent[configs.ApplicationSortPolicy] != "" {
-			sq.properties[configs.ApplicationSortPolicy] = parent[configs.ApplicationSortPolicy]
-		}
+	if current := sq.template; current != nil {
+		return current
 	}
-	// for a parent queue we just copy the template from its parent (no need to be recursive)
-	// this stops at the first managed queue
-	// See YUNIKORN-193
+	return lookupTemplate(sq.parent)
 }
 
-func (sq *Queue) SetQueueConfig(conf configs.QueueConfig) error {
+func (sq *Queue) ApplyConf(conf configs.QueueConfig) error {
 	sq.Lock()
 	defer sq.Unlock()
-	return sq.setQueueConfig(conf)
+	return sq.applyConf(conf)
+}
+
+// set inner resource only if input resource is valid
+func (sq *Queue) checkAndSetResources(maxResource, guaranteedResource *resources.Resource) {
+	isInvalidResource := func(resource *resources.Resource) bool {
+		return resource == nil || len(resource.Resources) == 0 || resources.IsZero(resource)
+	}
+
+	if isInvalidResource(maxResource) {
+		log.Logger().Debug("max resources setting ignored: cannot set zero max resources")
+	} else {
+		sq.maxResource = maxResource
+	}
+
+	if isInvalidResource(guaranteedResource) {
+		log.Logger().Debug("guaranteed resources setting ignored: cannot set zero max resources")
+	} else {
+		sq.guaranteedResource = guaranteedResource
+	}
+}
+
+func (sq *Queue) setResources(resource configs.Resources) error {

Review comment:
       Fold `setResources()` and `checkAndSetResources()` into one function. the root queue check should have happened already (see comment later)

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -182,36 +195,86 @@ func (sq *Queue) mergeProperties(parent, config map[string]string) {
 	}
 }
 
-// Set the properties that the dynamic child queue inherits from the parent
-// The properties list for the parent must be retrieved using getProperties()
-// This currently only sets the sort policy as it is set on the parent
-// Further implementation is part of YUNIKORN-193
-// lock free call
-func (sq *Queue) setTemplateProperties(parent map[string]string) {
-	if len(parent) == 0 {
-		return
+// try to find a template from closet parent
+func lookupTemplate(sq *Queue) *template.Template {
+	if sq == nil {
+		return nil
 	}
-	// for a leaf queue pull out all values from the template and set each of them
-	// See YUNIKORN-193: for now just copy one attr from parent
-	if sq.isLeaf {
-		if parent[configs.ApplicationSortPolicy] != "" {
-			sq.properties[configs.ApplicationSortPolicy] = parent[configs.ApplicationSortPolicy]
-		}
+	if current := sq.template; current != nil {
+		return current
 	}
-	// for a parent queue we just copy the template from its parent (no need to be recursive)
-	// this stops at the first managed queue
-	// See YUNIKORN-193
+	return lookupTemplate(sq.parent)
 }
 
-func (sq *Queue) SetQueueConfig(conf configs.QueueConfig) error {
+func (sq *Queue) ApplyConf(conf configs.QueueConfig) error {
 	sq.Lock()
 	defer sq.Unlock()
-	return sq.setQueueConfig(conf)
+	return sq.applyConf(conf)
+}
+
+// set inner resource only if input resource is valid
+func (sq *Queue) checkAndSetResources(maxResource, guaranteedResource *resources.Resource) {
+	isInvalidResource := func(resource *resources.Resource) bool {
+		return resource == nil || len(resource.Resources) == 0 || resources.IsZero(resource)
+	}
+
+	if isInvalidResource(maxResource) {
+		log.Logger().Debug("max resources setting ignored: cannot set zero max resources")
+	} else {
+		sq.maxResource = maxResource
+	}
+
+	if isInvalidResource(guaranteedResource) {
+		log.Logger().Debug("guaranteed resources setting ignored: cannot set zero max resources")
+	} else {
+		sq.guaranteedResource = guaranteedResource
+	}
+}
+
+func (sq *Queue) setResources(resource configs.Resources) error {
+	// Load the max & guaranteed resources for all but the root queue
+	if sq.Name != configs.RootQueue {
+		maxResource, err := resources.NewResourceFromConf(resource.Max)
+		if err != nil {
+			log.Logger().Error("parsing failed on max resources this should not happen",
+				zap.Error(err))
+			return err
+		}
+
+		guaranteedResource, err := resources.NewResourceFromConf(resource.Guaranteed)
+		if err != nil {
+			log.Logger().Error("parsing failed on guaranteed resources this should not happen",
+				zap.Error(err))
+			return err
+		}
+		sq.checkAndSetResources(maxResource, guaranteedResource)
+	}
+	return nil
+}
+
+func (sq *Queue) setTemplate(conf configs.ChildTemplate) error {
+	// Empty returns true if all elements in this template are "0". otherwise, it returns false.
+	// This method is used to check whether there is template in yaml file.
+	configIsNotEmpty := func() bool {
+		return len(conf.Properties) != 0 || len(conf.Resources.Guaranteed) != 0 || len(conf.Resources.Max) != 0

Review comment:
       Can we fold this into the `FromConf()` call? A non empty list of empty property values is also empty etc. The `FromConf()` code should do the exact same checks including the resource parsing.

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -239,30 +302,12 @@ func (sq *Queue) setQueueConfig(conf configs.QueueConfig) error {
 		sq.isLeaf = false
 	}
 
-	// Load the max & guaranteed resources for all but the root queue
-	if sq.Name != configs.RootQueue {
-		sq.maxResource, err = resources.NewResourceFromConf(conf.Resources.Max)
-		if err != nil {
-			log.Logger().Error("parsing failed on max resources this should not happen",
-				zap.Error(err))
-			return err
-		}
-		if len(sq.maxResource.Resources) == 0 || resources.IsZero(sq.maxResource) {
-			log.Logger().Debug("max resources config setting ignored: cannot set zero max resources")
-			sq.maxResource = nil
-		}
+	if err = sq.setTemplate(conf.ChildTemplate); err != nil {

Review comment:
       We have established the type of queue. We should not set a template if we have a leaf queue. The check should be here instead of in the `setTemplate()` code.

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -239,30 +302,12 @@ func (sq *Queue) setQueueConfig(conf configs.QueueConfig) error {
 		sq.isLeaf = false
 	}
 
-	// Load the max & guaranteed resources for all but the root queue
-	if sq.Name != configs.RootQueue {
-		sq.maxResource, err = resources.NewResourceFromConf(conf.Resources.Max)
-		if err != nil {
-			log.Logger().Error("parsing failed on max resources this should not happen",
-				zap.Error(err))
-			return err
-		}
-		if len(sq.maxResource.Resources) == 0 || resources.IsZero(sq.maxResource) {
-			log.Logger().Debug("max resources config setting ignored: cannot set zero max resources")
-			sq.maxResource = nil
-		}
+	if err = sq.setTemplate(conf.ChildTemplate); err != nil {
+		return nil
+	}
 
-		// Load the guaranteed resources
-		sq.guaranteedResource, err = resources.NewResourceFromConf(conf.Resources.Guaranteed)
-		if err != nil {
-			log.Logger().Error("parsing failed on guaranteed resources this should not happen",
-				zap.Error(err))
-			return err
-		}
-		if len(sq.guaranteedResource.Resources) == 0 || resources.IsZero(sq.guaranteedResource) {
-			log.Logger().Debug("guaranteed resources config setting ignored: guaranteed must be non-zero to take effect")
-			sq.guaranteedResource = nil
-		}
+	if err = sq.setResources(conf.Resources); err != nil {
+		return err

Review comment:
       Same comment as above `setResources()` is a nop call for the root queue: check here before calling.

##########
File path: pkg/scheduler/objects/queue.go
##########
@@ -409,22 +454,25 @@ func (sq *Queue) GetQueueInfos() dao.QueueDAOInfo {
 	return queueInfo
 }
 
-func (sq *Queue) GetPartitionQueues() dao.PartitionQueueDAOInfo {
+func (sq *Queue) GetPartitionQueueDAOInfo() dao.PartitionQueueDAOInfo {
 	queueInfo := dao.PartitionQueueDAOInfo{}
 	if len(sq.children) > 0 {
 		for _, child := range sq.GetCopyOfChildren() {
-			queueInfo.Children = append(queueInfo.Children, child.GetPartitionQueues())
+			queueInfo.Children = append(queueInfo.Children, child.GetPartitionQueueDAOInfo())
 		}
 	}
 	sq.RLock()
 	defer sq.RUnlock()
-	queueInfo.QueueName = sq.GetQueuePath()
+
+	// we have held the read lock so following method should not take lock again.

Review comment:
       We should not lock anything while holding the lock. Which means that we must not call `sq.parent.GetQueuePath()` to get the path as it might deadlock. Derive the parent path via:
   `queueInfo.Parent = sq.QueuePath[:strings.LastIndex(sq.QueuePath, configs.DOT)]`
   if parent is not nil
   
   NIT: comment before taking the lock.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org