You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@yunikorn.apache.org by "manirajv06 (via GitHub)" <gi...@apache.org> on 2023/09/06 06:35:40 UTC

[GitHub] [yunikorn-k8shim] manirajv06 commented on a diff in pull request #653: [YUNIKORN-1909] add gang scheduling with hugepages e2e test

manirajv06 commented on code in PR #653:
URL: https://github.com/apache/yunikorn-k8shim/pull/653#discussion_r1316800912


##########
test/e2e/gang_scheduling/gang_scheduling_test.go:
##########
@@ -899,119 +498,231 @@ var _ = Describe("", func() {
 	// 5. Verify app allocation is empty
 	It("Verify_OriginatorDeletion_Trigger_Placeholders_Cleanup", func() {
 		// case 1: originator pod without ownerreference
-		podResources := map[string]resource.Quantity{
-			"cpu":    resource.MustParse("10m"),
-			"memory": resource.MustParse("10M"),
-		}
-		podConf := k8s.TestPodConfig{
-			Name: "gang-driver-pod" + common.RandSeq(5),
-			Labels: map[string]string{
-				"app":           "sleep-" + common.RandSeq(5),
-				"applicationId": "appid-" + common.RandSeq(5),
+		verifyOriginatorDeletionCase(false)
+	})
+
+	// Test to verify originator deletion will trigger placeholders cleanup
+	// 1. Create an originator pod
+	// 2. Set pod ownerreference with ownerreference, take configmap for example
+	// 3. Delete originator pod to trigger placeholders deletion
+	// 4. Verify placeholders deleted
+	// 5. Verify app allocation is empty
+	It("Verify_OriginatorDeletionWithOwnerreference_Trigger_Placeholders_Cleanup", func() {
+		// case 2: originator pod with ownerreference
+		verifyOriginatorDeletionCase(true)
+	})
+
+	ginkgo.DescribeTable("", func(annotations k8s.PodAnnotation) {
+		job := createJob(appID, minResource, annotations, 2)
+
+		// Validate placeholders deleted.
+		checkPlaceholderTerminated(appID, &annotations)
+
+		// Validate incorrect task-group definition ignored
+		timeoutErr := kClient.WaitForJobPods(ns, job.Name, int(*job.Spec.Parallelism), 30*time.Second)
+		Ω(timeoutErr).NotTo(HaveOccurred())
+		appPods, getErr := kClient.ListPods(ns, fmt.Sprintf("applicationId=%s", appID))
+		Ω(getErr).NotTo(HaveOccurred())
+		Ω(len(appPods.Items)).To(BeNumerically("==", *job.Spec.Parallelism))
+	},
+		ginkgo.Entry("Verify_TG_With_Duplicate_Group", k8s.PodAnnotation{
+			TaskGroups: []interfaces.TaskGroup{
+				{Name: "groupdup", MinMember: int32(3), MinResource: minResource},
+				{Name: "groupdup", MinMember: int32(5), MinResource: minResource},
+				{Name: groupA, MinMember: int32(7), MinResource: minResource},
 			},
-			Annotations: &k8s.PodAnnotation{
-				TaskGroups: []interfaces.TaskGroup{
-					{
-						Name:         groupA + "-" + common.RandSeq(5),
-						MinMember:    int32(3),
-						MinResource:  podResources,
-						NodeSelector: map[string]string{"kubernetes.io/hostname": "unsatisfiable"},
-					},
-					{
-						Name:        groupB + "-" + common.RandSeq(5),
-						MinMember:   int32(3),
-						MinResource: podResources,
-					},
-				},
+		}),
+		ginkgo.Entry("Verify_TG_With_Invalid_Chars", k8s.PodAnnotation{
+			TaskGroups: []interfaces.TaskGroup{
+				{Name: "GROUPCAPS", MinMember: int32(3), MinResource: minResource},
 			},
-			Resources: &v1.ResourceRequirements{
-				Requests: v1.ResourceList{
-					"cpu":    podResources["cpu"],
-					"memory": podResources["memory"],
-				},
+		}),
+		ginkgo.Entry("Verify_TG_With_Invalid_MinMember", k8s.PodAnnotation{
+			TaskGroups: []interfaces.TaskGroup{
+				{Name: groupA, MinMember: int32(-1), MinResource: minResource},
 			},
-		}
+		}),
+	)
 
-		podTest, err := k8s.InitTestPod(podConf)
-		Ω(err).NotTo(HaveOccurred())
-		taskGroupsMap, annErr := k8s.PodAnnotationToMap(podConf.Annotations)
-		Ω(annErr).NotTo(HaveOccurred())
-		By(fmt.Sprintf("Deploy pod %s with task-groups: %+v", podTest.Name, taskGroupsMap[k8s.TaskGroups]))
-		originator, err := kClient.CreatePod(podTest, ns)
+	// Test placeholder with hugepages
+	// 1. Deploy 1 job with hugepages-2Mi
+	// 2. Verify all pods running
+	It("Verify_HugePage", func() {
+		hugepageKey := fmt.Sprintf("%s2Mi", v1.ResourceHugePagesPrefix)
+		nodes, err := kClient.GetNodes()
 		Ω(err).NotTo(HaveOccurred())
+		hasHugePages := false
+		for _, node := range nodes.Items {
+			if v, ok := node.Status.Capacity[v1.ResourceName(hugepageKey)]; ok {
+				if v.Value() != 0 {
+					hasHugePages = true
+					break
+				}
+			}
+		}
+		if !hasHugePages {
+			ginkgo.Skip("Skip hugepages test as no node has hugepages")
+		}
 
-		By("Verify appState = Accepted")
-		timeoutErr := restClient.WaitForAppStateTransition(defaultPartition, nsQueue, podConf.Labels["applicationId"], yunikorn.States().Application.Accepted, 20)
-		Ω(timeoutErr).NotTo(HaveOccurred())
-
-		By("Wait for placeholders running")
-		phNames := yunikorn.GetPlaceholderNames(podConf.Annotations, podConf.Labels["applicationId"])
-		tgBNames := phNames[podConf.Annotations.TaskGroups[1].Name]
-		for _, ph := range tgBNames {
-			runErr := kClient.WaitForPodRunning(ns, ph, 30*time.Second)
-			Ω(runErr).NotTo(HaveOccurred())
+		// add hugepages to request
+		minResource[hugepageKey] = resource.MustParse("100Mi")
+		annotations := k8s.PodAnnotation{
+			TaskGroupName: groupA,
+			TaskGroups: []interfaces.TaskGroup{
+				{Name: groupA, MinMember: int32(3), MinResource: minResource},
+			},
 		}
+		job := createJob(appID, minResource, annotations, 3)
 
-		By("Delete originator pod")
-		deleteErr := kClient.DeletePod(originator.Name, ns)
-		Ω(deleteErr).NotTo(HaveOccurred())
+		By("Verify all job pods are running")
+		jobRunErr := kClient.WaitForJobPods(ns, job.Name, int(*job.Spec.Parallelism), 2*time.Minute)
+		Ω(jobRunErr).NotTo(HaveOccurred())
 
-		By("Verify placeholders deleted")
-		for _, placeholders := range phNames {
-			for _, ph := range placeholders {
-				deleteErr = kClient.WaitForPodTerminated(ns, ph, 30*time.Second)
-				Ω(deleteErr).NotTo(HaveOccurred(), "Placeholder %s still running", ph)
-			}
+		checkAppStatus(appID, yunikorn.States().Application.Running)

Review Comment:
   Can we also show the resources used by pods and assert/check for expected resource type (huge pages in this case)? so that it will come out very clearly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@yunikorn.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org