You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@devlake.apache.org by GitBox <gi...@apache.org> on 2022/10/21 08:50:31 UTC

[GitHub] [incubator-devlake] klesh commented on a diff in pull request #3513: refactor(e2e): add e2e diversity

klesh commented on code in PR #3513:
URL: https://github.com/apache/incubator-devlake/pull/3513#discussion_r1001524150


##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -121,26 +121,30 @@ func (t *DataFlowTester) ImportCsvIntoRawTable(csvRelPath string, rawTableName s
 	csvIter := pluginhelper.NewCsvFileIterator(csvRelPath)
 	defer csvIter.Close()
 	t.FlushRawTable(rawTableName)
-	// load rows and insert into target table

Review Comment:
   Why delete the comment?
   We need more instead of less



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -121,26 +121,30 @@ func (t *DataFlowTester) ImportCsvIntoRawTable(csvRelPath string, rawTableName s
 	csvIter := pluginhelper.NewCsvFileIterator(csvRelPath)
 	defer csvIter.Close()
 	t.FlushRawTable(rawTableName)
-	// load rows and insert into target table
+	fc := fieldCheck{}

Review Comment:
   How about `DataDiversityChecker`? 



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -469,3 +484,48 @@ func (t *DataFlowTester) VerifyTableWithOptions(dst schema.Tabler, opts TableOpt
 	}
 	assert.Equal(t.T, expectedTotal, actualTotal, fmt.Sprintf(`%s count not match`, dst.TableName()))
 }
+
+// fieldCheck will hold all fields and relevant values
+type fieldCheck map[string][]interface{}
+
+// feedInRawTable will add value of fields to a list, later we will use this list to judge diversity
+func (f fieldCheck) feedInRawTable(toInsertValues map[string]interface{}) {
+	for k, v := range toInsertValues {
+		if k == `created_at` || (k == `input` && v == `null`) || k == `url` {
+			continue
+		}
+		if f[k] == nil {
+			valueList := make([]interface{}, 0)

Review Comment:
   I think using `map` directly would be more efficient.



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -469,3 +484,48 @@ func (t *DataFlowTester) VerifyTableWithOptions(dst schema.Tabler, opts TableOpt
 	}
 	assert.Equal(t.T, expectedTotal, actualTotal, fmt.Sprintf(`%s count not match`, dst.TableName()))
 }
+
+// fieldCheck will hold all fields and relevant values
+type fieldCheck map[string][]interface{}
+
+// feedInRawTable will add value of fields to a list, later we will use this list to judge diversity
+func (f fieldCheck) feedInRawTable(toInsertValues map[string]interface{}) {
+	for k, v := range toInsertValues {
+		if k == `created_at` || (k == `input` && v == `null`) || k == `url` {
+			continue
+		}
+		if f[k] == nil {
+			valueList := make([]interface{}, 0)

Review Comment:
   I don't really understand the algo here 😂



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -469,3 +484,48 @@ func (t *DataFlowTester) VerifyTableWithOptions(dst schema.Tabler, opts TableOpt
 	}
 	assert.Equal(t.T, expectedTotal, actualTotal, fmt.Sprintf(`%s count not match`, dst.TableName()))
 }
+
+// fieldCheck will hold all fields and relevant values
+type fieldCheck map[string][]interface{}
+
+// feedInRawTable will add value of fields to a list, later we will use this list to judge diversity
+func (f fieldCheck) feedInRawTable(toInsertValues map[string]interface{}) {
+	for k, v := range toInsertValues {
+		if k == `created_at` || (k == `input` && v == `null`) || k == `url` {
+			continue
+		}
+		if f[k] == nil {
+			valueList := make([]interface{}, 0)
+			valueList = append(valueList, v)
+			f[k] = valueList
+		} else {
+			if v != f[k][0] {
+				f[k] = append(f[k], v)
+			}
+		}
+	}
+}
+
+// feedInSnapshotTable will add value of fields(only targetFields) to a list, later we will use this list to judge diversity
+func (f fieldCheck) feedInSnapshotTable(toInsertValues map[string]interface{}, targetFields map[string]bool) {

Review Comment:
   I think `snapshots` are the output of a subtask, am I wrong?



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -121,26 +121,30 @@ func (t *DataFlowTester) ImportCsvIntoRawTable(csvRelPath string, rawTableName s
 	csvIter := pluginhelper.NewCsvFileIterator(csvRelPath)
 	defer csvIter.Close()
 	t.FlushRawTable(rawTableName)
-	// load rows and insert into target table
+	fc := fieldCheck{}

Review Comment:
   And we should unit test this Checker to make sure it would fail on low-diversity and pass on high-diversity.



##########
helpers/e2ehelper/data_flow_tester.go:
##########
@@ -419,17 +424,26 @@ func (t *DataFlowTester) VerifyTableWithOptions(dst schema.Tabler, opts TableOpt
 	}
 
 	targetFields := t.resolveTargetFields(dst, opts)
+	targetFieldMap := make(map[string]bool)
+	for _, v := range targetFields {
+		targetFieldMap[v] = true
+	}
 	pkColumns, err := dal.GetPrimarykeyColumns(t.Dal, dst)
 	if err != nil {
 		panic(err)
 	}
-
+	// will not check pk values as connection_id or board_id will be unique in snapshot tables

Review Comment:
   If they would be all unique, and we are expecting them, why not check them to make sure of it? what is the harm?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@devlake.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org