You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "zeroshade (via GitHub)" <gi...@apache.org> on 2023/05/30 17:02:26 UTC

[GitHub] [arrow] zeroshade commented on a diff in pull request #35823: GH-35828: [Go] `array.ApproxEqual` allows map entries with different key order

zeroshade commented on code in PR #35823:
URL: https://github.com/apache/arrow/pull/35823#discussion_r1210566705


##########
go/arrow/array/compare.go:
##########
@@ -732,3 +732,57 @@ func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool {
 	}
 	return true
 }
+
+// arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined)
+func arrayApproxEqualMap(left, right *Map, opt equalOption) bool {
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) {
+			return false
+		}
+	}
+	return true
+}
+
+// arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is approx equal.
+// Basically, it doesn't care about key order.
+// structs passed will be released
+func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool {
+	defer left.Release()
+	defer right.Release()
+
+	// Every element here is a key-value pair
+	lElems := make([]arrow.Array, left.Len())
+	rElems := make([]arrow.Array, right.Len())
+	for i := 0; i < left.Len(); i++ {
+		lElems[i] = NewSlice(left, int64(i), int64(i+1))
+		rElems[i] = NewSlice(right, int64(i), int64(i+1))
+	}
+	defer func() {
+		for i := range lElems {
+			lElems[i].Release()
+			rElems[i].Release()
+		}
+	}()
+
+	used := make(map[int]bool, right.Len())
+	for _, ll := range lElems {
+		found := false
+		for i, rr := range rElems {
+			if used[i] {
+				continue
+			}
+			if arrayApproxEqual(ll, rr, opt) {
+				found = true
+				used[i] = true
+				break
+			}
+		}

Review Comment:
   rather than calling `arrayApproxEqual` on the whole struct, we should probably be just comparing the first field (the key field) of each and then *only* comparing the second field (the value field) if the first field matched.



##########
go/arrow/array/compare.go:
##########
@@ -732,3 +732,57 @@ func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool {
 	}
 	return true
 }
+
+// arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined)
+func arrayApproxEqualMap(left, right *Map, opt equalOption) bool {
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) {
+			return false
+		}
+	}
+	return true
+}
+
+// arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is approx equal.
+// Basically, it doesn't care about key order.
+// structs passed will be released
+func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool {
+	defer left.Release()
+	defer right.Release()
+
+	// Every element here is a key-value pair
+	lElems := make([]arrow.Array, left.Len())
+	rElems := make([]arrow.Array, right.Len())
+	for i := 0; i < left.Len(); i++ {
+		lElems[i] = NewSlice(left, int64(i), int64(i+1))
+		rElems[i] = NewSlice(right, int64(i), int64(i+1))
+	}
+	defer func() {
+		for i := range lElems {
+			lElems[i].Release()
+			rElems[i].Release()
+		}
+	}()

Review Comment:
   why are these slices necessary? can't you just use the indices of the arrays themselves? 
   
   you could also just use `sliceApproxEqual` rather than creating a bunch of slices up front



##########
go/arrow/array/compare.go:
##########
@@ -732,3 +732,57 @@ func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool {
 	}
 	return true
 }
+
+// arrayApproxEqualMap doesn't care about the order of keys (in Go map traversal order is undefined)
+func arrayApproxEqualMap(left, right *Map, opt equalOption) bool {
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		if !arrayApproxEqualSingleMapEntry(left.newListValue(i).(*Struct), right.newListValue(i).(*Struct), opt) {
+			return false
+		}
+	}
+	return true
+}
+
+// arrayApproxEqualSingleMapEntry is a helper function that checks if a single entry pair is approx equal.
+// Basically, it doesn't care about key order.
+// structs passed will be released
+func arrayApproxEqualSingleMapEntry(left, right *Struct, opt equalOption) bool {
+	defer left.Release()
+	defer right.Release()
+
+	// Every element here is a key-value pair
+	lElems := make([]arrow.Array, left.Len())
+	rElems := make([]arrow.Array, right.Len())
+	for i := 0; i < left.Len(); i++ {
+		lElems[i] = NewSlice(left, int64(i), int64(i+1))
+		rElems[i] = NewSlice(right, int64(i), int64(i+1))
+	}
+	defer func() {
+		for i := range lElems {
+			lElems[i].Release()
+			rElems[i].Release()
+		}
+	}()
+
+	used := make(map[int]bool, right.Len())
+	for _, ll := range lElems {
+		found := false
+		for i, rr := range rElems {
+			if used[i] {
+				continue
+			}
+			if arrayApproxEqual(ll, rr, opt) {
+				found = true
+				used[i] = true
+				break
+			}
+		}
+		if !found {
+			return false
+		}
+	}
+	return true

Review Comment:
   should verify that `len(used)` == `right.Len()`. Otherwise this would return true in the case where the right side contains keys the left side was missing. 
   
   Should also add test cases for failed comparisons where keys have different values and when keys are missing between the operands.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org