You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "zeroshade (via GitHub)" <gi...@apache.org> on 2023/06/01 15:48:42 UTC

[GitHub] [arrow] zeroshade commented on a diff in pull request #35872: GH-35871: [Go] Account for struct validity bitmap in `array.ApproxEqual`

zeroshade commented on code in PR #35872:
URL: https://github.com/apache/arrow/pull/35872#discussion_r1213320182


##########
go/arrow/array/compare.go:
##########
@@ -735,10 +735,14 @@ func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption)
 }
 
 func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool {
-	for i, lf := range left.fields {
-		rf := right.fields[i]
-		if !arrayApproxEqual(lf, rf, opt) {
-			return false
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}

Review Comment:
   shouldn't this be:
   
   ```go
   if left.IsNull(i) {
       if !right.IsNull(i) {
           return false
       }
       continue
   }
   ```
   ?



##########
go/arrow/array/compare.go:
##########
@@ -735,10 +735,14 @@ func arrayApproxEqualFixedSizeList(left, right *FixedSizeList, opt equalOption)
 }
 
 func arrayApproxEqualStruct(left, right *Struct, opt equalOption) bool {
-	for i, lf := range left.fields {
-		rf := right.fields[i]
-		if !arrayApproxEqual(lf, rf, opt) {
-			return false
+	for i := 0; i < left.Len(); i++ {
+		if left.IsNull(i) {
+			continue
+		}
+		for j := range left.fields {
+			if !sliceApproxEqual(left.fields[j], int64(i), int64(i+1), right.fields[j], int64(i), int64(i+1), opt) {
+				return false
+			}
 		}

Review Comment:
   We should probably use a `bitutils.SetBitRunReader` to simply skip the indices that are null in the parent bitmap and call `sliceApproxEqual` on the runs of valid values. Something like:
   
   ```go
   compareRun := func(pos, length int64) error {
           for i := range left.fields {
                   if !sliceApproxEqual(left.fields[i], pos, pos+length, right.fields[i], pos, pos+length, opt) {
                       return arrow.ErrInvalid
                   }
           }
           return nil
   }
   
   return bitutils.VisitSetBitRuns(left.NullBitmapBytes(), left.Offset(), left.Len(), compareRun) == nil
   ```
   
   This way we create fewer slices and do more optimized comparisons rather than creating a new slice for *every* index.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org