You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "zeroshade (via GitHub)" <gi...@apache.org> on 2023/05/25 16:48:09 UTC

[GitHub] [arrow] zeroshade commented on a diff in pull request #35723: GH-32832: [Go] support building with tinygo

zeroshade commented on code in PR #35723:
URL: https://github.com/apache/arrow/pull/35723#discussion_r1205597091


##########
dev/release/rat_exclude_files.txt:
##########
@@ -82,6 +82,8 @@ go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go
 go/parquet/internal/gen-go/parquet/parquet-consts.go
 go/parquet/internal/gen-go/parquet/parquet.go
 go/parquet/version_string.go
+go/internal/json/json.go
+go/internal/json/json_tinygo.go

Review Comment:
   instead of excluding these,  can you please add the Apache license heading to them? Typically this exclusion is reserved for generated files.



##########
go/arrow/array/string.go:
##########
@@ -221,16 +217,12 @@ func (a *LargeString) ValueOffsets() []int64 {
 	return a.offsets[beg:end]
 }
 
-func (a *LargeString) ValueBytes() (ret []byte) {
+func (a *LargeString) ValueBytes() []byte {
 	beg := a.array.data.offset
 	end := beg + a.array.data.length
 	data := a.values[a.offsets[beg]:a.offsets[end]]
 
-	s := (*reflect.SliceHeader)(unsafe.Pointer(&ret))
-	s.Data = (*reflect.StringHeader)(unsafe.Pointer(&data)).Data
-	s.Len = len(data)
-	s.Cap = len(data)
-	return
+	return []byte(data)

Review Comment:
   same comment as above



##########
go/arrow/array/string.go:
##########
@@ -79,16 +79,12 @@ func (a *String) ValueOffsets() []int32 {
 	return a.offsets[beg:end]
 }
 
-func (a *String) ValueBytes() (ret []byte) {
+func (a *String) ValueBytes() []byte {
 	beg := a.array.data.offset
 	end := beg + a.array.data.length
 	data := a.values[a.offsets[beg]:a.offsets[end]]
 
-	s := (*reflect.SliceHeader)(unsafe.Pointer(&ret))
-	s.Data = (*reflect.StringHeader)(unsafe.Pointer(&data)).Data
-	s.Len = len(data)
-	s.Cap = len(data)
-	return
+	return []byte(data)

Review Comment:
   this causes a copy which isn't what we want. This could end up creating a copy of the entire array essentially which would be pretty bad. I assume this change is because of the issues with `SliceHeader`/`StringHeader` in tinygo?
   
   An alternative solution here would be to just pull the bytes from the buffer directly:
   
   ```go
   if a.array.data.buffers[2] != nil {
       return a.array.data.buffers[2].Bytes()[a.offsets[beg]:a.offsets[end]]
   }
   return nil
   ```
   
   This way we preserve the fact that we're not copying the data.



##########
go/arrow/bitutil/bitutil.go:
##########
@@ -150,15 +150,12 @@ const (
 )
 
 func bytesToUint64(b []byte) []uint64 {
-	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
-
-	var res []uint64
-	s := (*reflect.SliceHeader)(unsafe.Pointer(&res))
-	s.Data = h.Data
-	s.Len = h.Len / uint64SizeBytes
-	s.Cap = h.Cap / uint64SizeBytes
+	if cap(b) < uint64SizeBytes {
+		return nil
+	}
 
-	return res
+	h := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+	return unsafe.Slice((*uint64)(unsafe.Pointer(h.Data)), cap(b)/uint64SizeBytes)[:len(b)/uint64SizeBytes]

Review Comment:
   if we're sure this is only ever called with a slice with a non-zero length, this could be simplified to 
   
   ```go
   return unsafe.Slice((*uint64)(unsafe.Pointer(&b[0])), cap(b)/uint64SizeBytes)[:len(b)/uint64SizeBytes]
   ```



##########
go/arrow/compute/cast_test.go:
##########
@@ -2589,7 +2589,7 @@ func (c *CastSuite) TestStructToDifferentNullabilityStruct() {
 		}
 		srcNonNull, _, err := array.FromJSON(c.mem, arrow.StructOf(fieldsSrcNonNullable...),
 			strings.NewReader(`[
-				{"a": 11, "b": 32, "c", 95},
+				{"a": 11, "b": 32, "c": 95},

Review Comment:
   same comment as before, how is this not already failing?



##########
go/arrow/compute/arithmetic_test.go:
##########
@@ -2621,7 +2621,7 @@ func (us *UnaryArithmeticSigned[T]) TestAbsoluteValue() {
 				fn(`[]`, `[]`)
 				// scalar/arrays with nulls
 				fn(`[null]`, `[null]`)
-				fn(`[1, null -10]`, `[1, null, 10]`)
+				fn(`[1, null, -10]`, `[1, null, 10]`)

Review Comment:
   so this is interesting, how was this not failing already?



##########
go/internal/hashing/xxh3_memo_table.go:
##########
@@ -166,16 +168,13 @@ func (s *BinaryMemoTable) Size() int {
 }
 
 // helper function to easily return a byte slice for any given value
-// regardless of the type if it's a []byte, parquet.ByteArray,
-// parquet.FixedLenByteArray or string.
+// regardless of the type.

Review Comment:
   should probably clarify that this only works for `[]byte`, `string` or something which fulfills the `ByteSlice` interface



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org