You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2022/10/20 15:57:34 UTC
[arrow] branch master updated: ARROW-18081: [Go] Add Scalar Boolean functions (#14442)
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new e06e98db35 ARROW-18081: [Go] Add Scalar Boolean functions (#14442)
e06e98db35 is described below
commit e06e98db356e602212019cfbae83fd3d5347292d
Author: Matt Topol <zo...@gmail.com>
AuthorDate: Thu Oct 20 11:57:26 2022 -0400
ARROW-18081: [Go] Add Scalar Boolean functions (#14442)
Authored-by: Matt Topol <zo...@gmail.com>
Signed-off-by: Matt Topol <zo...@gmail.com>
---
ci/scripts/go_test.sh | 2 +-
go/arrow/bitutil/_lib/bitmap_ops.c | 14 +-
go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s | 198 ++++++++++++
go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s | 258 ++++++++++++++++
go/arrow/bitutil/bitmap_ops.go | 46 +++
go/arrow/bitutil/bitmap_ops_amd64.go | 6 +
go/arrow/bitutil/bitmap_ops_arm64.go | 2 +
go/arrow/bitutil/bitmap_ops_avx2_amd64.go | 14 +
go/arrow/bitutil/bitmap_ops_avx2_amd64.s | 181 +++++++++++
go/arrow/bitutil/bitmap_ops_noasm.go | 2 +
go/arrow/bitutil/bitmap_ops_ppc64le.go | 2 +
go/arrow/bitutil/bitmap_ops_s390x.go | 2 +
go/arrow/bitutil/bitmap_ops_sse4_amd64.go | 14 +
go/arrow/bitutil/bitmap_ops_sse4_amd64.s | 245 +++++++++++++++
go/arrow/bitutil/bitmaps.go | 175 ++++++++++-
.../compute/internal/kernels/scalar_boolean.go | 332 +++++++++++++++++++++
go/arrow/compute/internal/kernels/types.go | 43 +++
go/arrow/compute/registry.go | 1 +
go/arrow/compute/scalar_bool.go | 131 ++++++++
go/arrow/compute/scalar_bool_test.go | 152 ++++++++++
go/go.sum | 1 +
21 files changed, 1812 insertions(+), 9 deletions(-)
diff --git a/ci/scripts/go_test.sh b/ci/scripts/go_test.sh
index e31fa55564..54b05c3cc2 100755
--- a/ci/scripts/go_test.sh
+++ b/ci/scripts/go_test.sh
@@ -61,7 +61,7 @@ pushd ${source_dir}/arrow
TAGS="assert,test"
if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
if [[ "${MSYSTEM}" = "MINGW64" ]]; then
- export PATH=${MINGW_PREFIX}/bin:$PATH
+ export PATH=${MINGW_PREFIX}\\bin:${MINGW_PREFIX}\\lib:$PATH
fi
TAGS="${TAGS},ccalloc"
fi
diff --git a/go/arrow/bitutil/_lib/bitmap_ops.c b/go/arrow/bitutil/_lib/bitmap_ops.c
index 96817b2f2b..f48b4d4d82 100644
--- a/go/arrow/bitutil/_lib/bitmap_ops.c
+++ b/go/arrow/bitutil/_lib/bitmap_ops.c
@@ -31,4 +31,16 @@ void FULL_NAME(bitmap_aligned_or)(const uint8_t* left, const uint8_t* right, uin
for (int64_t i = 0; i < nbytes; ++i) {
out[i] = left[i] | right[i];
}
-}
\ No newline at end of file
+}
+
+void FULL_NAME(bitmap_aligned_and_not)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) {
+ for (int64_t i = 0; i < nbytes; ++i) {
+ out[i] = left[i] & ~right[i];
+ }
+}
+
+void FULL_NAME(bitmap_aligned_xor)(const uint8_t* left, const uint8_t* right, uint8_t* out, const int64_t nbytes) {
+ for (int64_t i = 0; i < nbytes; ++i) {
+ out[i] = left[i] ^ right[i];
+ }
+}
diff --git a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s
index 69f69d2970..a4010dab55 100644
--- a/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s
+++ b/go/arrow/bitutil/_lib/bitmap_ops_avx2_amd64.s
@@ -207,6 +207,204 @@ bitmap_aligned_or_avx2: # @bitmap_aligned_or_avx2
.Lfunc_end1:
.size bitmap_aligned_or_avx2, .Lfunc_end1-bitmap_aligned_or_avx2
# -- End function
+ .globl bitmap_aligned_and_not_avx2 # -- Begin function bitmap_aligned_and_not_avx2
+ .p2align 4, 0x90
+ .type bitmap_aligned_and_not_avx2,@function
+bitmap_aligned_and_not_avx2: # @bitmap_aligned_and_not_avx2
+# %bb.0:
+ push rbp
+ mov rbp, rsp
+ push rbx
+ and rsp, -8
+ test rcx, rcx
+ jle .LBB2_12
+# %bb.1:
+ cmp rcx, 127
+ ja .LBB2_7
+# %bb.2:
+ xor r8d, r8d
+ jmp .LBB2_3
+.LBB2_7:
+ lea r8, [rdx + rcx]
+ lea rax, [rdi + rcx]
+ cmp rax, rdx
+ seta r11b
+ lea rax, [rsi + rcx]
+ cmp r8, rdi
+ seta bl
+ cmp rax, rdx
+ seta r10b
+ cmp r8, rsi
+ seta r9b
+ xor r8d, r8d
+ test r11b, bl
+ jne .LBB2_3
+# %bb.8:
+ and r10b, r9b
+ jne .LBB2_3
+# %bb.9:
+ mov r8, rcx
+ and r8, -128
+ xor eax, eax
+ .p2align 4, 0x90
+.LBB2_10: # =>This Inner Loop Header: Depth=1
+ vmovups ymm0, ymmword ptr [rsi + rax]
+ vmovups ymm1, ymmword ptr [rsi + rax + 32]
+ vmovups ymm2, ymmword ptr [rsi + rax + 64]
+ vmovups ymm3, ymmword ptr [rsi + rax + 96]
+ vandnps ymm0, ymm0, ymmword ptr [rdi + rax]
+ vandnps ymm1, ymm1, ymmword ptr [rdi + rax + 32]
+ vandnps ymm2, ymm2, ymmword ptr [rdi + rax + 64]
+ vandnps ymm3, ymm3, ymmword ptr [rdi + rax + 96]
+ vmovups ymmword ptr [rdx + rax], ymm0
+ vmovups ymmword ptr [rdx + rax + 32], ymm1
+ vmovups ymmword ptr [rdx + rax + 64], ymm2
+ vmovups ymmword ptr [rdx + rax + 96], ymm3
+ sub rax, -128
+ cmp r8, rax
+ jne .LBB2_10
+# %bb.11:
+ cmp r8, rcx
+ je .LBB2_12
+.LBB2_3:
+ mov r9, r8
+ not r9
+ test cl, 1
+ je .LBB2_5
+# %bb.4:
+ mov al, byte ptr [rsi + r8]
+ not al
+ and al, byte ptr [rdi + r8]
+ mov byte ptr [rdx + r8], al
+ or r8, 1
+.LBB2_5:
+ add r9, rcx
+ je .LBB2_12
+ .p2align 4, 0x90
+.LBB2_6: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r8]
+ not al
+ and al, byte ptr [rdi + r8]
+ mov byte ptr [rdx + r8], al
+ movzx eax, byte ptr [rsi + r8 + 1]
+ not al
+ and al, byte ptr [rdi + r8 + 1]
+ mov byte ptr [rdx + r8 + 1], al
+ add r8, 2
+ cmp rcx, r8
+ jne .LBB2_6
+.LBB2_12:
+ lea rsp, [rbp - 8]
+ pop rbx
+ pop rbp
+ vzeroupper
+ ret
+.Lfunc_end2:
+ .size bitmap_aligned_and_not_avx2, .Lfunc_end2-bitmap_aligned_and_not_avx2
+ # -- End function
+ .globl bitmap_aligned_xor_avx2 # -- Begin function bitmap_aligned_xor_avx2
+ .p2align 4, 0x90
+ .type bitmap_aligned_xor_avx2,@function
+bitmap_aligned_xor_avx2: # @bitmap_aligned_xor_avx2
+# %bb.0:
+ push rbp
+ mov rbp, rsp
+ push rbx
+ and rsp, -8
+ test rcx, rcx
+ jle .LBB3_12
+# %bb.1:
+ cmp rcx, 127
+ ja .LBB3_7
+# %bb.2:
+ xor r10d, r10d
+ jmp .LBB3_3
+.LBB3_7:
+ lea r9, [rdx + rcx]
+ lea rax, [rdi + rcx]
+ cmp rax, rdx
+ seta r11b
+ lea rax, [rsi + rcx]
+ cmp r9, rdi
+ seta bl
+ cmp rax, rdx
+ seta r8b
+ cmp r9, rsi
+ seta r9b
+ xor r10d, r10d
+ test r11b, bl
+ jne .LBB3_3
+# %bb.8:
+ and r8b, r9b
+ jne .LBB3_3
+# %bb.9:
+ mov r10, rcx
+ and r10, -128
+ xor r8d, r8d
+ .p2align 4, 0x90
+.LBB3_10: # =>This Inner Loop Header: Depth=1
+ vmovups ymm0, ymmword ptr [rsi + r8]
+ vmovups ymm1, ymmword ptr [rsi + r8 + 32]
+ vmovups ymm2, ymmword ptr [rsi + r8 + 64]
+ vmovups ymm3, ymmword ptr [rsi + r8 + 96]
+ vxorps ymm0, ymm0, ymmword ptr [rdi + r8]
+ vxorps ymm1, ymm1, ymmword ptr [rdi + r8 + 32]
+ vxorps ymm2, ymm2, ymmword ptr [rdi + r8 + 64]
+ vxorps ymm3, ymm3, ymmword ptr [rdi + r8 + 96]
+ vmovups ymmword ptr [rdx + r8], ymm0
+ vmovups ymmword ptr [rdx + r8 + 32], ymm1
+ vmovups ymmword ptr [rdx + r8 + 64], ymm2
+ vmovups ymmword ptr [rdx + r8 + 96], ymm3
+ sub r8, -128
+ cmp r10, r8
+ jne .LBB3_10
+# %bb.11:
+ cmp r10, rcx
+ je .LBB3_12
+.LBB3_3:
+ mov r8, r10
+ not r8
+ add r8, rcx
+ mov r9, rcx
+ and r9, 3
+ je .LBB3_5
+ .p2align 4, 0x90
+.LBB3_4: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r10]
+ xor al, byte ptr [rdi + r10]
+ mov byte ptr [rdx + r10], al
+ add r10, 1
+ add r9, -1
+ jne .LBB3_4
+.LBB3_5:
+ cmp r8, 3
+ jb .LBB3_12
+ .p2align 4, 0x90
+.LBB3_6: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r10]
+ xor al, byte ptr [rdi + r10]
+ mov byte ptr [rdx + r10], al
+ movzx eax, byte ptr [rsi + r10 + 1]
+ xor al, byte ptr [rdi + r10 + 1]
+ mov byte ptr [rdx + r10 + 1], al
+ movzx eax, byte ptr [rsi + r10 + 2]
+ xor al, byte ptr [rdi + r10 + 2]
+ mov byte ptr [rdx + r10 + 2], al
+ movzx eax, byte ptr [rsi + r10 + 3]
+ xor al, byte ptr [rdi + r10 + 3]
+ mov byte ptr [rdx + r10 + 3], al
+ add r10, 4
+ cmp rcx, r10
+ jne .LBB3_6
+.LBB3_12:
+ lea rsp, [rbp - 8]
+ pop rbx
+ pop rbp
+ vzeroupper
+ ret
+.Lfunc_end3:
+ .size bitmap_aligned_xor_avx2, .Lfunc_end3-bitmap_aligned_xor_avx2
+ # -- End function
.ident "Ubuntu clang version 11.1.0-6"
.section ".note.GNU-stack","",@progbits
.addrsig
diff --git a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s
index 9d028155b7..840c1a623b 100644
--- a/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s
+++ b/go/arrow/bitutil/_lib/bitmap_ops_sse4_amd64.s
@@ -267,6 +267,264 @@ bitmap_aligned_or_sse4: # @bitmap_aligned_or_sse4
.Lfunc_end1:
.size bitmap_aligned_or_sse4, .Lfunc_end1-bitmap_aligned_or_sse4
# -- End function
+ .globl bitmap_aligned_and_not_sse4 # -- Begin function bitmap_aligned_and_not_sse4
+ .p2align 4, 0x90
+ .type bitmap_aligned_and_not_sse4,@function
+bitmap_aligned_and_not_sse4: # @bitmap_aligned_and_not_sse4
+# %bb.0:
+ push rbp
+ mov rbp, rsp
+ push rbx
+ and rsp, -8
+ test rcx, rcx
+ jle .LBB2_16
+# %bb.1:
+ cmp rcx, 31
+ ja .LBB2_7
+# %bb.2:
+ xor r11d, r11d
+.LBB2_3:
+ mov r8, r11
+ not r8
+ test cl, 1
+ je .LBB2_5
+# %bb.4:
+ mov al, byte ptr [rsi + r11]
+ not al
+ and al, byte ptr [rdi + r11]
+ mov byte ptr [rdx + r11], al
+ or r11, 1
+.LBB2_5:
+ add r8, rcx
+ je .LBB2_16
+ .p2align 4, 0x90
+.LBB2_6: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r11]
+ not al
+ and al, byte ptr [rdi + r11]
+ mov byte ptr [rdx + r11], al
+ movzx eax, byte ptr [rsi + r11 + 1]
+ not al
+ and al, byte ptr [rdi + r11 + 1]
+ mov byte ptr [rdx + r11 + 1], al
+ add r11, 2
+ cmp rcx, r11
+ jne .LBB2_6
+ jmp .LBB2_16
+.LBB2_7:
+ lea r9, [rdx + rcx]
+ lea rax, [rdi + rcx]
+ cmp rax, rdx
+ seta r10b
+ lea rax, [rsi + rcx]
+ cmp r9, rdi
+ seta bl
+ cmp rax, rdx
+ seta r8b
+ cmp r9, rsi
+ seta r9b
+ xor r11d, r11d
+ test r10b, bl
+ jne .LBB2_3
+# %bb.8:
+ and r8b, r9b
+ jne .LBB2_3
+# %bb.9:
+ mov r11, rcx
+ and r11, -32
+ lea rax, [r11 - 32]
+ mov r9, rax
+ shr r9, 5
+ add r9, 1
+ test rax, rax
+ je .LBB2_10
+# %bb.11:
+ mov r10, r9
+ and r10, -2
+ neg r10
+ xor r8d, r8d
+ .p2align 4, 0x90
+.LBB2_12: # =>This Inner Loop Header: Depth=1
+ movups xmm0, xmmword ptr [rdi + r8]
+ movups xmm1, xmmword ptr [rdi + r8 + 16]
+ movups xmm2, xmmword ptr [rsi + r8]
+ andnps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 16]
+ andnps xmm0, xmm1
+ movups xmmword ptr [rdx + r8], xmm2
+ movups xmmword ptr [rdx + r8 + 16], xmm0
+ movups xmm0, xmmword ptr [rdi + r8 + 32]
+ movups xmm1, xmmword ptr [rdi + r8 + 48]
+ movups xmm2, xmmword ptr [rsi + r8 + 32]
+ andnps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 48]
+ andnps xmm0, xmm1
+ movups xmmword ptr [rdx + r8 + 32], xmm2
+ movups xmmword ptr [rdx + r8 + 48], xmm0
+ add r8, 64
+ add r10, 2
+ jne .LBB2_12
+# %bb.13:
+ test r9b, 1
+ je .LBB2_15
+.LBB2_14:
+ movups xmm0, xmmword ptr [rdi + r8]
+ movups xmm1, xmmword ptr [rdi + r8 + 16]
+ movups xmm2, xmmword ptr [rsi + r8]
+ andnps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 16]
+ andnps xmm0, xmm1
+ movups xmmword ptr [rdx + r8], xmm2
+ movups xmmword ptr [rdx + r8 + 16], xmm0
+.LBB2_15:
+ cmp r11, rcx
+ jne .LBB2_3
+.LBB2_16:
+ lea rsp, [rbp - 8]
+ pop rbx
+ pop rbp
+ ret
+.LBB2_10:
+ xor r8d, r8d
+ test r9b, 1
+ jne .LBB2_14
+ jmp .LBB2_15
+.Lfunc_end2:
+ .size bitmap_aligned_and_not_sse4, .Lfunc_end2-bitmap_aligned_and_not_sse4
+ # -- End function
+ .globl bitmap_aligned_xor_sse4 # -- Begin function bitmap_aligned_xor_sse4
+ .p2align 4, 0x90
+ .type bitmap_aligned_xor_sse4,@function
+bitmap_aligned_xor_sse4: # @bitmap_aligned_xor_sse4
+# %bb.0:
+ push rbp
+ mov rbp, rsp
+ push rbx
+ and rsp, -8
+ test rcx, rcx
+ jle .LBB3_16
+# %bb.1:
+ cmp rcx, 31
+ ja .LBB3_7
+# %bb.2:
+ xor r11d, r11d
+.LBB3_3:
+ mov r8, r11
+ not r8
+ add r8, rcx
+ mov r9, rcx
+ and r9, 3
+ je .LBB3_5
+ .p2align 4, 0x90
+.LBB3_4: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r11]
+ xor al, byte ptr [rdi + r11]
+ mov byte ptr [rdx + r11], al
+ add r11, 1
+ add r9, -1
+ jne .LBB3_4
+.LBB3_5:
+ cmp r8, 3
+ jb .LBB3_16
+ .p2align 4, 0x90
+.LBB3_6: # =>This Inner Loop Header: Depth=1
+ movzx eax, byte ptr [rsi + r11]
+ xor al, byte ptr [rdi + r11]
+ mov byte ptr [rdx + r11], al
+ movzx eax, byte ptr [rsi + r11 + 1]
+ xor al, byte ptr [rdi + r11 + 1]
+ mov byte ptr [rdx + r11 + 1], al
+ movzx eax, byte ptr [rsi + r11 + 2]
+ xor al, byte ptr [rdi + r11 + 2]
+ mov byte ptr [rdx + r11 + 2], al
+ movzx eax, byte ptr [rsi + r11 + 3]
+ xor al, byte ptr [rdi + r11 + 3]
+ mov byte ptr [rdx + r11 + 3], al
+ add r11, 4
+ cmp rcx, r11
+ jne .LBB3_6
+ jmp .LBB3_16
+.LBB3_7:
+ lea r9, [rdx + rcx]
+ lea rax, [rdi + rcx]
+ cmp rax, rdx
+ seta r10b
+ lea rax, [rsi + rcx]
+ cmp r9, rdi
+ seta bl
+ cmp rax, rdx
+ seta r8b
+ cmp r9, rsi
+ seta r9b
+ xor r11d, r11d
+ test r10b, bl
+ jne .LBB3_3
+# %bb.8:
+ and r8b, r9b
+ jne .LBB3_3
+# %bb.9:
+ mov r11, rcx
+ and r11, -32
+ lea rax, [r11 - 32]
+ mov r9, rax
+ shr r9, 5
+ add r9, 1
+ test rax, rax
+ je .LBB3_10
+# %bb.11:
+ mov r10, r9
+ and r10, -2
+ neg r10
+ xor r8d, r8d
+ .p2align 4, 0x90
+.LBB3_12: # =>This Inner Loop Header: Depth=1
+ movups xmm0, xmmword ptr [rdi + r8]
+ movups xmm1, xmmword ptr [rdi + r8 + 16]
+ movups xmm2, xmmword ptr [rsi + r8]
+ xorps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 16]
+ xorps xmm0, xmm1
+ movups xmmword ptr [rdx + r8], xmm2
+ movups xmmword ptr [rdx + r8 + 16], xmm0
+ movups xmm0, xmmword ptr [rdi + r8 + 32]
+ movups xmm1, xmmword ptr [rdi + r8 + 48]
+ movups xmm2, xmmword ptr [rsi + r8 + 32]
+ xorps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 48]
+ xorps xmm0, xmm1
+ movups xmmword ptr [rdx + r8 + 32], xmm2
+ movups xmmword ptr [rdx + r8 + 48], xmm0
+ add r8, 64
+ add r10, 2
+ jne .LBB3_12
+# %bb.13:
+ test r9b, 1
+ je .LBB3_15
+.LBB3_14:
+ movups xmm0, xmmword ptr [rdi + r8]
+ movups xmm1, xmmword ptr [rdi + r8 + 16]
+ movups xmm2, xmmword ptr [rsi + r8]
+ xorps xmm2, xmm0
+ movups xmm0, xmmword ptr [rsi + r8 + 16]
+ xorps xmm0, xmm1
+ movups xmmword ptr [rdx + r8], xmm2
+ movups xmmword ptr [rdx + r8 + 16], xmm0
+.LBB3_15:
+ cmp r11, rcx
+ jne .LBB3_3
+.LBB3_16:
+ lea rsp, [rbp - 8]
+ pop rbx
+ pop rbp
+ ret
+.LBB3_10:
+ xor r8d, r8d
+ test r9b, 1
+ jne .LBB3_14
+ jmp .LBB3_15
+.Lfunc_end3:
+ .size bitmap_aligned_xor_sse4, .Lfunc_end3-bitmap_aligned_xor_sse4
+ # -- End function
.ident "Ubuntu clang version 11.1.0-6"
.section ".note.GNU-stack","",@progbits
.addrsig
diff --git a/go/arrow/bitutil/bitmap_ops.go b/go/arrow/bitutil/bitmap_ops.go
index 62322b04b9..7db750a6dd 100644
--- a/go/arrow/bitutil/bitmap_ops.go
+++ b/go/arrow/bitutil/bitmap_ops.go
@@ -39,6 +39,29 @@ func alignedBitAndGo(left, right, out []byte) {
}
}
+func alignedBitAndNotGo(left, right, out []byte) {
+ var (
+ nbytes = len(out)
+ i = 0
+ )
+ if nbytes > uint64SizeBytes {
+ // case where we have enough bytes to operate on words
+ leftWords := bytesToUint64(left[i:])
+ rightWords := bytesToUint64(right[i:])
+ outWords := bytesToUint64(out[i:])
+
+ for w := range outWords {
+ outWords[w] = leftWords[w] &^ rightWords[w]
+ }
+
+ i += len(outWords) * uint64SizeBytes
+ }
+ // grab any remaining bytes that were fewer than a word
+ for ; i < nbytes; i++ {
+ out[i] = left[i] &^ right[i]
+ }
+}
+
func alignedBitOrGo(left, right, out []byte) {
var (
nbytes = len(out)
@@ -61,3 +84,26 @@ func alignedBitOrGo(left, right, out []byte) {
out[i] = left[i] | right[i]
}
}
+
+func alignedBitXorGo(left, right, out []byte) {
+ var (
+ nbytes = len(out)
+ i = 0
+ )
+ if nbytes > uint64SizeBytes {
+ // case where we have enough bytes to operate on words
+ leftWords := bytesToUint64(left[i:])
+ rightWords := bytesToUint64(right[i:])
+ outWords := bytesToUint64(out[i:])
+
+ for w := range outWords {
+ outWords[w] = leftWords[w] ^ rightWords[w]
+ }
+
+ i += len(outWords) * uint64SizeBytes
+ }
+ // grab any remaining bytes that were fewer than a word
+ for ; i < nbytes; i++ {
+ out[i] = left[i] ^ right[i]
+ }
+}
diff --git a/go/arrow/bitutil/bitmap_ops_amd64.go b/go/arrow/bitutil/bitmap_ops_amd64.go
index 9aa5a6dd56..ad0fd674ab 100644
--- a/go/arrow/bitutil/bitmap_ops_amd64.go
+++ b/go/arrow/bitutil/bitmap_ops_amd64.go
@@ -25,11 +25,17 @@ func init() {
if cpu.X86.HasAVX2 {
bitAndOp.opAligned = bitmapAlignedAndAVX2
bitOrOp.opAligned = bitmapAlignedOrAVX2
+ bitAndNotOp.opAligned = bitmapAlignedAndNotAVX2
+ bitXorOp.opAligned = bitmapAlignedXorAVX2
} else if cpu.X86.HasSSE42 {
bitAndOp.opAligned = bitmapAlignedAndSSE4
bitOrOp.opAligned = bitmapAlignedOrSSE4
+ bitAndNotOp.opAligned = bitmapAlignedAndNotSSE4
+ bitXorOp.opAligned = bitmapAlignedXorSSE4
} else {
bitAndOp.opAligned = alignedBitAndGo
bitOrOp.opAligned = alignedBitOrGo
+ bitAndNotOp.opAligned = alignedBitAndNotGo
+ bitXorOp.opAligned = alignedBitXorGo
}
}
diff --git a/go/arrow/bitutil/bitmap_ops_arm64.go b/go/arrow/bitutil/bitmap_ops_arm64.go
index 86c47639a9..28d95d84ad 100644
--- a/go/arrow/bitutil/bitmap_ops_arm64.go
+++ b/go/arrow/bitutil/bitmap_ops_arm64.go
@@ -22,4 +22,6 @@ package bitutil
func init() {
bitAndOp.opAligned = alignedBitAndGo
bitOrOp.opAligned = alignedBitOrGo
+ bitAndNotOp.opAligned = alignedBitAndNotGo
+ bitXorOp.opAligned = alignedBitXorGo
}
diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go
index 731b9807b7..1c01bd0f38 100644
--- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.go
+++ b/go/arrow/bitutil/bitmap_ops_avx2_amd64.go
@@ -36,3 +36,17 @@ func _bitmap_aligned_or_avx2(left, right, out unsafe.Pointer, length int64)
func bitmapAlignedOrAVX2(left, right, out []byte) {
_bitmap_aligned_or_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
}
+
+//go:noescape
+func _bitmap_aligned_and_not_avx2(left, right, out unsafe.Pointer, length int64)
+
+func bitmapAlignedAndNotAVX2(left, right, out []byte) {
+ _bitmap_aligned_and_not_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
+}
+
+//go:noescape
+func _bitmap_aligned_xor_avx2(left, right, out unsafe.Pointer, length int64)
+
+func bitmapAlignedXorAVX2(left, right, out []byte) {
+ _bitmap_aligned_xor_avx2(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
+}
diff --git a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s
index 2e2ade8961..00172e8659 100644
--- a/go/arrow/bitutil/bitmap_ops_avx2_amd64.s
+++ b/go/arrow/bitutil/bitmap_ops_avx2_amd64.s
@@ -190,3 +190,184 @@ LBB1_6:
LBB1_12:
VZEROUPPER
RET
+
+TEXT ·_bitmap_aligned_and_not_avx2(SB), $0-32
+
+ MOVQ left+0(FP), DI
+ MOVQ right+8(FP), SI
+ MOVQ out+16(FP), DX
+ MOVQ length+24(FP), CX
+
+ WORD $0x8548; BYTE $0xc9 // test rcx, rcx
+ JLE LBB2_12
+ LONG $0x7ff98348 // cmp rcx, 127
+ JA LBB2_7
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+ JMP LBB2_3
+
+LBB2_7:
+ LONG $0x0a048d4c // lea r8, [rdx + rcx]
+ LONG $0x0f048d48 // lea rax, [rdi + rcx]
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd3970f41 // seta r11b
+ LONG $0x0e048d48 // lea rax, [rsi + rcx]
+ WORD $0x3949; BYTE $0xf8 // cmp r8, rdi
+ WORD $0x970f; BYTE $0xd3 // seta bl
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd2970f41 // seta r10b
+ WORD $0x3949; BYTE $0xf0 // cmp r8, rsi
+ LONG $0xd1970f41 // seta r9b
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+ WORD $0x8441; BYTE $0xdb // test r11b, bl
+ JNE LBB2_3
+ WORD $0x2045; BYTE $0xca // and r10b, r9b
+ JNE LBB2_3
+ WORD $0x8949; BYTE $0xc8 // mov r8, rcx
+ LONG $0x80e08349 // and r8, -128
+ WORD $0xc031 // xor eax, eax
+
+LBB2_10:
+ LONG $0x0410fcc5; BYTE $0x06 // vmovups ymm0, yword [rsi + rax]
+ LONG $0x4c10fcc5; WORD $0x2006 // vmovups ymm1, yword [rsi + rax + 32]
+ LONG $0x5410fcc5; WORD $0x4006 // vmovups ymm2, yword [rsi + rax + 64]
+ LONG $0x5c10fcc5; WORD $0x6006 // vmovups ymm3, yword [rsi + rax + 96]
+ LONG $0x0455fcc5; BYTE $0x07 // vandnps ymm0, ymm0, yword [rdi + rax]
+ LONG $0x4c55f4c5; WORD $0x2007 // vandnps ymm1, ymm1, yword [rdi + rax + 32]
+ LONG $0x5455ecc5; WORD $0x4007 // vandnps ymm2, ymm2, yword [rdi + rax + 64]
+ LONG $0x5c55e4c5; WORD $0x6007 // vandnps ymm3, ymm3, yword [rdi + rax + 96]
+ LONG $0x0411fcc5; BYTE $0x02 // vmovups yword [rdx + rax], ymm0
+ LONG $0x4c11fcc5; WORD $0x2002 // vmovups yword [rdx + rax + 32], ymm1
+ LONG $0x5411fcc5; WORD $0x4002 // vmovups yword [rdx + rax + 64], ymm2
+ LONG $0x5c11fcc5; WORD $0x6002 // vmovups yword [rdx + rax + 96], ymm3
+ LONG $0x80e88348 // sub rax, -128
+ WORD $0x3949; BYTE $0xc0 // cmp r8, rax
+ JNE LBB2_10
+ WORD $0x3949; BYTE $0xc8 // cmp r8, rcx
+ JE LBB2_12
+
+LBB2_3:
+ WORD $0x894d; BYTE $0xc1 // mov r9, r8
+ WORD $0xf749; BYTE $0xd1 // not r9
+ WORD $0xc1f6; BYTE $0x01 // test cl, 1
+ JE LBB2_5
+ LONG $0x06048a42 // mov al, byte [rsi + r8]
+ WORD $0xd0f6 // not al
+ LONG $0x07042242 // and al, byte [rdi + r8]
+ LONG $0x02048842 // mov byte [rdx + r8], al
+ LONG $0x01c88349 // or r8, 1
+
+LBB2_5:
+ WORD $0x0149; BYTE $0xc9 // add r9, rcx
+ JE LBB2_12
+
+LBB2_6:
+ LONG $0x04b60f42; BYTE $0x06 // movzx eax, byte [rsi + r8]
+ WORD $0xd0f6 // not al
+ LONG $0x07042242 // and al, byte [rdi + r8]
+ LONG $0x02048842 // mov byte [rdx + r8], al
+ LONG $0x44b60f42; WORD $0x0106 // movzx eax, byte [rsi + r8 + 1]
+ WORD $0xd0f6 // not al
+ LONG $0x07442242; BYTE $0x01 // and al, byte [rdi + r8 + 1]
+ LONG $0x02448842; BYTE $0x01 // mov byte [rdx + r8 + 1], al
+ LONG $0x02c08349 // add r8, 2
+ WORD $0x394c; BYTE $0xc1 // cmp rcx, r8
+ JNE LBB2_6
+
+LBB2_12:
+ VZEROUPPER
+ RET
+
+TEXT ·_bitmap_aligned_xor_avx2(SB), $0-32
+
+ MOVQ left+0(FP), DI
+ MOVQ right+8(FP), SI
+ MOVQ out+16(FP), DX
+ MOVQ length+24(FP), CX
+
+ WORD $0x8548; BYTE $0xc9 // test rcx, rcx
+ JLE LBB3_12
+ LONG $0x7ff98348 // cmp rcx, 127
+ JA LBB3_7
+ WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
+ JMP LBB3_3
+
+LBB3_7:
+ LONG $0x0a0c8d4c // lea r9, [rdx + rcx]
+ LONG $0x0f048d48 // lea rax, [rdi + rcx]
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd3970f41 // seta r11b
+ LONG $0x0e048d48 // lea rax, [rsi + rcx]
+ WORD $0x3949; BYTE $0xf9 // cmp r9, rdi
+ WORD $0x970f; BYTE $0xd3 // seta bl
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd0970f41 // seta r8b
+ WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
+ LONG $0xd1970f41 // seta r9b
+ WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
+ WORD $0x8441; BYTE $0xdb // test r11b, bl
+ JNE LBB3_3
+ WORD $0x2045; BYTE $0xc8 // and r8b, r9b
+ JNE LBB3_3
+ WORD $0x8949; BYTE $0xca // mov r10, rcx
+ LONG $0x80e28349 // and r10, -128
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+
+LBB3_10:
+ LONG $0x107ca1c4; WORD $0x0604 // vmovups ymm0, yword [rsi + r8]
+ LONG $0x107ca1c4; WORD $0x064c; BYTE $0x20 // vmovups ymm1, yword [rsi + r8 + 32]
+ LONG $0x107ca1c4; WORD $0x0654; BYTE $0x40 // vmovups ymm2, yword [rsi + r8 + 64]
+ LONG $0x107ca1c4; WORD $0x065c; BYTE $0x60 // vmovups ymm3, yword [rsi + r8 + 96]
+ LONG $0x577ca1c4; WORD $0x0704 // vxorps ymm0, ymm0, yword [rdi + r8]
+ LONG $0x5774a1c4; WORD $0x074c; BYTE $0x20 // vxorps ymm1, ymm1, yword [rdi + r8 + 32]
+ LONG $0x576ca1c4; WORD $0x0754; BYTE $0x40 // vxorps ymm2, ymm2, yword [rdi + r8 + 64]
+ LONG $0x5764a1c4; WORD $0x075c; BYTE $0x60 // vxorps ymm3, ymm3, yword [rdi + r8 + 96]
+ LONG $0x117ca1c4; WORD $0x0204 // vmovups yword [rdx + r8], ymm0
+ LONG $0x117ca1c4; WORD $0x024c; BYTE $0x20 // vmovups yword [rdx + r8 + 32], ymm1
+ LONG $0x117ca1c4; WORD $0x0254; BYTE $0x40 // vmovups yword [rdx + r8 + 64], ymm2
+ LONG $0x117ca1c4; WORD $0x025c; BYTE $0x60 // vmovups yword [rdx + r8 + 96], ymm3
+ LONG $0x80e88349 // sub r8, -128
+ WORD $0x394d; BYTE $0xc2 // cmp r10, r8
+ JNE LBB3_10
+ WORD $0x3949; BYTE $0xca // cmp r10, rcx
+ JE LBB3_12
+
+LBB3_3:
+ WORD $0x894d; BYTE $0xd0 // mov r8, r10
+ WORD $0xf749; BYTE $0xd0 // not r8
+ WORD $0x0149; BYTE $0xc8 // add r8, rcx
+ WORD $0x8949; BYTE $0xc9 // mov r9, rcx
+ LONG $0x03e18349 // and r9, 3
+ JE LBB3_5
+
+LBB3_4:
+ LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10]
+ LONG $0x17043242 // xor al, byte [rdi + r10]
+ LONG $0x12048842 // mov byte [rdx + r10], al
+ LONG $0x01c28349 // add r10, 1
+ LONG $0xffc18349 // add r9, -1
+ JNE LBB3_4
+
+LBB3_5:
+ LONG $0x03f88349 // cmp r8, 3
+ JB LBB3_12
+
+LBB3_6:
+ LONG $0x04b60f42; BYTE $0x16 // movzx eax, byte [rsi + r10]
+ LONG $0x17043242 // xor al, byte [rdi + r10]
+ LONG $0x12048842 // mov byte [rdx + r10], al
+ LONG $0x44b60f42; WORD $0x0116 // movzx eax, byte [rsi + r10 + 1]
+ LONG $0x17443242; BYTE $0x01 // xor al, byte [rdi + r10 + 1]
+ LONG $0x12448842; BYTE $0x01 // mov byte [rdx + r10 + 1], al
+ LONG $0x44b60f42; WORD $0x0216 // movzx eax, byte [rsi + r10 + 2]
+ LONG $0x17443242; BYTE $0x02 // xor al, byte [rdi + r10 + 2]
+ LONG $0x12448842; BYTE $0x02 // mov byte [rdx + r10 + 2], al
+ LONG $0x44b60f42; WORD $0x0316 // movzx eax, byte [rsi + r10 + 3]
+ LONG $0x17443242; BYTE $0x03 // xor al, byte [rdi + r10 + 3]
+ LONG $0x12448842; BYTE $0x03 // mov byte [rdx + r10 + 3], al
+ LONG $0x04c28349 // add r10, 4
+ WORD $0x394c; BYTE $0xd1 // cmp rcx, r10
+ JNE LBB3_6
+
+LBB3_12:
+ VZEROUPPER
+ RET
diff --git a/go/arrow/bitutil/bitmap_ops_noasm.go b/go/arrow/bitutil/bitmap_ops_noasm.go
index 785531c1c2..e25347791f 100644
--- a/go/arrow/bitutil/bitmap_ops_noasm.go
+++ b/go/arrow/bitutil/bitmap_ops_noasm.go
@@ -22,4 +22,6 @@ package bitutil
func init() {
bitAndOp.opAligned = alignedBitAndGo
bitOrOp.opAligned = alignedBitOrGo
+ bitAndNotOp.opAligned = alignedBitAndNotGo
+ bitXorOp.opAligned = alignedBitXorGo
}
diff --git a/go/arrow/bitutil/bitmap_ops_ppc64le.go b/go/arrow/bitutil/bitmap_ops_ppc64le.go
index 86c47639a9..28d95d84ad 100644
--- a/go/arrow/bitutil/bitmap_ops_ppc64le.go
+++ b/go/arrow/bitutil/bitmap_ops_ppc64le.go
@@ -22,4 +22,6 @@ package bitutil
func init() {
bitAndOp.opAligned = alignedBitAndGo
bitOrOp.opAligned = alignedBitOrGo
+ bitAndNotOp.opAligned = alignedBitAndNotGo
+ bitXorOp.opAligned = alignedBitXorGo
}
diff --git a/go/arrow/bitutil/bitmap_ops_s390x.go b/go/arrow/bitutil/bitmap_ops_s390x.go
index 86c47639a9..28d95d84ad 100644
--- a/go/arrow/bitutil/bitmap_ops_s390x.go
+++ b/go/arrow/bitutil/bitmap_ops_s390x.go
@@ -22,4 +22,6 @@ package bitutil
func init() {
bitAndOp.opAligned = alignedBitAndGo
bitOrOp.opAligned = alignedBitOrGo
+ bitAndNotOp.opAligned = alignedBitAndNotGo
+ bitXorOp.opAligned = alignedBitXorGo
}
diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go
index 5d1fcf9682..f16bce12bb 100644
--- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.go
+++ b/go/arrow/bitutil/bitmap_ops_sse4_amd64.go
@@ -36,3 +36,17 @@ func _bitmap_aligned_or_sse4(left, right, out unsafe.Pointer, length int64)
func bitmapAlignedOrSSE4(left, right, out []byte) {
_bitmap_aligned_or_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
}
+
+//go:noescape
+func _bitmap_aligned_and_not_sse4(left, right, out unsafe.Pointer, length int64)
+
+func bitmapAlignedAndNotSSE4(left, right, out []byte) {
+ _bitmap_aligned_and_not_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
+}
+
+//go:noescape
+func _bitmap_aligned_xor_sse4(left, right, out unsafe.Pointer, length int64)
+
+func bitmapAlignedXorSSE4(left, right, out []byte) {
+ _bitmap_aligned_xor_sse4(unsafe.Pointer(&left[0]), unsafe.Pointer(&right[0]), unsafe.Pointer(&out[0]), int64(len(out)))
+}
diff --git a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s
index ad81cf6372..c15e186253 100644
--- a/go/arrow/bitutil/bitmap_ops_sse4_amd64.s
+++ b/go/arrow/bitutil/bitmap_ops_sse4_amd64.s
@@ -254,3 +254,248 @@ LBB1_10:
LONG $0x01c1f641 // test r9b, 1
JNE LBB1_14
JMP LBB1_15
+
+TEXT ·_bitmap_aligned_and_not_sse4(SB), $0-32
+
+ MOVQ left+0(FP), DI
+ MOVQ right+8(FP), SI
+ MOVQ out+16(FP), DX
+ MOVQ length+24(FP), CX
+
+ WORD $0x8548; BYTE $0xc9 // test rcx, rcx
+ JLE LBB2_16
+ LONG $0x1ff98348 // cmp rcx, 31
+ JA LBB2_7
+ WORD $0x3145; BYTE $0xdb // xor r11d, r11d
+
+LBB2_3:
+ WORD $0x894d; BYTE $0xd8 // mov r8, r11
+ WORD $0xf749; BYTE $0xd0 // not r8
+ WORD $0xc1f6; BYTE $0x01 // test cl, 1
+ JE LBB2_5
+ LONG $0x1e048a42 // mov al, byte [rsi + r11]
+ WORD $0xd0f6 // not al
+ LONG $0x1f042242 // and al, byte [rdi + r11]
+ LONG $0x1a048842 // mov byte [rdx + r11], al
+ LONG $0x01cb8349 // or r11, 1
+
+LBB2_5:
+ WORD $0x0149; BYTE $0xc8 // add r8, rcx
+ JE LBB2_16
+
+LBB2_6:
+ LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
+ WORD $0xd0f6 // not al
+ LONG $0x1f042242 // and al, byte [rdi + r11]
+ LONG $0x1a048842 // mov byte [rdx + r11], al
+ LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1]
+ WORD $0xd0f6 // not al
+ LONG $0x1f442242; BYTE $0x01 // and al, byte [rdi + r11 + 1]
+ LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al
+ LONG $0x02c38349 // add r11, 2
+ WORD $0x394c; BYTE $0xd9 // cmp rcx, r11
+ JNE LBB2_6
+ JMP LBB2_16
+
+LBB2_7:
+ LONG $0x0a0c8d4c // lea r9, [rdx + rcx]
+ LONG $0x0f048d48 // lea rax, [rdi + rcx]
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd2970f41 // seta r10b
+ LONG $0x0e048d48 // lea rax, [rsi + rcx]
+ WORD $0x3949; BYTE $0xf9 // cmp r9, rdi
+ WORD $0x970f; BYTE $0xd3 // seta bl
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd0970f41 // seta r8b
+ WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
+ LONG $0xd1970f41 // seta r9b
+ WORD $0x3145; BYTE $0xdb // xor r11d, r11d
+ WORD $0x8441; BYTE $0xda // test r10b, bl
+ JNE LBB2_3
+ WORD $0x2045; BYTE $0xc8 // and r8b, r9b
+ JNE LBB2_3
+ WORD $0x8949; BYTE $0xcb // mov r11, rcx
+ LONG $0xe0e38349 // and r11, -32
+ LONG $0xe0438d49 // lea rax, [r11 - 32]
+ WORD $0x8949; BYTE $0xc1 // mov r9, rax
+ LONG $0x05e9c149 // shr r9, 5
+ LONG $0x01c18349 // add r9, 1
+ WORD $0x8548; BYTE $0xc0 // test rax, rax
+ JE LBB2_10
+ WORD $0x894d; BYTE $0xca // mov r10, r9
+ LONG $0xfee28349 // and r10, -2
+ WORD $0xf749; BYTE $0xda // neg r10
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+
+LBB2_12:
+ LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
+ LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
+ LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
+ WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
+ WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1
+ LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
+ LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
+ LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32]
+ LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48]
+ LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32]
+ WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48]
+ WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1
+ LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2
+ LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0
+ LONG $0x40c08349 // add r8, 64
+ LONG $0x02c28349 // add r10, 2
+ JNE LBB2_12
+ LONG $0x01c1f641 // test r9b, 1
+ JE LBB2_15
+
+LBB2_14:
+ LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
+ LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
+ LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
+ WORD $0x550f; BYTE $0xd0 // andnps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
+ WORD $0x550f; BYTE $0xc1 // andnps xmm0, xmm1
+ LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
+ LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
+
+LBB2_15:
+ WORD $0x3949; BYTE $0xcb // cmp r11, rcx
+ JNE LBB2_3
+
+LBB2_16:
+ RET
+
+LBB2_10:
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+ LONG $0x01c1f641 // test r9b, 1
+ JNE LBB2_14
+ JMP LBB2_15
+
+TEXT ·_bitmap_aligned_xor_sse4(SB), $0-32
+
+ MOVQ left+0(FP), DI
+ MOVQ right+8(FP), SI
+ MOVQ out+16(FP), DX
+ MOVQ length+24(FP), CX
+
+ WORD $0x8548; BYTE $0xc9 // test rcx, rcx
+ JLE LBB3_16
+ LONG $0x1ff98348 // cmp rcx, 31
+ JA LBB3_7
+ WORD $0x3145; BYTE $0xdb // xor r11d, r11d
+
+LBB3_3:
+ WORD $0x894d; BYTE $0xd8 // mov r8, r11
+ WORD $0xf749; BYTE $0xd0 // not r8
+ WORD $0x0149; BYTE $0xc8 // add r8, rcx
+ WORD $0x8949; BYTE $0xc9 // mov r9, rcx
+ LONG $0x03e18349 // and r9, 3
+ JE LBB3_5
+
+LBB3_4:
+ LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
+ LONG $0x1f043242 // xor al, byte [rdi + r11]
+ LONG $0x1a048842 // mov byte [rdx + r11], al
+ LONG $0x01c38349 // add r11, 1
+ LONG $0xffc18349 // add r9, -1
+ JNE LBB3_4
+
+LBB3_5:
+ LONG $0x03f88349 // cmp r8, 3
+ JB LBB3_16
+
+LBB3_6:
+ LONG $0x04b60f42; BYTE $0x1e // movzx eax, byte [rsi + r11]
+ LONG $0x1f043242 // xor al, byte [rdi + r11]
+ LONG $0x1a048842 // mov byte [rdx + r11], al
+ LONG $0x44b60f42; WORD $0x011e // movzx eax, byte [rsi + r11 + 1]
+ LONG $0x1f443242; BYTE $0x01 // xor al, byte [rdi + r11 + 1]
+ LONG $0x1a448842; BYTE $0x01 // mov byte [rdx + r11 + 1], al
+ LONG $0x44b60f42; WORD $0x021e // movzx eax, byte [rsi + r11 + 2]
+ LONG $0x1f443242; BYTE $0x02 // xor al, byte [rdi + r11 + 2]
+ LONG $0x1a448842; BYTE $0x02 // mov byte [rdx + r11 + 2], al
+ LONG $0x44b60f42; WORD $0x031e // movzx eax, byte [rsi + r11 + 3]
+ LONG $0x1f443242; BYTE $0x03 // xor al, byte [rdi + r11 + 3]
+ LONG $0x1a448842; BYTE $0x03 // mov byte [rdx + r11 + 3], al
+ LONG $0x04c38349 // add r11, 4
+ WORD $0x394c; BYTE $0xd9 // cmp rcx, r11
+ JNE LBB3_6
+ JMP LBB3_16
+
+LBB3_7:
+ LONG $0x0a0c8d4c // lea r9, [rdx + rcx]
+ LONG $0x0f048d48 // lea rax, [rdi + rcx]
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd2970f41 // seta r10b
+ LONG $0x0e048d48 // lea rax, [rsi + rcx]
+ WORD $0x3949; BYTE $0xf9 // cmp r9, rdi
+ WORD $0x970f; BYTE $0xd3 // seta bl
+ WORD $0x3948; BYTE $0xd0 // cmp rax, rdx
+ LONG $0xd0970f41 // seta r8b
+ WORD $0x3949; BYTE $0xf1 // cmp r9, rsi
+ LONG $0xd1970f41 // seta r9b
+ WORD $0x3145; BYTE $0xdb // xor r11d, r11d
+ WORD $0x8441; BYTE $0xda // test r10b, bl
+ JNE LBB3_3
+ WORD $0x2045; BYTE $0xc8 // and r8b, r9b
+ JNE LBB3_3
+ WORD $0x8949; BYTE $0xcb // mov r11, rcx
+ LONG $0xe0e38349 // and r11, -32
+ LONG $0xe0438d49 // lea rax, [r11 - 32]
+ WORD $0x8949; BYTE $0xc1 // mov r9, rax
+ LONG $0x05e9c149 // shr r9, 5
+ LONG $0x01c18349 // add r9, 1
+ WORD $0x8548; BYTE $0xc0 // test rax, rax
+ JE LBB3_10
+ WORD $0x894d; BYTE $0xca // mov r10, r9
+ LONG $0xfee28349 // and r10, -2
+ WORD $0xf749; BYTE $0xda // neg r10
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+
+LBB3_12:
+ LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
+ LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
+ LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
+ WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
+ WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1
+ LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
+ LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
+ LONG $0x44100f42; WORD $0x2007 // movups xmm0, oword [rdi + r8 + 32]
+ LONG $0x4c100f42; WORD $0x3007 // movups xmm1, oword [rdi + r8 + 48]
+ LONG $0x54100f42; WORD $0x2006 // movups xmm2, oword [rsi + r8 + 32]
+ WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x3006 // movups xmm0, oword [rsi + r8 + 48]
+ WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1
+ LONG $0x54110f42; WORD $0x2002 // movups oword [rdx + r8 + 32], xmm2
+ LONG $0x44110f42; WORD $0x3002 // movups oword [rdx + r8 + 48], xmm0
+ LONG $0x40c08349 // add r8, 64
+ LONG $0x02c28349 // add r10, 2
+ JNE LBB3_12
+ LONG $0x01c1f641 // test r9b, 1
+ JE LBB3_15
+
+LBB3_14:
+ LONG $0x04100f42; BYTE $0x07 // movups xmm0, oword [rdi + r8]
+ LONG $0x4c100f42; WORD $0x1007 // movups xmm1, oword [rdi + r8 + 16]
+ LONG $0x14100f42; BYTE $0x06 // movups xmm2, oword [rsi + r8]
+ WORD $0x570f; BYTE $0xd0 // xorps xmm2, xmm0
+ LONG $0x44100f42; WORD $0x1006 // movups xmm0, oword [rsi + r8 + 16]
+ WORD $0x570f; BYTE $0xc1 // xorps xmm0, xmm1
+ LONG $0x14110f42; BYTE $0x02 // movups oword [rdx + r8], xmm2
+ LONG $0x44110f42; WORD $0x1002 // movups oword [rdx + r8 + 16], xmm0
+
+LBB3_15:
+ WORD $0x3949; BYTE $0xcb // cmp r11, rcx
+ JNE LBB3_3
+
+LBB3_16:
+ RET
+
+LBB3_10:
+ WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
+ LONG $0x01c1f641 // test r9b, 1
+ JNE LBB3_14
+ JMP LBB3_15
diff --git a/go/arrow/bitutil/bitmaps.go b/go/arrow/bitutil/bitmaps.go
index abd1b188a7..c23a123292 100644
--- a/go/arrow/bitutil/bitmaps.go
+++ b/go/arrow/bitutil/bitmaps.go
@@ -18,6 +18,7 @@ package bitutil
import (
"bytes"
+ "errors"
"math/bits"
"unsafe"
@@ -374,9 +375,14 @@ func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
}
}
-// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
-// and copying length bits into dst, starting at bit offset dstOffset.
-func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
+type transferMode int8
+
+const (
+ transferCopy transferMode = iota
+ transferInvert
+)
+
+func transferBitmap(mode transferMode, src []byte, srcOffset, length int, dst []byte, dstOffset int) {
if length == 0 {
// if there's nothing to write, end early.
return
@@ -393,12 +399,19 @@ func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
nwords := rdr.Words()
for nwords > 0 {
nwords--
- wr.PutNextWord(rdr.NextWord())
+ if mode == transferInvert {
+ wr.PutNextWord(^rdr.NextWord())
+ } else {
+ wr.PutNextWord(rdr.NextWord())
+ }
}
nbytes := rdr.TrailingBytes()
for nbytes > 0 {
nbytes--
bt, validBits := rdr.NextTrailingByte()
+ if mode == transferInvert {
+ bt = ^bt
+ }
wr.PutNextTrailingByte(bt, validBits)
}
return
@@ -417,14 +430,33 @@ func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
// - high 5 bits: old bits from last byte of dest buffer
trailingBits := nbytes*8 - length
trailMask := byte(uint(1)<<(8-trailingBits)) - 1
-
- copy(dst, src[:nbytes-1])
- lastData := src[nbytes-1]
+ var lastData byte
+ if mode == transferInvert {
+ for i, b := range src[:nbytes-1] {
+ dst[i] = ^b
+ }
+ lastData = ^src[nbytes-1]
+ } else {
+ copy(dst, src[:nbytes-1])
+ lastData = src[nbytes-1]
+ }
dst[nbytes-1] &= ^trailMask
dst[nbytes-1] |= lastData & trailMask
}
+// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
+// and copying length bits into dst, starting at bit offset dstOffset.
+func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
+ transferBitmap(transferCopy, src, srcOffset, length, dst, dstOffset)
+}
+
+// InvertBitmap copies a bit range of a bitmap, inverting it as it copies
+// over into the destination.
+func InvertBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
+ transferBitmap(transferInvert, src, srcOffset, length, dst, dstOffset)
+}
+
type bitOp struct {
opWord func(uint64, uint64) uint64
opByte func(byte, byte) byte
@@ -440,6 +472,14 @@ var (
opWord: func(l, r uint64) uint64 { return l | r },
opByte: func(l, r byte) byte { return l | r },
}
+ bitAndNotOp = bitOp{
+ opWord: func(l, r uint64) uint64 { return l &^ r },
+ opByte: func(l, r byte) byte { return l &^ r },
+ }
+ bitXorOp = bitOp{
+ opWord: func(l, r uint64) uint64 { return l ^ r },
+ opByte: func(l, r byte) byte { return l ^ r },
+ }
)
func alignedBitmapOp(op bitOp, left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) {
@@ -532,6 +572,22 @@ func BitmapOrAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset in
return BitmapOpAlloc(mem, bitOrOp, left, right, lOffset, rOffset, length, outOffset)
}
+func BitmapAndNot(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) {
+ BitmapOp(bitAndNotOp, left, right, lOffset, rOffset, out, outOffset, length)
+}
+
+func BitmapAndNotAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer {
+ return BitmapOpAlloc(mem, bitAndNotOp, left, right, lOffset, rOffset, length, outOffset)
+}
+
+func BitmapXor(left, right []byte, lOffset, rOffset int64, out []byte, outOffset int64, length int64) {
+ BitmapOp(bitXorOp, left, right, lOffset, rOffset, out, outOffset, length)
+}
+
+func BitmapXorAlloc(mem memory.Allocator, left, right []byte, lOffset, rOffset int64, length, outOffset int64) *memory.Buffer {
+ return BitmapOpAlloc(mem, bitXorOp, left, right, lOffset, rOffset, length, outOffset)
+}
+
func BitmapEquals(left, right []byte, lOffset, rOffset int64, length int64) bool {
if lOffset%8 == 0 && rOffset%8 == 0 {
// byte aligned, fast path, can use bytes.Equal (memcmp)
@@ -584,3 +640,108 @@ type OptionalBitIndexer struct {
func (b *OptionalBitIndexer) GetBit(i int) bool {
return b.Bitmap == nil || BitIsSet(b.Bitmap, b.Offset+i)
}
+
+type Bitmap struct {
+ Data []byte
+ Offset, Len int64
+}
+
+func bitLength(bitmaps []Bitmap) (int64, error) {
+ for _, b := range bitmaps[1:] {
+ if b.Len != bitmaps[0].Len {
+ return -1, errors.New("bitmaps must be same length")
+ }
+ }
+ return bitmaps[0].Len, nil
+}
+
+func runVisitWordsAndWriteLoop(bitLen int64, rdrs []*BitmapWordReader, wrs []*BitmapWordWriter, visitor func(in, out []uint64)) {
+ const bitWidth int64 = int64(uint64SizeBits)
+
+ visited := make([]uint64, len(rdrs))
+ output := make([]uint64, len(wrs))
+
+ // every reader will have the same number of words, since they are
+ // the same length. This will be inefficient in some cases. When there's
+ // offsets beyond the Word boundary, every word would have to be
+ // created from 2 adjoining words
+ nwords := int64(rdrs[0].Words())
+ bitLen -= nwords * bitWidth
+ for nwords > 0 {
+ nwords--
+ for i := range visited {
+ visited[i] = rdrs[i].NextWord()
+ }
+ visitor(visited, output)
+ for i := range output {
+ wrs[i].PutNextWord(output[i])
+ }
+ }
+
+ // every reader will have the same number of trailing bytes, because
+ // we already confirmed they have the same length. Because
+ // offsets beyond the Word boundary can cause adjoining words, the
+ // trailing portion could be more than one word, leaving full/partial
+ // words to write.
+ if bitLen == 0 {
+ return
+ }
+
+ // convert the word visitor to a byte visitor
+ byteVisitor := func(in, out []byte) {
+ for i, w := range in {
+ visited[i] = uint64(w)
+ }
+ visitor(visited, output)
+ for i, w := range output {
+ out[i] = byte(w)
+ }
+ }
+
+ visitedBytes := make([]byte, len(rdrs))
+ outputBytes := make([]byte, len(wrs))
+ nbytes := rdrs[0].trailingBytes
+ for nbytes > 0 {
+ nbytes--
+ memory.Set(visitedBytes, 0)
+ memory.Set(outputBytes, 0)
+
+ var validBits int
+ for i := range rdrs {
+ visitedBytes[i], validBits = rdrs[i].NextTrailingByte()
+ }
+ byteVisitor(visitedBytes, outputBytes)
+ for i, w := range outputBytes {
+ wrs[i].PutNextTrailingByte(w, validBits)
+ }
+ }
+}
+
+// VisitWordsAndWrite visits words of bits from each input bitmap and
+// collects outputs to a slice of output Bitmaps.
+//
+// All bitmaps must have identical lengths. The first bit in a visited
+// bitmap may be offset within the first visited word, but words will
+// otherwise contain densely packed bits loaded from the bitmap.
+// An error is returned if the bitmaps do not all have the same length.
+//
+// NOTE: this function is efficient on 3+ sufficiently large bitmaps.
+// It also has a large prolog/epilog overhead and should be used
+// carefully in other cases. For 2 or fewer bitmaps, and/or smaller
+// bitmaps, try BitmapReader and other utilities.
+func VisitWordsAndWrite(args []Bitmap, out []Bitmap, visitor func(in, out []uint64)) error {
+ bitLen, err := bitLength(args)
+ if err != nil {
+ return err
+ }
+
+ rdrs, wrs := make([]*BitmapWordReader, len(args)), make([]*BitmapWordWriter, len(out))
+ for i, in := range args {
+ rdrs[i] = NewBitmapWordReader(in.Data, int(in.Offset), int(in.Len))
+ }
+ for i, o := range out {
+ wrs[i] = NewBitmapWordWriter(o.Data, int(o.Offset), int(o.Len))
+ }
+ runVisitWordsAndWriteLoop(bitLen, rdrs, wrs, visitor)
+ return nil
+}
diff --git a/go/arrow/compute/internal/kernels/scalar_boolean.go b/go/arrow/compute/internal/kernels/scalar_boolean.go
new file mode 100644
index 0000000000..a458306451
--- /dev/null
+++ b/go/arrow/compute/internal/kernels/scalar_boolean.go
@@ -0,0 +1,332 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package kernels
+
+import (
+ "github.com/apache/arrow/go/v10/arrow/bitutil"
+ "github.com/apache/arrow/go/v10/arrow/compute/internal/exec"
+ "github.com/apache/arrow/go/v10/arrow/scalar"
+)
+
+type computeWordFN func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64)
+
+func computeKleene(computeWord computeWordFN, ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ var (
+ inBMs = [4]bitutil.Bitmap{
+ {Data: left.Buffers[0].Buf, Offset: left.Offset, Len: left.Len},
+ {Data: left.Buffers[1].Buf, Offset: left.Offset, Len: left.Len},
+ {Data: right.Buffers[1].Buf, Offset: right.Offset, Len: right.Len},
+ {Data: right.Buffers[0].Buf, Offset: right.Offset, Len: right.Len},
+ }
+ outBMs = [2]bitutil.Bitmap{
+ {Data: out.Buffers[0].Buf, Offset: out.Offset, Len: out.Len},
+ {Data: out.Buffers[1].Buf, Offset: out.Offset, Len: out.Len},
+ }
+ apply = func(leftValid, leftData uint64, rightValid, rightData uint64) (outValidity, outData uint64) {
+ leftTrue, leftFalse := leftValid&leftData, leftValid&^leftData
+ rightTrue, rightFalse := rightValid&rightData, rightValid&^rightData
+ return computeWord(leftTrue, leftFalse, rightTrue, rightFalse)
+ }
+ )
+
+ switch {
+ case right.UpdateNullCount() == 0:
+ return bitutil.VisitWordsAndWrite(inBMs[:3], outBMs[:],
+ func(in, out []uint64) {
+ out[0], out[1] = apply(in[0], in[1], ^uint64(0), in[2])
+ })
+ case left.UpdateNullCount() == 0:
+ return bitutil.VisitWordsAndWrite(inBMs[1:], outBMs[:],
+ func(in, out []uint64) {
+ out[0], out[1] = apply(^uint64(0), in[0], in[2], in[1])
+ })
+ default:
+ return bitutil.VisitWordsAndWrite(inBMs[:], outBMs[:],
+ func(in, out []uint64) {
+ out[0], out[1] = apply(in[0], in[1], in[3], in[2])
+ })
+ }
+}
+
+type AndOpKernel struct {
+ commutativeBinaryKernel[AndOpKernel]
+}
+
+func (AndOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ bitutil.BitmapAnd(left.Buffers[1].Buf, right.Buffers[1].Buf,
+ left.Offset, right.Offset, out.Buffers[1].Buf, out.Offset, left.Len)
+ return nil
+}
+
+func (AndOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if !left.IsValid() {
+ return nil
+ }
+
+ outBM := out.Buffers[1].Buf
+ if left.(*scalar.Boolean).Value {
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset),
+ int(right.Len), outBM, int(out.Offset))
+ } else {
+ bitutil.SetBitsTo(outBM, out.Offset, out.Len, false)
+ }
+ return nil
+}
+
+type KleeneAndOpKernel struct {
+ commutativeBinaryKernel[KleeneAndOpKernel]
+}
+
+func (KleeneAndOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if left.UpdateNullCount() == 0 && right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ return (AndOpKernel{}).Call(ctx, left, right, out)
+ }
+
+ computeWord := func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64) {
+ return leftFalse | rightFalse | (leftTrue & rightTrue), leftTrue & rightTrue
+ }
+ return computeKleene(computeWord, ctx, left, right, out)
+}
+
+func (KleeneAndOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ var (
+ leftTrue = left.IsValid() && left.(*scalar.Boolean).Value
+ leftFalse = left.IsValid() && !left.(*scalar.Boolean).Value
+ )
+
+ switch {
+ case leftFalse:
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ bitutil.SetBitsTo(out.Buffers[1].Buf, out.Offset, out.Len, false)
+ case leftTrue:
+ if right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ } else {
+ bitutil.CopyBitmap(right.Buffers[0].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ }
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ default: // scalar was null: out[i] is valid iff right[i] was false
+ if right.UpdateNullCount() == 0 {
+ bitutil.InvertBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ } else {
+ bitutil.BitmapAndNot(right.Buffers[0].Buf, right.Buffers[1].Buf, right.Offset,
+ right.Offset, out.Buffers[0].Buf, out.Offset, right.Len)
+ }
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ }
+ return nil
+}
+
+type OrOpKernel struct {
+ commutativeBinaryKernel[OrOpKernel]
+}
+
+func (OrOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ bitutil.BitmapOr(left.Buffers[1].Buf, right.Buffers[1].Buf,
+ left.Offset, right.Offset, out.Buffers[1].Buf, out.Offset, left.Len)
+ return nil
+}
+
+func (OrOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if !left.IsValid() {
+ return nil
+ }
+
+ outBM := out.Buffers[1].Buf
+ if left.(*scalar.Boolean).Value {
+ bitutil.SetBitsTo(outBM, out.Offset, out.Len, true)
+ } else {
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset),
+ int(right.Len), outBM, int(out.Offset))
+ }
+ return nil
+}
+
+type KleeneOrOpKernel struct {
+ commutativeBinaryKernel[KleeneOrOpKernel]
+}
+
+func (KleeneOrOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if left.UpdateNullCount() == 0 && right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ return (OrOpKernel{}).Call(ctx, left, right, out)
+ }
+
+ computeWord := func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64) {
+ return leftTrue | rightTrue | (leftFalse & rightFalse), leftTrue | rightTrue
+ }
+ return computeKleene(computeWord, ctx, left, right, out)
+}
+
+func (KleeneOrOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ var (
+ leftTrue = left.IsValid() && left.(*scalar.Boolean).Value
+ leftFalse = left.IsValid() && !left.(*scalar.Boolean).Value
+ )
+
+ switch {
+ case leftTrue:
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ bitutil.SetBitsTo(out.Buffers[1].Buf, out.Offset, out.Len, true) // all true case
+ case leftFalse:
+ if right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ } else {
+ bitutil.CopyBitmap(right.Buffers[0].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ }
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ default: // scalar was null: out[i] is valid iff right[i] was true
+ if right.UpdateNullCount() == 0 {
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ } else {
+ bitutil.BitmapAnd(right.Buffers[0].Buf, right.Buffers[1].Buf, right.Offset,
+ right.Offset, out.Buffers[0].Buf, out.Offset, right.Len)
+ }
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ }
+ return nil
+}
+
+type XorOpKernel struct {
+ commutativeBinaryKernel[XorOpKernel]
+}
+
+func (XorOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ bitutil.BitmapXor(left.Buffers[1].Buf, right.Buffers[1].Buf,
+ left.Offset, right.Offset, out.Buffers[1].Buf, out.Offset, out.Len)
+ return nil
+}
+
+func (XorOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if !left.IsValid() {
+ return nil
+ }
+
+ outBM := out.Buffers[1].Buf
+ if left.(*scalar.Boolean).Value {
+ bitutil.InvertBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ outBM, int(out.Offset))
+ } else {
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ outBM, int(out.Offset))
+ }
+ return nil
+}
+
+func invertScalar(in scalar.Scalar) *scalar.Boolean {
+ if in.IsValid() {
+ return scalar.NewBooleanScalar(!in.(*scalar.Boolean).Value)
+ }
+ return in.(*scalar.Boolean)
+}
+
+type AndNotOpKernel struct{}
+
+func (AndNotOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ bitutil.BitmapAndNot(left.Buffers[1].Buf, right.Buffers[1].Buf, left.Offset, right.Offset,
+ out.Buffers[1].Buf, out.Offset, right.Len)
+ return nil
+}
+
+func (AndNotOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if !left.IsValid() {
+ return nil
+ }
+
+ outBM := out.Buffers[1].Buf
+ if left.(*scalar.Boolean).Value {
+ bitutil.InvertBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ outBM, int(out.Offset))
+ } else {
+ bitutil.SetBitsTo(outBM, out.Offset, out.Len, false)
+ }
+ return nil
+}
+
+func (AndNotOpKernel) CallScalarRight(ctx *exec.KernelCtx, left *exec.ArraySpan, right scalar.Scalar, out *exec.ExecResult) error {
+ return (AndOpKernel{}).CallScalarRight(ctx, left, invertScalar(right), out)
+}
+
+type KleeneAndNotOpKernel struct{}
+
+func (KleeneAndNotOpKernel) Call(ctx *exec.KernelCtx, left, right *exec.ArraySpan, out *exec.ExecResult) error {
+ if left.UpdateNullCount() == 0 && right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ return (AndNotOpKernel{}).Call(ctx, left, right, out)
+ }
+
+ computeWord := func(leftTrue, leftFalse, rightTrue, rightFalse uint64) (outValid, outData uint64) {
+ return leftFalse | rightTrue | (leftTrue & rightFalse), leftTrue & rightFalse
+ }
+
+ return computeKleene(computeWord, ctx, left, right, out)
+}
+
+func (KleeneAndNotOpKernel) CallScalarLeft(ctx *exec.KernelCtx, left scalar.Scalar, right *exec.ArraySpan, out *exec.ExecResult) error {
+ var (
+ leftTrue = left.IsValid() && left.(*scalar.Boolean).Value
+ leftFalse = left.IsValid() && !left.(*scalar.Boolean).Value
+ )
+
+ switch {
+ case leftFalse:
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ bitutil.SetBitsTo(out.Buffers[1].Buf, out.Offset, out.Len, false)
+ case leftTrue:
+ if right.UpdateNullCount() == 0 {
+ bitutil.SetBitsTo(out.Buffers[0].Buf, out.Offset, out.Len, true)
+ out.Nulls = 0
+ } else {
+ bitutil.CopyBitmap(right.Buffers[0].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ }
+ bitutil.InvertBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ default: // scalar was null: out[i] is valid iff right[i] was true
+ if right.UpdateNullCount() == 0 {
+ bitutil.CopyBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[0].Buf, int(out.Offset))
+ } else {
+ bitutil.BitmapAnd(right.Buffers[0].Buf, right.Buffers[1].Buf, right.Offset, right.Offset,
+ out.Buffers[0].Buf, out.Offset, right.Len)
+ }
+ bitutil.InvertBitmap(right.Buffers[1].Buf, int(right.Offset), int(right.Len),
+ out.Buffers[1].Buf, int(out.Offset))
+ }
+ return nil
+}
+
+func (KleeneAndNotOpKernel) CallScalarRight(ctx *exec.KernelCtx, left *exec.ArraySpan, right scalar.Scalar, out *exec.ExecResult) error {
+ return (KleeneAndOpKernel{}).CallScalarRight(ctx, left, invertScalar(right), out)
+}
diff --git a/go/arrow/compute/internal/kernels/types.go b/go/arrow/compute/internal/kernels/types.go
index eeae4b6c4e..073e1c608c 100644
--- a/go/arrow/compute/internal/kernels/types.go
+++ b/go/arrow/compute/internal/kernels/types.go
@@ -17,7 +17,12 @@
package kernels
import (
+ "fmt"
+
"github.com/apache/arrow/go/v10/arrow"
+ "github.com/apache/arrow/go/v10/arrow/compute/internal/exec"
+ "github.com/apache/arrow/go/v10/arrow/internal/debug"
+ "github.com/apache/arrow/go/v10/arrow/scalar"
)
var (
@@ -62,3 +67,41 @@ const (
CmpLT
CmpLE
)
+
+type simpleBinaryKernel interface {
+ Call(*exec.KernelCtx, *exec.ArraySpan, *exec.ArraySpan, *exec.ExecResult) error
+ CallScalarLeft(*exec.KernelCtx, scalar.Scalar, *exec.ArraySpan, *exec.ExecResult) error
+}
+
+type commutativeBinaryKernel[T simpleBinaryKernel] struct{}
+
+func (commutativeBinaryKernel[T]) CallScalarRight(ctx *exec.KernelCtx, left *exec.ArraySpan, right scalar.Scalar, out *exec.ExecResult) error {
+ var t T
+ return t.CallScalarLeft(ctx, right, left, out)
+}
+
+type SimpleBinaryKernel interface {
+ simpleBinaryKernel
+ CallScalarRight(*exec.KernelCtx, *exec.ArraySpan, scalar.Scalar, *exec.ExecResult) error
+}
+
+func SimpleBinary[K SimpleBinaryKernel](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
+ if batch.Len == 0 {
+ return nil
+ }
+
+ var k K
+ if batch.Values[0].IsArray() {
+ if batch.Values[1].IsArray() {
+ return k.Call(ctx, &batch.Values[0].Array, &batch.Values[1].Array, out)
+ }
+ return k.CallScalarRight(ctx, &batch.Values[0].Array, batch.Values[1].Scalar, out)
+ }
+
+ if batch.Values[1].IsArray() {
+ return k.CallScalarLeft(ctx, batch.Values[0].Scalar, &batch.Values[1].Array, out)
+ }
+
+ debug.Assert(false, "should be unreachable")
+ return fmt.Errorf("%w: should be unreachable", arrow.ErrInvalid)
+}
diff --git a/go/arrow/compute/registry.go b/go/arrow/compute/registry.go
index d56605f407..c28eea619a 100644
--- a/go/arrow/compute/registry.go
+++ b/go/arrow/compute/registry.go
@@ -46,6 +46,7 @@ func GetFunctionRegistry() FunctionRegistry {
registry = NewRegistry()
RegisterScalarCast(registry)
RegisterVectorSelection(registry)
+ RegisterScalarBoolean(registry)
RegisterScalarArithmetic(registry)
})
return registry
diff --git a/go/arrow/compute/scalar_bool.go b/go/arrow/compute/scalar_bool.go
new file mode 100644
index 0000000000..0a0f6afd19
--- /dev/null
+++ b/go/arrow/compute/scalar_bool.go
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compute
+
+import (
+ "fmt"
+
+ "github.com/apache/arrow/go/v10/arrow"
+ "github.com/apache/arrow/go/v10/arrow/compute/internal/exec"
+ "github.com/apache/arrow/go/v10/arrow/compute/internal/kernels"
+)
+
+var (
+ andDoc = FunctionDoc{
+ Summary: "Logical 'and' boolean values",
+ Description: "When a null is encountered in either input, a null is output.\nFor a different null behavior, see function 'and_kleene'",
+ ArgNames: []string{"x", "y"},
+ }
+ andNotDoc = FunctionDoc{
+ Summary: "Logical 'and not' boolean values",
+ Description: "When a null is encountered in either input, a null is output.\nFor a different null behavior, see function 'and_not_kleene'",
+ ArgNames: []string{"x", "y"},
+ }
+ orDoc = FunctionDoc{
+ Summary: "Logical 'or' boolean values",
+ Description: "When a null is encountered in either input, a null is output.\nFor a different null behavior, see function 'or_kleene'",
+ ArgNames: []string{"x", "y"},
+ }
+ xorDoc = FunctionDoc{
+ Summary: "Logical 'xor' boolean values",
+ Description: "When a null is encountered in either input, a null is output.",
+ ArgNames: []string{"x", "y"},
+ }
+ andKleeneDoc = FunctionDoc{
+ Summary: "Logical 'and' boolean values (Kleene logic)",
+ Description: `This function behaves as follows with nulls:
+
+ - true and null = null
+ - null and true = null
+ - false and null = false
+ - null and false = false
+ - null and null = null
+
+ In other words, in this context, a null value really means "unknown"
+ and an unknown value "and" false is always false.
+ For a different null behavior, see function "and".`,
+ ArgNames: []string{"x", "y"},
+ }
+ andNotKleeneDoc = FunctionDoc{
+ Summary: "Logical 'and_not' boolean values (Kleene logic)",
+ Description: `This function behaves as follows with nulls:
+
+ - true and not null = null
+ - null and not false = null
+ - false and not null = false
+ - null and not true = false
+ - null and not null = null
+
+ In other words, in this context, a null value really means "unknown"
+ and an unknown value "and not" true is always false, as is false
+ "and not" an unknown value.
+ For a different null behavior, see function "and_not".`,
+ ArgNames: []string{"x", "y"},
+ }
+ orKleeneDoc = FunctionDoc{
+ Summary: "Logical 'or' boolean values (Kleene logic)",
+ Description: `This function behaves as follows with nulls:
+
+ - true or null = true
+ - null or true = true
+ - false or null = null
+ - null or false = null
+ - null or null = null
+
+ In other words, in this context, a null value really means "unknown"
+ and an unknown value "or" true is always true.
+ For a different null behavior, see function "or".
+ ArgNames: []string{"x", "y"},
+ }
+)
+
+func makeFunction(reg FunctionRegistry, name string, arity int, ex exec.ArrayKernelExec, doc FunctionDoc, nulls exec.NullHandling) {
+ fn := NewScalarFunction(name, Arity{NArgs: arity}, doc)
+
+ inTypes := make([]exec.InputType, arity)
+ for i := range inTypes {
+ inTypes[i] = exec.NewExactInput(arrow.FixedWidthTypes.Boolean)
+ }
+
+ k := exec.NewScalarKernel(inTypes, exec.NewOutputType(arrow.FixedWidthTypes.Boolean), ex, nil)
+ k.NullHandling = nulls
+
+ if err := fn.AddKernel(k); err != nil {
+ panic(err)
+ }
+
+ if !reg.AddFunction(fn, false) {
+ panic(fmt.Errorf("function '%s' already exists", name))
+ }
+}
+
+func RegisterScalarBoolean(reg FunctionRegistry) {
+ makeFunction(reg, "and", 2, kernels.SimpleBinary[kernels.AndOpKernel],
+ andDoc, exec.NullIntersection)
+ makeFunction(reg, "and_not", 2, kernels.SimpleBinary[kernels.AndNotOpKernel],
+ andNotDoc, exec.NullIntersection)
+ makeFunction(reg, "or", 2, kernels.SimpleBinary[kernels.OrOpKernel],
+ orDoc, exec.NullIntersection)
+ makeFunction(reg, "xor", 2, kernels.SimpleBinary[kernels.XorOpKernel],
+ xorDoc, exec.NullIntersection)
+ makeFunction(reg, "and_kleene", 2, kernels.SimpleBinary[kernels.KleeneAndOpKernel],
+ andKleeneDoc, exec.NullComputedPrealloc)
+ makeFunction(reg, "and_not_kleene", 2, kernels.SimpleBinary[kernels.KleeneAndNotOpKernel],
+ andNotKleeneDoc, exec.NullComputedPrealloc)
+ makeFunction(reg, "or_kleene", 2, kernels.SimpleBinary[kernels.KleeneOrOpKernel],
+ orKleeneDoc, exec.NullComputedPrealloc)
+}
diff --git a/go/arrow/compute/scalar_bool_test.go b/go/arrow/compute/scalar_bool_test.go
new file mode 100644
index 0000000000..956118d265
--- /dev/null
+++ b/go/arrow/compute/scalar_bool_test.go
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package compute_test
+
+import (
+ "context"
+ "strings"
+ "testing"
+
+ "github.com/apache/arrow/go/v10/arrow"
+ "github.com/apache/arrow/go/v10/arrow/array"
+ "github.com/apache/arrow/go/v10/arrow/compute"
+ "github.com/apache/arrow/go/v10/arrow/memory"
+ "github.com/apache/arrow/go/v10/arrow/scalar"
+ "github.com/stretchr/testify/require"
+)
+
+// checkScalarBinary is a thin convenience wrapper over checkScalar for
+// binary functions: it invokes fn with the two operands and compares the
+// result against expected.
+func checkScalarBinary(t *testing.T, fn string, left, right, expected compute.Datum, opts compute.FunctionOptions) {
+	checkScalar(t, fn, []compute.Datum{left, right}, expected, opts)
+}
+
+// checkBooleanScalarArrayBinary verifies scalar/array argument dispatch
+// for funcName: for each boolean scalar operand (null, true, false) it
+// materializes an equivalent constant array, computes the reference
+// result using array/array inputs, and asserts the scalar/array call
+// (with the scalar on either side) matches that reference.
+func checkBooleanScalarArrayBinary(t *testing.T, ctx context.Context, funcName string, array compute.Datum) {
+	mem := compute.GetAllocator(ctx)
+	for _, sc := range []scalar.Scalar{scalar.MakeNullScalar(arrow.FixedWidthTypes.Boolean), scalar.NewBooleanScalar(true), scalar.NewBooleanScalar(false)} {
+		constantArr, err := scalar.MakeArrayFromScalar(sc, int(array.Len()), mem)
+		// check the error before deferring Release: if MakeArrayFromScalar
+		// failed, constantArr may be nil and the deferred Release would
+		// panic, masking the real failure.
+		require.NoError(t, err)
+		defer constantArr.Release()
+
+		// reference result with the scalar broadcast as the left operand
+		expected, err := compute.CallFunction(ctx, funcName, nil, &compute.ArrayDatum{Value: constantArr.Data()}, array)
+		require.NoError(t, err)
+		defer expected.Release()
+
+		checkScalar(t, funcName, []compute.Datum{compute.NewDatum(sc), array}, expected, nil)
+
+		// reference result with the scalar broadcast as the right operand
+		expected, err = compute.CallFunction(ctx, funcName, nil, array, &compute.ArrayDatum{Value: constantArr.Data()})
+		require.NoError(t, err)
+		defer expected.Release()
+		checkScalar(t, funcName, []compute.Datum{array, compute.NewDatum(sc)}, expected, nil)
+	}
+}
+
+// TestBooleanKernels exercises the non-Kleene boolean kernels (and, or,
+// xor, and_not) on array/array and scalar/array inputs. Commutative
+// functions share one 6-element input pair; and_not uses a 9-element
+// full cross product of (true,false,null) x (true,false,null) since
+// operand order matters. The checked allocator asserts no buffer leaks.
+func TestBooleanKernels(t *testing.T) {
+	tests := []struct {
+		fn           string
+		expectedJSON string
+		commutative  bool
+	}{
+		{"and", `[true, false, null, false, null, null]`, true},
+		{"or", `[true, true, null, false, null, null]`, true},
+		{"xor", `[false, true, null, false, null, null]`, true},
+		{"and_not", `[false, true, null, false, false, null, null, null, null]`, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.fn, func(t *testing.T) {
+			mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+			defer mem.AssertSize(t, 0)
+
+			// default inputs for the commutative case
+			var (
+				leftJSON  = `[true, true, true, false, false, null]`
+				rightJSON = `[true, false, null, false, null, null]`
+			)
+
+			// non-commutative case needs the full 3x3 operand cross product
+			if !tt.commutative {
+				leftJSON = `[true, true, true, false, false, false, null, null, null]`
+				rightJSON = `[true, false, null, true, false, null, true, false, null]`
+			}
+
+			left, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean,
+				strings.NewReader(leftJSON))
+			defer left.Release()
+			right, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean,
+				strings.NewReader(rightJSON))
+			defer right.Release()
+			exp, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean, strings.NewReader(tt.expectedJSON))
+			defer exp.Release()
+
+			checkScalarBinary(t, tt.fn, &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()}, &compute.ArrayDatum{Value: exp.Data()}, nil)
+			ctx := compute.WithAllocator(context.Background(), mem)
+			checkBooleanScalarArrayBinary(t, ctx, tt.fn, &compute.ArrayDatum{Value: left.Data()})
+		})
+	}
+}
+
+// TestBooleanKleeneKernels exercises the Kleene-logic kernels
+// (and_kleene, or_kleene, and_not_kleene). Each function supplies
+// several input/expected case sets (indexed in parallel across
+// leftJSON/rightJSON/expectedJSON) covering combinations with and
+// without nulls; each case runs with its own checked allocator so
+// leaks are attributed to a specific case.
+func TestBooleanKleeneKernels(t *testing.T) {
+	tests := []struct {
+		fn           string
+		expectedJSON []string
+		commutative  bool
+	}{
+		{"and_kleene", []string{`[true, false, null, false, false, null]`, `[true, false, false, null, false]`, `[true, false, false, false]`}, true},
+		{"or_kleene", []string{`[true, true, true, false, null, null]`, `[true, true, false, true, null]`, `[true, true, false, true]`}, true},
+		{"and_not_kleene", []string{`[false, true, null, false, false, false, false, null, null]`, `[false, true, false, false]`}, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.fn, func(t *testing.T) {
+			var (
+				leftJSON  = make([]string, len(tt.expectedJSON))
+				rightJSON = make([]string, len(tt.expectedJSON))
+			)
+
+			// input sets are positional: leftJSON[i]/rightJSON[i] pair with
+			// tt.expectedJSON[i]
+			if tt.commutative {
+				leftJSON[0] = `[true, true, true, false, false, null]`
+				rightJSON[0] = `[true, false, null, false, null, null]`
+				leftJSON[1] = `[true, true, false, null, null]`
+				rightJSON[1] = `[true, false, false, true, false]`
+				leftJSON[2] = `[true, true, false, true]`
+				rightJSON[2] = `[true, false, false, false]`
+			} else {
+				// non-commutative: full 3x3 cross product, then a null-free set
+				leftJSON[0] = `[true, true, true, false, false, false, null, null, null]`
+				rightJSON[0] = `[true, false, null, true, false, null, true, false, null]`
+				leftJSON[1] = `[true, true, false, false]`
+				rightJSON[1] = `[true, false, true, false]`
+			}
+
+			for i := range tt.expectedJSON {
+				// closure scopes the defers (and the leak assertion) per case
+				func() {
+					mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+					defer mem.AssertSize(t, 0)
+
+					left, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean,
+						strings.NewReader(leftJSON[i]))
+					defer left.Release()
+					right, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean,
+						strings.NewReader(rightJSON[i]))
+					defer right.Release()
+					exp, _, _ := array.FromJSON(mem, arrow.FixedWidthTypes.Boolean, strings.NewReader(tt.expectedJSON[i]))
+					defer exp.Release()
+
+					checkScalarBinary(t, tt.fn, &compute.ArrayDatum{Value: left.Data()}, &compute.ArrayDatum{Value: right.Data()}, &compute.ArrayDatum{Value: exp.Data()}, nil)
+					ctx := compute.WithAllocator(context.Background(), mem)
+					checkBooleanScalarArrayBinary(t, ctx, tt.fn, &compute.ArrayDatum{Value: left.Data()})
+				}()
+			}
+		})
+	}
+}
diff --git a/go/go.sum b/go/go.sum
index 04695d5559..b247b659cc 100644
--- a/go/go.sum
+++ b/go/go.sum
@@ -137,6 +137,7 @@ github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qq
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=