You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ze...@apache.org on 2023/01/19 22:41:35 UTC
[arrow] branch master updated: GH-32946: [Go] Implement REE Array and Compare (#14111)
This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 9a1373452f GH-32946: [Go] Implement REE Array and Compare (#14111)
9a1373452f is described below
commit 9a1373452ff5b4cf41cc371e0585d8dda91ffd36
Author: Matt Topol <zo...@gmail.com>
AuthorDate: Thu Jan 19 17:41:28 2023 -0500
GH-32946: [Go] Implement REE Array and Compare (#14111)
* Closes: #32946
Authored-by: Matt Topol <zo...@gmail.com>
Signed-off-by: Matt Topol <zo...@gmail.com>
---
go/arrow/array/array.go | 2 +-
go/arrow/array/builder.go | 9 --
go/arrow/array/compare.go | 6 +
go/arrow/array/encoded.go | 158 +++++++++++++++++++++++++
go/arrow/array/encoded_test.go | 209 +++++++++++++++++++++++++++++++++
go/arrow/compare.go | 4 +
go/arrow/compute/internal/exec/span.go | 2 +
go/arrow/datatype.go | 7 +-
go/arrow/datatype_encoded.go | 64 ++++++++++
go/arrow/encoded/ree_utils.go | 202 +++++++++++++++++++++++++++++++
go/arrow/encoded/ree_utils_test.go | 148 +++++++++++++++++++++++
go/arrow/internal/utils.go | 2 +-
go/arrow/scalar/scalar.go | 51 ++++----
go/arrow/type_string.go | 6 +-
14 files changed, 820 insertions(+), 50 deletions(-)
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index ee8c216d08..34efc8c5e2 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -175,8 +175,8 @@ func init() {
arrow.LARGE_STRING: func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) },
arrow.LARGE_BINARY: func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) },
arrow.LARGE_LIST: func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) },
- arrow.INTERVAL: func(data arrow.ArrayData) arrow.Array { return NewIntervalData(data) },
arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) },
+ arrow.RUN_END_ENCODED: func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) },
// invalid data types to fill out array to size 2^6 - 1
63: invalidDataType,
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 8eebd5ac64..6a0d58ec2e 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -281,15 +281,6 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
case arrow.TIME64:
typ := dtype.(*arrow.Time64Type)
return NewTime64Builder(mem, typ)
- case arrow.INTERVAL:
- switch dtype.(type) {
- case *arrow.DayTimeIntervalType:
- return NewDayTimeIntervalBuilder(mem)
- case *arrow.MonthIntervalType:
- return NewMonthIntervalBuilder(mem)
- case *arrow.MonthDayNanoIntervalType:
- return NewMonthDayNanoIntervalBuilder(mem)
- }
case arrow.INTERVAL_MONTHS:
return NewMonthIntervalBuilder(mem)
case arrow.INTERVAL_DAY_TIME:
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index 68143e0086..7d15134e54 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -333,6 +333,9 @@ func Equal(left, right arrow.Array) bool {
case *DenseUnion:
r := right.(*DenseUnion)
return arrayDenseUnionEqual(l, r)
+ case *RunEndEncoded:
+ r := right.(*RunEndEncoded)
+ return arrayRunEndEncodedEqual(l, r)
default:
panic(fmt.Errorf("arrow/array: unknown array type %T", l))
}
@@ -591,6 +594,9 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
case *DenseUnion:
r := right.(*DenseUnion)
return arrayDenseUnionApproxEqual(l, r, opt)
+ case *RunEndEncoded:
+ r := right.(*RunEndEncoded)
+ return arrayRunEndEncodedApproxEqual(l, r, opt)
default:
panic(fmt.Errorf("arrow/array: unknown array type %T", l))
}
diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go
new file mode 100644
index 0000000000..ddb8ef1c8b
--- /dev/null
+++ b/go/arrow/array/encoded.go
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/apache/arrow/go/v11/arrow"
+ "github.com/apache/arrow/go/v11/arrow/encoded"
+ "github.com/apache/arrow/go/v11/arrow/internal/debug"
+ "github.com/apache/arrow/go/v11/arrow/memory"
+ "github.com/goccy/go-json"
+)
+
+// RunEndEncoded represents an array containing two children:
+// an array of run ends (int16, int32, or int64 without nulls, as enforced
+// by setData) defining the end of each run, and an array of values
+type RunEndEncoded struct {
+ array
+
+ ends arrow.Array
+ values arrow.Array
+}
+
+// NewRunEndEncodedArray builds a RunEndEncoded array from a run-ends child
+// and a values child. logicalLength and offset are forwarded to NewData
+// as the length and offset of the new array data, which is created with a
+// single nil buffer. The temporary data is released before returning; the
+// returned array is constructed from it by NewRunEndEncodedData.
+func NewRunEndEncodedArray(runEnds, values arrow.Array, logicalLength, offset int) *RunEndEncoded {
+	dt := arrow.RunEndEncodedOf(runEnds.DataType(), values.DataType())
+	buffers := []*memory.Buffer{nil}
+	children := []arrow.ArrayData{runEnds.Data(), values.Data()}
+
+	data := NewData(dt, logicalLength, buffers, children, 0, offset)
+	defer data.Release()
+
+	return NewRunEndEncodedData(data)
+}
+
+// NewRunEndEncodedData wraps data in a RunEndEncoded array whose reference
+// count starts at 1. data must be a *Data (the type assertion panics
+// otherwise), and setData panics if the children are not valid for a
+// run-end encoded array.
+func NewRunEndEncodedData(data arrow.ArrayData) *RunEndEncoded {
+	arr := new(RunEndEncoded)
+	arr.refCount = 1
+	arr.setData(data.(*Data))
+	return arr
+}
+
+// Values returns the child array holding the value of each run.
+func (r *RunEndEncoded) Values() arrow.Array {
+	return r.values
+}
+
+// RunEndsArr returns the child array holding the run ends.
+func (r *RunEndEncoded) RunEndsArr() arrow.Array {
+	return r.ends
+}
+
+// Retain retains the embedded array as well as both child arrays
+// (values and run ends).
+func (r *RunEndEncoded) Retain() {
+ r.array.Retain()
+ r.values.Retain()
+ r.ends.Retain()
+}
+
+// Release releases the embedded array and then both child arrays
+// (values and run ends), mirroring Retain.
+func (r *RunEndEncoded) Release() {
+ r.array.Release()
+ r.values.Release()
+ r.ends.Release()
+}
+
+// setData validates data and installs it as the backing data of r.
+//
+// It panics with an error wrapping arrow.ErrInvalid when data does not have
+// exactly two children, when the first child (the run ends) is not of a
+// type accepted by ValidRunEndsType, or when the run ends contain nulls.
+// After validation both children are materialized as arrays via MakeFromData.
+func (r *RunEndEncoded) setData(data *Data) {
+	if n := len(data.childData); n != 2 {
+		panic(fmt.Errorf("%w: arrow/array: RLE array must have exactly 2 children", arrow.ErrInvalid))
+	}
+	debug.Assert(data.dtype.ID() == arrow.RUN_END_ENCODED, "invalid type for RunLengthEncoded")
+
+	reeType := data.dtype.(*arrow.RunEndEncodedType)
+	runEnds := data.childData[0]
+	switch {
+	case !reeType.ValidRunEndsType(runEnds.DataType()):
+		panic(fmt.Errorf("%w: arrow/array: run ends array must be int16, int32, or int64", arrow.ErrInvalid))
+	case runEnds.NullN() > 0:
+		panic(fmt.Errorf("%w: arrow/array: run ends array cannot contain nulls", arrow.ErrInvalid))
+	}
+
+	r.array.setData(data)
+
+	// build the child arrays from the data now stored on the embedded array
+	r.ends = MakeFromData(r.data.childData[0])
+	r.values = MakeFromData(r.data.childData[1])
+}
+
+// GetPhysicalOffset returns the result of encoded.FindPhysicalOffset for
+// this array's data: the index into the run-ends/values children that
+// corresponds to the array's logical offset.
+func (r *RunEndEncoded) GetPhysicalOffset() int {
+ return encoded.FindPhysicalOffset(r.data)
+}
+
+// GetPhysicalLength returns the result of encoded.GetPhysicalLength for
+// this array's data: the number of physical runs covered by the array's
+// logical offset and length.
+func (r *RunEndEncoded) GetPhysicalLength() int {
+ return encoded.GetPhysicalLength(r.data)
+}
+
+// String renders every entry of the run-ends child, paired with the value
+// at the same index, as "{end -> value}" in a comma-separated list wrapped
+// in square brackets. The array's logical offset and length are not applied.
+func (r *RunEndEncoded) String() string {
+	var out bytes.Buffer
+	out.WriteByte('[')
+	for idx, n := 0, r.ends.Len(); idx < n; idx++ {
+		if idx > 0 {
+			out.WriteByte(',')
+		}
+		end := r.ends.(arraymarshal).getOneForMarshal(idx)
+		val := r.values.(arraymarshal).getOneForMarshal(idx)
+		fmt.Fprintf(&out, "{%v -> %v}", end, val)
+	}
+
+	out.WriteByte(']')
+	return out.String()
+}
+
+func (r *RunEndEncoded) getOneForMarshal(i int) interface{} {
+ return [2]interface{}{r.ends.(arraymarshal).getOneForMarshal(i),
+ r.values.(arraymarshal).getOneForMarshal(i)}
+}
+
+// MarshalJSON encodes the array as a JSON list with one [run end, value]
+// pair per entry of the run-ends child. The first encoding error aborts
+// the marshal and is returned to the caller.
+func (r *RunEndEncoded) MarshalJSON() ([]byte, error) {
+	var out bytes.Buffer
+	encoder := json.NewEncoder(&out)
+
+	out.WriteByte('[')
+	for idx, n := 0, r.ends.Len(); idx < n; idx++ {
+		if idx > 0 {
+			out.WriteByte(',')
+		}
+		err := encoder.Encode(r.getOneForMarshal(idx))
+		if err != nil {
+			return nil, err
+		}
+	}
+	out.WriteByte(']')
+
+	return out.Bytes(), nil
+}
+
+// arrayRunEndEncodedEqual reports whether l and r are logically equal. It
+// walks the merged runs of both arrays and compares the single value that
+// backs each merged run using SliceEqual.
+//
+// The caller has already verified that the encoded types match.
+func arrayRunEndEncodedEqual(l, r *RunEndEncoded) bool {
+	merged := encoded.NewMergedRuns([2]arrow.Array{l, r})
+	for merged.Next() {
+		li, ri := merged.IndexIntoArray(0), merged.IndexIntoArray(1)
+		if SliceEqual(l.values, li, li+1, r.values, ri, ri+1) {
+			continue
+		}
+		return false
+	}
+	return true
+}
+
+// arrayRunEndEncodedApproxEqual is the approximate counterpart of
+// arrayRunEndEncodedEqual: it walks the merged runs of l and r and compares
+// the value backing each merged run with sliceApproxEqual under opt.
+//
+// The caller has already verified that the encoded types match.
+func arrayRunEndEncodedApproxEqual(l, r *RunEndEncoded, opt equalOption) bool {
+	merged := encoded.NewMergedRuns([2]arrow.Array{l, r})
+	for merged.Next() {
+		li, ri := merged.IndexIntoArray(0), merged.IndexIntoArray(1)
+		if sliceApproxEqual(l.values, li, li+1, r.values, ri, ri+1, opt) {
+			continue
+		}
+		return false
+	}
+	return true
+}
diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go
new file mode 100644
index 0000000000..4619e24442
--- /dev/null
+++ b/go/arrow/array/encoded_test.go
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/apache/arrow/go/v11/arrow"
+ "github.com/apache/arrow/go/v11/arrow/array"
+ "github.com/apache/arrow/go/v11/arrow/memory"
+ "github.com/stretchr/testify/assert"
+)
+
+// Shared fixtures used as run-ends and values children by the tests in this
+// file. The second and third results of array.FromJSON are discarded.
+var (
+ stringValues, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World", null]`))
+ int32Values, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30]`))
+ int32OnlyNull = array.MakeArrayOfNull(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, 3)
+)
+
+// TestMakeRLEArray checks that array.MakeFromData on the data of a
+// RunEndEncoded array yields a *array.RunEndEncoded sharing the same data.
+func TestMakeRLEArray(t *testing.T) {
+ rleArr := array.NewRunEndEncodedArray(int32Values, stringValues, 3, 0)
+ defer rleArr.Release()
+
+ arrData := rleArr.Data()
+ newArr := array.MakeFromData(arrData)
+ defer newArr.Release()
+
+ assert.Same(t, newArr.Data(), arrData)
+ assert.IsType(t, (*array.RunEndEncoded)(nil), newArr)
+}
+
+// TestRLEFromRunEndsAndValues checks the length, children, offset, null
+// count and buffers of arrays built by NewRunEndEncodedArray, with and
+// without an explicit offset, and that invalid run-ends children (wrong
+// type or containing nulls) cause a panic with the expected message.
+func TestRLEFromRunEndsAndValues(t *testing.T) {
+ rleArray := array.NewRunEndEncodedArray(int32Values, int32Values, 3, 0)
+ defer rleArray.Release()
+
+ assert.EqualValues(t, 3, rleArray.Len())
+ assert.Truef(t, array.Equal(int32Values, rleArray.Values()), "expected: %s\ngot: %s", int32Values, rleArray.Values())
+ assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr())
+ assert.Zero(t, rleArray.Offset())
+ assert.Zero(t, rleArray.Data().NullN())
+ // one dummy buffer, since code may assume there's at least one nil buffer
+ assert.Len(t, rleArray.Data().Buffers(), 1)
+
+ // explicit offset
+ rleArray = array.NewRunEndEncodedArray(int32Values, stringValues, 2, 1)
+ defer rleArray.Release()
+
+ assert.EqualValues(t, 2, rleArray.Len())
+ assert.Truef(t, array.Equal(stringValues, rleArray.Values()), "expected: %s\ngot: %s", stringValues, rleArray.Values())
+ assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr())
+ assert.EqualValues(t, 1, rleArray.Offset())
+ assert.Zero(t, rleArray.Data().NullN())
+
+ assert.PanicsWithError(t, "invalid: arrow/array: run ends array must be int16, int32, or int64", func() {
+ array.NewRunEndEncodedArray(stringValues, int32Values, 3, 0)
+ })
+ assert.PanicsWithError(t, "invalid: arrow/array: run ends array cannot contain nulls", func() {
+ array.NewRunEndEncodedArray(int32OnlyNull, int32Values, 3, 0)
+ })
+}
+
+// TestRunLengthEncodedOffsetLength checks GetPhysicalLength and
+// GetPhysicalOffset on a 500-element array made of five runs of 100, and on
+// several slices of it, including slices that start or end mid-run and a
+// zero-length slice at the very end.
+func TestRunLengthEncodedOffsetLength(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ defer mem.AssertSize(t, 0)
+
+ runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[100, 200, 300, 400, 500]`))
+ defer runEnds.Release()
+
+ values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "beautiful", "world", "of", "RLE"]`))
+ defer values.Release()
+
+ rleArray := array.NewRunEndEncodedArray(runEnds, values, 500, 0)
+ defer rleArray.Release()
+
+ assert.EqualValues(t, 5, rleArray.GetPhysicalLength())
+ assert.EqualValues(t, 0, rleArray.GetPhysicalOffset())
+
+ slice := array.NewSlice(rleArray, 199, 204).(*array.RunEndEncoded)
+ defer slice.Release()
+
+ assert.EqualValues(t, 2, slice.GetPhysicalLength())
+ assert.EqualValues(t, 1, slice.GetPhysicalOffset())
+
+ slice2 := array.NewSlice(rleArray, 199, 300).(*array.RunEndEncoded)
+ defer slice2.Release()
+
+ assert.EqualValues(t, 2, slice2.GetPhysicalLength())
+ assert.EqualValues(t, 1, slice2.GetPhysicalOffset())
+
+ slice3 := array.NewSlice(rleArray, 400, 500).(*array.RunEndEncoded)
+ defer slice3.Release()
+
+ assert.EqualValues(t, 1, slice3.GetPhysicalLength())
+ assert.EqualValues(t, 4, slice3.GetPhysicalOffset())
+
+ slice4 := array.NewSlice(rleArray, 0, 150).(*array.RunEndEncoded)
+ defer slice4.Release()
+
+ assert.EqualValues(t, 2, slice4.GetPhysicalLength())
+ assert.EqualValues(t, 0, slice4.GetPhysicalOffset())
+
+ zeroLengthAtEnd := array.NewSlice(rleArray, 500, 500).(*array.RunEndEncoded)
+ defer zeroLengthAtEnd.Release()
+
+ assert.EqualValues(t, 0, zeroLengthAtEnd.GetPhysicalLength())
+ assert.EqualValues(t, 5, zeroLengthAtEnd.GetPhysicalOffset())
+}
+
+// TestRLECompare exercises array.Equal on run-end encoded arrays: an array
+// against a copy of itself, two shifted slices that must differ, an array
+// whose first run is split in two physical runs, empty arrays, and slices
+// of differently laid out arrays that are logically identical.
+func TestRLECompare(t *testing.T) {
+	rleArray := array.NewRunEndEncodedArray(int32Values, stringValues, 30, 0)
+	defer rleArray.Release()
+
+	// second that is a copy of the first; the copied data is released here
+	// in the same way the "emptyArr" subtest below releases its copy
+	copied := rleArray.Data().(*array.Data).Copy()
+	defer copied.Release()
+	standardEquals := array.MakeFromData(copied)
+	defer standardEquals.Release()
+
+	assert.Truef(t, array.Equal(rleArray, standardEquals), "left: %s\nright: %s", rleArray, standardEquals)
+
+	// keep handles on both slices so they can be released after comparing
+	front := array.NewSlice(rleArray, 0, 29)
+	defer front.Release()
+	back := array.NewSlice(rleArray, 1, 30)
+	defer back.Release()
+	assert.False(t, array.Equal(front, back))
+
+	// array that is logically the same as our rleArray, but has 2 small
+	// runs for the first value instead of one large run
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("logical duplicate", func(t *testing.T) {
+		dupRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[5, 10, 20, 30]`))
+		defer dupRunEnds.Release()
+		strValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String,
+			strings.NewReader(`["Hello", "Hello", "World", null]`))
+		defer strValues.Release()
+
+		dupArr := array.NewRunEndEncodedArray(dupRunEnds, strValues, 30, 0)
+		defer dupArr.Release()
+
+		assert.Truef(t, array.Equal(rleArray, dupArr), "expected: %s\ngot: %s", rleArray, dupArr)
+	})
+
+	t.Run("emptyArr", func(t *testing.T) {
+		emptyRuns, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[]`))
+		emptyVals, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`))
+		defer emptyRuns.Release()
+		defer emptyVals.Release()
+
+		emptyArr := array.NewRunEndEncodedArray(emptyRuns, emptyVals, 0, 0)
+		defer emptyArr.Release()
+
+		dataCopy := emptyArr.Data().(*array.Data).Copy()
+		defer dataCopy.Release()
+		emptyArr2 := array.MakeFromData(dataCopy)
+		defer emptyArr2.Release()
+
+		assert.Truef(t, array.Equal(emptyArr, emptyArr2), "expected: %s\ngot: %s", emptyArr, emptyArr2)
+	})
+
+	t.Run("different offsets", func(t *testing.T) {
+		// three different slices that have the value [3, 3, 3, 4, 4, 4, 4]
+		offsetsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[2, 5, 12, 58, 60]`))
+		offsetsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[81, 86, 99, 100]`))
+		offsetsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[3, 7]`))
+		valsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[1, 2, 3, 4, 5]`))
+		valsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[2, 3, 4, 5]`))
+		valsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[3, 4]`))
+		defer func() {
+			offsetsa.Release()
+			offsetsb.Release()
+			offsetsc.Release()
+			valsa.Release()
+			valsb.Release()
+			valsc.Release()
+		}()
+
+		differentOffsetsA := array.NewRunEndEncodedArray(offsetsa, valsa, 60, 0)
+		defer differentOffsetsA.Release()
+		differentOffsetsB := array.NewRunEndEncodedArray(offsetsb, valsb, 100, 0)
+		defer differentOffsetsB.Release()
+		differentOffsetsC := array.NewRunEndEncodedArray(offsetsc, valsc, 7, 0)
+		defer differentOffsetsC.Release()
+
+		sliceA := array.NewSlice(differentOffsetsA, 9, 16)
+		defer sliceA.Release()
+		sliceB := array.NewSlice(differentOffsetsB, 83, 90)
+		defer sliceB.Release()
+
+		assert.True(t, array.Equal(sliceA, sliceB))
+		assert.True(t, array.Equal(sliceA, differentOffsetsC))
+		assert.True(t, array.Equal(sliceB, differentOffsetsC))
+	})
+}
diff --git a/go/arrow/compare.go b/go/arrow/compare.go
index 511abe2238..19221a7f00 100644
--- a/go/arrow/compare.go
+++ b/go/arrow/compare.go
@@ -121,6 +121,10 @@ func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool {
case *TimestampType:
r := right.(*TimestampType)
return l.Unit == r.Unit && l.TimeZone == r.TimeZone
+ case *RunEndEncodedType:
+ r := right.(*RunEndEncodedType)
+ return TypeEqual(l.Encoded(), r.Encoded(), opts...) &&
+ TypeEqual(l.ends, r.ends, opts...)
default:
return reflect.DeepEqual(left, right)
}
diff --git a/go/arrow/compute/internal/exec/span.go b/go/arrow/compute/internal/exec/span.go
index c0a8bff04d..f701b6f39c 100644
--- a/go/arrow/compute/internal/exec/span.go
+++ b/go/arrow/compute/internal/exec/span.go
@@ -590,6 +590,8 @@ type ExecSpan struct {
func getNumBuffers(dt arrow.DataType) int {
switch dt.ID() {
+ case arrow.RUN_END_ENCODED:
+ return 0
case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST:
return 1
case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION:
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index 94a07d73ef..627b1b417b 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -150,12 +150,7 @@ const (
// calendar interval with three fields
INTERVAL_MONTH_DAY_NANO
- // INTERVAL could be any of the interval types, kept to avoid breaking anyone
- // after switching to individual type ids for the interval types that were using
- // it when calling MakeFromData or NewBuilder
- //
- // Deprecated and will be removed in the next major version release
- INTERVAL
+ RUN_END_ENCODED
// Alias to ensure we do not break any consumers
DECIMAL = DECIMAL128
diff --git a/go/arrow/datatype_encoded.go b/go/arrow/datatype_encoded.go
new file mode 100644
index 0000000000..1263f0a9c1
--- /dev/null
+++ b/go/arrow/datatype_encoded.go
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow
+
+// EncodedType is a DataType that additionally reports, through Encoded,
+// the DataType of the values it encodes.
+type EncodedType interface {
+ DataType
+ Encoded() DataType
+}
+
+// RunEndEncodedType is the datatype to represent a run-end encoded
+// array of data. ends is the type of the run-ends child and enc is the
+// type of the values child.
+type RunEndEncodedType struct {
+ ends DataType
+ enc DataType
+}
+
+// RunEndEncodedOf returns a RunEndEncodedType with the given run-ends type
+// and encoded (values) type. The run-ends type is not validated here.
+func RunEndEncodedOf(runEnds, encoded DataType) *RunEndEncodedType {
+ return &RunEndEncodedType{ends: runEnds, enc: encoded}
+}
+
+// ID returns RUN_END_ENCODED.
+func (*RunEndEncodedType) ID() Type {
+	return RUN_END_ENCODED
+}
+
+// Name returns "run_end_encoded".
+func (*RunEndEncodedType) Name() string {
+	return "run_end_encoded"
+}
+// Layout returns a layout with a single buffer spec, SpecAlwaysNull().
+func (*RunEndEncodedType) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull()}}
+}
+
+// String returns the type name followed by the run-ends and values types,
+// in the form "run_end_encoded<run_ends: ENDS, values: VALUES>".
+func (t *RunEndEncodedType) String() string {
+	name := t.Name()
+	ends := t.ends.String()
+	values := t.enc.String()
+	return name + "<run_ends: " + ends + ", values: " + values + ">"
+}
+
+// Fingerprint returns typeFingerprint(t) followed by the fingerprints of
+// the run-ends type and the values type, each terminated by ";" and
+// wrapped in braces.
+func (t *RunEndEncodedType) Fingerprint() string {
+ return typeFingerprint(t) + "{" + t.ends.Fingerprint() + ";" + t.enc.Fingerprint() + ";}"
+}
+
+// Encoded returns the type of the encoded values.
+func (t *RunEndEncodedType) Encoded() DataType { return t.enc }
+
+// Fields returns the two child fields of the type: "run_ends" with the
+// run-ends type, and "values" with the encoded type marked nullable.
+func (t *RunEndEncodedType) Fields() []Field {
+ return []Field{
+ {Name: "run_ends", Type: t.ends},
+ {Name: "values", Type: t.enc, Nullable: true},
+ }
+}
+
+// ValidRunEndsType reports whether dt may be used as the run-ends type,
+// which is the case only for INT16, INT32 and INT64.
+func (*RunEndEncodedType) ValidRunEndsType(dt DataType) bool {
+	id := dt.ID()
+	return id == INT16 || id == INT32 || id == INT64
+}
diff --git a/go/arrow/encoded/ree_utils.go b/go/arrow/encoded/ree_utils.go
new file mode 100644
index 0000000000..6e78cca3bc
--- /dev/null
+++ b/go/arrow/encoded/ree_utils.go
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoded
+
+import (
+ "math"
+ "sort"
+
+ "github.com/apache/arrow/go/v11/arrow"
+)
+
+// FindPhysicalOffset performs a binary search on the run-ends to return
+// the appropriate physical offset into the values/run-ends that corresponds
+// with the logical offset defined in the array.
+//
+// For example, an array with run-ends [10, 20, 30, 40, 50] and a logical
+// offset of 25 will return the value 2. This returns the smallest offset
+// whose run-end is greater than the logical offset, which would also be the
+// offset index into the values that contains the correct value. If no
+// run-end is greater than the logical offset, the number of run-ends is
+// returned.
+//
+// This function assumes it receives Run End Encoded array data and panics
+// if the run-ends child is not int16, int32, or int64.
+func FindPhysicalOffset(arr arrow.ArrayData) int {
+	data := arr.Children()[0]
+	// Widen both sides to int64 for the comparison: narrowing the logical
+	// offset to the run-end type would wrap for offsets beyond that type's
+	// range and make the search return a wrong position.
+	logicalOffset := int64(arr.Offset())
+
+	switch data.DataType().ID() {
+	case arrow.INT16:
+		runEnds := arrow.Int16Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return int64(runEnds[i]) > logicalOffset })
+	case arrow.INT32:
+		runEnds := arrow.Int32Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return int64(runEnds[i]) > logicalOffset })
+	case arrow.INT64:
+		runEnds := arrow.Int64Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > logicalOffset })
+	default:
+		panic("only int16, int32, and int64 are allowed for the run-ends")
+	}
+}
+
+// GetPhysicalLength returns the physical number of values which are in
+// the passed in RunEndEncoded array data. This will take into account
+// the offset and length of the array as reported in the array data
+// (so that it properly handles slices). An array of logical length 0
+// always has a physical length of 0.
+//
+// This function assumes it receives Run End Encoded array data and panics
+// if the run-ends child is not int16, int32, or int64.
+func GetPhysicalLength(arr arrow.ArrayData) int {
+	if arr.Len() == 0 {
+		return 0
+	}
+
+	data := arr.Children()[0]
+	physicalOffset := FindPhysicalOffset(arr)
+	// only the run-ends from the first run of the slice onwards are searched
+	start, length := data.Offset()+physicalOffset, data.Len()-physicalOffset
+	// Logical index of the last element of the array. It is kept as int64
+	// so the comparisons below never narrow it to the run-end type, which
+	// would wrap for values outside that type's range.
+	offset := int64(arr.Offset() + arr.Len() - 1)
+
+	switch data.DataType().ID() {
+	case arrow.INT16:
+		runEnds := arrow.Int16Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return int64(runEnds[i]) > offset }) + 1
+	case arrow.INT32:
+		runEnds := arrow.Int32Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return int64(runEnds[i]) > offset }) + 1
+	case arrow.INT64:
+		runEnds := arrow.Int64Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > offset }) + 1
+	default:
+		panic("arrow/rle: can only get rle.PhysicalLength for int16/int32/int64 run ends array")
+	}
+}
+
+// getRunEnds returns an accessor that yields the run end stored at a given
+// physical index of arr, widened to int64. arr is the run-ends child data;
+// its own offset and length are applied once, up front. It panics if arr is
+// not int16, int32, or int64.
+func getRunEnds(arr arrow.ArrayData) func(int64) int64 {
+	raw := arr.Buffers()[1].Bytes()
+	lo, hi := arr.Offset(), arr.Offset()+arr.Len()
+
+	switch arr.DataType().ID() {
+	case arrow.INT16:
+		ends := arrow.Int16Traits.CastFromBytes(raw)[lo:hi]
+		return func(i int64) int64 { return int64(ends[i]) }
+	case arrow.INT32:
+		ends := arrow.Int32Traits.CastFromBytes(raw)[lo:hi]
+		return func(i int64) int64 { return int64(ends[i]) }
+	case arrow.INT64:
+		ends := arrow.Int64Traits.CastFromBytes(raw)[lo:hi]
+		return func(i int64) int64 { return ends[i] }
+	default:
+		panic("only int16, int32, and int64 are allowed for the run-ends")
+	}
+}
+
+// MergedRuns is used to take two Run End Encoded arrays and iterate
+// them, finding the correct physical indices to correspond with the
+// runs.
+type MergedRuns struct {
+ // the two arrays being iterated together
+ inputs [2]arrow.Array
+ // current physical run index into each input's children
+ runIndex [2]int64
+ // accessors returning the run end at a physical index, one per input
+ inputRunEnds [2]func(int64) int64
+ // end of each input's current run, relative to that input's offset
+ // and capped at logicalLen
+ runEnds [2]int64
+ // logical length shared by both inputs
+ logicalLen int
+ // logical position at which the current merged run starts
+ logicalPos int
+ // logical position at which the current merged run ends (the smaller
+ // of the two runEnds)
+ mergedEnd int64
+}
+
+// NewMergedRuns takes two RunEndEncoded arrays and returns a MergedRuns
+// object that will allow iterating over the physical indices of the runs.
+//
+// It panics if either input is not a run-end encoded array or if the two
+// inputs do not have the same logical length.
+func NewMergedRuns(inputs [2]arrow.Array) *MergedRuns {
+	// inputs is a fixed-size array of two elements, so there is no empty
+	// case to handle here; zero-length arrays are dealt with by Next, which
+	// reports the end immediately.
+	mr := &MergedRuns{inputs: inputs, logicalLen: inputs[0].Len()}
+	for i, in := range inputs {
+		if in.DataType().ID() != arrow.RUN_END_ENCODED {
+			panic("arrow/rle: NewMergedRuns can only be called with RunLengthEncoded arrays")
+		}
+		if in.Len() != mr.logicalLen {
+			panic("arrow/rle: can only merge runs of RLE arrays of the same length")
+		}
+
+		mr.inputRunEnds[i] = getRunEnds(in.Data().Children()[0])
+		// initialize the runIndex at the physical offset - 1 so the first
+		// call to Next will increment it to the correct initial offset
+		// since the initial state is logicalPos == 0 and mergedEnd == 0
+		mr.runIndex[i] = int64(FindPhysicalOffset(in.Data())) - 1
+	}
+
+	return mr
+}
+
+// Next returns true if there are more values/runs to iterate and false
+// when one of the arrays has reached the end.
+func (mr *MergedRuns) Next() bool {
+ // the next merged run starts where the previous one ended
+ mr.logicalPos = int(mr.mergedEnd)
+ if mr.isEnd() {
+ return false
+ }
+
+ // advance every input whose current run ended exactly at the new position
+ for i := range mr.inputs {
+ if mr.logicalPos == int(mr.runEnds[i]) {
+ mr.runIndex[i]++
+ }
+ }
+ // recompute the per-input run ends and the end of the new merged run
+ mr.findMergedRun()
+
+ return true
+}
+
+// IndexIntoBuffer returns the physical index of the current run in the
+// value buffer of the selected input (id 0 is the first array, id 1 the
+// second). The offset of the values child is added to the run index, so
+// the result addresses the value *buffer* of the child directly.
+func (mr *MergedRuns) IndexIntoBuffer(id int) int64 {
+	valuesOffset := mr.inputs[id].Data().Children()[1].Offset()
+	return mr.runIndex[id] + int64(valuesOffset)
+}
+
+// IndexIntoArray returns the current run index of the selected input
+// without adding the offset of the values child (unlike IndexIntoBuffer),
+// so it can be used with the .Value method on the array to get the
+// correct value.
+func (mr *MergedRuns) IndexIntoArray(id int) int64 {
+	return mr.runIndex[id]
+}
+
+// RunLength returns the logical length of the merged run currently being
+// looked at, i.e. the distance from its start position to its end.
+func (mr *MergedRuns) RunLength() int64 {
+	start := int64(mr.logicalPos)
+	return mr.mergedEnd - start
+}
+
+// AccumulatedRunLength returns the logical position at which the current
+// merged run ends.
+func (mr *MergedRuns) AccumulatedRunLength() int64 {
+	return mr.mergedEnd
+}
+
+// findMergedRun recomputes, for every input, the logical end of the run it
+// is currently in (relative to the input's offset and capped at the shared
+// logical length) and stores the smallest of those ends as the end of the
+// merged run.
+func (mr *MergedRuns) findMergedRun() {
+	mr.mergedEnd = math.MaxInt64
+	for i, in := range mr.inputs {
+		// logical index of the end of the run this input is currently in
+		end := mr.inputRunEnds[i](mr.runIndex[i]) - int64(in.Data().Offset())
+		mr.runEnds[i] = end
+		// a sliced array may stop in the middle of a run, so cap the run end
+		// at the logical length
+		if mr.logicalLen < int(end) {
+			mr.runEnds[i] = int64(mr.logicalLen)
+		}
+		if mr.runEnds[i] < mr.mergedEnd {
+			mr.mergedEnd = mr.runEnds[i]
+		}
+	}
+}
+
+// isEnd reports whether the current logical position has reached the
+// logical length of the inputs.
+func (mr *MergedRuns) isEnd() bool {
+	return mr.logicalPos == mr.logicalLen
+}
diff --git a/go/arrow/encoded/ree_utils_test.go b/go/arrow/encoded/ree_utils_test.go
new file mode 100644
index 0000000000..5506d3d9e2
--- /dev/null
+++ b/go/arrow/encoded/ree_utils_test.go
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoded_test
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/apache/arrow/go/v11/arrow"
+ "github.com/apache/arrow/go/v11/arrow/array"
+ "github.com/apache/arrow/go/v11/arrow/encoded"
+ "github.com/apache/arrow/go/v11/arrow/memory"
+ "github.com/stretchr/testify/assert"
+)
+
+ // TestFindPhysicalOffset checks encoded.FindPhysicalOffset on run-end encoded data created with a range of logical offsets.
+ func TestFindPhysicalOffset(t *testing.T) {
+ 	cases := []struct {
+ 		runEnds  []int32
+ 		logical  int
+ 		physical int
+ 	}{
+ 		{runEnds: []int32{1}, logical: 0, physical: 0},
+ 		{runEnds: []int32{1, 2, 3}, logical: 0, physical: 0},
+ 		{runEnds: []int32{1, 2, 3}, logical: 1, physical: 1},
+ 		{runEnds: []int32{1, 2, 3}, logical: 2, physical: 2},
+ 		{runEnds: []int32{2, 3, 4}, logical: 0, physical: 0},
+ 		{runEnds: []int32{2, 3, 4}, logical: 1, physical: 0},
+ 		{runEnds: []int32{2, 3, 4}, logical: 2, physical: 1},
+ 		{runEnds: []int32{2, 3, 4}, logical: 3, physical: 2},
+ 		{runEnds: []int32{2, 4, 6}, logical: 3, physical: 1},
+ 		{runEnds: []int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025, 1050}, logical: 1000, physical: 10},
+ 		// an out-of-range logical offset should return len(runEnds)
+ 		{runEnds: []int32{2, 4, 6}, logical: 6, physical: 3},
+ 		{runEnds: []int32{2, 4, 6}, logical: 10000, physical: 3},
+ 	}
+
+ 	reeType := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32)
+ 	for _, tc := range cases {
+ 		t.Run(fmt.Sprintf("%v find %d", tc.runEnds, tc.logical), func(t *testing.T) {
+ 			child := array.NewData(arrow.PrimitiveTypes.Int32, len(tc.runEnds), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(tc.runEnds))}, nil, 0, 0)
+ 			arr := array.NewData(reeType, -1, nil, []arrow.ArrayData{child}, 0, tc.logical)
+ 			assert.Equal(t, tc.physical, encoded.FindPhysicalOffset(arr))
+ 		})
+ 	}
+ }
+
+ // TestMergedRunsIter iterates MergedRuns over sliced run-end encoded arrays and checks each merged run.
+ func TestMergedRunsIter(t *testing.T) {
+ 	alloc := memory.NewCheckedAllocator(memory.DefaultAllocator)
+ 	defer alloc.AssertSize(t, 0)
+
+ 	leftRunEnds, _, _ := array.FromJSON(alloc, arrow.PrimitiveTypes.Int32,
+ 		strings.NewReader(`[1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025, 30000]`))
+ 	defer leftRunEnds.Release()
+
+ 	rightRunEnds, _, _ := array.FromJSON(alloc, arrow.PrimitiveTypes.Int32,
+ 		strings.NewReader(`[1, 2, 3, 4, 5, 2005, 2009, 2025, 2050]`))
+ 	defer rightRunEnds.Release()
+
+ 	// offsets used to slice the run-end encoded parents and their value children
+ 	const (
+ 		leftParentOffset, leftChildOffset   = 1000, 100
+ 		rightParentOffset, rightChildOffset = 2000, 200
+ 	)
+ 	wantRunLengths := []int32{5, 4, 6, 5, 5, 25}
+ 	wantLeftVisits := []int32{110, 111, 111, 112, 113, 114}
+ 	wantRightVisits := []int32{205, 206, 207, 207, 207, 208}
+
+ 	// the value children are null arrays sliced so that they carry a non-zero offset
+ 	leftNulls := array.NewNull(leftChildOffset + leftRunEnds.Len())
+ 	rightNulls := array.NewNull(rightChildOffset + rightRunEnds.Len())
+ 	leftChild := array.NewSlice(leftNulls, int64(leftChildOffset), int64(leftChildOffset)+int64(leftRunEnds.Len()))
+ 	rightChild := array.NewSlice(rightNulls, int64(rightChildOffset), int64(rightNulls.Len()))
+
+ 	leftFull := array.NewRunEndEncodedArray(leftRunEnds, leftChild, 1050, 0)
+ 	defer leftFull.Release()
+ 	rightFull := array.NewRunEndEncodedArray(rightRunEnds, rightChild, 2050, 0)
+ 	defer rightFull.Release()
+
+ 	leftArray := array.NewSlice(leftFull, int64(leftParentOffset), int64(leftFull.Len()))
+ 	defer leftArray.Release()
+ 	rightArray := array.NewSlice(rightFull, int64(rightParentOffset), int64(rightFull.Len()))
+ 	defer rightArray.Release()
+
+ 	// merge left against right; a merged run ends wherever either input's run ends
+ 	runs := encoded.NewMergedRuns([2]arrow.Array{leftArray, rightArray})
+ 	visited, covered := 0, 0
+ 	for ; runs.Next(); visited++ {
+ 		assert.EqualValues(t, wantRunLengths[visited], runs.RunLength())
+ 		assert.EqualValues(t, wantLeftVisits[visited], runs.IndexIntoBuffer(0))
+ 		assert.EqualValues(t, wantRightVisits[visited], runs.IndexIntoBuffer(1))
+ 		assert.EqualValues(t, wantLeftVisits[visited]-int32(leftChildOffset), runs.IndexIntoArray(0))
+ 		assert.EqualValues(t, wantRightVisits[visited]-int32(rightChildOffset), runs.IndexIntoArray(1))
+ 		covered += int(runs.RunLength())
+ 		assert.EqualValues(t, covered, runs.AccumulatedRunLength())
+ 	}
+ 	assert.EqualValues(t, len(wantRunLengths), visited)
+
+ 	t.Run("left array only", func(t *testing.T) {
+ 		// merging an array with itself must reproduce that array's own runs
+ 		wantLeftOnly := []int32{5, 10, 5, 5, 25}
+ 		runs := encoded.NewMergedRuns([2]arrow.Array{leftArray, leftArray})
+ 		visited, covered := 0, 0
+ 		for ; runs.Next(); visited++ {
+ 			assert.EqualValues(t, wantLeftOnly[visited], runs.RunLength())
+ 			assert.EqualValues(t, 110+visited, runs.IndexIntoBuffer(0))
+ 			assert.EqualValues(t, 110+visited, runs.IndexIntoBuffer(1))
+ 			assert.EqualValues(t, 10+visited, runs.IndexIntoArray(0))
+ 			assert.EqualValues(t, 10+visited, runs.IndexIntoArray(1))
+ 			covered += int(runs.RunLength())
+ 			assert.EqualValues(t, covered, runs.AccumulatedRunLength())
+ 		}
+ 		assert.EqualValues(t, len(wantLeftOnly), visited)
+ 	})
+
+ 	t.Run("right array only", func(t *testing.T) {
+ 		// merging an array with itself must reproduce that array's own runs
+ 		wantRightOnly := []int32{5, 4, 16, 25}
+ 		runs := encoded.NewMergedRuns([2]arrow.Array{rightArray, rightArray})
+ 		visited, covered := 0, 0
+ 		for ; runs.Next(); visited++ {
+ 			assert.EqualValues(t, wantRightOnly[visited], runs.RunLength())
+ 			assert.EqualValues(t, 205+visited, runs.IndexIntoBuffer(0))
+ 			assert.EqualValues(t, 205+visited, runs.IndexIntoBuffer(1))
+ 			assert.EqualValues(t, 5+visited, runs.IndexIntoArray(0))
+ 			assert.EqualValues(t, 5+visited, runs.IndexIntoArray(1))
+ 			covered += int(runs.RunLength())
+ 			assert.EqualValues(t, covered, runs.AccumulatedRunLength())
+ 		}
+ 		assert.EqualValues(t, len(wantRightOnly), visited)
+ 	})
+ }
diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go
index d2a9c35e5c..5344bafab5 100644
--- a/go/arrow/internal/utils.go
+++ b/go/arrow/internal/utils.go
@@ -40,7 +40,7 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool {
}
switch id {
- case arrow.NULL, arrow.DENSE_UNION, arrow.SPARSE_UNION:
+ case arrow.NULL, arrow.DENSE_UNION, arrow.SPARSE_UNION, arrow.RUN_END_ENCODED:
return false
}
return true
diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go
index c03f380699..d91a10c8d3 100644
--- a/go/arrow/scalar/scalar.go
+++ b/go/arrow/scalar/scalar.go
@@ -483,36 +483,27 @@ var makeNullFn [64]scalarMakeNullFn
func init() {
makeNullFn = [...]scalarMakeNullFn{
- arrow.NULL: func(dt arrow.DataType) Scalar { return ScalarNull },
- arrow.BOOL: func(dt arrow.DataType) Scalar { return &Boolean{scalar: scalar{dt, false}} },
- arrow.UINT8: func(dt arrow.DataType) Scalar { return &Uint8{scalar: scalar{dt, false}} },
- arrow.INT8: func(dt arrow.DataType) Scalar { return &Int8{scalar: scalar{dt, false}} },
- arrow.UINT16: func(dt arrow.DataType) Scalar { return &Uint16{scalar: scalar{dt, false}} },
- arrow.INT16: func(dt arrow.DataType) Scalar { return &Int16{scalar: scalar{dt, false}} },
- arrow.UINT32: func(dt arrow.DataType) Scalar { return &Uint32{scalar: scalar{dt, false}} },
- arrow.INT32: func(dt arrow.DataType) Scalar { return &Int32{scalar: scalar{dt, false}} },
- arrow.UINT64: func(dt arrow.DataType) Scalar { return &Uint64{scalar: scalar{dt, false}} },
- arrow.INT64: func(dt arrow.DataType) Scalar { return &Int64{scalar: scalar{dt, false}} },
- arrow.FLOAT16: func(dt arrow.DataType) Scalar { return &Float16{scalar: scalar{dt, false}} },
- arrow.FLOAT32: func(dt arrow.DataType) Scalar { return &Float32{scalar: scalar{dt, false}} },
- arrow.FLOAT64: func(dt arrow.DataType) Scalar { return &Float64{scalar: scalar{dt, false}} },
- arrow.STRING: func(dt arrow.DataType) Scalar { return &String{&Binary{scalar: scalar{dt, false}}} },
- arrow.BINARY: func(dt arrow.DataType) Scalar { return &Binary{scalar: scalar{dt, false}} },
- arrow.FIXED_SIZE_BINARY: func(dt arrow.DataType) Scalar { return &FixedSizeBinary{&Binary{scalar: scalar{dt, false}}} },
- arrow.DATE32: func(dt arrow.DataType) Scalar { return &Date32{scalar: scalar{dt, false}} },
- arrow.DATE64: func(dt arrow.DataType) Scalar { return &Date64{scalar: scalar{dt, false}} },
- arrow.TIMESTAMP: func(dt arrow.DataType) Scalar { return &Timestamp{scalar: scalar{dt, false}} },
- arrow.TIME32: func(dt arrow.DataType) Scalar { return &Time32{scalar: scalar{dt, false}} },
- arrow.TIME64: func(dt arrow.DataType) Scalar { return &Time64{scalar: scalar{dt, false}} },
- arrow.INTERVAL: func(dt arrow.DataType) Scalar {
- if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthInterval) {
- return &MonthInterval{scalar: scalar{dt, false}}
- }
- if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthDayNanoInterval) {
- return &MonthDayNanoInterval{scalar: scalar{dt, false}}
- }
- return &DayTimeInterval{scalar: scalar{dt, false}}
- },
+ arrow.NULL: func(dt arrow.DataType) Scalar { return ScalarNull },
+ arrow.BOOL: func(dt arrow.DataType) Scalar { return &Boolean{scalar: scalar{dt, false}} },
+ arrow.UINT8: func(dt arrow.DataType) Scalar { return &Uint8{scalar: scalar{dt, false}} },
+ arrow.INT8: func(dt arrow.DataType) Scalar { return &Int8{scalar: scalar{dt, false}} },
+ arrow.UINT16: func(dt arrow.DataType) Scalar { return &Uint16{scalar: scalar{dt, false}} },
+ arrow.INT16: func(dt arrow.DataType) Scalar { return &Int16{scalar: scalar{dt, false}} },
+ arrow.UINT32: func(dt arrow.DataType) Scalar { return &Uint32{scalar: scalar{dt, false}} },
+ arrow.INT32: func(dt arrow.DataType) Scalar { return &Int32{scalar: scalar{dt, false}} },
+ arrow.UINT64: func(dt arrow.DataType) Scalar { return &Uint64{scalar: scalar{dt, false}} },
+ arrow.INT64: func(dt arrow.DataType) Scalar { return &Int64{scalar: scalar{dt, false}} },
+ arrow.FLOAT16: func(dt arrow.DataType) Scalar { return &Float16{scalar: scalar{dt, false}} },
+ arrow.FLOAT32: func(dt arrow.DataType) Scalar { return &Float32{scalar: scalar{dt, false}} },
+ arrow.FLOAT64: func(dt arrow.DataType) Scalar { return &Float64{scalar: scalar{dt, false}} },
+ arrow.STRING: func(dt arrow.DataType) Scalar { return &String{&Binary{scalar: scalar{dt, false}}} },
+ arrow.BINARY: func(dt arrow.DataType) Scalar { return &Binary{scalar: scalar{dt, false}} },
+ arrow.FIXED_SIZE_BINARY: func(dt arrow.DataType) Scalar { return &FixedSizeBinary{&Binary{scalar: scalar{dt, false}}} },
+ arrow.DATE32: func(dt arrow.DataType) Scalar { return &Date32{scalar: scalar{dt, false}} },
+ arrow.DATE64: func(dt arrow.DataType) Scalar { return &Date64{scalar: scalar{dt, false}} },
+ arrow.TIMESTAMP: func(dt arrow.DataType) Scalar { return &Timestamp{scalar: scalar{dt, false}} },
+ arrow.TIME32: func(dt arrow.DataType) Scalar { return &Time32{scalar: scalar{dt, false}} },
+ arrow.TIME64: func(dt arrow.DataType) Scalar { return &Time64{scalar: scalar{dt, false}} },
arrow.INTERVAL_MONTHS: func(dt arrow.DataType) Scalar { return &MonthInterval{scalar: scalar{dt, false}} },
arrow.INTERVAL_DAY_TIME: func(dt arrow.DataType) Scalar { return &DayTimeInterval{scalar: scalar{dt, false}} },
arrow.INTERVAL_MONTH_DAY_NANO: func(dt arrow.DataType) Scalar { return &MonthDayNanoInterval{scalar: scalar{dt, false}} },
diff --git a/go/arrow/type_string.go b/go/arrow/type_string.go
index 3ba93f30d5..41a4073863 100644
--- a/go/arrow/type_string.go
+++ b/go/arrow/type_string.go
@@ -46,12 +46,12 @@ func _() {
_ = x[LARGE_BINARY-35]
_ = x[LARGE_LIST-36]
_ = x[INTERVAL_MONTH_DAY_NANO-37]
- _ = x[INTERVAL-38]
+ _ = x[RUN_END_ENCODED-38]
}
-const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANOINTERVAL"
+const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANORUN_END_ENCODED"
-var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 328}
+var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 335}
func (i Type) String() string {
if i < 0 || i >= Type(len(_Type_index)-1) {