Posted to commits@arrow.apache.org by ze...@apache.org on 2023/01/19 22:41:35 UTC

[arrow] branch master updated: GH-32946: [Go] Implement REE Array and Compare (#14111)

This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 9a1373452f GH-32946: [Go] Implement REE Array and Compare (#14111)
9a1373452f is described below

commit 9a1373452ff5b4cf41cc371e0585d8dda91ffd36
Author: Matt Topol <zo...@gmail.com>
AuthorDate: Thu Jan 19 17:41:28 2023 -0500

    GH-32946: [Go] Implement REE Array and Compare (#14111)
    
    
    * Closes: #32946
    
    Authored-by: Matt Topol <zo...@gmail.com>
    Signed-off-by: Matt Topol <zo...@gmail.com>
---
 go/arrow/array/array.go                |   2 +-
 go/arrow/array/builder.go              |   9 --
 go/arrow/array/compare.go              |   6 +
 go/arrow/array/encoded.go              | 158 +++++++++++++++++++++++++
 go/arrow/array/encoded_test.go         | 209 +++++++++++++++++++++++++++++++++
 go/arrow/compare.go                    |   4 +
 go/arrow/compute/internal/exec/span.go |   2 +
 go/arrow/datatype.go                   |   7 +-
 go/arrow/datatype_encoded.go           |  64 ++++++++++
 go/arrow/encoded/ree_utils.go          | 202 +++++++++++++++++++++++++++++++
 go/arrow/encoded/ree_utils_test.go     | 148 +++++++++++++++++++++++
 go/arrow/internal/utils.go             |   2 +-
 go/arrow/scalar/scalar.go              |  51 ++++----
 go/arrow/type_string.go                |   6 +-
 14 files changed, 820 insertions(+), 50 deletions(-)
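
For context, run-end encoding stores each run once: a run-ends child gives the logical end index of every run and a values child gives the value for that run. Below is a minimal sketch of the new constructor in use; the package main harness and the literal run ends/values are illustrative, while NewRunEndEncodedArray, FromJSON, GetPhysicalLength, and the String form come from this change:

package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v11/arrow"
	"github.com/apache/arrow/go/v11/arrow/array"
	"github.com/apache/arrow/go/v11/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	// run ends [3, 5, 6] with values ["a", "b", null] encode the six
	// logical values ["a", "a", "a", "b", "b", null]
	runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[3, 5, 6]`))
	defer runEnds.Release()
	values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", "b", null]`))
	defer values.Release()

	ree := array.NewRunEndEncodedArray(runEnds, values, 6, 0)
	defer ree.Release()

	fmt.Println(ree.Len())               // 6 logical elements
	fmt.Println(ree.GetPhysicalLength()) // 3 physical runs
	fmt.Println(ree)                     // one {run end -> value} pair per run
}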

diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index ee8c216d08..34efc8c5e2 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -175,8 +175,8 @@ func init() {
 		arrow.LARGE_STRING:            func(data arrow.ArrayData) arrow.Array { return NewLargeStringData(data) },
 		arrow.LARGE_BINARY:            func(data arrow.ArrayData) arrow.Array { return NewLargeBinaryData(data) },
 		arrow.LARGE_LIST:              func(data arrow.ArrayData) arrow.Array { return NewLargeListData(data) },
-		arrow.INTERVAL:                func(data arrow.ArrayData) arrow.Array { return NewIntervalData(data) },
 		arrow.INTERVAL_MONTH_DAY_NANO: func(data arrow.ArrayData) arrow.Array { return NewMonthDayNanoIntervalData(data) },
+		arrow.RUN_END_ENCODED:         func(data arrow.ArrayData) arrow.Array { return NewRunEndEncodedData(data) },
 
 		// invalid data types to fill out array to size 2^6 - 1
 		63: invalidDataType,
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index 8eebd5ac64..6a0d58ec2e 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -281,15 +281,6 @@ func NewBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
 	case arrow.TIME64:
 		typ := dtype.(*arrow.Time64Type)
 		return NewTime64Builder(mem, typ)
-	case arrow.INTERVAL:
-		switch dtype.(type) {
-		case *arrow.DayTimeIntervalType:
-			return NewDayTimeIntervalBuilder(mem)
-		case *arrow.MonthIntervalType:
-			return NewMonthIntervalBuilder(mem)
-		case *arrow.MonthDayNanoIntervalType:
-			return NewMonthDayNanoIntervalBuilder(mem)
-		}
 	case arrow.INTERVAL_MONTHS:
 		return NewMonthIntervalBuilder(mem)
 	case arrow.INTERVAL_DAY_TIME:
diff --git a/go/arrow/array/compare.go b/go/arrow/array/compare.go
index 68143e0086..7d15134e54 100644
--- a/go/arrow/array/compare.go
+++ b/go/arrow/array/compare.go
@@ -333,6 +333,9 @@ func Equal(left, right arrow.Array) bool {
 	case *DenseUnion:
 		r := right.(*DenseUnion)
 		return arrayDenseUnionEqual(l, r)
+	case *RunEndEncoded:
+		r := right.(*RunEndEncoded)
+		return arrayRunEndEncodedEqual(l, r)
 	default:
 		panic(fmt.Errorf("arrow/array: unknown array type %T", l))
 	}
@@ -591,6 +594,9 @@ func arrayApproxEqual(left, right arrow.Array, opt equalOption) bool {
 	case *DenseUnion:
 		r := right.(*DenseUnion)
 		return arrayDenseUnionApproxEqual(l, r, opt)
+	case *RunEndEncoded:
+		r := right.(*RunEndEncoded)
+		return arrayRunEndEncodedApproxEqual(l, r, opt)
 	default:
 		panic(fmt.Errorf("arrow/array: unknown array type %T", l))
 	}
diff --git a/go/arrow/array/encoded.go b/go/arrow/array/encoded.go
new file mode 100644
index 0000000000..ddb8ef1c8b
--- /dev/null
+++ b/go/arrow/array/encoded.go
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/apache/arrow/go/v11/arrow"
+	"github.com/apache/arrow/go/v11/arrow/encoded"
+	"github.com/apache/arrow/go/v11/arrow/internal/debug"
+	"github.com/apache/arrow/go/v11/arrow/memory"
+	"github.com/goccy/go-json"
+)
+
+// RunEndEncoded represents an array containing two children:
+// an array of int16, int32, or int64 values defining the ends of
+// each run of values and an array of the corresponding values
+type RunEndEncoded struct {
+	array
+
+	ends   arrow.Array
+	values arrow.Array
+}
+
+func NewRunEndEncodedArray(runEnds, values arrow.Array, logicalLength, offset int) *RunEndEncoded {
+	data := NewData(arrow.RunEndEncodedOf(runEnds.DataType(), values.DataType()), logicalLength,
+		[]*memory.Buffer{nil}, []arrow.ArrayData{runEnds.Data(), values.Data()}, 0, offset)
+	defer data.Release()
+	return NewRunEndEncodedData(data)
+}
+
+func NewRunEndEncodedData(data arrow.ArrayData) *RunEndEncoded {
+	r := &RunEndEncoded{}
+	r.refCount = 1
+	r.setData(data.(*Data))
+	return r
+}
+
+func (r *RunEndEncoded) Values() arrow.Array     { return r.values }
+func (r *RunEndEncoded) RunEndsArr() arrow.Array { return r.ends }
+
+func (r *RunEndEncoded) Retain() {
+	r.array.Retain()
+	r.values.Retain()
+	r.ends.Retain()
+}
+
+func (r *RunEndEncoded) Release() {
+	r.array.Release()
+	r.values.Release()
+	r.ends.Release()
+}
+
+func (r *RunEndEncoded) setData(data *Data) {
+	if len(data.childData) != 2 {
+		panic(fmt.Errorf("%w: arrow/array: RLE array must have exactly 2 children", arrow.ErrInvalid))
+	}
+	debug.Assert(data.dtype.ID() == arrow.RUN_END_ENCODED, "invalid type for RunLengthEncoded")
+	if !data.dtype.(*arrow.RunEndEncodedType).ValidRunEndsType(data.childData[0].DataType()) {
+		panic(fmt.Errorf("%w: arrow/array: run ends array must be int16, int32, or int64", arrow.ErrInvalid))
+	}
+	if data.childData[0].NullN() > 0 {
+		panic(fmt.Errorf("%w: arrow/array: run ends array cannot contain nulls", arrow.ErrInvalid))
+	}
+
+	r.array.setData(data)
+
+	r.ends = MakeFromData(r.data.childData[0])
+	r.values = MakeFromData(r.data.childData[1])
+}
+
+func (r *RunEndEncoded) GetPhysicalOffset() int {
+	return encoded.FindPhysicalOffset(r.data)
+}
+
+func (r *RunEndEncoded) GetPhysicalLength() int {
+	return encoded.GetPhysicalLength(r.data)
+}
+
+func (r *RunEndEncoded) String() string {
+	var buf bytes.Buffer
+	buf.WriteByte('[')
+	for i := 0; i < r.ends.Len(); i++ {
+		if i != 0 {
+			buf.WriteByte(',')
+		}
+		fmt.Fprintf(&buf, "{%v -> %v}",
+			r.ends.(arraymarshal).getOneForMarshal(i),
+			r.values.(arraymarshal).getOneForMarshal(i))
+	}
+
+	buf.WriteByte(']')
+	return buf.String()
+}
+
+func (r *RunEndEncoded) getOneForMarshal(i int) interface{} {
+	return [2]interface{}{r.ends.(arraymarshal).getOneForMarshal(i),
+		r.values.(arraymarshal).getOneForMarshal(i)}
+}
+
+func (r *RunEndEncoded) MarshalJSON() ([]byte, error) {
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	buf.WriteByte('[')
+	for i := 0; i < r.ends.Len(); i++ {
+		if i != 0 {
+			buf.WriteByte(',')
+		}
+		if err := enc.Encode(r.getOneForMarshal(i)); err != nil {
+			return nil, err
+		}
+	}
+	buf.WriteByte(']')
+	return buf.Bytes(), nil
+}
+
+func arrayRunEndEncodedEqual(l, r *RunEndEncoded) bool {
+	// types were already checked before getting here, so we know
+	// the encoded types are equal
+	mr := encoded.NewMergedRuns([2]arrow.Array{l, r})
+	for mr.Next() {
+		lIndex := mr.IndexIntoArray(0)
+		rIndex := mr.IndexIntoArray(1)
+		if !SliceEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1) {
+			return false
+		}
+	}
+	return true
+}
+
+func arrayRunEndEncodedApproxEqual(l, r *RunEndEncoded, opt equalOption) bool {
+	// types were already checked before getting here, so we know
+	// the encoded types are equal
+	mr := encoded.NewMergedRuns([2]arrow.Array{l, r})
+	for mr.Next() {
+		lIndex := mr.IndexIntoArray(0)
+		rIndex := mr.IndexIntoArray(1)
+		if !sliceApproxEqual(l.values, lIndex, lIndex+1, r.values, rIndex, rIndex+1, opt) {
+			return false
+		}
+	}
+	return true
+}
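
The equality helpers above walk the merged runs rather than comparing run boundaries directly, so two different encodings of the same logical values compare equal. A sketch under that assumption (the package main harness and the literal data are illustrative):

package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v11/arrow"
	"github.com/apache/arrow/go/v11/arrow/array"
	"github.com/apache/arrow/go/v11/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	// one run of "x" (length 3) followed by one run of "y" (length 3)
	endsA, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[3, 6]`))
	valsA, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["x", "y"]`))
	a := array.NewRunEndEncodedArray(endsA, valsA, 6, 0)
	defer a.Release()

	// the same six logical values, but the run of "x" is split into two runs
	endsB, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[1, 3, 6]`))
	valsB, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["x", "x", "y"]`))
	b := array.NewRunEndEncodedArray(endsB, valsB, 6, 0)
	defer b.Release()

	// comparison iterates the merged runs, so differing run layouts still match
	fmt.Println(array.Equal(a, b)) // true
}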
diff --git a/go/arrow/array/encoded_test.go b/go/arrow/array/encoded_test.go
new file mode 100644
index 0000000000..4619e24442
--- /dev/null
+++ b/go/arrow/array/encoded_test.go
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package array_test
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow/go/v11/arrow"
+	"github.com/apache/arrow/go/v11/arrow/array"
+	"github.com/apache/arrow/go/v11/arrow/memory"
+	"github.com/stretchr/testify/assert"
+)
+
+var (
+	stringValues, _, _ = array.FromJSON(memory.DefaultAllocator, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "World", null]`))
+	int32Values, _, _  = array.FromJSON(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30]`))
+	int32OnlyNull      = array.MakeArrayOfNull(memory.DefaultAllocator, arrow.PrimitiveTypes.Int32, 3)
+)
+
+func TestMakeRLEArray(t *testing.T) {
+	rleArr := array.NewRunEndEncodedArray(int32Values, stringValues, 3, 0)
+	defer rleArr.Release()
+
+	arrData := rleArr.Data()
+	newArr := array.MakeFromData(arrData)
+	defer newArr.Release()
+
+	assert.Same(t, newArr.Data(), arrData)
+	assert.IsType(t, (*array.RunEndEncoded)(nil), newArr)
+}
+
+func TestRLEFromRunEndsAndValues(t *testing.T) {
+	rleArray := array.NewRunEndEncodedArray(int32Values, int32Values, 3, 0)
+	defer rleArray.Release()
+
+	assert.EqualValues(t, 3, rleArray.Len())
+	assert.Truef(t, array.Equal(int32Values, rleArray.Values()), "expected: %s\ngot: %s", int32Values, rleArray.Values())
+	assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr())
+	assert.Zero(t, rleArray.Offset())
+	assert.Zero(t, rleArray.Data().NullN())
+	// one dummy buffer, since code may assume there's at least one nil buffer
+	assert.Len(t, rleArray.Data().Buffers(), 1)
+
+	// explicit offset
+	rleArray = array.NewRunEndEncodedArray(int32Values, stringValues, 2, 1)
+	defer rleArray.Release()
+
+	assert.EqualValues(t, 2, rleArray.Len())
+	assert.Truef(t, array.Equal(stringValues, rleArray.Values()), "expected: %s\ngot: %s", stringValues, rleArray.Values())
+	assert.Truef(t, array.Equal(int32Values, rleArray.RunEndsArr()), "expected: %s\ngot: %s", int32Values, rleArray.RunEndsArr())
+	assert.EqualValues(t, 1, rleArray.Offset())
+	assert.Zero(t, rleArray.Data().NullN())
+
+	assert.PanicsWithError(t, "invalid: arrow/array: run ends array must be int16, int32, or int64", func() {
+		array.NewRunEndEncodedArray(stringValues, int32Values, 3, 0)
+	})
+	assert.PanicsWithError(t, "invalid: arrow/array: run ends array cannot contain nulls", func() {
+		array.NewRunEndEncodedArray(int32OnlyNull, int32Values, 3, 0)
+	})
+}
+
+func TestRunLengthEncodedOffsetLength(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[100, 200, 300, 400, 500]`))
+	defer runEnds.Release()
+
+	values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["Hello", "beautiful", "world", "of", "RLE"]`))
+	defer values.Release()
+
+	rleArray := array.NewRunEndEncodedArray(runEnds, values, 500, 0)
+	defer rleArray.Release()
+
+	assert.EqualValues(t, 5, rleArray.GetPhysicalLength())
+	assert.EqualValues(t, 0, rleArray.GetPhysicalOffset())
+
+	slice := array.NewSlice(rleArray, 199, 204).(*array.RunEndEncoded)
+	defer slice.Release()
+
+	assert.EqualValues(t, 2, slice.GetPhysicalLength())
+	assert.EqualValues(t, 1, slice.GetPhysicalOffset())
+
+	slice2 := array.NewSlice(rleArray, 199, 300).(*array.RunEndEncoded)
+	defer slice2.Release()
+
+	assert.EqualValues(t, 2, slice2.GetPhysicalLength())
+	assert.EqualValues(t, 1, slice2.GetPhysicalOffset())
+
+	slice3 := array.NewSlice(rleArray, 400, 500).(*array.RunEndEncoded)
+	defer slice3.Release()
+
+	assert.EqualValues(t, 1, slice3.GetPhysicalLength())
+	assert.EqualValues(t, 4, slice3.GetPhysicalOffset())
+
+	slice4 := array.NewSlice(rleArray, 0, 150).(*array.RunEndEncoded)
+	defer slice4.Release()
+
+	assert.EqualValues(t, 2, slice4.GetPhysicalLength())
+	assert.EqualValues(t, 0, slice4.GetPhysicalOffset())
+
+	zeroLengthAtEnd := array.NewSlice(rleArray, 500, 500).(*array.RunEndEncoded)
+	defer zeroLengthAtEnd.Release()
+
+	assert.EqualValues(t, 0, zeroLengthAtEnd.GetPhysicalLength())
+	assert.EqualValues(t, 5, zeroLengthAtEnd.GetPhysicalOffset())
+}
+
+func TestRLECompare(t *testing.T) {
+	rleArray := array.NewRunEndEncodedArray(int32Values, stringValues, 30, 0)
+	// second that is a copy of the first
+	standardEquals := array.MakeFromData(rleArray.Data().(*array.Data).Copy())
+
+	defer rleArray.Release()
+	defer standardEquals.Release()
+
+	assert.Truef(t, array.Equal(rleArray, standardEquals), "left: %s\nright: %s", rleArray, standardEquals)
+	assert.False(t, array.Equal(array.NewSlice(rleArray, 0, 29), array.NewSlice(rleArray, 1, 30)))
+
+	// array that is logically the same as our rleArray, but has 2 small
+	// runs for the first value instead of one large run
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	t.Run("logical duplicate", func(t *testing.T) {
+		dupRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[5, 10, 20, 30]`))
+		defer dupRunEnds.Release()
+		strValues, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String,
+			strings.NewReader(`["Hello", "Hello", "World", null]`))
+		defer strValues.Release()
+
+		dupArr := array.NewRunEndEncodedArray(dupRunEnds, strValues, 30, 0)
+		defer dupArr.Release()
+
+		assert.Truef(t, array.Equal(rleArray, dupArr), "expected: %s\ngot: %s", rleArray, dupArr)
+	})
+
+	t.Run("emptyArr", func(t *testing.T) {
+		emptyRuns, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[]`))
+		emptyVals, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`[]`))
+		defer emptyRuns.Release()
+		defer emptyVals.Release()
+
+		emptyArr := array.NewRunEndEncodedArray(emptyRuns, emptyVals, 0, 0)
+		defer emptyArr.Release()
+
+		dataCopy := emptyArr.Data().(*array.Data).Copy()
+		defer dataCopy.Release()
+		emptyArr2 := array.MakeFromData(dataCopy)
+		defer emptyArr2.Release()
+
+		assert.Truef(t, array.Equal(emptyArr, emptyArr2), "expected: %s\ngot: %s", emptyArr, emptyArr2)
+	})
+
+	t.Run("different offsets", func(t *testing.T) {
+		// three arrays that all represent the logical values [3, 3, 3, 4, 4, 4, 4] (after slicing A and B)
+		offsetsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[2, 5, 12, 58, 60]`))
+		offsetsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[81, 86, 99, 100]`))
+		offsetsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+			strings.NewReader(`[3, 7]`))
+		valsa, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[1, 2, 3, 4, 5]`))
+		valsb, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[2, 3, 4, 5]`))
+		valsc, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int64,
+			strings.NewReader(`[3, 4]`))
+		defer func() {
+			offsetsa.Release()
+			offsetsb.Release()
+			offsetsc.Release()
+			valsa.Release()
+			valsb.Release()
+			valsc.Release()
+		}()
+
+		differentOffsetsA := array.NewRunEndEncodedArray(offsetsa, valsa, 60, 0)
+		defer differentOffsetsA.Release()
+		differentOffsetsB := array.NewRunEndEncodedArray(offsetsb, valsb, 100, 0)
+		defer differentOffsetsB.Release()
+		differentOffsetsC := array.NewRunEndEncodedArray(offsetsc, valsc, 7, 0)
+		defer differentOffsetsC.Release()
+
+		sliceA := array.NewSlice(differentOffsetsA, 9, 16)
+		defer sliceA.Release()
+		sliceB := array.NewSlice(differentOffsetsB, 83, 90)
+		defer sliceB.Release()
+
+		assert.True(t, array.Equal(sliceA, sliceB))
+		assert.True(t, array.Equal(sliceA, differentOffsetsC))
+		assert.True(t, array.Equal(sliceB, differentOffsetsC))
+	})
+}
diff --git a/go/arrow/compare.go b/go/arrow/compare.go
index 511abe2238..19221a7f00 100644
--- a/go/arrow/compare.go
+++ b/go/arrow/compare.go
@@ -121,6 +121,10 @@ func TypeEqual(left, right DataType, opts ...TypeEqualOption) bool {
 	case *TimestampType:
 		r := right.(*TimestampType)
 		return l.Unit == r.Unit && l.TimeZone == r.TimeZone
+	case *RunEndEncodedType:
+		r := right.(*RunEndEncodedType)
+		return TypeEqual(l.Encoded(), r.Encoded(), opts...) &&
+			TypeEqual(l.ends, r.ends, opts...)
 	default:
 		return reflect.DeepEqual(left, right)
 	}
diff --git a/go/arrow/compute/internal/exec/span.go b/go/arrow/compute/internal/exec/span.go
index c0a8bff04d..f701b6f39c 100644
--- a/go/arrow/compute/internal/exec/span.go
+++ b/go/arrow/compute/internal/exec/span.go
@@ -590,6 +590,8 @@ type ExecSpan struct {
 
 func getNumBuffers(dt arrow.DataType) int {
 	switch dt.ID() {
+	case arrow.RUN_END_ENCODED:
+		return 0
 	case arrow.NULL, arrow.STRUCT, arrow.FIXED_SIZE_LIST:
 		return 1
 	case arrow.BINARY, arrow.LARGE_BINARY, arrow.STRING, arrow.LARGE_STRING, arrow.DENSE_UNION:
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index 94a07d73ef..627b1b417b 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -150,12 +150,7 @@ const (
 	// calendar interval with three fields
 	INTERVAL_MONTH_DAY_NANO
 
-	// INTERVAL could be any of the interval types, kept to avoid breaking anyone
-	// after switching to individual type ids for the interval types that were using
-	// it when calling MakeFromData or NewBuilder
-	//
-	// Deprecated and will be removed in the next major version release
-	INTERVAL
+	RUN_END_ENCODED
 
 	// Alias to ensure we do not break any consumers
 	DECIMAL = DECIMAL128
diff --git a/go/arrow/datatype_encoded.go b/go/arrow/datatype_encoded.go
new file mode 100644
index 0000000000..1263f0a9c1
--- /dev/null
+++ b/go/arrow/datatype_encoded.go
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package arrow
+
+type EncodedType interface {
+	DataType
+	Encoded() DataType
+}
+
+// RunEndEncodedType is the datatype to represent a run-end encoded
+// array of data.
+type RunEndEncodedType struct {
+	ends DataType
+	enc  DataType
+}
+
+func RunEndEncodedOf(runEnds, encoded DataType) *RunEndEncodedType {
+	return &RunEndEncodedType{ends: runEnds, enc: encoded}
+}
+
+func (*RunEndEncodedType) ID() Type     { return RUN_END_ENCODED }
+func (*RunEndEncodedType) Name() string { return "run_end_encoded" }
+func (*RunEndEncodedType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull()}}
+}
+
+func (t *RunEndEncodedType) String() string {
+	return t.Name() + "<run_ends: " + t.ends.String() + ", values: " + t.enc.String() + ">"
+}
+
+func (t *RunEndEncodedType) Fingerprint() string {
+	return typeFingerprint(t) + "{" + t.ends.Fingerprint() + ";" + t.enc.Fingerprint() + ";}"
+}
+
+func (t *RunEndEncodedType) Encoded() DataType { return t.enc }
+
+func (t *RunEndEncodedType) Fields() []Field {
+	return []Field{
+		{Name: "run_ends", Type: t.ends},
+		{Name: "values", Type: t.enc, Nullable: true},
+	}
+}
+
+func (*RunEndEncodedType) ValidRunEndsType(dt DataType) bool {
+	switch dt.ID() {
+	case INT16, INT32, INT64:
+		return true
+	}
+	return false
+}
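
A short sketch of the new type in use (the package main harness is illustrative; RunEndEncodedOf, Fields, and the TypeEqual behavior come from this change):

package main

import (
	"fmt"

	"github.com/apache/arrow/go/v11/arrow"
)

func main() {
	// run ends must be int16, int32, or int64; the values type is arbitrary
	dt := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String)

	fmt.Println(dt)                               // run_end_encoded<run_ends: int32, values: utf8>
	fmt.Println(dt.ID() == arrow.RUN_END_ENCODED) // true

	// nested fields: a non-nullable run_ends child and a nullable values child
	for _, f := range dt.Fields() {
		fmt.Println(f.Name, f.Type, f.Nullable)
	}

	// TypeEqual recurses into both the run-ends and values types
	same := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.BinaryTypes.String)
	diff := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int64, arrow.BinaryTypes.String)
	fmt.Println(arrow.TypeEqual(dt, same)) // true
	fmt.Println(arrow.TypeEqual(dt, diff)) // false
}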
diff --git a/go/arrow/encoded/ree_utils.go b/go/arrow/encoded/ree_utils.go
new file mode 100644
index 0000000000..6e78cca3bc
--- /dev/null
+++ b/go/arrow/encoded/ree_utils.go
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoded
+
+import (
+	"math"
+	"sort"
+
+	"github.com/apache/arrow/go/v11/arrow"
+)
+
+// FindPhysicalOffset performs a binary search on the run-ends to return
+// the appropriate physical offset into the values/run-ends that corresponds
+// with the logical offset defined in the array.
+//
+// For example, an array with run-ends [10, 20, 30, 40, 50] and a logical
+// offset of 25 will return the value 2. This returns the smallest offset
+// whose run-end is greater than the logical offset, which would also be the
+// offset index into the values that contains the correct value.
+//
+// This function assumes it receives Run End Encoded array data
+func FindPhysicalOffset(arr arrow.ArrayData) int {
+	data := arr.Children()[0]
+	logicalOffset := arr.Offset()
+
+	switch data.DataType().ID() {
+	case arrow.INT16:
+		runEnds := arrow.Int16Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int16(logicalOffset) })
+	case arrow.INT32:
+		runEnds := arrow.Int32Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int32(logicalOffset) })
+	case arrow.INT64:
+		runEnds := arrow.Int64Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[data.Offset() : data.Offset()+data.Len()]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int64(logicalOffset) })
+	default:
+		panic("only int16, int32, and int64 are allowed for the run-ends")
+	}
+}
+
+// GetPhysicalLength returns the physical number of values which are in
+// the passed in RunEndEncoded array data. This will take into account
+// the offset and length of the array as reported in the array data
+// (so that it properly handles slices).
+//
+// This function assumes it receives Run End Encoded array data
+func GetPhysicalLength(arr arrow.ArrayData) int {
+	if arr.Len() == 0 {
+		return 0
+	}
+
+	data := arr.Children()[0]
+	physicalOffset := FindPhysicalOffset(arr)
+	start, length := data.Offset()+physicalOffset, data.Len()-physicalOffset
+	offset := arr.Offset() + arr.Len() - 1
+
+	switch data.DataType().ID() {
+	case arrow.INT16:
+		runEnds := arrow.Int16Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int16(offset) }) + 1
+	case arrow.INT32:
+		runEnds := arrow.Int32Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int32(offset) }) + 1
+	case arrow.INT64:
+		runEnds := arrow.Int64Traits.CastFromBytes(data.Buffers()[1].Bytes())
+		runEnds = runEnds[start : start+length]
+		return sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > int64(offset) }) + 1
+	default:
+		panic("arrow/rle: can only get rle.PhysicalLength for int16/int32/int64 run ends array")
+	}
+}
+
+func getRunEnds(arr arrow.ArrayData) func(int64) int64 {
+	switch arr.DataType().ID() {
+	case arrow.INT16:
+		runEnds := arrow.Int16Traits.CastFromBytes(arr.Buffers()[1].Bytes())
+		runEnds = runEnds[arr.Offset() : arr.Offset()+arr.Len()]
+		return func(i int64) int64 { return int64(runEnds[i]) }
+	case arrow.INT32:
+		runEnds := arrow.Int32Traits.CastFromBytes(arr.Buffers()[1].Bytes())
+		runEnds = runEnds[arr.Offset() : arr.Offset()+arr.Len()]
+		return func(i int64) int64 { return int64(runEnds[i]) }
+	case arrow.INT64:
+		runEnds := arrow.Int64Traits.CastFromBytes(arr.Buffers()[1].Bytes())
+		runEnds = runEnds[arr.Offset() : arr.Offset()+arr.Len()]
+		return func(i int64) int64 { return int64(runEnds[i]) }
+	default:
+		panic("only int16, int32, and int64 are allowed for the run-ends")
+	}
+}
+
+// MergedRuns is used to take two Run End Encoded arrays and iterate
+// them, finding the correct physical indices to correspond with the
+// runs.
+type MergedRuns struct {
+	inputs       [2]arrow.Array
+	runIndex     [2]int64
+	inputRunEnds [2]func(int64) int64
+	runEnds      [2]int64
+	logicalLen   int
+	logicalPos   int
+	mergedEnd    int64
+}
+
+// NewMergedRuns takes two RunEndEncoded arrays and returns a MergedRuns
+// object that will allow iterating over the physical indices of the runs.
+func NewMergedRuns(inputs [2]arrow.Array) *MergedRuns {
+	if len(inputs) == 0 {
+		return &MergedRuns{logicalLen: 0}
+	}
+
+	mr := &MergedRuns{inputs: inputs, logicalLen: inputs[0].Len()}
+	for i, in := range inputs {
+		if in.DataType().ID() != arrow.RUN_END_ENCODED {
+			panic("arrow/rle: NewMergedRuns can only be called with RunLengthEncoded arrays")
+		}
+		if in.Len() != mr.logicalLen {
+			panic("arrow/rle: can only merge runs of RLE arrays of the same length")
+		}
+
+		mr.inputRunEnds[i] = getRunEnds(in.Data().Children()[0])
+		// initialize the runIndex at the physical offset - 1 so the first
+		// call to Next will increment it to the correct initial offset
+		// since the initial state is logicalPos == 0 and mergedEnd == 0
+		mr.runIndex[i] = int64(FindPhysicalOffset(in.Data())) - 1
+	}
+
+	return mr
+}
+
+// Next returns true if there are more values/runs to iterate and false
+// when one of the arrays has reached the end.
+func (mr *MergedRuns) Next() bool {
+	mr.logicalPos = int(mr.mergedEnd)
+	if mr.isEnd() {
+		return false
+	}
+
+	for i := range mr.inputs {
+		if mr.logicalPos == int(mr.runEnds[i]) {
+			mr.runIndex[i]++
+		}
+	}
+	mr.findMergedRun()
+
+	return true
+}
+
+// IndexIntoBuffer returns the physical index into the value buffer of
+// the passed in array index (ie: 0 for the first array and 1 for the second)
+// this takes into account the offset of the array so it is the true physical
+// index into the value *buffer* in the child.
+func (mr *MergedRuns) IndexIntoBuffer(id int) int64 {
+	return mr.runIndex[id] + int64(mr.inputs[id].Data().Children()[1].Offset())
+}
+
+// IndexIntoArray is like IndexIntoBuffer but it doesn't take into account
+// the array offset and instead is the index that can be used with the .Value
+// method on the array to get the correct value.
+func (mr *MergedRuns) IndexIntoArray(id int) int64 { return mr.runIndex[id] }
+
+// RunLength returns the logical length of the current merged run being looked at.
+func (mr *MergedRuns) RunLength() int64 { return mr.mergedEnd - int64(mr.logicalPos) }
+
+// AccumulatedRunLength returns the logical run end of the current merged run.
+func (mr *MergedRuns) AccumulatedRunLength() int64 { return mr.mergedEnd }
+
+func (mr *MergedRuns) findMergedRun() {
+	mr.mergedEnd = int64(math.MaxInt64)
+	for i, in := range mr.inputs {
+		// logical indices of the end of the run we are currently in each input
+		mr.runEnds[i] = int64(mr.inputRunEnds[i](mr.runIndex[i]) - int64(in.Data().Offset()))
+		// the logical length may end in the middle of a run, in case the array was sliced
+		if mr.logicalLen < int(mr.runEnds[i]) {
+			mr.runEnds[i] = int64(mr.logicalLen)
+		}
+		if mr.runEnds[i] < mr.mergedEnd {
+			mr.mergedEnd = mr.runEnds[i]
+		}
+	}
+}
+
+func (mr *MergedRuns) isEnd() bool { return mr.logicalPos == mr.logicalLen }
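
Matching the worked example in the FindPhysicalOffset doc comment above, here is a sketch of slicing a run-end encoded array and querying its physical offset and length (the harness and the string values are illustrative):

package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v11/arrow"
	"github.com/apache/arrow/go/v11/arrow/array"
	"github.com/apache/arrow/go/v11/arrow/encoded"
	"github.com/apache/arrow/go/v11/arrow/memory"
)

func main() {
	mem := memory.DefaultAllocator

	runEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(`[10, 20, 30, 40, 50]`))
	defer runEnds.Release()
	values, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(`["a", "b", "c", "d", "e"]`))
	defer values.Release()

	ree := array.NewRunEndEncodedArray(runEnds, values, 50, 0)
	defer ree.Release()

	// slicing at logical offset 25 lands inside the third run ("c"),
	// so the physical offset is 2 and three runs remain physically
	slice := array.NewSlice(ree, 25, 50)
	defer slice.Release()

	fmt.Println(encoded.FindPhysicalOffset(slice.Data())) // 2
	fmt.Println(encoded.GetPhysicalLength(slice.Data()))  // 3
}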
diff --git a/go/arrow/encoded/ree_utils_test.go b/go/arrow/encoded/ree_utils_test.go
new file mode 100644
index 0000000000..5506d3d9e2
--- /dev/null
+++ b/go/arrow/encoded/ree_utils_test.go
@@ -0,0 +1,148 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package encoded_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/apache/arrow/go/v11/arrow"
+	"github.com/apache/arrow/go/v11/arrow/array"
+	"github.com/apache/arrow/go/v11/arrow/encoded"
+	"github.com/apache/arrow/go/v11/arrow/memory"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFindPhysicalOffset(t *testing.T) {
+	tests := []struct {
+		vals   []int32
+		offset int
+		exp    int
+	}{
+		{[]int32{1}, 0, 0},
+		{[]int32{1, 2, 3}, 0, 0},
+		{[]int32{1, 2, 3}, 1, 1},
+		{[]int32{1, 2, 3}, 2, 2},
+		{[]int32{2, 3, 4}, 0, 0},
+		{[]int32{2, 3, 4}, 1, 0},
+		{[]int32{2, 3, 4}, 2, 1},
+		{[]int32{2, 3, 4}, 3, 2},
+		{[]int32{2, 4, 6}, 3, 1},
+		{[]int32{1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025, 1050}, 1000, 10},
+		// out-of-range logical offset should return len(vals)
+		{[]int32{2, 4, 6}, 6, 3},
+		{[]int32{2, 4, 6}, 10000, 3},
+	}
+
+	reeType := arrow.RunEndEncodedOf(arrow.PrimitiveTypes.Int32, arrow.PrimitiveTypes.Int32)
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("%v find %d", tt.vals, tt.offset), func(t *testing.T) {
+			child := array.NewData(arrow.PrimitiveTypes.Int32, len(tt.vals), []*memory.Buffer{nil, memory.NewBufferBytes(arrow.Int32Traits.CastToBytes(tt.vals))}, nil, 0, 0)
+			arr := array.NewData(reeType, -1, nil, []arrow.ArrayData{child}, 0, tt.offset)
+
+			assert.Equal(t, tt.exp, encoded.FindPhysicalOffset(arr))
+		})
+	}
+}
+
+func TestMergedRunsIter(t *testing.T) {
+	mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
+	defer mem.AssertSize(t, 0)
+
+	leftRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+		strings.NewReader(`[1, 2, 3, 4, 5, 6, 7, 8, 9, 1000, 1005, 1015, 1020, 1025, 30000]`))
+	defer leftRunEnds.Release()
+
+	rightRunEnds, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32,
+		strings.NewReader(`[1, 2, 3, 4, 5, 2005, 2009, 2025, 2050]`))
+	defer rightRunEnds.Release()
+
+	var (
+		expectedRunLengths        = []int32{5, 4, 6, 5, 5, 25}
+		expectedLeftVisits        = []int32{110, 111, 111, 112, 113, 114}
+		expectedRightVisits       = []int32{205, 206, 207, 207, 207, 208}
+		leftPrntOffset      int32 = 1000
+		leftChildOffset     int32 = 100
+		rightPrntOffset     int32 = 2000
+		rightChildOffset    int32 = 200
+
+		leftChild  arrow.Array = array.NewNull(int(leftChildOffset) + leftRunEnds.Len())
+		rightChild arrow.Array = array.NewNull(int(rightChildOffset) + rightRunEnds.Len())
+	)
+
+	leftChild = array.NewSlice(leftChild, int64(leftChildOffset), int64(leftChildOffset)+int64(leftRunEnds.Len()))
+	rightChild = array.NewSlice(rightChild, int64(rightChildOffset), int64(rightChild.Len()))
+
+	leftArray := arrow.Array(array.NewRunEndEncodedArray(leftRunEnds, leftChild, 1050, 0))
+	defer leftArray.Release()
+	rightArray := arrow.Array(array.NewRunEndEncodedArray(rightRunEnds, rightChild, 2050, 0))
+	defer rightArray.Release()
+
+	leftArray = array.NewSlice(leftArray, int64(leftPrntOffset), int64(leftArray.Len()))
+	defer leftArray.Release()
+	rightArray = array.NewSlice(rightArray, int64(rightPrntOffset), int64(rightArray.Len()))
+	defer rightArray.Release()
+
+	pos, logicalPos := 0, 0
+	mr := encoded.NewMergedRuns([2]arrow.Array{leftArray, rightArray})
+	for mr.Next() {
+		assert.EqualValues(t, expectedRunLengths[pos], mr.RunLength())
+		assert.EqualValues(t, expectedLeftVisits[pos], mr.IndexIntoBuffer(0))
+		assert.EqualValues(t, expectedRightVisits[pos], mr.IndexIntoBuffer(1))
+		assert.EqualValues(t, expectedLeftVisits[pos]-int32(leftChildOffset), mr.IndexIntoArray(0))
+		assert.EqualValues(t, expectedRightVisits[pos]-int32(rightChildOffset), mr.IndexIntoArray(1))
+		pos++
+		logicalPos += int(mr.RunLength())
+		assert.EqualValues(t, logicalPos, mr.AccumulatedRunLength())
+	}
+	assert.EqualValues(t, len(expectedRunLengths), pos)
+
+	t.Run("left array only", func(t *testing.T) {
+		leftOnlyRunLengths := []int32{5, 10, 5, 5, 25}
+		pos, logicalPos := 0, 0
+		mr := encoded.NewMergedRuns([2]arrow.Array{leftArray, leftArray})
+		for mr.Next() {
+			assert.EqualValues(t, leftOnlyRunLengths[pos], mr.RunLength())
+			assert.EqualValues(t, 110+pos, mr.IndexIntoBuffer(0))
+			assert.EqualValues(t, 110+pos, mr.IndexIntoBuffer(1))
+			assert.EqualValues(t, 10+pos, mr.IndexIntoArray(0))
+			assert.EqualValues(t, 10+pos, mr.IndexIntoArray(1))
+			pos++
+			logicalPos += int(mr.RunLength())
+			assert.EqualValues(t, logicalPos, mr.AccumulatedRunLength())
+		}
+		assert.EqualValues(t, len(leftOnlyRunLengths), pos)
+	})
+
+	t.Run("right array only", func(t *testing.T) {
+		rightOnlyRunLengths := []int32{5, 4, 16, 25}
+		pos, logicalPos := 0, 0
+		mr := encoded.NewMergedRuns([2]arrow.Array{rightArray, rightArray})
+		for mr.Next() {
+			assert.EqualValues(t, rightOnlyRunLengths[pos], mr.RunLength())
+			assert.EqualValues(t, 205+pos, mr.IndexIntoBuffer(0))
+			assert.EqualValues(t, 205+pos, mr.IndexIntoBuffer(1))
+			assert.EqualValues(t, 5+pos, mr.IndexIntoArray(0))
+			assert.EqualValues(t, 5+pos, mr.IndexIntoArray(1))
+			pos++
+			logicalPos += int(mr.RunLength())
+			assert.EqualValues(t, logicalPos, mr.AccumulatedRunLength())
+		}
+		assert.EqualValues(t, len(rightOnlyRunLengths), pos)
+	})
+}
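
Finally, a sketch of iterating MergedRuns directly over two arrays of the same logical length (the helper and the literal data are illustrative): each iteration yields one merged run plus the physical value index to use in each input for that logical range.

package main

import (
	"fmt"
	"strings"

	"github.com/apache/arrow/go/v11/arrow"
	"github.com/apache/arrow/go/v11/arrow/array"
	"github.com/apache/arrow/go/v11/arrow/encoded"
	"github.com/apache/arrow/go/v11/arrow/memory"
)

// ree builds a run-end encoded array from JSON run ends and string values.
func ree(mem memory.Allocator, ends, vals string, length int) arrow.Array {
	e, _, _ := array.FromJSON(mem, arrow.PrimitiveTypes.Int32, strings.NewReader(ends))
	defer e.Release()
	v, _, _ := array.FromJSON(mem, arrow.BinaryTypes.String, strings.NewReader(vals))
	defer v.Release()
	return array.NewRunEndEncodedArray(e, v, length, 0)
}

func main() {
	mem := memory.DefaultAllocator

	left := ree(mem, `[2, 6]`, `["a", "b"]`, 6)
	defer left.Release()
	right := ree(mem, `[4, 6]`, `["c", "d"]`, 6)
	defer right.Release()

	// the merged runs are [0,2), [2,4), [4,6)
	mr := encoded.NewMergedRuns([2]arrow.Array{left, right})
	for mr.Next() {
		fmt.Println(mr.RunLength(), mr.IndexIntoArray(0), mr.IndexIntoArray(1))
	}
	// prints: 2 0 0, then 2 1 0, then 2 1 1
}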
diff --git a/go/arrow/internal/utils.go b/go/arrow/internal/utils.go
index d2a9c35e5c..5344bafab5 100644
--- a/go/arrow/internal/utils.go
+++ b/go/arrow/internal/utils.go
@@ -40,7 +40,7 @@ func HasValidityBitmap(id arrow.Type, version flatbuf.MetadataVersion) bool {
 	}
 
 	switch id {
-	case arrow.NULL, arrow.DENSE_UNION, arrow.SPARSE_UNION:
+	case arrow.NULL, arrow.DENSE_UNION, arrow.SPARSE_UNION, arrow.RUN_END_ENCODED:
 		return false
 	}
 	return true
diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go
index c03f380699..d91a10c8d3 100644
--- a/go/arrow/scalar/scalar.go
+++ b/go/arrow/scalar/scalar.go
@@ -483,36 +483,27 @@ var makeNullFn [64]scalarMakeNullFn
 
 func init() {
 	makeNullFn = [...]scalarMakeNullFn{
-		arrow.NULL:              func(dt arrow.DataType) Scalar { return ScalarNull },
-		arrow.BOOL:              func(dt arrow.DataType) Scalar { return &Boolean{scalar: scalar{dt, false}} },
-		arrow.UINT8:             func(dt arrow.DataType) Scalar { return &Uint8{scalar: scalar{dt, false}} },
-		arrow.INT8:              func(dt arrow.DataType) Scalar { return &Int8{scalar: scalar{dt, false}} },
-		arrow.UINT16:            func(dt arrow.DataType) Scalar { return &Uint16{scalar: scalar{dt, false}} },
-		arrow.INT16:             func(dt arrow.DataType) Scalar { return &Int16{scalar: scalar{dt, false}} },
-		arrow.UINT32:            func(dt arrow.DataType) Scalar { return &Uint32{scalar: scalar{dt, false}} },
-		arrow.INT32:             func(dt arrow.DataType) Scalar { return &Int32{scalar: scalar{dt, false}} },
-		arrow.UINT64:            func(dt arrow.DataType) Scalar { return &Uint64{scalar: scalar{dt, false}} },
-		arrow.INT64:             func(dt arrow.DataType) Scalar { return &Int64{scalar: scalar{dt, false}} },
-		arrow.FLOAT16:           func(dt arrow.DataType) Scalar { return &Float16{scalar: scalar{dt, false}} },
-		arrow.FLOAT32:           func(dt arrow.DataType) Scalar { return &Float32{scalar: scalar{dt, false}} },
-		arrow.FLOAT64:           func(dt arrow.DataType) Scalar { return &Float64{scalar: scalar{dt, false}} },
-		arrow.STRING:            func(dt arrow.DataType) Scalar { return &String{&Binary{scalar: scalar{dt, false}}} },
-		arrow.BINARY:            func(dt arrow.DataType) Scalar { return &Binary{scalar: scalar{dt, false}} },
-		arrow.FIXED_SIZE_BINARY: func(dt arrow.DataType) Scalar { return &FixedSizeBinary{&Binary{scalar: scalar{dt, false}}} },
-		arrow.DATE32:            func(dt arrow.DataType) Scalar { return &Date32{scalar: scalar{dt, false}} },
-		arrow.DATE64:            func(dt arrow.DataType) Scalar { return &Date64{scalar: scalar{dt, false}} },
-		arrow.TIMESTAMP:         func(dt arrow.DataType) Scalar { return &Timestamp{scalar: scalar{dt, false}} },
-		arrow.TIME32:            func(dt arrow.DataType) Scalar { return &Time32{scalar: scalar{dt, false}} },
-		arrow.TIME64:            func(dt arrow.DataType) Scalar { return &Time64{scalar: scalar{dt, false}} },
-		arrow.INTERVAL: func(dt arrow.DataType) Scalar {
-			if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthInterval) {
-				return &MonthInterval{scalar: scalar{dt, false}}
-			}
-			if arrow.TypeEqual(dt, arrow.FixedWidthTypes.MonthDayNanoInterval) {
-				return &MonthDayNanoInterval{scalar: scalar{dt, false}}
-			}
-			return &DayTimeInterval{scalar: scalar{dt, false}}
-		},
+		arrow.NULL:                    func(dt arrow.DataType) Scalar { return ScalarNull },
+		arrow.BOOL:                    func(dt arrow.DataType) Scalar { return &Boolean{scalar: scalar{dt, false}} },
+		arrow.UINT8:                   func(dt arrow.DataType) Scalar { return &Uint8{scalar: scalar{dt, false}} },
+		arrow.INT8:                    func(dt arrow.DataType) Scalar { return &Int8{scalar: scalar{dt, false}} },
+		arrow.UINT16:                  func(dt arrow.DataType) Scalar { return &Uint16{scalar: scalar{dt, false}} },
+		arrow.INT16:                   func(dt arrow.DataType) Scalar { return &Int16{scalar: scalar{dt, false}} },
+		arrow.UINT32:                  func(dt arrow.DataType) Scalar { return &Uint32{scalar: scalar{dt, false}} },
+		arrow.INT32:                   func(dt arrow.DataType) Scalar { return &Int32{scalar: scalar{dt, false}} },
+		arrow.UINT64:                  func(dt arrow.DataType) Scalar { return &Uint64{scalar: scalar{dt, false}} },
+		arrow.INT64:                   func(dt arrow.DataType) Scalar { return &Int64{scalar: scalar{dt, false}} },
+		arrow.FLOAT16:                 func(dt arrow.DataType) Scalar { return &Float16{scalar: scalar{dt, false}} },
+		arrow.FLOAT32:                 func(dt arrow.DataType) Scalar { return &Float32{scalar: scalar{dt, false}} },
+		arrow.FLOAT64:                 func(dt arrow.DataType) Scalar { return &Float64{scalar: scalar{dt, false}} },
+		arrow.STRING:                  func(dt arrow.DataType) Scalar { return &String{&Binary{scalar: scalar{dt, false}}} },
+		arrow.BINARY:                  func(dt arrow.DataType) Scalar { return &Binary{scalar: scalar{dt, false}} },
+		arrow.FIXED_SIZE_BINARY:       func(dt arrow.DataType) Scalar { return &FixedSizeBinary{&Binary{scalar: scalar{dt, false}}} },
+		arrow.DATE32:                  func(dt arrow.DataType) Scalar { return &Date32{scalar: scalar{dt, false}} },
+		arrow.DATE64:                  func(dt arrow.DataType) Scalar { return &Date64{scalar: scalar{dt, false}} },
+		arrow.TIMESTAMP:               func(dt arrow.DataType) Scalar { return &Timestamp{scalar: scalar{dt, false}} },
+		arrow.TIME32:                  func(dt arrow.DataType) Scalar { return &Time32{scalar: scalar{dt, false}} },
+		arrow.TIME64:                  func(dt arrow.DataType) Scalar { return &Time64{scalar: scalar{dt, false}} },
 		arrow.INTERVAL_MONTHS:         func(dt arrow.DataType) Scalar { return &MonthInterval{scalar: scalar{dt, false}} },
 		arrow.INTERVAL_DAY_TIME:       func(dt arrow.DataType) Scalar { return &DayTimeInterval{scalar: scalar{dt, false}} },
 		arrow.INTERVAL_MONTH_DAY_NANO: func(dt arrow.DataType) Scalar { return &MonthDayNanoInterval{scalar: scalar{dt, false}} },
diff --git a/go/arrow/type_string.go b/go/arrow/type_string.go
index 3ba93f30d5..41a4073863 100644
--- a/go/arrow/type_string.go
+++ b/go/arrow/type_string.go
@@ -46,12 +46,12 @@ func _() {
 	_ = x[LARGE_BINARY-35]
 	_ = x[LARGE_LIST-36]
 	_ = x[INTERVAL_MONTH_DAY_NANO-37]
-	_ = x[INTERVAL-38]
+	_ = x[RUN_END_ENCODED-38]
 }
 
-const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANOINTERVAL"
+const _Type_name = "NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANORUN_END_ENCODED"
 
-var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 328}
+var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64, 71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207, 218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 335}
 
 func (i Type) String() string {
 	if i < 0 || i >= Type(len(_Type_index)-1) {