You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2022/06/02 01:40:35 UTC
[arrow] branch master updated: ARROW-16556: [Go] Add Layout method to DataTypes (#13136)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c41b71d98c ARROW-16556: [Go] Add Layout method to DataTypes (#13136)
c41b71d98c is described below
commit c41b71d98caff65d6dbe26c51d2c3689ea908c29
Author: Matt Topol <mt...@factset.com>
AuthorDate: Wed Jun 1 21:40:30 2022 -0400
ARROW-16556: [Go] Add Layout method to DataTypes (#13136)
Lead-authored-by: Matt Topol <mt...@factset.com>
Co-authored-by: Matthew Topol <mt...@factset.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
go/arrow/array/array_test.go | 9 +++--
go/arrow/datatype.go | 75 +++++++++++++++++++++++++++++++++++
go/arrow/datatype_binary.go | 8 ++++
go/arrow/datatype_extension.go | 2 +
go/arrow/datatype_fixedwidth.go | 49 +++++++++++++++++++++++
go/arrow/datatype_nested.go | 16 ++++++++
go/arrow/datatype_null.go | 3 ++
go/arrow/datatype_numeric.gen.go | 48 ++++++++++++++++++++++
go/arrow/datatype_numeric.gen.go.tmpl | 4 ++
9 files changed, 210 insertions(+), 4 deletions(-)
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index d39bcfa115..7820218aab 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -31,10 +31,11 @@ type testDataType struct {
id arrow.Type
}
-func (d *testDataType) ID() arrow.Type { return d.id }
-func (d *testDataType) Name() string { panic("implement me") }
-func (d *testDataType) BitWidth() int { return 8 }
-func (d *testDataType) Fingerprint() string { return "" }
+func (d *testDataType) ID() arrow.Type { return d.id }
+func (d *testDataType) Name() string { panic("implement me") }
+func (d *testDataType) BitWidth() int { return 8 }
+func (d *testDataType) Fingerprint() string { return "" }
+// Layout returns an empty layout; this test fixture only has to satisfy the
+// arrow.DataType interface. It uses the same pointer receiver as the other
+// testDataType methods so the whole method set lives on *testDataType.
+func (d *testDataType) Layout() arrow.DataTypeLayout { return arrow.DataTypeLayout{} }
func TestMakeFromData(t *testing.T) {
tests := []struct {
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index dc7a974907..ed6e803aea 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -165,6 +165,7 @@ type DataType interface {
// Name is name of the data type.
Name() string
Fingerprint() string
+ Layout() DataTypeLayout
}
// FixedWidthDataType is the representation of an Arrow type that
@@ -209,3 +210,77 @@ func timeUnitFingerprint(unit TimeUnit) rune {
return rune(0)
}
}
+
+// BufferKind describes the type of buffer expected when defining a layout specification.
+// It is the discriminator stored in BufferSpec.Kind.
+type BufferKind int8
+
+// The expected types of buffers
+const (
+ // KindFixedWidth marks a buffer whose elements share one byte width;
+ // BufferSpec.ByteWidth is documented as applying to this kind only.
+ KindFixedWidth BufferKind = iota
+ // KindVarWidth marks a buffer without a fixed element width (see SpecVariableWidth).
+ KindVarWidth
+ // KindBitmap marks a bitmap buffer (see SpecBitmap).
+ KindBitmap
+ // KindAlwaysNull is the kind used by the null type's layout (see SpecAlwaysNull);
+ // presumably the buffer slot is never populated — TODO confirm against the array code.
+ KindAlwaysNull
+)
+
+// BufferSpec provides a specification for the buffers of a particular datatype.
+// Values are normally built with the SpecFixedWidth, SpecVariableWidth,
+// SpecBitmap and SpecAlwaysNull helpers.
+type BufferSpec struct {
+ // Kind selects which category of buffer this spec describes.
+ Kind BufferKind
+ ByteWidth int // for KindFixedWidth; the Spec* helpers store -1 for every other kind
+}
+
+// Equals reports whether b and other describe the same buffer.
+// Two specs match when their kinds are equal; ByteWidth is taken into
+// account only for KindFixedWidth and ignored for every other kind.
+func (b BufferSpec) Equals(other BufferSpec) bool {
+	if b.Kind != other.Kind {
+		return false
+	}
+	if b.Kind == KindFixedWidth {
+		// Width only carries meaning for fixed-width buffers.
+		return b.ByteWidth == other.ByteWidth
+	}
+	return true
+}
+
+// DataTypeLayout represents the physical layout of a datatype's buffers including
+// the number of and types of those binary buffers. This will correspond
+// with the buffers in the ArrayData for an array of that type.
+type DataTypeLayout struct {
+ // Buffers lists one BufferSpec per buffer, in buffer order.
+ Buffers []BufferSpec
+ // HasDict is set to true by DictionaryType.Layout; other layouts leave it false.
+ HasDict bool
+}
+
+// SpecFixedWidth returns a KindFixedWidth spec whose ByteWidth is w.
+func SpecFixedWidth(w int) BufferSpec {
+	return BufferSpec{Kind: KindFixedWidth, ByteWidth: w}
+}
+
+// SpecVariableWidth returns a KindVarWidth spec; ByteWidth is set to -1.
+func SpecVariableWidth() BufferSpec {
+	return BufferSpec{Kind: KindVarWidth, ByteWidth: -1}
+}
+
+// SpecBitmap returns a KindBitmap spec; ByteWidth is set to -1.
+func SpecBitmap() BufferSpec {
+	return BufferSpec{Kind: KindBitmap, ByteWidth: -1}
+}
+
+// SpecAlwaysNull returns a KindAlwaysNull spec; ByteWidth is set to -1.
+func SpecAlwaysNull() BufferSpec {
+	return BufferSpec{Kind: KindAlwaysNull, ByteWidth: -1}
+}
+
+// IsInteger reports whether t is one of the signed or unsigned integer
+// type IDs of 8, 16, 32 or 64 bits.
+func IsInteger(t Type) bool {
+	switch t {
+	case INT8, INT16, INT32, INT64:
+		return true
+	case UINT8, UINT16, UINT32, UINT64:
+		return true
+	default:
+		return false
+	}
+}
+
+// IsPrimitive reports whether t identifies a fixed width primitive type:
+// boolean, the integer and floating point types, the date/time/timestamp/
+// duration types, and the three interval types.
+func IsPrimitive(t Type) bool {
+	switch t {
+	case BOOL:
+		return true
+	case UINT8, INT8, UINT16, INT16, UINT32, INT32, UINT64, INT64:
+		return true
+	case FLOAT16, FLOAT32, FLOAT64:
+		return true
+	case DATE32, DATE64, TIME32, TIME64, TIMESTAMP, DURATION:
+		return true
+	case INTERVAL_MONTHS, INTERVAL_DAY_TIME, INTERVAL_MONTH_DAY_NANO:
+		return true
+	default:
+		return false
+	}
+}
+
+// IsBaseBinary reports whether t is BINARY or STRING, or one of their
+// LARGE_ counterparts.
+func IsBaseBinary(t Type) bool {
+	return t == BINARY || t == STRING || t == LARGE_BINARY || t == LARGE_STRING
+}
+
+// IsFixedSizeBinary reports whether t is DECIMAL128, DECIMAL256 or
+// FIXED_SIZE_BINARY.
+func IsFixedSizeBinary(t Type) bool {
+	return t == DECIMAL128 || t == DECIMAL256 || t == FIXED_SIZE_BINARY
+}
diff --git a/go/arrow/datatype_binary.go b/go/arrow/datatype_binary.go
index 110ef491e4..d77e7a64bd 100644
--- a/go/arrow/datatype_binary.go
+++ b/go/arrow/datatype_binary.go
@@ -23,6 +23,10 @@ func (t *BinaryType) Name() string { return "binary" }
func (t *BinaryType) String() string { return "binary" }
func (t *BinaryType) binary() {}
func (t *BinaryType) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the three-buffer layout of a binary array: a bitmap, a
+// fixed-width buffer of Int32SizeBytes-wide entries, and a variable-width
+// buffer (presumably validity, offsets and data — confirm against the array code).
+func (t *BinaryType) Layout() DataTypeLayout {
+	specs := []BufferSpec{
+		SpecBitmap(),
+		SpecFixedWidth(Int32SizeBytes),
+		SpecVariableWidth(),
+	}
+	return DataTypeLayout{Buffers: specs}
+}
type StringType struct{}
@@ -31,6 +35,10 @@ func (t *StringType) Name() string { return "utf8" }
func (t *StringType) String() string { return "utf8" }
func (t *StringType) binary() {}
func (t *StringType) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the three-buffer layout of a utf8 array: a bitmap, a
+// fixed-width buffer of Int32SizeBytes-wide entries, and a variable-width
+// buffer (presumably validity, offsets and data — confirm against the array code).
+func (t *StringType) Layout() DataTypeLayout {
+	specs := []BufferSpec{
+		SpecBitmap(),
+		SpecFixedWidth(Int32SizeBytes),
+		SpecVariableWidth(),
+	}
+	return DataTypeLayout{Buffers: specs}
+}
var (
BinaryTypes = struct {
diff --git a/go/arrow/datatype_extension.go b/go/arrow/datatype_extension.go
index c03a3716aa..0c1c35c0cb 100644
--- a/go/arrow/datatype_extension.go
+++ b/go/arrow/datatype_extension.go
@@ -162,6 +162,8 @@ func (e *ExtensionBase) Fields() []Field {
return nil
}
+// Layout returns the layout of the extension's storage type; the extension
+// itself contributes no buffers of its own.
+func (e *ExtensionBase) Layout() DataTypeLayout {
+	storage := e.Storage
+	return storage.Layout()
+}
+
// this no-op exists to ensure that this type must be embedded in any user-defined extension type.
//lint:ignore U1000 this function is intentionally unused as it only exists to ensure embedding happens
func (ExtensionBase) mustEmbedExtensionBase() {}
diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go
index c8c9b6d44d..bf64299a6f 100644
--- a/go/arrow/datatype_fixedwidth.go
+++ b/go/arrow/datatype_fixedwidth.go
@@ -35,6 +35,10 @@ func (t *BooleanType) Fingerprint() string { return typeFingerprint(t) }
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *BooleanType) BitWidth() int { return 1 }
+// Layout returns the buffer layout of boolean arrays: two bitmap buffers.
+// The pointer receiver matches the other BooleanType methods and, because the
+// receiver is never read, keeps the call safe on a nil *BooleanType.
+func (*BooleanType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecBitmap()}}
+}
+
type FixedSizeBinaryType struct {
ByteWidth int
}
@@ -46,6 +50,9 @@ func (t *FixedSizeBinaryType) Fingerprint() string { return typeFingerprint(t) }
func (t *FixedSizeBinaryType) String() string {
return "fixed_size_binary[" + strconv.Itoa(t.ByteWidth) + "]"
}
+// Layout returns the buffer layout of fixed-size binary arrays: a bitmap
+// buffer followed by a fixed-width buffer whose ByteWidth is t.ByteWidth.
+func (t *FixedSizeBinaryType) Layout() DataTypeLayout {
+	values := SpecFixedWidth(t.ByteWidth)
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), values}}
+}
type (
Timestamp int64
@@ -346,6 +353,10 @@ func (t *TimestampType) Fingerprint() string {
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (*TimestampType) BitWidth() int { return 64 }
+// Layout returns the buffer layout of timestamp arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is TimestampSizeBytes.
+// The pointer receiver matches the other TimestampType methods and, because
+// the receiver is never read, keeps the call safe on a nil *TimestampType.
+func (*TimestampType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(TimestampSizeBytes)}}
+}
+
func (t *TimestampType) TimeUnit() TimeUnit { return t.Unit }
// ClearCachedLocation clears the cached time.Location object in the type.
@@ -438,6 +449,10 @@ func (t *Time32Type) Fingerprint() string {
return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}
+// Layout returns the buffer layout of time32 arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is Time32SizeBytes.
+// The pointer receiver matches the other Time32Type methods and, because the
+// receiver is never read, keeps the call safe on a nil *Time32Type.
+func (*Time32Type) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time32SizeBytes)}}
+}
+
func (t *Time32Type) TimeUnit() TimeUnit { return t.Unit }
// Time64Type is encoded as a 64-bit signed integer, representing either microseconds or nanoseconds since midnight.
@@ -453,6 +468,10 @@ func (t *Time64Type) Fingerprint() string {
return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}
+// Layout returns the buffer layout of time64 arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is Time64SizeBytes.
+// The pointer receiver matches the other Time64Type methods and, because the
+// receiver is never read, keeps the call safe on a nil *Time64Type.
+func (*Time64Type) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Time64SizeBytes)}}
+}
+
func (t *Time64Type) TimeUnit() TimeUnit { return t.Unit }
// DurationType is encoded as a 64-bit signed integer, representing an amount
@@ -469,6 +488,10 @@ func (t *DurationType) Fingerprint() string {
return typeFingerprint(t) + string(timeUnitFingerprint(t.Unit))
}
+// Layout returns the buffer layout of duration arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is DurationSizeBytes.
+// The pointer receiver matches the other DurationType methods and, because
+// the receiver is never read, keeps the call safe on a nil *DurationType.
+func (*DurationType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DurationSizeBytes)}}
+}
+
func (t *DurationType) TimeUnit() TimeUnit { return t.Unit }
// Float16Type represents a floating point value encoded with a 16-bit precision.
@@ -482,6 +505,10 @@ func (t *Float16Type) Fingerprint() string { return typeFingerprint(t) }
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *Float16Type) BitWidth() int { return 16 }
+// Layout returns the buffer layout of float16 arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is Float16SizeBytes.
+// The pointer receiver matches the other Float16Type methods and, because the
+// receiver is never read, keeps the call safe on a nil *Float16Type.
+func (*Float16Type) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Float16SizeBytes)}}
+}
+
// Decimal128Type represents a fixed-size 128-bit decimal type.
type Decimal128Type struct {
Precision int32
@@ -498,6 +525,10 @@ func (t *Decimal128Type) Fingerprint() string {
return fmt.Sprintf("%s[%d,%d,%d]", typeFingerprint(t), t.BitWidth(), t.Precision, t.Scale)
}
+// Layout returns the buffer layout of decimal128 arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is Decimal128SizeBytes.
+// The pointer receiver matches the other Decimal128Type methods and, because
+// the receiver is never read, keeps the call safe on a nil *Decimal128Type.
+func (*Decimal128Type) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Decimal128SizeBytes)}}
+}
+
// MonthInterval represents a number of months.
type MonthInterval int32
@@ -531,6 +562,10 @@ func (*MonthIntervalType) Fingerprint() string { return typeIDFingerprint(INTERV
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *MonthIntervalType) BitWidth() int { return 32 }
+// Layout returns the buffer layout of month-interval arrays: a bitmap buffer
+// followed by a fixed-width buffer whose ByteWidth is MonthIntervalSizeBytes.
+// The pointer receiver matches the other MonthIntervalType methods and,
+// because the receiver is never read, keeps the call safe on a nil pointer.
+func (*MonthIntervalType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthIntervalSizeBytes)}}
+}
+
// DayTimeInterval represents a number of days and milliseconds (fraction of day).
type DayTimeInterval struct {
Days int32 `json:"days"`
@@ -549,6 +584,10 @@ func (*DayTimeIntervalType) Fingerprint() string { return typeIDFingerprint(INTE
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (t *DayTimeIntervalType) BitWidth() int { return 64 }
+// Layout returns the buffer layout of day-time-interval arrays: a bitmap
+// buffer followed by a fixed-width buffer whose ByteWidth is
+// DayTimeIntervalSizeBytes. The pointer receiver matches the other
+// DayTimeIntervalType methods and, because the receiver is never read, keeps
+// the call safe on a nil pointer.
+func (*DayTimeIntervalType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(DayTimeIntervalSizeBytes)}}
+}
+
// MonthDayNanoInterval represents a number of months, days and nanoseconds (fraction of day).
type MonthDayNanoInterval struct {
Months int32 `json:"months"`
@@ -571,6 +610,10 @@ func (*MonthDayNanoIntervalType) Fingerprint() string {
// BitWidth returns the number of bits required to store a single element of this data type in memory.
func (*MonthDayNanoIntervalType) BitWidth() int { return 128 }
+// Layout returns the buffer layout of month-day-nano-interval arrays: a
+// bitmap buffer followed by a fixed-width buffer whose ByteWidth is
+// MonthDayNanoIntervalSizeBytes. The pointer receiver matches the other
+// MonthDayNanoIntervalType methods and, because the receiver is never read,
+// keeps the call safe on a nil pointer.
+func (*MonthDayNanoIntervalType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(MonthDayNanoIntervalSizeBytes)}}
+}
+
type op int8
const (
@@ -650,6 +693,12 @@ func (d *DictionaryType) Fingerprint() string {
return ordered
}
+// Layout returns the layout of the dictionary's index type with HasDict set
+// to true; the buffer specs are exactly those of the index type.
+func (d *DictionaryType) Layout() DataTypeLayout {
+	indexLayout := d.IndexType.Layout()
+	// Mark the copy so callers know a dictionary accompanies the index buffers.
+	indexLayout.HasDict = true
+	return indexLayout
+}
+
var (
FixedWidthTypes = struct {
Boolean FixedWidthDataType
diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go
index ee4a1befc9..108ef82779 100644
--- a/go/arrow/datatype_nested.go
+++ b/go/arrow/datatype_nested.go
@@ -90,6 +90,10 @@ func (t *ListType) ElemField() Field {
func (t *ListType) Fields() []Field { return []Field{t.ElemField()} }
+// Layout returns the buffer layout of list arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Int32SizeBytes.
+// The pointer receiver matches the other ListType methods and, because the
+// receiver is never read, keeps the call safe on a nil *ListType.
+func (*ListType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap(), SpecFixedWidth(Int32SizeBytes)}}
+}
+
// FixedSizeListType describes a nested type in which each array slot contains
// a fixed-size sequence of values, all having the same relative type.
type FixedSizeListType struct {
@@ -164,6 +168,10 @@ func (t *FixedSizeListType) Fingerprint() string {
func (t *FixedSizeListType) Fields() []Field { return []Field{t.ElemField()} }
+// Layout returns the buffer layout of fixed-size list arrays: a single
+// bitmap buffer. The pointer receiver matches the other FixedSizeListType
+// methods and, because the receiver is never read, keeps the call safe on a
+// nil *FixedSizeListType.
+func (*FixedSizeListType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap()}}
+}
+
// StructType describes a nested type parameterized by an ordered sequence
// of relative types, called its fields.
type StructType struct {
@@ -253,6 +261,10 @@ func (t *StructType) Fingerprint() string {
return b.String()
}
+// Layout returns the buffer layout of struct arrays: a single bitmap buffer.
+// The pointer receiver matches the other StructType methods and, because the
+// receiver is never read, keeps the call safe on a nil *StructType.
+func (*StructType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecBitmap()}}
+}
+
type MapType struct {
value *ListType
KeysSorted bool
@@ -313,6 +325,10 @@ func (t *MapType) Fingerprint() string {
func (t *MapType) Fields() []Field { return t.ValueType().Fields() }
+// Layout returns the layout of the list type that backs the map.
+func (t *MapType) Layout() DataTypeLayout {
+	backing := t.value
+	return backing.Layout()
+}
+
type Field struct {
Name string // Field name
Type DataType // The field's data type
diff --git a/go/arrow/datatype_null.go b/go/arrow/datatype_null.go
index 253e6ed49d..61412f3a9d 100644
--- a/go/arrow/datatype_null.go
+++ b/go/arrow/datatype_null.go
@@ -23,6 +23,9 @@ func (*NullType) ID() Type { return NULL }
func (*NullType) Name() string { return "null" }
func (*NullType) String() string { return "null" }
func (*NullType) Fingerprint() string { return typeIDFingerprint(NULL) }
+// Layout returns the buffer layout of null arrays: a single always-null
+// buffer spec.
+//
+// The receiver must be a pointer, like every other NullType method: the
+// package-level Null variable is declared as a bare *NullType (nil unless it
+// is assigned elsewhere), and a value receiver would dereference that nil
+// pointer and panic on Null.Layout().
+func (*NullType) Layout() DataTypeLayout {
+	return DataTypeLayout{Buffers: []BufferSpec{SpecAlwaysNull()}}
+}
var (
Null *NullType
diff --git a/go/arrow/datatype_numeric.gen.go b/go/arrow/datatype_numeric.gen.go
index d23ff8fd66..dfcdab5924 100644
--- a/go/arrow/datatype_numeric.gen.go
+++ b/go/arrow/datatype_numeric.gen.go
@@ -25,6 +25,10 @@ func (t *Int8Type) Name() string { return "int8" }
func (t *Int8Type) String() string { return "int8" }
func (t *Int8Type) BitWidth() int { return 8 }
func (t *Int8Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of int8 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Int8SizeBytes.
+func (t *Int8Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Int8SizeBytes)}}
+}
type Int16Type struct{}
@@ -33,6 +37,10 @@ func (t *Int16Type) Name() string { return "int16" }
func (t *Int16Type) String() string { return "int16" }
func (t *Int16Type) BitWidth() int { return 16 }
func (t *Int16Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of int16 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Int16SizeBytes.
+func (t *Int16Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Int16SizeBytes)}}
+}
type Int32Type struct{}
@@ -41,6 +49,10 @@ func (t *Int32Type) Name() string { return "int32" }
func (t *Int32Type) String() string { return "int32" }
func (t *Int32Type) BitWidth() int { return 32 }
func (t *Int32Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of int32 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Int32SizeBytes.
+func (t *Int32Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Int32SizeBytes)}}
+}
type Int64Type struct{}
@@ -49,6 +61,10 @@ func (t *Int64Type) Name() string { return "int64" }
func (t *Int64Type) String() string { return "int64" }
func (t *Int64Type) BitWidth() int { return 64 }
func (t *Int64Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of int64 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Int64SizeBytes.
+func (t *Int64Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Int64SizeBytes)}}
+}
type Uint8Type struct{}
@@ -57,6 +73,10 @@ func (t *Uint8Type) Name() string { return "uint8" }
func (t *Uint8Type) String() string { return "uint8" }
func (t *Uint8Type) BitWidth() int { return 8 }
func (t *Uint8Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of uint8 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Uint8SizeBytes.
+func (t *Uint8Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Uint8SizeBytes)}}
+}
type Uint16Type struct{}
@@ -65,6 +85,10 @@ func (t *Uint16Type) Name() string { return "uint16" }
func (t *Uint16Type) String() string { return "uint16" }
func (t *Uint16Type) BitWidth() int { return 16 }
func (t *Uint16Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of uint16 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Uint16SizeBytes.
+func (t *Uint16Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Uint16SizeBytes)}}
+}
type Uint32Type struct{}
@@ -73,6 +97,10 @@ func (t *Uint32Type) Name() string { return "uint32" }
func (t *Uint32Type) String() string { return "uint32" }
func (t *Uint32Type) BitWidth() int { return 32 }
func (t *Uint32Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of uint32 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Uint32SizeBytes.
+func (t *Uint32Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Uint32SizeBytes)}}
+}
type Uint64Type struct{}
@@ -81,6 +109,10 @@ func (t *Uint64Type) Name() string { return "uint64" }
func (t *Uint64Type) String() string { return "uint64" }
func (t *Uint64Type) BitWidth() int { return 64 }
func (t *Uint64Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of uint64 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Uint64SizeBytes.
+func (t *Uint64Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Uint64SizeBytes)}}
+}
type Float32Type struct{}
@@ -89,6 +121,10 @@ func (t *Float32Type) Name() string { return "float32" }
func (t *Float32Type) String() string { return "float32" }
func (t *Float32Type) BitWidth() int { return 32 }
func (t *Float32Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of float32 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Float32SizeBytes.
+func (t *Float32Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Float32SizeBytes)}}
+}
type Float64Type struct{}
@@ -97,6 +133,10 @@ func (t *Float64Type) Name() string { return "float64" }
func (t *Float64Type) String() string { return "float64" }
func (t *Float64Type) BitWidth() int { return 64 }
func (t *Float64Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of float64 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Float64SizeBytes.
+func (t *Float64Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Float64SizeBytes)}}
+}
type Date32Type struct{}
@@ -105,6 +145,10 @@ func (t *Date32Type) Name() string { return "date32" }
func (t *Date32Type) String() string { return "date32" }
func (t *Date32Type) BitWidth() int { return 32 }
func (t *Date32Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of date32 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Date32SizeBytes.
+func (t *Date32Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Date32SizeBytes)}}
+}
type Date64Type struct{}
@@ -113,6 +157,10 @@ func (t *Date64Type) Name() string { return "date64" }
func (t *Date64Type) String() string { return "date64" }
func (t *Date64Type) BitWidth() int { return 64 }
func (t *Date64Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of date64 arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is Date64SizeBytes.
+func (t *Date64Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth(Date64SizeBytes)}}
+}
var (
PrimitiveTypes = struct {
diff --git a/go/arrow/datatype_numeric.gen.go.tmpl b/go/arrow/datatype_numeric.gen.go.tmpl
index dd4c92f29b..a784619bd1 100644
--- a/go/arrow/datatype_numeric.gen.go.tmpl
+++ b/go/arrow/datatype_numeric.gen.go.tmpl
@@ -24,6 +24,10 @@ func (t *{{.Name}}Type) Name() string { return "{{.Name|lower}}" }
func (t *{{.Name}}Type) String() string { return "{{.Name|lower}}" }
func (t *{{.Name}}Type) BitWidth() int { return {{.Size}} }
func (t *{{.Name}}Type) Fingerprint() string { return typeFingerprint(t) }
+// Layout returns the buffer layout of {{.Name|lower}} arrays: a bitmap buffer followed
+// by a fixed-width buffer whose ByteWidth is {{.Name}}SizeBytes.
+func (t *{{.Name}}Type) Layout() DataTypeLayout {
+ return DataTypeLayout{Buffers: []BufferSpec{
+ SpecBitmap(), SpecFixedWidth({{.Name}}SizeBytes)}}
+}
{{end}}