You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/01 14:35:27 UTC
[arrow-ballista] branch master updated: Update datafusion.proto (#299)
This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git
The following commit(s) were added to refs/heads/master by this push:
new 5142cd07 Update datafusion.proto (#299)
5142cd07 is described below
commit 5142cd07c9bb17c5f2882b5261465805c2a85112
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sat Oct 1 08:35:23 2022 -0600
Update datafusion.proto (#299)
---
ballista/rust/core/proto/datafusion.proto | 268 +++++++++++++++++-------------
1 file changed, 152 insertions(+), 116 deletions(-)
diff --git a/ballista/rust/core/proto/datafusion.proto b/ballista/rust/core/proto/datafusion.proto
index c53662dc..0a92d0d1 100644
--- a/ballista/rust/core/proto/datafusion.proto
+++ b/ballista/rust/core/proto/datafusion.proto
@@ -125,6 +125,8 @@ message SelectionNode {
message SortNode {
LogicalPlanNode input = 1;
repeated datafusion.LogicalExprNode expr = 2;
+ // Maximum number of highest/lowest rows to fetch; negative means no limit
+ int64 fetch = 3;
}
message RepartitionNode {
@@ -504,11 +506,13 @@ message AggregateExprNode {
AggregateFunction aggr_function = 1;
repeated LogicalExprNode expr = 2;
bool distinct = 3;
+ LogicalExprNode filter = 4;
}
message AggregateUDFExprNode {
string fun_name = 1;
repeated LogicalExprNode args = 2;
+ LogicalExprNode filter = 3;
}
message ScalarUDFExprNode {
@@ -649,53 +653,53 @@ message Field {
}
message FixedSizeBinary{
- int32 length = 1;
+ int32 length = 1;
}
message Timestamp{
- TimeUnit time_unit = 1;
- string timezone = 2;
+ TimeUnit time_unit = 1;
+ string timezone = 2;
}
enum DateUnit{
- Day = 0;
- DateMillisecond = 1;
+ Day = 0;
+ DateMillisecond = 1;
}
enum TimeUnit{
- Second = 0;
- TimeMillisecond = 1;
- Microsecond = 2;
- Nanosecond = 3;
+ Second = 0;
+ Millisecond = 1;
+ Microsecond = 2;
+ Nanosecond = 3;
}
enum IntervalUnit{
- YearMonth = 0;
- DayTime = 1;
- MonthDayNano = 2;
+ YearMonth = 0;
+ DayTime = 1;
+ MonthDayNano = 2;
}
message Decimal{
- uint64 whole = 1;
- uint64 fractional = 2;
+ uint64 whole = 1;
+ uint64 fractional = 2;
}
message List{
- Field field_type = 1;
+ Field field_type = 1;
}
message FixedSizeList{
- Field field_type = 1;
- int32 list_size = 2;
+ Field field_type = 1;
+ int32 list_size = 2;
}
message Dictionary{
- ArrowType key = 1;
- ArrowType value = 2;
+ ArrowType key = 1;
+ ArrowType value = 2;
}
message Struct{
- repeated Field sub_field_types = 1;
+ repeated Field sub_field_types = 1;
}
enum UnionMode{
@@ -704,14 +708,14 @@ enum UnionMode{
}
message Union{
- repeated Field union_types = 1;
- UnionMode union_mode = 2;
- repeated int32 type_ids = 3;
+ repeated Field union_types = 1;
+ UnionMode union_mode = 2;
+ repeated int32 type_ids = 3;
}
message ScalarListValue{
- Field field = 1;
- repeated ScalarValue values = 2;
+ Field field = 1;
+ repeated ScalarValue values = 2;
}
message ScalarTimestampValue {
@@ -724,33 +728,60 @@ message ScalarTimestampValue {
string timezone = 5;
}
+message ScalarDictionaryValue {
+ ArrowType index_type = 1;
+ ScalarValue value = 2;
+}
+
+message IntervalMonthDayNanoValue {
+ int32 months = 1;
+ int32 days = 2;
+ int64 nanos = 3;
+}
+
+message StructValue {
+ // Note that a null struct value must have one or more fields, so we
+ // encode a null StructValue as one with an empty field_values
+ // list.
+ repeated ScalarValue field_values = 2;
+ repeated Field fields = 3;
+}
+
message ScalarValue{
- oneof value {
- bool bool_value = 1;
- string utf8_value = 2;
- string large_utf8_value = 3;
- int32 int8_value = 4;
- int32 int16_value = 5;
- int32 int32_value = 6;
- int64 int64_value = 7;
- uint32 uint8_value = 8;
- uint32 uint16_value = 9;
- uint32 uint32_value = 10;
- uint64 uint64_value = 11;
- float float32_value = 12;
- double float64_value = 13;
- //Literal Date32 value always has a unit of day
- int32 date_32_value = 14;
- ScalarListValue list_value = 17;
- ScalarType null_list_value = 18;
-
- PrimitiveScalarType null_value = 19;
- Decimal128 decimal128_value = 20;
- int64 date_64_value = 21;
- int32 interval_yearmonth_value = 24;
- int64 interval_daytime_value = 25;
- ScalarTimestampValue timestamp_value = 26;
- }
+ oneof value {
+ // Null value of any type (type is encoded)
+ PrimitiveScalarType null_value = 19;
+
+ bool bool_value = 1;
+ string utf8_value = 2;
+ string large_utf8_value = 3;
+ int32 int8_value = 4;
+ int32 int16_value = 5;
+ int32 int32_value = 6;
+ int64 int64_value = 7;
+ uint32 uint8_value = 8;
+ uint32 uint16_value = 9;
+ uint32 uint32_value = 10;
+ uint64 uint64_value = 11;
+ float float32_value = 12;
+ double float64_value = 13;
+ //Literal Date32 value always has a unit of day
+ int32 date_32_value = 14;
+ ScalarListValue list_value = 17;
+ ScalarType null_list_value = 18;
+
+ Decimal128 decimal128_value = 20;
+ int64 date_64_value = 21;
+ int32 interval_yearmonth_value = 24;
+ int64 interval_daytime_value = 25;
+ ScalarTimestampValue timestamp_value = 26;
+ ScalarDictionaryValue dictionary_value = 27;
+ bytes binary_value = 28;
+ bytes large_binary_value = 29;
+ int64 time64_value = 30;
+ IntervalMonthDayNanoValue interval_month_day_nano = 31;
+ StructValue struct_value = 32;
+ }
}
message Decimal128{
@@ -763,83 +794,88 @@ message Decimal128{
// List
enum PrimitiveScalarType{
- BOOL = 0; // arrow::Type::BOOL
- UINT8 = 1; // arrow::Type::UINT8
- INT8 = 2; // arrow::Type::INT8
- UINT16 = 3; // represents arrow::Type fields in src/arrow/type.h
- INT16 = 4;
- UINT32 = 5;
- INT32 = 6;
- UINT64 = 7;
- INT64 = 8;
- FLOAT32 = 9;
- FLOAT64 = 10;
- UTF8 = 11;
- LARGE_UTF8 = 12;
- DATE32 = 13;
- TIME_MICROSECOND = 14;
- TIME_NANOSECOND = 15;
- NULL = 16;
-
- DECIMAL128 = 17;
- DATE64 = 20;
- TIME_SECOND = 21;
- TIME_MILLISECOND = 22;
- INTERVAL_YEARMONTH = 23;
- INTERVAL_DAYTIME = 24;
+ BOOL = 0; // arrow::Type::BOOL
+ UINT8 = 1; // arrow::Type::UINT8
+ INT8 = 2; // arrow::Type::INT8
+ UINT16 = 3; // represents arrow::Type fields in src/arrow/type.h
+ INT16 = 4;
+ UINT32 = 5;
+ INT32 = 6;
+ UINT64 = 7;
+ INT64 = 8;
+ FLOAT32 = 9;
+ FLOAT64 = 10;
+ UTF8 = 11;
+ LARGE_UTF8 = 12;
+ DATE32 = 13;
+ TIMESTAMP_MICROSECOND = 14;
+ TIMESTAMP_NANOSECOND = 15;
+ NULL = 16;
+ DECIMAL128 = 17;
+ DATE64 = 20;
+ TIMESTAMP_SECOND = 21;
+ TIMESTAMP_MILLISECOND = 22;
+ INTERVAL_YEARMONTH = 23;
+ INTERVAL_DAYTIME = 24;
+ INTERVAL_MONTHDAYNANO = 28;
+
+ BINARY = 25;
+ LARGE_BINARY = 26;
+
+ TIME64 = 27;
}
message ScalarType{
- oneof datatype{
- PrimitiveScalarType scalar = 1;
- ScalarListType list = 2;
- }
+ oneof datatype{
+ PrimitiveScalarType scalar = 1;
+ ScalarListType list = 2;
+ }
}
message ScalarListType{
- repeated string field_names = 3;
- PrimitiveScalarType deepest_type = 2;
+ repeated string field_names = 3;
+ PrimitiveScalarType deepest_type = 2;
}
// Broke out into multiple message types so that type
// metadata did not need to be in separate message
-//All types that are of the empty message types contain no additional metadata
+// All types that are of the empty message types contain no additional metadata
// about the type
message ArrowType{
- oneof arrow_type_enum{
- EmptyMessage NONE = 1; // arrow::Type::NA
- EmptyMessage BOOL = 2; // arrow::Type::BOOL
- EmptyMessage UINT8 = 3; // arrow::Type::UINT8
- EmptyMessage INT8 = 4; // arrow::Type::INT8
- EmptyMessage UINT16 =5; // represents arrow::Type fields in src/arrow/type.h
- EmptyMessage INT16 = 6;
- EmptyMessage UINT32 =7;
- EmptyMessage INT32 = 8;
- EmptyMessage UINT64 =9;
- EmptyMessage INT64 =10 ;
- EmptyMessage FLOAT16 =11 ;
- EmptyMessage FLOAT32 =12 ;
- EmptyMessage FLOAT64 =13 ;
- EmptyMessage UTF8 =14 ;
- EmptyMessage LARGE_UTF8 = 32;
- EmptyMessage BINARY =15 ;
- int32 FIXED_SIZE_BINARY =16 ;
- EmptyMessage LARGE_BINARY = 31;
- EmptyMessage DATE32 =17 ;
- EmptyMessage DATE64 =18 ;
- TimeUnit DURATION = 19;
- Timestamp TIMESTAMP =20 ;
- TimeUnit TIME32 =21 ;
- TimeUnit TIME64 =22 ;
- IntervalUnit INTERVAL =23 ;
- Decimal DECIMAL =24 ;
- List LIST =25;
- List LARGE_LIST = 26;
- FixedSizeList FIXED_SIZE_LIST = 27;
- Struct STRUCT =28;
- Union UNION =29;
- Dictionary DICTIONARY =30;
- }
+ oneof arrow_type_enum{
+ EmptyMessage NONE = 1; // arrow::Type::NA
+ EmptyMessage BOOL = 2; // arrow::Type::BOOL
+ EmptyMessage UINT8 = 3; // arrow::Type::UINT8
+ EmptyMessage INT8 = 4; // arrow::Type::INT8
+ EmptyMessage UINT16 =5; // represents arrow::Type fields in src/arrow/type.h
+ EmptyMessage INT16 = 6;
+ EmptyMessage UINT32 =7;
+ EmptyMessage INT32 = 8;
+ EmptyMessage UINT64 =9;
+ EmptyMessage INT64 =10 ;
+ EmptyMessage FLOAT16 =11 ;
+ EmptyMessage FLOAT32 =12 ;
+ EmptyMessage FLOAT64 =13 ;
+ EmptyMessage UTF8 =14 ;
+ EmptyMessage LARGE_UTF8 = 32;
+ EmptyMessage BINARY =15 ;
+ int32 FIXED_SIZE_BINARY =16 ;
+ EmptyMessage LARGE_BINARY = 31;
+ EmptyMessage DATE32 =17 ;
+ EmptyMessage DATE64 =18 ;
+ TimeUnit DURATION = 19;
+ Timestamp TIMESTAMP =20 ;
+ TimeUnit TIME32 =21 ;
+ TimeUnit TIME64 =22 ;
+ IntervalUnit INTERVAL =23 ;
+ Decimal DECIMAL =24 ;
+ List LIST =25;
+ List LARGE_LIST = 26;
+ FixedSizeList FIXED_SIZE_LIST = 27;
+ Struct STRUCT =28;
+ Union UNION =29;
+ Dictionary DICTIONARY =30;
+ }
}
//Useful for representing an empty enum variant in rust