You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ag...@apache.org on 2022/10/01 14:35:27 UTC

[arrow-ballista] branch master updated: Update datafusion.proto (#299)

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-ballista.git


The following commit(s) were added to refs/heads/master by this push:
     new 5142cd07 Update datafusion.proto (#299)
5142cd07 is described below

commit 5142cd07c9bb17c5f2882b5261465805c2a85112
Author: Andy Grove <an...@gmail.com>
AuthorDate: Sat Oct 1 08:35:23 2022 -0600

    Update datafusion.proto (#299)
---
 ballista/rust/core/proto/datafusion.proto | 268 +++++++++++++++++-------------
 1 file changed, 152 insertions(+), 116 deletions(-)

diff --git a/ballista/rust/core/proto/datafusion.proto b/ballista/rust/core/proto/datafusion.proto
index c53662dc..0a92d0d1 100644
--- a/ballista/rust/core/proto/datafusion.proto
+++ b/ballista/rust/core/proto/datafusion.proto
@@ -125,6 +125,8 @@ message SelectionNode {
 message SortNode {
   LogicalPlanNode input = 1;
   repeated datafusion.LogicalExprNode expr = 2;
+  // Maximum number of highest/lowest rows to fetch; negative means no limit
+  int64 fetch = 3;
 }
 
 message RepartitionNode {
@@ -504,11 +506,13 @@ message AggregateExprNode {
   AggregateFunction aggr_function = 1;
   repeated LogicalExprNode expr = 2;
   bool distinct = 3;
+  LogicalExprNode filter = 4;
 }
 
 message AggregateUDFExprNode {
   string fun_name = 1;
   repeated LogicalExprNode args = 2;
+  LogicalExprNode filter = 3;
 }
 
 message ScalarUDFExprNode {
@@ -649,53 +653,53 @@ message Field {
 }
 
 message FixedSizeBinary{
-  int32 length = 1;
+    int32 length = 1;
 }
 
 message Timestamp{
-  TimeUnit time_unit = 1;
-  string timezone = 2;
+    TimeUnit time_unit = 1;
+    string timezone = 2;
 }
 
 enum DateUnit{
-  Day = 0;
-  DateMillisecond = 1;
+    Day = 0;
+    DateMillisecond = 1;
 }
 
 enum TimeUnit{
-  Second = 0;
-  TimeMillisecond = 1;
-  Microsecond = 2;
-  Nanosecond = 3;
+    Second = 0;
+    Millisecond = 1;
+    Microsecond = 2;
+    Nanosecond = 3;
 }
 
 enum IntervalUnit{
-  YearMonth = 0;
-  DayTime = 1;
-  MonthDayNano = 2;
+    YearMonth = 0;
+    DayTime = 1;
+    MonthDayNano = 2;
 }
 
 message Decimal{
-  uint64 whole = 1;
-  uint64 fractional = 2;
+    uint64 whole = 1;
+    uint64 fractional = 2;
 }
 
 message List{
-  Field field_type = 1;
+    Field field_type = 1;
 }
 
 message FixedSizeList{
-  Field field_type = 1;
-  int32 list_size = 2;
+    Field field_type = 1;
+    int32 list_size = 2;
 }
 
 message Dictionary{
-  ArrowType key = 1;
-  ArrowType value = 2;
+    ArrowType key = 1;
+    ArrowType value = 2;
 }
 
 message Struct{
-  repeated Field sub_field_types = 1;
+    repeated Field sub_field_types = 1;
 }
 
 enum UnionMode{
@@ -704,14 +708,14 @@ enum UnionMode{
 }
 
 message Union{
-  repeated Field union_types = 1;
-  UnionMode union_mode = 2;
-  repeated int32 type_ids = 3;
+    repeated Field union_types = 1;
+    UnionMode union_mode = 2;
+    repeated int32 type_ids = 3;
 }
 
 message ScalarListValue{
-  Field field = 1;
-  repeated ScalarValue values = 2;
+    Field field = 1;
+    repeated ScalarValue values = 2;
 }
 
 message ScalarTimestampValue {
@@ -724,33 +728,60 @@ message ScalarTimestampValue {
   string timezone = 5;
 }
 
+message ScalarDictionaryValue {
+  ArrowType index_type = 1;
+  ScalarValue value = 2;
+}
+
+message IntervalMonthDayNanoValue {
+  int32 months = 1;
+  int32 days = 2;
+  int64 nanos = 3;
+}
+
+message StructValue {
+  // Note that a null struct value must have one or more fields, so we
+  // encode a null StructValue as one with an empty field_values
+  // list.
+  repeated ScalarValue field_values = 2;
+  repeated Field fields = 3;
+}
+
 message ScalarValue{
-  oneof value {
-    bool   bool_value = 1;
-    string utf8_value = 2;
-    string large_utf8_value = 3;
-    int32  int8_value = 4;
-    int32  int16_value = 5;
-    int32  int32_value = 6;
-    int64  int64_value = 7;
-    uint32 uint8_value = 8;
-    uint32 uint16_value = 9;
-    uint32 uint32_value = 10;
-    uint64 uint64_value = 11;
-    float  float32_value = 12;
-    double float64_value = 13;
-    //Literal Date32 value always has a unit of day
-    int32  date_32_value = 14;
-    ScalarListValue list_value = 17;
-    ScalarType null_list_value = 18;
-
-    PrimitiveScalarType null_value = 19;
-    Decimal128 decimal128_value = 20;
-    int64 date_64_value = 21;
-    int32 interval_yearmonth_value = 24;
-    int64 interval_daytime_value = 25;
-    ScalarTimestampValue timestamp_value = 26;
-  }
+    oneof value {
+        // Null value of any type (type is encoded)
+        PrimitiveScalarType null_value = 19;
+
+        bool   bool_value = 1;
+        string utf8_value = 2;
+        string large_utf8_value = 3;
+        int32  int8_value = 4;
+        int32  int16_value = 5;
+        int32  int32_value = 6;
+        int64  int64_value = 7;
+        uint32 uint8_value = 8;
+        uint32 uint16_value = 9;
+        uint32 uint32_value = 10;
+        uint64 uint64_value = 11;
+        float  float32_value = 12;
+        double float64_value = 13;
+        //Literal Date32 value always has a unit of day
+        int32  date_32_value = 14;
+        ScalarListValue list_value = 17;
+        ScalarType null_list_value = 18;
+
+        Decimal128 decimal128_value = 20;
+        int64 date_64_value = 21;
+        int32 interval_yearmonth_value = 24;
+        int64 interval_daytime_value = 25;
+        ScalarTimestampValue timestamp_value = 26;
+        ScalarDictionaryValue dictionary_value = 27;
+        bytes binary_value = 28;
+        bytes large_binary_value = 29;
+        int64 time64_value = 30;
+        IntervalMonthDayNanoValue interval_month_day_nano = 31;
+        StructValue struct_value = 32;
+    }
 }
 
 message Decimal128{
@@ -763,83 +794,88 @@ message Decimal128{
 // List
 enum PrimitiveScalarType{
 
-  BOOL = 0;     // arrow::Type::BOOL
-  UINT8 = 1;    // arrow::Type::UINT8
-  INT8 = 2;     // arrow::Type::INT8
-  UINT16 = 3;   // represents arrow::Type fields in src/arrow/type.h
-  INT16 = 4;
-  UINT32 = 5;
-  INT32 = 6;
-  UINT64 = 7;
-  INT64 = 8;
-  FLOAT32 = 9;
-  FLOAT64 = 10;
-  UTF8 = 11;
-  LARGE_UTF8 = 12;
-  DATE32 = 13;
-  TIME_MICROSECOND = 14;
-  TIME_NANOSECOND = 15;
-  NULL = 16;
-
-  DECIMAL128 = 17;
-  DATE64 = 20;
-  TIME_SECOND = 21;
-  TIME_MILLISECOND = 22;
-  INTERVAL_YEARMONTH = 23;
-  INTERVAL_DAYTIME = 24;
+    BOOL = 0;     // arrow::Type::BOOL
+    UINT8 = 1;    // arrow::Type::UINT8
+    INT8 = 2;     // arrow::Type::INT8
+    UINT16 = 3;   // represents arrow::Type fields in src/arrow/type.h
+    INT16 = 4;
+    UINT32 = 5;
+    INT32 = 6;
+    UINT64 = 7;
+    INT64 = 8;
+    FLOAT32 = 9;
+    FLOAT64 = 10;
+    UTF8 = 11;
+    LARGE_UTF8 = 12;
+    DATE32 = 13;
+    TIMESTAMP_MICROSECOND = 14;
+    TIMESTAMP_NANOSECOND = 15;
+    NULL = 16;
+    DECIMAL128 = 17;
+    DATE64 = 20;
+    TIMESTAMP_SECOND = 21;
+    TIMESTAMP_MILLISECOND = 22;
+    INTERVAL_YEARMONTH = 23;
+    INTERVAL_DAYTIME = 24;
+    INTERVAL_MONTHDAYNANO = 28;
+
+    BINARY = 25;
+    LARGE_BINARY = 26;
+
+    TIME64 = 27;
 }
 
 message ScalarType{
-  oneof datatype{
-    PrimitiveScalarType scalar = 1;
-    ScalarListType list = 2;
-  }
+    oneof datatype{
+        PrimitiveScalarType scalar = 1;
+        ScalarListType list = 2;
+    }
 }
 
 message ScalarListType{
-  repeated string field_names = 3;
-  PrimitiveScalarType deepest_type = 2;
+    repeated string field_names = 3;
+    PrimitiveScalarType deepest_type = 2;
 }
 
 // Broke out into multiple message types so that type
 // metadata did not need to be in separate message
-//All types that are of the empty message types contain no additional metadata
+// All types that are of the empty message types contain no additional metadata
 // about the type
 message ArrowType{
-  oneof arrow_type_enum{
-    EmptyMessage NONE = 1;     // arrow::Type::NA
-    EmptyMessage BOOL =  2;     // arrow::Type::BOOL
-    EmptyMessage UINT8 = 3;    // arrow::Type::UINT8
-    EmptyMessage INT8 =  4;     // arrow::Type::INT8
-    EmptyMessage UINT16 =5;   // represents arrow::Type fields in src/arrow/type.h
-    EmptyMessage INT16 = 6;
-    EmptyMessage UINT32 =7;
-    EmptyMessage INT32 = 8;
-    EmptyMessage UINT64 =9;
-    EmptyMessage INT64 =10 ;
-    EmptyMessage FLOAT16 =11 ;
-    EmptyMessage FLOAT32 =12 ;
-    EmptyMessage FLOAT64 =13 ;
-    EmptyMessage UTF8 =14 ;
-    EmptyMessage LARGE_UTF8 = 32;
-    EmptyMessage BINARY =15 ;
-    int32 FIXED_SIZE_BINARY =16 ;
-    EmptyMessage LARGE_BINARY = 31;
-    EmptyMessage DATE32 =17 ;
-    EmptyMessage DATE64 =18 ;
-    TimeUnit DURATION = 19;
-    Timestamp TIMESTAMP =20 ;
-    TimeUnit TIME32 =21 ;
-    TimeUnit TIME64 =22 ;
-    IntervalUnit INTERVAL =23 ;
-    Decimal DECIMAL =24 ;
-    List LIST =25;
-    List LARGE_LIST = 26;
-    FixedSizeList FIXED_SIZE_LIST = 27;
-    Struct STRUCT =28;
-    Union UNION =29;
-    Dictionary DICTIONARY =30;
-  }
+    oneof arrow_type_enum{
+        EmptyMessage NONE = 1;     // arrow::Type::NA
+        EmptyMessage BOOL =  2;     // arrow::Type::BOOL
+        EmptyMessage UINT8 = 3;    // arrow::Type::UINT8
+        EmptyMessage INT8 =  4;     // arrow::Type::INT8
+        EmptyMessage UINT16 =5;   // represents arrow::Type fields in src/arrow/type.h
+        EmptyMessage INT16 = 6;
+        EmptyMessage UINT32 =7;
+        EmptyMessage INT32 = 8;
+        EmptyMessage UINT64 =9;
+        EmptyMessage INT64 =10 ;
+        EmptyMessage FLOAT16 =11 ;
+        EmptyMessage FLOAT32 =12 ;
+        EmptyMessage FLOAT64 =13 ;
+        EmptyMessage UTF8 =14 ;
+        EmptyMessage LARGE_UTF8 = 32;
+        EmptyMessage BINARY =15 ;
+        int32 FIXED_SIZE_BINARY =16 ;
+        EmptyMessage LARGE_BINARY = 31;
+        EmptyMessage DATE32 =17 ;
+        EmptyMessage DATE64 =18 ;
+        TimeUnit DURATION = 19;
+        Timestamp TIMESTAMP =20 ;
+        TimeUnit TIME32 =21 ;
+        TimeUnit TIME64 =22 ;
+        IntervalUnit INTERVAL =23 ;
+        Decimal DECIMAL =24 ;
+        List LIST =25;
+        List LARGE_LIST = 26;
+        FixedSizeList FIXED_SIZE_LIST = 27;
+        Struct STRUCT =28;
+        Union UNION =29;
+        Dictionary DICTIONARY =30;
+    }
 }
 
 //Useful for representing an empty enum variant in rust