You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2021/08/30 21:23:57 UTC

[GitHub] [arrow] jorgecarleitao commented on a change in pull request #10934: [RFC] Arrow Compute Serialized Intermediate Representation draft for discussion

jorgecarleitao commented on a change in pull request #10934:
URL: https://github.com/apache/arrow/pull/10934#discussion_r698818377



##########
File path: format/experimental/computeir/Expression.fbs
##########
@@ -0,0 +1,351 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "../../Schema.fbs";
+include "Literal.fbs";
+include "InlineBuffer.fbs";
+
+namespace org.apache.arrow.computeir.flatbuf;
+
+/// Access a value for a given map key
+table MapKey {
+  key: string (required);
+}
+
+/// Struct field access
+table StructField {
+  /// The position of the field in the struct schema
+  position: uint32;
+}
+
+/// Zero-based array index
+table ArraySubscript {
+  position: uint32;
+}
+
+/// Zero-based range of elements in an array
+table ArraySlice {
+  /// The start of an array slice, inclusive
+  start_inclusive: uint32;
+  /// The end of an array slice, exclusive
+  end_exclusive: uint32;
+}
+
+/// Field name in a relation
+table FieldName {
+  position: uint32;
+}
+
+/// A union of possible dereference operations
+union Deref {
+  /// Access a value for a given map key
+  MapKey,
+  /// Access the value at a struct field
+  StructField,
+  /// Access the element at a given index in an array
+  ArraySubscript,
+  /// Access a range of elements in an array
+  ArraySlice,
+  /// Access a field of a relation
+  FieldName,
+}
+
+/// Access the data of a field
+table FieldRef {
+  /// A sequence of field names to allow referencing potentially nested fields
+  ref: Deref (required);
+  /// For Expressions which might reference fields in multiple Relations,
+  /// this index may be provided to indicate which Relation's fields
+  /// `ref` points into. For example in the case of a join,
+  /// 0 refers to the left relation and 1 to the right relation.
+  relation_index: int;
+}
+
+/// A canonical (probably SQL equivalent) function
+//
+// TODO: variadics
+enum CanonicalFunctionId : uint32 {
+  // logical
+  And,
+  Not,
+  Or,
+
+  // arithmetic
+  Add,
+  Subtract,
+  Multiply,
+  Divide,
+  Power,
+  AbsoluteValue,
+  Negate,
+  Sign,
+
+  // date/time/timestamp operations
+  DateSub,
+  DateAdd,
+  DateDiff,
+  TimeAdd,
+  TimeSub,
+  TimeDiff,
+  TimestampAdd,
+  TimestampSub,
+  TimestampDiff,
+
+  // comparison
+  Equals,
+  NotEquals,
+  Greater,
+  GreaterEqual,
+  Less,
+  LessEqual,
+}
+
+table CanonicalFunction {
+  id: CanonicalFunctionId;
+}
+
+table NonCanonicalFunction {
+  name_space: string;
+  name: string (required);
+}
+
+union FunctionImpl {
+  CanonicalFunction,
+  NonCanonicalFunction,
+}
+
+/// A function call expression
+table Call {
+  /// The kind of function call this is.
+  kind: FunctionImpl (required);
+
+  /// The arguments passed to the function identified by `kind`.
+  arguments: [Expression] (required);
+
+  /// Parameters for the function identified by `kind`; content/format may be
+  /// unique to each function.
+  metadata: InlineBuffer;
+}
+
+/// A single WHEN x THEN y fragment.
+table CaseFragment {
+  when: Expression (required);
+  then: Expression (required);
+}
+
+/// Case statement-style expression.
+table Case {
+  cases: [CaseFragment] (required);
+  /// The default value if no cases match. This is typically NULL in SQL
+  /// implementations.
+  ///
+  /// Defaulting to NULL is a frontend choice, so producers must specify NULL
+  /// if that's their desired behavior.
+  default: Expression (required);
+
+  /// Parameters for `function_name`; content/format may be unique to each
+  /// value of `function_name`.
+  metadata: InlineBuffer;
+}
+
+table Cast {
+  /// The expression to cast
+  expression: Expression (required);
+
+  /// The type to cast `expression` to.
+  type: org.apache.arrow.flatbuf.Field (required);
+
+  /// Parameters for `function_name`; content/format may be unique to each
+  /// value of `function_name`.
+  metadata: InlineBuffer;
+}
+
+table Extract {
+  /// Expression from which to extract components.
+  expression: Expression (required);
+
+  /// Field to extract from `expression`.
+  field: string (required);
+
+  /// Parameters for `function_name`; content/format may be unique to each
+  /// value of `function_name`.
+  metadata: InlineBuffer;
+}
+
+/// Whether lesser values should precede greater or vice versa,
+/// also whether nulls should precede or follow values.
+enum Ordering : uint8 {
+  ASCENDING_THEN_NULLS,
+  DESCENDING_THEN_NULLS,
+  NULLS_THEN_ASCENDING,
+  NULLS_THEN_DESCENDING

Review comment:
       Why not follow the IEEE754 recommendation?
   
   * Iceberg uses it: https://github.com/apache/iceberg/blob/master/site/docs/spec.md#sorting
   * Java uses it
   * Rust lang follows it in practice (see e.g. https://github.com/rust-lang/rust/issues/5585 and [float-ord](https://crates.io/crates/float-ord))




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org