You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by al...@apache.org on 2018/09/27 07:51:38 UTC

[flink] branch master updated: [FLINK-1960] Add comments and docs for withForwardedFields and related operators. (#6753)

This is an automated email from the ASF dual-hosted git repository.

aljoscha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git


The following commit(s) were added to refs/heads/master by this push:
     new 22a0bf0  [FLINK-1960] Add comments and docs for withForwardedFields and related operators. (#6753)
22a0bf0 is described below

commit 22a0bf06bd05c024fb2fa94ad597a9dfdcf2a098
Author: Dimitris Palyvos <dm...@users.noreply.github.com>
AuthorDate: Thu Sep 27 09:51:29 2018 +0200

    [FLINK-1960] Add comments and docs for withForwardedFields and related operators. (#6753)
---
 .../scala/org/apache/flink/api/scala/DataSet.scala | 136 +++++++++++++++++++++
 1 file changed, 136 insertions(+)

diff --git a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
index 71037c3..c3a46c7 100644
--- a/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
+++ b/flink-scala/src/main/scala/org/apache/flink/api/scala/DataSet.scala
@@ -282,6 +282,49 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
     this
   }
 
+  /**
+    * Adds semantic information about forwarded fields of the user-defined function.
+    * The forwarded fields information declares fields which are never modified by the function and
+    * which are forwarded to the same position in the output or copied unchanged to another position
+    * in the output.
+    *
+    * <p>Fields that are forwarded to the same position are specified just by their position.
+    * The specified position must be valid for the input and output data type and have
+    * the same type.
+    * For example <code>withForwardedFields("_3")</code> declares that the third field of
+    * an input tuple is copied to the third field of an output tuple.
+    *
+    * <p>Fields which are copied to another position in the output unchanged are declared by
+    * specifying the source field reference in the input and the target field reference
+    * in the output.
+    * {@code withForwardedFields("_1->_3")} denotes that the first field of the input tuple is
+    * copied to the third field of the output tuple unchanged. When using a wildcard ("*") ensure
+    * that the number of declared fields and their types in input and output type match.
+    *
+    * <p>Multiple forwarded fields can be annotated in one
+    * ({@code withForwardedFields("_2; _3->_1; _4")})
+    * or separate Strings ({@code withForwardedFields("_2", "_3->_1", "_4")}).
+    * Please refer to the JavaDoc of {@link org.apache.flink.api.common.functions.Function}
+    * or Flink's documentation for details on field references such as nested fields and wildcard.
+    *
+    * <p>It is not possible to override existing semantic information about forwarded fields
+    * which was for example added by a
+    * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields} class
+    * annotation.
+    *
+    * <p><b>NOTE: Adding semantic information for functions is optional!
+    * If used correctly, semantic information can help the Flink optimizer to generate more
+    * efficient execution plans.
+    * However, incorrect semantic information can cause the optimizer to generate incorrect
+    * execution plans which compute wrong results!
+    * So be careful when adding semantic information.
+    * </b>
+    *
+    * @param forwardedFields A list of forwarded field expressions.
+    * @return This operator with annotated forwarded field information.
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields
+    */
   def withForwardedFields(forwardedFields: String*) = {
     javaSet match {
       case op: SingleInputUdfOperator[_, _, _] => op.withForwardedFields(forwardedFields: _*)
@@ -292,6 +335,52 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
     this
   }
 
+  /**
+    * Adds semantic information about forwarded fields of the first input
+    * of the user-defined function.
+    * The forwarded fields information declares fields which are never modified by the function and
+    * which are forwarded to the same position in the output or copied unchanged
+    * to another position in the output.
+    *
+    * <p>Fields that are forwarded to the same position are specified just by their position.
+    * The specified position must be valid for the input and output data type
+    * and have the same type.
+    * For example <code>withForwardedFieldsFirst("_3")</code> declares that the third field
+    * of an input tuple from the first input is copied to the third field of an output tuple.
+    *
+    * <p>Fields which are copied from the first input to another position in the output unchanged
+    * are declared by specifying the source field reference in the first input and the target field
+    * reference in the output. {@code withForwardedFieldsFirst("_1->_3")} denotes that the first
+    * field of the first input tuple is copied to the third field of the output tuple unchanged.
+    * When using a wildcard ("*") ensure that the number of declared fields and their types
+    * in the first input and output type match.
+    *
+    * <p>Multiple forwarded fields can be annotated in one
+    * ({@code withForwardedFieldsFirst("_2; _3->_1; _4")})
+    * or separate Strings ({@code withForwardedFieldsFirst("_2", "_3->_1", "_4")}).
+    * Please refer to the JavaDoc of
+    * {@link org.apache.flink.api.common.functions.Function} or Flink's documentation for
+    * details on field references such as nested fields and wildcard.
+    *
+    * <p>It is not possible to override existing semantic information about forwarded fields
+    * of the first input which was for example added by a
+    * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst}
+    * class annotation.
+    *
+    * <p><b>NOTE: Adding semantic information for functions is optional!
+    * If used correctly, semantic information can help the Flink optimizer to generate more
+    * efficient execution plans.
+    * However, incorrect semantic information can cause the optimizer to generate incorrect
+    * execution plans which compute wrong results!
+    * So be careful when adding semantic information.
+    * </b>
+    *
+    * @param forwardedFields A list of forwarded field expressions for the first input
+    *                        of the function.
+    * @return This operator with annotated forwarded field information.
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsFirst
+    */
   def withForwardedFieldsFirst(forwardedFields: String*) = {
     javaSet match {
       case op: TwoInputUdfOperator[_, _, _, _] => op.withForwardedFieldsFirst(forwardedFields: _*)
@@ -302,6 +391,53 @@ class DataSet[T: ClassTag](set: JavaDataSet[T]) {
     this
   }
 
+  /**
+    * Adds semantic information about forwarded fields of the second input
+    * of the user-defined function.
+    * The forwarded fields information declares fields which are never modified by the function and
+    * which are forwarded to the same position in the output or copied unchanged
+    * to another position in the output.
+    *
+    * <p>Fields that are forwarded to the same position are specified just by their position.
+    * The specified position must be valid for the input and output data type
+    * and have the same type.
+    * For example <code>withForwardedFieldsSecond("_3")</code> declares that the third field
+    * of an input tuple from the second input is copied to the third field of an output tuple.
+    *
+    * <p>Fields which are copied from the second input to another position
+    * in the output unchanged are declared by specifying the source field reference
+    * in the second input and the target field reference in the output.
+    * {@code withForwardedFieldsSecond("_1->_3")} denotes that the first field of the second input
+    * tuple is copied to the third field of the output tuple unchanged. When using a wildcard ("*")
+    * ensure that the number of declared fields and their types in the second input and
+    * output type match.
+    *
+    * <p>Multiple forwarded fields can be annotated in one
+    * ({@code withForwardedFieldsSecond("_2; _3->_1; _4")})
+    * or separate Strings ({@code withForwardedFieldsSecond("_2", "_3->_1", "_4")}).
+    * Please refer to the JavaDoc of
+    * {@link org.apache.flink.api.common.functions.Function} or Flink's documentation for
+    * details on field references such as nested fields and wildcard.
+    *
+    * <p>It is not possible to override existing semantic information about forwarded fields
+    * of the second input which was for example added by a
+    * {@link org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond}
+    * class annotation.
+    *
+    * <p><b>NOTE: Adding semantic information for functions is optional!
+    * If used correctly, semantic information can help the Flink optimizer to generate more
+    * efficient execution plans.
+    * However, incorrect semantic information can cause the optimizer to generate incorrect
+    * execution plans which compute wrong results!
+    * So be careful when adding semantic information.
+    * </b>
+    *
+    * @param forwardedFields A list of forwarded field expressions for the second input
+    *                        of the function.
+    * @return This operator with annotated forwarded field information.
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation
+    * @see org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFieldsSecond
+    */
   def withForwardedFieldsSecond(forwardedFields: String*) = {
     javaSet match {
       case op: TwoInputUdfOperator[_, _, _, _] => op.withForwardedFieldsSecond(forwardedFields: _*)