You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by mx...@apache.org on 2020/01/06 18:37:05 UTC

[beam] 02/02: Merge pull request #10027: [BEAM-8577] Initialize FileSystems in coder for Reshuffle

This is an automated email from the ASF dual-hosted git repository.

mxm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git

commit 785609f22d013411b7973bbf9e2d15c3c8171fb2
Merge: c9c239f 2dc923b
Author: Maximilian Michels <mx...@apache.org>
AuthorDate: Mon Jan 6 19:30:10 2020 +0100

    Merge pull request #10027: [BEAM-8577] Initialize FileSystems in coder for Reshuffle

 .../translation/types/CoderTypeSerializer.java     | 19 ++++++++++
 .../translation/types/CoderTypeSerializer.java     | 19 ++++++++++
 .../flink/FlinkBatchTransformTranslators.java      | 24 ++++++++++---
 .../functions/FlinkIdentityFunction.java           | 42 ++++++++++++++++++++++
 .../translation/types/CoderTypeInformation.java    | 26 ++++++++++++--
 5 files changed, 124 insertions(+), 6 deletions(-)

diff --cc runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
index 229eca5,27c9fba..28351d5
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
@@@ -306,11 -308,19 +308,25 @@@ class FlinkBatchTransformTranslators 
      @Override
      public void translateNode(
          Reshuffle<K, InputT> transform, FlinkBatchTranslationContext context) {
- 
-       DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+       final DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
            context.getInputDataSet(context.getInput(transform));
- 
-       context.setOutputDataSet(context.getOutput(transform), inputDataSet.rebalance());
++      // Construct an instance of CoderTypeInformation which contains the pipeline options.
++      // This will be used to initialized FileSystems.
+       @SuppressWarnings("unchecked")
+       final CoderTypeInformation<WindowedValue<KV<K, InputT>>> outputType =
+           ((CoderTypeInformation) inputDataSet.getType())
+               .withPipelineOptions(context.getPipelineOptions());
++      // We insert a NOOP here to initialize the FileSystems via the above CoderTypeInformation.
++      // The output type coder may be relying on file system access. The shuffled data may have to
++      // be deserialized on a different machine using this coder where FileSystems has not been
++      // initialized.
+       final DataSet<WindowedValue<KV<K, InputT>>> retypedDataSet =
+           new MapOperator<>(
+               inputDataSet,
+               outputType,
+               FlinkIdentityFunction.of(),
+               getCurrentTransformName(context));
+       context.setOutputDataSet(context.getOutput(transform), retypedDataSet.rebalance());
      }
    }
  
diff --cc runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java
index 0000000,be3db7c..f9128e7
mode 000000,100644..100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkIdentityFunction.java
@@@ -1,0 -1,42 +1,42 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+ package org.apache.beam.runners.flink.translation.functions;
+ 
+ import org.apache.flink.api.common.functions.MapFunction;
+ 
+ /**
+  * A map function that outputs the input element without any change.
+  *
+  * @param <T> Input element type.
+  */
+ public class FlinkIdentityFunction<T> implements MapFunction<T, T> {
+ 
 -  private static FlinkIdentityFunction<?> INSTANCE = new FlinkIdentityFunction<>();
++  private static final FlinkIdentityFunction<?> INSTANCE = new FlinkIdentityFunction<>();
+ 
+   @SuppressWarnings("unchecked")
+   public static <T> FlinkIdentityFunction<T> of() {
+     return (FlinkIdentityFunction) INSTANCE;
+   }
+ 
+   private FlinkIdentityFunction() {}
+ 
+   @Override
+   public T map(T value) {
+     return value;
+   }
+ }