You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by bo...@apache.org on 2020/12/10 21:47:43 UTC

[beam] branch master updated: Add SDF capability_matrix.

This is an automated email from the ASF dual-hosted git repository.

boyuanz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 5e1c544  Add SDF capability_matrix.
     new 1481977  Merge pull request #13368 from [BEAM-10480] Add SDF capability_matrix.
5e1c544 is described below

commit 5e1c544c46789c7bb9a75f6d29e84d2f7fbc5126
Author: Boyuan Zhang <bo...@google.com>
AuthorDate: Tue Nov 17 17:03:22 2020 -0800

    Add SDF capability_matrix.
---
 website/www/site/data/capability_matrix.yaml       | 714 +++++++++++++++++++--
 .../www/site/data/capability_matrix_snapshot.yaml  | 195 ++++++
 2 files changed, 862 insertions(+), 47 deletions(-)

diff --git a/website/www/site/data/capability_matrix.yaml b/website/www/site/data/capability_matrix.yaml
index 8c2b79f..dacddee 100644
--- a/website/www/site/data/capability_matrix.yaml
+++ b/website/www/site/data/capability_matrix.yaml
@@ -36,6 +36,10 @@ capability-matrix:
       name: Hazelcast Jet
     - class: twister2
       name: Twister2
+    - class: python direct
+      name: Python Direct FnRunner
+    - class: go direct
+      name: Go Direct Runner
 
   categories:
     - description: What is being computed?
@@ -95,6 +99,14 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: GroupByKey
           values:
             - class: model
@@ -145,6 +157,14 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Flatten
           values:
             - class: model
@@ -195,6 +215,14 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Combine
           values:
             - class: model
@@ -245,6 +273,14 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Composite Transforms
           values:
             - class: model
@@ -295,6 +331,14 @@ capability-matrix:
               l1: "Partially"
               l2: supported via inlining
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Side Inputs
           values:
             - class: model
@@ -345,6 +389,14 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Source API
           values:
             - class: model
@@ -395,55 +447,13 @@ capability-matrix:
               l1: "Yes"
               l2: fully supported
               l3: ""
-        - name: Splittable DoFn (SDF)
-          values:
-            - class: model
-              l1: "Partially"
-              l2: DoFn where processing of each element can be split for parallelism, or suspended and resumed
-              l3: Allows users to develop DoFn's that process a single element in portions ("restrictions"), executed in parallel or sequentially. This supersedes the unbounded and bounded `Source` APIs by supporting all of their features on a per-element basis. See http://s.apache.org/splittable-do-fn. Design is in progress on achieving parity with Source API regarding progress signals.
-            - class: dataflow
-              l1: "Yes"
-              l2:
-              l3: Does not yet support autotuning features of the Source API.
-            - class: flink
-              l1: "Yes"
+            - class: python direct
+              l1: ""
               l2:
-              l3:
-            - class: spark-rdd
-              l1: "Partially"
-              l2: supports bounded-per-element SDFs
-              l3:
-            - class: spark-dataset
-              l1: "No"
-              l2: not implemented
-              l3:
-            - class: mapreduce
-              l1: "No"
-              l2: not implemented
-              l3:
-            - class: jstorm
-              l1: "No"
-              l2: not implemented
-              l3:
-            - class: ibmstreams
-              l1: "No"
-              l2: not implemented
-              l3:
-            - class: samza
-              l1: "Partially"
-              l2: supports bounded-per-element SDFs
-              l3:
-            - class: nemo
-              l1: "No"
-              l2: not implemented
-              l3: ""
-            - class: jet
-              l1: "No"
-              l2: not implemented
               l3: ""
-            - class: twister2
-              l1: "Partially"
-              l2: supports bounded
+            - class: go direct
+              l1: ""
+              l2:
               l3: ""
         - name: Metrics
           values:
@@ -495,6 +505,14 @@ capability-matrix:
               l1: "No"
               l2: not implemented
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
         - name: Stateful Processing
           values:
             - class: model
@@ -545,6 +563,608 @@ capability-matrix:
               l1: "No"
               l2: not implemented
               l3: ""
+            - class: python direct
+              l1: ""
+              l2:
+              l3: ""
+            - class: go direct
+              l1: ""
+              l2:
+              l3: ""
+    - description: Bounded Splittable DoFn Support Status
+      anchor: what
+      color-b: "ca1"
+      color-y: "ec3"
+      color-p: "fe5"
+      color-n: "ddd"
+      rows:
+        - name: Base
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "Yes"
+              l2:
+              l3:
+        - name: Side Inputs
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1:
+              l2:
+              l3:
+            - class: go direct
+              l1: "Yes"
+              l2:
+              l3:
+        - name: Splittable DoFn Initiated Checkpointing
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+        - name: Dynamic Splitting
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2: Only with Python SDK
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+        - name: Bundle Finalization
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+    - description: Unbounded Splittable DoFn Support Status
+      anchor: what
+      color-b: "ca1"
+      color-y: "ec3"
+      color-p: "fe5"
+      color-n: "ddd"
+      rows:
+        - name: Base
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+        - name: Side Inputs
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1:
+              l2:
+              l3:
+            - class: go direct
+              l1: "Yes"
+              l2:
+              l3:
+        - name: Splittable DoFn Initiated Checkpointing
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+        - name: Dynamic Splitting
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "No"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "No"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
+        - name: Bundle Finalization
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this with checkpointing enabled.
+              l3: ""
+            - class: spark-rdd
+              l1:
+              l2:
+              l3: ""
+            - class: spark-dataset
+              l1:
+              l2:
+              l3: ""
+            - class: mapreduce
+              l1:
+              l2:
+              l3:
+            - class: jstorm
+              l1:
+              l2:
+              l3: ""
+            - class: ibmstreams
+              l1:
+              l2:
+              l3: ""
+            - class: samza
+              l1:
+              l2:
+              l3: ""
+            - class: nemo
+              l1:
+              l2:
+              l3: ""
+            - class: jet
+              l1:
+              l2:
+              l3: ""
+            - class: twister2
+              l1:
+              l2:
+              l3: ""
+            - class: python direct
+              l1: "Yes"
+              l2:
+              l3:
+            - class: go direct
+              l1: "No"
+              l2:
+              l3:
     - description: Where in event time?
       anchor: where
       color-b: "37d"
diff --git a/website/www/site/data/capability_matrix_snapshot.yaml b/website/www/site/data/capability_matrix_snapshot.yaml
index 112efc2..74f5601 100644
--- a/website/www/site/data/capability_matrix_snapshot.yaml
+++ b/website/www/site/data/capability_matrix_snapshot.yaml
@@ -565,3 +565,198 @@ capability-matrix-snapshot:
               l1: "No"
               l2: pending model support
               l3: ""
+
+    - description: Bounded Splittable DoFn Support Status
+      anchor: what
+      color-b: "ca1"
+      color-y: "ec3"
+      color-p: "fe5"
+      color-n: "ddd"
+      rows:
+        - name: Base
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Side Inputs
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3:
+        - name: Splittable DoFn Initiated Checkpointing
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Dynamic Splitting
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Bundle Finalization
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+    - description: Unbounded Splittable DoFn Support Status
+      anchor: what
+      color-b: "ca1"
+      color-y: "ec3"
+      color-p: "fe5"
+      color-n: "ddd"
+      rows:
+        - name: Base
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Side Inputs
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this.
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Splittable DoFn Initiated Checkpointing
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "Yes"
+              l2:
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Dynamic Splitting
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "No"
+              l2:
+              l3: ""
+            - class: flink
+              l1: "No"
+              l2:
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""
+        - name: Bundle Finalization
+          values:
+            - class: model
+              l1: "Yes"
+              l2:
+              l3:
+            - class: dataflow
+              l1: "Partially"
+              l2: Only Dataflow Runner V2 supports this.
+              l3: ""
+            - class: flink
+              l1: "Partially"
+              l2: Only portable Flink Runner supports this with checkpointing enabled.
+              l3: ""
+            - class: spark
+              l1:
+              l2:
+              l3: ""