You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by bc...@apache.org on 2016/04/01 03:09:03 UTC
[1/2] incubator-beam git commit: DebuggingWordCount now takes filter as an option
Repository: incubator-beam
Updated Branches:
refs/heads/master 061e6b5d8 -> e5bca60de
DebuggingWordCount now takes filter as an option
Previously the filter pattern was hard-coded as "Flourish|stomach".
It is now a PipelineOption (filterPattern) with that value as the default.
This allows "breaking" the pipeline by mis-specifying the pattern
in the pipeline arguments, without changing the code.
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/abb24cff
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/abb24cff
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/abb24cff
Branch: refs/heads/master
Commit: abb24cff479b714fec8a61d85af18bdea0a6aa16
Parents: 061e6b5
Author: bchambers <bc...@google.com>
Authored: Thu Mar 31 15:18:09 2016 -0700
Committer: bchambers <bc...@google.com>
Committed: Thu Mar 31 17:55:07 2016 -0700
----------------------------------------------------------------------
.../dataflow/examples/DebuggingWordCount.java | 20 +++++++++++++++++--
.../src/main/java/DebuggingWordCount.java | 21 ++++++++++++++++++--
2 files changed, 37 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
index 1f76181..331d7c6 100644
--- a/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
+++ b/examples/java/src/main/java/com/google/cloud/dataflow/examples/DebuggingWordCount.java
@@ -17,9 +17,10 @@
*/
package com.google.cloud.dataflow.examples;
-import com.google.cloud.dataflow.examples.WordCount.WordCountOptions;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
@@ -151,6 +152,21 @@ public class DebuggingWordCount {
}
}
+ /**
+ * Options supported by {@link DebuggingWordCount}.
+ *
+ * <p>Inherits standard configuration options and all options defined in
+ * {@link WordCount.WordCountOptions}.
+ */
+ public static interface WordCountOptions extends WordCount.WordCountOptions {
+
+ @Description("Regex filter pattern to use in DebuggingWordCount. "
+ + "Only words matching this pattern will be counted.")
+ @Default.String("Flourish|stomach")
+ String getFilterPattern();
+ void setFilterPattern(String value);
+ }
+
public static void main(String[] args) {
WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
.as(WordCountOptions.class);
@@ -159,7 +175,7 @@ public class DebuggingWordCount {
PCollection<KV<String, Long>> filteredWords =
p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
.apply(new WordCount.CountWords())
- .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+ .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));
/**
* Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/abb24cff/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
index 905670f..32fca4e 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/src/main/java/DebuggingWordCount.java
@@ -17,9 +17,11 @@
*/
package ${package};
-import ${package}.WordCount.WordCountOptions;
+import ${package}.WordCount;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
+import com.google.cloud.dataflow.sdk.options.Default;
+import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.testing.DataflowAssert;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
@@ -150,6 +152,21 @@ public class DebuggingWordCount {
}
}
}
+
+ /**
+ * Options supported by {@link DebuggingWordCount}.
+ *
+ * <p>Inherits standard configuration options and all options defined in
+ * {@link WordCount.WordCountOptions}.
+ */
+ public static interface WordCountOptions extends WordCount.WordCountOptions {
+
+ @Description("Regex filter pattern to use in DebuggingWordCount. "
+ + "Only words matching this pattern will be counted.")
+ @Default.String("Flourish|stomach")
+ String getFilterPattern();
+ void setFilterPattern(String value);
+ }
public static void main(String[] args) {
WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
@@ -159,7 +176,7 @@ public class DebuggingWordCount {
PCollection<KV<String, Long>> filteredWords =
p.apply(TextIO.Read.named("ReadLines").from(options.getInputFile()))
.apply(new WordCount.CountWords())
- .apply(ParDo.of(new FilterTextFn("Flourish|stomach")));
+ .apply(ParDo.of(new FilterTextFn(options.getFilterPattern())));
/**
* Concept #4: DataflowAssert is a set of convenient PTransforms in the style of
[2/2] incubator-beam git commit: This closes #108
Posted by bc...@apache.org.
This closes #108
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/e5bca60d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/e5bca60d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/e5bca60d
Branch: refs/heads/master
Commit: e5bca60de2e94c0ba33592533cea73605e39eaf3
Parents: 061e6b5 abb24cf
Author: bchambers <bc...@google.com>
Authored: Thu Mar 31 17:55:36 2016 -0700
Committer: bchambers <bc...@google.com>
Committed: Thu Mar 31 17:55:36 2016 -0700
----------------------------------------------------------------------
.../dataflow/examples/DebuggingWordCount.java | 20 +++++++++++++++++--
.../src/main/java/DebuggingWordCount.java | 21 ++++++++++++++++++--
2 files changed, 37 insertions(+), 4 deletions(-)
----------------------------------------------------------------------