You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2016/11/08 03:41:19 UTC
[20/50] incubator-beam git commit: Format Regex according to style guidelines
Format Regex according to style guidelines
Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/79b04551
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/79b04551
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/79b04551
Branch: refs/heads/gearpump-runner
Commit: 79b04551c7c9f964908ab4a1d95119ef8a7fff84
Parents: 6954abe
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Nov 7 10:10:59 2016 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Nov 7 10:10:59 2016 -0800
----------------------------------------------------------------------
.../org/apache/beam/sdk/transforms/Regex.java | 525 +++++++++----------
.../apache/beam/sdk/transforms/RegexTest.java | 106 ++--
2 files changed, 283 insertions(+), 348 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/79b04551/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
index 27104f6..a94130d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
@@ -24,22 +24,17 @@ import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
/**
- * {@code PTransorm}s to use Regular Expressions to process elements in a
- * {@link PCollection}.
+ * {@code PTransform}s to use Regular Expressions to process elements in a {@link PCollection}.
*
- * <p>
- * {@link Regex#matches(String, int)} can be used to see if an entire line matches
- * a Regex. {@link Regex#matchesKV(String, int, int)} can be used to see if an entire
- * line matches a Regex and output certain groups as a {@link KV}.
- * </p>
- * <p>
- * {@link Regex#find(String, int)} can be used to see if a portion of a line
- * matches a Regex. {@link Regex#matchesKV(String, int, int)} can be used to see if a
- * portion of a line matches a Regex and output certain groups as a {@link KV}.
- * </p>
- * <p>
- * Lines that do not match the Regex will not be output.
- * </p>
+ * <p>{@link Regex#matches(String, int)} can be used to see if an entire line matches a Regex.
+ * {@link Regex#matchesKV(String, int, int)} can be used to see if an entire line matches a Regex
+ * and output certain groups as a {@link KV}.
+ *
+ * <p>{@link Regex#find(String, int)} can be used to see if a portion of a line matches a Regex.
+ * {@link Regex#findKV(String, int, int)} can be used to see if a portion of a line matches a
+ * Regex and output certain groups as a {@link KV}.
+ *
+ * <p>Lines that do not match the Regex will not be output.
*/
public class Regex {
private Regex() {
@@ -47,159 +42,135 @@ public class Regex {
}
/**
- * Returns a {@link Regex.Matches} {@link PTransform} that checks if
- * the entire line matches the Regex. Returns the entire line (group 0) as a
- * {@link PCollection}.
- * @param regex
- * The regular expression to run
+ * Returns a {@link Regex.Matches} {@link PTransform} that checks if the entire line matches the
+ * Regex. Returns the entire line (group 0) as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
*/
public static Matches matches(String regex) {
return matches(regex, 0);
}
/**
- * Returns a {@link Regex.Matches} {@link PTransform} that checks if
- * the entire line matches the Regex. Returns the group as a
- * {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param group
- * The Regex group to return as a PCollection
+ * Returns a {@link Regex.Matches} {@link PTransform} that checks if the entire line matches the
+ * Regex. Returns the group as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param group The Regex group to return as a PCollection
*/
public static Matches matches(String regex, int group) {
return new Matches(regex, group);
}
/**
- * Returns a {@link Regex.MatchesKV} {@link PTransform} that checks
- * if the entire line matches the Regex. Returns the specified groups as the
- * key and value as a {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param keyGroup
- * The Regex group to use as the key
- * @param valueGroup
- * The Regex group to use the value
+ * Returns a {@link Regex.MatchesKV} {@link PTransform} that checks if the entire line matches the
+ * Regex. Returns the specified groups as the key and value as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param keyGroup The Regex group to use as the key
+ * @param valueGroup The Regex group to use the value
*/
- public static MatchesKV matchesKV(String regex, int keyGroup,
- int valueGroup) {
+ public static MatchesKV matchesKV(String regex, int keyGroup, int valueGroup) {
return new MatchesKV(regex, keyGroup, valueGroup);
}
/**
- * Returns a {@link Regex.Find} {@link PTransform} that checks if a
- * portion of the line matches the Regex. Returns the entire line (group 0) as
- * a {@link PCollection}.
- * @param regex
- * The regular expression to run
+ * Returns a {@link Regex.Find} {@link PTransform} that checks if a portion of the line matches
+ * the Regex. Returns the entire line (group 0) as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
*/
public static Find find(String regex) {
return find(regex, 0);
}
/**
- * Returns a {@link Regex.Find} {@link PTransform} that checks if a
- * portion of the line matches the Regex. Returns the group as a
- * {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param group
- * The Regex group to return as a PCollection
+ * Returns a {@link Regex.Find} {@link PTransform} that checks if a portion of the line matches
+ * the Regex. Returns the group as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param group The Regex group to return as a PCollection
*/
public static Find find(String regex, int group) {
return new Find(regex, group);
}
/**
- * Returns a {@link Regex.FindKV} {@link PTransform} that checks if a
- * portion of the line matches the Regex. Returns the specified groups as the
- * key and value as a {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param keyGroup
- * The Regex group to use as the key
- * @param valueGroup
- * The Regex group to use the value
+ * Returns a {@link Regex.FindKV} {@link PTransform} that checks if a portion of the line matches
+ * the Regex. Returns the specified groups as the key and value as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param keyGroup The Regex group to use as the key
+ * @param valueGroup The Regex group to use the value
*/
public static FindKV findKV(String regex, int keyGroup, int valueGroup) {
return new FindKV(regex, keyGroup, valueGroup);
}
/**
- * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a
- * portion of the line matches the Regex and replaces all matches with the replacement
- * String. Returns the group as a {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param replacement
- * The string to be substituted for each match
+ * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a portion of the line
+ * matches the Regex and replaces all matches with the replacement String. Returns the group as a
+ * {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param replacement The string to be substituted for each match
*/
public static ReplaceAll replaceAll(String regex, String replacement) {
return new ReplaceAll(regex, replacement);
}
/**
- * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a
- * portion of the line matches the Regex and replaces the first match with the replacement
- * String. Returns the group as a {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param replacement
- * The string to be substituted for each match
+ * Returns a {@link Regex.ReplaceFirst} {@link PTransform} that checks if a portion of the line
+ * matches the Regex and replaces the first match with the replacement String. Returns the group
+ * as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param replacement The string to be substituted for each match
*/
public static ReplaceFirst replaceFirst(String regex, String replacement) {
return new ReplaceFirst(regex, replacement);
}
- /**
- * Returns a {@link Regex.Split} {@link PTransform} that splits a string
- * on the regular expression and then outputs each item. It will not output empty
- * items. Returns the group as a {@link PCollection}.
- * a {@link PCollection}.
- * @param regex
- * The regular expression to run
+ /**
+ * Returns a {@link Regex.Split} {@link PTransform} that splits a string on the regular expression
+ * and then outputs each item. It will not output empty items. Returns the group as a {@link
+ * PCollection}.
+ *
+ * @param regex The regular expression to run
*/
public static Split split(String regex) {
return split(regex, false);
}
/**
- * Returns a {@link Regex.Split} {@link PTransform} that splits a string
- * on the regular expression and then outputs each item. Returns the group as a
- * {@link PCollection}.
- * @param regex
- * The regular expression to run
- * @param outputEmpty
- * Should empty be output. True to output empties and false if not.
+ * Returns a {@link Regex.Split} {@link PTransform} that splits a string on the regular expression
+ * and then outputs each item. Returns the group as a {@link PCollection}.
+ *
+ * @param regex The regular expression to run
+ * @param outputEmpty Should empty be output. True to output empties and false if not.
*/
public static Split split(String regex, boolean outputEmpty) {
return new Split(regex, outputEmpty);
}
/**
- * {@code Regex.Matches<String>} takes a {@code PCollection<String>}
- * and returns a {@code PCollection<String>} representing the value
- * extracted from the Regex groups of the input {@code PCollection}
- * to the number of times that element occurs in the input.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If the entire line
- * does not match the Regex, the line will not be output. If it does match the
- * entire line, the group in the Regex will be used. The output will be the
- * Regex group.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.Matches<String>} takes a {@code PCollection<String>} and returns a {@code
+ * PCollection<String>} representing the value extracted from the Regex groups of the input {@code
+ * PCollection} to the number of times that element occurs in the input.
+ *
+ * <p>This transform runs a Regex on the entire input line. If the entire line does not match the
+ * Regex, the line will not be output. If it does match the entire line, the group in the Regex
+ * will be used. The output will be the Regex group.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<String> values =
* words.apply(Regex.matches("myregex (mygroup)", 1));
- * }
- * </pre>
+ * }</pre>
*/
- public static class Matches
- extends PTransform<PCollection<String>, PCollection<String>> {
+ public static class Matches extends PTransform<PCollection<String>, PCollection<String>> {
Pattern pattern;
int group;
@@ -209,42 +180,38 @@ public class Regex {
}
public PCollection<String> apply(PCollection<String> in) {
- return in
- .apply(ParDo.of(new DoFn<String, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
-
- if (m.matches()) {
- c.output(m.group(group));
- }
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+
+ if (m.matches()) {
+ c.output(m.group(group));
+ }
+ }
+ }));
}
}
/**
- * {@code Regex.MatchesKV<KV<String, String>>} takes a
- * {@code PCollection<String>} and returns a
- * {@code PCollection<KV<String, String>>} representing the key and value
- * extracted from the Regex groups of the input {@code PCollection} to the
- * number of times that element occurs in the input.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If the entire line
- * does not match the Regex, the line will not be output. If it does match the
- * entire line, the groups in the Regex will be used. The key will be the
- * key's group and the value will be the value's group.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.MatchesKV<KV<String, String>>} takes a {@code PCollection<String>} and returns a
+ * {@code PCollection<KV<String, String>>} representing the key and value extracted from the Regex
+ * groups of the input {@code PCollection} to the number of times that element occurs in the
+ * input.
+ *
+ * <p>This transform runs a Regex on the entire input line. If the entire line does not match the
+ * Regex, the line will not be output. If it does match the entire line, the groups in the Regex
+ * will be used. The key will be the key's group and the value will be the value's group.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<KV<String, String>> keysAndValues =
* words.apply(Regex.matchesKV("myregex (mykeygroup) (myvaluegroup)", 1, 2));
- * }
- * </pre>
+ * }</pre>
*/
public static class MatchesKV
extends PTransform<PCollection<String>, PCollection<KV<String, String>>> {
@@ -258,44 +225,39 @@ public class Regex {
}
public PCollection<KV<String, String>> apply(PCollection<String> in) {
- return in.apply(ParDo
- .of(new DoFn<String, KV<String, String>>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
-
- if (m.find()) {
- c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
- }
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, KV<String, String>>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+
+ if (m.find()) {
+ c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
+ }
+ }
+ }));
}
}
/**
- * {@code Regex.Find<String>} takes a {@code PCollection<String>} and
- * returns a {@code PCollection<String>} representing the value extracted
- * from the Regex groups of the input {@code PCollection} to
- * the number of times that element occurs in the input.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If a portion of the
- * line does not match the Regex, the line will not be output. If it does
- * match a portion of the line, the group in the Regex will be used. The
- * output will be the Regex group.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.Find<String>} takes a {@code PCollection<String>} and returns a {@code
+ * PCollection<String>} representing the value extracted from the Regex groups of the input {@code
+ * PCollection} to the number of times that element occurs in the input.
+ *
+ * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+ * match the Regex, the line will not be output. If it does match a portion of the line, the group
+ * in the Regex will be used. The output will be the Regex group.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<String> values =
* words.apply(Regex.find("myregex (mygroup)", 1));
- * }
- * </pre>
+ * }</pre>
*/
- public static class Find
- extends PTransform<PCollection<String>, PCollection<String>> {
+ public static class Find extends PTransform<PCollection<String>, PCollection<String>> {
Pattern pattern;
int group;
@@ -305,41 +267,39 @@ public class Regex {
}
public PCollection<String> apply(PCollection<String> in) {
- return in.apply(ParDo.of(new DoFn<String, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
-
- if (m.find()) {
- c.output(m.group(group));
- }
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+
+ if (m.find()) {
+ c.output(m.group(group));
+ }
+ }
+ }));
}
}
/**
- * {@code Regex.MatchesKV<KV<String, String>>} takes a
- * {@code PCollection<String>} and returns a
- * {@code PCollection<KV<String, String>>} representing the key and value
- * extracted from the Regex groups of the input {@code PCollection} to the
- * number of times that element occurs in the input.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If a portion of the
- * line does not match the Regex, the line will not be output. If it does
- * match a portion of the line, the groups in the Regex will be used. The key
- * will be the key's group and the value will be the value's group.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.FindKV<KV<String, String>>} takes a {@code PCollection<String>} and returns a
+ * {@code PCollection<KV<String, String>>} representing the key and value extracted from the Regex
+ * groups of the input {@code PCollection} to the number of times that element occurs in the
+ * input.
+ *
+ * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+ * match the Regex, the line will not be output. If it does match a portion of the line, the
+ * groups in the Regex will be used. The key will be the key's group and the value will be the
+ * value's group.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<KV<String, String>> keysAndValues =
* words.apply(Regex.findKV("myregex (mykeygroup) (myvaluegroup)", 1, 2));
- * }
- * </pre>
+ * }</pre>
*/
public static class FindKV
extends PTransform<PCollection<String>, PCollection<KV<String, String>>> {
@@ -354,42 +314,38 @@ public class Regex {
public PCollection<KV<String, String>> apply(PCollection<String> in) {
return in.apply(
- ParDo.of(new DoFn<String, KV<String, String>>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
-
- if (m.find()) {
- c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
- }
- }
- }));
+ ParDo.of(
+ new DoFn<String, KV<String, String>>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+
+ if (m.find()) {
+ c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
+ }
+ }
+ }));
}
}
/**
- * {@code Regex.ReplaceAll<String>} takes a {@code PCollection<String>} and
- * returns a {@code PCollection<String>} with all Strings that matched the
- * Regex being replaced with the replacement string.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If a portion of the
- * line does not match the Regex, the line will be output without changes. If it does
- * match a portion of the line, all portions matching the Regex will be replaced
- * with the replacement String.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.ReplaceAll<String>} takes a {@code PCollection<String>} and returns a {@code
+ * PCollection<String>} with all Strings that matched the Regex being replaced with the
+ * replacement string.
+ *
+ * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+ * match the Regex, the line will be output without changes. If it does match a portion of the
+ * line, all portions matching the Regex will be replaced with the replacement String.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<String> values =
* words.apply(Regex.replaceAll("myregex", "myreplacement"));
- * }
- * </pre>
+ * }</pre>
*/
- public static class ReplaceAll
- extends PTransform<PCollection<String>, PCollection<String>> {
+ public static class ReplaceAll extends PTransform<PCollection<String>, PCollection<String>> {
Pattern pattern;
String replacement;
@@ -399,39 +355,36 @@ public class Regex {
}
public PCollection<String> apply(PCollection<String> in) {
- return in.apply(ParDo.of(new DoFn<String, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
- c.output(m.replaceAll(replacement));
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+ c.output(m.replaceAll(replacement));
+ }
+ }));
}
}
/**
- * {@code Regex.ReplaceFirst<String>} takes a {@code PCollection<String>} and
- * returns a {@code PCollection<String>} with the first Strings that matched the
- * Regex being replaced with the replacement string.
- *
- * <p>
- * This transform runs a Regex on the entire input line. If a portion of the
- * line does not match the Regex, the line will be output without changes. If it does
- * match a portion of the line, the first portion matching the Regex will be replaced
- * with the replacement String.
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.ReplaceFirst<String>} takes a {@code PCollection<String>} and returns a {@code
+ * PCollection<String>} with the first Strings that matched the Regex being replaced with the
+ * replacement string.
+ *
+ * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+ * match the Regex, the line will be output without changes. If it does match a portion of the
+ * line, the first portion matching the Regex will be replaced with the replacement String.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<String> values =
* words.apply(Regex.replaceFirst("myregex", "myreplacement"));
- * }
- * </pre>
+ * }</pre>
*/
- public static class ReplaceFirst
- extends PTransform<PCollection<String>, PCollection<String>> {
+ public static class ReplaceFirst extends PTransform<PCollection<String>, PCollection<String>> {
Pattern pattern;
String replacement;
@@ -441,44 +394,38 @@ public class Regex {
}
public PCollection<String> apply(PCollection<String> in) {
- return in.apply(ParDo.of(new DoFn<String, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Matcher m = pattern.matcher((String) c.element());
- c.output(m.replaceFirst(replacement));
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Matcher m = pattern.matcher((String) c.element());
+ c.output(m.replaceFirst(replacement));
+ }
+ }));
}
}
/**
- * {@code Regex.Split<String>} takes a {@code PCollection<String>} and
- * returns a {@code PCollection<String>} with the input string split into
- * individual items in a list. Each item is then output as a separate string.
- *
- * <p>
- * This transform runs a Regex as part of a splint the entire input line. The split
- * gives back an array of items. Each item is output as a separate item in the
- * {@code PCollection<String>}.
- * </p>
- *
- * <p>
- * Depending on the Regex, a split can be an empty or
- * "" string. You can pass in a parameter if you want empty strings or not.
- * </p>
- *
- * <p>
- * Example of use:
- * <pre>
- * {@code
+ * {@code Regex.Split<String>} takes a {@code PCollection<String>} and returns a {@code
+ * PCollection<String>} with the input string split into individual items in a list. Each item is
+ * then output as a separate string.
+ *
+ * <p>This transform runs a Regex as part of splitting the entire input line. The split gives back
+ * an array of items. Each item is output as a separate item in the {@code PCollection<String>}.
+ *
+ * <p>Depending on the Regex, a split can be an empty or "" string. You can pass in a parameter if
+ * you want empty strings or not.
+ *
+ * <p>Example of use:
+ *
+ * <pre>{@code
* PCollection<String> words = ...;
* PCollection<String> values =
* words.apply(Regex.split("\W*"));
- * }
- * </pre>
+ * }</pre>
*/
- public static class Split
- extends PTransform<PCollection<String>, PCollection<String>> {
+ public static class Split extends PTransform<PCollection<String>, PCollection<String>> {
Pattern pattern;
boolean outputEmpty;
@@ -488,18 +435,20 @@ public class Regex {
}
public PCollection<String> apply(PCollection<String> in) {
- return in.apply(ParDo.of(new DoFn<String, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- String[] items = pattern.split(c.element());
-
- for (String item : items) {
- if (outputEmpty || !item.isEmpty()) {
- c.output(item);
- }
- }
- }
- }));
+ return in.apply(
+ ParDo.of(
+ new DoFn<String, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ String[] items = pattern.split(c.element());
+
+ for (String item : items) {
+ if (outputEmpty || !item.isEmpty()) {
+ c.output(item);
+ }
+ }
+ }
+ }));
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/79b04551/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
index 71f080e..6e196b4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
@@ -30,9 +30,7 @@ import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-/**
- * Tests for {@link Regex}.
- */
+/** Tests for {@link Regex}. */
@RunWith(JUnit4.class)
public class RegexTest implements Serializable {
@Test
@@ -40,9 +38,8 @@ public class RegexTest implements Serializable {
public void testFind() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("aj", "xj", "yj", "zj"))
- .apply(Regex.find("[xyz]"));
+ PCollection<String> output =
+ p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("[xyz]"));
PAssert.that(output).containsInAnyOrder("x", "y", "z");
p.run();
@@ -53,9 +50,8 @@ public class RegexTest implements Serializable {
public void testFindGroup() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("aj", "xj", "yj", "zj"))
- .apply(Regex.find("([xyz])", 1));
+ PCollection<String> output =
+ p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("([xyz])", 1));
PAssert.that(output).containsInAnyOrder("x", "y", "z");
p.run();
@@ -66,9 +62,7 @@ public class RegexTest implements Serializable {
public void testFindNone() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("a", "b", "c", "d"))
- .apply(Regex.find("[xyz]"));
+ PCollection<String> output = p.apply(Create.of("a", "b", "c", "d")).apply(Regex.find("[xyz]"));
PAssert.that(output).empty();
p.run();
@@ -79,9 +73,8 @@ public class RegexTest implements Serializable {
public void testKVFind() {
TestPipeline p = TestPipeline.create();
- PCollection<KV<String, String>> output = p
- .apply(Create.of("a b c"))
- .apply(Regex.findKV("a (b) (c)", 1, 2));
+ PCollection<KV<String, String>> output =
+ p.apply(Create.of("a b c")).apply(Regex.findKV("a (b) (c)", 1, 2));
PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
p.run();
@@ -92,9 +85,8 @@ public class RegexTest implements Serializable {
public void testKVFindNone() {
TestPipeline p = TestPipeline.create();
- PCollection<KV<String, String>> output = p
- .apply(Create.of("x y z"))
- .apply(Regex.findKV("a (b) (c)", 1, 2));
+ PCollection<KV<String, String>> output =
+ p.apply(Create.of("x y z")).apply(Regex.findKV("a (b) (c)", 1, 2));
PAssert.that(output).empty();
p.run();
@@ -105,9 +97,8 @@ public class RegexTest implements Serializable {
public void testMatches() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("a", "x", "y", "z"))
- .apply(Regex.matches("[xyz]"));
+ PCollection<String> output =
+ p.apply(Create.of("a", "x", "y", "z")).apply(Regex.matches("[xyz]"));
PAssert.that(output).containsInAnyOrder("x", "y", "z");
p.run();
@@ -118,9 +109,8 @@ public class RegexTest implements Serializable {
public void testMatchesNone() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("a", "b", "c", "d"))
- .apply(Regex.matches("[xyz]"));
+ PCollection<String> output =
+ p.apply(Create.of("a", "b", "c", "d")).apply(Regex.matches("[xyz]"));
PAssert.that(output).empty();
p.run();
@@ -131,9 +121,8 @@ public class RegexTest implements Serializable {
public void testMatchesGroup() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("a", "x xxx", "x yyy", "x zzz"))
- .apply(Regex.matches("x ([xyz]*)", 1));
+ PCollection<String> output =
+ p.apply(Create.of("a", "x xxx", "x yyy", "x zzz")).apply(Regex.matches("x ([xyz]*)", 1));
PAssert.that(output).containsInAnyOrder("xxx", "yyy", "zzz");
p.run();
@@ -144,9 +133,8 @@ public class RegexTest implements Serializable {
public void testKVMatches() {
TestPipeline p = TestPipeline.create();
- PCollection<KV<String, String>> output = p
- .apply(Create.of("a b c"))
- .apply(Regex.matchesKV("a (b) (c)", 1, 2));
+ PCollection<KV<String, String>> output =
+ p.apply(Create.of("a b c")).apply(Regex.matchesKV("a (b) (c)", 1, 2));
PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
p.run();
@@ -157,9 +145,8 @@ public class RegexTest implements Serializable {
public void testKVMatchesNone() {
TestPipeline p = TestPipeline.create();
- PCollection<KV<String, String>> output = p
- .apply(Create.of("x y z"))
- .apply(Regex.matchesKV("a (b) (c)", 1, 2));
+ PCollection<KV<String, String>> output =
+ p.apply(Create.of("x y z")).apply(Regex.matchesKV("a (b) (c)", 1, 2));
PAssert.that(output).empty();
p.run();
}
@@ -169,9 +156,8 @@ public class RegexTest implements Serializable {
public void testReplaceAll() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("xj", "yj", "zj"))
- .apply(Regex.replaceAll("[xyz]", "new"));
+ PCollection<String> output =
+ p.apply(Create.of("xj", "yj", "zj")).apply(Regex.replaceAll("[xyz]", "new"));
PAssert.that(output).containsInAnyOrder("newj", "newj", "newj");
p.run();
@@ -182,9 +168,8 @@ public class RegexTest implements Serializable {
public void testReplaceAllMixed() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("abc", "xj", "yj", "zj", "def"))
- .apply(Regex.replaceAll("[xyz]", "new"));
+ PCollection<String> output =
+ p.apply(Create.of("abc", "xj", "yj", "zj", "def")).apply(Regex.replaceAll("[xyz]", "new"));
PAssert.that(output).containsInAnyOrder("abc", "newj", "newj", "newj", "def");
p.run();
@@ -195,9 +180,8 @@ public class RegexTest implements Serializable {
public void testReplaceFirst() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("xjx", "yjy", "zjz"))
- .apply(Regex.replaceFirst("[xyz]", "new"));
+ PCollection<String> output =
+ p.apply(Create.of("xjx", "yjy", "zjz")).apply(Regex.replaceFirst("[xyz]", "new"));
PAssert.that(output).containsInAnyOrder("newjx", "newjy", "newjz");
p.run();
@@ -208,9 +192,9 @@ public class RegexTest implements Serializable {
public void testReplaceFirstMixed() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("abc", "xjx", "yjy", "zjz", "def"))
- .apply(Regex.replaceFirst("[xyz]", "new"));
+ PCollection<String> output =
+ p.apply(Create.of("abc", "xjx", "yjy", "zjz", "def"))
+ .apply(Regex.replaceFirst("[xyz]", "new"));
PAssert.that(output).containsInAnyOrder("abc", "newjx", "newjy", "newjz", "def");
p.run();
@@ -221,12 +205,12 @@ public class RegexTest implements Serializable {
public void testSplits() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("The quick brown fox jumps over the lazy dog"))
- .apply(Regex.split("\\W+"));
+ PCollection<String> output =
+ p.apply(Create.of("The quick brown fox jumps over the lazy dog"))
+ .apply(Regex.split("\\W+"));
- PAssert.that(output).containsInAnyOrder("The", "quick", "brown",
- "fox", "jumps", "over", "the", "lazy", "dog");
+ PAssert.that(output)
+ .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
p.run();
}
@@ -235,14 +219,16 @@ public class RegexTest implements Serializable {
public void testSplitsWithEmpty() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("The quick brown fox jumps over the lazy dog"))
- .apply(Regex.split("\\s", true));
+ PCollection<String> output =
+ p.apply(Create.of("The quick brown fox jumps over the lazy dog"))
+ .apply(Regex.split("\\s", true));
String[] outputStr = "The quick brown fox jumps over the lazy dog".split("\\s");
- PAssert.that(output).containsInAnyOrder("The", "", "quick", "brown", "", "",
- "fox", "jumps", "over", "", "", "", "the", "lazy", "dog");
+ PAssert.that(output)
+ .containsInAnyOrder(
+ "The", "", "quick", "brown", "", "", "fox", "jumps", "over", "", "", "", "the", "lazy",
+ "dog");
p.run();
}
@@ -251,12 +237,12 @@ public class RegexTest implements Serializable {
public void testSplitsWithoutEmpty() {
TestPipeline p = TestPipeline.create();
- PCollection<String> output = p
- .apply(Create.of("The quick brown fox jumps over the lazy dog"))
- .apply(Regex.split("\\s", false));
+ PCollection<String> output =
+ p.apply(Create.of("The quick brown fox jumps over the lazy dog"))
+ .apply(Regex.split("\\s", false));
- PAssert.that(output).containsInAnyOrder("The", "quick", "brown",
- "fox", "jumps", "over", "the", "lazy", "dog");
+ PAssert.that(output)
+ .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
p.run();
}
}