You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ke...@apache.org on 2016/11/08 03:41:19 UTC

[20/50] incubator-beam git commit: Format Regex according to style guidelines

Format Regex according to style guidelines


Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/79b04551
Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/79b04551
Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/79b04551

Branch: refs/heads/gearpump-runner
Commit: 79b04551c7c9f964908ab4a1d95119ef8a7fff84
Parents: 6954abe
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Nov 7 10:10:59 2016 -0800
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Nov 7 10:10:59 2016 -0800

----------------------------------------------------------------------
 .../org/apache/beam/sdk/transforms/Regex.java   | 525 +++++++++----------
 .../apache/beam/sdk/transforms/RegexTest.java   | 106 ++--
 2 files changed, 283 insertions(+), 348 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/79b04551/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
index 27104f6..a94130d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Regex.java
@@ -24,22 +24,17 @@ import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 
 /**
- * {@code PTransorm}s to use Regular Expressions to process elements in a
- * {@link PCollection}.
+ * {@code PTransform}s to use Regular Expressions to process elements in a {@link PCollection}.
  *
- * <p>
- * {@link Regex#matches(String, int)} can be used to see if an entire line matches
- * a Regex. {@link Regex#matchesKV(String, int, int)} can be used to see if an entire
- * line matches a Regex and output certain groups as a {@link KV}.
- * </p>
- * <p>
- * {@link Regex#find(String, int)} can be used to see if a portion of a line
- * matches a Regex. {@link Regex#matchesKV(String, int, int)} can be used to see if a
- * portion of a line matches a Regex and output certain groups as a {@link KV}.
- * </p>
- * <p>
- * Lines that do not match the Regex will not be output.
- * </p>
+ * <p>{@link Regex#matches(String, int)} can be used to see if an entire line matches a Regex.
+ * {@link Regex#matchesKV(String, int, int)} can be used to see if an entire line matches a Regex
+ * and output certain groups as a {@link KV}.
+ *
+ * <p>{@link Regex#find(String, int)} can be used to see if a portion of a line matches a Regex.
+ * {@link Regex#findKV(String, int, int)} can be used to see if a portion of a line matches a
+ * Regex and output certain groups as a {@link KV}.
+ *
+ * <p>Lines that do not match the Regex will not be output.
  */
 public class Regex {
   private Regex() {
@@ -47,159 +42,135 @@ public class Regex {
   }
 
   /**
-   * Returns a {@link Regex.Matches} {@link PTransform} that checks if
-   * the entire line matches the Regex. Returns the entire line (group 0) as a
-   * {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
+   * Returns a {@link Regex.Matches} {@link PTransform} that checks if the entire line matches the
+   * Regex. Returns the entire line (group 0) as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
    */
   public static Matches matches(String regex) {
     return matches(regex, 0);
   }
 
   /**
-   * Returns a {@link Regex.Matches} {@link PTransform} that checks if
-   * the entire line matches the Regex. Returns the group as a
-   * {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param group
-   *          The Regex group to return as a PCollection
+   * Returns a {@link Regex.Matches} {@link PTransform} that checks if the entire line matches the
+   * Regex. Returns the group as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param group The Regex group to return as a PCollection
    */
   public static Matches matches(String regex, int group) {
     return new Matches(regex, group);
   }
 
   /**
-   * Returns a {@link Regex.MatchesKV} {@link PTransform} that checks
-   * if the entire line matches the Regex. Returns the specified groups as the
-   * key and value as a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param keyGroup
-   *          The Regex group to use as the key
-   * @param valueGroup
-   *          The Regex group to use the value
+   * Returns a {@link Regex.MatchesKV} {@link PTransform} that checks if the entire line matches the
+   * Regex. Returns the specified groups as the key and value as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param keyGroup The Regex group to use as the key
+   * @param valueGroup The Regex group to use as the value
    */
-  public static MatchesKV matchesKV(String regex, int keyGroup,
-      int valueGroup) {
+  public static MatchesKV matchesKV(String regex, int keyGroup, int valueGroup) {
     return new MatchesKV(regex, keyGroup, valueGroup);
   }
 
   /**
-   * Returns a {@link Regex.Find} {@link PTransform} that checks if a
-   * portion of the line matches the Regex. Returns the entire line (group 0) as
-   * a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
+   * Returns a {@link Regex.Find} {@link PTransform} that checks if a portion of the line matches
+   * the Regex. Returns the entire line (group 0) as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
    */
   public static Find find(String regex) {
     return find(regex, 0);
   }
 
   /**
-   * Returns a {@link Regex.Find} {@link PTransform} that checks if a
-   * portion of the line matches the Regex. Returns the group as a
-   * {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param group
-   *          The Regex group to return as a PCollection
+   * Returns a {@link Regex.Find} {@link PTransform} that checks if a portion of the line matches
+   * the Regex. Returns the group as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param group The Regex group to return as a PCollection
    */
   public static Find find(String regex, int group) {
     return new Find(regex, group);
   }
 
   /**
-   * Returns a {@link Regex.FindKV} {@link PTransform} that checks if a
-   * portion of the line matches the Regex. Returns the specified groups as the
-   * key and value as a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param keyGroup
-   *          The Regex group to use as the key
-   * @param valueGroup
-   *          The Regex group to use the value
+   * Returns a {@link Regex.FindKV} {@link PTransform} that checks if a portion of the line matches
+   * the Regex. Returns the specified groups as the key and value as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param keyGroup The Regex group to use as the key
+   * @param valueGroup The Regex group to use as the value
    */
   public static FindKV findKV(String regex, int keyGroup, int valueGroup) {
     return new FindKV(regex, keyGroup, valueGroup);
   }
 
   /**
-   * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a
-   * portion of the line matches the Regex and replaces all matches with the replacement
-   * String. Returns the group as a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param replacement
-   *          The string to be substituted for each match
+   * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a portion of the line
+   * matches the Regex and replaces all matches with the replacement String. Returns the group as a
+   * {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param replacement The string to be substituted for each match
    */
   public static ReplaceAll replaceAll(String regex, String replacement) {
     return new ReplaceAll(regex, replacement);
   }
 
   /**
-   * Returns a {@link Regex.ReplaceAll} {@link PTransform} that checks if a
-   * portion of the line matches the Regex and replaces the first match with the replacement
-   * String. Returns the group as a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param replacement
-   *          The string to be substituted for each match
+   * Returns a {@link Regex.ReplaceFirst} {@link PTransform} that checks if a portion of the line
+   * matches the Regex and replaces the first match with the replacement String. Returns the group
+   * as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param replacement The string to be substituted for the first match
    */
   public static ReplaceFirst replaceFirst(String regex, String replacement) {
     return new ReplaceFirst(regex, replacement);
   }
 
-    /**
-   * Returns a {@link Regex.Split} {@link PTransform} that splits a string
-   * on the regular expression and then outputs each item. It will not output empty
-   * items. Returns the group as a {@link PCollection}.
-   * a {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
+  /**
+   * Returns a {@link Regex.Split} {@link PTransform} that splits a string on the regular expression
+   * and then outputs each item. It will not output empty items. Returns the group as a {@link
+   * PCollection}.
+   *
+   * @param regex The regular expression to run
    */
   public static Split split(String regex) {
     return split(regex, false);
   }
 
   /**
-   * Returns a {@link Regex.Split} {@link PTransform} that splits a string
-   * on the regular expression and then outputs each item. Returns the group as a
-   * {@link PCollection}.
-   * @param regex
-   *          The regular expression to run
-   * @param outputEmpty
-   *          Should empty be output. True to output empties and false if not.
+   * Returns a {@link Regex.Split} {@link PTransform} that splits a string on the regular expression
+   * and then outputs each item. Returns the group as a {@link PCollection}.
+   *
+   * @param regex The regular expression to run
+   * @param outputEmpty Should empty be output. True to output empties and false if not.
    */
   public static Split split(String regex, boolean outputEmpty) {
     return new Split(regex, outputEmpty);
   }
 
   /**
-   * {@code Regex.Matches<String>} takes a {@code PCollection<String>}
-   * and returns a {@code PCollection<String>} representing the value
-   * extracted from the Regex groups of the input {@code PCollection}
-   * to the number of times that element occurs in the input.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If the entire line
-   * does not match the Regex, the line will not be output. If it does match the
-   * entire line, the group in the Regex will be used. The output will be the
-   * Regex group.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.Matches<String>} takes a {@code PCollection<String>} and returns a {@code
+   * PCollection<String>} representing the value extracted from the Regex groups of the input {@code
+   * PCollection} to the number of times that element occurs in the input.
+   *
+   * <p>This transform runs a Regex on the entire input line. If the entire line does not match the
+   * Regex, the line will not be output. If it does match the entire line, the group in the Regex
+   * will be used. The output will be the Regex group.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<String> values =
    *     words.apply(Regex.matches("myregex (mygroup)", 1));
-   * }
-   * </pre>
+   * }</pre>
    */
-  public static class Matches
-      extends PTransform<PCollection<String>, PCollection<String>> {
+  public static class Matches extends PTransform<PCollection<String>, PCollection<String>> {
     Pattern pattern;
     int group;
 
@@ -209,42 +180,38 @@ public class Regex {
     }
 
     public PCollection<String> apply(PCollection<String> in) {
-      return in
-          .apply(ParDo.of(new DoFn<String, String>() {
-            @ProcessElement
-            public void processElement(ProcessContext c) throws Exception {
-              Matcher m = pattern.matcher((String) c.element());
-
-              if (m.matches()) {
-                c.output(m.group(group));
-              }
-            }
-          }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, String>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+
+                  if (m.matches()) {
+                    c.output(m.group(group));
+                  }
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.MatchesKV<KV<String, String>>} takes a
-   * {@code PCollection<String>} and returns a
-   * {@code PCollection<KV<String, String>>} representing the key and value
-   * extracted from the Regex groups of the input {@code PCollection} to the
-   * number of times that element occurs in the input.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If the entire line
-   * does not match the Regex, the line will not be output. If it does match the
-   * entire line, the groups in the Regex will be used. The key will be the
-   * key's group and the value will be the value's group.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.MatchesKV<KV<String, String>>} takes a {@code PCollection<String>} and returns a
+   * {@code PCollection<KV<String, String>>} representing the key and value extracted from the Regex
+   * groups of the input {@code PCollection} to the number of times that element occurs in the
+   * input.
+   *
+   * <p>This transform runs a Regex on the entire input line. If the entire line does not match the
+   * Regex, the line will not be output. If it does match the entire line, the groups in the Regex
+   * will be used. The key will be the key's group and the value will be the value's group.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<KV<String, String>> keysAndValues =
    *     words.apply(Regex.matchesKV("myregex (mykeygroup) (myvaluegroup)", 1, 2));
-   * }
-   * </pre>
+   * }</pre>
    */
   public static class MatchesKV
       extends PTransform<PCollection<String>, PCollection<KV<String, String>>> {
@@ -258,44 +225,39 @@ public class Regex {
     }
 
     public PCollection<KV<String, String>> apply(PCollection<String> in) {
-      return in.apply(ParDo
-          .of(new DoFn<String, KV<String, String>>() {
-            @ProcessElement
-            public void processElement(ProcessContext c) throws Exception {
-              Matcher m = pattern.matcher((String) c.element());
-
-              if (m.find()) {
-                c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
-              }
-            }
-          }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, KV<String, String>>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+
+                  if (m.find()) {
+                    c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
+                  }
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.Find<String>} takes a {@code PCollection<String>} and
-   * returns a {@code PCollection<String>} representing the value extracted
-   * from the Regex groups of the input {@code PCollection} to
-   * the number of times that element occurs in the input.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If a portion of the
-   * line does not match the Regex, the line will not be output. If it does
-   * match a portion of the line, the group in the Regex will be used. The
-   * output will be the Regex group.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.Find<String>} takes a {@code PCollection<String>} and returns a {@code
+   * PCollection<String>} representing the value extracted from the Regex groups of the input {@code
+   * PCollection} to the number of times that element occurs in the input.
+   *
+   * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+   * match the Regex, the line will not be output. If it does match a portion of the line, the group
+   * in the Regex will be used. The output will be the Regex group.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<String> values =
    *     words.apply(Regex.find("myregex (mygroup)", 1));
-   * }
-   * </pre>
+   * }</pre>
    */
-  public static class Find
-      extends PTransform<PCollection<String>, PCollection<String>> {
+  public static class Find extends PTransform<PCollection<String>, PCollection<String>> {
     Pattern pattern;
     int group;
 
@@ -305,41 +267,39 @@ public class Regex {
     }
 
     public PCollection<String> apply(PCollection<String> in) {
-      return in.apply(ParDo.of(new DoFn<String, String>() {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-          Matcher m = pattern.matcher((String) c.element());
-
-          if (m.find()) {
-            c.output(m.group(group));
-          }
-        }
-      }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, String>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+
+                  if (m.find()) {
+                    c.output(m.group(group));
+                  }
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.MatchesKV<KV<String, String>>} takes a
-   * {@code PCollection<String>} and returns a
-   * {@code PCollection<KV<String, String>>} representing the key and value
-   * extracted from the Regex groups of the input {@code PCollection} to the
-   * number of times that element occurs in the input.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If a portion of the
-   * line does not match the Regex, the line will not be output. If it does
-   * match a portion of the line, the groups in the Regex will be used. The key
-   * will be the key's group and the value will be the value's group.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.FindKV<KV<String, String>>} takes a {@code PCollection<String>} and returns a
+   * {@code PCollection<KV<String, String>>} representing the key and value extracted from the Regex
+   * groups of the input {@code PCollection} to the number of times that element occurs in the
+   * input.
+   *
+   * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+   * match the Regex, the line will not be output. If it does match a portion of the line, the
+   * groups in the Regex will be used. The key will be the key's group and the value will be the
+   * value's group.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<KV<String, String>> keysAndValues =
    *     words.apply(Regex.findKV("myregex (mykeygroup) (myvaluegroup)", 1, 2));
-   * }
-   * </pre>
+   * }</pre>
    */
   public static class FindKV
       extends PTransform<PCollection<String>, PCollection<KV<String, String>>> {
@@ -354,42 +314,38 @@ public class Regex {
 
     public PCollection<KV<String, String>> apply(PCollection<String> in) {
       return in.apply(
-          ParDo.of(new DoFn<String, KV<String, String>>() {
-            @ProcessElement
-            public void processElement(ProcessContext c) throws Exception {
-              Matcher m = pattern.matcher((String) c.element());
-
-              if (m.find()) {
-                c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
-              }
-            }
-          }));
+          ParDo.of(
+              new DoFn<String, KV<String, String>>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+
+                  if (m.find()) {
+                    c.output(KV.of(m.group(keyGroup), m.group(valueGroup)));
+                  }
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.ReplaceAll<String>} takes a {@code PCollection<String>} and
-   * returns a {@code PCollection<String>} with all Strings that matched the
-   * Regex being replaced with the replacement string.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If a portion of the
-   * line does not match the Regex, the line will be output without changes. If it does
-   * match a portion of the line, all portions matching the Regex will be replaced
-   * with the replacement String.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.ReplaceAll<String>} takes a {@code PCollection<String>} and returns a {@code
+   * PCollection<String>} with all Strings that matched the Regex being replaced with the
+   * replacement string.
+   *
+   * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+   * match the Regex, the line will be output without changes. If it does match a portion of the
+   * line, all portions matching the Regex will be replaced with the replacement String.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<String> values =
    *     words.apply(Regex.replaceAll("myregex", "myreplacement"));
-   * }
-   * </pre>
+   * }</pre>
    */
-  public static class ReplaceAll
-      extends PTransform<PCollection<String>, PCollection<String>> {
+  public static class ReplaceAll extends PTransform<PCollection<String>, PCollection<String>> {
     Pattern pattern;
     String replacement;
 
@@ -399,39 +355,36 @@ public class Regex {
     }
 
     public PCollection<String> apply(PCollection<String> in) {
-      return in.apply(ParDo.of(new DoFn<String, String>() {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-          Matcher m = pattern.matcher((String) c.element());
-          c.output(m.replaceAll(replacement));
-        }
-      }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, String>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+                  c.output(m.replaceAll(replacement));
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.ReplaceFirst<String>} takes a {@code PCollection<String>} and
-   * returns a {@code PCollection<String>} with the first Strings that matched the
-   * Regex being replaced with the replacement string.
-   *
-   * <p>
-   * This transform runs a Regex on the entire input line. If a portion of the
-   * line does not match the Regex, the line will be output without changes. If it does
-   * match a portion of the line, the first portion matching the Regex will be replaced
-   * with the replacement String.
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.ReplaceFirst<String>} takes a {@code PCollection<String>} and returns a {@code
+   * PCollection<String>} with the first Strings that matched the Regex being replaced with the
+   * replacement string.
+   *
+   * <p>This transform runs a Regex on the entire input line. If a portion of the line does not
+   * match the Regex, the line will be output without changes. If it does match a portion of the
+   * line, the first portion matching the Regex will be replaced with the replacement String.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<String> values =
    *     words.apply(Regex.replaceFirst("myregex", "myreplacement"));
-   * }
-   * </pre>
+   * }</pre>
    */
-  public static class ReplaceFirst
-      extends PTransform<PCollection<String>, PCollection<String>> {
+  public static class ReplaceFirst extends PTransform<PCollection<String>, PCollection<String>> {
     Pattern pattern;
     String replacement;
 
@@ -441,44 +394,38 @@ public class Regex {
     }
 
     public PCollection<String> apply(PCollection<String> in) {
-      return in.apply(ParDo.of(new DoFn<String, String>() {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-          Matcher m = pattern.matcher((String) c.element());
-          c.output(m.replaceFirst(replacement));
-        }
-      }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, String>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  Matcher m = pattern.matcher((String) c.element());
+                  c.output(m.replaceFirst(replacement));
+                }
+              }));
     }
   }
 
   /**
-   * {@code Regex.Split<String>} takes a {@code PCollection<String>} and
-   * returns a {@code PCollection<String>} with the input string split into
-   * individual items in a list. Each item is then output as a separate string.
-   *
-   * <p>
-   * This transform runs a Regex as part of a splint the entire input line. The split
-   * gives back an array of items. Each item is output as a separate item in the
-   * {@code PCollection<String>}.
-   * </p>
-   *
-   * <p>
-   * Depending on the Regex, a split can be an empty or
-   * "" string. You can pass in a parameter if you want empty strings or not.
-   * </p>
-   *
-   * <p>
-   * Example of use:
-   * <pre>
-   *  {@code
+   * {@code Regex.Split<String>} takes a {@code PCollection<String>} and returns a {@code
+   * PCollection<String>} with the input string split into individual items in a list. Each item is
+   * then output as a separate string.
+   *
+   * <p>This transform runs a Regex as part of splitting the entire input line. The split gives back
+   * an array of items. Each item is output as a separate item in the {@code PCollection<String>}.
+   *
+   * <p>Depending on the Regex, a split can be an empty or "" string. You can pass in a parameter if
+   * you want empty strings or not.
+   *
+   * <p>Example of use:
+   *
+   * <pre>{@code
    * PCollection<String> words = ...;
    * PCollection<String> values =
    *     words.apply(Regex.split("\W*"));
-   * }
-   * </pre>
+   * }</pre>
    */
-  public static class Split
-      extends PTransform<PCollection<String>, PCollection<String>> {
+  public static class Split extends PTransform<PCollection<String>, PCollection<String>> {
     Pattern pattern;
     boolean outputEmpty;
 
@@ -488,18 +435,20 @@ public class Regex {
     }
 
     public PCollection<String> apply(PCollection<String> in) {
-      return in.apply(ParDo.of(new DoFn<String, String>() {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-          String[] items = pattern.split(c.element());
-
-          for (String item : items) {
-            if (outputEmpty || !item.isEmpty()) {
-              c.output(item);
-            }
-          }
-        }
-      }));
+      return in.apply(
+          ParDo.of(
+              new DoFn<String, String>() {
+                @ProcessElement
+                public void processElement(ProcessContext c) throws Exception {
+                  String[] items = pattern.split(c.element());
+
+                  for (String item : items) {
+                    if (outputEmpty || !item.isEmpty()) {
+                      c.output(item);
+                    }
+                  }
+                }
+              }));
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/79b04551/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
index 71f080e..6e196b4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/RegexTest.java
@@ -30,9 +30,7 @@ import org.junit.experimental.categories.Category;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
 
-/**
- * Tests for {@link Regex}.
- */
+/** Tests for {@link Regex}. */
 @RunWith(JUnit4.class)
 public class RegexTest implements Serializable {
   @Test
@@ -40,9 +38,8 @@ public class RegexTest implements Serializable {
   public void testFind() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("aj", "xj", "yj", "zj"))
-        .apply(Regex.find("[xyz]"));
+    PCollection<String> output =
+        p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("[xyz]"));
 
     PAssert.that(output).containsInAnyOrder("x", "y", "z");
     p.run();
@@ -53,9 +50,8 @@ public class RegexTest implements Serializable {
   public void testFindGroup() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("aj", "xj", "yj", "zj"))
-        .apply(Regex.find("([xyz])", 1));
+    PCollection<String> output =
+        p.apply(Create.of("aj", "xj", "yj", "zj")).apply(Regex.find("([xyz])", 1));
 
     PAssert.that(output).containsInAnyOrder("x", "y", "z");
     p.run();
@@ -66,9 +62,7 @@ public class RegexTest implements Serializable {
   public void testFindNone() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("a", "b", "c", "d"))
-        .apply(Regex.find("[xyz]"));
+    PCollection<String> output = p.apply(Create.of("a", "b", "c", "d")).apply(Regex.find("[xyz]"));
 
     PAssert.that(output).empty();
     p.run();
@@ -79,9 +73,8 @@ public class RegexTest implements Serializable {
   public void testKVFind() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<KV<String, String>> output = p
-        .apply(Create.of("a b c"))
-        .apply(Regex.findKV("a (b) (c)", 1, 2));
+    PCollection<KV<String, String>> output =
+        p.apply(Create.of("a b c")).apply(Regex.findKV("a (b) (c)", 1, 2));
 
     PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
     p.run();
@@ -92,9 +85,8 @@ public class RegexTest implements Serializable {
   public void testKVFindNone() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<KV<String, String>> output = p
-        .apply(Create.of("x y z"))
-        .apply(Regex.findKV("a (b) (c)", 1, 2));
+    PCollection<KV<String, String>> output =
+        p.apply(Create.of("x y z")).apply(Regex.findKV("a (b) (c)", 1, 2));
 
     PAssert.that(output).empty();
     p.run();
@@ -105,9 +97,8 @@ public class RegexTest implements Serializable {
   public void testMatches() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("a", "x", "y", "z"))
-        .apply(Regex.matches("[xyz]"));
+    PCollection<String> output =
+        p.apply(Create.of("a", "x", "y", "z")).apply(Regex.matches("[xyz]"));
 
     PAssert.that(output).containsInAnyOrder("x", "y", "z");
     p.run();
@@ -118,9 +109,8 @@ public class RegexTest implements Serializable {
   public void testMatchesNone() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("a", "b", "c", "d"))
-        .apply(Regex.matches("[xyz]"));
+    PCollection<String> output =
+        p.apply(Create.of("a", "b", "c", "d")).apply(Regex.matches("[xyz]"));
 
     PAssert.that(output).empty();
     p.run();
@@ -131,9 +121,8 @@ public class RegexTest implements Serializable {
   public void testMatchesGroup() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("a", "x xxx", "x yyy", "x zzz"))
-        .apply(Regex.matches("x ([xyz]*)", 1));
+    PCollection<String> output =
+        p.apply(Create.of("a", "x xxx", "x yyy", "x zzz")).apply(Regex.matches("x ([xyz]*)", 1));
 
     PAssert.that(output).containsInAnyOrder("xxx", "yyy", "zzz");
     p.run();
@@ -144,9 +133,8 @@ public class RegexTest implements Serializable {
   public void testKVMatches() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<KV<String, String>> output = p
-        .apply(Create.of("a b c"))
-        .apply(Regex.matchesKV("a (b) (c)", 1, 2));
+    PCollection<KV<String, String>> output =
+        p.apply(Create.of("a b c")).apply(Regex.matchesKV("a (b) (c)", 1, 2));
 
     PAssert.that(output).containsInAnyOrder(KV.of("b", "c"));
     p.run();
@@ -157,9 +145,8 @@ public class RegexTest implements Serializable {
   public void testKVMatchesNone() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<KV<String, String>> output = p
-        .apply(Create.of("x y z"))
-        .apply(Regex.matchesKV("a (b) (c)", 1, 2));
+    PCollection<KV<String, String>> output =
+        p.apply(Create.of("x y z")).apply(Regex.matchesKV("a (b) (c)", 1, 2));
     PAssert.that(output).empty();
     p.run();
   }
@@ -169,9 +156,8 @@ public class RegexTest implements Serializable {
   public void testReplaceAll() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("xj", "yj", "zj"))
-        .apply(Regex.replaceAll("[xyz]", "new"));
+    PCollection<String> output =
+        p.apply(Create.of("xj", "yj", "zj")).apply(Regex.replaceAll("[xyz]", "new"));
 
     PAssert.that(output).containsInAnyOrder("newj", "newj", "newj");
     p.run();
@@ -182,9 +168,8 @@ public class RegexTest implements Serializable {
   public void testReplaceAllMixed() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("abc", "xj", "yj", "zj", "def"))
-        .apply(Regex.replaceAll("[xyz]", "new"));
+    PCollection<String> output =
+        p.apply(Create.of("abc", "xj", "yj", "zj", "def")).apply(Regex.replaceAll("[xyz]", "new"));
 
     PAssert.that(output).containsInAnyOrder("abc", "newj", "newj", "newj", "def");
     p.run();
@@ -195,9 +180,8 @@ public class RegexTest implements Serializable {
   public void testReplaceFirst() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("xjx", "yjy", "zjz"))
-        .apply(Regex.replaceFirst("[xyz]", "new"));
+    PCollection<String> output =
+        p.apply(Create.of("xjx", "yjy", "zjz")).apply(Regex.replaceFirst("[xyz]", "new"));
 
     PAssert.that(output).containsInAnyOrder("newjx", "newjy", "newjz");
     p.run();
@@ -208,9 +192,9 @@ public class RegexTest implements Serializable {
   public void testReplaceFirstMixed() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("abc", "xjx", "yjy", "zjz", "def"))
-        .apply(Regex.replaceFirst("[xyz]", "new"));
+    PCollection<String> output =
+        p.apply(Create.of("abc", "xjx", "yjy", "zjz", "def"))
+            .apply(Regex.replaceFirst("[xyz]", "new"));
 
     PAssert.that(output).containsInAnyOrder("abc", "newjx", "newjy", "newjz", "def");
     p.run();
@@ -221,12 +205,12 @@ public class RegexTest implements Serializable {
   public void testSplits() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
-        .apply(Regex.split("\\W+"));
+    PCollection<String> output =
+        p.apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
+            .apply(Regex.split("\\W+"));
 
-    PAssert.that(output).containsInAnyOrder("The", "quick", "brown",
-      "fox", "jumps", "over", "the", "lazy", "dog");
+    PAssert.that(output)
+        .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
     p.run();
   }
 
@@ -235,14 +219,16 @@ public class RegexTest implements Serializable {
   public void testSplitsWithEmpty() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
-        .apply(Regex.split("\\s", true));
+    PCollection<String> output =
+        p.apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
+            .apply(Regex.split("\\s", true));
 
     String[] outputStr = "The  quick   brown fox jumps over    the lazy dog".split("\\s");
 
-    PAssert.that(output).containsInAnyOrder("The", "", "quick", "brown", "", "",
-      "fox", "jumps", "over", "", "", "", "the", "lazy", "dog");
+    PAssert.that(output)
+        .containsInAnyOrder(
+            "The", "", "quick", "brown", "", "", "fox", "jumps", "over", "", "", "", "the", "lazy",
+            "dog");
     p.run();
   }
 
@@ -251,12 +237,12 @@ public class RegexTest implements Serializable {
   public void testSplitsWithoutEmpty() {
     TestPipeline p = TestPipeline.create();
 
-    PCollection<String> output = p
-        .apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
-        .apply(Regex.split("\\s", false));
+    PCollection<String> output =
+        p.apply(Create.of("The  quick   brown fox jumps over    the lazy dog"))
+            .apply(Regex.split("\\s", false));
 
-    PAssert.that(output).containsInAnyOrder("The", "quick", "brown",
-      "fox", "jumps", "over", "the", "lazy", "dog");
+    PAssert.that(output)
+        .containsInAnyOrder("The", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog");
     p.run();
   }
 }