You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/05 03:33:52 UTC

[2/2] [text] Finish adapting code to commons-text, adding missing header and fixing Javadocs

Finish adapting code to commons-text, adding missing header and fixing Javadocs


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a7e88eef
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a7e88eef
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a7e88eef

Branch: refs/heads/myers-algo
Commit: a7e88eef1a77649648a7fc6e9015448eb97c4572
Parents: cca1f19
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Thu Feb 5 00:33:14 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Thu Feb 5 00:33:14 2015 -0200

----------------------------------------------------------------------
 .../commons/text/diff/CommandVisitor.java       |  12 +-
 .../apache/commons/text/diff/DeleteCommand.java |   6 +-
 .../apache/commons/text/diff/EditCommand.java   |   9 +-
 .../apache/commons/text/diff/EditScript.java    |   7 +-
 .../apache/commons/text/diff/InsertCommand.java |   6 +-
 .../apache/commons/text/diff/KeepCommand.java   |   6 +-
 .../commons/text/diff/ReplacementsFinder.java   |  10 +-
 .../commons/text/diff/ReplacementsHandler.java  |   3 +-
 .../commons/text/diff/StringsComparator.java    | 137 +++++++++++++++----
 .../apache/commons/text/diff/package-info.java  |   3 +
 10 files changed, 147 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
index ae3833c..d73dde8 100644
--- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -28,14 +28,17 @@ package org.apache.commons.text.diff;
  * it will perform the loop over all commands in the script and the
  * proper methods of the user class will be called as the commands are
  * encountered.
+ * </p>
  * <p>
  * The implementation of the user visitor class will depend on the
  * need. Here are two examples.
+ * </p>
  * <p>
  * The first example is a visitor that build the longest common
  * subsequence:
+ * </p>
  * <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
  *
  * import java.util.ArrayList;
  *
@@ -67,7 +70,7 @@ package org.apache.commons.text.diff;
  * The second example is a visitor that shows the commands and the way
  * they transform the first sequence into the second one:
  * <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
  *
  * import java.util.Arrays;
  * import java.util.ArrayList;
@@ -97,7 +100,7 @@ package org.apache.commons.text.diff;
  *   }
  *
  *   private void display(String commandName, Object object) {
- *     System.out.println(commandName + " " + object + " ->" + this);
+ *     System.out.println(commandName + " " + object + ": " + this);
  *   }
  *
  *   public String toString() {
@@ -114,8 +117,7 @@ package org.apache.commons.text.diff;
  * }
  * </pre>
  *
- * @since 4.0
- * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public interface CommandVisitor<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
index 0de3a43..7494002 100644
--- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the deletion of this object. The objects embedded in
  * these type of commands always come from the first sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class DeleteCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
index 7c5ff76..627db4d 100644
--- a/src/main/java/org/apache/commons/text/diff/EditCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -21,9 +21,10 @@ package org.apache.commons.text.diff;
  * into another one.
  * <p>
  * When two objects sequences are compared through the
- * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * {@link StringsComparator#getScript StringsComparator.getScript} method,
  * the result is provided has a {@link EditScript script} containing the commands
  * that progressively transform the first sequence into the second one.
+ * </p>
  * <p>
  * There are only three types of commands, all of which are subclasses of this
  * abstract class. Each command is associated with one object belonging to at
@@ -38,12 +39,12 @@ package org.apache.commons.text.diff;
  * the object passed as an argument comes from the second sequence) ; this can
  * be important if subclassing is used for some elements in the first sequence
  * and the <code>equals</code> method is specialized.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public abstract class EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
index ab7b04e..641d60b 100644
--- a/src/main/java/org/apache/commons/text/diff/EditScript.java
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -25,7 +25,7 @@ import java.util.List;
  * <p>
  * An edit script is the most general view of the differences between two
  * sequences. It is built as the result of the comparison between two sequences
- * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * by the {@link StringsComparator StringsComparator} class. The user can
  * walk through it using the <em>visitor</em> design pattern.
  * <p>
  * It is guaranteed that the objects embedded in the {@link InsertCommand insert
@@ -35,13 +35,12 @@ import java.util.List;
  * is used for some elements in the first sequence and the <code>equals</code>
  * method is specialized.
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditCommand
  * @see CommandVisitor
  * @see ReplacementsHandler
  *
- * @since 4.0
- * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class EditScript<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
index 9ec9c6a..9a365d0 100644
--- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the insertion of this object. The objects embedded in
  * these type of commands always come from the second sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class InsertCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
index a511bfe..687f7e7 100644
--- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the keeping of this object. The objects embedded in
  * these type of commands always come from the first sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class KeepCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
index 361a26d..52e4112 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -34,25 +34,26 @@ import java.util.List;
  * {@link DeleteCommand delete}, {@link KeepCommand keep} and
  * {@link InsertCommand insert} commands by fewer replacements acting on
  * complete sub-sequences.
+ * </p>
  * <p>
  * This class is devoted to perform this interpretation. It visits an
  * {@link EditScript edit script} (because it implements the
  * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
  * handler implementing the {@link ReplacementsHandler ReplacementsHandler}
  * interface to process the sub-sequences.
+ * </p>
  *
  * @see ReplacementsHandler
  * @see EditScript
- * @see SequencesComparator
+ * @see StringsComparator
  *
- * @since 4.0
- * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class ReplacementsFinder<T> implements CommandVisitor<T> {
 
     private final List<T> pendingInsertions;
     private final List<T> pendingDeletions;
-    private int     skipped;
+    private int skipped;
 
     /** Handler to call when synchronized sequences are found. */
     private final ReplacementsHandler<T> handler;
@@ -83,6 +84,7 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
      * <p>
      * When a synchronization object is identified, the pending insertions and
      * pending deletions sets are provided to the user handler as subsequences.
+     * </p>
      *
      * @param object  synchronization object detected
      */

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
index fe99a03..d5d61a4 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -22,8 +22,7 @@ import java.util.List;
  * This interface is devoted to handle synchronized replacement sequences.
  *
  * @see ReplacementsFinder
- * @since 4.0
- * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ * @since 1.0
  */
 public interface ReplacementsHandler<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
index c59f89f..c2fbfab 100644
--- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -1,25 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.commons.text.diff;
 
-
-
+/**
+ * <p>
+ * It is guaranteed that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can
+ * be important if subclassing is used for some elements in the first
+ * sequence and the <code>equals</code> method is specialized.
+ * </p>
+ * <p>
+ * Comparison can be seen from two points of view: either as giving the smallest
+ * modification allowing to transform the first sequence into the second one, or
+ * as giving the longest sequence which is a subsequence of both initial
+ * sequences. The <code>equals</code> method is used to compare objects, so any
+ * object can be put into sequences. Modifications include deleting, inserting
+ * or keeping one object, starting from the beginning of the first sequence.
+ * </p>
+ * <p>
+ * This class implements the comparison algorithm, which is the very efficient
+ * algorithm from Eugene W. Myers
+ * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
+ * the shortest possible {@link EditScript edit script} containing all the
+ * {@link EditCommand commands} needed to transform the first sequence into
+ * the second one.
+ *
+ * <p>
+ * This class has been adapted from the commons-collections implementation.
+ * </p>
+ *
+ * @see EditScript
+ * @see EditCommand
+ * @see CommandVisitor
+ *
+ * @since 1.0
+ */
 public class StringsComparator {
-    
+
+    /**
+     * First character sequence.
+     */
     private final String left;
+    /**
+     * Second character sequence.
+     */
     private final String right;
-    
+
     /** Temporary variables. */
     private final int[] vDown;
     private final int[] vUp;
-    
+
+    /**
+     * Simple constructor.
+     * <p>
+     * Creates a new instance of StringsComparator.
+     * </p>
+     * <p>
+     * It is <em>guaranteed</em> that the comparisons will always be done as
+     * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+     * sequence and <code>o2</code> belongs to the second sequence. This can be
+     * important if subclassing is used for some elements in the first sequence
+     * and the <code>equals</code> method is specialized.
+     * </p>
+     *
+     * @param left first character sequence to be compared
+     * @param right second character sequence to be compared
+     */
     public StringsComparator(String left, String right) {
         this.left = left;
         this.right = right;
-        
+
         final int size = left.length() + right.length() + 2;
         vDown = new int[size];
         vUp   = new int[size];
     }
-    
+
     /**
      * Get the {@link EditScript} object.
      * <p>
@@ -29,12 +100,13 @@ public class StringsComparator {
      * {@link KeepCommand keep commands} come from the first sequence. This can
      * be important if subclassing is used for some elements in the first
      * sequence and the <code>equals</code> method is specialized.
+     * </p>
      *
      * @return the edit script resulting from the comparison of the two
      *         sequences
      */
-    public EditScript getScript() {
-        final EditScript script = new EditScript();
+    public EditScript<Character> getScript() {
+        final EditScript<Character> script = new EditScript<Character>();
         buildScript(0, left.length(), 0, right.length(), script);
         return script;
     }
@@ -49,7 +121,7 @@ public class StringsComparator {
      * @param script the edited script
      */
     private void buildScript(final int start1, final int end1, final int start2, final int end2,
-            final EditScript script) {
+            final EditScript<Character> script) {
         final Snake middle = getMiddleSnake(start1, end1, start2, end2);
 
         if (middle == null
@@ -60,15 +132,15 @@ public class StringsComparator {
             int j = start2;
             while (i < end1 || j < end2) {
                 if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
-                    script.append(new KeepCommand(left.charAt(i)));
+                    script.append(new KeepCommand<Character>(left.charAt(i)));
                     ++i;
                     ++j;
                 } else {
                     if (end1 - start1 > end2 - start2) {
-                        script.append(new DeleteCommand(left.charAt(i)));
+                        script.append(new DeleteCommand<Character>(left.charAt(i)));
                         ++i;
                     } else {
-                        script.append(new InsertCommand(right.charAt(j)));
+                        script.append(new InsertCommand<Character>(right.charAt(j)));
                         ++j;
                     }
                 }
@@ -80,16 +152,33 @@ public class StringsComparator {
                         start2, middle.getStart() - middle.getDiag(),
                         script);
             for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
-                script.append(new KeepCommand(left.charAt(i)));
+                script.append(new KeepCommand<Character>(left.charAt(i)));
             }
             buildScript(middle.getEnd(), end1,
                         middle.getEnd() - middle.getDiag(), end2,
                         script);
         }
     }
-    
+
+    /**
+     * Get the middle snake corresponding to two subsequences of the
+     * main sequences.
+     * <p>
+     * The snake is found using the MYERS Algorithm (this algorithms has
+     * also been implemented in the GNU diff program). This algorithm is
+     * explained in Eugene Myers article:
+     * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
+     * An O(ND) Difference Algorithm and Its Variations</a>.
+     * </p>
+     *
+     * @param start1  the begin of the first sequence to be compared
+     * @param end1  the end of the first sequence to be compared
+     * @param start2  the begin of the second sequence to be compared
+     * @param end2  the end of the second sequence to be compared
+     * @return the middle snake
+     */
     private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
-     // Myers Algorithm
+        // Myers Algorithm
         // Initialisations
         final int m = end1 - start1;
         final int n = end2 - start2;
@@ -160,7 +249,7 @@ public class StringsComparator {
         // this should not happen
         throw new RuntimeException("Internal Error");
     }
-    
+
     /**
      * Build a snake.
      *
@@ -235,17 +324,17 @@ public class StringsComparator {
             return diag;
         }
     }
-    
+
     public static void main(String[] args) {
         StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
-        EditScript es = sc.getScript();
-        es.visit(new CommandVisitor() {
-            
+        EditScript<Character> es = sc.getScript();
+        es.visit(new CommandVisitor<Character>() {
+
             boolean nlAdd = true;
             boolean nlRemove = true;
 
             @Override
-            public void visitInsertCommand(Object object) {
+            public void visitInsertCommand(Character object) {
                 if (nlAdd) {
                     System.out.println();
                     System.out.print("> ");
@@ -255,7 +344,7 @@ public class StringsComparator {
             }
 
             @Override
-            public void visitKeepCommand(Object object) {
+            public void visitKeepCommand(Character object) {
                 if (!nlAdd) {
                     nlAdd = true;
                 }
@@ -267,7 +356,7 @@ public class StringsComparator {
             }
 
             @Override
-            public void visitDeleteCommand(Object object) {
+            public void visitDeleteCommand(Character object) {
                 if (nlRemove) {
                     System.out.println();
                     System.out.print("< ");

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
index 7a565d4..92fde86 100644
--- a/src/main/java/org/apache/commons/text/diff/package-info.java
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -17,6 +17,9 @@
 /**
  * <p>Provides algorithms for diff between strings.</p>
  *
+ * <p>The initial implementation of the Myers algorithm was adapted from the
+ * commons-collections sequence package.</p>
+ *
  * @since 1.0
  */
 package org.apache.commons.text.diff;