You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/05 03:33:52 UTC
[2/2] [text] Finish adapting code to commons-text,
adding missing header and fixing Javadocs
Finish adapting code to commons-text, adding missing header and fixing Javadocs
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a7e88eef
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a7e88eef
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a7e88eef
Branch: refs/heads/myers-algo
Commit: a7e88eef1a77649648a7fc6e9015448eb97c4572
Parents: cca1f19
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Thu Feb 5 00:33:14 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Thu Feb 5 00:33:14 2015 -0200
----------------------------------------------------------------------
.../commons/text/diff/CommandVisitor.java | 12 +-
.../apache/commons/text/diff/DeleteCommand.java | 6 +-
.../apache/commons/text/diff/EditCommand.java | 9 +-
.../apache/commons/text/diff/EditScript.java | 7 +-
.../apache/commons/text/diff/InsertCommand.java | 6 +-
.../apache/commons/text/diff/KeepCommand.java | 6 +-
.../commons/text/diff/ReplacementsFinder.java | 10 +-
.../commons/text/diff/ReplacementsHandler.java | 3 +-
.../commons/text/diff/StringsComparator.java | 137 +++++++++++++++----
.../apache/commons/text/diff/package-info.java | 3 +
10 files changed, 147 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
index ae3833c..d73dde8 100644
--- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -28,14 +28,17 @@ package org.apache.commons.text.diff;
* it will perform the loop over all commands in the script and the
* proper methods of the user class will be called as the commands are
* encountered.
+ * </p>
* <p>
* The implementation of the user visitor class will depend on the
* need. Here are two examples.
+ * </p>
* <p>
* The first example is a visitor that build the longest common
* subsequence:
+ * </p>
* <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.ArrayList;
*
@@ -67,7 +70,7 @@ package org.apache.commons.text.diff;
* The second example is a visitor that shows the commands and the way
* they transform the first sequence into the second one:
* <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.Arrays;
* import java.util.ArrayList;
@@ -97,7 +100,7 @@ package org.apache.commons.text.diff;
* }
*
* private void display(String commandName, Object object) {
- * System.out.println(commandName + " " + object + " ->" + this);
+ * System.out.println(commandName + " " + object + ": " + this);
* }
*
* public String toString() {
@@ -114,8 +117,7 @@ package org.apache.commons.text.diff;
* }
* </pre>
*
- * @since 4.0
- * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public interface CommandVisitor<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
index 0de3a43..7494002 100644
--- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the deletion of this object. The objects embedded in
* these type of commands always come from the first sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class DeleteCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
index 7c5ff76..627db4d 100644
--- a/src/main/java/org/apache/commons/text/diff/EditCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -21,9 +21,10 @@ package org.apache.commons.text.diff;
* into another one.
* <p>
* When two objects sequences are compared through the
- * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * {@link StringsComparator#getScript StringsComparator.getScript} method,
* the result is provided has a {@link EditScript script} containing the commands
* that progressively transform the first sequence into the second one.
+ * </p>
* <p>
* There are only three types of commands, all of which are subclasses of this
* abstract class. Each command is associated with one object belonging to at
@@ -38,12 +39,12 @@ package org.apache.commons.text.diff;
* the object passed as an argument comes from the second sequence) ; this can
* be important if subclassing is used for some elements in the first sequence
* and the <code>equals</code> method is specialized.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public abstract class EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
index ab7b04e..641d60b 100644
--- a/src/main/java/org/apache/commons/text/diff/EditScript.java
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -25,7 +25,7 @@ import java.util.List;
* <p>
* An edit script is the most general view of the differences between two
* sequences. It is built as the result of the comparison between two sequences
- * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * by the {@link StringsComparator StringsComparator} class. The user can
* walk through it using the <em>visitor</em> design pattern.
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand insert
@@ -35,13 +35,12 @@ import java.util.List;
* is used for some elements in the first sequence and the <code>equals</code>
* method is specialized.
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditCommand
* @see CommandVisitor
* @see ReplacementsHandler
*
- * @since 4.0
- * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class EditScript<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
index 9ec9c6a..9a365d0 100644
--- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the insertion of this object. The objects embedded in
* these type of commands always come from the second sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class InsertCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
index a511bfe..687f7e7 100644
--- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the keeping of this object. The objects embedded in
* these type of commands always come from the first sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class KeepCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
index 361a26d..52e4112 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -34,25 +34,26 @@ import java.util.List;
* {@link DeleteCommand delete}, {@link KeepCommand keep} and
* {@link InsertCommand insert} commands by fewer replacements acting on
* complete sub-sequences.
+ * </p>
* <p>
* This class is devoted to perform this interpretation. It visits an
* {@link EditScript edit script} (because it implements the
* {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
* handler implementing the {@link ReplacementsHandler ReplacementsHandler}
* interface to process the sub-sequences.
+ * </p>
*
* @see ReplacementsHandler
* @see EditScript
- * @see SequencesComparator
+ * @see StringsComparator
*
- * @since 4.0
- * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class ReplacementsFinder<T> implements CommandVisitor<T> {
private final List<T> pendingInsertions;
private final List<T> pendingDeletions;
- private int skipped;
+ private int skipped;
/** Handler to call when synchronized sequences are found. */
private final ReplacementsHandler<T> handler;
@@ -83,6 +84,7 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
* <p>
* When a synchronization object is identified, the pending insertions and
* pending deletions sets are provided to the user handler as subsequences.
+ * </p>
*
* @param object synchronization object detected
*/
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
index fe99a03..d5d61a4 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -22,8 +22,7 @@ import java.util.List;
* This interface is devoted to handle synchronized replacement sequences.
*
* @see ReplacementsFinder
- * @since 4.0
- * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ * @since 1.0
*/
public interface ReplacementsHandler<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
index c59f89f..c2fbfab 100644
--- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -1,25 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.commons.text.diff;
-
-
+/**
+ * <p>
+ * It is guaranteed that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can
+ * be important if subclassing is used for some elements in the first
+ * sequence and the <code>equals</code> method is specialized.
+ * </p>
+ * <p>
+ * Comparison can be seen from two points of view: either as giving the smallest
+ * modification allowing to transform the first sequence into the second one, or
+ * as giving the longest sequence which is a subsequence of both initial
+ * sequences. The <code>equals</code> method is used to compare objects, so any
+ * object can be put into sequences. Modifications include deleting, inserting
+ * or keeping one object, starting from the beginning of the first sequence.
+ * </p>
+ * <p>
+ * This class implements the comparison algorithm, which is the very efficient
+ * algorithm from Eugene W. Myers
+ * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
+ * the shortest possible {@link EditScript edit script} containing all the
+ * {@link EditCommand commands} needed to transform the first sequence into
+ * the second one.
+ *
+ * <p>
+ * This class has been adapted from the commons-collections implementation.
+ * </p>
+ *
+ * @see EditScript
+ * @see EditCommand
+ * @see CommandVisitor
+ *
+ * @since 1.0
+ */
public class StringsComparator {
-
+
+ /**
+ * First character sequence.
+ */
private final String left;
+ /**
+ * Second character sequence.
+ */
private final String right;
-
+
/** Temporary variables. */
private final int[] vDown;
private final int[] vUp;
-
+
+ /**
+ * Simple constructor.
+ * <p>
+ * Creates a new instance of StringsComparator.
+ * </p>
+ * <p>
+ * It is <em>guaranteed</em> that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can be
+ * important if subclassing is used for some elements in the first sequence
+ * and the <code>equals</code> method is specialized.
+ * </p>
+ *
+ * @param left first character sequence to be compared
+ * @param right second character sequence to be compared
+ */
public StringsComparator(String left, String right) {
this.left = left;
this.right = right;
-
+
final int size = left.length() + right.length() + 2;
vDown = new int[size];
vUp = new int[size];
}
-
+
/**
* Get the {@link EditScript} object.
* <p>
@@ -29,12 +100,13 @@ public class StringsComparator {
* {@link KeepCommand keep commands} come from the first sequence. This can
* be important if subclassing is used for some elements in the first
* sequence and the <code>equals</code> method is specialized.
+ * </p>
*
* @return the edit script resulting from the comparison of the two
* sequences
*/
- public EditScript getScript() {
- final EditScript script = new EditScript();
+ public EditScript<Character> getScript() {
+ final EditScript<Character> script = new EditScript<Character>();
buildScript(0, left.length(), 0, right.length(), script);
return script;
}
@@ -49,7 +121,7 @@ public class StringsComparator {
* @param script the edited script
*/
private void buildScript(final int start1, final int end1, final int start2, final int end2,
- final EditScript script) {
+ final EditScript<Character> script) {
final Snake middle = getMiddleSnake(start1, end1, start2, end2);
if (middle == null
@@ -60,15 +132,15 @@ public class StringsComparator {
int j = start2;
while (i < end1 || j < end2) {
if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
- script.append(new KeepCommand(left.charAt(i)));
+ script.append(new KeepCommand<Character>(left.charAt(i)));
++i;
++j;
} else {
if (end1 - start1 > end2 - start2) {
- script.append(new DeleteCommand(left.charAt(i)));
+ script.append(new DeleteCommand<Character>(left.charAt(i)));
++i;
} else {
- script.append(new InsertCommand(right.charAt(j)));
+ script.append(new InsertCommand<Character>(right.charAt(j)));
++j;
}
}
@@ -80,16 +152,33 @@ public class StringsComparator {
start2, middle.getStart() - middle.getDiag(),
script);
for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
- script.append(new KeepCommand(left.charAt(i)));
+ script.append(new KeepCommand<Character>(left.charAt(i)));
}
buildScript(middle.getEnd(), end1,
middle.getEnd() - middle.getDiag(), end2,
script);
}
}
-
+
+ /**
+ * Get the middle snake corresponding to two subsequences of the
+ * main sequences.
+ * <p>
+ * The snake is found using the MYERS Algorithm (this algorithms has
+ * also been implemented in the GNU diff program). This algorithm is
+ * explained in Eugene Myers article:
+ * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>.
+ * </p>
+ *
+ * @param start1 the begin of the first sequence to be compared
+ * @param end1 the end of the first sequence to be compared
+ * @param start2 the begin of the second sequence to be compared
+ * @param end2 the end of the second sequence to be compared
+ * @return the middle snake
+ */
private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
- // Myers Algorithm
+ // Myers Algorithm
// Initialisations
final int m = end1 - start1;
final int n = end2 - start2;
@@ -160,7 +249,7 @@ public class StringsComparator {
// this should not happen
throw new RuntimeException("Internal Error");
}
-
+
/**
* Build a snake.
*
@@ -235,17 +324,17 @@ public class StringsComparator {
return diag;
}
}
-
+
public static void main(String[] args) {
StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
- EditScript es = sc.getScript();
- es.visit(new CommandVisitor() {
-
+ EditScript<Character> es = sc.getScript();
+ es.visit(new CommandVisitor<Character>() {
+
boolean nlAdd = true;
boolean nlRemove = true;
@Override
- public void visitInsertCommand(Object object) {
+ public void visitInsertCommand(Character object) {
if (nlAdd) {
System.out.println();
System.out.print("> ");
@@ -255,7 +344,7 @@ public class StringsComparator {
}
@Override
- public void visitKeepCommand(Object object) {
+ public void visitKeepCommand(Character object) {
if (!nlAdd) {
nlAdd = true;
}
@@ -267,7 +356,7 @@ public class StringsComparator {
}
@Override
- public void visitDeleteCommand(Object object) {
+ public void visitDeleteCommand(Character object) {
if (nlRemove) {
System.out.println();
System.out.print("< ");
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
index 7a565d4..92fde86 100644
--- a/src/main/java/org/apache/commons/text/diff/package-info.java
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -17,6 +17,9 @@
/**
* <p>Provides algorithms for diff between strings.</p>
*
+ * <p>The initial implementation of the Myers algorithm was adapted from the
+ * commons-collections sequence package.</p>
+ *
* @since 1.0
*/
package org.apache.commons.text.diff;