You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/05 03:33:51 UTC

[1/2] [text] Migrating Myers algorithm from [collections]

Repository: commons-text
Updated Branches:
  refs/heads/myers-algo [created] a7e88eef1


Migrating Myers algorithm from [collections]


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/cca1f199
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/cca1f199
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/cca1f199

Branch: refs/heads/myers-algo
Commit: cca1f19918760d4e538ff4d70d875878418f6f1a
Parents: 37c7804
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sun Dec 14 23:41:43 2014 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sun Dec 14 23:41:43 2014 -0200

----------------------------------------------------------------------
 .../commons/text/diff/CommandVisitor.java       | 143 ++++++++++
 .../apache/commons/text/diff/DeleteCommand.java |  55 ++++
 .../apache/commons/text/diff/EditCommand.java   |  82 ++++++
 .../apache/commons/text/diff/EditScript.java    | 133 +++++++++
 .../apache/commons/text/diff/InsertCommand.java |  57 ++++
 .../apache/commons/text/diff/KeepCommand.java   |  57 ++++
 .../commons/text/diff/ReplacementsFinder.java   | 109 +++++++
 .../commons/text/diff/ReplacementsHandler.java  |  52 ++++
 .../commons/text/diff/StringsComparator.java    | 281 +++++++++++++++++++
 .../apache/commons/text/diff/package-info.java  |  22 ++
 10 files changed, 991 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
new file mode 100644
index 0000000..ae3833c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * This interface should be implemented by user object to walk
+ * through {@link EditScript EditScript} objects.
+ * <p>
+ * Users should implement this interface in order to walk through
+ * the {@link EditScript EditScript} object created by the comparison
+ * of two sequences. This is a direct application of the visitor
+ * design pattern. The {@link EditScript#visit EditScript.visit}
+ * method takes an object implementing this interface as an argument,
+ * it will perform the loop over all commands in the script and the
+ * proper methods of the user class will be called as the commands are
+ * encountered.
+ * <p>
+ * The implementation of the user visitor class will depend on the
+ * need. Here are two examples.
+ * <p>
+ * The first example is a visitor that build the longest common
+ * subsequence:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.ArrayList;
+ *
+ * public class LongestCommonSubSequence implements CommandVisitor {
+ *
+ *   public LongestCommonSubSequence() {
+ *     a = new ArrayList();
+ *   }
+ *
+ *   public void visitInsertCommand(Object object) {
+ *   }
+ *
+ *   public void visitKeepCommand(Object object) {
+ *     a.add(object);
+ *   }
+ *
+ *   public void visitDeleteCommand(Object object) {
+ *   }
+ *
+ *   public Object[] getSubSequence() {
+ *     return a.toArray();
+ *   }
+ *
+ *   private ArrayList a;
+ *
+ * }
+ * </pre>
+ * <p>
+ * The second example is a visitor that shows the commands and the way
+ * they transform the first sequence into the second one:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.Arrays;
+ * import java.util.ArrayList;
+ * import java.util.Iterator;
+ *
+ * public class ShowVisitor implements CommandVisitor {
+ *
+ *   public ShowVisitor(Object[] sequence1) {
+ *     v = new ArrayList();
+ *     v.addAll(Arrays.asList(sequence1));
+ *     index = 0;
+ *   }
+ *
+ *   public void visitInsertCommand(Object object) {
+ *     v.insertElementAt(object, index++);
+ *     display("insert", object);
+ *   }
+ *
+ *   public void visitKeepCommand(Object object) {
+ *     ++index;
+ *     display("keep  ", object);
+ *   }
+ *
+ *   public void visitDeleteCommand(Object object) {
+ *     v.remove(index);
+ *     display("delete", object);
+ *   }
+ *
+ *   private void display(String commandName, Object object) {
+ *     System.out.println(commandName + " " + object + " ->" + this);
+ *   }
+ *
+ *   public String toString() {
+ *     StringBuffer buffer = new StringBuffer();
+ *     for (Iterator iter = v.iterator(); iter.hasNext();) {
+ *       buffer.append(' ').append(iter.next());
+ *     }
+ *     return buffer.toString();
+ *   }
+ *
+ *   private ArrayList v;
+ *   private int index;
+ *
+ * }
+ * </pre>
+ *
+ * @since 4.0
+ * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public interface CommandVisitor<T> {
+
+    /**
+     * Method called when an insert command is encountered.
+     *
+     * @param object object to insert (this object comes from the second sequence)
+     */
+    void visitInsertCommand(T object);
+
+    /**
+     * Method called when a keep command is encountered.
+     *
+     * @param object object to keep (this object comes from the first sequence)
+     */
+    void visitKeepCommand(T object);
+
+    /**
+     * Method called when a delete command is encountered.
+     *
+     * @param object object to delete (this object comes from the first sequence)
+     */
+    void visitDeleteCommand(T object);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
new file mode 100644
index 0000000..0de3a43
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the deletion of one object of the first sequence.
+ * <p>
+ * When one object of the first sequence has no corresponding object in the
+ * second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the deletion of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class DeleteCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of {@link DeleteCommand}.
+     *
+     * @param object  the object of the first sequence that should be deleted
+     */
+    public DeleteCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When a <code>DeleteCommand</code> accepts a visitor, it calls
+     * its {@link CommandVisitor#visitDeleteCommand visitDeleteCommand} method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitDeleteCommand(getObject());
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
new file mode 100644
index 0000000..7c5ff76
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Abstract base class for all commands used to transform an objects sequence
+ * into another one.
+ * <p>
+ * When two objects sequences are compared through the
+ * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * the result is provided has a {@link EditScript script} containing the commands
+ * that progressively transform the first sequence into the second one.
+ * <p>
+ * There are only three types of commands, all of which are subclasses of this
+ * abstract class. Each command is associated with one object belonging to at
+ * least one of the sequences. These commands are {@link InsertCommand
+ * InsertCommand} which correspond to an object of the second sequence being
+ * inserted into the first sequence, {@link DeleteCommand DeleteCommand} which
+ * correspond to an object of the first sequence being removed and
+ * {@link KeepCommand KeepCommand} which correspond to an object of the first
+ * sequence which <code>equals</code> an object in the second sequence. It is
+ * guaranteed that comparison is always performed this way (i.e. the
+ * <code>equals</code> method of the object from the first sequence is used and
+ * the object passed as an argument comes from the second sequence) ; this can
+ * be important if subclassing is used for some elements in the first sequence
+ * and the <code>equals</code> method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public abstract class EditCommand<T> {
+
+    /** Object on which the command should be applied. */
+    private final T object;
+
+    /**
+     * Simple constructor. Creates a new instance of EditCommand
+     *
+     * @param object  reference to the object associated with this command, this
+     *   refers to an element of one of the sequences being compared
+     */
+    protected EditCommand(final T object) {
+        this.object = object;
+    }
+
+    /**
+     * Returns the object associated with this command.
+     *
+     * @return the object on which the command is applied
+     */
+    protected T getObject() {
+        return object;
+    }
+
+    /**
+     * Accept a visitor.
+     * <p>
+     * This method is invoked for each commands belonging to
+     * an {@link EditScript EditScript}, in order to implement the visitor design pattern
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    public abstract void accept(CommandVisitor<T> visitor);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
new file mode 100644
index 0000000..ab7b04e
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class gathers all the {@link EditCommand commands} needed to transform
+ * one objects sequence into another objects sequence.
+ * <p>
+ * An edit script is the most general view of the differences between two
+ * sequences. It is built as the result of the comparison between two sequences
+ * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * walk through it using the <em>visitor</em> design pattern.
+ * <p>
+ * It is guaranteed that the objects embedded in the {@link InsertCommand insert
+ * commands} come from the second sequence and that the objects embedded in
+ * either the {@link DeleteCommand delete commands} or {@link KeepCommand keep
+ * commands} come from the first sequence. This can be important if subclassing
+ * is used for some elements in the first sequence and the <code>equals</code>
+ * method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditCommand
+ * @see CommandVisitor
+ * @see ReplacementsHandler
+ *
+ * @since 4.0
+ * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class EditScript<T> {
+
+    /** Container for the commands. */
+    private final List<EditCommand<T>> commands;
+
+    /** Length of the longest common subsequence. */
+    private int lcsLength;
+
+    /** Number of modifications. */
+    private int modifications;
+
+    /**
+     * Simple constructor. Creates a new empty script.
+     */
+    public EditScript() {
+        commands = new ArrayList<EditCommand<T>>();
+        lcsLength = 0;
+        modifications = 0;
+    }
+
+    /**
+     * Add a keep command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final KeepCommand<T> command) {
+        commands.add(command);
+        ++lcsLength;
+    }
+
+    /**
+     * Add an insert command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final InsertCommand<T> command) {
+        commands.add(command);
+        ++modifications;
+    }
+
+    /**
+     * Add a delete command to the script.
+     *
+     * @param command  command to add
+     */
+    public void append(final DeleteCommand<T> command) {
+        commands.add(command);
+        ++modifications;
+    }
+
+    /**
+     * Visit the script. The script implements the <em>visitor</em> design
+     * pattern, this method is the entry point to which the user supplies its
+     * own visitor, the script will be responsible to drive it through the
+     * commands in order and call the appropriate method as each command is
+     * encountered.
+     *
+     * @param visitor  the visitor that will visit all commands in turn
+     */
+    public void visit(final CommandVisitor<T> visitor) {
+        for (final EditCommand<T> command : commands) {
+            command.accept(visitor);
+        }
+    }
+
+    /**
+     * Get the length of the Longest Common Subsequence (LCS). The length of the
+     * longest common subsequence is the number of {@link KeepCommand keep
+     * commands} in the script.
+     *
+     * @return length of the Longest Common Subsequence
+     */
+    public int getLCSLength() {
+        return lcsLength;
+    }
+
+    /**
+     * Get the number of effective modifications. The number of effective
+     * modification is the number of {@link DeleteCommand delete} and
+     * {@link InsertCommand insert} commands in the script.
+     *
+     * @return number of effective modifications
+     */
+    public int getModifications() {
+        return modifications;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
new file mode 100644
index 0000000..9ec9c6a
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the insertion of one object of the second sequence.
+ * <p>
+ * When one object of the second sequence has no corresponding object in the
+ * first sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the insertion of this object. The objects embedded in
+ * these type of commands always come from the second sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class InsertCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of InsertCommand
+     *
+     * @param object  the object of the second sequence that should be inserted
+     */
+    public InsertCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When an <code>InsertCommand</code> accepts a visitor,
+     * it calls its {@link CommandVisitor#visitInsertCommand visitInsertCommand}
+     * method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitInsertCommand(getObject());
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
new file mode 100644
index 0000000..a511bfe
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the keeping of one object present in both sequences.
+ * <p>
+ * When one object of the first sequence <code>equals</code> another objects in
+ * the second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the keeping of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class KeepCommand<T> extends EditCommand<T> {
+
+    /**
+     * Simple constructor. Creates a new instance of KeepCommand
+     *
+     * @param object  the object belonging to both sequences (the object is a
+     *   reference to the instance in the first sequence which is known
+     *   to be equal to an instance in the second sequence)
+     */
+    public KeepCommand(final T object) {
+        super(object);
+    }
+
+    /**
+     * Accept a visitor. When a <code>KeepCommand</code> accepts a visitor, it
+     * calls its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method.
+     *
+     * @param visitor  the visitor to be accepted
+     */
+    @Override
+    public void accept(final CommandVisitor<T> visitor) {
+        visitor.visitKeepCommand(getObject());
+    }
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
new file mode 100644
index 0000000..361a26d
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class handles sequences of replacements resulting from a comparison.
+ * <p>
+ * The comparison of two objects sequences leads to the identification of common
+ * parts and parts which only belong to the first or to the second sequence. The
+ * common parts appear in the edit script in the form of <em>keep</em> commands,
+ * they can be considered as synchronization objects between the two sequences.
+ * These synchronization objects split the two sequences in synchronized
+ * sub-sequences. The first sequence can be transformed into the second one by
+ * replacing each synchronized sub-sequence of the first sequence by the
+ * corresponding sub-sequence of the second sequence. This is a synthetic way to
+ * see an {@link EditScript edit script}, replacing individual
+ * {@link DeleteCommand delete}, {@link KeepCommand keep} and
+ * {@link InsertCommand insert} commands by fewer replacements acting on
+ * complete sub-sequences.
+ * <p>
+ * This class is devoted to perform this interpretation. It visits an
+ * {@link EditScript edit script} (because it implements the
+ * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
+ * handler implementing the {@link ReplacementsHandler ReplacementsHandler}
+ * interface to process the sub-sequences.
+ *
+ * @see ReplacementsHandler
+ * @see EditScript
+ * @see SequencesComparator
+ *
+ * @since 4.0
+ * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class ReplacementsFinder<T> implements CommandVisitor<T> {
+
+    private final List<T> pendingInsertions;
+    private final List<T> pendingDeletions;
+    private int     skipped;
+
+    /** Handler to call when synchronized sequences are found. */
+    private final ReplacementsHandler<T> handler;
+
+    /**
+     * Simple constructor. Creates a new instance of {@link ReplacementsFinder}.
+     *
+     * @param handler  handler to call when synchronized sequences are found
+     */
+    public ReplacementsFinder(final ReplacementsHandler<T> handler) {
+        pendingInsertions = new ArrayList<T>();
+        pendingDeletions  = new ArrayList<T>();
+        skipped           = 0;
+        this.handler      = handler;
+    }
+
+    /**
+     * Add an object to the pending insertions set.
+     *
+     * @param object  object to insert
+     */
+    public void visitInsertCommand(final T object) {
+        pendingInsertions.add(object);
+    }
+
+    /**
+     * Handle a synchronization object.
+     * <p>
+     * When a synchronization object is identified, the pending insertions and
+     * pending deletions sets are provided to the user handler as subsequences.
+     *
+     * @param object  synchronization object detected
+     */
+    public void visitKeepCommand(final T object) {
+        if (pendingDeletions.isEmpty() && pendingInsertions.isEmpty()) {
+            ++skipped;
+        } else {
+            handler.handleReplacement(skipped, pendingDeletions, pendingInsertions);
+            pendingDeletions.clear();
+            pendingInsertions.clear();
+            skipped = 1;
+        }
+    }
+
+    /**
+     * Add an object to the pending deletions set.
+     *
+     * @param object  object to delete
+     */
+    public void visitDeleteCommand(final T object) {
+        pendingDeletions.add(object);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
new file mode 100644
index 0000000..fe99a03
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.List;
+
+/**
+ * This interface is devoted to handle synchronized replacement sequences.
+ *
+ * @see ReplacementsFinder
+ * @since 4.0
+ * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ */
+public interface ReplacementsHandler<T> {
+
+    /**
+     * Handle two synchronized sequences.
+     * <p>
+     * This method is called by a {@link ReplacementsFinder ReplacementsFinder}
+     * instance when it has synchronized two sub-sequences of object arrays
+     * being compared, and at least one of the sequences is non-empty. Since the
+     * sequences are synchronized, the objects before the two sub-sequences are
+     * equals (if they exist). This property also holds for the objects after
+     * the two sub-sequences.
+     * <p>
+     * The replacement is defined as replacing the <code>from</code>
+     * sub-sequence into the <code>to</code> sub-sequence.
+     *
+     * @param skipped  number of tokens skipped since the last call (i.e. number of
+     *   tokens that were in both sequences), this number should be strictly positive
+     *   except on the very first call where it can be zero (if the first object of
+     *   the two sequences are different)
+     * @param from  sub-sequence of objects coming from the first sequence
+     * @param to  sub-sequence of objects coming from the second sequence
+     */
+    void handleReplacement(int skipped, List<T> from, List<T> to);
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
new file mode 100644
index 0000000..c59f89f
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -0,0 +1,281 @@
+package org.apache.commons.text.diff;
+
+
+
+public class StringsComparator {
+    
+    private final String left;
+    private final String right;
+    
+    /** Temporary variables. */
+    private final int[] vDown;
+    private final int[] vUp;
+    
+    public StringsComparator(String left, String right) {
+        this.left = left;
+        this.right = right;
+        
+        final int size = left.length() + right.length() + 2;
+        vDown = new int[size];
+        vUp   = new int[size];
+    }
+    
+    /**
+     * Get the {@link EditScript} object.
+     * <p>
+     * It is guaranteed that the objects embedded in the {@link InsertCommand
+     * insert commands} come from the second sequence and that the objects
+     * embedded in either the {@link DeleteCommand delete commands} or
+     * {@link KeepCommand keep commands} come from the first sequence. This can
+     * be important if subclassing is used for some elements in the first
+     * sequence and the <code>equals</code> method is specialized.
+     *
+     * @return the edit script resulting from the comparison of the two
+     *         sequences
+     */
+    public EditScript getScript() {
+        final EditScript script = new EditScript();
+        buildScript(0, left.length(), 0, right.length(), script);
+        return script;
+    }
+
+    /**
+     * Build an edit script.
+     *
+     * @param start1  the begin of the first sequence to be compared
+     * @param end1  the end of the first sequence to be compared
+     * @param start2  the begin of the second sequence to be compared
+     * @param end2  the end of the second sequence to be compared
+     * @param script the edited script
+     */
+    private void buildScript(final int start1, final int end1, final int start2, final int end2,
+            final EditScript script) {
+        final Snake middle = getMiddleSnake(start1, end1, start2, end2);
+
+        if (middle == null
+                || middle.getStart() == end1 && middle.getDiag() == end1 - end2
+                || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
+
+            int i = start1;
+            int j = start2;
+            while (i < end1 || j < end2) {
+                if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
+                    script.append(new KeepCommand(left.charAt(i)));
+                    ++i;
+                    ++j;
+                } else {
+                    if (end1 - start1 > end2 - start2) {
+                        script.append(new DeleteCommand(left.charAt(i)));
+                        ++i;
+                    } else {
+                        script.append(new InsertCommand(right.charAt(j)));
+                        ++j;
+                    }
+                }
+            }
+
+        } else {
+
+            buildScript(start1, middle.getStart(),
+                        start2, middle.getStart() - middle.getDiag(),
+                        script);
+            for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
+                script.append(new KeepCommand(left.charAt(i)));
+            }
+            buildScript(middle.getEnd(), end1,
+                        middle.getEnd() - middle.getDiag(), end2,
+                        script);
+        }
+    }
+    
+    private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
+     // Myers Algorithm
+        // Initialisations
+        final int m = end1 - start1;
+        final int n = end2 - start2;
+        if (m == 0 || n == 0) {
+            return null;
+        }
+
+        final int delta  = m - n;
+        final int sum    = n + m;
+        final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
+        vDown[1+offset] = start1;
+        vUp[1+offset]   = end1 + 1;
+
+        for (int d = 0; d <= offset ; ++d) {
+            // Down
+            for (int k = -d; k <= d; k += 2) {
+                // First step
+
+                final int i = k + offset;
+                if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
+                    vDown[i] = vDown[i+1];
+                } else {
+                    vDown[i] = vDown[i-1] + 1;
+                }
+
+                int x = vDown[i];
+                int y = x - start1 + start2 - k;
+
+                while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y)) {
+                    vDown[i] = ++x;
+                    ++y;
+                }
+                // Second step
+                if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
+                    if (vUp[i-delta] <= vDown[i]) {
+                        return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
+                    }
+                }
+            }
+
+            // Up
+            for (int k = delta - d; k <= delta + d; k += 2) {
+                // First step
+                final int i = k + offset - delta;
+                if (k == delta - d
+                        || k != delta + d && vUp[i+1] <= vUp[i-1]) {
+                    vUp[i] = vUp[i+1] - 1;
+                } else {
+                    vUp[i] = vUp[i-1];
+                }
+
+                int x = vUp[i] - 1;
+                int y = x - start1 + start2 - k;
+                while (x >= start1 && y >= start2
+                        && left.charAt(x) == right.charAt(y)) {
+                    vUp[i] = x--;
+                    y--;
+                }
+                // Second step
+                if (delta % 2 == 0 && -d <= k && k <= d ) {
+                    if (vUp[i] <= vDown[i + delta]) {
+                        return buildSnake(vUp[i], k + start1 - start2, end1, end2);
+                    }
+                }
+            }
+        }
+
+        // this should not happen
+        throw new RuntimeException("Internal Error");
+    }
+    
+    /**
+     * Build a snake.
+     *
+     * @param start  the value of the start of the snake
+     * @param diag  the value of the diagonal of the snake
+     * @param end1  the value of the end of the first sequence to be compared
+     * @param end2  the value of the end of the second sequence to be compared
+     * @return the snake built
+     */
+    private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
+        int end = start;
+        while (end - diag < end2
+                && end < end1
+                && left.charAt(end) == right.charAt(end - diag)) {
+            ++end;
+        }
+        return new Snake(start, end, diag);
+    }
+
+    /**
+     * This class is a simple placeholder to hold the end part of a path
+     * under construction in a {@link StringsComparator StringsComparator}.
+     */
+    private static class Snake {
+
+        /** Start index. */
+        private final int start;
+
+        /** End index. */
+        private final int end;
+
+        /** Diagonal number. */
+        private final int diag;
+
+        /**
+         * Simple constructor. Creates a new instance of Snake with specified indices.
+         *
+         * @param start  start index of the snake
+         * @param end  end index of the snake
+         * @param diag  diagonal number
+         */
+        public Snake(final int start, final int end, final int diag) {
+            this.start = start;
+            this.end   = end;
+            this.diag  = diag;
+        }
+
+        /**
+         * Get the start index of the snake.
+         *
+         * @return start index of the snake
+         */
+        public int getStart() {
+            return start;
+        }
+
+        /**
+         * Get the end index of the snake.
+         *
+         * @return end index of the snake
+         */
+        public int getEnd() {
+            return end;
+        }
+
+        /**
+         * Get the diagonal number of the snake.
+         *
+         * @return diagonal number of the snake
+         */
+        public int getDiag() {
+            return diag;
+        }
+    }
+    
+    public static void main(String[] args) {
+        StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
+        EditScript es = sc.getScript();
+        es.visit(new CommandVisitor() {
+            
+            boolean nlAdd = true;
+            boolean nlRemove = true;
+
+            @Override
+            public void visitInsertCommand(Object object) {
+                if (nlAdd) {
+                    System.out.println();
+                    System.out.print("> ");
+                    nlAdd = false;
+                }
+                System.out.print(object);
+            }
+
+            @Override
+            public void visitKeepCommand(Object object) {
+                if (!nlAdd) {
+                    nlAdd = true;
+                }
+                if (!nlRemove) {
+                    nlRemove = true;
+                    System.out.println();
+                }
+                System.out.print(object);
+            }
+
+            @Override
+            public void visitDeleteCommand(Object object) {
+                if (nlRemove) {
+                    System.out.println();
+                    System.out.print("< ");
+                    nlRemove = false;
+                }
+                System.out.print(object);
+            }
+        });
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
new file mode 100644
index 0000000..7a565d4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * <p>Provides algorithms for diff between strings.</p>
+ *
+ * @since 1.0
+ */
+package org.apache.commons.text.diff;


[2/2] [text] Finish adapting code to commons-text, adding missing header and fixing Javadocs

Posted by ki...@apache.org.
Finish adapting code to commons-text, adding missing header and fixing Javadocs


Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a7e88eef
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a7e88eef
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a7e88eef

Branch: refs/heads/myers-algo
Commit: a7e88eef1a77649648a7fc6e9015448eb97c4572
Parents: cca1f19
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Thu Feb 5 00:33:14 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Thu Feb 5 00:33:14 2015 -0200

----------------------------------------------------------------------
 .../commons/text/diff/CommandVisitor.java       |  12 +-
 .../apache/commons/text/diff/DeleteCommand.java |   6 +-
 .../apache/commons/text/diff/EditCommand.java   |   9 +-
 .../apache/commons/text/diff/EditScript.java    |   7 +-
 .../apache/commons/text/diff/InsertCommand.java |   6 +-
 .../apache/commons/text/diff/KeepCommand.java   |   6 +-
 .../commons/text/diff/ReplacementsFinder.java   |  10 +-
 .../commons/text/diff/ReplacementsHandler.java  |   3 +-
 .../commons/text/diff/StringsComparator.java    | 137 +++++++++++++++----
 .../apache/commons/text/diff/package-info.java  |   3 +
 10 files changed, 147 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
index ae3833c..d73dde8 100644
--- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -28,14 +28,17 @@ package org.apache.commons.text.diff;
  * it will perform the loop over all commands in the script and the
  * proper methods of the user class will be called as the commands are
  * encountered.
+ * </p>
  * <p>
  * The implementation of the user visitor class will depend on the
  * need. Here are two examples.
+ * </p>
  * <p>
  * The first example is a visitor that build the longest common
  * subsequence:
+ * </p>
  * <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
  *
  * import java.util.ArrayList;
  *
@@ -67,7 +70,7 @@ package org.apache.commons.text.diff;
  * The second example is a visitor that shows the commands and the way
  * they transform the first sequence into the second one:
  * <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
  *
  * import java.util.Arrays;
  * import java.util.ArrayList;
@@ -97,7 +100,7 @@ package org.apache.commons.text.diff;
  *   }
  *
  *   private void display(String commandName, Object object) {
- *     System.out.println(commandName + " " + object + " ->" + this);
+ *     System.out.println(commandName + " " + object + ": " + this);
  *   }
  *
  *   public String toString() {
@@ -114,8 +117,7 @@ package org.apache.commons.text.diff;
  * }
  * </pre>
  *
- * @since 4.0
- * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public interface CommandVisitor<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
index 0de3a43..7494002 100644
--- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the deletion of this object. The objects embedded in
  * these type of commands always come from the first sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class DeleteCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
index 7c5ff76..627db4d 100644
--- a/src/main/java/org/apache/commons/text/diff/EditCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -21,9 +21,10 @@ package org.apache.commons.text.diff;
  * into another one.
  * <p>
  * When two objects sequences are compared through the
- * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * {@link StringsComparator#getScript StringsComparator.getScript} method,
  * the result is provided has a {@link EditScript script} containing the commands
  * that progressively transform the first sequence into the second one.
+ * </p>
  * <p>
  * There are only three types of commands, all of which are subclasses of this
  * abstract class. Each command is associated with one object belonging to at
@@ -38,12 +39,12 @@ package org.apache.commons.text.diff;
  * the object passed as an argument comes from the second sequence) ; this can
  * be important if subclassing is used for some elements in the first sequence
  * and the <code>equals</code> method is specialized.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public abstract class EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
index ab7b04e..641d60b 100644
--- a/src/main/java/org/apache/commons/text/diff/EditScript.java
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -25,7 +25,7 @@ import java.util.List;
  * <p>
  * An edit script is the most general view of the differences between two
  * sequences. It is built as the result of the comparison between two sequences
- * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * by the {@link StringsComparator StringsComparator} class. The user can
  * walk through it using the <em>visitor</em> design pattern.
  * <p>
  * It is guaranteed that the objects embedded in the {@link InsertCommand insert
@@ -35,13 +35,12 @@ import java.util.List;
  * is used for some elements in the first sequence and the <code>equals</code>
  * method is specialized.
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditCommand
  * @see CommandVisitor
  * @see ReplacementsHandler
  *
- * @since 4.0
- * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class EditScript<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
index 9ec9c6a..9a365d0 100644
--- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the insertion of this object. The objects embedded in
  * these type of commands always come from the second sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class InsertCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
index a511bfe..687f7e7 100644
--- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
  * transforming the first sequence into the second sequence uses an instance of
  * this class to represent the keeping of this object. The objects embedded in
  * these type of commands always come from the first sequence.
+ * </p>
  *
- * @see SequencesComparator
+ * @see StringsComparator
  * @see EditScript
  *
- * @since 4.0
- * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class KeepCommand<T> extends EditCommand<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
index 361a26d..52e4112 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -34,25 +34,26 @@ import java.util.List;
  * {@link DeleteCommand delete}, {@link KeepCommand keep} and
  * {@link InsertCommand insert} commands by fewer replacements acting on
  * complete sub-sequences.
+ * </p>
  * <p>
  * This class is devoted to perform this interpretation. It visits an
  * {@link EditScript edit script} (because it implements the
  * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
  * handler implementing the {@link ReplacementsHandler ReplacementsHandler}
  * interface to process the sub-sequences.
+ * </p>
  *
  * @see ReplacementsHandler
  * @see EditScript
- * @see SequencesComparator
+ * @see StringsComparator
  *
- * @since 4.0
- * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
  */
 public class ReplacementsFinder<T> implements CommandVisitor<T> {
 
     private final List<T> pendingInsertions;
     private final List<T> pendingDeletions;
-    private int     skipped;
+    private int skipped;
 
     /** Handler to call when synchronized sequences are found. */
     private final ReplacementsHandler<T> handler;
@@ -83,6 +84,7 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
      * <p>
      * When a synchronization object is identified, the pending insertions and
      * pending deletions sets are provided to the user handler as subsequences.
+     * </p>
      *
      * @param object  synchronization object detected
      */

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
index fe99a03..d5d61a4 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -22,8 +22,7 @@ import java.util.List;
  * This interface is devoted to handle synchronized replacement sequences.
  *
  * @see ReplacementsFinder
- * @since 4.0
- * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ * @since 1.0
  */
 public interface ReplacementsHandler<T> {
 

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
index c59f89f..c2fbfab 100644
--- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -1,25 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.commons.text.diff;
 
-
-
+/**
+ * <p>
+ * It is guaranteed that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can
+ * be important if subclassing is used for some elements in the first
+ * sequence and the <code>equals</code> method is specialized.
+ * </p>
+ * <p>
+ * Comparison can be seen from two points of view: either as giving the smallest
+ * modification allowing to transform the first sequence into the second one, or
+ * as giving the longest sequence which is a subsequence of both initial
+ * sequences. The <code>equals</code> method is used to compare objects, so any
+ * object can be put into sequences. Modifications include deleting, inserting
+ * or keeping one object, starting from the beginning of the first sequence.
+ * </p>
+ * <p>
+ * This class implements the comparison algorithm, which is the very efficient
+ * algorithm from Eugene W. Myers
+ * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
+ * the shortest possible {@link EditScript edit script} containing all the
+ * {@link EditCommand commands} needed to transform the first sequence into
+ * the second one.
+ *
+ * <p>
+ * This class has been adapted from the commons-collections implementation.
+ * </p>
+ *
+ * @see EditScript
+ * @see EditCommand
+ * @see CommandVisitor
+ *
+ * @since 1.0
+ */
 public class StringsComparator {
-    
+
+    /**
+     * First character sequence.
+     */
     private final String left;
+    /**
+     * Second character sequence.
+     */
     private final String right;
-    
+
     /** Temporary variables. */
     private final int[] vDown;
     private final int[] vUp;
-    
+
+    /**
+     * Simple constructor.
+     * <p>
+     * Creates a new instance of StringsComparator.
+     * </p>
+     * <p>
+     * It is <em>guaranteed</em> that the comparisons will always be done as
+     * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+     * sequence and <code>o2</code> belongs to the second sequence. This can be
+     * important if subclassing is used for some elements in the first sequence
+     * and the <code>equals</code> method is specialized.
+     * </p>
+     *
+     * @param left first character sequence to be compared
+     * @param right second character sequence to be compared
+     */
     public StringsComparator(String left, String right) {
         this.left = left;
         this.right = right;
-        
+
         final int size = left.length() + right.length() + 2;
         vDown = new int[size];
         vUp   = new int[size];
     }
-    
+
     /**
      * Get the {@link EditScript} object.
      * <p>
@@ -29,12 +100,13 @@ public class StringsComparator {
      * {@link KeepCommand keep commands} come from the first sequence. This can
      * be important if subclassing is used for some elements in the first
      * sequence and the <code>equals</code> method is specialized.
+     * </p>
      *
      * @return the edit script resulting from the comparison of the two
      *         sequences
      */
-    public EditScript getScript() {
-        final EditScript script = new EditScript();
+    public EditScript<Character> getScript() {
+        final EditScript<Character> script = new EditScript<Character>();
         buildScript(0, left.length(), 0, right.length(), script);
         return script;
     }
@@ -49,7 +121,7 @@ public class StringsComparator {
      * @param script the edited script
      */
     private void buildScript(final int start1, final int end1, final int start2, final int end2,
-            final EditScript script) {
+            final EditScript<Character> script) {
         final Snake middle = getMiddleSnake(start1, end1, start2, end2);
 
         if (middle == null
@@ -60,15 +132,15 @@ public class StringsComparator {
             int j = start2;
             while (i < end1 || j < end2) {
                 if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
-                    script.append(new KeepCommand(left.charAt(i)));
+                    script.append(new KeepCommand<Character>(left.charAt(i)));
                     ++i;
                     ++j;
                 } else {
                     if (end1 - start1 > end2 - start2) {
-                        script.append(new DeleteCommand(left.charAt(i)));
+                        script.append(new DeleteCommand<Character>(left.charAt(i)));
                         ++i;
                     } else {
-                        script.append(new InsertCommand(right.charAt(j)));
+                        script.append(new InsertCommand<Character>(right.charAt(j)));
                         ++j;
                     }
                 }
@@ -80,16 +152,33 @@ public class StringsComparator {
                         start2, middle.getStart() - middle.getDiag(),
                         script);
             for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
-                script.append(new KeepCommand(left.charAt(i)));
+                script.append(new KeepCommand<Character>(left.charAt(i)));
             }
             buildScript(middle.getEnd(), end1,
                         middle.getEnd() - middle.getDiag(), end2,
                         script);
         }
     }
-    
+
+    /**
+     * Get the middle snake corresponding to two subsequences of the
+     * main sequences.
+     * <p>
+     * The snake is found using the MYERS Algorithm (this algorithms has
+     * also been implemented in the GNU diff program). This algorithm is
+     * explained in Eugene Myers article:
+     * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
+     * An O(ND) Difference Algorithm and Its Variations</a>.
+     * </p>
+     *
+     * @param start1  the begin of the first sequence to be compared
+     * @param end1  the end of the first sequence to be compared
+     * @param start2  the begin of the second sequence to be compared
+     * @param end2  the end of the second sequence to be compared
+     * @return the middle snake
+     */
     private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
-     // Myers Algorithm
+        // Myers Algorithm
         // Initialisations
         final int m = end1 - start1;
         final int n = end2 - start2;
@@ -160,7 +249,7 @@ public class StringsComparator {
         // this should not happen
         throw new RuntimeException("Internal Error");
     }
-    
+
     /**
      * Build a snake.
      *
@@ -235,17 +324,17 @@ public class StringsComparator {
             return diag;
         }
     }
-    
+
     public static void main(String[] args) {
         StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
-        EditScript es = sc.getScript();
-        es.visit(new CommandVisitor() {
-            
+        EditScript<Character> es = sc.getScript();
+        es.visit(new CommandVisitor<Character>() {
+
             boolean nlAdd = true;
             boolean nlRemove = true;
 
             @Override
-            public void visitInsertCommand(Object object) {
+            public void visitInsertCommand(Character object) {
                 if (nlAdd) {
                     System.out.println();
                     System.out.print("> ");
@@ -255,7 +344,7 @@ public class StringsComparator {
             }
 
             @Override
-            public void visitKeepCommand(Object object) {
+            public void visitKeepCommand(Character object) {
                 if (!nlAdd) {
                     nlAdd = true;
                 }
@@ -267,7 +356,7 @@ public class StringsComparator {
             }
 
             @Override
-            public void visitDeleteCommand(Object object) {
+            public void visitDeleteCommand(Character object) {
                 if (nlRemove) {
                     System.out.println();
                     System.out.print("< ");

http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
index 7a565d4..92fde86 100644
--- a/src/main/java/org/apache/commons/text/diff/package-info.java
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -17,6 +17,9 @@
 /**
  * <p>Provides algorithms for diff between strings.</p>
  *
+ * <p>The initial implementation of the Myers algorithm was adapted from the
+ * commons-collections sequence package.</p>
+ *
  * @since 1.0
  */
 package org.apache.commons.text.diff;