You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by ki...@apache.org on 2015/02/05 03:33:51 UTC
[1/2] [text] Migrating Myers algorithm from [collections]
Repository: commons-text
Updated Branches:
refs/heads/myers-algo [created] a7e88eef1
Migrating Myers algorithm from [collections]
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/cca1f199
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/cca1f199
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/cca1f199
Branch: refs/heads/myers-algo
Commit: cca1f19918760d4e538ff4d70d875878418f6f1a
Parents: 37c7804
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Sun Dec 14 23:41:43 2014 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Sun Dec 14 23:41:43 2014 -0200
----------------------------------------------------------------------
.../commons/text/diff/CommandVisitor.java | 143 ++++++++++
.../apache/commons/text/diff/DeleteCommand.java | 55 ++++
.../apache/commons/text/diff/EditCommand.java | 82 ++++++
.../apache/commons/text/diff/EditScript.java | 133 +++++++++
.../apache/commons/text/diff/InsertCommand.java | 57 ++++
.../apache/commons/text/diff/KeepCommand.java | 57 ++++
.../commons/text/diff/ReplacementsFinder.java | 109 +++++++
.../commons/text/diff/ReplacementsHandler.java | 52 ++++
.../commons/text/diff/StringsComparator.java | 281 +++++++++++++++++++
.../apache/commons/text/diff/package-info.java | 22 ++
10 files changed, 991 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
new file mode 100644
index 0000000..ae3833c
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * This interface should be implemented by user object to walk
+ * through {@link EditScript EditScript} objects.
+ * <p>
+ * Users should implement this interface in order to walk through
+ * the {@link EditScript EditScript} object created by the comparison
+ * of two sequences. This is a direct application of the visitor
+ * design pattern. The {@link EditScript#visit EditScript.visit}
+ * method takes an object implementing this interface as an argument,
+ * it will perform the loop over all commands in the script and the
+ * proper methods of the user class will be called as the commands are
+ * encountered.
+ * <p>
+ * The implementation of the user visitor class will depend on the
+ * need. Here are two examples.
+ * <p>
+ * The first example is a visitor that build the longest common
+ * subsequence:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.ArrayList;
+ *
+ * public class LongestCommonSubSequence implements CommandVisitor {
+ *
+ * public LongestCommonSubSequence() {
+ * a = new ArrayList();
+ * }
+ *
+ * public void visitInsertCommand(Object object) {
+ * }
+ *
+ * public void visitKeepCommand(Object object) {
+ * a.add(object);
+ * }
+ *
+ * public void visitDeleteCommand(Object object) {
+ * }
+ *
+ * public Object[] getSubSequence() {
+ * return a.toArray();
+ * }
+ *
+ * private ArrayList a;
+ *
+ * }
+ * </pre>
+ * <p>
+ * The second example is a visitor that shows the commands and the way
+ * they transform the first sequence into the second one:
+ * <pre>
+ * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ *
+ * import java.util.Arrays;
+ * import java.util.ArrayList;
+ * import java.util.Iterator;
+ *
+ * public class ShowVisitor implements CommandVisitor {
+ *
+ * public ShowVisitor(Object[] sequence1) {
+ * v = new ArrayList();
+ * v.addAll(Arrays.asList(sequence1));
+ * index = 0;
+ * }
+ *
+ * public void visitInsertCommand(Object object) {
+ * v.insertElementAt(object, index++);
+ * display("insert", object);
+ * }
+ *
+ * public void visitKeepCommand(Object object) {
+ * ++index;
+ * display("keep ", object);
+ * }
+ *
+ * public void visitDeleteCommand(Object object) {
+ * v.remove(index);
+ * display("delete", object);
+ * }
+ *
+ * private void display(String commandName, Object object) {
+ * System.out.println(commandName + " " + object + " ->" + this);
+ * }
+ *
+ * public String toString() {
+ * StringBuffer buffer = new StringBuffer();
+ * for (Iterator iter = v.iterator(); iter.hasNext();) {
+ * buffer.append(' ').append(iter.next());
+ * }
+ * return buffer.toString();
+ * }
+ *
+ * private ArrayList v;
+ * private int index;
+ *
+ * }
+ * </pre>
+ *
+ * @since 4.0
+ * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public interface CommandVisitor<T> {
+
+ /**
+ * Method called when an insert command is encountered.
+ *
+ * @param object object to insert (this object comes from the second sequence)
+ */
+ void visitInsertCommand(T object);
+
+ /**
+ * Method called when a keep command is encountered.
+ *
+ * @param object object to keep (this object comes from the first sequence)
+ */
+ void visitKeepCommand(T object);
+
+ /**
+ * Method called when a delete command is encountered.
+ *
+ * @param object object to delete (this object comes from the first sequence)
+ */
+ void visitDeleteCommand(T object);
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
new file mode 100644
index 0000000..0de3a43
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the deletion of one object of the first sequence.
+ * <p>
+ * When one object of the first sequence has no corresponding object in the
+ * second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the deletion of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class DeleteCommand<T> extends EditCommand<T> {
+
+ /**
+ * Simple constructor. Creates a new instance of {@link DeleteCommand}.
+ *
+ * @param object the object of the first sequence that should be deleted
+ */
+ public DeleteCommand(final T object) {
+ super(object);
+ }
+
+ /**
+ * Accept a visitor. When a <code>DeleteCommand</code> accepts a visitor, it calls
+ * its {@link CommandVisitor#visitDeleteCommand visitDeleteCommand} method.
+ *
+ * @param visitor the visitor to be accepted
+ */
+ @Override
+ public void accept(final CommandVisitor<T> visitor) {
+ visitor.visitDeleteCommand(getObject());
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
new file mode 100644
index 0000000..7c5ff76
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Abstract base class for all commands used to transform an objects sequence
+ * into another one.
+ * <p>
+ * When two objects sequences are compared through the
+ * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * the result is provided has a {@link EditScript script} containing the commands
+ * that progressively transform the first sequence into the second one.
+ * <p>
+ * There are only three types of commands, all of which are subclasses of this
+ * abstract class. Each command is associated with one object belonging to at
+ * least one of the sequences. These commands are {@link InsertCommand
+ * InsertCommand} which correspond to an object of the second sequence being
+ * inserted into the first sequence, {@link DeleteCommand DeleteCommand} which
+ * correspond to an object of the first sequence being removed and
+ * {@link KeepCommand KeepCommand} which correspond to an object of the first
+ * sequence which <code>equals</code> an object in the second sequence. It is
+ * guaranteed that comparison is always performed this way (i.e. the
+ * <code>equals</code> method of the object from the first sequence is used and
+ * the object passed as an argument comes from the second sequence) ; this can
+ * be important if subclassing is used for some elements in the first sequence
+ * and the <code>equals</code> method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public abstract class EditCommand<T> {
+
+ /** Object on which the command should be applied. */
+ private final T object;
+
+ /**
+ * Simple constructor. Creates a new instance of EditCommand
+ *
+ * @param object reference to the object associated with this command, this
+ * refers to an element of one of the sequences being compared
+ */
+ protected EditCommand(final T object) {
+ this.object = object;
+ }
+
+ /**
+ * Returns the object associated with this command.
+ *
+ * @return the object on which the command is applied
+ */
+ protected T getObject() {
+ return object;
+ }
+
+ /**
+ * Accept a visitor.
+ * <p>
+ * This method is invoked for each commands belonging to
+ * an {@link EditScript EditScript}, in order to implement the visitor design pattern
+ *
+ * @param visitor the visitor to be accepted
+ */
+ public abstract void accept(CommandVisitor<T> visitor);
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
new file mode 100644
index 0000000..ab7b04e
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class gathers all the {@link EditCommand commands} needed to transform
+ * one objects sequence into another objects sequence.
+ * <p>
+ * An edit script is the most general view of the differences between two
+ * sequences. It is built as the result of the comparison between two sequences
+ * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * walk through it using the <em>visitor</em> design pattern.
+ * <p>
+ * It is guaranteed that the objects embedded in the {@link InsertCommand insert
+ * commands} come from the second sequence and that the objects embedded in
+ * either the {@link DeleteCommand delete commands} or {@link KeepCommand keep
+ * commands} come from the first sequence. This can be important if subclassing
+ * is used for some elements in the first sequence and the <code>equals</code>
+ * method is specialized.
+ *
+ * @see SequencesComparator
+ * @see EditCommand
+ * @see CommandVisitor
+ * @see ReplacementsHandler
+ *
+ * @since 4.0
+ * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class EditScript<T> {
+
+ /** Container for the commands. */
+ private final List<EditCommand<T>> commands;
+
+ /** Length of the longest common subsequence. */
+ private int lcsLength;
+
+ /** Number of modifications. */
+ private int modifications;
+
+ /**
+ * Simple constructor. Creates a new empty script.
+ */
+ public EditScript() {
+ commands = new ArrayList<EditCommand<T>>();
+ lcsLength = 0;
+ modifications = 0;
+ }
+
+ /**
+ * Add a keep command to the script.
+ *
+ * @param command command to add
+ */
+ public void append(final KeepCommand<T> command) {
+ commands.add(command);
+ ++lcsLength;
+ }
+
+ /**
+ * Add an insert command to the script.
+ *
+ * @param command command to add
+ */
+ public void append(final InsertCommand<T> command) {
+ commands.add(command);
+ ++modifications;
+ }
+
+ /**
+ * Add a delete command to the script.
+ *
+ * @param command command to add
+ */
+ public void append(final DeleteCommand<T> command) {
+ commands.add(command);
+ ++modifications;
+ }
+
+ /**
+ * Visit the script. The script implements the <em>visitor</em> design
+ * pattern, this method is the entry point to which the user supplies its
+ * own visitor, the script will be responsible to drive it through the
+ * commands in order and call the appropriate method as each command is
+ * encountered.
+ *
+ * @param visitor the visitor that will visit all commands in turn
+ */
+ public void visit(final CommandVisitor<T> visitor) {
+ for (final EditCommand<T> command : commands) {
+ command.accept(visitor);
+ }
+ }
+
+ /**
+ * Get the length of the Longest Common Subsequence (LCS). The length of the
+ * longest common subsequence is the number of {@link KeepCommand keep
+ * commands} in the script.
+ *
+ * @return length of the Longest Common Subsequence
+ */
+ public int getLCSLength() {
+ return lcsLength;
+ }
+
+ /**
+ * Get the number of effective modifications. The number of effective
+ * modification is the number of {@link DeleteCommand delete} and
+ * {@link InsertCommand insert} commands in the script.
+ *
+ * @return number of effective modifications
+ */
+ public int getModifications() {
+ return modifications;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
new file mode 100644
index 0000000..9ec9c6a
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the insertion of one object of the second sequence.
+ * <p>
+ * When one object of the second sequence has no corresponding object in the
+ * first sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the insertion of this object. The objects embedded in
+ * these type of commands always come from the second sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class InsertCommand<T> extends EditCommand<T> {
+
+ /**
+ * Simple constructor. Creates a new instance of InsertCommand
+ *
+ * @param object the object of the second sequence that should be inserted
+ */
+ public InsertCommand(final T object) {
+ super(object);
+ }
+
+ /**
+ * Accept a visitor. When an <code>InsertCommand</code> accepts a visitor,
+ * it calls its {@link CommandVisitor#visitInsertCommand visitInsertCommand}
+ * method.
+ *
+ * @param visitor the visitor to be accepted
+ */
+ @Override
+ public void accept(final CommandVisitor<T> visitor) {
+ visitor.visitInsertCommand(getObject());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
new file mode 100644
index 0000000..a511bfe
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+/**
+ * Command representing the keeping of one object present in both sequences.
+ * <p>
+ * When one object of the first sequence <code>equals</code> another objects in
+ * the second sequence at the right place, the {@link EditScript edit script}
+ * transforming the first sequence into the second sequence uses an instance of
+ * this class to represent the keeping of this object. The objects embedded in
+ * these type of commands always come from the first sequence.
+ *
+ * @see SequencesComparator
+ * @see EditScript
+ *
+ * @since 4.0
+ * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class KeepCommand<T> extends EditCommand<T> {
+
+ /**
+ * Simple constructor. Creates a new instance of KeepCommand
+ *
+ * @param object the object belonging to both sequences (the object is a
+ * reference to the instance in the first sequence which is known
+ * to be equal to an instance in the second sequence)
+ */
+ public KeepCommand(final T object) {
+ super(object);
+ }
+
+ /**
+ * Accept a visitor. When a <code>KeepCommand</code> accepts a visitor, it
+ * calls its {@link CommandVisitor#visitKeepCommand visitKeepCommand} method.
+ *
+ * @param visitor the visitor to be accepted
+ */
+ @Override
+ public void accept(final CommandVisitor<T> visitor) {
+ visitor.visitKeepCommand(getObject());
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
new file mode 100644
index 0000000..361a26d
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This class handles sequences of replacements resulting from a comparison.
+ * <p>
+ * The comparison of two objects sequences leads to the identification of common
+ * parts and parts which only belong to the first or to the second sequence. The
+ * common parts appear in the edit script in the form of <em>keep</em> commands,
+ * they can be considered as synchronization objects between the two sequences.
+ * These synchronization objects split the two sequences in synchronized
+ * sub-sequences. The first sequence can be transformed into the second one by
+ * replacing each synchronized sub-sequence of the first sequence by the
+ * corresponding sub-sequence of the second sequence. This is a synthetic way to
+ * see an {@link EditScript edit script}, replacing individual
+ * {@link DeleteCommand delete}, {@link KeepCommand keep} and
+ * {@link InsertCommand insert} commands by fewer replacements acting on
+ * complete sub-sequences.
+ * <p>
+ * This class is devoted to perform this interpretation. It visits an
+ * {@link EditScript edit script} (because it implements the
+ * {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
+ * handler implementing the {@link ReplacementsHandler ReplacementsHandler}
+ * interface to process the sub-sequences.
+ *
+ * @see ReplacementsHandler
+ * @see EditScript
+ * @see SequencesComparator
+ *
+ * @since 4.0
+ * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ */
+public class ReplacementsFinder<T> implements CommandVisitor<T> {
+
+ private final List<T> pendingInsertions;
+ private final List<T> pendingDeletions;
+ private int skipped;
+
+ /** Handler to call when synchronized sequences are found. */
+ private final ReplacementsHandler<T> handler;
+
+ /**
+ * Simple constructor. Creates a new instance of {@link ReplacementsFinder}.
+ *
+ * @param handler handler to call when synchronized sequences are found
+ */
+ public ReplacementsFinder(final ReplacementsHandler<T> handler) {
+ pendingInsertions = new ArrayList<T>();
+ pendingDeletions = new ArrayList<T>();
+ skipped = 0;
+ this.handler = handler;
+ }
+
+ /**
+ * Add an object to the pending insertions set.
+ *
+ * @param object object to insert
+ */
+ public void visitInsertCommand(final T object) {
+ pendingInsertions.add(object);
+ }
+
+ /**
+ * Handle a synchronization object.
+ * <p>
+ * When a synchronization object is identified, the pending insertions and
+ * pending deletions sets are provided to the user handler as subsequences.
+ *
+ * @param object synchronization object detected
+ */
+ public void visitKeepCommand(final T object) {
+ if (pendingDeletions.isEmpty() && pendingInsertions.isEmpty()) {
+ ++skipped;
+ } else {
+ handler.handleReplacement(skipped, pendingDeletions, pendingInsertions);
+ pendingDeletions.clear();
+ pendingInsertions.clear();
+ skipped = 1;
+ }
+ }
+
+ /**
+ * Add an object to the pending deletions set.
+ *
+ * @param object object to delete
+ */
+ public void visitDeleteCommand(final T object) {
+ pendingDeletions.add(object);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
new file mode 100644
index 0000000..fe99a03
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.diff;
+
+import java.util.List;
+
+/**
+ * This interface is devoted to handle synchronized replacement sequences.
+ *
+ * @see ReplacementsFinder
+ * @since 4.0
+ * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ */
+public interface ReplacementsHandler<T> {
+
+ /**
+ * Handle two synchronized sequences.
+ * <p>
+ * This method is called by a {@link ReplacementsFinder ReplacementsFinder}
+ * instance when it has synchronized two sub-sequences of object arrays
+ * being compared, and at least one of the sequences is non-empty. Since the
+ * sequences are synchronized, the objects before the two sub-sequences are
+ * equals (if they exist). This property also holds for the objects after
+ * the two sub-sequences.
+ * <p>
+ * The replacement is defined as replacing the <code>from</code>
+ * sub-sequence into the <code>to</code> sub-sequence.
+ *
+ * @param skipped number of tokens skipped since the last call (i.e. number of
+ * tokens that were in both sequences), this number should be strictly positive
+ * except on the very first call where it can be zero (if the first object of
+ * the two sequences are different)
+ * @param from sub-sequence of objects coming from the first sequence
+ * @param to sub-sequence of objects coming from the second sequence
+ */
+ void handleReplacement(int skipped, List<T> from, List<T> to);
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
new file mode 100644
index 0000000..c59f89f
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -0,0 +1,281 @@
+package org.apache.commons.text.diff;
+
+
+
+public class StringsComparator {
+
+ private final String left;
+ private final String right;
+
+ /** Temporary variables. */
+ private final int[] vDown;
+ private final int[] vUp;
+
+ public StringsComparator(String left, String right) {
+ this.left = left;
+ this.right = right;
+
+ final int size = left.length() + right.length() + 2;
+ vDown = new int[size];
+ vUp = new int[size];
+ }
+
+ /**
+ * Get the {@link EditScript} object.
+ * <p>
+ * It is guaranteed that the objects embedded in the {@link InsertCommand
+ * insert commands} come from the second sequence and that the objects
+ * embedded in either the {@link DeleteCommand delete commands} or
+ * {@link KeepCommand keep commands} come from the first sequence. This can
+ * be important if subclassing is used for some elements in the first
+ * sequence and the <code>equals</code> method is specialized.
+ *
+ * @return the edit script resulting from the comparison of the two
+ * sequences
+ */
+ public EditScript getScript() {
+ final EditScript script = new EditScript();
+ buildScript(0, left.length(), 0, right.length(), script);
+ return script;
+ }
+
+ /**
+ * Build an edit script.
+ *
+ * @param start1 the begin of the first sequence to be compared
+ * @param end1 the end of the first sequence to be compared
+ * @param start2 the begin of the second sequence to be compared
+ * @param end2 the end of the second sequence to be compared
+ * @param script the edited script
+ */
+ private void buildScript(final int start1, final int end1, final int start2, final int end2,
+ final EditScript script) {
+ final Snake middle = getMiddleSnake(start1, end1, start2, end2);
+
+ if (middle == null
+ || middle.getStart() == end1 && middle.getDiag() == end1 - end2
+ || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
+
+ int i = start1;
+ int j = start2;
+ while (i < end1 || j < end2) {
+ if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
+ script.append(new KeepCommand(left.charAt(i)));
+ ++i;
+ ++j;
+ } else {
+ if (end1 - start1 > end2 - start2) {
+ script.append(new DeleteCommand(left.charAt(i)));
+ ++i;
+ } else {
+ script.append(new InsertCommand(right.charAt(j)));
+ ++j;
+ }
+ }
+ }
+
+ } else {
+
+ buildScript(start1, middle.getStart(),
+ start2, middle.getStart() - middle.getDiag(),
+ script);
+ for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
+ script.append(new KeepCommand(left.charAt(i)));
+ }
+ buildScript(middle.getEnd(), end1,
+ middle.getEnd() - middle.getDiag(), end2,
+ script);
+ }
+ }
+
+ private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
+ // Myers Algorithm
+ // Initialisations
+ final int m = end1 - start1;
+ final int n = end2 - start2;
+ if (m == 0 || n == 0) {
+ return null;
+ }
+
+ final int delta = m - n;
+ final int sum = n + m;
+ final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
+ vDown[1+offset] = start1;
+ vUp[1+offset] = end1 + 1;
+
+ for (int d = 0; d <= offset ; ++d) {
+ // Down
+ for (int k = -d; k <= d; k += 2) {
+ // First step
+
+ final int i = k + offset;
+ if (k == -d || k != d && vDown[i-1] < vDown[i+1]) {
+ vDown[i] = vDown[i+1];
+ } else {
+ vDown[i] = vDown[i-1] + 1;
+ }
+
+ int x = vDown[i];
+ int y = x - start1 + start2 - k;
+
+ while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y)) {
+ vDown[i] = ++x;
+ ++y;
+ }
+ // Second step
+ if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
+ if (vUp[i-delta] <= vDown[i]) {
+ return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2);
+ }
+ }
+ }
+
+ // Up
+ for (int k = delta - d; k <= delta + d; k += 2) {
+ // First step
+ final int i = k + offset - delta;
+ if (k == delta - d
+ || k != delta + d && vUp[i+1] <= vUp[i-1]) {
+ vUp[i] = vUp[i+1] - 1;
+ } else {
+ vUp[i] = vUp[i-1];
+ }
+
+ int x = vUp[i] - 1;
+ int y = x - start1 + start2 - k;
+ while (x >= start1 && y >= start2
+ && left.charAt(x) == right.charAt(y)) {
+ vUp[i] = x--;
+ y--;
+ }
+ // Second step
+ if (delta % 2 == 0 && -d <= k && k <= d ) {
+ if (vUp[i] <= vDown[i + delta]) {
+ return buildSnake(vUp[i], k + start1 - start2, end1, end2);
+ }
+ }
+ }
+ }
+
+ // this should not happen
+ throw new RuntimeException("Internal Error");
+ }
+
+ /**
+ * Build a snake.
+ *
+ * @param start the value of the start of the snake
+ * @param diag the value of the diagonal of the snake
+ * @param end1 the value of the end of the first sequence to be compared
+ * @param end2 the value of the end of the second sequence to be compared
+ * @return the snake built
+ */
+ private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
+ int end = start;
+ while (end - diag < end2
+ && end < end1
+ && left.charAt(end) == right.charAt(end - diag)) {
+ ++end;
+ }
+ return new Snake(start, end, diag);
+ }
+
+ /**
+ * This class is a simple placeholder to hold the end part of a path
+ * under construction in a {@link StringsComparator StringsComparator}.
+ */
+ private static class Snake {
+
+ /** Start index. */
+ private final int start;
+
+ /** End index. */
+ private final int end;
+
+ /** Diagonal number. */
+ private final int diag;
+
+ /**
+ * Simple constructor. Creates a new instance of Snake with specified indices.
+ *
+ * @param start start index of the snake
+ * @param end end index of the snake
+ * @param diag diagonal number
+ */
+ public Snake(final int start, final int end, final int diag) {
+ this.start = start;
+ this.end = end;
+ this.diag = diag;
+ }
+
+ /**
+ * Get the start index of the snake.
+ *
+ * @return start index of the snake
+ */
+ public int getStart() {
+ return start;
+ }
+
+ /**
+ * Get the end index of the snake.
+ *
+ * @return end index of the snake
+ */
+ public int getEnd() {
+ return end;
+ }
+
+ /**
+ * Get the diagonal number of the snake.
+ *
+ * @return diagonal number of the snake
+ */
+ public int getDiag() {
+ return diag;
+ }
+ }
+
+ public static void main(String[] args) {
+ StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
+ EditScript es = sc.getScript();
+ es.visit(new CommandVisitor() {
+
+ boolean nlAdd = true;
+ boolean nlRemove = true;
+
+ @Override
+ public void visitInsertCommand(Object object) {
+ if (nlAdd) {
+ System.out.println();
+ System.out.print("> ");
+ nlAdd = false;
+ }
+ System.out.print(object);
+ }
+
+ @Override
+ public void visitKeepCommand(Object object) {
+ if (!nlAdd) {
+ nlAdd = true;
+ }
+ if (!nlRemove) {
+ nlRemove = true;
+ System.out.println();
+ }
+ System.out.print(object);
+ }
+
+ @Override
+ public void visitDeleteCommand(Object object) {
+ if (nlRemove) {
+ System.out.println();
+ System.out.print("< ");
+ nlRemove = false;
+ }
+ System.out.print(object);
+ }
+ });
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/cca1f199/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
new file mode 100644
index 0000000..7a565d4
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * <p>Provides algorithms for diff between strings.</p>
+ *
+ * @since 1.0
+ */
+package org.apache.commons.text.diff;
[2/2] [text] Finish adapting code to commons-text,
adding missing header and fixing Javadocs
Posted by ki...@apache.org.
Finish adapting code to commons-text, adding missing header and fixing Javadocs
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/a7e88eef
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/a7e88eef
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/a7e88eef
Branch: refs/heads/myers-algo
Commit: a7e88eef1a77649648a7fc6e9015448eb97c4572
Parents: cca1f19
Author: Bruno P. Kinoshita <ki...@apache.org>
Authored: Thu Feb 5 00:33:14 2015 -0200
Committer: Bruno P. Kinoshita <ki...@apache.org>
Committed: Thu Feb 5 00:33:14 2015 -0200
----------------------------------------------------------------------
.../commons/text/diff/CommandVisitor.java | 12 +-
.../apache/commons/text/diff/DeleteCommand.java | 6 +-
.../apache/commons/text/diff/EditCommand.java | 9 +-
.../apache/commons/text/diff/EditScript.java | 7 +-
.../apache/commons/text/diff/InsertCommand.java | 6 +-
.../apache/commons/text/diff/KeepCommand.java | 6 +-
.../commons/text/diff/ReplacementsFinder.java | 10 +-
.../commons/text/diff/ReplacementsHandler.java | 3 +-
.../commons/text/diff/StringsComparator.java | 137 +++++++++++++++----
.../apache/commons/text/diff/package-info.java | 3 +
10 files changed, 147 insertions(+), 52 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
index ae3833c..d73dde8 100644
--- a/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
+++ b/src/main/java/org/apache/commons/text/diff/CommandVisitor.java
@@ -28,14 +28,17 @@ package org.apache.commons.text.diff;
* it will perform the loop over all commands in the script and the
* proper methods of the user class will be called as the commands are
* encountered.
+ * </p>
* <p>
* The implementation of the user visitor class will depend on the
* need. Here are two examples.
+ * </p>
* <p>
* The first example is a visitor that build the longest common
* subsequence:
+ * </p>
* <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.ArrayList;
*
@@ -67,7 +70,7 @@ package org.apache.commons.text.diff;
* The second example is a visitor that shows the commands and the way
* they transform the first sequence into the second one:
* <pre>
- * import org.apache.commons.collections4.comparators.sequence.CommandVisitor;
+ * import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.Arrays;
* import java.util.ArrayList;
@@ -97,7 +100,7 @@ package org.apache.commons.text.diff;
* }
*
* private void display(String commandName, Object object) {
- * System.out.println(commandName + " " + object + " ->" + this);
+ * System.out.println(commandName + " " + object + ": " + this);
* }
*
* public String toString() {
@@ -114,8 +117,7 @@ package org.apache.commons.text.diff;
* }
* </pre>
*
- * @since 4.0
- * @version $Id: CommandVisitor.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public interface CommandVisitor<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
index 0de3a43..7494002 100644
--- a/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/DeleteCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the deletion of this object. The objects embedded in
* these type of commands always come from the first sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: DeleteCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class DeleteCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditCommand.java b/src/main/java/org/apache/commons/text/diff/EditCommand.java
index 7c5ff76..627db4d 100644
--- a/src/main/java/org/apache/commons/text/diff/EditCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/EditCommand.java
@@ -21,9 +21,10 @@ package org.apache.commons.text.diff;
* into another one.
* <p>
* When two objects sequences are compared through the
- * {@link SequencesComparator#getScript SequencesComparator.getScript} method,
+ * {@link StringsComparator#getScript StringsComparator.getScript} method,
* the result is provided has a {@link EditScript script} containing the commands
* that progressively transform the first sequence into the second one.
+ * </p>
* <p>
* There are only three types of commands, all of which are subclasses of this
* abstract class. Each command is associated with one object belonging to at
@@ -38,12 +39,12 @@ package org.apache.commons.text.diff;
* the object passed as an argument comes from the second sequence) ; this can
* be important if subclassing is used for some elements in the first sequence
* and the <code>equals</code> method is specialized.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: EditCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public abstract class EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/EditScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/EditScript.java b/src/main/java/org/apache/commons/text/diff/EditScript.java
index ab7b04e..641d60b 100644
--- a/src/main/java/org/apache/commons/text/diff/EditScript.java
+++ b/src/main/java/org/apache/commons/text/diff/EditScript.java
@@ -25,7 +25,7 @@ import java.util.List;
* <p>
* An edit script is the most general view of the differences between two
* sequences. It is built as the result of the comparison between two sequences
- * by the {@link SequencesComparator SequencesComparator} class. The user can
+ * by the {@link StringsComparator StringsComparator} class. The user can
* walk through it using the <em>visitor</em> design pattern.
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand insert
@@ -35,13 +35,12 @@ import java.util.List;
* is used for some elements in the first sequence and the <code>equals</code>
* method is specialized.
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditCommand
* @see CommandVisitor
* @see ReplacementsHandler
*
- * @since 4.0
- * @version $Id: EditScript.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class EditScript<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/InsertCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/InsertCommand.java b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
index 9ec9c6a..9a365d0 100644
--- a/src/main/java/org/apache/commons/text/diff/InsertCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/InsertCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the insertion of this object. The objects embedded in
* these type of commands always come from the second sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: InsertCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class InsertCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/KeepCommand.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/KeepCommand.java b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
index a511bfe..687f7e7 100644
--- a/src/main/java/org/apache/commons/text/diff/KeepCommand.java
+++ b/src/main/java/org/apache/commons/text/diff/KeepCommand.java
@@ -24,12 +24,12 @@ package org.apache.commons.text.diff;
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the keeping of this object. The objects embedded in
* these type of commands always come from the first sequence.
+ * </p>
*
- * @see SequencesComparator
+ * @see StringsComparator
* @see EditScript
*
- * @since 4.0
- * @version $Id: KeepCommand.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class KeepCommand<T> extends EditCommand<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
index 361a26d..52e4112 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsFinder.java
@@ -34,25 +34,26 @@ import java.util.List;
* {@link DeleteCommand delete}, {@link KeepCommand keep} and
* {@link InsertCommand insert} commands by fewer replacements acting on
* complete sub-sequences.
+ * </p>
* <p>
* This class is devoted to perform this interpretation. It visits an
* {@link EditScript edit script} (because it implements the
* {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
* handler implementing the {@link ReplacementsHandler ReplacementsHandler}
* interface to process the sub-sequences.
+ * </p>
*
* @see ReplacementsHandler
* @see EditScript
- * @see SequencesComparator
+ * @see StringsComparator
*
- * @since 4.0
- * @version $Id: ReplacementsFinder.java 1477760 2013-04-30 18:34:03Z tn $
+ * @since 1.0
*/
public class ReplacementsFinder<T> implements CommandVisitor<T> {
private final List<T> pendingInsertions;
private final List<T> pendingDeletions;
- private int skipped;
+ private int skipped;
/** Handler to call when synchronized sequences are found. */
private final ReplacementsHandler<T> handler;
@@ -83,6 +84,7 @@ public class ReplacementsFinder<T> implements CommandVisitor<T> {
* <p>
* When a synchronization object is identified, the pending insertions and
* pending deletions sets are provided to the user handler as subsequences.
+ * </p>
*
* @param object synchronization object detected
*/
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
index fe99a03..d5d61a4 100644
--- a/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
+++ b/src/main/java/org/apache/commons/text/diff/ReplacementsHandler.java
@@ -22,8 +22,7 @@ import java.util.List;
* This interface is devoted to handle synchronized replacement sequences.
*
* @see ReplacementsFinder
- * @since 4.0
- * @version $Id: ReplacementsHandler.java 1543277 2013-11-19 00:53:50Z ggregory $
+ * @since 1.0
*/
public interface ReplacementsHandler<T> {
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/StringsComparator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/StringsComparator.java b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
index c59f89f..c2fbfab 100644
--- a/src/main/java/org/apache/commons/text/diff/StringsComparator.java
+++ b/src/main/java/org/apache/commons/text/diff/StringsComparator.java
@@ -1,25 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.commons.text.diff;
-
-
+/**
+ * <p>
+ * It is guaranteed that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can
+ * be important if subclassing is used for some elements in the first
+ * sequence and the <code>equals</code> method is specialized.
+ * </p>
+ * <p>
+ * Comparison can be seen from two points of view: either as giving the smallest
+ * modification allowing to transform the first sequence into the second one, or
+ * as giving the longest sequence which is a subsequence of both initial
+ * sequences. The <code>equals</code> method is used to compare objects, so any
+ * object can be put into sequences. Modifications include deleting, inserting
+ * or keeping one object, starting from the beginning of the first sequence.
+ * </p>
+ * <p>
+ * This class implements the comparison algorithm, which is the very efficient
+ * algorithm from Eugene W. Myers
+ * <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
+ * the shortest possible {@link EditScript edit script} containing all the
+ * {@link EditCommand commands} needed to transform the first sequence into
+ * the second one.
+ *
+ * <p>
+ * This class has been adapted from the commons-collections implementation.
+ * </p>
+ *
+ * @see EditScript
+ * @see EditCommand
+ * @see CommandVisitor
+ *
+ * @since 1.0
+ */
public class StringsComparator {
-
+
+ /**
+ * First character sequence.
+ */
private final String left;
+ /**
+ * Second character sequence.
+ */
private final String right;
-
+
/** Temporary variables. */
private final int[] vDown;
private final int[] vUp;
-
+
+ /**
+ * Simple constructor.
+ * <p>
+ * Creates a new instance of StringsComparator.
+ * </p>
+ * <p>
+ * It is <em>guaranteed</em> that the comparisons will always be done as
+ * <code>o1.equals(o2)</code> where <code>o1</code> belongs to the first
+ * sequence and <code>o2</code> belongs to the second sequence. This can be
+ * important if subclassing is used for some elements in the first sequence
+ * and the <code>equals</code> method is specialized.
+ * </p>
+ *
+ * @param left first character sequence to be compared
+ * @param right second character sequence to be compared
+ */
public StringsComparator(String left, String right) {
this.left = left;
this.right = right;
-
+
final int size = left.length() + right.length() + 2;
vDown = new int[size];
vUp = new int[size];
}
-
+
/**
* Get the {@link EditScript} object.
* <p>
@@ -29,12 +100,13 @@ public class StringsComparator {
* {@link KeepCommand keep commands} come from the first sequence. This can
* be important if subclassing is used for some elements in the first
* sequence and the <code>equals</code> method is specialized.
+ * </p>
*
* @return the edit script resulting from the comparison of the two
* sequences
*/
- public EditScript getScript() {
- final EditScript script = new EditScript();
+ public EditScript<Character> getScript() {
+ final EditScript<Character> script = new EditScript<Character>();
buildScript(0, left.length(), 0, right.length(), script);
return script;
}
@@ -49,7 +121,7 @@ public class StringsComparator {
* @param script the edited script
*/
private void buildScript(final int start1, final int end1, final int start2, final int end2,
- final EditScript script) {
+ final EditScript<Character> script) {
final Snake middle = getMiddleSnake(start1, end1, start2, end2);
if (middle == null
@@ -60,15 +132,15 @@ public class StringsComparator {
int j = start2;
while (i < end1 || j < end2) {
if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
- script.append(new KeepCommand(left.charAt(i)));
+ script.append(new KeepCommand<Character>(left.charAt(i)));
++i;
++j;
} else {
if (end1 - start1 > end2 - start2) {
- script.append(new DeleteCommand(left.charAt(i)));
+ script.append(new DeleteCommand<Character>(left.charAt(i)));
++i;
} else {
- script.append(new InsertCommand(right.charAt(j)));
+ script.append(new InsertCommand<Character>(right.charAt(j)));
++j;
}
}
@@ -80,16 +152,33 @@ public class StringsComparator {
start2, middle.getStart() - middle.getDiag(),
script);
for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
- script.append(new KeepCommand(left.charAt(i)));
+ script.append(new KeepCommand<Character>(left.charAt(i)));
}
buildScript(middle.getEnd(), end1,
middle.getEnd() - middle.getDiag(), end2,
script);
}
}
-
+
+ /**
+ * Get the middle snake corresponding to two subsequences of the
+ * main sequences.
+ * <p>
+ * The snake is found using the MYERS Algorithm (this algorithms has
+ * also been implemented in the GNU diff program). This algorithm is
+ * explained in Eugene Myers article:
+ * <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
+ * An O(ND) Difference Algorithm and Its Variations</a>.
+ * </p>
+ *
+ * @param start1 the begin of the first sequence to be compared
+ * @param end1 the end of the first sequence to be compared
+ * @param start2 the begin of the second sequence to be compared
+ * @param end2 the end of the second sequence to be compared
+ * @return the middle snake
+ */
private Snake getMiddleSnake(int start1, int end1, int start2, int end2) {
- // Myers Algorithm
+ // Myers Algorithm
// Initialisations
final int m = end1 - start1;
final int n = end2 - start2;
@@ -160,7 +249,7 @@ public class StringsComparator {
// this should not happen
throw new RuntimeException("Internal Error");
}
-
+
/**
* Build a snake.
*
@@ -235,17 +324,17 @@ public class StringsComparator {
return diag;
}
}
-
+
public static void main(String[] args) {
StringsComparator sc = new StringsComparator("O Bruno eh um bom rapaz. Ele eh do Brasil.", "O Bruno foi um bom rapaz. Ele eh do Brasil .");
- EditScript es = sc.getScript();
- es.visit(new CommandVisitor() {
-
+ EditScript<Character> es = sc.getScript();
+ es.visit(new CommandVisitor<Character>() {
+
boolean nlAdd = true;
boolean nlRemove = true;
@Override
- public void visitInsertCommand(Object object) {
+ public void visitInsertCommand(Character object) {
if (nlAdd) {
System.out.println();
System.out.print("> ");
@@ -255,7 +344,7 @@ public class StringsComparator {
}
@Override
- public void visitKeepCommand(Object object) {
+ public void visitKeepCommand(Character object) {
if (!nlAdd) {
nlAdd = true;
}
@@ -267,7 +356,7 @@ public class StringsComparator {
}
@Override
- public void visitDeleteCommand(Object object) {
+ public void visitDeleteCommand(Character object) {
if (nlRemove) {
System.out.println();
System.out.print("< ");
http://git-wip-us.apache.org/repos/asf/commons-text/blob/a7e88eef/src/main/java/org/apache/commons/text/diff/package-info.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/diff/package-info.java b/src/main/java/org/apache/commons/text/diff/package-info.java
index 7a565d4..92fde86 100644
--- a/src/main/java/org/apache/commons/text/diff/package-info.java
+++ b/src/main/java/org/apache/commons/text/diff/package-info.java
@@ -17,6 +17,9 @@
/**
* <p>Provides algorithms for diff between strings.</p>
*
+ * <p>The initial implementation of the Myers algorithm was adapted from the
+ * commons-collections sequence package.</p>
+ *
* @since 1.0
*/
package org.apache.commons.text.diff;