You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/26 23:55:44 UTC
[8/9] incubator-joshua git commit: fixed "source annotations" test case by moving character rewriting
fixed "source annotations" test case by moving character rewriting
Source-side annotations can be attached to an input token by appending, in square brackets, a ;-delimited list of "key=value" pairs, e.g.,
i[POS=PRO] want[POS=VB;TENSE=pr;person=sg] to go...
Because the annotation syntax itself uses square brackets, the replacement of square brackets has to happen *after* the annotations are parsed (in Token.java), rather than earlier in Sentence.java.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b82027d0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b82027d0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b82027d0
Branch: refs/heads/master
Commit: b82027d09cc950a366b032c2d61ac6ade85bff0a
Parents: 7fdc4cd
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Apr 26 17:54:16 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Apr 26 17:54:16 2016 -0400
----------------------------------------------------------------------
src/joshua/decoder/segment_file/Sentence.java | 3 ---
src/joshua/decoder/segment_file/Token.java | 9 +++++----
2 files changed, 5 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b82027d0/src/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Sentence.java b/src/joshua/decoder/segment_file/Sentence.java
index 970b387..08ecf89 100644
--- a/src/joshua/decoder/segment_file/Sentence.java
+++ b/src/joshua/decoder/segment_file/Sentence.java
@@ -113,9 +113,6 @@ public class Sentence {
this.id = id;
}
- // Mask strings that cause problems for the decoder
- source = escapeSpecialSymbols(source);
-
// Only trim strings
if (joshuaConfiguration.lattice_decoding && ! source.startsWith("((("))
adjustForLength(joshuaConfiguration.maxlen);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b82027d0/src/joshua/decoder/segment_file/Token.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/segment_file/Token.java b/src/joshua/decoder/segment_file/Token.java
index ebe9a43..bddfd68 100644
--- a/src/joshua/decoder/segment_file/Token.java
+++ b/src/joshua/decoder/segment_file/Token.java
@@ -18,6 +18,8 @@
*/
package joshua.decoder.segment_file;
+import static joshua.util.FormatUtils.escapeSpecialSymbols;
+
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -90,10 +92,9 @@ public class Token {
token = rawWord;
}
- // Mask strings that cause problems for the decoder
- token = token.replaceAll("\\[", "-lsb-")
- .replaceAll("\\]", "-rsb-")
- .replaceAll("\\|", "-pipe-");
+ // Mask strings that cause problems for the decoder. This has to be done *after* parsing for
+ // annotations.
+ token = escapeSpecialSymbols(token);
if (joshuaConfiguration != null && joshuaConfiguration.lowercase) {
if (FormatUtils.ISALLUPPERCASE(token))