You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/03 15:59:50 UTC
[4/4] incubator-joshua git commit: removed cruft from Grammar
interface (regexp grammar, writeGrammarOnDisk, constructManualRule)
removed cruft from Grammar interface (regexp grammar, writeGrammarOnDisk, constructManualRule)
The regexp grammar code actually incurred a performance hit, too: it permitted multiple arcs to be matched when walking the trie while extending a rule in DotChart. With that gone, we now know there will be at most one match, which simplifies the code and gets rid of an array creation and iteration.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9762a484
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9762a484
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9762a484
Branch: refs/heads/master
Commit: 9762a484a40b27eeaba1c36c0a0c0be291381fc8
Parents: aa10be5
Author: Matt Post <po...@cs.jhu.edu>
Authored: Fri Jun 3 11:59:43 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Fri Jun 3 11:59:43 2016 -0400
----------------------------------------------------------------------
.../joshua/decoder/chart_parser/Chart.java | 7 +----
.../joshua/decoder/chart_parser/DotChart.java | 29 ++++----------------
.../apache/joshua/decoder/ff/tm/Grammar.java | 29 --------------------
.../decoder/ff/tm/SentenceFilteredGrammar.java | 12 --------
.../tm/hash_based/MemoryBasedBatchGrammar.java | 27 +-----------------
.../decoder/ff/tm/packed/PackedGrammar.java | 14 ----------
.../joshua/decoder/phrase/PhraseTable.java | 16 -----------
.../org/apache/joshua/server/ServerThread.java | 6 +---
.../system/MultithreadedTranslationTests.java | 2 --
.../regexp-grammar-both-rule-types/.gitignore | 2 --
.../regexp-grammar-both-rule-types/README | 16 -----------
.../regexp-grammar-both-rule-types/config | 9 ------
.../regexp-grammar-both-rule-types/glue-grammar | 3 --
.../regexp-grammar-both-rule-types/input | 5 ----
.../regexp-grammar-both-rule-types/output.gold | 12 --------
.../regexp-grammar | 12 --------
.../regexp-grammar-both-rule-types/test.sh | 29 --------------------
.../regexp-grammar-both-rule-types/weights | 4 ---
.../resources/decoder/regexp-grammar/.gitignore | 2 --
.../resources/decoder/regexp-grammar/README | 10 -------
.../resources/decoder/regexp-grammar/config | 11 --------
.../decoder/regexp-grammar/glue-grammar | 3 --
src/test/resources/decoder/regexp-grammar/input | 4 ---
.../decoder/regexp-grammar/output.gold | 4 ---
.../decoder/regexp-grammar/regexp-grammar | 6 ----
.../resources/decoder/regexp-grammar/test.sh | 29 --------------------
.../resources/decoder/regexp-grammar/weights | 5 ----
27 files changed, 8 insertions(+), 300 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index c2f009d..d0cd96b 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -96,7 +96,6 @@ public class Chart {
private Sentence sentence = null;
// private SyntaxTree parseTree;
-// private ManualConstraintsHandler manualConstraintsHandler;
private StateConstraint stateConstraint;
@@ -148,14 +147,10 @@ public class Chart {
// each grammar will have a dot chart
this.dotcharts = new DotChart[this.grammars.length];
for (int i = 0; i < this.grammars.length; i++)
- this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this,
- this.grammars[i].isRegexpGrammar());
+ this.dotcharts[i] = new DotChart(this.inputLattice, this.grammars[i], this);
// Begin to do initialization work
-// manualConstraintsHandler = new ManualConstraintsHandler(this, grammars[grammars.length - 1],
-// sentence.constraints());
-
stateConstraint = null;
if (sentence.target() != null)
// stateConstraint = new StateConstraint(sentence.target());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
index 367ec94..4f1d4c8 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
@@ -98,10 +98,6 @@ class DotChart {
/* Represents the input sentence being translated. */
private final Lattice<Token> input;
- /* If enabled, rule terminals are treated as regular expressions. */
- private final boolean regexpMatching;
-
-
// ===============================================================
// Constructors
// ===============================================================
@@ -118,18 +114,13 @@ class DotChart {
* @param grammar A translation grammar.
* @param chart A CKY+ style chart in which completed span entries are stored.
*/
-
-
-
- public DotChart(Lattice<Token> input, Grammar grammar, Chart chart, boolean regExpMatching) {
+ public DotChart(Lattice<Token> input, Grammar grammar, Chart chart) {
this.dotChart = chart;
this.pGrammar = grammar;
this.input = input;
this.sentLen = input.size();
-
this.dotcells = new ChartSpan<DotCell>(sentLen, null);
- this.regexpMatching = regExpMatching;
seed();
}
@@ -211,20 +202,10 @@ class DotChart {
List<Trie> child_tnodes = null;
- if (this.regexpMatching) {
- child_tnodes = matchAll(dotNode, last_word);
- } else {
- Trie child_node = dotNode.trieNode.match(last_word);
- child_tnodes = Arrays.asList(child_node);
- }
-
- if (!(child_tnodes == null || child_tnodes.isEmpty())) {
- for (Trie child_tnode : child_tnodes) {
- if (null != child_tnode) {
- addDotItem(child_tnode, i, j - 1 + arc_len, dotNode.antSuperNodes, null,
- dotNode.srcPath.extend(arc));
- }
- }
+ Trie child_node = dotNode.trieNode.match(last_word);
+ if (null != child_node) {
+ addDotItem(child_node, i, j - 1 + arc_len, dotNode.antSuperNodes, null,
+ dotNode.srcPath.extend(arc));
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
index 9748ba0..06252a1 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Grammar.java
@@ -92,35 +92,6 @@ public interface Grammar {
int getNumDenseFeatures();
/**
- * This is used to construct a manual rule supported from outside the grammar, but the owner
- * should be the same as the grammar. Rule ID will the same as OOVRuleId, and no lattice cost
- * @param lhs todo
- * @param sourceWords todo
- * @param targetWords todo
- * @param scores todo
- * @param arity todo
- * @return the constructed {@link org.apache.joshua.decoder.ff.tm.Rule}
- */
- @Deprecated
- Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores, int arity);
-
- /**
- * Dump the grammar to disk.
- *
- * @param file the file path to write to
- */
- @Deprecated
- void writeGrammarOnDisk(String file);
-
- /**
- * This returns true if the grammar contains rules that are regular expressions, possibly matching
- * many different inputs.
- *
- * @return true if the grammar's rules may contain regular expressions.
- */
- boolean isRegexpGrammar();
-
- /**
* Return the grammar's owner.
* @return grammar owner
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
index 2362cfd..c952b05 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
@@ -111,18 +111,6 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
return numRules;
}
- @Override
- public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores,
- int aritity) {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public boolean isRegexpGrammar() {
- return false;
- }
-
/**
* What is the algorithm?
*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index 2bfa8c1..9295fd0 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -71,9 +71,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
private GrammarReader<Rule> modelReader;
- /* Whether the grammar's rules contain regular expressions. */
- private boolean isRegexpGrammar = false;
-
// ===============================================================
// Static Fields
// ===============================================================
@@ -109,7 +106,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
Vocabulary.id(defaultLHSSymbol);
this.spanLimit = spanLimit;
this.grammarFile = grammarFile;
- this.setRegexpGrammar(formatKeyword.equals("regexp"));
// ==== loading grammar
this.modelReader = createReader(formatKeyword, grammarFile);
@@ -129,7 +125,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
protected GrammarReader<Rule> createReader(String format, String grammarFile) throws IOException {
if (grammarFile != null) {
- if ("hiero".equals(format) || "thrax".equals(format) || "regexp".equals(format)) {
+ if ("hiero".equals(format) || "thrax".equals(format)) {
return new HieroFormatReader(grammarFile);
} else if ("moses".equals(format)) {
return new MosesFormatReader(grammarFile);
@@ -153,12 +149,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
return this.qtyRulesRead;
}
- @Override
- public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords,
- float[] denseScores, int arity) {
- return null;
- }
-
/**
* if the span covered by the chart bin is greater than the limit, then return false
*/
@@ -234,21 +224,6 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
this.qtyRulesRead, this.qtyRuleBins, grammarFile);
}
- /**
- * This returns true if the grammar contains rules that are regular expressions, possibly matching
- * many different inputs.
- *
- * @return true if the grammar's rules may contain regular expressions.
- */
- @Override
- public boolean isRegexpGrammar() {
- return this.isRegexpGrammar;
- }
-
- public void setRegexpGrammar(boolean value) {
- this.isRegexpGrammar = value;
- }
-
/***
* Takes an input word and creates an OOV rule in the current grammar for that word.
*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 632644f..b48685d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -58,7 +58,6 @@ import static java.util.Collections.sort;
import java.io.File;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.BufferUnderflowException;
@@ -81,7 +80,6 @@ import java.util.List;
import java.util.Map;
import org.apache.joshua.corpus.Vocabulary;
-import org.apache.joshua.decoder.Decoder;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.ff.FeatureFunction;
import org.apache.joshua.decoder.ff.FeatureVector;
@@ -114,9 +112,6 @@ public class PackedGrammar extends AbstractGrammar {
private final File vocabFile; // store path to vocabulary file
- // The grammar specification keyword (e.g., "thrax" or "moses")
- private String type;
-
// The version number of the earliest supported grammar packer
public static final int SUPPORTED_VERSION = 3;
@@ -195,10 +190,6 @@ public class PackedGrammar extends AbstractGrammar {
return encoding.getNumDenseFeatures();
}
- public Rule constructManualRule(int lhs, int[] src, int[] tgt, float[] scores, int arity) {
- return null;
- }
-
/**
* Computes the MD5 checksum of the vocabulary file.
* Can be used for comparing vocabularies across multiple packedGrammars.
@@ -1037,11 +1028,6 @@ public class PackedGrammar extends AbstractGrammar {
}
@Override
- public boolean isRegexpGrammar() {
- return false;
- }
-
- @Override
public void addOOVRules(int word, List<FeatureFunction> featureFunctions) {
throw new RuntimeException("PackedGrammar.addOOVRules(): I can't add OOV rules");
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 255eecb..27f92ac 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -170,22 +170,6 @@ public class PhraseTable implements Grammar {
}
@Override
- public Rule constructManualRule(int lhs, int[] sourceWords, int[] targetWords, float[] scores,
- int arity) {
- return backend.constructManualRule(lhs, sourceWords, targetWords, scores, arity);
- }
-
- @Override
- public void writeGrammarOnDisk(String file) {
- backend.writeGrammarOnDisk(file);
- }
-
- @Override
- public boolean isRegexpGrammar() {
- return backend.isRegexpGrammar();
- }
-
- @Override
public int getOwner() {
return backend.getOwner();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 5915da6..d4dcc65 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -152,11 +152,7 @@ public class ServerThread extends Thread implements HttpHandler {
Translations translations = decoder.decodeAll(request);
OutputStream out = new HttpWriter(client);
- for (;;) {
- Translation translation = translations.next();
- if (translation == null)
- break;
-
+ for (Translation translation: translations) {
if (joshuaConfiguration.input_type == INPUT_TYPE.json || joshuaConfiguration.server_type == SERVER_TYPE.HTTP) {
JSONMessage message = JSONMessage.buildMessage(translation);
out.write(message.toString().getBytes());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java b/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
index 83fbce3..3901f40 100644
--- a/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
+++ b/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
@@ -30,11 +30,9 @@ import java.util.ArrayList;
import org.apache.joshua.decoder.Decoder;
import org.apache.joshua.decoder.JoshuaConfiguration;
-import org.apache.joshua.decoder.MetaDataException;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.Translations;
import org.apache.joshua.decoder.io.TranslationRequestStream;
-import org.apache.joshua.decoder.segment_file.Sentence;
import org.junit.After;
import org.junit.Before;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore b/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
deleted file mode 100644
index d937c7f..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-diff
-output
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/README b/src/test/resources/decoder/regexp-grammar-both-rule-types/README
deleted file mode 100644
index 226fa64..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/README
+++ /dev/null
@@ -1,16 +0,0 @@
-This tests the case where something matched *both* a regex and a non-regex
-rule (or two regexes), but the (correct) regex rule wasn't winning. It should
-be the case, if the code is right, that if you change the order of the rules in
-your grammar, you still get the same output translations.
-
-This test tests the use of regular expressions in the grammar. This is an
-experimental feature with an inefficient implementation in the decoder, but
-there are a number of things that could be done to make it more efficient if
-the technique proves useful.
-
-To enable it, you set the Joshua parameter
-
- regexp-grammar = OWNER
-
-where OWNER is the owner of one or more grammars whose rules might be interpreted as regular
-expressions.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/config b/src/test/resources/decoder/regexp-grammar-both-rule-types/config
deleted file mode 100644
index 0fb4c0c..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/config
+++ /dev/null
@@ -1,9 +0,0 @@
-tm = regexp regexp 10 ./regexp-grammar
-tm = thrax glue -1 ./glue-grammar
-mark-oovs = true
-goal-symbol = GOAL
-top-n = 3
-
-weights-file = weights
-
-feature-function = OOVPenalty
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar b/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
deleted file mode 100644
index 6a1162f..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/glue-grammar
+++ /dev/null
@@ -1,3 +0,0 @@
-[GOAL] ||| <s> ||| <s> ||| 0
-[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
-[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/input
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/input b/src/test/resources/decoder/regexp-grammar-both-rule-types/input
deleted file mode 100644
index 5531876..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/input
+++ /dev/null
@@ -1,5 +0,0 @@
-chica linda
-chicos lindos
-chicos lind?s
-1928371028
-192837102
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold b/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
deleted file mode 100644
index c8edb86..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/output.gold
+++ /dev/null
@@ -1,12 +0,0 @@
-0 ||| girl feminine-singular-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -4.000
-0 ||| girl feminine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
-0 ||| girl generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
-1 ||| boys masculine-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
-1 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
-1 ||| boys lindos_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
-2 ||| boys generic-pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -6.000
-2 ||| boys lind?s_OOV ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -103.000
-2 ||| chicos_OOV generic-pretty ||| tm_regexp_0=-1.000 tm_regexp_1=-2.000 tm_glue_0=2.000 OOVPenalty=-100.000 ||| -105.000
-3 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=-1.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -3.000
-3 ||| 1928371028_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
-4 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar b/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
deleted file mode 100644
index c93dc80..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/regexp-grammar
+++ /dev/null
@@ -1,12 +0,0 @@
-[X] ||| blah linda ||| feminine-singular-pretty blah ||| 1 0
-[X] ||| \d{10,} ||| really big number ||| 1 1
-[X] ||| lindo.* ||| masculine-pretty ||| 1 1
-[X] ||| linda.* ||| feminine-pretty ||| 1 1
-[X] ||| lind.* ||| generic-pretty ||| 1 2
-[X] ||| lindo ||| masculine-singular-pretty ||| 1 0
-[X] ||| linda ||| feminine-singular-pretty ||| 1 0
-[X] ||| chico ||| boy ||| 1 0
-[X] ||| chicos ||| boys ||| 1 0
-[X] ||| chica ||| girl ||| 1 0
-[X] ||| chicas ||| girls ||| 1 0
-[X] ||| grande ||| great ||| 1 0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh b/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
deleted file mode 100755
index d4b6436..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/test.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input | $JOSHUA/bin/joshua-decoder -m 1g -c config > output 2> log
-
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
- rm -f output log diff
- exit 0
-else
- exit 1
-fi
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar-both-rule-types/weights b/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
deleted file mode 100644
index a998939..0000000
--- a/src/test/resources/decoder/regexp-grammar-both-rule-types/weights
+++ /dev/null
@@ -1,4 +0,0 @@
-tm_regexp_0 1
-tm_regexp_1 1
-tm_glue_0 -1
-OOVPenalty 1
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/.gitignore
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/.gitignore b/src/test/resources/decoder/regexp-grammar/.gitignore
deleted file mode 100644
index d937c7f..0000000
--- a/src/test/resources/decoder/regexp-grammar/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-diff
-output
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/README
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/README b/src/test/resources/decoder/regexp-grammar/README
deleted file mode 100644
index df81a67..0000000
--- a/src/test/resources/decoder/regexp-grammar/README
+++ /dev/null
@@ -1,10 +0,0 @@
-This test tests the use of regular expressions in the grammar. This is an experimental feature with
-an inefficient implementation in the decoder, but there are a number of things that could be done to
-make it more efficient if the technique proves useful.
-
-To enable it, you set the Joshua parameter
-
- regexp-grammar = OWNER
-
-where OWNER is the owner of one or more grammars whose rules might be interpreted as regular
-expressions.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/config
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/config b/src/test/resources/decoder/regexp-grammar/config
deleted file mode 100644
index 526dba0..0000000
--- a/src/test/resources/decoder/regexp-grammar/config
+++ /dev/null
@@ -1,11 +0,0 @@
-tm = regexp regexp 10 ./regexp-grammar
-tm = thrax glue -1 ./glue-grammar
-mark-oovs = true
-goal-symbol = GOAL
-regexp-grammar = regexp
-
-weights-file = weights
-
-feature-function = OOVPenalty
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/glue-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/glue-grammar b/src/test/resources/decoder/regexp-grammar/glue-grammar
deleted file mode 100644
index 6a1162f..0000000
--- a/src/test/resources/decoder/regexp-grammar/glue-grammar
+++ /dev/null
@@ -1,3 +0,0 @@
-[GOAL] ||| <s> ||| <s> ||| 0
-[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| -1
-[GOAL] ||| [GOAL,1] </s> ||| [GOAL,1] </s> ||| 0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/input
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/input b/src/test/resources/decoder/regexp-grammar/input
deleted file mode 100644
index 8cdf0f8..0000000
--- a/src/test/resources/decoder/regexp-grammar/input
+++ /dev/null
@@ -1,4 +0,0 @@
-chica linda
-chico lindo
-1928371028
-192837102
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/output.gold
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/output.gold b/src/test/resources/decoder/regexp-grammar/output.gold
deleted file mode 100644
index 49c5ea4..0000000
--- a/src/test/resources/decoder/regexp-grammar/output.gold
+++ /dev/null
@@ -1,4 +0,0 @@
-0 ||| girl pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
-1 ||| boy pretty ||| tm_regexp_0=-2.000 tm_regexp_1=-1.000 tm_glue_0=2.000 OOVPenalty=0.000 ||| -5.000
-2 ||| really big number ||| tm_regexp_0=-1.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=0.000 ||| -2.000
-3 ||| 192837102_OOV ||| tm_regexp_0=0.000 tm_regexp_1=0.000 tm_glue_0=1.000 OOVPenalty=-100.000 ||| -101.000
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/regexp-grammar
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/regexp-grammar b/src/test/resources/decoder/regexp-grammar/regexp-grammar
deleted file mode 100644
index 6f6c57c..0000000
--- a/src/test/resources/decoder/regexp-grammar/regexp-grammar
+++ /dev/null
@@ -1,6 +0,0 @@
-[X] ||| lind.* ||| pretty ||| 1 1
-[X] ||| lindo ||| [boy version of pretty] ||| 10 0
-[X] ||| linda ||| [girl version of pretty] ||| 10 0
-[X] ||| chico ||| boy ||| 1 0
-[X] ||| chica ||| girl ||| 1 0
-[X] ||| \d{10,} ||| really big number ||| 1 0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/test.sh
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/test.sh b/src/test/resources/decoder/regexp-grammar/test.sh
deleted file mode 100755
index 3235bd4..0000000
--- a/src/test/resources/decoder/regexp-grammar/test.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-set -u
-
-cat input | $JOSHUA/bin/joshua-decoder -c config > output 2> log
-
-diff -u output output.gold > diff
-
-if [ $? -eq 0 ]; then
- rm -rf output log diff
- exit 0
-else
- exit 1
-fi
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9762a484/src/test/resources/decoder/regexp-grammar/weights
----------------------------------------------------------------------
diff --git a/src/test/resources/decoder/regexp-grammar/weights b/src/test/resources/decoder/regexp-grammar/weights
deleted file mode 100644
index 4782753..0000000
--- a/src/test/resources/decoder/regexp-grammar/weights
+++ /dev/null
@@ -1,5 +0,0 @@
-tm_regexp_0 1
-tm_regexp_1 1
-tm_glue_0 -1
-
-OOVPenalty 1