You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/31 16:36:59 UTC
[03/13] incubator-joshua git commit: made a place for constants and
pushed a few things into it
made a place for constants and pushed a few things into it
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1961e17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1961e17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1961e17
Branch: refs/heads/JOSHUA-252
Commit: b1961e17f59811c5e4cd070ab8691691574bb8ec
Parents: 89d3b18
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 16:33:13 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 16:33:13 2016 -0400
----------------------------------------------------------------------
src/joshua/decoder/ff/tm/GrammarReader.java | 2 --
.../decoder/ff/tm/format/HieroFormatReader.java | 10 ++----
.../decoder/ff/tm/format/MosesFormatReader.java | 12 ++++---
src/joshua/util/Constants.java | 36 ++++++++++++++++++++
4 files changed, 46 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/GrammarReader.java b/src/joshua/decoder/ff/tm/GrammarReader.java
index 3432e53..6f8c312 100644
--- a/src/joshua/decoder/ff/tm/GrammarReader.java
+++ b/src/joshua/decoder/ff/tm/GrammarReader.java
@@ -34,8 +34,6 @@ import joshua.util.io.LineReader;
*/
public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iterator<R> {
- protected static String fieldDelimiter;
-
protected static String description;
protected String fileName;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java b/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
index 9c21fb0..9b2039e 100644
--- a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
+++ b/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import joshua.corpus.Vocabulary;
import joshua.decoder.ff.tm.GrammarReader;
import joshua.decoder.ff.tm.Rule;
+import joshua.util.Constants;
import joshua.util.FormatUtils;
/**
@@ -35,7 +36,6 @@ import joshua.util.FormatUtils;
public class HieroFormatReader extends GrammarReader<Rule> {
static {
- fieldDelimiter = "\\s\\|{3}\\s";
description = "Original Hiero format";
}
@@ -49,7 +49,7 @@ public class HieroFormatReader extends GrammarReader<Rule> {
@Override
public Rule parseLine(String line) {
- String[] fields = line.split(fieldDelimiter);
+ String[] fields = line.split(Constants.fieldDelimiter);
if (fields.length < 3) {
throw new RuntimeException(String.format("Rule '%s' does not have four fields", line));
}
@@ -100,11 +100,7 @@ public class HieroFormatReader extends GrammarReader<Rule> {
return new Rule(lhs, sourceIDs, targetIDs, sparse_features, arity, alignment);
}
-
- public static String getFieldDelimiter() {
- return fieldDelimiter;
- }
-
+
public static boolean isNonTerminal(final String word) {
return FormatUtils.isNonterminal(word);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 47a3e46..a2ada68 100644
--- a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -22,6 +22,8 @@ import java.io.IOException;
import joshua.corpus.Vocabulary;
import joshua.decoder.ff.tm.Rule;
+import joshua.util.Constants;
+import joshua.util.FormatUtils;
import joshua.util.io.LineReader;
/***
@@ -46,12 +48,12 @@ public class MosesFormatReader extends HieroFormatReader {
public MosesFormatReader(String grammarFile) throws IOException {
super(grammarFile);
- Vocabulary.id("[X]");
+ Vocabulary.id(Constants.defaultNT);
}
public MosesFormatReader() {
super();
- Vocabulary.id("[X]");
+ Vocabulary.id(Constants.defaultNT);
}
/**
@@ -73,10 +75,10 @@ public class MosesFormatReader extends HieroFormatReader {
*/
@Override
public Rule parseLine(String line) {
- String[] fields = line.split(fieldDelimiter);
+ String[] fields = line.split(Constants.fieldDelimiter);
- StringBuffer hieroLine = new StringBuffer();
- hieroLine.append("[X] ||| [X,1] " + fields[0] + " ||| [X,1] " + fields[1] + " |||");
+ String nt = FormatUtils.cleanNonTerminal(Constants.defaultNT);
+ StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| [" + nt + ",1] " + fields[0] + " ||| [" + nt + ",1] " + fields[1] + " |||");
String mosesFeatureString = fields[2];
for (String value: mosesFeatureString.split(" ")) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/Constants.java b/src/joshua/util/Constants.java
new file mode 100644
index 0000000..90e3016
--- /dev/null
+++ b/src/joshua/util/Constants.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.util;
+
+/***
+ * Shared home for constants used across the decoder. One day, all constants should be moved here (many are still in Vocabulary).
+ *
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+
+public final class Constants { // final: holds only static fields and cannot be subclassed
+ public static String defaultNT = "[X]"; // default nonterminal symbol; NOTE(review): the only non-final field here, confirm whether reassignment is intended
+
+ public static final String START_SYM = "<s>";
+ public static final String STOP_SYM = "</s>";
+ public static final String UNKNOWN_WORD = "<unk>";
+
+ public static final String fieldDelimiter = "\\s\\|{3}\\s"; // regex: one whitespace char, three '|' chars, one whitespace char; passed to String.split by the grammar format readers
+ public static final String spaceSeparator = "\\s+"; // regex: one or more whitespace characters
+}