You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/05/31 16:36:59 UTC

[03/13] incubator-joshua git commit: made a place for constants and pushed a few things into it

made a place for constants and pushed a few things into it


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/b1961e17
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/b1961e17
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/b1961e17

Branch: refs/heads/JOSHUA-252
Commit: b1961e17f59811c5e4cd070ab8691691574bb8ec
Parents: 89d3b18
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed May 25 16:33:13 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed May 25 16:33:13 2016 -0400

----------------------------------------------------------------------
 src/joshua/decoder/ff/tm/GrammarReader.java     |  2 --
 .../decoder/ff/tm/format/HieroFormatReader.java | 10 ++----
 .../decoder/ff/tm/format/MosesFormatReader.java | 12 ++++---
 src/joshua/util/Constants.java                  | 36 ++++++++++++++++++++
 4 files changed, 46 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/GrammarReader.java b/src/joshua/decoder/ff/tm/GrammarReader.java
index 3432e53..6f8c312 100644
--- a/src/joshua/decoder/ff/tm/GrammarReader.java
+++ b/src/joshua/decoder/ff/tm/GrammarReader.java
@@ -34,8 +34,6 @@ import joshua.util.io.LineReader;
  */
 public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iterator<R> {
 
-  protected static String fieldDelimiter;
-
   protected static String description;
 
   protected String fileName;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java b/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
index 9c21fb0..9b2039e 100644
--- a/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
+++ b/src/joshua/decoder/ff/tm/format/HieroFormatReader.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import joshua.corpus.Vocabulary;
 import joshua.decoder.ff.tm.GrammarReader;
 import joshua.decoder.ff.tm.Rule;
+import joshua.util.Constants;
 import joshua.util.FormatUtils;
 
 /**
@@ -35,7 +36,6 @@ import joshua.util.FormatUtils;
 public class HieroFormatReader extends GrammarReader<Rule> {
 
   static {
-    fieldDelimiter = "\\s\\|{3}\\s";
     description = "Original Hiero format";
   }
 
@@ -49,7 +49,7 @@ public class HieroFormatReader extends GrammarReader<Rule> {
 
   @Override
   public Rule parseLine(String line) {
-    String[] fields = line.split(fieldDelimiter);
+    String[] fields = line.split(Constants.fieldDelimiter);
     if (fields.length < 3) {
       throw new RuntimeException(String.format("Rule '%s' does not have four fields", line));
     }
@@ -100,11 +100,7 @@ public class HieroFormatReader extends GrammarReader<Rule> {
 
     return new Rule(lhs, sourceIDs, targetIDs, sparse_features, arity, alignment);
   }
-
-  public static String getFieldDelimiter() {
-    return fieldDelimiter;
-  }
-
+  
   public static boolean isNonTerminal(final String word) {
     return FormatUtils.isNonterminal(word);
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 47a3e46..a2ada68 100644
--- a/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/src/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -22,6 +22,8 @@ import java.io.IOException;
 
 import joshua.corpus.Vocabulary;
 import joshua.decoder.ff.tm.Rule;
+import joshua.util.Constants;
+import joshua.util.FormatUtils;
 import joshua.util.io.LineReader;
 
 /***
@@ -46,12 +48,12 @@ public class MosesFormatReader extends HieroFormatReader {
 
   public MosesFormatReader(String grammarFile) throws IOException {
     super(grammarFile);
-    Vocabulary.id("[X]");
+    Vocabulary.id(Constants.defaultNT);
   }
   
   public MosesFormatReader() {
     super();
-    Vocabulary.id("[X]");
+    Vocabulary.id(Constants.defaultNT);
   }
   
   /**
@@ -73,10 +75,10 @@ public class MosesFormatReader extends HieroFormatReader {
    */
   @Override
   public Rule parseLine(String line) {
-    String[] fields = line.split(fieldDelimiter);
+    String[] fields = line.split(Constants.fieldDelimiter);
     
-    StringBuffer hieroLine = new StringBuffer();
-    hieroLine.append("[X] ||| [X,1] " + fields[0] + " ||| [X,1] " + fields[1] + " |||");
+    String nt = FormatUtils.cleanNonTerminal(Constants.defaultNT);
+    StringBuffer hieroLine = new StringBuffer(Constants.defaultNT + " ||| [" + nt + ",1] " + fields[0] + " ||| [" + nt + ",1] " + fields[1] + " |||");
 
     String mosesFeatureString = fields[2];
     for (String value: mosesFeatureString.split(" ")) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/b1961e17/src/joshua/util/Constants.java
----------------------------------------------------------------------
diff --git a/src/joshua/util/Constants.java b/src/joshua/util/Constants.java
new file mode 100644
index 0000000..90e3016
--- /dev/null
+++ b/src/joshua/util/Constants.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package joshua.util;
+
+/***
+ * One day, all constants should be moved here (many are in Vocabulary).
+ * 
+ * @author Matt Post <po...@cs.jhu.edu>
+ */
+
+public final class Constants { // holder for shared constants; declares only static fields
+  public static String defaultNT = "[X]"; // default nonterminal label; NOTE: not final, so it is reassignable at runtime
+
+  public static final String START_SYM = "<s>";
+  public static final String STOP_SYM = "</s>";
+  public static final String UNKNOWN_WORD = "<unk>";
+  
+  public static final String fieldDelimiter = "\\s\\|{3}\\s"; // regex: one whitespace char, "|||", one whitespace char
+  public static final String spaceSeparator = "\\s+"; // regex: one or more whitespace chars
+}