You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:36 UTC
[41/94] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/symal/symal.cpp
----------------------------------------------------------------------
diff --git a/ext/symal/symal.cpp b/ext/symal/symal.cpp
deleted file mode 100644
index 8f1bac0..0000000
--- a/ext/symal/symal.cpp
+++ /dev/null
@@ -1,503 +0,0 @@
-// $Id$
-
-#include <cassert>
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <list>
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <cstring>
-#include "cmd.h"
-
-using namespace std;
-
-#define MAX_WORD 10000 // maximum lengthsource/target strings
-#define MAX_M 200 // maximum length of source strings
-#define MAX_N 200 // maximum length of target strings
-
-#define UNION 1
-#define INTERSECT 2
-#define GROW 3
-#define SRCTOTGT 4
-#define TGTTOSRC 5
-#define BOOL_YES 1
-#define BOOL_NO 0
-
-#define END_ENUM { (char*)0, 0 }
-
-static Enum_T AlignEnum [] = {
- { "union", UNION },
- { "u", UNION },
- { "intersect", INTERSECT},
- { "i", INTERSECT},
- { "grow", GROW },
- { "g", GROW },
- { "srctotgt", SRCTOTGT },
- { "s2t", SRCTOTGT },
- { "tgttosrc", TGTTOSRC },
- { "t2s", TGTTOSRC },
- END_ENUM
-};
-
-static Enum_T BoolEnum [] = {
- { "true", BOOL_YES },
- { "yes", BOOL_YES },
- { "y", BOOL_YES },
- { "false", BOOL_NO },
- { "no", BOOL_NO },
- { "n", BOOL_NO },
- END_ENUM
-};
-
-
-
-// global variables and constants
-
-int* fa; //counters of covered foreign positions
-int* ea; //counters of covered english positions
-int** A; //alignment matrix with information symmetric/direct/inverse alignments
-
-int verbose=0;
-
-//read an alignment pair from the input stream.
-
-int lc = 0;
-
-int getals(fstream& inp,int& m, int *a,int& n, int *b)
-{
- char w[MAX_WORD], dummy[10];
- int i,j,freq;
- if (inp >> freq) {
- ++lc;
- //target sentence
- inp >> n;
- assert(n<MAX_N);
- for (i=1; i<=n; i++) {
- inp >> setw(MAX_WORD) >> w;
- if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
- << MAX_WORD-1 << endl;
- assert(strlen(w)<MAX_WORD-1);
- }
- }
-
- inp >> dummy; //# separator
- // inverse alignment
- for (i=1; i<=n; i++) inp >> b[i];
-
- //source sentence
- inp >> m;
- assert(m<MAX_M);
- for (j=1; j<=m; j++) {
- inp >> setw(MAX_WORD) >> w;
- if (strlen(w)>=MAX_WORD-1) {
- cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
- << MAX_WORD-1 << endl;
- assert(strlen(w)<MAX_WORD-1);
- }
- }
-
- inp >> dummy; //# separator
-
- // direct alignment
- for (j=1; j<=m; j++) {
- inp >> a[j];
- assert(0<=a[j] && a[j]<=n);
- }
-
- //check inverse alignemnt
- for (i=1; i<=n; i++)
- assert(0<=b[i] && b[i]<=m);
-
- return 1;
-
- } else
- return 0;
-};
-
-
-//compute union alignment
-int prunionalignment(fstream& out,int m,int *a,int n,int* b)
-{
-
- ostringstream sout;
-
- for (int j=1; j<=m; j++)
- if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
-
- for (int i=1; i<=n; i++)
- if (b[i] && a[b[i]]!=i)
- sout << b[i]-1 << "-" << i-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-
-//Compute intersection alignment
-
-int printersect(fstream& out,int m,int *a,int n,int* b)
-{
-
- ostringstream sout;
-
- for (int j=1; j<=m; j++)
- if (a[j] && b[a[j]]==j)
- sout << j-1 << "-" << a[j]-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute target-to-source alignment
-
-int printtgttosrc(fstream& out,int m,int *a,int n,int* b)
-{
-
- ostringstream sout;
-
- for (int i=1; i<=n; i++)
- if (b[i])
- sout << b[i]-1 << "-" << i-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute source-to-target alignment
-
-int printsrctotgt(fstream& out,int m,int *a,int n,int* b)
-{
-
- ostringstream sout;
-
- for (int j=1; j<=m; j++)
- if (a[j])
- sout << j-1 << "-" << a[j]-1 << " ";
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
-
- return 1;
-}
-
-//Compute Grow Diagonal Alignment
-//Nice property: you will never introduce more points
-//than the unionalignment alignemt. Hence, you will always be able
-//to represent the grow alignment as the unionalignment of a
-//directed and inverted alignment
-
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false)
-{
-
- ostringstream sout;
-
- vector <pair <int,int> > neighbors; //neighbors
-
- pair <int,int> entry;
-
- neighbors.push_back(make_pair(-1,-0));
- neighbors.push_back(make_pair(0,-1));
- neighbors.push_back(make_pair(1,0));
- neighbors.push_back(make_pair(0,1));
-
-
- if (diagonal) {
- neighbors.push_back(make_pair(-1,-1));
- neighbors.push_back(make_pair(-1,1));
- neighbors.push_back(make_pair(1,-1));
- neighbors.push_back(make_pair(1,1));
- }
-
-
- int i,j,o;
-
-
- //covered foreign and english positions
-
- memset(fa,0,(m+1)*sizeof(int));
- memset(ea,0,(n+1)*sizeof(int));
-
- //matrix to quickly check if one point is in the symmetric
- //alignment (value=2), direct alignment (=1) and inverse alignment
-
- for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int));
-
- set <pair <int,int> > currentpoints; //symmetric alignment
- set <pair <int,int> > unionalignment; //union alignment
-
- pair <int,int> point; //variable to store points
- set<pair <int,int> >::const_iterator k; //iterator over sets
-
- //fill in the alignments
- for (j=1; j<=m; j++) {
- if (a[j]) {
- unionalignment.insert(make_pair(a[j],j));
- if (b[a[j]]==j) {
- fa[j]=1;
- ea[a[j]]=1;
- A[a[j]][j]=2;
- currentpoints.insert(make_pair(a[j],j));
- } else
- A[a[j]][j]=-1;
- }
- }
-
- for (i=1; i<=n; i++)
- if (b[i] && a[b[i]]!=i) { //not intersection
- unionalignment.insert(make_pair(i,b[i]));
- A[i][b[i]]=1;
- }
-
-
- int added=1;
-
- while (added) {
- added=0;
- ///scan the current alignment
- for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
- //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
- for (o=0; o<neighbors.size(); o++) {
- //cout << "go over check all neighbors\n";
- point.first=k->first+neighbors[o].first;
- point.second=k->second+neighbors[o].second;
- //cout << point.second-1 << " " << point.first-1 << "\n";
- //check if neighbor is inside 'matrix'
- if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
- //check if neighbor is in the unionalignment alignment
- if (b[point.first]==point.second || a[point.second]==point.first) {
- //cout << "In unionalignment ";cout.flush();
- //check if it connects at least one uncovered word
- if (!(ea[point.first] && fa[point.second])) {
- //insert point in currentpoints!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- ea[point.first]=1;
- fa[point.second]=1;
- added=1;
- //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
- }
- }
- }
- }
- }
-
- if (final) {
- for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
- if (A[k->first][k->second]==1) {
- point.first=k->first;
- point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;
- fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
-
- for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
- if (A[k->first][k->second]==-1) {
- point.first=k->first;
- point.second=k->second;
- //one of the two words is not covered yet
- //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
- if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
- (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
- //add it!
- currentpoints.insert(point);
- A[point.first][point.second]=2;
- //keep track of new covered positions
- ea[point.first]=1;
- fa[point.second]=1;
-
- //added=1;
- //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
- }
- }
- }
-
-
- for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
- sout << k->second-1 << "-" << k->first-1 << " ";
-
-
- //fix the last " "
- string str = sout.str();
- if (str.length() == 0)
- str = "\n";
- else
- str.replace(str.length()-1,1,"\n");
-
- out << str;
- out.flush();
- return 1;
-
- return 1;
-}
-
-
-
-//Main file here
-
-
-int main(int argc, char** argv)
-{
-
- int alignment=0;
- char* input="/dev/stdin";
- char* output="/dev/stdout";
- int diagonal=false;
- int final=false;
- int bothuncovered=false;
-
-
- DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum,
- "alignment", CMDENUMTYPE, &alignment, AlignEnum,
- "d", CMDENUMTYPE, &diagonal, BoolEnum,
- "diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
- "f", CMDENUMTYPE, &final, BoolEnum,
- "final", CMDENUMTYPE, &final, BoolEnum,
- "b", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "both", CMDENUMTYPE, &bothuncovered, BoolEnum,
- "i", CMDSTRINGTYPE, &input,
- "o", CMDSTRINGTYPE, &output,
- "v", CMDENUMTYPE, &verbose, BoolEnum,
- "verbose", CMDENUMTYPE, &verbose, BoolEnum,
-
- (char *)NULL);
-
- GetParams(&argc, &argv, (char*) NULL);
-
- if (alignment==0) {
- cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
- << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
-
- exit(1);
-
- }
-
- fstream inp(input,ios::in);
- fstream out(output,ios::out);
-
- if (!inp.is_open()) {
- cerr << "cannot open " << input << "\n";
- exit(1);
- }
-
- if (!out.is_open()) {
- cerr << "cannot open " << output << "\n";
- exit(1);
- }
-
-
- int a[MAX_M],b[MAX_N],m,n;
- fa=new int[MAX_M+1];
- ea=new int[MAX_N+1];
-
-
- int sents = 0;
- A=new int *[MAX_N+1];
- for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
-
- switch (alignment) {
- case UNION:
- cerr << "symal: computing union alignment\n";
- while(getals(inp,m,a,n,b)) {
- prunionalignment(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case INTERSECT:
- cerr << "symal: computing intersect alignment\n";
- while(getals(inp,m,a,n,b)) {
- printersect(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case GROW:
- cerr << "symal: computing grow alignment: diagonal ("
- << diagonal << ") final ("<< final << ")"
- << "both-uncovered (" << bothuncovered <<")\n";
-
- while(getals(inp,m,a,n,b))
- printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
-
- break;
- case TGTTOSRC:
- cerr << "symal: computing target-to-source alignment\n";
-
- while(getals(inp,m,a,n,b)) {
- printtgttosrc(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- case SRCTOTGT:
- cerr << "symal: computing source-to-target alignment\n";
-
- while(getals(inp,m,a,n,b)) {
- printsrctotgt(out,m,a,n,b);
- sents++;
- }
- cerr << "Sents: " << sents << endl;
- break;
- default:
- exit(1);
- }
-
- delete [] fa;
- delete [] ea;
- for (int i=1; i<=MAX_N; i++) delete [] A[i];
- delete [] A;
-
- exit(0);
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index 16cb54b..64c9fe9 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
#include "lm/left.hh"
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index c33d54b..8c3e4b9 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -1,5 +1,20 @@
#!/usr/bin/env perl
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# This script implements the Joshua pipeline. It can run a complete
# pipeline --- from raw training corpora to bleu scores on a test set
# --- and it allows jumping into arbitrary points of the pipeline.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
index f65f26f..f7f6be2 100644
--- a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
@@ -12,7 +12,7 @@ import java.util.ArrayList;
/**
* The simplest concrete implementation of Phrase.
*
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
* @version $LastChangedDate$
*/
public class BasicPhrase extends AbstractPhrase {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
index 855a7c1..af669b7 100644
--- a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
@@ -31,37 +31,16 @@ import java.util.List;
*/
public class ContiguousPhrase extends AbstractPhrase {
- // ===============================================================
- // Constants
- // ===============================================================
-
- // ===============================================================
- // Member variables
- // ===============================================================
-
protected int startIndex;
protected int endIndex;
protected Corpus corpusArray;
- // ===============================================================
- // Constructor(s)
- // ===============================================================
-
public ContiguousPhrase(int startIndex, int endIndex, Corpus corpusArray) {
this.startIndex = startIndex;
this.endIndex = endIndex;
this.corpusArray = corpusArray;
}
-
- // ===============================================================
- // Public
- // ===============================================================
-
- // ===========================================================
- // Accessor methods (set/get)
- // ===========================================================
-
/**
* This method copies the phrase into an array of ints. This method should be avoided if possible.
*
@@ -75,23 +54,15 @@ public class ContiguousPhrase extends AbstractPhrase {
return words;
}
-
public int getWordID(int position) {
return corpusArray.getWordID(startIndex + position);
// return corpusArray.corpus[startIndex+position];
}
-
public int size() {
return endIndex - startIndex;
}
-
- // ===========================================================
- // Methods
- // ===========================================================
-
-
/**
* Gets all possible subphrases of this phrase, up to and including the phrase itself. For
* example, the phrase "I like cheese ." would return the following:
@@ -114,7 +85,6 @@ public class ContiguousPhrase extends AbstractPhrase {
return getSubPhrases(size());
}
-
/**
* Returns a list of subphrases only of length <code>maxLength</code> or smaller.
*
@@ -134,7 +104,6 @@ public class ContiguousPhrase extends AbstractPhrase {
return phrases;
}
-
/**
* creates a new phrase object from the indexes provided.
* <P>
@@ -148,36 +117,9 @@ public class ContiguousPhrase extends AbstractPhrase {
return new ContiguousPhrase(startIndex + start, startIndex + end, corpusArray);
}
-
- // ===============================================================
- // Protected
- // ===============================================================
-
- // ===============================================================
- // Methods
- // ===============================================================
-
-
- // ===============================================================
- // Private
- // ===============================================================
-
- // ===============================================================
- // Methods
- // ===============================================================
-
-
- // ===============================================================
- // Static
- // ===============================================================
-
-
- // ===============================================================
- // Main
- // ===============================================================
-
/**
* Main contains test code
+ * @param args String array of arguments used to run this class.
*/
public static void main(String[] args) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Corpus.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Corpus.java b/src/main/java/org/apache/joshua/corpus/Corpus.java
index a943dd2..1a7d1b0 100755
--- a/src/main/java/org/apache/joshua/corpus/Corpus.java
+++ b/src/main/java/org/apache/joshua/corpus/Corpus.java
@@ -34,6 +34,7 @@ public interface Corpus { // extends Externalizable {
// ===============================================================
/**
+ * @param position the position at which we want to obtain a word ID
* @return the integer representation of the Word at the specified position in the corpus.
*/
int getWordID(int position);
@@ -51,7 +52,7 @@ public interface Corpus { // extends Externalizable {
/**
* Gets the sentence index of each specified position.
*
- * @param position Index into the corpus
+ * @param positions Index into the corpus
* @return array of the sentence indices associated with the specified positions in the corpus.
*/
int[] getSentenceIndices(int[] positions);
@@ -60,6 +61,7 @@ public interface Corpus { // extends Externalizable {
* Gets the position in the corpus of the first word of the specified sentence. If the sentenceID
* is outside of the bounds of the sentences, then it returns the last position in the corpus + 1.
*
+ * @param sentenceID a specific sentence to obtain a position for
* @return the position in the corpus of the first word of the specified sentence. If the
* sentenceID is outside of the bounds of the sentences, then it returns the last position
* in the corpus + 1.
@@ -69,6 +71,7 @@ public interface Corpus { // extends Externalizable {
/**
* Gets the exclusive end position of a sentence in the corpus.
*
+ * @param sentenceID a specific sentence to obtain an end position for
* @return the position in the corpus one past the last word of the specified sentence. If the
* sentenceID is outside of the bounds of the sentences, then it returns one past the last
* position in the corpus.
@@ -113,7 +116,7 @@ public interface Corpus { // extends Externalizable {
* @param phrase the superphrase that the comparsion phrase is drawn from
* @param phraseStart the point in the phrase where the comparison begins (inclusive)
* @param phraseEnd the point in the phrase where the comparison ends (exclusive)
- * @return an int that follows the conventions of java.util.Comparator.compareTo()
+ * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
*/
int comparePhrase(int corpusStart, Phrase phrase, int phraseStart, int phraseEnd);
@@ -122,9 +125,9 @@ public interface Corpus { // extends Externalizable {
* Compares the phrase that starts at position start with the phrase passed in. Compares the
* entire phrase.
*
- * @param corpusStart
- * @param phrase
- * @return
+ * @param corpusStart position start
+ * @param phrase {@link org.apache.joshua.corpus.Phrase} to compare against
+ * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
*/
int comparePhrase(int corpusStart, Phrase phrase);
@@ -134,15 +137,15 @@ public interface Corpus { // extends Externalizable {
* @param position1 the position in the corpus where the first suffix begins
* @param position2 the position in the corpus where the second suffix begins
* @param maxComparisonLength a cutoff point to stop the comparison
- * @return an int that follows the conventions of java.util.Comparator.compareTo()
+ * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
*/
int compareSuffixes(int position1, int position2, int maxComparisonLength);
/**
*
- * @param startPosition
- * @param endPosition
- * @return
+ * @param startPosition start position for phrase
+ * @param endPosition end position for phrase
+ * @return the {@link org.apache.joshua.corpus.ContiguousPhrase}
*/
ContiguousPhrase getPhrase(int startPosition, int endPosition);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Phrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Phrase.java b/src/main/java/org/apache/joshua/corpus/Phrase.java
index f22c8a5..5a06a8b 100644
--- a/src/main/java/org/apache/joshua/corpus/Phrase.java
+++ b/src/main/java/org/apache/joshua/corpus/Phrase.java
@@ -93,6 +93,9 @@ public interface Phrase extends Comparable<Phrase> {
* complete Phrase List.
*
* @see ArrayList#subList(int, int)
+ * @param start start position to begin new phrase
+ * @param end end position to end new phrase
+ * @return a new {@link org.apache.joshua.corpus.Phrase} object from the indexes provided.
*/
Phrase subPhrase(int start, int end);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Span.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Span.java b/src/main/java/org/apache/joshua/corpus/Span.java
index 753b007..414fe95 100644
--- a/src/main/java/org/apache/joshua/corpus/Span.java
+++ b/src/main/java/org/apache/joshua/corpus/Span.java
@@ -90,8 +90,8 @@ public class Span implements Iterable<Integer>, Comparable<Span> {
/**
* Returns true if the other span does not intersect with this one.
- * @param o
- * @return
+ * @param o new {@link org.apache.joshua.corpus.Span} to check for intersection
+ * @return true if the other span does not intersect with this one
*/
public boolean disjointFrom(Span o) {
if (start < o.start) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/SymbolTable.java b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
index d8b1694..274e8b9 100644
--- a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
+++ b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
@@ -17,9 +17,9 @@
* under the License.
*/
package org.apache.joshua.corpus;
-
+
import java.util.Collection;
-
+
/**
* Represents a symbol table capable of mapping between strings and
* symbols.
@@ -29,302 +29,299 @@ import java.util.Collection;
* @version $LastChangedDate: 2009-11-24 23:07:43 -0600 (Tue, 24 Nov 2009) $
*/
public interface SymbolTable {
-
- //TODO Remove all hard-coded references to nonterminals
-
- /**
- * The unknown word's ID will be the size of the vocabulary,
- * ensuring that it is outside of the vocabulary. Note that
- * for vocabularies which have not been fixed yet, this
- * means the actual value is volatile and therefore a word
- * ID can only be compared against UNKNOWN_WORD at the time
- * the word ID is generated (otherwise unknown words can
- * become "known" if new words are added to the vocabulary
- * before testing).
- * <p>
- * Negative IDs are reserved for non-terminals.
- *
- * Zero is reserved as the UNKNOWN_WORD.
- */
- int UNKNOWN_WORD = 1;
-
- /** String representation for out-of-vocabulary words. */
- String UNKNOWN_WORD_STRING = "<unk>";
-
- /**
- * Integer representation of the bare (non-indexed) nonterminal X,
- * which represents a wild-card gap in a phrase.
- * <p>
- * All nonterminals are guaranteed to be represented by negative integers.
- */
- int X = -1;
-
- /**
- * String representation of the bare (non-indexed) nonterminal X,
- * which represents a wild-card gap in a phrase.
- */
- String X_STRING = "[X]";
-
-
-
- /**
- * String representation of the nonterminal X with index 1,
- * which represents a wild-card gap in a phrase.
- */
- String X1_STRING = "[X,1]";
-
-
-
- /**
- * String representation of the nonterminal X with index 2,
- * which represents a wild-card gap in a phrase.
- */
- String X2_STRING = "[X,2]";
-
- /**
- * Integer representation of the nonterminal S.
- * <p>
- * All nonterminals are guaranteed to be represented by negative integers.
- */
- int S = -4;
-
- /**
- * String representation of the nonterminal S..
- */
- String S_STRING = "[S]";
-
- /**
- * Integer representation of the nonterminal X with index 1,
- * which represents a wild-card gap in a phrase.
- * <p>
- * All nonterminals are guaranteed to be represented by negative integers.
- */
- int S1 = -5;
-
- /**
- * String representation of the nonterminal X with index 2,
- * which represents a wild-card gap in a phrase.
- */
- String S1_STRING = "[S,1]";
-
- /**
- * Gets a unique integer identifier for the nonterminal.
- * <p>
- * The integer returned is guaranteed to be a negative number.
- *
- * If the nonterminal is {@link #X_STRING},
- * then the value returned must be {@link #X}.
- *
- * Otherwise, the value returned must be a negative number
- * whose value is less than {@link X}.
- *
- * @param nonterminal Nonterminal symbol
- * @return a unique integer identifier for the nonterminal
- */
- int addNonterminal(String nonterminal);
-
- /**
- * Gets a unique integer identifier for the terminal.
- *
- * @param terminal Terminal symbol
- * @return a unique integer identifier for the terminal
- */
- int addTerminal(String terminal);
-
- /**
- * Gets the unique integer identifiers for the words.
- *
- * @param words Array of symbols
- * @return the unique integer identifiers for the words
- */
- int[] addTerminals(String[] words);
-
- /**
- * Gets the unique integer identifiers for the words
- * in the sentence.
- *
- * @param sentence Space-delimited string of symbols
- * @return the unique integer identifiers for the words
- * in the sentence
- */
- int[] addTerminals(String sentence);
-
- /**
- * Gets an integer identifier for the word.
- * <p>
- * If the word is in the vocabulary, the integer returned
- * will uniquely identify that word.
- * <p>
- * If the word is not in the vocabulary, the integer returned
- * by <code>getUnknownWordID</code> may be returned.
- *
- * Alternatively, implementations may, if they choose, add
- * unknown words and assign them a symbol ID instead of
- * returning <code>getUnknownWordID</code>.
- *
- * @see #getUnknownWordID
- * @return the unique integer identifier for wordString,
- * or the result of <code>getUnknownWordID<code>
- * if wordString is not in the vocabulary
- */
- int getID(String wordString);
-
- /**
- * Gets the integer identifiers for all words in the provided
- * sentence.
- * <p>
- * The sentence will be split (on spaces) into words, then
- * the integer identifier for each word will be retrieved
- * using <code>getID</code>.
- *
- * @see #getID(String)
- * @param sentence String of words, separated by spaces.
- * @return Array of integer identifiers for each word in
- * the sentence
- */
- int[] getIDs(String sentence);
-
- /**
- * Gets the String that corresponds to the specified integer
- * identifier.
- * <p>
- * If the identifier is in the symbol vocabulary, the String
- * returned will correspond to that identifier.
- *
- * Otherwise, the String returned by <code>getUnknownWord<code>
- * will be returned.
- *
- * @return the String that corresponds to the specified
- * integer identifier, or the result of
- * <code>getUnknownWord</code> if the identifier
- * does not correspond to a word in the vocabulary
- */
- String getTerminal(int wordID);
-
- /**
- * Gets the String that corresponds to the specified integer
- * identifier.
- * <p>
- * This method can be called for terminals or nonterminals.
- *
- * @param tokenID Integer identifier
- * @return the String that corresponds to the specified
- * integer identifier
- */
- String getWord(int tokenID);
-
- /**
- * Gets the String that corresponds to the sequence of
- * specified integer identifiers.
- *
- * @param ids Sequence of integer identifiers
- * @return the String that corresponds to the sequence of
- * specified integer identifiers
- */
- String getWords(int[] ids);
-
- /**
- *
- * @param wordIDs
- * @return
- */
- String getTerminals(int[] wordIDs);
-
- /**
- * Gets a collection over all symbol identifiers for the
- * vocabulary.
- *
- * @return a collection over all symbol identifiers for the
- * vocabulary
- */
- Collection<Integer> getAllIDs();
-
- /**
- * Gets the list of all words represented by this vocabulary.
- *
- * @return the list of all words represented by this
- * vocabulary
- */
- Collection<String> getWords();
-
- /**
- * Gets the number of unique words in the vocabulary.
- *
- * @return the number of unique words in the vocabulary.
- */
- int size();
-
- /**
- * Gets the integer symbol representation of the unknown
- * word.
- *
- * @return the integer symbol representation of the unknown
- * word.
- */
- int getUnknownWordID();
-
- /**
- * Gets the string representation of the unknown word.
- *
- * @return the string representation of the unknown word.
- */
- String getUnknownWord();
-
- /**
- * Returns <code>true</code> if the symbol id represents a
- * nonterminal, <code>false</code> otherwise.
- *
- * @param id
- * @return <code>true</code> if the symbol id represents a
- * nonterminal, <code>false</code> otherwise.
- */
- boolean isNonterminal(int id);
-
- /**
- * Gets the lowest-valued allowable terminal symbol id in
- * this table.
- *
- * @return the lowest-valued allowable terminal symbol id
- * in this table.
- */
- int getLowestID();
-
-
- /**
- * Gets the highest-valued allowable terminal symbol id in
- * this table.
- * <p>
- * NOTE: This may or may not return the same value as
- * <code>size</code>.
- *
- * @return the highest-valued allowable terminal symbol id
- * in this table.
- */
- int getHighestID();
-
- /**
- *
- *
- * @param id
- * @return
- */
- int getTargetNonterminalIndex(int id);//first convert id to its String mapping, then call the function below
-
- /**
- *
- *
- * @param word
- * @return
- */
- int getTargetNonterminalIndex(String word);
-
- /**
- *
- *
- * @param wordIDs
- * @param ntIndexIncrements
- * @return
- */
- String getWords(int[] wordIDs, boolean ntIndexIncrements);
-
+
+ //TODO Remove all hard-coded references to nonterminals
+
+ /**
+ * The unknown word's ID will be the size of the vocabulary,
+ * ensuring that it is outside of the vocabulary. Note that
+ * for vocabularies which have not been fixed yet, this
+ * means the actual value is volatile and therefore a word
+ * ID can only be compared against UNKNOWN_WORD at the time
+ * the word ID is generated (otherwise unknown words can
+ * become "known" if new words are added to the vocabulary
+ * before testing).
+ * <p>
+ * Negative IDs are reserved for non-terminals.
+ *
+ * The value 1 is reserved as the UNKNOWN_WORD.
+ */
+ int UNKNOWN_WORD = 1;
+
+ /** String representation for out-of-vocabulary words. */
+ String UNKNOWN_WORD_STRING = "<unk>";
+
+ /**
+ * Integer representation of the bare (non-indexed) nonterminal X,
+ * which represents a wild-card gap in a phrase.
+ * <p>
+ * All nonterminals are guaranteed to be represented by negative integers.
+ */
+ int X = -1;
+
+ /**
+ * String representation of the bare (non-indexed) nonterminal X,
+ * which represents a wild-card gap in a phrase.
+ */
+ String X_STRING = "[X]";
+
+
+
+ /**
+ * String representation of the nonterminal X with index 1,
+ * which represents a wild-card gap in a phrase.
+ */
+ String X1_STRING = "[X,1]";
+
+
+
+ /**
+ * String representation of the nonterminal X with index 2,
+ * which represents a wild-card gap in a phrase.
+ */
+ String X2_STRING = "[X,2]";
+
+ /**
+ * Integer representation of the nonterminal S.
+ * <p>
+ * All nonterminals are guaranteed to be represented by negative integers.
+ */
+ int S = -4;
+
+ /**
+ * String representation of the nonterminal S.
+ */
+ String S_STRING = "[S]";
+
+ /**
+ * Integer representation of the nonterminal S with index 1
+ * (its string form is {@link #S1_STRING}).
+ * <p>
+ * All nonterminals are guaranteed to be represented by negative integers.
+ */
+ int S1 = -5;
+
+ /**
+ * String representation of the nonterminal S with index 1
+ * (its integer form is {@link #S1}).
+ */
+ String S1_STRING = "[S,1]";
+
+ /**
+ * Gets a unique integer identifier for the nonterminal.
+ * <p>
+ * The integer returned is guaranteed to be a negative number.
+ *
+ * If the nonterminal is {@link #X_STRING},
+ * then the value returned must be {@link #X}.
+ *
+ * Otherwise, the value returned must be a negative number
+ * whose value is less than {@link #X}.
+ *
+ * @param nonterminal Nonterminal symbol
+ * @return a unique integer identifier for the nonterminal
+ */
+ int addNonterminal(String nonterminal);
+
+ /**
+ * Gets a unique integer identifier for the terminal.
+ *
+ * @param terminal Terminal symbol
+ * @return a unique integer identifier for the terminal
+ */
+ int addTerminal(String terminal);
+
+ /**
+ * Gets the unique integer identifiers for the words.
+ *
+ * @param words Array of symbols
+ * @return the unique integer identifiers for the words
+ */
+ int[] addTerminals(String[] words);
+
+ /**
+ * Gets the unique integer identifiers for the words
+ * in the sentence.
+ *
+ * @param sentence Space-delimited string of symbols
+ * @return the unique integer identifiers for the words
+ * in the sentence
+ */
+ int[] addTerminals(String sentence);
+
+ /**
+ * Gets an integer identifier for the word.
+ * <p>
+ * If the word is in the vocabulary, the integer returned
+ * will uniquely identify that word.
+ * <p>
+ * If the word is not in the vocabulary, the integer returned
+ * by <code>getUnknownWordID</code> may be returned.
+ *
+ * Alternatively, implementations may, if they choose, add
+ * unknown words and assign them a symbol ID instead of
+ * returning <code>getUnknownWordID</code>.
+ *
+ * @see #getUnknownWordID
+ * @return the unique integer identifier for wordString,
+ * or the result of <code>getUnknownWordID</code>
+ * if wordString is not in the vocabulary
+ * @param wordString the word whose integer identifier is to be retrieved
+ */
+ int getID(String wordString);
+
+ /**
+ * Gets the integer identifiers for all words in the provided
+ * sentence.
+ * <p>
+ * The sentence will be split (on spaces) into words, then
+ * the integer identifier for each word will be retrieved
+ * using <code>getID</code>.
+ *
+ * @see #getID(String)
+ * @param sentence String of words, separated by spaces.
+ * @return Array of integer identifiers for each word in
+ * the sentence
+ */
+ int[] getIDs(String sentence);
+
+ /**
+ * Gets the String that corresponds to the specified integer
+ * identifier.
+ * <p>
+ * If the identifier is in the symbol vocabulary, the String
+ * returned will correspond to that identifier.
+ *
+ * Otherwise, the String returned by <code>getUnknownWord</code>
+ * will be returned.
+ *
+ * @param wordID an integer identifier for a specific String
+ * @return the String that corresponds to the specified
+ * integer identifier, or the result of
+ * <code>getUnknownWord</code> if the identifier
+ * does not correspond to a word in the vocabulary
+ */
+ String getTerminal(int wordID);
+
+ /**
+ * Gets the String that corresponds to the specified integer
+ * identifier.
+ * <p>
+ * This method can be called for terminals or nonterminals.
+ *
+ * @param tokenID Integer identifier
+ * @return the String that corresponds to the specified
+ * integer identifier
+ */
+ String getWord(int tokenID);
+
+ /**
+ * Gets the String that corresponds to the sequence of
+ * specified integer identifiers.
+ *
+ * @param ids Sequence of integer identifiers
+ * @return the String that corresponds to the sequence of
+ * specified integer identifiers
+ */
+ String getWords(int[] ids);
+
+ /**
+ *
+ * @param wordIDs an int[] of identifiers for specific Strings
+ * @return the String that corresponds to the specified
+ * integer identifiers
+ */
+ String getTerminals(int[] wordIDs);
+
+ /**
+ * Gets a collection over all symbol identifiers for the
+ * vocabulary.
+ *
+ * @return a collection over all symbol identifiers for the
+ * vocabulary
+ */
+ Collection<Integer> getAllIDs();
+
+ /**
+ * Gets the list of all words represented by this vocabulary.
+ *
+ * @return the list of all words represented by this
+ * vocabulary
+ */
+ Collection<String> getWords();
+
+ /**
+ * Gets the number of unique words in the vocabulary.
+ *
+ * @return the number of unique words in the vocabulary.
+ */
+ int size();
+
+ /**
+ * Gets the integer symbol representation of the unknown
+ * word.
+ *
+ * @return the integer symbol representation of the unknown
+ * word.
+ */
+ int getUnknownWordID();
+
+ /**
+ * Gets the string representation of the unknown word.
+ *
+ * @return the string representation of the unknown word.
+ */
+ String getUnknownWord();
+
+ /**
+ * Returns <code>true</code> if the symbol id represents a
+ * nonterminal, <code>false</code> otherwise.
+ *
+ * @param id int symbol id
+ * @return <code>true</code> if the symbol id represents a
+ * nonterminal, <code>false</code> otherwise.
+ */
+ boolean isNonterminal(int id);
+
+ /**
+ * Gets the lowest-valued allowable terminal symbol id in
+ * this table.
+ *
+ * @return the lowest-valued allowable terminal symbol id
+ * in this table.
+ */
+ int getLowestID();
+
+
+ /**
+ * Gets the highest-valued allowable terminal symbol id in
+ * this table.
+ * <p>
+ * NOTE: This may or may not return the same value as
+ * <code>size</code>.
+ *
+ * @return the highest-valued allowable terminal symbol id
+ * in this table.
+ */
+ int getHighestID();
+
+ /**
+ * @param id todo
+ * @return todo
+ */
+ int getTargetNonterminalIndex(int id);//first convert id to its String mapping, then call the function below
+
+ /**
+ * @param word todo
+ * @return todo
+ */
+ int getTargetNonterminalIndex(String word);
+
+ /**
+ * @param wordIDs todo
+ * @param ntIndexIncrements todo
+ * @return todo
+ */
+ String getWords(int[] wordIDs, boolean ntIndexIncrements);
+
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java b/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
index 8f2a576..e82b4cc 100644
--- a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
+++ b/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
@@ -39,8 +39,7 @@ public class TerminalIterator implements Iterator<Integer> {
/**
* Constructs an iterator for the terminals in the given list of words.
*
- * @param vocab
- * @param words
+ * @param words array of words
*/
public TerminalIterator(int[] words) {
this.words = words;
@@ -75,7 +74,7 @@ public class TerminalIterator implements Iterator<Integer> {
/**
* Unsupported operation, guaranteed to throw an UnsupportedOperationException.
*
- * @throws UnsupportedOperationException
+ * @throws UnsupportedOperationException operation not supported yet!
*/
public void remove() {
throw new UnsupportedOperationException();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Vocabulary.java b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
index a153902..582efc6 100644
--- a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
+++ b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
@@ -22,10 +22,13 @@ import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
+import java.io.Externalizable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -43,7 +46,7 @@ import org.apache.joshua.util.FormatUtils;
* @author Juri Ganitkevitch
*/
-public class Vocabulary {
+public class Vocabulary implements Externalizable {
private final static ArrayList<NGramLanguageModel> LMs = new ArrayList<>();
@@ -80,9 +83,9 @@ public class Vocabulary {
* Reads a vocabulary from file. This deletes any additions to the vocabulary made prior to
* reading the file.
*
- * @param file_name
+ * @param vocab_file path to a vocabulary file
* @return Returns true if vocabulary was read without mismatches or collisions.
- * @throws IOException
+ * @throws IOException if the file cannot be found or read properly
*/
public static boolean read(final File vocab_file) throws IOException {
DataInputStream vocab_stream =
@@ -125,9 +128,12 @@ public class Vocabulary {
* Get the id of the token if it already exists, new id is created otherwise.
*
* TODO: currently locks for every call. Separate constant (frozen) ids from
- * changing (e.g. OOV) ids. Constant ids could be immutable -> no locking.
+ * changing (e.g. OOV) ids. Constant ids could be immutable -> no locking.
* Alternatively: could we use ConcurrentHashMap to not have to lock if
* actually contains it and only lock for modifications?
+ *
+ * @param token a token to obtain an id for
+ * @return the token id
*/
public static int id(String token) {
// First attempt an optimistic read
@@ -185,7 +191,7 @@ public class Vocabulary {
public static int[] addAll(String sentence) {
return addAll(sentence.split("\\s+"));
}
-
+
public static int[] addAll(String[] tokens) {
int[] ids = new int[tokens.length];
for (int i = 0; i < tokens.length; i++)
@@ -230,8 +236,8 @@ public class Vocabulary {
/**
* Returns true if the Vocabulary ID represents a nonterminal.
*
- * @param id
- * @return
+ * @param id vocabulary ID to check
+ * @return true if the Vocabulary ID represents a nonterminal
*/
public static boolean nt(int id) {
return (id < 0);
@@ -275,4 +281,26 @@ public class Vocabulary {
LMs.clear();
}
+ @Override
+ public void writeExternal(ObjectOutput out) throws IOException {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public void readExternal(ObjectInput in)
+ throws IOException, ClassNotFoundException {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if(getClass() == o.getClass()) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/package.html b/src/main/java/org/apache/joshua/corpus/package.html
deleted file mode 100644
index 7643936..0000000
--- a/src/main/java/org/apache/joshua/corpus/package.html
+++ /dev/null
@@ -1,19 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE. BEGIN WITH A #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE: #####
--->
-
-Provides data structures for representing and manipulating corpora
-and phrases extracted from corpora.
-
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
index dc98585..f374279 100644
--- a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
+++ b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
@@ -280,17 +280,14 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
// TODO: bothersome no-backwards-arrays method.
}
}
-
return labels;
}
-
@Override
public int[] getTerminals() {
return getTerminals(0, terminals.size());
}
-
@Override
public int[] getTerminals(int from, int to) {
int[] span = new int[to - from];
@@ -299,40 +296,32 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
return span;
}
-
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
// TODO Auto-generated method stub
-
}
-
public void writeExternal(ObjectOutput out) throws IOException {
// TODO Auto-generated method stub
-
}
-
/**
* Reads Penn Treebank format file
+ * @param file_name the string path of the Penn Treebank file
+ * @throws IOException if the file does not exist
*/
public void readExternalText(String file_name) throws IOException {
LineReader reader = new LineReader(file_name);
-
initialize();
-
for (String line : reader) {
if (line.trim().equals("")) continue;
appendFromPennFormat(line);
}
}
-
public void writeExternalText(String file_name) throws IOException {
// TODO Auto-generated method stub
-
}
-
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ArgsParser.java b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
index fea20fc..ce42938 100644
--- a/src/main/java/org/apache/joshua/decoder/ArgsParser.java
+++ b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
@@ -37,8 +37,9 @@ public class ArgsParser {
* Parse the arguments passed from the command line when the JoshuaDecoder application was
* executed from the command line.
*
- * @param args
- * @throws IOException
+ * @param args string array of input arguments
+ * @param joshuaConfiguration the {@link org.apache.joshua.decoder.JoshuaConfiguration}
+ * @throws IOException if there is an error with the input arguments
*/
public ArgsParser(String[] args, JoshuaConfiguration joshuaConfiguration) throws IOException {
@@ -59,8 +60,8 @@ public class ArgsParser {
LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));
reader.readLine();
String version = reader.readLine().split("\\s+")[2];
- System.out.println(String.format("The Joshua machine translator, version %s", version));
- System.out.println("joshua-decoder.org");
+ System.out.println(String.format("The Apache Joshua machine translator, version %s", version));
+ System.out.println("joshua.incubator.apache.org");
System.exit(0);
} else if (args[i].equals("-license")) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/BLEU.java b/src/main/java/org/apache/joshua/decoder/BLEU.java
index a531611..8b51403 100644
--- a/src/main/java/org/apache/joshua/decoder/BLEU.java
+++ b/src/main/java/org/apache/joshua/decoder/BLEU.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder;
import java.util.ArrayList;
import java.util.HashMap;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -35,7 +34,7 @@ import org.apache.joshua.util.Regex;
/**
* this class implements: (1) sentence-level bleu, with smoothing
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class BLEU {
// do_ngram_clip: consider global n-gram clip
@@ -47,11 +46,12 @@ public class BLEU {
// ====================multiple references
/**
*
- * @param refSents
- * @param hypSent
+ * @param refSents todo
+ * @param hypSent todo
* @param doNgramClip Should usually be true
* @param bleuOrder Should usually be 4
* @param useShortestRef Probably use false
+ * @return todo
*/
public static float computeSentenceBleu(String[] refSents, String hypSent, boolean doNgramClip,
int bleuOrder, boolean useShortestRef) {
@@ -92,6 +92,9 @@ public class BLEU {
/**
* words in the ngrams are using integer symbol ID
+ * @param refSents todo
+ * @param bleuOrder todo
+ * @return todo
* */
public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
@@ -111,6 +114,8 @@ public class BLEU {
/**
* compute max_ref_count for each ngram in the reference sentences
+ * @param listRefNgramTbl todo
+ * @return todo
* */
public static HashMap<String, Integer> computeMaxRefCountTbl(
List<HashMap<String, Integer>> listRefNgramTbl) {
@@ -195,10 +200,7 @@ public class BLEU {
numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
refNgramTbl.get(ngram), entry.getValue()); // ngram clip
} else {
- numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without
- // ngram
- // count
- // clipping
+ numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without ngram count clipping
}
}
}
@@ -256,6 +258,11 @@ public class BLEU {
/**
* speed consideration: assume hypNgramTable has a smaller size than referenceNgramTable does
+ * @param linearCorpusGainThetas todo
+ * @param hypLength todo
+ * @param hypNgramTable todo
+ * @param referenceNgramTable todo
+ * @return todo
*/
public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, int hypLength,
Map<String, Integer> hypNgramTable, Map<String, Integer> referenceNgramTable) {
@@ -331,8 +338,10 @@ public class BLEU {
return res;
}
+ public static final int maxOrder = 4;
+
/**
- * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n <= 4) for terminal rules
+ * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n <= 4) for terminal rules
* and (b) all ngrams overlying boundary points between terminals in the rule and ngram state from
* tail nodes.
*
@@ -346,13 +355,11 @@ public class BLEU {
*
* Of these, all but the first have a boundary point to consider.
*
- * @param rule the rule being applied
- * @param spanWidth the width of the span in the input sentence
+ * @param edge todo
+ * @param spanPct todo
* @param references the reference to compute statistics against
- * @return
+ * @return todo
*/
- public static final int maxOrder = 4;
-
public static Stats compute(HyperEdge edge, float spanPct, References references) {
Stats stats = new Stats();
// TODO: this should not be the span width, but the real ref scaled to the span percentage
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 1aac0b0..87ab650 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -28,7 +28,6 @@ import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -80,10 +79,10 @@ import org.apache.joshua.util.io.LineReader;
* Translations object). Translations itself is an iterator whose next() call blocks until the next
* translation is available.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
*/
public class Decoder {
@@ -117,7 +116,8 @@ public class Decoder {
/**
* Constructor method that creates a new decoder using the specified configuration file.
*
- * @param configFile Name of configuration file.
+ * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+ * @param configFile name of configuration file.
*/
public Decoder(JoshuaConfiguration joshuaConfiguration, String configFile) {
this(joshuaConfiguration);
@@ -128,6 +128,7 @@ public class Decoder {
* Factory method that creates a new decoder using the specified configuration file.
*
* @param configFile Name of configuration file.
+ * @return a configured {@link org.apache.joshua.decoder.Decoder}
*/
public static Decoder createDecoder(String configFile) {
JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
@@ -154,6 +155,8 @@ public class Decoder {
* <p>
* This method is called by unit tests or any outside packages (e.g., MERT) relying on the
* decoder.
+ * @param joshuaConfiguration a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
+ * @return an uninitialized decoder for use in testing
*/
static public Decoder getUninitalizedDecoder(JoshuaConfiguration joshuaConfiguration) {
return new Decoder(joshuaConfiguration);
@@ -436,9 +439,9 @@ public class Decoder {
* (possibly boundless) set of input sentences. Each request launches its own thread to read the
* sentences of the request.
*
- * @param request
- * @return an iterable set of Translation objects
- * @throws IOException
+ * @param request the populated {@link org.apache.joshua.decoder.io.TranslationRequestStream}
+ * @param out an appropriate {@link java.io.OutputStream} to write results to
+ * @throws IOException if there is an error with the input stream or writing the output
*/
public void decodeAll(TranslationRequestStream request, OutputStream out) throws IOException {
Translations translations = new Translations(request);
@@ -496,8 +499,8 @@ public class Decoder {
/**
* We can also just decode a single sentence.
*
- * @param sentence
- * @return The translated sentence
+ * @param sentence the input {@link Sentence} to be translated
+ * @return the sentence {@link org.apache.joshua.decoder.Translation}
*/
public Translation decode(Sentence sentence) {
// Get a thread.
@@ -777,7 +780,7 @@ public class Decoder {
: -1;
joshuaConfiguration.search_algorithm = "stack";
- grammar = new PhraseTable(path, owner, type, joshuaConfiguration, maxSourceLen);
+ grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
}
this.grammars.add(grammar);
@@ -794,7 +797,7 @@ public class Decoder {
}
/* Add the grammar for custom entries */
- this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration, 0);
+ this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
this.grammars.add(this.customPhraseTable);
/* Create an epsilon-deleting grammar */
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
index 4390a59..0f80335 100644
--- a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@ -44,8 +44,8 @@ import org.apache.joshua.corpus.Vocabulary;
*
* The DecoderFactory class is responsible for launching the threads.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class DecoderThread extends Thread {
@@ -91,6 +91,7 @@ public class DecoderThread extends Thread {
* Translate a sentence.
*
* @param sentence The sentence to be translated.
+ * @return the sentence {@link org.apache.joshua.decoder.Translation}
*/
public Translation translate(Sentence sentence) {
@@ -105,7 +106,7 @@ public class DecoderThread extends Thread {
Decoder.LOG(1, String.format("Translation %d: Translation took 0 seconds", sentence.id()));
return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
}
-
+
long startTime = System.currentTimeMillis();
int numGrammars = allGrammars.size();
@@ -113,7 +114,7 @@ public class DecoderThread extends Thread {
for (int i = 0; i < allGrammars.size(); i++)
grammars[i] = allGrammars.get(i);
-
+
if (joshuaConfiguration.segment_oovs)
sentence.segmentOOVs(grammars);
@@ -127,7 +128,7 @@ public class DecoderThread extends Thread {
if (joshuaConfiguration.search_algorithm.equals("stack")) {
Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
-
+
hypergraph = stacks.search();
} else {
/* Seeding: the chart only sees the grammars, not the factories */
@@ -135,10 +136,10 @@ public class DecoderThread extends Thread {
joshuaConfiguration.goal_symbol, joshuaConfiguration);
hypergraph = (joshuaConfiguration.use_dot_chart)
- ? chart.expand()
- : chart.expandSansDotChart();
+ ? chart.expand()
+ : chart.expandSansDotChart();
}
-
+
} catch (java.lang.OutOfMemoryError e) {
Decoder.LOG(1, String.format("Input %d: out of memory", sentence.id()));
hypergraph = null;
@@ -155,7 +156,7 @@ public class DecoderThread extends Thread {
}
/*****************************************************************************************/
-
+
/*
* Synchronous parsing.
*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
index b4624cf..def7c85 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@ -43,8 +43,8 @@ import org.apache.joshua.util.io.LineReader;
* When adding new features to Joshua, any new configurable parameters should be added to this
* class.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class JoshuaConfiguration {
@@ -325,6 +325,8 @@ public class JoshuaConfiguration {
* To process command-line options, we write them to a file that looks like the config file, and
* then call readConfigFile() on it. It would be more general to define a class that sits on a
* stream and knows how to chop it up, but this was quicker to implement.
+ *
+ * @param options string array of command line options
*/
public void processCommandLineOptions(String[] options) {
try {
@@ -696,8 +698,13 @@ public class JoshuaConfiguration {
* equivalence classes on external use of parameter names, permitting arbitrary_under_scores and
* camelCasing in paramter names without forcing the user to memorize them all. Here are some
* examples of equivalent ways to refer to parameter names:
- *
+ * <pre>
* {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
+ * </pre>
+ *
+ * @param text the string to be normalized
+ * @return normalized key
+ *
*/
public static String normalize_key(String text) {
return text.replaceAll("[-_]", "").toLowerCase();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index 8c0b10b..42b17d7 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -39,9 +39,9 @@ import org.apache.joshua.server.ServerThread;
* Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
* and <code>DecoderThread</code>.
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
*/
public class JoshuaDecoder {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
index b2126cb..e2061b0 100644
--- a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
+++ b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
@@ -42,7 +42,7 @@ import org.apache.joshua.util.Regex;
* uses a Viterbi approximation: the probability of a string is its best derivation probability So,
* if one wants to deal with spurious ambiguity, he/she should do that before calling this class
*
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class NbestMinRiskReranker {
@@ -182,7 +182,10 @@ public class NbestMinRiskReranker {
/**
* based on a list of log-probabilities in nbestLogProbs, obtain a normalized distribution, and
* put the normalized probability (real value in [0,1]) into nbestLogProbs
- * */
+ *
+ * @param nbestLogProbs a {@link java.util.List} of {@link java.lang.Double} representing nbestLogProbs
+ * @param scalingFactor double value representing scaling factor
+ */
// get a normalized distribution and put it back to nbestLogProbs
static public void computeNormalizedProbs(List<Double> nbestLogProbs, double scalingFactor) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 75e6ab4..8aa518e 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -33,10 +33,9 @@ import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.segment_file.Sentence;
/**
- * structuredTranslation provides a more structured access to translation
+ * <p>structuredTranslation provides a more structured access to translation
* results than the Translation class.
- * Members of instances of this class can be used upstream.
- * <br/>
+ * Members of instances of this class can be used upstream.</p>
* TODO:
* Enable K-Best extraction.
*
@@ -107,6 +106,7 @@ public class StructuredTranslation {
/**
* Returns a list of target to source alignments.
+ * @return a list of target to source alignments
*/
public List<List<Integer>> getTranslationWordAlignments() {
return translationWordAlignments;
@@ -118,6 +118,7 @@ public class StructuredTranslation {
/**
* Time taken to build output information from the hypergraph.
+ * @return the time taken to build output information from the hypergraph
*/
public Float getExtractionTime() {
return extractionTime;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Support.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Support.java b/src/main/java/org/apache/joshua/decoder/Support.java
index 7c4a0b2..e513aef 100644
--- a/src/main/java/org/apache/joshua/decoder/Support.java
+++ b/src/main/java/org/apache/joshua/decoder/Support.java
@@ -21,7 +21,7 @@ package org.apache.joshua.decoder;
import java.util.List;
/**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class Support {
@@ -33,14 +33,15 @@ public class Support {
return (a > b) ? a : b;
}
-
public static int[] toArray(List<Integer> in) {
return subIntArray(in, 0, in.size());
}
/**
+ * @param in a {@link java.util.List} of Integer
* @param start inclusive
* @param end exclusive
+ * @return sub int[] from start to end
*/
public static int[] subIntArray(List<Integer> in, int start, int end) {
int[] res = new int[end - start];
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index 5afae74..0ee1f08 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -41,7 +41,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
* sentence and id and contains the decoded hypergraph. Translation objects are returned by
* DecoderThread instances to the InputHandler, where they are assembled in order for output.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Translation {
@@ -54,17 +54,17 @@ public class Translation {
private String output = null;
private StructuredTranslation structuredTranslation = null;
-
+
public Translation(Sentence source, HyperGraph hypergraph,
List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
this.source = source;
-
+
if (joshuaConfiguration.use_structured_output) {
-
+
structuredTranslation = new StructuredTranslation(
source, hypergraph, featureFunctions);
this.output = structuredTranslation.getTranslationString();
-
+
} else {
StringWriter sw = new StringWriter();
@@ -81,15 +81,15 @@ public class Translation {
// We must put this weight as zero, otherwise we get an error when we try to retrieve it
// without checking
Decoder.weights.increment("BLEU", 0);
-
+
if (joshuaConfiguration.topN == 0) {
-
+
/* construct Viterbi output */
final String best = getViterbiString(hypergraph);
-
+
Decoder.LOG(1, String.format("Translation %d: %.3f %s", source.id(), hypergraph.goalNode.getScore(),
best));
-
+
/*
* Setting topN to 0 turns off k-best extraction, in which case we need to parse through
* the output-string, with the understanding that we can only substitute variables for the
@@ -100,21 +100,21 @@ public class Translation {
.replace("%S", DeNormalize.processSingleLine(best))
.replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
.replace("%i", String.format("%d", source.id()));
-
+
if (joshuaConfiguration.outputFormat.contains("%a")) {
translation = translation.replace("%a", getViterbiWordAlignments(hypergraph));
}
-
+
if (joshuaConfiguration.outputFormat.contains("%f")) {
final FeatureVector features = getViterbiFeatures(hypergraph, featureFunctions, source);
translation = translation.replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString());
}
-
+
out.write(translation);
out.newLine();
-
+
} else {
-
+
final KBestExtractor kBestExtractor = new KBestExtractor(
source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
@@ -132,31 +132,31 @@ public class Translation {
Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
joshuaConfiguration.topN, seconds));
- } else {
-
- // Failed translations and blank lines get empty formatted outputs
- // @formatter:off
- String outputString = joshuaConfiguration.outputFormat
- .replace("%s", source.source())
- .replace("%e", "")
- .replace("%S", "")
- .replace("%t", "()")
- .replace("%i", Integer.toString(source.id()))
- .replace("%f", "")
- .replace("%c", "0.000");
- // @formatter:on
-
- out.write(outputString);
- out.newLine();
- }
+ } else {
+
+ // Failed translations and blank lines get empty formatted outputs
+ // @formatter:off
+ String outputString = joshuaConfiguration.outputFormat
+ .replace("%s", source.source())
+ .replace("%e", "")
+ .replace("%S", "")
+ .replace("%t", "()")
+ .replace("%i", Integer.toString(source.id()))
+ .replace("%f", "")
+ .replace("%c", "0.000");
+ // @formatter:on
+
+ out.write(outputString);
+ out.newLine();
+ }
out.flush();
} catch (IOException e) {
throw new RuntimeException(e);
}
-
+
this.output = sw.toString();
-
+
}
/*
@@ -169,7 +169,7 @@ public class Translation {
break;
}
}
-
+
}
public Sentence getSourceSentence() {
@@ -184,12 +184,12 @@ public class Translation {
public String toString() {
return output;
}
-
+
/**
* Returns the StructuredTranslation object
* if JoshuaConfiguration.use_structured_output == True.
* @throws RuntimeException if StructuredTranslation object not set.
- * @return
+ * @return {@link org.apache.joshua.decoder.StructuredTranslation} object
*/
public StructuredTranslation getStructuredTranslation() {
if (structuredTranslation == null) {
@@ -197,5 +197,5 @@ public class Translation {
}
return structuredTranslation;
}
-
+
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Translations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translations.java b/src/main/java/org/apache/joshua/decoder/Translations.java
index 7dd9086..0b91ff9 100644
--- a/src/main/java/org/apache/joshua/decoder/Translations.java
+++ b/src/main/java/org/apache/joshua/decoder/Translations.java
@@ -30,7 +30,7 @@ import org.apache.joshua.decoder.io.TranslationRequestStream;
* Translation in the right place. When the next translation in a sequence is available, next() is
* notified.
*
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
*/
public class Translations {
@@ -73,7 +73,7 @@ public class Translations {
* the ID of the translation is the same as the one being waited for (currentID). If so, the
* thread waiting for it is notified.
*
- * @param translation
+ * @param translation a translated input object
*/
public void record(Translation translation) {
synchronized (this) {
@@ -98,6 +98,8 @@ public class Translations {
/**
* Returns the next Translation, blocking if necessary until it's available, since the next
* Translation might not have been produced yet.
+ *
+ * @return first element from the list of {@link org.apache.joshua.decoder.Translation}'s
*/
public Translation next() {
synchronized (this) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 0825ccb..0bc2f9f 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -60,8 +60,8 @@ import org.apache.joshua.util.ChartSpan;
* index of sentences: start from zero index of cell: cell (i,j) represent span
* of words indexed [i,j-1] where i is in [0,n-1] and j is in [1,n]
*
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
*/
public class Chart {
@@ -728,7 +728,7 @@ public class Chart {
}
/***
- * Add a terminal production (X -> english phrase) to the hypergraph.
+ * Add a terminal production (X -> english phrase) to the hypergraph.
*
* @param i the start index
* @param j stop index
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index eeb6366..73c3cd0 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -35,8 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
/**
* This class computes the cost of applying a rule.
*
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
*/
public class ComputeNodeResult {
@@ -52,13 +52,20 @@ public class ComputeNodeResult {
// The StateComputer objects themselves serve as keys.
private List<DPState> dpStates;
-
+
/**
* Computes the new state(s) that are produced when applying the given rule to the list of tail
* nodes. Also computes a range of costs of doing so (the transition cost, the total (Viterbi)
* cost, and a score that includes a future cost estimate).
*
* Old version that doesn't use the derivation state.
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to use when computing the node result
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source lattice
+ * @param sentence the lattice input
*/
public ComputeNodeResult(List<FeatureFunction> featureFunctions, Rule rule, List<HGNode> tailNodes,
int i, int j, SourcePath sourcePath, Sentence sentence) {
@@ -66,12 +73,12 @@ public class ComputeNodeResult {
// The total Viterbi cost of this edge. This is the Viterbi cost of the tail nodes, plus
// whatever costs we incur applying this rule to create a new hyperedge.
float viterbiCost = 0.0f;
-
+
if (Decoder.VERBOSE >= 4) {
System.err.println("ComputeNodeResult():");
System.err.println("-> RULE " + rule);
}
-
+
/*
* Here we sum the accumulated cost of each of the tail nodes. The total cost of the new
* hyperedge (the inside or Viterbi cost) is the sum of these nodes plus the cost of the
@@ -95,7 +102,7 @@ public class ComputeNodeResult {
// The future cost estimate is a heuristic estimate of the outside cost of this edge.
float futureCostEstimate = 0.0f;
-
+
/*
* We now iterate over all the feature functions, computing their cost and their expected future
* cost.
@@ -105,7 +112,7 @@ public class ComputeNodeResult {
DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
transitionCost += acc.getScore();
-
+
if (Decoder.VERBOSE >= 4)
System.err.println(String.format("-> FEATURE %s = %.3f * %.3f = %.3f",
feature.getName(), acc.getScore() / Decoder.weights.getSparse(feature.getName()),
@@ -116,21 +123,22 @@ public class ComputeNodeResult {
allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
}
}
-
+
viterbiCost += transitionCost;
if (Decoder.VERBOSE >= 4)
System.err.println(String.format("-> COST = %.3f", transitionCost));
-
+
// Set the final results.
this.pruningCostEstimate = viterbiCost + futureCostEstimate;
this.viterbiCost = viterbiCost;
this.transitionCost = transitionCost;
this.dpStates = allDPStates;
}
-
+
/**
- * This is called from Cell.java when making the final transition to the goal state.
+ * This is called from {@link org.apache.joshua.decoder.chart_parser.Cell}
+ * when making the final transition to the goal state.
* This is done to allow feature functions to correct for partial estimates, since
* they now have the knowledge that the whole sentence is complete. Basically, this
* is only used by LanguageModelFF, which does not score partial n-grams, and therefore
@@ -140,6 +148,14 @@ public class ComputeNodeResult {
* too: it makes search better (more accurate at the beginning, for example), and would
* also do away with the need for the computeFinal* class of functions (and hooks in
* the feature function interface).
+ *
+ * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+ * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+ * @param i todo
+ * @param j todo
+ * @param sourcePath information about a path taken through the source lattice
+ * @param sentence the lattice input
+ * @return the final cost for the Node
*/
public static float computeFinalCost(List<FeatureFunction> featureFunctions,
List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, Sentence sentence) {
@@ -150,13 +166,13 @@ public class ComputeNodeResult {
}
return cost;
}
-
+
public static FeatureVector computeTransitionFeatures(List<FeatureFunction> featureFunctions,
HyperEdge edge, int i, int j, Sentence sentence) {
// Initialize the set of features with those that were present with the rule in the grammar.
FeatureVector featureDelta = new FeatureVector();
-
+
// === compute feature logPs
for (FeatureFunction ff : featureFunctions) {
// A null rule signifies the final transition.
@@ -166,7 +182,7 @@ public class ComputeNodeResult {
featureDelta.add(ff.computeFeatures(edge.getRule(), edge.getTailNodes(), i, j, edge.getSourcePath(), sentence));
}
}
-
+
return featureDelta;
}
@@ -176,11 +192,12 @@ public class ComputeNodeResult {
/**
* The complete cost of the Viterbi derivation at this point
+ * @return float representing cost
*/
public float getViterbiCost() {
return this.viterbiCost;
}
-
+
public float getBaseCost() {
return getViterbiCost() - getTransitionCost();
}
@@ -188,7 +205,7 @@ public class ComputeNodeResult {
/**
* The cost incurred by this edge alone
*
- * @return
+ * @return float representing cost
*/
public float getTransitionCost() {
return this.transitionCost;