You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:36 UTC

[41/94] [abbrv] [partial] incubator-joshua git commit: Pulled JOSHUA-252 changes and Resolved Merge Conflicts

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/symal/symal.cpp
----------------------------------------------------------------------
diff --git a/ext/symal/symal.cpp b/ext/symal/symal.cpp
deleted file mode 100644
index 8f1bac0..0000000
--- a/ext/symal/symal.cpp
+++ /dev/null
@@ -1,503 +0,0 @@
-// $Id$
-
-#include <cassert>
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <list>
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <cstring>
-#include "cmd.h"
-
-using namespace std;
-
-#define MAX_WORD 10000 // maximum lengthsource/target strings 
-#define MAX_M 200      // maximum length of source strings
-#define MAX_N 200      // maximum length of target strings 
-
-#define UNION                      1
-#define INTERSECT                  2
-#define GROW                       3
-#define SRCTOTGT                   4
-#define TGTTOSRC                   5
-#define BOOL_YES                   1
-#define BOOL_NO                    0
-
-#define END_ENUM    {   (char*)0,  0 }
-
-static Enum_T AlignEnum [] = {
-  {    "union",                        UNION },
-  {    "u",                            UNION },
-  {    "intersect",                    INTERSECT},
-  {    "i",                            INTERSECT},
-  {    "grow",                         GROW },
-  {    "g",                            GROW },
-  {    "srctotgt",                     SRCTOTGT },
-  {    "s2t",                          SRCTOTGT },
-  {    "tgttosrc",                     TGTTOSRC },
-  {    "t2s",                          TGTTOSRC },
-  END_ENUM
-};
-
-static Enum_T BoolEnum [] = {
-  {    "true",        BOOL_YES },
-  {    "yes",         BOOL_YES },
-  {    "y",           BOOL_YES },
-  {    "false",       BOOL_NO },
-  {    "no",          BOOL_NO },
-  {    "n",           BOOL_NO },
-  END_ENUM
-};
-
-
-
-// global variables and constants
-
-int* fa; //counters of covered foreign positions
-int* ea; //counters of covered english positions
-int** A; //alignment matrix with information symmetric/direct/inverse alignments
-
-int verbose=0;
-
-//read an alignment pair from the input stream.
-
-int lc = 0;
-
-int getals(fstream& inp,int& m, int *a,int& n, int *b)
-{
-  char w[MAX_WORD], dummy[10];
-  int i,j,freq;
-  if (inp >> freq) {
-    ++lc;
-    //target sentence
-    inp >> n;
-    assert(n<MAX_N);
-    for (i=1; i<=n; i++) {
-      inp >> setw(MAX_WORD) >> w;
-      if (strlen(w)>=MAX_WORD-1) {
-        cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1="
-             << MAX_WORD-1 << endl;
-        assert(strlen(w)<MAX_WORD-1);
-      }
-    }
-
-    inp >> dummy; //# separator
-    // inverse alignment
-    for (i=1; i<=n; i++) inp >> b[i];
-
-    //source sentence
-    inp >> m;
-    assert(m<MAX_M);
-    for (j=1; j<=m; j++) {
-      inp >> setw(MAX_WORD) >> w;
-      if (strlen(w)>=MAX_WORD-1) {
-        cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1="
-             << MAX_WORD-1 << endl;
-        assert(strlen(w)<MAX_WORD-1);
-      }
-    }
-
-    inp >> dummy; //# separator
-
-    // direct alignment
-    for (j=1; j<=m; j++) {
-      inp >> a[j];
-      assert(0<=a[j] && a[j]<=n);
-    }
-
-    //check inverse alignemnt
-    for (i=1; i<=n; i++)
-      assert(0<=b[i] && b[i]<=m);
-
-    return 1;
-
-  } else
-    return 0;
-};
-
-
-//compute union alignment
-int prunionalignment(fstream& out,int m,int *a,int n,int* b)
-{
-
-  ostringstream sout;
-
-  for (int j=1; j<=m; j++)
-    if (a[j])
-      sout << j-1 << "-" << a[j]-1 << " ";
-
-  for (int i=1; i<=n; i++)
-    if (b[i] && a[b[i]]!=i)
-      sout << b[i]-1 <<  "-" << i-1 << " ";
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-
-  return 1;
-}
-
-
-//Compute intersection alignment
-
-int printersect(fstream& out,int m,int *a,int n,int* b)
-{
-
-  ostringstream sout;
-
-  for (int j=1; j<=m; j++)
-    if (a[j] && b[a[j]]==j)
-      sout << j-1 << "-" << a[j]-1 << " ";
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-
-  return 1;
-}
-
-//Compute target-to-source alignment
-
-int printtgttosrc(fstream& out,int m,int *a,int n,int* b)
-{
-
-  ostringstream sout;
-
-  for (int i=1; i<=n; i++)
-    if (b[i])
-      sout << b[i]-1 << "-" << i-1 << " ";
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-
-  return 1;
-}
-
-//Compute source-to-target alignment
-
-int printsrctotgt(fstream& out,int m,int *a,int n,int* b)
-{
-
-  ostringstream sout;
-
-  for (int j=1; j<=m; j++)
-    if (a[j])
-      sout << j-1 << "-" << a[j]-1 << " ";
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-
-  return 1;
-}
-
-//Compute Grow Diagonal Alignment
-//Nice property: you will never introduce more points
-//than the unionalignment alignemt. Hence, you will always be able
-//to represent the grow alignment as the unionalignment of a
-//directed and inverted alignment
-
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false)
-{
-
-  ostringstream sout;
-
-  vector <pair <int,int> > neighbors; //neighbors
-
-  pair <int,int> entry;
-
-  neighbors.push_back(make_pair(-1,-0));
-  neighbors.push_back(make_pair(0,-1));
-  neighbors.push_back(make_pair(1,0));
-  neighbors.push_back(make_pair(0,1));
-
-
-  if (diagonal) {
-    neighbors.push_back(make_pair(-1,-1));
-    neighbors.push_back(make_pair(-1,1));
-    neighbors.push_back(make_pair(1,-1));
-    neighbors.push_back(make_pair(1,1));
-  }
-
-
-  int i,j,o;
-
-
-  //covered foreign and english positions
-
-  memset(fa,0,(m+1)*sizeof(int));
-  memset(ea,0,(n+1)*sizeof(int));
-
-  //matrix to quickly check if one point is in the symmetric
-  //alignment (value=2), direct alignment (=1) and inverse alignment
-
-  for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int));
-
-  set <pair <int,int> > currentpoints; //symmetric alignment
-  set <pair <int,int> > unionalignment; //union alignment
-
-  pair <int,int> point; //variable to store points
-  set<pair <int,int> >::const_iterator k; //iterator over sets
-
-  //fill in the alignments
-  for (j=1; j<=m; j++) {
-    if (a[j]) {
-      unionalignment.insert(make_pair(a[j],j));
-      if (b[a[j]]==j) {
-        fa[j]=1;
-        ea[a[j]]=1;
-        A[a[j]][j]=2;
-        currentpoints.insert(make_pair(a[j],j));
-      } else
-        A[a[j]][j]=-1;
-    }
-  }
-
-  for (i=1; i<=n; i++)
-    if (b[i] && a[b[i]]!=i) { //not intersection
-      unionalignment.insert(make_pair(i,b[i]));
-      A[i][b[i]]=1;
-    }
-
-
-  int added=1;
-
-  while (added) {
-    added=0;
-    ///scan the current alignment
-    for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
-      //cout << "{"<< (k->second)-1 << "-" << (k->first)-1 << "}";
-      for (o=0; o<neighbors.size(); o++) {
-        //cout << "go over check all neighbors\n";
-        point.first=k->first+neighbors[o].first;
-        point.second=k->second+neighbors[o].second;
-        //cout << point.second-1 << " " << point.first-1 << "\n";
-        //check if neighbor is inside 'matrix'
-        if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
-          //check if neighbor is in the unionalignment alignment
-          if (b[point.first]==point.second || a[point.second]==point.first) {
-            //cout << "In unionalignment ";cout.flush();
-            //check if it connects at least one uncovered word
-            if (!(ea[point.first] && fa[point.second])) {
-              //insert point in currentpoints!
-              currentpoints.insert(point);
-              A[point.first][point.second]=2;
-              ea[point.first]=1;
-              fa[point.second]=1;
-              added=1;
-              //cout << "added grow: " << point.second-1 << "-" << point.first-1 << "\n";cout.flush();
-            }
-          }
-      }
-    }
-  }
-
-  if (final) {
-    for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
-      if (A[k->first][k->second]==1) {
-        point.first=k->first;
-        point.second=k->second;
-        //one of the two words is not covered yet
-        //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
-        if ((bothuncovered &&  !ea[point.first] && !fa[point.second]) ||
-            (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
-          //add it!
-          currentpoints.insert(point);
-          A[point.first][point.second]=2;
-          //keep track of new covered positions
-          ea[point.first]=1;
-          fa[point.second]=1;
-
-          //added=1;
-          //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
-        }
-      }
-
-    for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
-      if (A[k->first][k->second]==-1) {
-        point.first=k->first;
-        point.second=k->second;
-        //one of the two words is not covered yet
-        //cout << "{" << point.second-1 << "-" << point.first-1 << "} ";
-        if ((bothuncovered &&  !ea[point.first] && !fa[point.second]) ||
-            (!bothuncovered && !(ea[point.first] && fa[point.second]))) {
-          //add it!
-          currentpoints.insert(point);
-          A[point.first][point.second]=2;
-          //keep track of new covered positions
-          ea[point.first]=1;
-          fa[point.second]=1;
-
-          //added=1;
-          //cout << "added final: " << point.second-1 << "-" << point.first-1 << "\n";
-        }
-      }
-  }
-
-
-  for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
-    sout << k->second-1 << "-" << k->first-1 << " ";
-
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-  return 1;
-
-  return 1;
-}
-
-
-
-//Main file here
-
-
-int main(int argc, char** argv)
-{
-
-  int alignment=0;
-  char* input="/dev/stdin";
-  char* output="/dev/stdout";
-  int diagonal=false;
-  int final=false;
-  int bothuncovered=false;
-
-
-  DeclareParams("a", CMDENUMTYPE,  &alignment, AlignEnum,
-                "alignment", CMDENUMTYPE,  &alignment, AlignEnum,
-                "d", CMDENUMTYPE,  &diagonal, BoolEnum,
-                "diagonal", CMDENUMTYPE,  &diagonal, BoolEnum,
-                "f", CMDENUMTYPE,  &final, BoolEnum,
-                "final", CMDENUMTYPE,  &final, BoolEnum,
-                "b", CMDENUMTYPE,  &bothuncovered, BoolEnum,
-                "both", CMDENUMTYPE,  &bothuncovered, BoolEnum,
-                "i", CMDSTRINGTYPE, &input,
-                "o", CMDSTRINGTYPE, &output,
-                "v", CMDENUMTYPE,  &verbose, BoolEnum,
-                "verbose", CMDENUMTYPE,  &verbose, BoolEnum,
-
-                (char *)NULL);
-
-  GetParams(&argc, &argv, (char*) NULL);
-
-  if (alignment==0) {
-    cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
-         << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
-
-    exit(1);
-
-  }
-
-  fstream inp(input,ios::in);
-  fstream out(output,ios::out);
-
-  if (!inp.is_open()) {
-    cerr << "cannot open " << input << "\n";
-    exit(1);
-  }
-
-  if (!out.is_open()) {
-    cerr << "cannot open " << output << "\n";
-    exit(1);
-  }
-
-
-  int a[MAX_M],b[MAX_N],m,n;
-  fa=new int[MAX_M+1];
-  ea=new int[MAX_N+1];
-
-
-  int sents = 0;
-  A=new int *[MAX_N+1];
-  for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
-
-  switch (alignment) {
-  case UNION:
-    cerr << "symal: computing union alignment\n";
-    while(getals(inp,m,a,n,b)) {
-      prunionalignment(out,m,a,n,b);
-      sents++;
-    }
-    cerr << "Sents: " << sents << endl;
-    break;
-  case INTERSECT:
-    cerr << "symal: computing intersect alignment\n";
-    while(getals(inp,m,a,n,b)) {
-      printersect(out,m,a,n,b);
-      sents++;
-    }
-    cerr << "Sents: " << sents << endl;
-    break;
-  case GROW:
-    cerr << "symal: computing grow alignment: diagonal ("
-         << diagonal << ") final ("<< final << ")"
-         <<  "both-uncovered (" << bothuncovered <<")\n";
-
-    while(getals(inp,m,a,n,b))
-      printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
-
-    break;
-  case TGTTOSRC:
-    cerr << "symal: computing target-to-source alignment\n";
-
-    while(getals(inp,m,a,n,b)) {
-      printtgttosrc(out,m,a,n,b);
-      sents++;
-    }
-    cerr << "Sents: " << sents << endl;
-    break;
-  case SRCTOTGT:
-    cerr << "symal: computing source-to-target alignment\n";
-
-    while(getals(inp,m,a,n,b)) {
-      printsrctotgt(out,m,a,n,b);
-      sents++;
-    }
-    cerr << "Sents: " << sents << endl;
-    break;
-  default:
-    exit(1);
-  }
-
-  delete [] fa;
-  delete [] ea;
-  for (int i=1; i<=MAX_N; i++) delete [] A[i];
-  delete [] A;
-
-  exit(0);
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/jni/kenlm_wrap.cc
----------------------------------------------------------------------
diff --git a/jni/kenlm_wrap.cc b/jni/kenlm_wrap.cc
index 16cb54b..64c9fe9 100644
--- a/jni/kenlm_wrap.cc
+++ b/jni/kenlm_wrap.cc
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 #include "lm/enumerate_vocab.hh"
 #include "lm/model.hh"
 #include "lm/left.hh"

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index c33d54b..8c3e4b9 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -1,5 +1,20 @@
 #!/usr/bin/env perl
 
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # This script implements the Joshua pipeline.  It can run a complete
 # pipeline --- from raw training corpora to bleu scores on a test set
 # --- and it allows jumping into arbitrary points of the pipeline. 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
index f65f26f..f7f6be2 100644
--- a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
@@ -12,7 +12,7 @@ import java.util.ArrayList;
 /**
  * The simplest concrete implementation of Phrase.
  * 
- * @author wren ng thornton <wr...@users.sourceforge.net>
+ * @author wren ng thornton wren@users.sourceforge.net
  * @version $LastChangedDate$
  */
 public class BasicPhrase extends AbstractPhrase {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
index 855a7c1..af669b7 100644
--- a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
@@ -31,37 +31,16 @@ import java.util.List;
  */
 public class ContiguousPhrase extends AbstractPhrase {
 
-  // ===============================================================
-  // Constants
-  // ===============================================================
-
-  // ===============================================================
-  // Member variables
-  // ===============================================================
-
   protected int startIndex;
   protected int endIndex;
   protected Corpus corpusArray;
 
-  // ===============================================================
-  // Constructor(s)
-  // ===============================================================
-
   public ContiguousPhrase(int startIndex, int endIndex, Corpus corpusArray) {
     this.startIndex = startIndex;
     this.endIndex = endIndex;
     this.corpusArray = corpusArray;
   }
 
-
-  // ===============================================================
-  // Public
-  // ===============================================================
-
-  // ===========================================================
-  // Accessor methods (set/get)
-  // ===========================================================
-
   /**
    * This method copies the phrase into an array of ints. This method should be avoided if possible.
    * 
@@ -75,23 +54,15 @@ public class ContiguousPhrase extends AbstractPhrase {
     return words;
   }
 
-
   public int getWordID(int position) {
     return corpusArray.getWordID(startIndex + position);
     // return corpusArray.corpus[startIndex+position];
   }
 
-
   public int size() {
     return endIndex - startIndex;
   }
 
-
-  // ===========================================================
-  // Methods
-  // ===========================================================
-
-
   /**
    * Gets all possible subphrases of this phrase, up to and including the phrase itself. For
    * example, the phrase "I like cheese ." would return the following:
@@ -114,7 +85,6 @@ public class ContiguousPhrase extends AbstractPhrase {
     return getSubPhrases(size());
   }
 
-
   /**
    * Returns a list of subphrases only of length <code>maxLength</code> or smaller.
    * 
@@ -134,7 +104,6 @@ public class ContiguousPhrase extends AbstractPhrase {
     return phrases;
   }
 
-
   /**
    * creates a new phrase object from the indexes provided.
    * <P>
@@ -148,36 +117,9 @@ public class ContiguousPhrase extends AbstractPhrase {
     return new ContiguousPhrase(startIndex + start, startIndex + end, corpusArray);
   }
 
-
-  // ===============================================================
-  // Protected
-  // ===============================================================
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-
-  // ===============================================================
-  // Private
-  // ===============================================================
-
-  // ===============================================================
-  // Methods
-  // ===============================================================
-
-
-  // ===============================================================
-  // Static
-  // ===============================================================
-
-
-  // ===============================================================
-  // Main
-  // ===============================================================
-
   /**
    * Main contains test code
+   * @param args String array of arguments used to run this class.
    */
   public static void main(String[] args) {
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Corpus.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Corpus.java b/src/main/java/org/apache/joshua/corpus/Corpus.java
index a943dd2..1a7d1b0 100755
--- a/src/main/java/org/apache/joshua/corpus/Corpus.java
+++ b/src/main/java/org/apache/joshua/corpus/Corpus.java
@@ -34,6 +34,7 @@ public interface Corpus { // extends Externalizable {
   // ===============================================================
 
   /**
+   * @param position the position at which we want to obtain a word ID
    * @return the integer representation of the Word at the specified position in the corpus.
    */
   int getWordID(int position);
@@ -51,7 +52,7 @@ public interface Corpus { // extends Externalizable {
   /**
    * Gets the sentence index of each specified position.
    * 
-   * @param position Index into the corpus
+   * @param positions Index into the corpus
    * @return array of the sentence indices associated with the specified positions in the corpus.
    */
   int[] getSentenceIndices(int[] positions);
@@ -60,6 +61,7 @@ public interface Corpus { // extends Externalizable {
    * Gets the position in the corpus of the first word of the specified sentence. If the sentenceID
    * is outside of the bounds of the sentences, then it returns the last position in the corpus + 1.
    * 
+   * @param sentenceID a specific sentence to obtain a position for
    * @return the position in the corpus of the first word of the specified sentence. If the
    *         sentenceID is outside of the bounds of the sentences, then it returns the last position
    *         in the corpus + 1.
@@ -69,6 +71,7 @@ public interface Corpus { // extends Externalizable {
   /**
    * Gets the exclusive end position of a sentence in the corpus.
    * 
+   * @param sentenceID a specific sentence to obtain an end position for
    * @return the position in the corpus one past the last word of the specified sentence. If the
    *         sentenceID is outside of the bounds of the sentences, then it returns one past the last
    *         position in the corpus.
@@ -113,7 +116,7 @@ public interface Corpus { // extends Externalizable {
    * @param phrase the superphrase that the comparsion phrase is drawn from
    * @param phraseStart the point in the phrase where the comparison begins (inclusive)
    * @param phraseEnd the point in the phrase where the comparison ends (exclusive)
-   * @return an int that follows the conventions of java.util.Comparator.compareTo()
+   * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
    */
   int comparePhrase(int corpusStart, Phrase phrase, int phraseStart, int phraseEnd);
 
@@ -122,9 +125,9 @@ public interface Corpus { // extends Externalizable {
    * Compares the phrase that starts at position start with the phrase passed in. Compares the
    * entire phrase.
    * 
-   * @param corpusStart
-   * @param phrase
-   * @return
+   * @param corpusStart position start
+   * @param phrase {@link org.apache.joshua.corpus.Phrase} to compare against
+   * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
    */
   int comparePhrase(int corpusStart, Phrase phrase);
 
@@ -134,15 +137,15 @@ public interface Corpus { // extends Externalizable {
    * @param position1 the position in the corpus where the first suffix begins
    * @param position2 the position in the corpus where the second suffix begins
    * @param maxComparisonLength a cutoff point to stop the comparison
-   * @return an int that follows the conventions of java.util.Comparator.compareTo()
+   * @return an int that follows the conventions of {@link java.util.Comparator#compare(Object, Object)}
    */
   int compareSuffixes(int position1, int position2, int maxComparisonLength);
 
   /**
    * 
-   * @param startPosition
-   * @param endPosition
-   * @return
+   * @param startPosition start position for phrase
+   * @param endPosition end position for phrase
+   * @return the {@link org.apache.joshua.corpus.ContiguousPhrase}
    */
   ContiguousPhrase getPhrase(int startPosition, int endPosition);
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Phrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Phrase.java b/src/main/java/org/apache/joshua/corpus/Phrase.java
index f22c8a5..5a06a8b 100644
--- a/src/main/java/org/apache/joshua/corpus/Phrase.java
+++ b/src/main/java/org/apache/joshua/corpus/Phrase.java
@@ -93,6 +93,9 @@ public interface Phrase extends Comparable<Phrase> {
    * complete Phrase List.
    * 
    * @see ArrayList#subList(int, int)
+   * @param start start position to begin new phrase
+   * @param end end position to end new phrase
+   * @return a new {@link org.apache.joshua.corpus.Phrase} object from the indexes provided.
    */
   Phrase subPhrase(int start, int end);
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Span.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Span.java b/src/main/java/org/apache/joshua/corpus/Span.java
index 753b007..414fe95 100644
--- a/src/main/java/org/apache/joshua/corpus/Span.java
+++ b/src/main/java/org/apache/joshua/corpus/Span.java
@@ -90,8 +90,8 @@ public class Span implements Iterable<Integer>, Comparable<Span> {
 
   /**
    * Returns true if the other span does not intersect with this one.
-   * @param o
-   * @return
+   * @param o new {@link org.apache.joshua.corpus.Span} to check for intersection
+   * @return true if the other span does not intersect with this one
    */
   public boolean disjointFrom(Span o) {
     if (start < o.start) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/SymbolTable.java b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
index d8b1694..274e8b9 100644
--- a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
+++ b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
@@ -17,9 +17,9 @@
  * under the License.
  */
 package org.apache.joshua.corpus; 
- 
+
 import java.util.Collection; 
- 
+
 /**
  * Represents a symbol table capable of mapping between strings and 
  * symbols. 
@@ -29,302 +29,299 @@ import java.util.Collection;
  * @version $LastChangedDate: 2009-11-24 23:07:43 -0600 (Tue, 24 Nov 2009) $ 
  */ 
 public interface SymbolTable { 
- 
- //TODO Remove all hard-coded references to nonterminals 
-  
- /**
-  * The unknown word's ID will be the size of the vocabulary, 
-  * ensuring that it is outside of the vocabulary. Note that 
-  * for vocabularies which have not been fixed yet, this 
-  * means the actual value is volatile and therefore a word 
-  * ID can only be compared against UNKNOWN_WORD at the time 
-  * the word ID is generated (otherwise unknown words can 
-  * become "known" if new words are added to the vocabulary 
-  * before testing). 
-  * <p> 
-  * Negative IDs are reserved for non-terminals. 
-  * 
-  * Zero is reserved as the UNKNOWN_WORD. 
-  */ 
- int UNKNOWN_WORD = 1; 
-  
- /** String representation for out-of-vocabulary words. */ 
- String UNKNOWN_WORD_STRING = "<unk>"; 
-  
- /**
-  * Integer representation of the bare (non-indexed) nonterminal X, 
-  * which represents a wild-card gap in a phrase. 
-  * <p> 
-  * All nonterminals are guaranteed to be represented by negative integers. 
-  */ 
- int X = -1; 
-  
- /**
-  * String representation of the bare (non-indexed) nonterminal X, 
-  * which represents a wild-card gap in a phrase. 
-  */ 
- String X_STRING = "[X]"; 
-  
-  
-  
- /**
-  * String representation of the nonterminal X with index 1, 
-  * which represents a wild-card gap in a phrase. 
-  */ 
- String X1_STRING = "[X,1]"; 
-  
-  
-  
- /**
-  * String representation of the nonterminal X with index 2, 
-  * which represents a wild-card gap in a phrase. 
-  */ 
- String X2_STRING = "[X,2]";  
-  
- /**
-  * Integer representation of the nonterminal S. 
-  * <p> 
-  * All nonterminals are guaranteed to be represented by negative integers. 
-  */ 
- int S = -4; 
-  
- /**
-  * String representation of the nonterminal S.. 
-  */ 
- String S_STRING = "[S]";  
-  
- /**
-  * Integer representation of the nonterminal X with index 1, 
-  * which represents a wild-card gap in a phrase. 
-  * <p> 
-  * All nonterminals are guaranteed to be represented by negative integers. 
-  */ 
- int S1 = -5; 
-  
- /**
-  * String representation of the nonterminal X with index 2, 
-  * which represents a wild-card gap in a phrase. 
-  */ 
- String S1_STRING = "[S,1]";  
-  
- /**
-  * Gets a unique integer identifier for the nonterminal. 
-  * <p> 
-  * The integer returned is guaranteed to be a negative number. 
-  *  
-  * If the nonterminal is {@link #X_STRING}, 
-  * then the value returned must be {@link #X}. 
-  *  
-  * Otherwise, the value returned must be a negative number  
-  * whose value is less than {@link X}. 
-  *  
-  * @param nonterminal Nonterminal symbol 
-  * @return a unique integer identifier for the nonterminal 
-  */ 
- int addNonterminal(String nonterminal); 
-  
- /**
-  * Gets a unique integer identifier for the terminal. 
-  *  
-  * @param terminal Terminal symbol 
-  * @return a unique integer identifier for the terminal 
-  */ 
- int addTerminal(String terminal); 
-  
- /**
-  * Gets the unique integer identifiers for the words. 
-  *  
-  * @param words Array of symbols 
-  * @return the unique integer identifiers for the words 
-  */ 
- int[] addTerminals(String[] words); 
-  
- /**
-  * Gets the unique integer identifiers for the words 
-  * in the sentence. 
-  *  
-  * @param sentence Space-delimited string of symbols 
-  * @return the unique integer identifiers for the words 
-  *         in the sentence 
-  */ 
- int[] addTerminals(String sentence); 
-  
- /**
-  * Gets an integer identifier for the word. 
-  * <p> 
-  * If the word is in the vocabulary, the integer returned 
-  * will uniquely identify that word. 
-  * <p> 
-  * If the word is not in the vocabulary, the integer returned 
-  * by <code>getUnknownWordID</code> may be returned. 
-  *  
-  * Alternatively, implementations may, if they choose, add 
-  * unknown words and assign them a symbol ID instead of 
-  * returning <code>getUnknownWordID</code>. 
-  *  
-  * @see #getUnknownWordID 
-  * @return the unique integer identifier for wordString,  
-  *         or the result of <code>getUnknownWordID<code>  
-  *         if wordString is not in the vocabulary 
-  */ 
- int getID(String wordString); 
-  
- /**
-  * Gets the integer identifiers for all words in the provided 
-  * sentence. 
-  * <p> 
-  * The sentence will be split (on spaces) into words, then 
-  * the integer identifier for each word will be retrieved 
-  * using <code>getID</code>. 
-  *  
-  * @see #getID(String) 
-  * @param sentence String of words, separated by spaces. 
-  * @return Array of integer identifiers for each word in 
-  *         the sentence 
-  */ 
- int[] getIDs(String sentence); 
-  
- /**
-  * Gets the String that corresponds to the specified integer 
-  * identifier. 
-  * <p> 
-  * If the identifier is in the symbol vocabulary, the String 
-  * returned will correspond to that identifier. 
-  *  
-  * Otherwise, the String returned by <code>getUnknownWord<code> 
-  * will be returned. 
-  * 
-  * @return the String that corresponds to the specified 
-  *         integer identifier, or the result of 
-  *         <code>getUnknownWord</code> if the identifier 
-  *         does not correspond to a word in the vocabulary 
-  */ 
- String getTerminal(int wordID); 
-  
- /**
-  * Gets the String that corresponds to the specified integer 
-  * identifier. 
-  * <p> 
-  * This method can be called for terminals or nonterminals. 
-  * 
-  * @param tokenID Integer identifier 
-  * @return the String that corresponds to the specified 
-  *         integer identifier 
-  */ 
- String getWord(int tokenID); 
-  
- /**
-  * Gets the String that corresponds to the sequence of 
-  * specified integer identifiers. 
-  * 
-  * @param ids Sequence of integer identifiers 
-  * @return the String that corresponds to the sequence of 
-  *         specified integer identifiers 
-  */ 
- String getWords(int[] ids); 
-  
- /**
-  *  
-  * @param wordIDs 
-  * @return 
-  */ 
- String getTerminals(int[] wordIDs); 
-  
- /**
-  * Gets a collection over all symbol identifiers for the 
-  * vocabulary. 
-  * 
-  * @return a collection over all symbol identifiers for the 
-  *         vocabulary 
-  */ 
- Collection<Integer> getAllIDs(); 
-  
- /**
-  * Gets the list of all words represented by this vocabulary. 
-  * 
-  * @return the list of all words represented by this 
-  *         vocabulary 
-  */ 
- Collection<String> getWords(); 
-  
- /**
-  * Gets the number of unique words in the vocabulary. 
-  * 
-  * @return the number of unique words in the vocabulary. 
-  */ 
- int size(); 
-  
- /**
-  * Gets the integer symbol representation of the unknown 
-  * word. 
-  * 
-  * @return the integer symbol representation of the unknown 
-  *         word. 
-  */ 
- int getUnknownWordID(); 
-  
- /**
-  * Gets the string representation of the unknown word. 
-  * 
-  * @return the string representation of the unknown word. 
-  */ 
- String getUnknownWord(); 
-  
- /**
-  * Returns <code>true</code> if the symbol id represents a 
-  * nonterminal, <code>false</code> otherwise. 
-  *  
-  * @param id 
-  * @return <code>true</code> if the symbol id represents a 
-  *         nonterminal, <code>false</code> otherwise. 
-  */ 
- boolean isNonterminal(int id); 
-  
- /**
-  * Gets the lowest-valued allowable terminal symbol id in 
-  * this table. 
-  * 
-  * @return the lowest-valued allowable terminal symbol id 
-  *         in this table. 
-  */ 
- int getLowestID(); 
- 
-  
- /**
-  * Gets the highest-valued allowable terminal symbol id in 
-  * this table. 
-  * <p> 
-  * NOTE: This may or may not return the same value as 
-  * <code>size</code>. 
-  * 
-  * @return the highest-valued allowable terminal symbol id 
-  *         in this table. 
-  */ 
- int getHighestID(); 
-  
- /**
-  *  
-  *  
-  * @param id 
-  * @return 
-  */ 
- int getTargetNonterminalIndex(int id);//first convert id to its String mapping, then call the function below 
-  
- /**
-  *  
-  *  
-  * @param word 
-  * @return 
-  */ 
- int getTargetNonterminalIndex(String word); 
-  
- /**
-  *  
-  *  
-  * @param wordIDs 
-  * @param ntIndexIncrements 
-  * @return 
-  */ 
- String getWords(int[] wordIDs, boolean ntIndexIncrements); 
-  
+
+  //TODO Remove all hard-coded references to nonterminals 
+
+  /**
+   * The unknown word's ID will be the size of the vocabulary, 
+   * ensuring that it is outside of the vocabulary. Note that 
+   * for vocabularies which have not been fixed yet, this 
+   * means the actual value is volatile and therefore a word 
+   * ID can only be compared against UNKNOWN_WORD at the time 
+   * the word ID is generated (otherwise unknown words can 
+   * become "known" if new words are added to the vocabulary 
+   * before testing). 
+   * <p> 
+   * Negative IDs are reserved for non-terminals. 
+   * 
+   * Zero is reserved as the UNKNOWN_WORD. 
+   */ 
+  int UNKNOWN_WORD = 1; 
+
+  /** String representation for out-of-vocabulary words. */ 
+  String UNKNOWN_WORD_STRING = "<unk>"; 
+
+  /**
+   * Integer representation of the bare (non-indexed) nonterminal X, 
+   * which represents a wild-card gap in a phrase. 
+   * <p> 
+   * All nonterminals are guaranteed to be represented by negative integers. 
+   */ 
+  int X = -1; 
+
+  /**
+   * String representation of the bare (non-indexed) nonterminal X, 
+   * which represents a wild-card gap in a phrase. 
+   */ 
+  String X_STRING = "[X]"; 
+
+
+
+  /**
+   * String representation of the nonterminal X with index 1, 
+   * which represents a wild-card gap in a phrase. 
+   */ 
+  String X1_STRING = "[X,1]"; 
+
+
+
+  /**
+   * String representation of the nonterminal X with index 2, 
+   * which represents a wild-card gap in a phrase. 
+   */ 
+  String X2_STRING = "[X,2]";  
+
+  /**
+   * Integer representation of the nonterminal S. 
+   * <p> 
+   * All nonterminals are guaranteed to be represented by negative integers. 
+   */ 
+  int S = -4; 
+
+  /**
+   * String representation of the nonterminal S. 
+   */ 
+  String S_STRING = "[S]";  
+
+  /**
+   * Integer representation of the nonterminal S with index 1, 
+   * which represents a wild-card gap in a phrase. 
+   * <p> 
+   * All nonterminals are guaranteed to be represented by negative integers. 
+   */ 
+  int S1 = -5; 
+
+  /**
+   * String representation of the nonterminal S with index 1, 
+   * which represents a wild-card gap in a phrase. 
+   */ 
+  String S1_STRING = "[S,1]";  
+
+  /**
+   * Gets a unique integer identifier for the nonterminal. 
+   * <p> 
+   * The integer returned is guaranteed to be a negative number. 
+   *  
+   * If the nonterminal is {@link #X_STRING}, 
+   * then the value returned must be {@link #X}. 
+   *  
+   * Otherwise, the value returned must be a negative number  
+   * whose value is less than {@link #X}. 
+   *  
+   * @param nonterminal Nonterminal symbol 
+   * @return a unique integer identifier for the nonterminal 
+   */ 
+  int addNonterminal(String nonterminal); 
+
+  /**
+   * Gets a unique integer identifier for the terminal. 
+   *  
+   * @param terminal Terminal symbol 
+   * @return a unique integer identifier for the terminal 
+   */ 
+  int addTerminal(String terminal); 
+
+  /**
+   * Gets the unique integer identifiers for the words. 
+   *  
+   * @param words Array of symbols 
+   * @return the unique integer identifiers for the words 
+   */ 
+  int[] addTerminals(String[] words); 
+
+  /**
+   * Gets the unique integer identifiers for the words 
+   * in the sentence. 
+   *  
+   * @param sentence Space-delimited string of symbols 
+   * @return the unique integer identifiers for the words 
+   *         in the sentence 
+   */ 
+  int[] addTerminals(String sentence); 
+
+  /**
+   * Gets an integer identifier for the word. 
+   * <p> 
+   * If the word is in the vocabulary, the integer returned 
+   * will uniquely identify that word. 
+   * <p> 
+   * If the word is not in the vocabulary, the integer returned 
+   * by <code>getUnknownWordID</code> may be returned. 
+   *  
+   * Alternatively, implementations may, if they choose, add 
+   * unknown words and assign them a symbol ID instead of 
+   * returning <code>getUnknownWordID</code>. 
+   *  
+   * @see #getUnknownWordID 
+   * @return the unique integer identifier for wordString,  
+   *         or the result of <code>getUnknownWordID</code>  
+   *         if wordString is not in the vocabulary 
+   * @param wordString the word whose integer identifier is to be retrieved
+   */ 
+  int getID(String wordString); 
+
+  /**
+   * Gets the integer identifiers for all words in the provided 
+   * sentence. 
+   * <p> 
+   * The sentence will be split (on spaces) into words, then 
+   * the integer identifier for each word will be retrieved 
+   * using <code>getID</code>. 
+   *  
+   * @see #getID(String) 
+   * @param sentence String of words, separated by spaces. 
+   * @return Array of integer identifiers for each word in 
+   *         the sentence 
+   */ 
+  int[] getIDs(String sentence); 
+
+  /**
+   * Gets the String that corresponds to the specified integer 
+   * identifier. 
+   * <p> 
+   * If the identifier is in the symbol vocabulary, the String 
+   * returned will correspond to that identifier. 
+   *  
+   * Otherwise, the String returned by <code>getUnknownWord</code> 
+   * will be returned. 
+   * 
+   * @param wordID an integer identifier for a specific String
+   * @return the String that corresponds to the specified 
+   *         integer identifier, or the result of 
+   *         <code>getUnknownWord</code> if the identifier 
+   *         does not correspond to a word in the vocabulary 
+   */ 
+  String getTerminal(int wordID); 
+
+  /**
+   * Gets the String that corresponds to the specified integer 
+   * identifier. 
+   * <p> 
+   * This method can be called for terminals or nonterminals. 
+   * 
+   * @param tokenID Integer identifier 
+   * @return the String that corresponds to the specified 
+   *         integer identifier 
+   */ 
+  String getWord(int tokenID); 
+
+  /**
+   * Gets the String that corresponds to the sequence of 
+   * specified integer identifiers. 
+   * 
+   * @param ids Sequence of integer identifiers 
+   * @return the String that corresponds to the sequence of 
+   *         specified integer identifiers 
+   */ 
+  String getWords(int[] ids); 
+
+  /**
+   *  
+   * @param wordIDs an int[] of identifiers for specific Strings
+   * @return the String that corresponds to the specified 
+   *         integer identifiers
+   */ 
+  String getTerminals(int[] wordIDs); 
+
+  /**
+   * Gets a collection over all symbol identifiers for the 
+   * vocabulary. 
+   * 
+   * @return a collection over all symbol identifiers for the 
+   *         vocabulary 
+   */ 
+  Collection<Integer> getAllIDs(); 
+
+  /**
+   * Gets the list of all words represented by this vocabulary. 
+   * 
+   * @return the list of all words represented by this 
+   *         vocabulary 
+   */ 
+  Collection<String> getWords(); 
+
+  /**
+   * Gets the number of unique words in the vocabulary. 
+   * 
+   * @return the number of unique words in the vocabulary. 
+   */ 
+  int size(); 
+
+  /**
+   * Gets the integer symbol representation of the unknown 
+   * word. 
+   * 
+   * @return the integer symbol representation of the unknown 
+   *         word. 
+   */ 
+  int getUnknownWordID(); 
+
+  /**
+   * Gets the string representation of the unknown word. 
+   * 
+   * @return the string representation of the unknown word. 
+   */ 
+  String getUnknownWord(); 
+
+  /**
+   * Returns <code>true</code> if the symbol id represents a 
+   * nonterminal, <code>false</code> otherwise. 
+   *  
+   * @param id int symbol id
+   * @return <code>true</code> if the symbol id represents a 
+   *         nonterminal, <code>false</code> otherwise. 
+   */ 
+  boolean isNonterminal(int id); 
+
+  /**
+   * Gets the lowest-valued allowable terminal symbol id in 
+   * this table. 
+   * 
+   * @return the lowest-valued allowable terminal symbol id 
+   *         in this table. 
+   */ 
+  int getLowestID(); 
+
+
+  /**
+   * Gets the highest-valued allowable terminal symbol id in 
+   * this table. 
+   * <p> 
+   * NOTE: This may or may not return the same value as 
+   * <code>size</code>. 
+   * 
+   * @return the highest-valued allowable terminal symbol id 
+   *         in this table. 
+   */ 
+  int getHighestID(); 
+
+  /**
+   * @param id todo
+   * @return todo
+   */ 
+  int getTargetNonterminalIndex(int id);//first convert id to its String mapping, then call the function below 
+
+  /**
+   * @param word todo
+   * @return todo
+   */ 
+  int getTargetNonterminalIndex(String word); 
+
+  /**
+   * @param wordIDs todo
+   * @param ntIndexIncrements todo
+   * @return todo
+   */ 
+  String getWords(int[] wordIDs, boolean ntIndexIncrements); 
+
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java b/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
index 8f2a576..e82b4cc 100644
--- a/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
+++ b/src/main/java/org/apache/joshua/corpus/TerminalIterator.java
@@ -39,8 +39,7 @@ public class TerminalIterator implements Iterator<Integer> {
   /**
    * Constructs an iterator for the terminals in the given list of words.
    * 
-   * @param vocab
-   * @param words
+   * @param words array of words
    */
   public TerminalIterator(int[] words) {
     this.words = words;
@@ -75,7 +74,7 @@ public class TerminalIterator implements Iterator<Integer> {
   /**
    * Unsupported operation, guaranteed to throw an UnsupportedOperationException.
    * 
-   * @throws UnsupportedOperationException
+   * @throws UnsupportedOperationException always, because removal is not supported
    */
   public void remove() {
     throw new UnsupportedOperationException();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Vocabulary.java b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
index a153902..582efc6 100644
--- a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
+++ b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
@@ -22,10 +22,13 @@ import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
+import java.io.Externalizable;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -43,7 +46,7 @@ import org.apache.joshua.util.FormatUtils;
  * @author Juri Ganitkevitch
  */
 
-public class Vocabulary {
+public class Vocabulary implements Externalizable {
 
   private final static ArrayList<NGramLanguageModel> LMs = new ArrayList<>();
 
@@ -80,9 +83,9 @@ public class Vocabulary {
    * Reads a vocabulary from file. This deletes any additions to the vocabulary made prior to
    * reading the file.
    *
-   * @param file_name
+   * @param vocab_file path to a vocabulary file
    * @return Returns true if vocabulary was read without mismatches or collisions.
-   * @throws IOException
+   * @throws IOException if the file cannot be found or read properly
    */
   public static boolean read(final File vocab_file) throws IOException {
     DataInputStream vocab_stream =
@@ -125,9 +128,12 @@ public class Vocabulary {
    * Get the id of the token if it already exists, new id is created otherwise.
    *
    * TODO: currently locks for every call. Separate constant (frozen) ids from
-   * changing (e.g. OOV) ids. Constant ids could be immutable -> no locking.
+   * changing (e.g. OOV) ids. Constant ids could be immutable -&gt; no locking.
    * Alternatively: could we use ConcurrentHashMap to not have to lock if
    * actually contains it and only lock for modifications?
+   * 
+   * @param token a token to obtain an id for
+   * @return the token id
    */
   public static int id(String token) {
     // First attempt an optimistic read
@@ -185,7 +191,7 @@ public class Vocabulary {
   public static int[] addAll(String sentence) {
     return addAll(sentence.split("\\s+"));
   }
-  
+
   public static int[] addAll(String[] tokens) {
     int[] ids = new int[tokens.length];
     for (int i = 0; i < tokens.length; i++)
@@ -230,8 +236,8 @@ public class Vocabulary {
   /**
    * Returns true if the Vocabulary ID represents a nonterminal.
    *
-   * @param id
-   * @return
+   * @param id vocabulary ID to check
+   * @return true if the Vocabulary ID represents a nonterminal
    */
   public static boolean nt(int id) {
     return (id < 0);
@@ -275,4 +281,26 @@ public class Vocabulary {
     LMs.clear();
   }
 
+  @Override
+  public void writeExternal(ObjectOutput out) throws IOException {
+    // TODO Auto-generated method stub
+
+  }
+
+  @Override
+  public void readExternal(ObjectInput in)
+      throws IOException, ClassNotFoundException {
+    // TODO Auto-generated method stub
+
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if(getClass() == o.getClass()) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/package.html
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/package.html b/src/main/java/org/apache/joshua/corpus/package.html
deleted file mode 100644
index 7643936..0000000
--- a/src/main/java/org/apache/joshua/corpus/package.html
+++ /dev/null
@@ -1,19 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
-<html>
-<head></head>
-<body bgcolor="white">
-
-<!--
-##### THIS IS THE TEMPLATE FOR THE PACKAGE DOC COMMENTS. #####
-##### TYPE YOUR PACKAGE COMMENTS HERE.  BEGIN WITH A     #####
-##### ONE-SENTENCE SUMMARY STARTING WITH A VERB LIKE:    #####
--->
-
-Provides data structures for representing and manipulating corpora
-and phrases extracted from corpora.
-
-
-<!-- Put @see and @since tags down here. -->
-
-</body>
-</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
index dc98585..f374279 100644
--- a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
+++ b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
@@ -280,17 +280,14 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
         // TODO: bothersome no-backwards-arrays method.
       }
     }
-
     return labels;
   }
 
-
   @Override
   public int[] getTerminals() {
     return getTerminals(0, terminals.size());
   }
 
-
   @Override
   public int[] getTerminals(int from, int to) {
     int[] span = new int[to - from];
@@ -299,40 +296,32 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
     return span;
   }
 
-
   public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
     // TODO Auto-generated method stub
-
   }
 
-
   public void writeExternal(ObjectOutput out) throws IOException {
     // TODO Auto-generated method stub
-
   }
 
-
   /**
    * Reads Penn Treebank format file
+   * @param file_name the string path of the Penn Treebank file
+   * @throws IOException if the file does not exist
    */
   public void readExternalText(String file_name) throws IOException {
     LineReader reader = new LineReader(file_name);
-
     initialize();
-
     for (String line : reader) {
       if (line.trim().equals("")) continue;
       appendFromPennFormat(line);
     }
   }
 
-
   public void writeExternalText(String file_name) throws IOException {
     // TODO Auto-generated method stub
-
   }
 
-
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ArgsParser.java b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
index fea20fc..ce42938 100644
--- a/src/main/java/org/apache/joshua/decoder/ArgsParser.java
+++ b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
@@ -37,8 +37,9 @@ public class ArgsParser {
    * Parse the arguments passed from the command line when the JoshuaDecoder application was
    * executed from the command line.
    * 
-   * @param args
-   * @throws IOException 
+   * @param args string array of input arguments
+   * @param joshuaConfiguration the {@link org.apache.joshua.decoder.JoshuaConfiguration}
+   * @throws IOException if there is an error with the input arguments
    */
   public ArgsParser(String[] args, JoshuaConfiguration joshuaConfiguration) throws IOException {
 
@@ -59,8 +60,8 @@ public class ArgsParser {
           LineReader reader = new LineReader(String.format("%s/VERSION", System.getenv("JOSHUA")));
           reader.readLine();
           String version = reader.readLine().split("\\s+")[2];
-          System.out.println(String.format("The Joshua machine translator, version %s", version));
-          System.out.println("joshua-decoder.org");
+          System.out.println(String.format("The Apache Joshua machine translator, version %s", version));
+          System.out.println("joshua.incubator.apache.org");
           System.exit(0);
 
         } else if (args[i].equals("-license")) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/BLEU.java b/src/main/java/org/apache/joshua/decoder/BLEU.java
index a531611..8b51403 100644
--- a/src/main/java/org/apache/joshua/decoder/BLEU.java
+++ b/src/main/java/org/apache/joshua/decoder/BLEU.java
@@ -20,7 +20,6 @@ package org.apache.joshua.decoder;
 
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -35,7 +34,7 @@ import org.apache.joshua.util.Regex;
 /**
  * this class implements: (1) sentence-level bleu, with smoothing
  * 
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
  */
 public class BLEU {
   // do_ngram_clip: consider global n-gram clip
@@ -47,11 +46,12 @@ public class BLEU {
   // ====================multiple references
   /**
    * 
-   * @param refSents
-   * @param hypSent
+   * @param refSents todo
+   * @param hypSent todo
    * @param doNgramClip Should usually be true
    * @param bleuOrder Should usually be 4
    * @param useShortestRef Probably use false
+   * @return todo
    */
   public static float computeSentenceBleu(String[] refSents, String hypSent, boolean doNgramClip,
       int bleuOrder, boolean useShortestRef) {
@@ -92,6 +92,9 @@ public class BLEU {
 
   /**
    * words in the ngrams are using integer symbol ID
+   * @param refSents todo
+   * @param bleuOrder todo
+   * @return todo
    * */
   public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
 
@@ -111,6 +114,8 @@ public class BLEU {
 
   /**
    * compute max_ref_count for each ngram in the reference sentences
+   * @param listRefNgramTbl todo
+   * @return todo
    * */
   public static HashMap<String, Integer> computeMaxRefCountTbl(
       List<HashMap<String, Integer>> listRefNgramTbl) {
@@ -195,10 +200,7 @@ public class BLEU {
           numNgramMatch[Regex.spaces.split(ngram).length - 1] += Support.findMin(
               refNgramTbl.get(ngram), entry.getValue()); // ngram clip
         } else {
-          numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without
-                                                                                        // ngram
-                                                                                        // count
-                                                                                        // clipping
+          numNgramMatch[Regex.spaces.split(ngram).length - 1] += entry.getValue();// without ngram count clipping
         }
       }
     }
@@ -256,6 +258,11 @@ public class BLEU {
 
   /**
    * speed consideration: assume hypNgramTable has a smaller size than referenceNgramTable does
+   * @param linearCorpusGainThetas todo
+   * @param hypLength todo
+   * @param hypNgramTable todo
+   * @param referenceNgramTable todo
+   * @return todo
    */
   public static float computeLinearCorpusGain(float[] linearCorpusGainThetas, int hypLength,
       Map<String, Integer> hypNgramTable, Map<String, Integer> referenceNgramTable) {
@@ -331,8 +338,10 @@ public class BLEU {
     return res;
   }
 
+  public static final int maxOrder = 4;
+
   /**
-   * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n <= 4) for terminal rules
+   * Computes BLEU statistics incurred by a rule. This is (a) all ngram (n &lt;= 4) for terminal rules
    * and (b) all ngrams overlying boundary points between terminals in the rule and ngram state from
    * tail nodes.
    * 
@@ -346,13 +355,11 @@ public class BLEU {
    * 
    * Of these, all but the first have a boundary point to consider.
    * 
-   * @param rule the rule being applied
-   * @param spanWidth the width of the span in the input sentence
+   * @param edge todo
+   * @param spanPct todo
    * @param references the reference to compute statistics against
-   * @return
+   * @return todo
    */
-  public static final int maxOrder = 4;
-
   public static Stats compute(HyperEdge edge, float spanPct, References references) {
     Stats stats = new Stats();
     // TODO: this should not be the span width, but the real ref scaled to the span percentage

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index 1aac0b0..87ab650 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -28,7 +28,6 @@ import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.lang.reflect.Constructor;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -80,10 +79,10 @@ import org.apache.joshua.util.io.LineReader;
  * Translations object). Translations itself is an iterator whose next() call blocks until the next
  * translation is available.
  *
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
  */
 public class Decoder {
 
@@ -117,7 +116,8 @@ public class Decoder {
   /**
    * Constructor method that creates a new decoder using the specified configuration file.
    *
-   * @param configFile Name of configuration file.
+   * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
+   * @param configFile name of configuration file.
    */
   public Decoder(JoshuaConfiguration joshuaConfiguration, String configFile) {
     this(joshuaConfiguration);
@@ -128,6 +128,7 @@ public class Decoder {
    * Factory method that creates a new decoder using the specified configuration file.
    *
    * @param configFile Name of configuration file.
+   * @return a configured {@link org.apache.joshua.decoder.Decoder}
    */
   public static Decoder createDecoder(String configFile) {
     JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
@@ -154,6 +155,8 @@ public class Decoder {
    * <p>
    * This method is called by unit tests or any outside packages (e.g., MERT) relying on the
    * decoder.
+   * @param joshuaConfiguration a {@link org.apache.joshua.decoder.JoshuaConfiguration} object
+   * @return an uninitialized decoder for use in testing
    */
   static public Decoder getUninitalizedDecoder(JoshuaConfiguration joshuaConfiguration) {
     return new Decoder(joshuaConfiguration);
@@ -436,9 +439,9 @@ public class Decoder {
    * (possibly boundless) set of input sentences. Each request launches its own thread to read the
    * sentences of the request.
    *
-   * @param request
-   * @return an iterable set of Translation objects
-   * @throws IOException
+   * @param request the populated {@link org.apache.joshua.decoder.io.TranslationRequestStream}
+   * @param out an appropriate {@link java.io.OutputStream} to write results to
+   * @throws IOException if there is an error with the input stream or writing the output
    */
   public void decodeAll(TranslationRequestStream request, OutputStream out) throws IOException {
     Translations translations = new Translations(request);
@@ -496,8 +499,8 @@ public class Decoder {
   /**
    * We can also just decode a single sentence.
    *
-   * @param sentence
-   * @return The translated sentence
+   * @param sentence {@link org.apache.joshua.decoder.segment_file.Sentence} input
+   * @return the sentence {@link org.apache.joshua.decoder.Translation}
    */
   public Translation decode(Sentence sentence) {
     // Get a thread.
@@ -777,7 +780,7 @@ public class Decoder {
               : -1;
 
           joshuaConfiguration.search_algorithm = "stack";
-          grammar = new PhraseTable(path, owner, type, joshuaConfiguration, maxSourceLen);
+          grammar = new PhraseTable(path, owner, type, joshuaConfiguration);
         }
 
         this.grammars.add(grammar);
@@ -794,7 +797,7 @@ public class Decoder {
     }
     
     /* Add the grammar for custom entries */
-    this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration, 0);
+    this.customPhraseTable = new PhraseTable(null, "custom", "phrase", joshuaConfiguration);
     this.grammars.add(this.customPhraseTable);
     
     /* Create an epsilon-deleting grammar */

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
index 4390a59..0f80335 100644
--- a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@ -44,8 +44,8 @@ import org.apache.joshua.corpus.Vocabulary;
  * 
  * The DecoderFactory class is responsible for launching the threads.
  * 
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
  */
 
 public class DecoderThread extends Thread {
@@ -91,6 +91,7 @@ public class DecoderThread extends Thread {
    * Translate a sentence.
    * 
    * @param sentence The sentence to be translated.
+   * @return the sentence {@link org.apache.joshua.decoder.Translation}
    */
   public Translation translate(Sentence sentence) {
 
@@ -105,7 +106,7 @@ public class DecoderThread extends Thread {
       Decoder.LOG(1, String.format("Translation %d: Translation took 0 seconds", sentence.id()));
       return new Translation(sentence, null, featureFunctions, joshuaConfiguration);
     }
-    
+
     long startTime = System.currentTimeMillis();
 
     int numGrammars = allGrammars.size();
@@ -113,7 +114,7 @@ public class DecoderThread extends Thread {
 
     for (int i = 0; i < allGrammars.size(); i++)
       grammars[i] = allGrammars.get(i);
-    
+
     if (joshuaConfiguration.segment_oovs)
       sentence.segmentOOVs(grammars);
 
@@ -127,7 +128,7 @@ public class DecoderThread extends Thread {
 
       if (joshuaConfiguration.search_algorithm.equals("stack")) {
         Stacks stacks = new Stacks(sentence, this.featureFunctions, grammars, joshuaConfiguration);
-        
+
         hypergraph = stacks.search();
       } else {
         /* Seeding: the chart only sees the grammars, not the factories */
@@ -135,10 +136,10 @@ public class DecoderThread extends Thread {
             joshuaConfiguration.goal_symbol, joshuaConfiguration);
 
         hypergraph = (joshuaConfiguration.use_dot_chart) 
-          ? chart.expand() 
-          : chart.expandSansDotChart();
+            ? chart.expand() 
+                : chart.expandSansDotChart();
       }
-      
+
     } catch (java.lang.OutOfMemoryError e) {
       Decoder.LOG(1, String.format("Input %d: out of memory", sentence.id()));
       hypergraph = null;
@@ -155,7 +156,7 @@ public class DecoderThread extends Thread {
     }
 
     /*****************************************************************************************/
-    
+
     /*
      * Synchronous parsing.
      * 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
index b4624cf..def7c85 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@ -43,8 +43,8 @@ import org.apache.joshua.util.io.LineReader;
  * When adding new features to Joshua, any new configurable parameters should be added to this
  * class.
  *
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
  */
 public class JoshuaConfiguration {
 
@@ -325,6 +325,8 @@ public class JoshuaConfiguration {
    * To process command-line options, we write them to a file that looks like the config file, and
    * then call readConfigFile() on it. It would be more general to define a class that sits on a
    * stream and knows how to chop it up, but this was quicker to implement.
+   * 
+   * @param options string array of command line options
    */
   public void processCommandLineOptions(String[] options) {
     try {
@@ -696,8 +698,13 @@ public class JoshuaConfiguration {
    * equivalence classes on external use of parameter names, permitting arbitrary_under_scores and
    * camelCasing in paramter names without forcing the user to memorize them all. Here are some
    * examples of equivalent ways to refer to parameter names:
-   *
+   * <pre>
    * {pop-limit, poplimit, PopLimit, popLimit, pop_lim_it} {lmfile, lm-file, LM-FILE, lm_file}
+   * </pre>
+   * 
+   * @param text the string to be normalized
+   * @return normalized key
+   * 
    */
   public static String normalize_key(String text) {
     return text.replaceAll("[-_]", "").toLowerCase();

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
index 8c0b10b..42b17d7 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaDecoder.java
@@ -39,9 +39,9 @@ import org.apache.joshua.server.ServerThread;
  * Implements decoder initialization, including interaction with <code>JoshuaConfiguration</code>
  * and <code>DecoderThread</code>.
  * 
- * @author Zhifei Li, <zh...@gmail.com>
- * @author wren ng thornton <wr...@users.sourceforge.net>
- * @author Lane Schwartz <do...@users.sourceforge.net>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author wren ng thornton wren@users.sourceforge.net
+ * @author Lane Schwartz dowobeha@users.sourceforge.net
  */
 public class JoshuaDecoder {
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
index b2126cb..e2061b0 100644
--- a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
+++ b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
@@ -42,7 +42,7 @@ import org.apache.joshua.util.Regex;
  * uses a Viterbi approximation: the probability of a string is its best derivation probability So,
  * if one want to deal with spurious ambiguity, he/she should do that before calling this class
  * 
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
  */
 public class NbestMinRiskReranker {
 
@@ -182,7 +182,10 @@ public class NbestMinRiskReranker {
   /**
    * based on a list of log-probabilities in nbestLogProbs, obtain a normalized distribution, and
    * put the normalized probability (real value in [0,1]) into nbestLogProbs
-   * */
+   * 
+   * @param nbestLogProbs a {@link java.util.List} of {@link java.lang.Double} representing nbestLogProbs
+   * @param scalingFactor double value representing scaling factor
+   */
   // get a normalized distributeion and put it back to nbestLogProbs
   static public void computeNormalizedProbs(List<Double> nbestLogProbs, double scalingFactor) {
 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 75e6ab4..8aa518e 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -33,10 +33,9 @@ import org.apache.joshua.decoder.hypergraph.HyperGraph;
 import org.apache.joshua.decoder.segment_file.Sentence;
 
 /**
- * structuredTranslation provides a more structured access to translation
+ * <p>structuredTranslation provides a more structured access to translation
  * results than the Translation class.
- * Members of instances of this class can be used upstream.
- * <br/>
+ * Members of instances of this class can be used upstream.</p>
  * TODO:
  * Enable K-Best extraction.
  * 
@@ -107,6 +106,7 @@ public class StructuredTranslation {
 
   /**
    * Returns a list of target to source alignments.
+   * @return a list of target to source alignments
    */
   public List<List<Integer>> getTranslationWordAlignments() {
     return translationWordAlignments;
@@ -118,6 +118,7 @@ public class StructuredTranslation {
   
   /**
    * Time taken to build output information from the hypergraph.
+   * @return the time taken to build output information from the hypergraph
    */
   public Float getExtractionTime() {
     return extractionTime;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Support.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Support.java b/src/main/java/org/apache/joshua/decoder/Support.java
index 7c4a0b2..e513aef 100644
--- a/src/main/java/org/apache/joshua/decoder/Support.java
+++ b/src/main/java/org/apache/joshua/decoder/Support.java
@@ -21,7 +21,7 @@ package org.apache.joshua.decoder;
 import java.util.List;
 
 /**
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Zhifei Li, zhifei.work@gmail.com
  */
 public class Support {
 
@@ -33,14 +33,15 @@ public class Support {
     return (a > b) ? a : b;
   }
 
-  
   public static int[] toArray(List<Integer> in) {
     return subIntArray(in, 0, in.size());
   }
 
   /**
+   * @param in a {@link java.util.List} of Integer
    * @param start inclusive
    * @param end exclusive
+   * @return sub int[] from start to end
    */
   public static int[] subIntArray(List<Integer> in, int start, int end) {
     int[] res = new int[end - start];

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index 5afae74..0ee1f08 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -41,7 +41,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
  * sentence and id and contains the decoded hypergraph. Translation objects are returned by
  * DecoderThread instances to the InputHandler, where they are assembled in order for output.
  * 
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
  */
 
 public class Translation {
@@ -54,17 +54,17 @@ public class Translation {
   private String output = null;
 
   private StructuredTranslation structuredTranslation = null;
-  
+
   public Translation(Sentence source, HyperGraph hypergraph, 
       List<FeatureFunction> featureFunctions, JoshuaConfiguration joshuaConfiguration) {
     this.source = source;
-    
+
     if (joshuaConfiguration.use_structured_output) {
-      
+
       structuredTranslation = new StructuredTranslation(
           source, hypergraph, featureFunctions);
       this.output = structuredTranslation.getTranslationString();
-      
+
     } else {
 
       StringWriter sw = new StringWriter();
@@ -81,15 +81,15 @@ public class Translation {
           // We must put this weight as zero, otherwise we get an error when we try to retrieve it
           // without checking
           Decoder.weights.increment("BLEU", 0);
-          
+
           if (joshuaConfiguration.topN == 0) {
-            
+
             /* construct Viterbi output */
             final String best = getViterbiString(hypergraph);
-            
+
             Decoder.LOG(1, String.format("Translation %d: %.3f %s", source.id(), hypergraph.goalNode.getScore(),
                 best));
-            
+
             /*
              * Setting topN to 0 turns off k-best extraction, in which case we need to parse through
              * the output-string, with the understanding that we can only substitute variables for the
@@ -100,21 +100,21 @@ public class Translation {
                 .replace("%S", DeNormalize.processSingleLine(best))
                 .replace("%c", String.format("%.3f", hypergraph.goalNode.getScore()))
                 .replace("%i", String.format("%d", source.id()));
-            
+
             if (joshuaConfiguration.outputFormat.contains("%a")) {
               translation = translation.replace("%a", getViterbiWordAlignments(hypergraph));
             }
-            
+
             if (joshuaConfiguration.outputFormat.contains("%f")) {
               final FeatureVector features = getViterbiFeatures(hypergraph, featureFunctions, source);
               translation = translation.replace("%f", joshuaConfiguration.moses ? features.mosesString() : features.toString());
             }
-            
+
             out.write(translation);
             out.newLine();
-            
+
           } else {
-            
+
             final KBestExtractor kBestExtractor = new KBestExtractor(
                 source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
             kBestExtractor.lazyKBestExtractOnHG(hypergraph, joshuaConfiguration.topN, out);
@@ -132,31 +132,31 @@ public class Translation {
           Decoder.LOG(1, String.format("Input %d: %d-best extraction took %.3f seconds", id(),
               joshuaConfiguration.topN, seconds));
 
-      } else {
-        
-        // Failed translations and blank lines get empty formatted outputs
-        // @formatter:off
-        String outputString = joshuaConfiguration.outputFormat
-            .replace("%s", source.source())
-            .replace("%e", "")
-            .replace("%S", "")
-            .replace("%t", "()")
-            .replace("%i", Integer.toString(source.id()))
-            .replace("%f", "")
-            .replace("%c", "0.000");
-        // @formatter:on
-
-        out.write(outputString);
-        out.newLine();
-      }
+        } else {
+
+          // Failed translations and blank lines get empty formatted outputs
+          // @formatter:off
+          String outputString = joshuaConfiguration.outputFormat
+              .replace("%s", source.source())
+              .replace("%e", "")
+              .replace("%S", "")
+              .replace("%t", "()")
+              .replace("%i", Integer.toString(source.id()))
+              .replace("%f", "")
+              .replace("%c", "0.000");
+          // @formatter:on
+
+          out.write(outputString);
+          out.newLine();
+        }
 
         out.flush();
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
-      
+
       this.output = sw.toString();
-      
+
     }
 
     /*
@@ -169,7 +169,7 @@ public class Translation {
         break;
       }
     }
-    
+
   }
 
   public Sentence getSourceSentence() {
@@ -184,12 +184,12 @@ public class Translation {
   public String toString() {
     return output;
   }
-  
+
   /**
    * Returns the StructuredTranslation object
    * if JoshuaConfiguration.construct_structured_output == True.
    * @throws RuntimeException if StructuredTranslation object not set.
-   * @return
+   * @return {@link org.apache.joshua.decoder.StructuredTranslation} object
    */
   public StructuredTranslation getStructuredTranslation() {
     if (structuredTranslation == null) {
@@ -197,5 +197,5 @@ public class Translation {
     }
     return structuredTranslation;
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/Translations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translations.java b/src/main/java/org/apache/joshua/decoder/Translations.java
index 7dd9086..0b91ff9 100644
--- a/src/main/java/org/apache/joshua/decoder/Translations.java
+++ b/src/main/java/org/apache/joshua/decoder/Translations.java
@@ -30,7 +30,7 @@ import org.apache.joshua.decoder.io.TranslationRequestStream;
  * Translation in the right place. When the next translation in a sequence is available, next() is
  * notified.
  * 
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Matt Post post@cs.jhu.edu
  */
 public class Translations {
 
@@ -73,7 +73,7 @@ public class Translations {
    * the ID of the translation is the same as the one being waited for (currentID). If so, the
    * thread waiting for it is notified.
    * 
-   * @param translation
+   * @param translation a translated input object
    */
   public void record(Translation translation) {
     synchronized (this) {
@@ -98,6 +98,8 @@ public class Translations {
   /**
    * Returns the next Translation, blocking if necessary until it's available, since the next
    * Translation might not have been produced yet.
+   * 
+   * @return first element from the list of {@link org.apache.joshua.decoder.Translation}'s
    */
   public Translation next() {
     synchronized (this) {

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 0825ccb..0bc2f9f 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -60,8 +60,8 @@ import org.apache.joshua.util.ChartSpan;
  * index of sentences: start from zero index of cell: cell (i,j) represent span
  * of words indexed [i,j-1] where i is in [0,n-1] and j is in [1,n]
  * 
- * @author Zhifei Li, <zh...@gmail.com>
- * @author Matt Post <po...@cs.jhu.edu>
+ * @author Zhifei Li, zhifei.work@gmail.com
+ * @author Matt Post post@cs.jhu.edu
  */
 
 public class Chart {
@@ -728,7 +728,7 @@ public class Chart {
   }
 
   /***
-   * Add a terminal production (X -> english phrase) to the hypergraph.
+   * Add a terminal production (X -&gt; english phrase) to the hypergraph.
    * 
    * @param i the start index
    * @param j stop index

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index eeb6366..73c3cd0 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -35,8 +35,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
 /**
  * This class computes the cost of applying a rule.
  * 
- * @author Matt Post <po...@cs.jhu.edu>
- * @author Zhifei Li, <zh...@gmail.com>
+ * @author Matt Post post@cs.jhu.edu
+ * @author Zhifei Li, zhifei.work@gmail.com
  */
 
 public class ComputeNodeResult {
@@ -52,13 +52,20 @@ public class ComputeNodeResult {
 
   // The StateComputer objects themselves serve as keys.
   private List<DPState> dpStates;
-  
+
   /**
    * Computes the new state(s) that are produced when applying the given rule to the list of tail
    * nodes. Also computes a range of costs of doing so (the transition cost, the total (Viterbi)
    * cost, and a score that includes a future cost estimate).
    * 
    * Old version that doesn't use the derivation state.
+   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+   * @param rule {@link org.apache.joshua.decoder.ff.tm.Rule} to use when computing the node result
+   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+   * @param i todo
+   * @param j todo
+   * @param sourcePath information about a path taken through the source lattice
+   * @param sentence the lattice input
    */
   public ComputeNodeResult(List<FeatureFunction> featureFunctions, Rule rule, List<HGNode> tailNodes,
       int i, int j, SourcePath sourcePath, Sentence sentence) {
@@ -66,12 +73,12 @@ public class ComputeNodeResult {
     // The total Viterbi cost of this edge. This is the Viterbi cost of the tail nodes, plus
     // whatever costs we incur applying this rule to create a new hyperedge.
     float viterbiCost = 0.0f;
-    
+
     if (Decoder.VERBOSE >= 4) {
       System.err.println("ComputeNodeResult():");
       System.err.println("-> RULE " + rule);
     }
-      
+
     /*
      * Here we sum the accumulated cost of each of the tail nodes. The total cost of the new
      * hyperedge (the inside or Viterbi cost) is the sum of these nodes plus the cost of the
@@ -95,7 +102,7 @@ public class ComputeNodeResult {
 
     // The future cost estimate is a heuristic estimate of the outside cost of this edge.
     float futureCostEstimate = 0.0f;
-    
+
     /*
      * We now iterate over all the feature functions, computing their cost and their expected future
      * cost.
@@ -105,7 +112,7 @@ public class ComputeNodeResult {
 
       DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
       transitionCost += acc.getScore();
-      
+
       if (Decoder.VERBOSE >= 4)
         System.err.println(String.format("-> FEATURE %s = %.3f * %.3f = %.3f", 
             feature.getName(), acc.getScore() / Decoder.weights.getSparse(feature.getName()),
@@ -116,21 +123,22 @@ public class ComputeNodeResult {
         allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
       }
     }
-  
+
     viterbiCost += transitionCost;
 
     if (Decoder.VERBOSE >= 4)
       System.err.println(String.format("-> COST = %.3f", transitionCost));
-    
+
     // Set the final results.
     this.pruningCostEstimate = viterbiCost + futureCostEstimate;
     this.viterbiCost = viterbiCost;
     this.transitionCost = transitionCost;
     this.dpStates = allDPStates;
   }
-  
+
   /**
-   * This is called from Cell.java when making the final transition to the goal state.
+   * This is called from {@link org.apache.joshua.decoder.chart_parser.Cell} 
+   * when making the final transition to the goal state.
    * This is done to allow feature functions to correct for partial estimates, since
    * they now have the knowledge that the whole sentence is complete. Basically, this
    * is only used by LanguageModelFF, which does not score partial n-grams, and therefore
@@ -140,6 +148,14 @@ public class ComputeNodeResult {
    * too: it makes search better (more accurate at the beginning, for example), and would
    * also do away with the need for the computeFinal* class of functions (and hooks in
    * the feature function interface).
+   * 
+   * @param featureFunctions {@link java.util.List} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
+   * @param tailNodes {@link java.util.List} of {@link org.apache.joshua.decoder.hypergraph.HGNode}'s
+   * @param i todo
+   * @param j todo
+   * @param sourcePath information about a path taken through the source lattice
+   * @param sentence the lattice input
+   * @return the final cost for the Node
    */
   public static float computeFinalCost(List<FeatureFunction> featureFunctions,
       List<HGNode> tailNodes, int i, int j, SourcePath sourcePath, Sentence sentence) {
@@ -150,13 +166,13 @@ public class ComputeNodeResult {
     }
     return cost;
   }
-  
+
   public static FeatureVector computeTransitionFeatures(List<FeatureFunction> featureFunctions,
       HyperEdge edge, int i, int j, Sentence sentence) {
 
     // Initialize the set of features with those that were present with the rule in the grammar.
     FeatureVector featureDelta = new FeatureVector();
-    
+
     // === compute feature logPs
     for (FeatureFunction ff : featureFunctions) {
       // A null rule signifies the final transition.
@@ -166,7 +182,7 @@ public class ComputeNodeResult {
         featureDelta.add(ff.computeFeatures(edge.getRule(), edge.getTailNodes(), i, j, edge.getSourcePath(), sentence));
       }
     }
-    
+
     return featureDelta;
   }
 
@@ -176,11 +192,12 @@ public class ComputeNodeResult {
 
   /**
    *  The complete cost of the Viterbi derivation at this point
+   *  @return float representing cost
    */
   public float getViterbiCost() {
     return this.viterbiCost;
   }
-  
+
   public float getBaseCost() {
     return getViterbiCost() - getTransitionCost();
   }
@@ -188,7 +205,7 @@ public class ComputeNodeResult {
   /**
    * The cost incurred by this edge alone
    * 
-   * @return
+   * @return float representing cost
    */
   public float getTransitionCost() {
     return this.transitionCost;