You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/26 04:22:24 UTC

[07/14] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/parse.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/parse.cpp b/ext/giza-pp/GIZA++-v2/parse.cpp
deleted file mode 100644
index ebb136e..0000000
--- a/ext/giza-pp/GIZA++-v2/parse.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-
-/* FJO 01/2001: completely reorganized parameter processing */
-
-#include <sstream>
-#include <string>
-#include <fstream>
-#include "defs.h"
-#include "utility.h"
-#include "Globals.h"
-#include "D4Tables.h"
-#include "D5Tables.h"
-#include "ATables.h"
-#include "Parameter.h"
-
-extern bool ONLYALDUMPS;
-
-void parseConfigFile (char * fname )
-  // Reads the configuration file `fname` and applies every setting found in
-  // it.  If the file cannot be opened, an error is printed and the program
-  // exits with status 1.
-  //
-  // FORMAT:
-  //  - '\n' separates lines.
-  //  - Lines without any token (empty or whitespace only) and lines whose
-  //    first token starts with "//" are ignored; the latter are comments.
-  //  - Any other line is an attribute setting that is split into two halves
-  //    by a stand-alone colon token ":".  The attribute name is the
-  //    concatenation of all tokens before the colon, joined by single
-  //    spaces.  The attribute value is the first token after the colon;
-  //    anything after it is ignored.
-  //  - If no value follows a colon token, the line is re-read as
-  //    "<attribute> <value>", i.e. its first two tokens.
-  //
-  // For example, the entry
-  //
-  //   NO.   ITERATIONS   MODEL 2 :  10
-  //
-  // yields the attribute "NO. ITERATIONS MODEL 2" with the value "10"
-  // (without the quotation marks).
-  //
-  // The attributes "t FILE", "a FILE", "d FILE", "n FILE" and "p0 FILE" set
-  // the corresponding *_Filename variables; every other attribute is handed
-  // to makeSetCommand(), and an error is reported if that call rejects it.
-
-{
-
-  ifstream Config_File(fname);
-  if(!Config_File){
-    cerr << "ERROR:  Cannot open configuration file " << fname << "!\n" ;
-    exit(1);
-  }
-
-  cout << "The following options are from the config file and will be overwritten by any command line options.\n";
-
-  string line, word, attrib, attribval ;
-  while(getline(Config_File, line)){
-
-    istringstream buffer(line);
-    word = attrib = attribval = "" ;
-
-    // A line without any token carries no setting.
-    if(!(buffer >> word))
-      continue;
-    // Comment lines start with "//", whether or not a blank follows it.
-    if(word.compare(0, 2, "//") == 0)
-      continue;
-
-    // Collect the attribute name up to the stand-alone ":" token.
-    attrib = word ;
-    while((buffer >> word) && (word != ":")){
-      attrib += " " + word ;
-    }
-    if(!(buffer >> attribval))
-      {
-        // No value after a colon: fall back to "<attribute> <value>".
-        istringstream buffer2(line);
-        buffer2>>attrib;
-        buffer2>>attribval;
-      }
-
-    // This is where (1) the configuration file is defined and
-    //               (2) parsing of its attributes occurs.
-
-    if(attrib == "t FILE"){
-      t_Filename = attribval;
-      cout << "\tt file:  " << t_Filename << '\n';
-    }
-    else if(attrib ==  "a FILE"){
-      a_Filename = attribval;
-      cout << "\ta file:  " << a_Filename << '\n';
-    }
-    else if(attrib == "d FILE"){
-      d_Filename = attribval;
-      cout << "\td file:  " << d_Filename << '\n';
-    }
-    else if(attrib == "n FILE"){
-      n_Filename = attribval;
-      cout << "\tn file:  " << n_Filename << '\n';
-    }
-    else if(attrib == "p0 FILE"){
-      p0_Filename = attribval;
-      cout << "\tp0 file:  " << p0_Filename << '\n';
-    }
-    else if(  !makeSetCommand(attrib,attribval,getGlobalParSet(),2) )
-      cerr << "ERROR: Unrecognized attribute :" << attrib << '\n';
-  }
-}
-
-
-void parseArguments(int argc, char *argv[])
-  // Processes the command line.
-  //  - "--h" / "--help" as first argument prints the help text and exits.
-  //  - If the first argument does not start with '-', it names a
-  //    configuration file that is read before the remaining options.
-  //  - "--name" switches (longer than two characters) are passed to
-  //    makeSetCommand() as "-name" with the value "1".
-  //  - Every other argument is passed to makeSetCommand() together with the
-  //    argument that follows it as its value.
-  // Afterwards OPath (if non-empty) gets a trailing '/' and is prepended to
-  // Prefix and LogFilename, and the resulting parameters are printed.
-{
-  int arg = 1;
-
-  // argv[1] only exists when at least one argument was given; argv[argc] is
-  // a null pointer and must not be passed to strcmp() or dereferenced.
-  if( argc > 1 ){
-    if(!strcmp(argv[1], "--h") || !strcmp(argv[1], "--help")){
-      printHelp();
-      exit(0);
-    }
-    if( argv[1][0]=='-' )
-      arg=0;   // no config file: the loop below starts at argv[1]
-    else
-      parseConfigFile(argv[1]);
-  }
-  while(++arg<argc){
-    if( strlen(argv[arg])>2 && argv[arg][0]=='-' && argv[arg][1]=='-' )
-      {
-        if( !makeSetCommand(argv[arg]+1,"1",getGlobalParSet(),2))
-          cerr << "WARNING: ignoring unrecognized option:  "<< argv[arg] << '\n' ;
-      }
-    else if( arg+1<argc && !makeSetCommand(argv[arg],argv[arg+1],getGlobalParSet(),2))
-      cerr << "WARNING: ignoring unrecognized option:  "<< argv[arg] << '\n' ;
-    else
-      {
-        // The option consumed the following argument as its value.
-        arg++;
-      }
-  }
-  if( OPath.length() )
-    OPath+="/";
-  Prefix = (OPath + Prefix);
-  LogFilename = (OPath + LogFilename);
-  printGIZAPars(cout);
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/plain2snt.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/plain2snt.cpp b/ext/giza-pp/GIZA++-v2/plain2snt.cpp
deleted file mode 100644
index 66ae677..0000000
--- a/ext/giza-pp/GIZA++-v2/plain2snt.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <cstdlib>
-
-using namespace std;
-
-int main(int argc,char**argv)
-{
-  // Command line: an even, non-zero number of text files (source/target
-  // pairs).  Every "-weight w" adds one weight; the k-th weight is used for
-  // the k-th file pair, and pairs without a weight use 1.0.
-  vector<double>weights;
-  vector<string>filenames;
-  bool missingWeight=false;
-  for(int i=1;i<argc;++i)
-    {
-      if(string(argv[i])=="-weight")
-        {
-          // "-weight" needs a value: argv[argc] is a null pointer and must
-          // not be handed to atof().
-          if( i+1<argc )
-            weights.push_back(atof(argv[++i]));
-          else
-            missingWeight=true;
-        }
-      else
-        filenames.push_back(argv[i]);
-    }
-
-  if(missingWeight||(filenames.size()%2)==1||filenames.size()==0 )
-    {
-      cerr << argv[0] << " txt1 txt2 [txt3 txt4 -weight w]\n";
-      cerr << " Converts plain text into GIZA++ snt-format.\n";
-      exit(1);
-    }
-  string line1,line2,word;
-  map<string,int> v1,v2;      // word -> number of occurrences
-  map<string,int> id1,id2;    // word -> numeric id
-  // id -> word; two placeholder slots are allocated so that the first real
-  // word receives id 2.
-  vector<string> iid1(2),iid2(2);
-
-  string w1(filenames[0]);
-  string w2(filenames[1]);
-
-  // Strip a common ".tok" or ".txt" extension from the first file pair; the
-  // stripped names are the stems of all output files.
-  if( w1.length()>4&&w2.length()>4&&((w1.substr(w1.length()-4,w1.length())==".tok" && w2.substr(w2.length()-4,w2.length())==".tok" )||
-                                     (w1.substr(w1.length()-4,w1.length())==".txt" && w2.substr(w2.length()-4,w2.length())==".txt" ) ))
-    {
-      w1=w1.substr(0,w1.length()-4);
-      w2=w2.substr(0,w2.length()-4);
-      cerr << "w1:"<< w1 << " w2:" << w2 << endl;
-    }
-
-
-  string vocab1(w1),vocab2(w2),snt1,snt2;
-  // vocabNx is the stem without its directory part.  When no '/' is found,
-  // rfind() returns npos and npos+1 wraps around to 0.
-  unsigned int slashpos=vocab1.rfind('/')+1;
-  if( slashpos>=vocab1.length() ) slashpos=0;
-  string vocab1x(vocab1.substr(slashpos,vocab1.length()));
-  cout << vocab1 << " -> " << vocab1x << endl;
-  slashpos=vocab2.rfind('/')+1;
-  if( slashpos>=vocab2.length() ) slashpos=0;
-  string vocab2x(vocab2.substr(slashpos,vocab2.length()));
-  cout << vocab2 << " -> " << vocab2x << endl;
-  snt1=vocab1+"_"+vocab2x+string(".snt");
-  snt2=vocab2+"_"+vocab1x+string(".snt");
-  vocab1+=string(".vcb");
-  vocab2+=string(".vcb");
-
-  ofstream ovocab1(vocab1.c_str()),ovocab2(vocab2.c_str()),osnt1(snt1.c_str()),osnt2(snt2.c_str());
-  for(unsigned int i=0;i<filenames.size();i+=2)
-    {
-      ifstream i1(filenames[i].c_str()),i2(filenames[i+1].c_str());
-      if(!i1)cerr << "WARNING: " << filenames[i] << " cannot be read.\n";
-      if(!i2)cerr << "WARNING: " << filenames[i+1] << " cannot be read.\n";
-      while(getline(i1,line1) && getline(i2,line2) )
-        {
-          // Tokenize both lines, counting every word and assigning a new id
-          // to each word seen for the first time.
-          vector<string> t1,t2;
-          istringstream ii1(line1);
-          while(ii1>>word)
-            {
-              t1.push_back(word);
-              v1[word]++;
-              if( id1.find(word)==id1.end() )
-                {
-                  iid1.push_back(word);
-                  id1[word]=iid1.size()-1;
-                }
-            }
-          istringstream ii2(line2);
-          while(ii2>>word)
-            {
-              t2.push_back(word);
-              v2[word]++;
-              if( id2.find(word)==id2.end() )
-                {
-                  iid2.push_back(word);
-                  id2[word]=iid2.size()-1;
-                }
-            }
-          double w=1.0;
-          if( i/2<weights.size() )
-            w=weights[i/2];
-          if( t1.size()&&t2.size() )
-            {
-              // Each sentence pair is three lines: weight, first side ids,
-              // second side ids.  osnt2 holds the same pair with sides swapped.
-              osnt1 << w << "\n";
-              for(unsigned int j=0;j<t1.size();++j)osnt1 << id1[t1[j]] << ' ';
-              osnt1 << '\n';
-              for(unsigned int j=0;j<t2.size();++j)osnt1 << id2[t2[j]] << ' ';
-              osnt1 << '\n';
-
-              osnt2 << w << "\n";
-              for(unsigned int j=0;j<t2.size();++j)osnt2 << id2[t2[j]] << ' ';
-              osnt2 << '\n';
-              for(unsigned int j=0;j<t1.size();++j)osnt2 << id1[t1[j]] << ' ';
-              osnt2 << '\n';
-            }
-          else
-            cerr << "WARNING: filtered out empty sentence (source: " << filenames[i] << " " << t1.size() <<
-              " target: " << filenames[i+1] << " " << t2.size() << ").\n";
-        }
-    }
-
-  // Vocabulary files: "<id> <word> <count>" per line, starting at id 2.
-  for(unsigned int i=2;i<iid1.size();++i)
-    ovocab1 << i << ' ' << iid1[i] << ' ' << v1[iid1[i]] << '\n';
-  for(unsigned int i=2;i<iid2.size();++i)
-    ovocab2 << i << ' ' << iid2[i] << ' ' << v2[iid2[i]] << '\n';
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/reports.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/reports.cpp b/ext/giza-pp/GIZA++-v2/reports.cpp
deleted file mode 100644
index 621e21a..0000000
--- a/ext/giza-pp/GIZA++-v2/reports.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include <sstream>
-#include <time.h>
-#include <set>
-#include "defs.h"
-#include "vocab.h" 
-#include "Perplexity.h"
-#include "getSentence.h"
-#include "TTables.h"
-#include "Globals.h"
-#include "Parameter.h"
-
-void printHelp(void)
-  // Writes the usage text and the list of command line switches to cerr,
-  // then prints the parameter overview via printGIZAPars() to cout.
-{
-  cerr << "Usage:\n\n" << Usage << '\n'
-       << "Options (these override parameters set in the config file):\n\n"
-       << "\t--v \t\t print verbose message, Warning this is not very descriptive and not systematic.\n"
-       << "\t--NODUMPS \t Do not write any files to disk (This will over write dump frequency options).\n"
-       << "\t--h[elp]\t\tprint this help\n"
-       << "\t--p\t\tUse pegging when generating alignments for Model3 training.  (Default NO PEGGING)\n"
-       << "\t--st\t\tto use a fixed ditribution for the fertility parameters when tranfering from model 2 to model 3 (Default complicated estimation)\n";
-  printGIZAPars(cout);
-}
-
-
-void generatePerplexityReport(const Perplexity& trainperp,
-                              const Perplexity& testperp,
-                              const Perplexity& trainVperp,
-                              const Perplexity& testVperp,
-                              ostream& of, int trainsize, int testsize,
-                              bool)
-  // Writes a perplexity table to `of`: one header line followed by one row
-  // per index 0 .. max(size of the four Perplexity objects) - 1.  Each row
-  // holds trainsize, testsize, the row index, trainperp.modelid[i] and the
-  // four perplexity values; a value that is not available for that index is
-  // written as "N/A".  The unnamed bool parameter is ignored.
-{
-  unsigned int m1 = max(trainperp.size(), testperp.size());
-  unsigned int m2 = max(trainVperp.size(), testVperp.size());
-  unsigned int m = max(m1,m2);
-  of << "#trnsz\ttstsz\titer\tmodel\ttrn-pp\t\ttest-pp\t\ttrn-vit-pp\t\ttst-vit-pp\n";
-  for (unsigned int i = 0 ; i < m ; i++){
-    // NOTE(review): modelid is read from trainperp for every row, including
-    // rows beyond trainperp.perp.size() - confirm that trainperp always has
-    // at least as many entries as the other three objects.
-    of << trainsize << '\t' << testsize << '\t' << i<< '\t' << trainperp.modelid[i] << '\t';
-    if (i < trainperp.perp.size())
-      of << trainperp.perp[i] << "\t\t" ;
-    else
-      of << "N/A\t\t";
-    if (i<testperp.perp.size())
-      of << testperp.perp[i] << "\t\t" ;
-    else
-      of << "N/A\t\t";
-    if (i < trainVperp.perp.size())
-      of << trainVperp.perp[i] << "\t\t" ;
-    else
-      of << "N/A\t\t";   // two tabs, like every other cell of this column
-    if (i< testVperp.perp.size())
-      of << testVperp.perp[i] << '\n' ;
-    else
-      of << "N/A\n";
-  }
-}
-
-void  printSentencePair(Vector<WordIndex>& es,
-                        Vector<WordIndex>& fs,
-                        ostream& of)
-
-  // Writes a sentence pair to the given output stream: a line with both
-  // lengths, then one line per sentence.  Token ids are written, not the
-  // tokens themselves, and position 0 of either sentence is not printed.
-{
-  const WordIndex srcLen = es.size() - 1;
-  const WordIndex trgLen = fs.size() - 1;
-
-  of << "Source sentence length : " << srcLen << " , target : " << trgLen << "\n";
-
-  for (WordIndex pos = 1 ; pos <= srcLen ; ++pos) {
-    of << es[pos] << ' ';
-  }
-  of << "\n";
-
-  for (WordIndex pos = 1 ; pos <= trgLen ; ++pos) {
-    of << fs[pos] << ' ';
-  }
-  of << "\n";
-}
-
-extern short CompactAlignmentFormat;
-void printAlignToFile(const Vector<WordIndex>& es,
-                      const Vector<WordIndex>& fs,
-                      const Vector<WordEntry>& evlist,
-                      const Vector<WordEntry>& fvlist,
-                      ostream& of2,
-                      const Vector<WordIndex>& viterbi_alignment,
-                      int pair_no, double alignment_score)
-
-     // Prints the given alignment to the alignment stream `of2`.
-     //
-     // Compact format (CompactAlignmentFormat != 0): a single line holding,
-     // for every target position j whose alignment is non-zero, the pair
-     // "viterbi_alignment[j]-1 j-1".
-     //
-     // Otherwise the format understood by the draw-alignment tool is used
-     // (each line triple is one sentence pair):
-     //   # sentence caption
-     //   target_word_1 target_word_2  ..... target_word_m
-     //   source_word_1 ({ x y z }) source_word_2 ({ })  .. source_word_n ({w})
-     // where x, y, z, and w are positions of target words that each source
-     // word is connected to.
-
-{
-  // linkedTargets[i] lists the target positions aligned to source position i.
-  Vector<Vector<WordIndex> > linkedTargets(es.size());
-  const WordIndex srcLen = es.size() - 1;
-  const WordIndex trgLen = fs.size() - 1;
-
-  if( CompactAlignmentFormat )
-    {
-      for (WordIndex trg = 1 ; trg <= trgLen ; trg++)
-        {
-          if( viterbi_alignment[trg] )
-            of2 << viterbi_alignment[trg]-1 << ' ' << trg-1 << ' ';
-        }
-      of2 << '\n';
-      return;
-    }
-
-  of2 << "# Sentence pair (" << pair_no <<") source length " << srcLen << " target length "<< trgLen <<
-    " alignment score : "<< alignment_score << '\n';
-
-  // Target line; the links are collected per source position on the way.
-  for (WordIndex trg = 1 ; trg <= trgLen ; trg++)
-    {
-      of2 << fvlist[fs[trg]].word << " " ;
-      linkedTargets[viterbi_alignment[trg]].push_back(trg);
-    }
-  of2 << '\n';
-
-  // Source line, starting at position 0, each word followed by its links.
-  for (WordIndex src = 0 ; src <= srcLen ; src++)
-    {
-      of2 << evlist[es[src]].word << " ({ " ;
-      for (WordIndex k = 0 ; k < linkedTargets[src].size() ; k++)
-        of2 << linkedTargets[src][k] << " " ;
-      of2 << "}) ";
-    }
-  of2 << '\n';
-}
-
-
-void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
-                        sentenceHandler& testHandler,  vcbList& trainEList,
-                        vcbList& trainFList, vcbList& testEList, vcbList& testFList)
-  // Compares the test corpus with the training data and writes
-  //   Prefix + ".tst.trg.unk"  - target words of the test corpus that have
-  //                              no positive frequency in the training list,
-  //   Prefix + ".tst.src.unk"  - the same for source words,
-  //   Prefix + ".tst.stats"    - a summary, including how many distinct
-  //                              (source, target) cooccurrences of the test
-  //                              corpus have a t-table probability above
-  //                              PROB_SMOOTH.
-{
-  set<pair<WordIndex, WordIndex> > testCoocur ;
-  sentPair s ;
-  testHandler.rewind();
-  int seen_coocur = 0, unseen_coocur = 0, srcUnk = 0, trgUnk = 0 ;
-
-  // Collect every distinct word pair of the test corpus (position 0 of
-  // either sentence is skipped).
-  while(testHandler.getNextSentence(s)){
-    for (WordIndex i = 1 ; i < s.eSent.size() ; i++)
-      for (WordIndex j = 1 ; j < s.fSent.size() ; j++)
-        testCoocur.insert(pair<WordIndex, WordIndex> (s.eSent[i], s.fSent[j])) ;
-  }
-  set<pair<WordIndex, WordIndex> >::const_iterator i ;
-  for (i = testCoocur.begin() ; i != testCoocur.end() ; ++i){
-    if (tTable.getProb((*i).first, (*i).second) > PROB_SMOOTH)
-      seen_coocur ++ ;
-    else
-      unseen_coocur++;
-  }
-
-  string trgUnkFile = Prefix + ".tst.trg.unk" ;
-  ofstream of_trgUnk(trgUnkFile.c_str());
-
-  // An id beyond the end of the training list cannot have been seen in
-  // training, so it counts as unknown instead of being looked up.
-  for (WordIndex i = 0 ; i <  testFList.getVocabList().size() && i < testFList.uniqTokens();i++)
-    if (testFList.getVocabList()[i].freq > 0 &&
-        (i >= trainFList.getVocabList().size() || trainFList.getVocabList()[i].freq <= 0)){
-      of_trgUnk << i << ' ' << testFList.getVocabList()[i].word << ' ' << testFList.getVocabList()[i].freq
-                << '\n';
-      trgUnk++ ;
-    }
-  string srcUnkFile = Prefix + ".tst.src.unk" ;
-  ofstream of_srcUnk(srcUnkFile.c_str());
-
-  for (WordIndex j = 0 ; j <  testEList.getVocabList().size() && j < testEList.uniqTokens();j++)
-    if (testEList.getVocabList()[j].freq > 0 &&
-        (j >= trainEList.getVocabList().size() || trainEList.getVocabList()[j].freq <= 0)){
-      srcUnk++ ;
-      of_srcUnk << j << ' ' << testEList.getVocabList()[j].word << ' ' << testEList.getVocabList()[j].freq
-                << '\n';
-    }
-  string summaryFile = Prefix + ".tst.stats" ;
-  ofstream of_summary(summaryFile.c_str());
-  of_summary << "\t\t STATISTICS ABOUT TEST CORPUS\n\n";
-  of_summary << "source unique tokens: " <<  testEList.uniqTokens() << '\n';
-  of_summary << "target unique tokens: " <<  testFList.uniqTokens() << '\n';
-  of_summary << "unique unseen source tokens: " << srcUnk << '\n';
-  of_summary << "unique unseen target tokens: " << trgUnk << '\n';
-  of_summary << "cooccurrences not found in the final t table: " << unseen_coocur << '\n';
-  of_summary << "cooccurrences found in the final t table: " << seen_coocur << '\n';
-
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/small_snt2cooc.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/small_snt2cooc.cpp b/ext/giza-pp/GIZA++-v2/small_snt2cooc.cpp
deleted file mode 100644
index 1ce7648..0000000
--- a/ext/giza-pp/GIZA++-v2/small_snt2cooc.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <vector>
-#include <algorithm>
-
-
-using namespace std;
-
-// Minimal sorted set of ints stored in a vector: insert() keeps the elements
-// in ascending order and drops duplicates; begin()/end() iterate in order.
-class myset {
-  private:
-    vector<int> data;
-    // Makes sure at least three free slots are reserved.  When fewer are
-    // left, the capacity is doubled while it is below 18 (a capacity below 4
-    // is treated as 2 first) and enlarged by 15 afterwards.
-    void check_cap() {
-      size_t cap = data.capacity();
-      if (cap - data.size() >= 3) { return; }
-      if (cap < 4) { cap = 2; }
-      cap = (cap < 18) ? cap * 2 : cap + 15;
-      data.reserve(cap);
-    }
-  public:
-    typedef vector<int>::iterator iterator;
-    // Adds x unless it is already stored.
-    void insert(int x) {
-      if (data.empty()) { data.push_back(x); return; }
-      iterator pos = lower_bound(data.begin(), data.end(), x);
-      if (pos != data.end() && *pos == x) { return; }
-      // check_cap() may reallocate and invalidate pos, so remember the offset.
-      const vector<int>::difference_type offset = pos - data.begin();
-      check_cap();
-      data.insert(data.begin() + offset, x);
-    }
-    iterator begin() { return data.begin(); }
-    iterator end() { return data.end(); }
-};
-
-//#include <set>
-// typedef std::set<int> intset;
-//#include <ext/hash_set>
-// typedef __gnu_cxx::hash_set<int> intset;
-typedef myset intset;
-
-
-int main(int argc,char **argv)
-{
-  // Reads an snt file (three lines per sentence pair: count, first sentence,
-  // second sentence, both as numeric ids) and prints every distinct pair
-  // "<id of first sentence> <id of second sentence>" that occurs together in
-  // a sentence pair.  Id 0 is additionally paired with every id of the
-  // second sentence.
-  if( argc!=2 )
-    {
-      cerr << "Usage: " << argv[0] << " snt12 \n";
-      cerr << "Converts GIZA++ snt-format into plain text.\n";
-      exit(1);
-    }
-  ifstream t(argv[1]);
-  if( !t )
-    {
-      cerr << "ERROR: cannot read " << argv[1] << ".\n";
-      exit(1);
-    }
-  string line1,line2,line3;
-  vector<intset> vsi(400000);   // vsi[id] = ids seen together with id
-  int nLine=0;
-  while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
-    {
-      istringstream eingabe1(line1.c_str()),eingabe2(line2.c_str()),eingabe3(line3.c_str());
-      double count;
-      string word;
-      eingabe1>>count;   // the count line is read but not used
-      vector<int>l1,l2;
-      while(eingabe2>>word)
-        l1.push_back(atoi(word.c_str()));
-      while(eingabe3>>word)
-        l2.push_back(atoi(word.c_str()));
-      if( ((++nLine)%1000)==0 )
-        cerr << "line " << nLine << '\n';
-      for(unsigned int j=0;j<l2.size();++j)
-        vsi[0].insert(l2[j]);
-      for(unsigned int i=0;i<l1.size();++i)
-        {
-          // A negative id cannot be used as an index into vsi.
-          if( l1[i]<0 )
-            {
-              cerr << "WARNING: ignoring negative word id " << l1[i] << '\n';
-              continue;
-            }
-          if( l1[i]>=int(vsi.size()) )
-            {
-              cerr << "I have to resize: " << l1[i] << endl;
-              vsi.resize(l1[i]+1000);
-            }
-          intset&theset=vsi[l1[i]];
-          for(unsigned int j=0;j<l2.size();++j)
-            theset.insert(l2[j]);
-        }
-     }
-  int vi = 0;
-  for(vector<intset>::iterator i=vsi.begin();i != vsi.end(); ++i) {
-    for(intset::iterator j=i->begin();j!=i->end();++j)
-      cout << vi << " " << *j << endl;
-    ++vi;
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/snt2cooc.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/snt2cooc.cpp b/ext/giza-pp/GIZA++-v2/snt2cooc.cpp
deleted file mode 100644
index c6af6d4..0000000
--- a/ext/giza-pp/GIZA++-v2/snt2cooc.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <set>
-#include <cstdlib>
-
-using namespace std;
-
-// Reads a vocabulary from `in` into `voc`.  Every line must start with two
-// whitespace-separated tokens; the first is stored as key and the second as
-// value, and the rest of the line is ignored.  The key "1" is preset to
-// "UNK".  A line with fewer than two tokens is reported on cerr and skipped;
-// an unreadable stream is reported once and leaves only the "UNK" entry.
-void readVoc(istream&in,map<string,string>&voc)
-{
-  string line,s1,s2;
-  voc["1"]="UNK";
-  if( !in )cerr <<"Vocabulary does not exist.\n";
-  while(getline(in,line))
-    {
-      istringstream eingabe(line);
-      // Store the entry only when both tokens were read; after a failed
-      // extraction s1/s2 may still hold values from an earlier line.
-      if( eingabe>>s1>>s2 )
-        voc[s1]=s2;
-      else
-        cerr << "ERROR in vocabulary '" << line << "'\n";
-    }
-}
-
-// Number of stored entries above which the collected pairs are written out
-// and the tables are reset; 0 disables these intermediate dumps.
-int maxElems=0;
-
-// Writes every stored pair to standard output, one per line, as
-// "<first id> <second id>", preceded by its count when `counts` is set.
-static void printCooccurrences(const vector<map<int,int> >&vsi,bool counts)
-{
-  for(unsigned int i=0;i<vsi.size();++i)
-    for(map<int,int>::const_iterator j=vsi[i].begin();j!=vsi[i].end();++j)
-      {
-        if( counts )
-          cout << j->second << " " << i << " " << j->first << '\n';
-        else
-          cout << i << " " << j->first << '\n';
-      }
-}
-
-// Reads two vocabulary files and an snt file (three lines per sentence pair:
-// count, first sentence, second sentence, both as numeric ids) and prints
-// the id pairs that occur together in a sentence pair.  Id 0 is additionally
-// paired with every id of the second sentence.
-int main(int argc,char **argv)
-{
-  if( argc!=4&&argc!=5 )
-    {
-      cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 [ -counts ]\n";
-      cerr << "Converts GIZA++ snt-format into plain text.\n";
-      exit(1);
-    }
-  bool counts=0;
-  if( argc==5 )
-    {
-      // The option is argv[4]; argv[5] is the terminating null pointer.
-      if(string(argv[4])!="-counts")
-        cerr << "ERROR: wrong option " << argv[4] << endl;
-      counts=1;
-      maxElems=10000000;
-    }
-  ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
-  map<string,string>voc1,voc2;
-  readVoc(v1,voc1);
-  readVoc(v2,voc2);
-  string line1,line2,line3;
-  // vsi[id][other id] = how often the two ids were seen together
-  vector<map<int,int> > vsi(voc1.size()+1000);
-  int nLine=0;
-  int totalElems=0;   // number of entries currently stored in vsi
-  while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
-    {
-      istringstream eingabe1(line1),eingabe2(line2),eingabe3(line3);
-      double count;
-      string word;
-      eingabe1>>count;   // the count line is read but not used
-      vector<int>l1,l2;
-      while(eingabe2>>word)
-        l1.push_back(atoi(word.c_str()));
-      while(eingabe3>>word)
-        l2.push_back(atoi(word.c_str()));
-      if( ((++nLine)%1000)==0 )
-        cerr << "line " << nLine << '\n';
-      totalElems-=vsi[0].size();
-      for(unsigned int j=0;j<l2.size();++j)
-        vsi[0][l2[j]]++;
-      totalElems+=vsi[0].size();
-      for(unsigned int i=0;i<l1.size();++i)
-        {
-          // A negative id cannot be used as an index into vsi.
-          if( l1[i]<0 )
-            {
-              cerr << "WARNING: ignoring negative word id " << l1[i] << '\n';
-              continue;
-            }
-          if( l1[i]>=int(vsi.size()) )
-            {
-              cerr << "I have to resize: " << l1[i] << endl;
-              vsi.resize(l1[i]+1);
-            }
-          map<int,int>&theset=vsi[l1[i]];
-          totalElems-=theset.size();
-          for(unsigned int j=0;j<l2.size();++j)
-            theset[l2[j]]++;
-          totalElems+=theset.size();
-        }
-      if( totalElems>maxElems&&maxElems )
-        {
-          // Too many entries are stored: write them out and start over.
-          cerr << "INFO: print out " << totalElems << " entries.\n";
-          printCooccurrences(vsi,counts);
-          totalElems=0;
-          vsi.clear();
-          vsi.resize(voc1.size()+1000);
-        }
-    }
-  cerr << "END.\n";
-  printCooccurrences(vsi,counts);
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/snt2plain.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/snt2plain.cpp b/ext/giza-pp/GIZA++-v2/snt2plain.cpp
deleted file mode 100644
index 3eb99ad..0000000
--- a/ext/giza-pp/GIZA++-v2/snt2plain.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <iostream>
-#include <string>
-#include <sstream>
-#include <fstream>
-#include <map>
-#include <vector>
-#include <cstdlib>
-
-using namespace std;
-
-// Reads a vocabulary from `in` into `voc`.  Every line must start with two
-// whitespace-separated tokens; the first is stored as key and the second as
-// value, and the rest of the line is ignored.  The key "1" is preset to
-// "UNK".  A line with fewer than two tokens is reported on cerr and skipped;
-// an unreadable stream is reported once and leaves only the "UNK" entry.
-void readVoc(istream&in,map<string,string>&voc)
-{
-  string line,s1,s2;
-  voc["1"]="UNK";
-  if( !in )cerr <<"Vocabulary does not exist.\n";
-  while(getline(in,line))
-    {
-      istringstream eingabe(line);
-      // Store the entry only when both tokens were read; after a failed
-      // extraction s1/s2 may still hold values from an earlier line.
-      if( eingabe>>s1>>s2 )
-        voc[s1]=s2;
-      else
-        cerr << "ERROR in vocabulary '" << line << "'\n";
-    }
-}
-
-// Converts an snt file (three lines per sentence pair: count, first
-// sentence, second sentence, both as numeric ids) back into two plain text
-// files <output_prefix>1.txt and <output_prefix>2.txt, mapping every id to
-// its word via the two vocabulary files.  An id that is missing from its
-// vocabulary is written unchanged.  With "-counts" the count of every
-// sentence pair is also printed to standard output.
-int main(int argc,char **argv)
-{
-  if( argc!=5&&argc!=6 )
-    {
-      cerr << "Usage: " << argv[0] << " vcb1 vcb2 snt12 output_prefix [ -counts ]\n";
-      cerr << "Converts GIZA++ snt-format into plain text.\n";
-      exit(1);
-    }
-  bool counts=0;
-  if( argc==6 )
-    {
-      if(string(argv[5])!="-counts")
-        cerr << "ERROR: wrong option " << argv[5] << endl;
-      counts=1;
-    }
-  ifstream v1(argv[1]),v2(argv[2]),t(argv[3]);
-  string prefix(argv[4]);
-  string outfil1=prefix+"1.txt";
-  string outfil2=prefix+"2.txt";
-  ofstream out1(outfil1.c_str());
-  ofstream out2(outfil2.c_str());
-  map<string,string>voc1,voc2;
-  readVoc(v1,voc1);
-  readVoc(v2,voc2);
-  string line1,line2,line3;
-  int printed=0;   // only the first unknown id is reported on cerr
-  while(getline(t,line1)&&getline(t,line2)&&getline(t,line3))
-    {
-      istringstream eingabe1(line1),eingabe2(line2),eingabe3(line3);
-      double count;
-      string word;
-      eingabe1>>count;
-      vector<string>l1,l2;
-      while(eingabe2>>word)
-        l1.push_back(word);
-      while(eingabe3>>word)
-        l2.push_back(word);
-      if( counts )
-        cout << count << '\n';
-      for(unsigned int p=0;p<l1.size();p++)
-        {
-          if(voc1.count(l1[p])==0)
-            {
-              if( printed++==0)
-                cerr << "ERROR: source vocabulary entry " << l1[p] << " unknown.\n";
-              out1 << l1[p]<<' ';
-            }
-          else
-            out1 << voc1[l1[p]] << ' ';
-        }
-      for(unsigned int p=0;p<l2.size();p++)
-        {
-          if(voc2.count(l2[p])==0)
-            {
-              if( printed++ ==0)
-                cerr << "ERROR: target vocabulary entry " << l2[p] << " unknown.\n";
-              out2 <<l2[p]<<' ';
-            }
-          else
-            // Look the id up only when it is known: operator[] on a missing
-            // key would insert an empty word and print a second, empty token.
-            out2 << voc2[l2[p]] << ' ';
-        }
-      out1<<'\n';
-      out2<<'\n';
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/trainGIZA++.sh
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/trainGIZA++.sh b/ext/giza-pp/GIZA++-v2/trainGIZA++.sh
deleted file mode 100755
index 09f6851..0000000
--- a/ext/giza-pp/GIZA++-v2/trainGIZA++.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#! /bin/csh
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-if( $# != 3 ) then
-    # Anything other than exactly three arguments: print the usage text only.
- echo Usage: trainGIZA++.sh vcb1 vcb2 snt
- echo " "
- echo Performs a training of word classes and a standard GIZA training.
-
-else
-    # Step 1: snt2plain.out is given the vocabularies $1 $2, the corpus $3 and the output prefix PLAIN.
-    snt2plain.out $1 $2 $3 PLAIN
-    # Step 2: mkcls runs once per side on PLAIN1.txt / PLAIN2.txt (logs: mkcls1.log, mkcls2.log); each input is removed afterwards. NOTE(review): confirm the mkcls option meanings.
-    mkcls -m2 -pPLAIN1.txt -c50 -V$1.classes opt >& mkcls1.log
-    rm PLAIN1.txt
-    mkcls -m2 -pPLAIN2.txt -c50 -V$2.classes opt >& mkcls2.log
-    rm PLAIN2.txt
-    GIZA++ -S $1 -T $2 -C $3 -p0 0.98 -o GIZA++ >& GIZA++.log
-    # Step 3 (line above): GIZA++ is run on the same three inputs; its output is redirected to GIZA++.log.
-endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model1.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model1.h b/ext/giza-pp/GIZA++-v2/transpair_model1.h
deleted file mode 100644
index dd1425d..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model1.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef transpair_model1_h_fjo_defined
-#define transpair_model1_h_fjo_defined
-//#include "logprob.h"
-#include "defs.h"
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <cmath>
-#include <algorithm>
-#include "Array2.h"
-#include "mystl.h"
-
-// Translation probabilities of one sentence pair.  t(i, j) caches
-// tTable.getProb(es[i], fs[j]) for source positions i = 0..l and target
-// positions j = 1..m, where l = es.size()-1 and m = fs.size()-1.  The scoring
-// functions below use nothing but these cached values.
-class transpair_model1
-{
- public:
-  bool verboseTP;
-  Array2<PROB, Vector<PROB> > t;
-  WordIndex l, m;
-  Vector<WordIndex> E,F;
-  void setMode(bool)
-    {}
-  // Fills t from tTable and reports (on cerr) every entry that is not at
-  // least PROB_SMOOTH.  The members l and m are set in the initializer list
-  // and used directly; column j = 0 of t is not written here.
-  transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable)
-    : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
-    {
-      for(WordIndex i=0;i<=l;i++)
-        for(WordIndex j=1;j<=m;j++)
-          {
-            t(i, j)=tTable.getProb(es[i], fs[j]);
-            if( !(t(i,j)>=PROB_SMOOTH) )
-              cerr << "ERROR IN PROBABILITY: " << t(i,j) << " " << PROB_SMOOTH << endl;
-          }
-    }
-  /*  transpair_model1(const Vector<WordIndex>&es, const Vector<WordIndex>&fs)
-    : verboseTP(0),t(es.size(), fs.size()),l(es.size()-1), m(fs.size()-1),E(es),F(fs)
-    {
-      WordIndex l=es.size()-1,m=fs.size()-1;
-      for(WordIndex i=0;i<=l;i++)
-        for(WordIndex j=1;j<=m;j++)
-          {
-            const string&estr=globeTrainVcbList->getVocabList()[es[i]].word;
-            const string&fstr=globfTrainVcbList->getVocabList()[fs[j]].word;
-            if( lev(estr,fstr)==0 )
-              t(i,j)=1.0;
-            else
-              t(i,j)=1/100.0;
-            massert( t(i,j)>=PROB_SMOOTH );
-          }
-}*/
-  WordIndex get_l()const
-    {return l;}
-  WordIndex get_m()const
-    {return m;}
-  // Cached probability for source position i and target position j.
-  const PROB&get_t(WordIndex i, WordIndex j)const
-    {massert( t(i,j)>=PROB_SMOOTH);
-    return t(i, j);}
-  WordIndex get_es(int i)const {return E[i];}
-  WordIndex get_fs(int j)const {return F[j];}
-  bool greedyHillClimbing()const
-    {return 0;}
-  void computeScores(const alignment&,vector<double>&)const
-    {}
-  // Ratio t(new_i, j) / t(a(j), j): the factor obtained when target position
-  // j is re-aligned from its current source position to new_i.  The unnamed
-  // double parameter is ignored.
-  LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
-    {
-      int old_i=a(j);
-      return (t(new_i, j) /t(old_i, j));
-    }
-  // Factor obtained when target positions j1 and j2 exchange their aligned
-  // source positions.  The unnamed double parameter is ignored.
-  LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
-    {
-      WordIndex i1=a(j1), i2=a(j2);
-      return (t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
-    }
-  // Product over j = 1..al.get_m() of t(al(j), j) / (al.get_l()+1).
-  LogProb prob_of_target_and_alignment_given_source(const alignment&al)const
-    {
-      LogProb prob=1.0;
-      int lp1=al.get_l()+1;
-      for(unsigned int j=1;j<=al.get_m();++j)
-        prob*=t(al(j),j)/lp1;
-      return prob;
-    }
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model2.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model2.h b/ext/giza-pp/GIZA++-v2/transpair_model2.h
deleted file mode 100644
index 751ce52..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model2.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef transpair_model2_defined_h
-#define transpair_model2_defined_h
-
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <cmath>
-#include "transpair_model1.h"
-
-
-class transpair_model2 : public transpair_model1
-{
- protected:
-  Array2<PROB, Vector<PROB> > a;
- public:
-  transpair_model2(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable, 
-		   const amodel<PROB>&aTable)
-    : transpair_model1(es,fs,tTable),a(es.size(),fs.size())
-    {
-      for(WordIndex i=0;i<=l;i++)
-	for(WordIndex j=1;j<=m;j++)
-	  a(i, j)=aTable.getValue(i, j, l, m);
-    }
-  const PROB&get_a(WordIndex i, WordIndex j)const
-    {return a(i, j);}
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model3.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model3.cpp b/ext/giza-pp/GIZA++-v2/transpair_model3.cpp
deleted file mode 100644
index 0ab4c54..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model3.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/*--
-transpair_model3: representation of a translation pair for model3 training
-allowing for fast access (esp. to t table).
-
-Franz Josef Och (30/07/99)
---*/
-#include "transpair_model3.h"
-#include <algorithm>
-
-transpair_model3::transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0, void*)
-  : transpair_model2(es,fs,tTable,aTable),d(es.size(), fs.size()),n(es.size(), MAX_FERTILITY+1), p0(_p0), p1(_p1)
-{ 
-  WordIndex l=es.size()-1,m=fs.size()-1;
-  for(WordIndex i=0;i<=l;i++)
-    {
-      for(WordIndex j=1;j<=m;j++)
-	d(i, j)=dTable.getValue(j, i, l, m);
-      if( i>0 )
-	{
-	  for(WordIndex f=0;f<MAX_FERTILITY;f++)
-	    n(i, f)=nTable.getValue(es[i], f);
-	  n(i,MAX_FERTILITY)=PROB_SMOOTH;
-	}
-    }
-}
-
-LogProb transpair_model3::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double,bool forModel3)const
-{
-  LogProb change;
-  const WordIndex old_i=a(j);
-  WordIndex f0=a.fert(0);
-  if (old_i == new_i)
-    change=1.0;
-  else if (old_i == 0)
-    change=((double)p0*p0/p1) *
-      (( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):f0)*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
-      ((PROB)(forModel3?(a.fert(new_i)+1.0):1.0)) *
-      (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
-      (t(new_i, j)/t(old_i, j))*
-      (forModel3?d(new_i, j):1.0);
-  else if (new_i == 0)
-    change=(double(p1) / (p0*p0)) *
-      (double((m-2*f0)*(m-2*f0-1))/( (DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):(1+f0))*(m-f0))) *
-      (forModel3?(1.0/a.fert(old_i)):1.0) *
-      (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
-      (t(new_i, j) /t(old_i, j)) *
-      (forModel3?(1.0 / d(old_i, j)):1.0);
-  else
-    change=(forModel3?((a.fert(new_i)+1.0)/a.fert(old_i)):1.0) *
-      (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
-      (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
-      (t(new_i,j)/t(old_i,j)) *
-      (forModel3?(d(new_i,j)/d(old_i,j)):1.0);
-  return change;
-}
-
-LogProb transpair_model3::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double,bool forModel3)const 
-{
-  PROB score=1;
-  assert(j1<j2);
-  WordIndex i1=a(j1), i2=a(j2);
-  if (i1!=i2) 
-    {
-      score=(t(i2, j1)/t(i1, j1))*(t(i1, j2)/t(i2, j2));
-      if( forModel3 )
-	{
-	  if (i1)
-	    score *= d(i1, j2)/d(i1, j1);
-	  if (i2)
-	    score *= d(i2, j1)/d(i2, j2);    
-	}
-    }
-  return score;
-}
-
-ostream&operator<<(ostream&out, const transpair_model3&m)
-{
-  for(WordIndex i=0;i<=m.get_l();i++)
-    {
-      out << "EF-I:"<<i<<' ';
-      for(WordIndex j=1;j<=m.get_m();j++)
-	out << "("<<m.t(i,j)<<","<<m.d(i,j)<<")";
-      for(WordIndex j=1;j<MAX_FERTILITY;j++)
-	if( i>0 )
-	  out << "(fert:"<<m.get_fertility(i,j)<<")";
-      out << '\n';
-    }
-  out << "T:" << m.t << "D:" << m.d << "A:" << m.a  << "N:" << m.n << m.p0 << m.p1 << '\n';
-  return out;
-}
-
-LogProb transpair_model3::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
-  alignment b(a);
-  b.set(j, new_i);
-  LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-
-LogProb transpair_model3::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const
-{
-  alignment b(a);
-  b.set(j1, a(j2));
-  b.set(j2, a(j1));
-  LogProb a_prob=thisValue;
-  if( a_prob<0.0 )
-    a_prob=prob_of_target_and_alignment_given_source(a);
-  massert(a_prob==prob_of_target_and_alignment_given_source(a));
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-
-LogProb transpair_model3::prob_of_target_and_alignment_given_source(const alignment&al,bool verb)const
-{
-  LogProb total = 1.0 ;
-  static const LogProb zero = 1E-299 ; 
-  total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-  if( verb) cerr << "IBM-3: (1-p1)^(m-2 f0)*p1^f0: " << total << '\n';
-  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-    total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
-  if( verb) cerr << "IBM-3: +NULL:binomial+distortion " << total << '\n';
-  for (WordIndex i = 1 ; i <= l ; i++)
-    {
-      total *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
-      if( verb) cerr << "IBM-3: fertility of " << i << " with factorial " << get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i)) << " -> " << total << '\n';
-    }
-  for (WordIndex j = 1 ; j <= m ; j++)
-    {
-      total*= get_t(al(j), j) ;
-      massert( get_t(al(j), j)>=PROB_SMOOTH );
-      if( verb) cerr << "IBM-3: t of " << j << " " << al(j) << ": " << get_t(al(j), j)  << " -> " << total << '\n';
-      if (al(j))
-	{
-	  total *= get_d(al(j), j);
-	  if( verb) cerr << "IBM-3: d of " << j << ": " << get_d(al(j), j)  << " -> " << total << '\n';
-	}
-    }
-  return total?total:zero;
-}
-
-
-void transpair_model3::computeScores(const alignment&al,vector<double>&d)const
-{
-  LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
-  total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-    total1 *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
-  for (WordIndex i = 1 ; i <= l ; i++)
-    {
-      total2 *= get_fertility(i, al.fert(i)) * (LogProb) factorial(al.fert(i));
-    }
-  for (WordIndex j = 1 ; j <= m ; j++)
-    {
-      total3*= get_t(al(j), j) ;
-      massert( get_t(al(j), j)>=PROB_SMOOTH );
-      if (al(j))
-	{
-	  total4 *= get_d(al(j), j);
-	}
-    }
-  d.push_back(total1);//5
-  d.push_back(total2);//6
-  d.push_back(total3);//7
-  d.push_back(total4);//8
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model3.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model3.h b/ext/giza-pp/GIZA++-v2/transpair_model3.h
deleted file mode 100644
index 9c07fd9..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model3.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/*--
-transpair_model3: representation of a translation pair for model3 training
-allowing for fast access (esp. to t table).
-
-Franz Josef Och (30/07/99)
---*/
-#ifndef transpair_model3_h_fjo_defined
-#define transpair_model3_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <cmath>
-#include "transpair_model2.h"
-
-extern double factorial(int n);
-inline bool doubleEqual(const double a, const double b)
-{
-  if( a==b )
-    return 1.0;
-  bool bl=fabs(1.0-a/b)<1e-10;
-  if( bl )
-    return 1;
-  else
-    {
-      cerr << "DIFFERENT: " << a << " " << b << " " << a/b << " " << 1.0-a/b << endl;
-      return 0;
-    }
-}
-
-
-class transpair_model3 : public transpair_model2
-{
- protected:
-  Array2<PROB, Vector<PROB> > d, n;
-  PROB p0, p1;
- public:
-  typedef transpair_model3 simpler_transpair_model;
-  transpair_model3(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, 
-		   amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, 
-		   double _p1, double _p0, void*x=0);
-  const PROB&get_d(WordIndex i, WordIndex j)const
-    {return d(i, j);}
-  const PROB&get_a(WordIndex i, WordIndex j)const
-    {return a(i, j);}
-  const PROB&get_fertility(WordIndex i, WordIndex f)const
-    {massert(i>0);return (f>=MAX_FERTILITY)?n(i, MAX_FERTILITY):n(i, f);}
-  int modelnr()const{return 3;}
-  LogProb scoreOfAlignmentForChange(const alignment&)const
-    {return -1.0; }  
-  LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j, double thisValue=-1.0,bool withDistortions=1)const;
-  LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2, double thisValue=-1.0,bool withDistortions=1)const ;
-  LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
-  LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const;
-  friend ostream&operator<<(ostream&out, const transpair_model3&m);
-  LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verb=0)const;
-  bool isSubOptimal()const{return 1;}
-  void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model4.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model4.cpp b/ext/giza-pp/GIZA++-v2/transpair_model4.cpp
deleted file mode 100644
index ebc2666..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model4.cpp
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include "transpair_model4.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(float,d4modelsmooth_factor,"model4SmoothFactor","smooting parameter for alignment probabilities in Model 4",PARLEV_SMOOTH,0.2);
-
-LogProb transpair_model4::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
-  LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-  alignment b(a);
-  b.set(j, new_i);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-LogProb transpair_model4::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double)const 
-{
-  LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-  alignment b(a);
-  b.set(j1, a(j2));
-  b.set(j2, a(j1));
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model4::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue)const
-{
-  if( a(j)==new_i )
-    return 1.0;
-  LogProb change=transpair_model3::scoreOfMove(a,new_i,j,-1.0,0);
-  LogProb a_prob=thisValue;
-  if(a_prob<0.0 )
-    a_prob=prob_of_target_and_alignment_given_source(a,2);
-  massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-  WordIndex old_i=a(j);
-  //alignment b(a);
-  const_cast<alignment&>(a).set(j,new_i);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(a,2);
-  const_cast<alignment&>(a).set(j,old_i);
-  change*=b_prob/a_prob;
-  return change;
-}
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model4::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const 
-{
-  WordIndex aj1=a(j1),aj2=a(j2);
-  if( aj1==aj2 )
-    return 1.0;
-  LogProb change=transpair_model3::scoreOfSwap(a,j1,j2,-1.0,0);
-  LogProb a_prob=thisValue;
-  if( a_prob<0.0 )
-    a_prob=prob_of_target_and_alignment_given_source(a,2);
-  massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-
-  //alignment b(a);
-  const_cast<alignment&>(a).set(j1,aj2);
-  const_cast<alignment&>(a).set(j2,aj1);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(a,2);
-  const_cast<alignment&>(a).set(j1,aj1);
-  const_cast<alignment&>(a).set(j2,aj2);
-
-  if( verboseTP )
-    cerr << "scoreOfSwap: " << change << ' ' << a_prob << ' ' << b_prob << ' ' << endl;
-  change*=b_prob/a_prob;
-  if( verboseTP )
-    cerr << "resulting: " << change << " should be " << _scoreOfSwap(a,j1,j2) << endl;
-  return change;
-}
-
-LogProb transpair_model4::prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const
-{
-  LogProb total = 1.0 ;
-  total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-  if( verb) cerr << "IBM-4: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
-  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-    total *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
-  if( verb) cerr << "IBM-4: +NULL:binomial+distortion " << total << endl;
-  for (WordIndex i = 1 ; i <= l ; i++)
-    {
-      total *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
-      if( verb) cerr << "IBM-4: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
-    }
-  for (WordIndex j = 1 ; j <= m ; j++)
-    {
-      total*= get_t(al(j), j) ;
-      if( verb) cerr << "IBM-4: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j)  << " -> " << total << endl;
-    }
-  return total;
-}
-
-LogProb transpair_model4::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
-{
-  LogProb total = 1.0 ;
-  static const LogProb almostZero = 1E-299 ; 
-  if( distortionType&1 )
-    {
-      total *= prob_of_target_and_alignment_given_source_1(al,verb);
-    }
-  if( distortionType&2 )
-    {
-      for(WordIndex j=1;j<=m;j++)
-	if( al(j) )
-	  if( al.get_head(al(j))==j)
-	    {
-	      int ep=al.prev_cept(al(j));
-	      float x2=probFirst[ep](j,al.get_center(ep));
-	      massert(x2<=1.0);
-	      total*=x2;
-	      if( verb) cerr << "IBM-4: d=1 of " << j << ": " << x2  << " -> " << total << endl;
-	    }
-	  else
-	    {
-	      float x2=probSecond(j,al.prev_in_cept(j));
-	      massert(x2<=1.0);
-	      total*=x2;
-	      if( verb) cerr << "IBM-4: d>1 of " << j << ": " << x2  << " -> " << total << endl;
-	    }
-    }
-  return total?total:almostZero;
-}
-
-void transpair_model4::computeScores(const alignment&al,vector<double>&d)const
-{
-  LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
-  total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-    total1 *= double(m - al.fert(0) - i + 1) / (double(DeficientDistortionForEmptyWord?(max(2,int(m))/DeficientDistortionForEmptyWord):i)) ;
-  for (WordIndex i = 1 ; i <= l ; i++)
-    total2 *= get_fertility(i, al.fert(i));// * (LogProb) factorial(al.fert(i));
-  for (WordIndex j = 1 ; j <= m ; j++)
-    total3*= get_t(al(j), j) ;
-  for(WordIndex j=1;j<=m;j++)
-    if( al(j) )
-      if( al.get_head(al(j))==j)
-	{
-	  int ep=al.prev_cept(al(j));
-	  float x2=probFirst[ep](j,al.get_center(ep));
-	  total4*=x2;
-	}
-      else
-	{
-	  float x2=probSecond(j,al.prev_in_cept(j));
-	  total4*=x2;
-	}
-  d.push_back(total1);//9
-  d.push_back(total2);//10
-  d.push_back(total3);//11
-  d.push_back(total4);//12
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model4.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model4.h b/ext/giza-pp/GIZA++-v2/transpair_model4.h
deleted file mode 100644
index 730fbe7..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model4.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef transpair_model4_h_fjo_defined
-#define transpair_model4_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include "D4Tables.h"
-#include "transpair_model3.h"
-
-extern double factorial(int n);
-
-class transpair_model4 : public transpair_model3
-{
- private:
-  d4model&d4m;
-  Array2<double> probSecond;
-  Vector<Array2<double> > probFirst;
- public:
-  typedef transpair_model3 simpler_transpair_model;
-  transpair_model4(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0,d4model*_d4m) 
-    : transpair_model3(es, fs, tTable, aTable, dTable, nTable, _p1, _p0), 
-    d4m(*_d4m),probSecond(m+1,m+1,0.0),probFirst(l+1)
-    {
-      for(unsigned int j1=1;j1<=m;++j1)
-	for(unsigned int j2=1;j2<j1;++j2)
-	  {
-	    probSecond(j1,j2)=d4m.getProb_bigger(j1,j2,0,d4m.fwordclasses.getClass(get_fs(j1)),l,m);
-	  }
-      for(unsigned int i=0;i<=l;++i)
-	{
-	  Array2<double> &pf=probFirst[i]=Array2<double>(m+1,m+1,0.0);
-	  for(unsigned int j1=1;j1<=m;++j1)
-	    {
-	      map<m4_key,d4model::Vpff,compare1 >::const_iterator ci=d4m.getProb_first_iterator(d4m.ewordclasses.getClass(get_es(i)),d4m.fwordclasses.getClass(get_fs(j1)),l,m);
-	      for(unsigned int j2=0;j2<=m;++j2)
-		{
-		  pf(j1,j2)=d4m.getProb_first_withiterator(j1,j2,m,ci);
-		  massert(pf(j1,j2)==d4m.getProb_first(j1,j2,d4m.ewordclasses.getClass(get_es(i)),d4m.fwordclasses.getClass(get_fs(j1)),l,m));
-		}
-	    }
-	}
-    }
-  LogProb prob_of_target_and_alignment_given_source_1(const alignment&al,bool verb)const;
-  LogProb scoreOfAlignmentForChange(const alignment&a)const
-    {return prob_of_target_and_alignment_given_source(a,2); }  
-  LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
-  LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
-  LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
-  LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
-  int modelnr()const{return 4;}
-  LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
-  void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model5.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model5.cpp b/ext/giza-pp/GIZA++-v2/transpair_model5.cpp
deleted file mode 100644
index 7baa5ca..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model5.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include "transpair_model5.h"
-#include "Parameter.h"
-
-int m5scorefound=0,m5scorenotfound=0;
-
-GLOBAL_PARAMETER(float,d5modelsmooth_factor,"model5SmoothFactor","smooting parameter for distortion probabilities in Model 5 (linear interpolation with constant)",PARLEV_SMOOTH,0.1);
-float d5modelsmooth_countoffset=0.0;
-
-LogProb transpair_model5::_scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double)const
-{
-  if( doModel4Scoring )
-    return transpair_model4::_scoreOfMove(a,new_i,j);
-  alignment b(a);
-  b.set(j, new_i);
-  LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-LogProb transpair_model5::_scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const 
-{
-  if( doModel4Scoring )
-    return transpair_model4::_scoreOfSwap(a,j1,j2,thisValue);
-  alignment b(a);
-  b.set(j1, a(j2));
-  b.set(j2, a(j1));
-  LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-  assert(a_prob);
-  assert(b_prob);
-  if( a_prob )
-    return b_prob/a_prob;
-  else if( b_prob )
-    return 1e20;
-  else
-    return 1.0;
-}
-
-//increasing efficiency: no copy of alignment (calc. everything incrementally)
-LogProb transpair_model5::scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue)const
-{
-  if( doModel4Scoring )
-    return transpair_model4::scoreOfMove(a,new_i,j,thisValue);
-  alignment b(a);
-  b.set(j,new_i);
-  
-  LogProb change;
-  const WordIndex old_i=a(j);
-  WordIndex f0=a.fert(0);
-  if (old_i == new_i)
-    change=1.0;
-  else if (old_i == 0)
-    change=((double)p0*p0/p1) *
-      ((f0*(m-f0+1.0)) / ((m-2*f0+1)*(m-2*f0+2.0))) *
-      ((PROB)(1.0)) *
-      (get_fertility(new_i, a.fert(new_i)+1) / get_fertility(new_i, a.fert(new_i)))*
-      (t(new_i, j)/t(old_i, j))*
-      1.0;
-  else if (new_i == 0)
-    change=(double(p1) / (p0*p0)) *
-      (double((m-2*f0)*(m-2*f0-1))/((1+f0)*(m-f0))) *
-      (1.0) *
-      (get_fertility(old_i, a.fert(old_i)-1) /get_fertility(old_i, a.fert(old_i)))*
-      (t(new_i, j) /t(old_i, j)) *
-      (1.0);
-  else
-    change=(1.0) *
-      (get_fertility(old_i,a.fert(old_i)-1) / get_fertility(old_i,a.fert(old_i))) *
-      (get_fertility(new_i,a.fert(new_i)+1) /get_fertility(new_i,a.fert(new_i))) *
-      (t(new_i,j)/t(old_i,j)) *
-      (1.0);
-  LogProb a_prob=thisValue;
-  if( a_prob<0.0 )
-    a_prob=prob_of_target_and_alignment_given_source(a,2);
-  massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-  
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b,2);
-  change*=b_prob/a_prob;
-  return change;
-}
-LogProb transpair_model5::scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue)const 
-{
-  if( doModel4Scoring )
-    return transpair_model4::scoreOfSwap(a,j1,j2,thisValue);
-  alignment b(a);
-  b.set(j1,a(j2));
-  b.set(j2,a(j1));
-  LogProb change=transpair_model3::scoreOfSwap(a,j1,j2,-1.0,0);
-  LogProb a_prob=thisValue;
-  if( a_prob<0.0 )
-    a_prob=prob_of_target_and_alignment_given_source(a,2);
-  massert(a_prob==prob_of_target_and_alignment_given_source(a,2));
-  LogProb b_prob=prob_of_target_and_alignment_given_source(b,2);
-  change*=b_prob/a_prob;
-  return change;
-}
-
-LogProb transpair_model5::prob_of_target_and_alignment_given_source(const alignment&al, short distortionType,bool verb)const
-{
-  if( doModel4Scoring )
-    return transpair_model4::prob_of_target_and_alignment_given_source(al,distortionType);
-  LogProb total = 1.0 ;
-  static const LogProb almostZero = 1E-299 ; 
-  double x2;
-  if( distortionType&1 )
-    {
-      total *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-      if( verb) cerr << "IBM-5: (1-p1)^(m-2 f0)*p1^f0: " << total << endl;
-      for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-	total *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
-      if( verb) cerr << "IBM-5: +NULL:binomial+distortion " << total << endl;
-      for (WordIndex i = 1 ; i <= l ; i++)
-	{
-	  total *= get_fertility(i, al.fert(i));
-	  if( verb) cerr << "IBM-5: fertility of " << i << " " << get_fertility(i, al.fert(i)) << " -> " << total << endl;
-	}
-      for (WordIndex j = 1 ; j <= m ; j++)
-	{
-	  total*= get_t(al(j), j) ;
-	  if( verb) cerr << "IBM-5: t of j:" << j << " i:" << al(j) << ": " << get_t(al(j), j)  << " -> " << total << endl;
-	}
-    }
-  if( distortionType&2 )
-    {
-      PositionIndex prev_cept=0;
-      PositionIndex vac_all=m;
-      Vector<char> vac(m+1,0);
-      for(WordIndex i=1;i<=l;i++)
-	{
-	  PositionIndex cur_j=al.als_i[i]; 
-	  PositionIndex prev_j=0;
-	  PositionIndex k=0;
-	  if(cur_j) { // process first word of cept
-	    k++;
-	    // previous position
-	    total*= (x2=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses.getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k));
-	    
-	    vac_all--;
-	    assert(vac[cur_j]==0);
-	    vac[cur_j]=1;
-	    
-	    if( verb) cerr << "IBM-5: d=1 of " << cur_j << ": " << x2  << " -> " << total << endl;
-	    prev_j=cur_j;
-	    cur_j=al.als_j[cur_j].next;
-	  }
-	  while(cur_j) { // process following words of cept
-	    k++;
-	    // previous position
-	    int vprev=vacancies(vac,prev_j);
-	    total*= (x2=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses.getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k));
-	    
-	    
-	    vac_all--;
-	    vac[cur_j]=1;
-	    
-	    
-	    if( verb) cerr << "IBM-5: d>1 of " << cur_j << ": " << x2  << " -> " << total << endl;
-	    prev_j=cur_j;
-	    cur_j=al.als_j[cur_j].next;
-	  }
-	  assert(k==al.fert(i));
-	  if( k )
-	    prev_cept=i;
-	}
-      assert(vac_all==al.fert(0));
-    }
-  total = total?total:almostZero;
-  return total;
-}
-
-
-void transpair_model5::computeScores(const alignment&al,vector<double>&d)const
-{
-  LogProb total1 = 1.0,total2=1.0,total3=1.0,total4=1.0 ;
-  total1 *= pow(double(1-p1), m-2.0 * al.fert(0)) * pow(double(p1), double(al.fert(0)));
-  for (WordIndex i = 1 ; i <= al.fert(0) ; i++)
-    total1 *= double(m - al.fert(0) - i + 1) / i ; // IBM-5 is not deficient!
-  for (WordIndex i = 1 ; i <= l ; i++)
-    total2 *= get_fertility(i, al.fert(i));
-  for (WordIndex j = 1 ; j <= m ; j++)
-    total3*= get_t(al(j), j) ;
-  PositionIndex prev_cept=0;
-  PositionIndex vac_all=m;
-  Vector<char> vac(m+1,0);
-  for(WordIndex i=1;i<=l;i++)
-    {
-      PositionIndex cur_j=al.als_i[i]; 
-      PositionIndex prev_j=0;
-      PositionIndex k=0;
-      if(cur_j) { // process first word of cept
-	k++;
-	total4*=d5m.getProb_first(vacancies(vac,cur_j),vacancies(vac,al.get_center(prev_cept)),d5m.fwordclasses.getClass(get_fs(cur_j)),l,m,vac_all-al.fert(i)+k);
-	vac_all--;
-	assert(vac[cur_j]==0);
-	vac[cur_j]=1;
-	prev_j=cur_j;
-	cur_j=al.als_j[cur_j].next;
-      }
-      while(cur_j) { // process following words of cept
-	k++;
-	int vprev=vacancies(vac,prev_j);
-	total4*=d5m.getProb_bigger(vacancies(vac,cur_j),vprev,d5m.fwordclasses.getClass(get_fs(cur_j)),l,m,vac_all-vprev/*war weg*/-al.fert(i)+k);
-	vac_all--;
-	vac[cur_j]=1;
-	prev_j=cur_j;
-	cur_j=al.als_j[cur_j].next;
-      }
-      assert(k==al.fert(i));
-      if( k )
-	prev_cept=i;
-    }
-  assert(vac_all==al.fert(0));
-  d.push_back(total1);//13
-  d.push_back(total2);//14
-  d.push_back(total3);//15
-  d.push_back(total4);//16
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_model5.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_model5.h b/ext/giza-pp/GIZA++-v2/transpair_model5.h
deleted file mode 100644
index 5ecf49d..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_model5.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef transpair_model5_h_fjo_defined
-#define transpair_model5_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include "D5Tables.h"
-#include "transpair_model4.h"
-
-extern double factorial(int n);
-
-inline int vacancies(const Vector<char>&vac,int u)
-{
-  int n=0;
-  const char *i=&(vac[0])+1;
-  const char *end=&(vac[0])+u+1;
-  while(i<end)
-    n+= ((*i++)==0);
-  return n;
-}
-
-class transpair_model5 : public transpair_model4
-{
- private:
-  const d5model&d5m;
-  bool doModel4Scoring;
- public:
-  typedef transpair_model3 simpler_transpair_model;
-  mutable map<Vector<PositionIndex>,LogProb> scores[4];
-  transpair_model5(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, tmodel<COUNT, PROB>&tTable, 
-		   amodel<PROB>&aTable, amodel<PROB>&dTable, nmodel<PROB>&nTable, double _p1, double _p0, 
-		   const d5model*_d5m) 
-    : transpair_model4(es, fs, tTable, aTable, dTable, nTable, _p1, _p0,&_d5m->d4m),d5m(*_d5m),doModel4Scoring(0) {}
-  LogProb scoreOfAlignmentForChange(const alignment&a)const
-    {
-      if( doModel4Scoring )
-	return transpair_model4::prob_of_target_and_alignment_given_source(a,2); 
-      else
-	return prob_of_target_and_alignment_given_source(a,2); 
-    }
-  LogProb scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
-  LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
-  LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double thisValue=-1.0)const;
-  LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double thisValue=-1.0)const ;
-  int modelnr()const{return 5;}
-  LogProb prob_of_target_and_alignment_given_source(const alignment&al, short distortionType=3,bool verb=0)const;
-  void computeScores(const alignment&al,vector<double>&d)const;
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/transpair_modelhmm.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/transpair_modelhmm.h b/ext/giza-pp/GIZA++-v2/transpair_modelhmm.h
deleted file mode 100644
index d836ad4..0000000
--- a/ext/giza-pp/GIZA++-v2/transpair_modelhmm.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef transpair_modelhmm_h_fjo_defined
-#define transpair_modelhmm_h_fjo_defined
-#include "Array2.h"
-#include "defs.h"
-#include "Vector.h"
-#include "NTables.h"
-#include "ATables.h"
-#include "TTables.h"
-#include "alignment.h"
-#include <cmath>
-#include "transpair_model2.h"
-#include "ForwardBackward.h"
-#include "hmm.h"
-
-class transpair_modelhmm : public transpair_model2
-{
- public:
- typedef transpair_modelhmm simpler_transpair_model;           
-  HMMNetwork*net;
-  transpair_modelhmm(const Vector<WordIndex>&es, const Vector<WordIndex>&fs, const tmodel<COUNT, PROB>&tTable, 
-		   const amodel<PROB>&aTable,const amodel<PROB>&,const nmodel<PROB>&, 
-		   double, double,const hmm*h)
-    : transpair_model2(es,fs,tTable,aTable),net(h->makeHMMNetwork(es,fs,0))
-    {}
-   ~transpair_modelhmm() { delete net; }
-  int modelnr()const{return 6;}
-  LogProb scoreOfMove(const alignment&a, WordIndex _new_i, WordIndex j,double=-1.0)const
-    {
-      int new_i=_new_i;
-      LogProb change=1.0;
-      int old_i=a(j);
-      if (old_i == new_i)
-	change=1.0;
-      else
-	{
-	  int theJ=j-1;
-	  old_i--;
-	  new_i--;
-	  int jj=j-1;
-	  while(jj>0&&a(jj)==0)
-	    jj--;
-	  int theIPrev= (jj>0)?(a(jj)-1):0;
-	  if( j>1&&a(j-1)==0 )
-	    theIPrev+=l;
-	  if( old_i==-1 ){old_i = theIPrev;if(old_i<int(l))old_i+=l;}
-	  if( new_i==-1 ){new_i = theIPrev;if(new_i<int(l))new_i+=l;}
-	  int theIPrevOld=theIPrev,theIPrevNew=theIPrev;
-	  if( theJ==0 )
-	    {
-	      change*=net->getAlphainit(new_i)/net->getAlphainit(old_i);
-	    }
-	  do
-	    {
-	      if( new_i!=old_i )
-		{
-		  change*=net->nodeProb(new_i,theJ)/net->nodeProb(old_i,theJ);
-		}
-	      if( theJ>0)
-		change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,old_i);
-	      theIPrevOld=old_i;
-	      theIPrevNew=new_i;
-	      theJ++;
-	      if( theJ<int(m) && a(theJ+1)==0 )
-		{
-		  if( new_i<int(l)) new_i+=l;
-		  if( old_i<int(l)) old_i+=l;
-		}
-	    } while( theJ<int(m) && a(theJ+1)==0 );
-	  if(theJ==int(m))
-	    {
-	      change*=net->getBetainit(new_i)/net->getBetainit(old_i);
-	    }
-	  else
-	    {
-	      new_i=a(theJ+1)-1;
-	      if( new_i==-1)
-		new_i=theIPrevNew;
-	      change*=net->outProb(theJ,theIPrevNew,new_i)/net->outProb(theJ,theIPrevOld,new_i);
-	    }
-	}
-      return change;
-    }
-  LogProb scoreOfAlignmentForChange(const alignment&)const
-    {return -1.0; }  
-  LogProb scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const 
-    {
-      return _scoreOfSwap(a,j1,j2);
-    }
-  LogProb _scoreOfMove(const alignment&a, WordIndex new_i, WordIndex j,double=-1.0)const
-    {
-      alignment b(a);
-      b.set(j, new_i);
-      LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-      LogProb b_prob=prob_of_target_and_alignment_given_source(b);
-      if( a_prob )
-	return b_prob/a_prob;
-      else if( b_prob )
-	return 1e20;
-      else
-	return 1.0;
-    }
-  LogProb _scoreOfSwap(const alignment&a, WordIndex j1, WordIndex j2,double=-1.0)const
-    {
-      WordIndex aj1=a(j1),aj2=a(j2);
-      if( aj1==aj2 )
-	return 1.0;
-      LogProb a_prob=prob_of_target_and_alignment_given_source(a);
-
-      /*alignment b(a);
-      b.set(j1, a(j2));
-      b.set(j2, a(j1));
-      LogProb b_prob=prob_of_target_and_alignment_given_source(b);*/
-
-      const_cast<alignment&>(a).set(j1,aj2);
-      const_cast<alignment&>(a).set(j2,aj1);
-      LogProb b_prob=prob_of_target_and_alignment_given_source(a);
-      const_cast<alignment&>(a).set(j1,aj1);
-      const_cast<alignment&>(a).set(j2,aj2);
-
-      if( a_prob )
-	return b_prob/a_prob;
-      else if( b_prob )
-	return 1e20;
-      else
-	return 1.0;
-    }
-  inline friend ostream&operator<<(ostream&out, const transpair_modelhmm&)
-    {
-      return out << "NO-OUTPUT for transpair_modelhmm\n";
-    }
-  LogProb prob_of_target_and_alignment_given_source(const alignment&al,bool verbose=0)const
-    {
-      double prob=1.0;
-      int theIPrev=0;
-      for(unsigned int j=1;j<=m;j++)
-	{
-	  int theJ=j-1;
-	  int theI=al(j)-1;
-	  if( theI==-1 )
-	    theI=(theIPrev%l)+l;
-	  prob*=net->nodeProb(theI,theJ);
-	  if( verbose )
-	    cout << "NP " << net->nodeProb(theI,theJ) << ' ';
-	  if( j==1 )
-	    {
-	      prob*=net->getAlphainit(theI);
-	      if( verbose )
-		cout << "AP0 " << net->getAlphainit(theI) << ' ';
-	    }
-	  else
-	    {
-	      prob*=net->outProb(theJ,theIPrev,theI);
-	      if( verbose )
-		cout << "AP1 " << net->outProb(theJ,theIPrev,theI) << ' ';
-	    }
-	  theIPrev=theI;
-	  if( j==m )
-	    {
-	      prob*=net->getBetainit(theI);
-	      if( verbose )
-		cout << "AP2 " << net->getBetainit(theI) << ' ';
-	    }
-	  if( verbose )
-	    cout << "j:"<<theJ<<" i:"<<theI << ";  ";
-	}
-      if( verbose )
-	cout << '\n';
-      return prob*net->finalMultiply;
-    }
-  void computeScores(const alignment&al,vector<double>&d)const
-    {
-      double prob1=1.0,prob2=1.0;
-      int theIPrev=0;
-      for(unsigned int j=1;j<=m;j++)
-	{
-	  int theJ=j-1;
-	  int theI=al(j)-1;
-	  if( theI==-1 )
-	    theI=(theIPrev%l)+l;
-	  prob1*=net->nodeProb(theI,theJ);
-	  if( j==1 )
-	    {
-	      prob2*=net->getAlphainit(theI);
-	    }
-	  else
-	    {
-	      prob2*=net->outProb(theJ,theIPrev,theI);
-	    }
-	  theIPrev=theI;
-	  if( j==m )
-	    {
-	      prob2*=net->getBetainit(theI);
-	    }
-	}
-      d.push_back(prob1);
-      d.push_back(prob2);
-    }
-
-  bool isSubOptimal()const{return 0;}
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/utility.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/utility.cpp b/ext/giza-pp/GIZA++-v2/utility.cpp
deleted file mode 100644
index 4e9607a..0000000
--- a/ext/giza-pp/GIZA++-v2/utility.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include "mymath.h"
-
-double factorial(int n)
-{
-  double  f=1;
-  for(int i=2; i <= n; i++)
-    f *= i;
-  return f;
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/utility.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/utility.h b/ext/giza-pp/GIZA++-v2/utility.h
deleted file mode 100644
index 078a2a0..0000000
--- a/ext/giza-pp/GIZA++-v2/utility.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef utility_h
-#define utility_h
-#include <iostream>
-#include "Perplexity.h"
-#include "Vector.h"
-#include "TTables.h"
-#include "getSentence.h"
-#include "vocab.h"
-
-extern void printHelp(void);
-extern void parseConfigFile (char * fname );
-extern void parseArguments(int argc, char *argv[]);
-extern void generatePerplexityReport(const Perplexity& trainperp, 
-				     const Perplexity& testperp, 
-				     const Perplexity& trainVperp, 
-				     const Perplexity& testVperp, 
-				     ostream& of, int trainsize, 
-				     int testsize, unsigned int last, bool);
-
-extern void  printSentencePair(Vector<WordIndex>& es, Vector<WordIndex>& fs, ostream& of);
-     
-extern void printOverlapReport(const tmodel<COUNT, PROB>& tTable,
-			       sentenceHandler& testHandler, vcbList& trainEList, 
-			       vcbList& trainFList, vcbList& testEList, vcbList& testFList);
-
-extern void printAlignToFile(const Vector<WordIndex>& es,  const Vector<WordIndex>& fs, 
-			     const Vector<WordEntry>& evlist, const Vector<WordEntry>& fvlist, 
-			     ostream& of2, const Vector<WordIndex>& viterbi_alignment, int pair_no, 
-			     double viterbi_score);
-
-extern double factorial(int) ;
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/vocab.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/vocab.cpp b/ext/giza-pp/GIZA++-v2/vocab.cpp
deleted file mode 100644
index a91c572..0000000
--- a/ext/giza-pp/GIZA++-v2/vocab.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include "vocab.h" 
-
-void vcbList::readVocabList()
-     // reads a vocabulary file from fname. It expects the following format:
-     // 
-     // token_id token_string frequency
-{
-
-  int freq=0;
-  WordIndex word_id ;
-  WordEntry entry("NULL",0) ;
-
-  string line, word ;
-  cerr << "Reading vocabulary file from:" << fname << "\n";    
-  //  total = 0 ;
-  ifstream vFile(fname);
-  if(!vFile){
-    cerr <<  "\nCannot open vocabulary file " << fname << "file";
-    exit(1);
-  }
-  
-  list.push_back(entry);
-  s2i[entry.word]=list.size()-1;
-
-  while(getline(vFile, line)){
-    istringstream buffer(line);
-    if(!(buffer >> word_id >> word >> freq))
-      cerr << "ERROR: reading vocabulary; " << word_id << ' ' << word << ' ' << freq << endl;
-    if (word_id == 0){
-      cerr << "ERROR: TOKEN ID 0 is reserved for special token NULL, in line: \n"<< line<<"\n" ;
-      exit(-1);
-    }
-    else if (word_id >= MAX_VOCAB_SIZE){
-      cerr << "ERROR: TOKEN ID is greater than maximum vocabulary size "
-	   << MAX_VOCAB_SIZE << " in line :\n"<< line <<"\n" ;
-      exit(-1);
-    }	
-    else if (freq < 0){
-      cerr << "ERROR: frequency must be a positive integer, in line :\n"
-	   << line <<"\n";
-      exit(-1);
-    }
-    else if(word_id >= list.size()){
-      list.resize(word_id+1);
-      list[word_id].word = word ;
-      s2i[word]=word_id;
-      list[word_id].freq = 0 ;
-      noUniqueTokens = word_id + 1 ;
-      //      noUniqueTokens++ ;
-      //      total += freq ;
-    }      
-    else if(list[word_id].word != "\0"){
-      cerr << "ERROR: TOKEN ID must be unique for each token, in line :\n"
-	   << line <<"\n";
-      cerr << "TOKEN ID " << word_id << " has already been assigned to: " <<
-	list[word_id].word << "\n";
-      exit(-1);
-    }
-    else { // line  has valid information
-      list[word_id].word = word ;
-      s2i[word]=word_id;
-      list[word_id].freq = 0 ;
-      //      noUniqueTokens++ ;
-      noUniqueTokens  = word_id + 1 ;
-      //      total += freq ;
-    }
-  } // end of while
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/GIZA++-v2/vocab.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/vocab.h b/ext/giza-pp/GIZA++-v2/vocab.h
deleted file mode 100644
index 988edc6..0000000
--- a/ext/giza-pp/GIZA++-v2/vocab.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef _vocab_h
-#define _vocab_h 1
-
-#include "defs.h" 
-#include "Vector.h" 
-
-#include <fstream>
-#include <sstream>
-#include <map>
-
-class WordEntry {
- public:
-  string word ;
-  double freq ;
-  WordEntry():word("\0"), freq(0){};
-  WordEntry(string w, int f):word(w), freq(f){};
-};
-
-class vcbList{
- private:
-  Vector<WordEntry> list ;
-  map<string,int> s2i;
-  double total;
-  WordIndex noUniqueTokens ;
-  WordIndex noUniqueTokensInCorpus ;
-  const char* fname ;
- public:
-  vcbList(const char* f=0):list(), total(0), noUniqueTokens(0), noUniqueTokensInCorpus(0), fname(f){};
-  void setName(const char*f)
-    { fname=f; }
-  vcbList(const vcbList& a):list(a.list), total(a.total), noUniqueTokens(a.noUniqueTokens), noUniqueTokensInCorpus(0), fname(a.fname){};
-  inline WordIndex size()const {return (list.size());};
-  inline WordIndex uniqTokens()const {return noUniqueTokens;};
-  inline WordIndex uniqTokensInCorpus()const {return noUniqueTokensInCorpus;};
-  inline double totalVocab() const {return total;};
-  inline Vector<WordEntry>& getVocabList() { return(list);}; 
-  inline const Vector<WordEntry>& getVocabList()const { return(list);}; 
-  void readVocabList();
-  void incFreq(WordIndex id , double f){
-    if(id < list.size()){
-      if (list[id].freq == 0)
-	noUniqueTokensInCorpus++;
-      list[id].freq += f ;
-      total += f ;
-    }
-  };
-  void clearAllFreq(){
-    for (WordIndex id = 0 ; id < list.size() ; id++)
-      list[id].freq = 0 ;
-    total = 0 ;
-    noUniqueTokensInCorpus = 0 ;
-  };
-  int operator()(const string&x)const
-    {
-      map<string,int>::const_iterator i=s2i.find(x);
-      if( i!=s2i.end() )
-	return i->second;
-      else
-	{
-	  cerr << "ERROR: no word index for '"<<x<<"'\n";
-	  return 0;
-	}
-    }
-  const string operator()(WordIndex id) const { // Yaser - 2000-12-13
-    if (id < list.size())
-      return list[id].word ;
-    else return 0 ;
-  }
-  const string operator[](WordIndex id) const { // Yaser - 2000-12-13
-    if (id < list.size())
-      return list[id].word ;
-    else return 0 ;
-  }
-  void printVocabList(ostream& of){
-    for (WordIndex i = 1 ; i < list.size() ; i++){
-      if (list[i].word != "" && list[i].freq > 0)
-	of << i << ' ' << list[i].word << ' ' << list[i].freq << '\n';
-    }
-  }
-  
-};
-
-#endif 

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/Makefile
----------------------------------------------------------------------
diff --git a/ext/giza-pp/Makefile b/ext/giza-pp/Makefile
deleted file mode 100644
index cb78185..0000000
--- a/ext/giza-pp/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-
-.PHONY: gizapp mkcls-v2 install clean
-
-all: gizapp mkcls-v2
-
-gizapp:
-	@echo $(JOSHUA)
-	$(MAKE) -C GIZA++-v2
-
-mkcls-v2:
-	@echo $(JOSHUA)
-	$(MAKE) -C mkcls-v2
-
-install: gizapp mkcls-v2
-	@cp GIZA++-v2/GIZA++ GIZA++-v2/snt2cooc.out mkcls-v2/mkcls $(JOSHUA)/bin/
-
-clean:
-	$(MAKE) -C GIZA++-v2 clean
-	$(MAKE) -C mkcls-v2 clean
-	@rm -f $(JOSHUA)/bin/GIZA++ $(JOSHUA)/bin/mkcls $(JOSHUA)/bin/snt2cooc.out

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/README
----------------------------------------------------------------------
diff --git a/ext/giza-pp/README b/ext/giza-pp/README
deleted file mode 100644
index c4b4e34..0000000
--- a/ext/giza-pp/README
+++ /dev/null
@@ -1,8 +0,0 @@
-This package contains the GIZA++ toolkit and the mkcls tool, originally
-written by F.J. Och and several other authors.
-
-For more information, refer to the README files and the following pages:
-  http://www.fjoch.com/mkcls.html
-  http://www.fjoch.com/GIZA++.html
-
-