You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:44 UTC
[49/94] [abbrv] [partial] incubator-joshua git commit: Pulled JOSHUA-252 changes and Resolved Merge Conflicts

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/TTables.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/TTables.cpp b/ext/giza-pp/GIZA++-v2/TTables.cpp
deleted file mode 100644
index 25c126f..0000000
--- a/ext/giza-pp/GIZA++-v2/TTables.cpp
+++ /dev/null
@@ -1,323 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
- 
-*/
-#include "TTables.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(float,PROB_CUTOFF,"PROB CUTOFF","Probability cutoff threshold for lexicon probabilities",PARLEV_OPTHEUR,1e-7);
-GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF,"COUNTINCREASE CUTOFF","countCutoff","Counts increment cutoff threshold",PARLEV_OPTHEUR,1e-6);
-
-#ifdef BINARY_SEARCH_FOR_TTABLE
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printCountTable(const char *, 
-					 const Vector<WordEntry>&, 
-					 const Vector<WordEntry>&,
-					 const bool) const
-{
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTable(const char *filename, 
-					 const Vector<WordEntry>& evlist, 
-					 const Vector<WordEntry>& fvlist,
-					 const bool actual) const
-{
-  ofstream of(filename);
-  /*  for(unsigned int i=0;i<es.size()-1;++i)
-    for(unsigned int j=es[i];j<es[i+1];++j)
-      {
-	const CPPair&x=fs[j].second;
-	WordIndex e=i,f=fs[j].first;
-	if( actual )
-	  of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
-	else
-	  of << e << ' ' << f << ' ' << x.prob << '\n';
-	  }*/
-  for(unsigned int i=0;i<lexmat.size();++i)
-    {
-      if( lexmat[i] )
-	for(unsigned int j=0;j<lexmat[i]->size();++j)
-	  {
-	    const CPPair&x=(*lexmat[i])[j].second;
-	    WordIndex e=i,f=(*lexmat[i])[j].first;
-	    if( x.prob>PROB_SMOOTH )
-	      if( actual )
-		of << evlist[e].word << ' ' << fvlist[f].word << ' ' << x.prob << '\n';
-	      else
-		of << e << ' ' << f << ' ' << x.prob << '\n';
-	  }
-    }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTableInverse(const char *, 
-				   const Vector<WordEntry>&, 
-				   const Vector<WordEntry>&, 
-				   const double, 
-				   const double, 
-				   const bool ) const
-{
-}
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::normalizeTable(const vcbList&, const vcbList&, int)
-{
-  for(unsigned int i=0;i<lexmat.size();++i)
-    {
-      double c=0.0;
-      if( lexmat[i] )
-	{
-	  unsigned int lSize=lexmat[i]->size();
-	  for(unsigned int j=0;j<lSize;++j)
-	    c+=(*lexmat[i])[j].second.count;
-	  for(unsigned int j=0;j<lSize;++j)
-	    {
-	      if( c==0 )
-		(*lexmat[i])[j].second.prob=1.0/(lSize);
-	      else
-		(*lexmat[i])[j].second.prob=(*lexmat[i])[j].second.count/c;
-	      (*lexmat[i])[j].second.count=0;
-	    }
-	}
-    }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::readProbTable(const char *){
-}
-
-template class tmodel<COUNT,PROB> ; 
-#else
-/* ------------------ Method Definiotns for Class tmodel --------------------*/
-
-#
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printCountTable(const char *filename, 
-					 const Vector<WordEntry>& evlist, 
-					 const Vector<WordEntry>& fvlist,
-					 const bool actual) const
-     // this function dumps the t table. Each line is of the following format:
-     //
-     // c(target_word/source_word) source_word target_word
-{
-  ofstream of(filename);
-  typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
-  for(i = ef.begin(); i != ef.end();++i){
-    if ( ((*i).second).count >  COUNTINCREASE_CUTOFF)
-      if (actual)
-	of <<  ((*i).second).count << ' ' << evlist[ ((*i).first).first ].word << ' ' << fvlist[((*i).first).second].word << ' ' << (*i).second.prob << '\n';
-      else 
-	of << ((*i).second).count << ' ' <<  ((*i).first).first  << ' ' << ((*i).first).second << ' ' << (*i).second.prob << '\n';
-  }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTable(const char *filename, 
-					 const Vector<WordEntry>& evlist, 
-					 const Vector<WordEntry>& fvlist,
-					 const bool actual) const
-     // this function dumps the t table. Each line is of the following format:
-     //
-     // source_word target_word p(target_word/source_word)
-{
-  ofstream of(filename);
-  typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
-  for(i = ef.begin(); i != ef.end();++i)
-    if( actual )
-      of << evlist[((*i).first).first].word << ' ' << 
-	fvlist[((*i).first).second].word << ' ' << (*i).second.prob << '\n';
-    else
-      of << ((*i).first).first << ' ' << ((*i).first).second << ' ' << 
-	(*i).second.prob << '\n';
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::printProbTableInverse(const char *filename, 
-				   const Vector<WordEntry>& evlist, 
-				   const Vector<WordEntry>& fvlist, 
-				   const double, 
-				   const double, 
-				   const bool actual) const
-  // this function dumps the inverse t table. Each line is of the format:
-  //
-  // target_word_id source_word_id p(source_word/target_word)
-  //
-  // if flag "actual " is true then print actual word entries instead of 
-  // token ids
-{
-  cerr << "Dumping the t table inverse to file: " << filename << '\n';
-  ofstream of(filename);
-  typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
-  PROB p_inv = 0 ;
-  //  static const PROB ratio(double(fTotal)/eTotal);
-  WordIndex e, f ;
-  int no_errors(0);
-  vector<PROB> total(fvlist.size(),PROB(0)) ; // Sum over all e of P(f/e) * p(e) - needed for normalization
- 
-  for(i = ef.begin(); i != ef.end(); i++){
-    e = ((*i).first).first ;
-    f = ((*i).first).second ;
-    total[f] += (PROB) evlist[e].freq * ((*i).second.prob); //add P(f/ei) * F(ei) 
-  }
-  
-  for(i = ef.begin(); i != ef.end(); i++){
-    e = ((*i).first).first ;
-    f = ((*i).first).second ;
-    p_inv = ((*i).second.prob) * (PROB) evlist[e].freq / total[f] ;
-    if (p_inv > 1.0001 || p_inv < 0){
-      no_errors++;
-      if (no_errors <= 10){
-	cerr << "printProbTableInverse(): Error - P("<<evlist[e].word<<"("<<
-	  e<<") / "<<fvlist[f].word << "("<<f<<")) = " << p_inv <<'\n';
-	cerr << "f(e) = "<<evlist[e].freq << " Sum(p(f/e).f(e)) = " << total[f] <<
-	  " P(f/e) = " <<((*i).second.prob)  <<'\n';
-	if (no_errors == 10)
-	  cerr<<"printProbTableInverse(): Too many P inverse errors ..\n";
-      }
-    }
-    if (actual)
-      of << fvlist[f].word << ' ' << evlist[e].word << ' ' << p_inv << '\n';
-    else 
-      of << f << ' ' << e << ' ' << p_inv <<  '\n';
-  }
-}
-/*
-
-
-
-{
-  cerr << "Dumping the t table inverse to file: " << filename << '\n';
-  ofstream of(filename);
-  hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
-  PROB p_inv = 0 ;
-  static const PROB ratio(double(fTotal)/eTotal);
-  WordIndex e, f ;
-  for(i = ef.begin(); i != ef.end(); i++){
-    e = ((*i).first).first ;
-    f = ((*i).first).second ;
-    p_inv = ((*i).second.prob) * ratio * (PROB) evlist[e].freq / 
-      (PROB) fvlist[f].freq ;
-    if (actual)
-      of << fvlist[f].word << ' ' << evlist[e].word << ' ' << p_inv << '\n';
-    else 
-      of << f << ' ' << e << ' ' << p_inv <<  '\n';
-  }
-}
-*/
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::normalizeTable(const vcbList&engl, const vcbList&french, int iter)
-  // normalize conditional probability P(fj/ei):
-  // i.e. make sure that Sum over all j of P(fj/e) = 1  
-  // this method reads the counts portion of the table and normalize into
-  // the probability portion. Then the counts are cleared (i.e. zeroed)
-  // if the resulting probability of an entry is below a threshold, then 
-  // remove it .
-{
-  if( iter==2 )
-    {
-      total2.resize(engl.uniqTokens());for(unsigned int i=0;i<total2.size();i++)total2[i]=0.0;
-    }
-  nFrench.resize(engl.uniqTokens());for(unsigned int i=0;i<nFrench.size();i++)nFrench[i]=0;
-  nEng.resize(french.uniqTokens());for(unsigned int i=0;i<nEng.size();i++)nEng[i]=0;
-  Vector<double> total(engl.uniqTokens(),0.0);
-  //Vector<int> nFrench(engl.uniqTokens(), 0);
-  //Vector<int> nEng(french.uniqTokens(), 0);
-
-  typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i;
-  for(i = ef.begin(); i != ef.end(); i++){ // for all possible source words e
-    if( iter==2 )
-      total2[((*i).first).first] += (*i).second.count;
-    total[((*i).first).first] += (*i).second.count;
-    nFrench[((*i).first).first]++;
-    nEng[((*i).first).second]++;
-  }
-  for(unsigned int k=0;k<engl.uniqTokens();++k)
-    if( nFrench[k] )
-      {
-	double probMass=(french.uniqTokensInCorpus()-nFrench[k])*PROB_SMOOTH;
-	if( probMass<0.0 )
-	  cout << k << " french.uniqTokensInCorpus(): " << french.uniqTokensInCorpus() << "  nFrench[k]:"<< nFrench[k] << '\n';
-	total[k]+= total[k]*probMass/(1-probMass);
-      }
-  typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator j, k;
-  PROB p ;
-  int nParams=0;
-  for(j = ef.begin(); j != ef.end(); ){
-    k = j;
-    k++ ;
-    if( (total[((*j).first).first])>0.0 )
-      p = ((((*j).second).count) /(total[((*j).first).first])) ;
-    else
-      p= 0.0;
-    if (p > PROB_CUTOFF)
-      {
-	if( iter>0 )
-	  {
-	    ((*j).second).prob = 0 ;
-	    ((*j).second).count = p ;
-	  }
-	else
-	  {
-	    ((*j).second).prob = p ;
-	    ((*j).second).count = 0 ;
-	  }
-	nParams++;
-      }
-    else {
-      erase(((*j).first).first, ((*j).first).second);
-    }
-    j = k ;
-  }
-  if( iter>0 )
-    return normalizeTable(engl, french, iter-1);
-  else
-    {
-    }
-}
-
-template <class COUNT, class PROB>
-void tmodel<COUNT, PROB>::readProbTable(const char *filename){
-  /* This function reads the t table from a file.
-     Each line is of the format:  source_word_id target_word_id p(target_word|source_word)
-     This is the inverse operation of the printTable function.
-     NAS, 7/11/99
-  */
-  ifstream inf(filename);
-  cerr << "Reading t prob. table from " << filename << "\n";
-  if(!inf){
-    cerr << "\nERROR: Cannot open " << filename << "\n";
-    return;
-  }
-  WordIndex src_id, trg_id;
-  PROB prob;
-  int nEntry=0;
-  while(    inf >> src_id  >> trg_id  >> prob){
-    insert(src_id, trg_id, 0.0, prob);
-    nEntry++;
-  }
-  cerr << "Read " << nEntry << " entries in prob. table.\n";
-}
-
-template class tmodel<COUNT,PROB> ; 
-
-/* ---------------- End of Method Definitions of class tmodel ---------------*/
-
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/TTables.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/TTables.h b/ext/giza-pp/GIZA++-v2/TTables.h
deleted file mode 100644
index 85673ef..0000000
--- a/ext/giza-pp/GIZA++-v2/TTables.h
+++ /dev/null
@@ -1,417 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/* --------------------------------------------------------------------------*
- *                                                                           *
- * Module : TTables                                                          *
- *                                                                           *
- * Prototypes File: TTables.h                                               *
- *                                                                           *
- * Objective: Defines clases and methods for handling I/O for Probability &  *
- *            Count tables and also alignment tables                         *
- *****************************************************************************/
-
-#ifndef _ttables_h
-#define _ttables_h 1
-
-
-#include "defs.h"
-#include "vocab.h"  
-
-#include <cassert>
- 
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-
-#include <fstream>
-
-#include "Globals.h"
-
-
-/* The tables defined in the following classes are defined as hash tables. For
-   example. the t-table is a hash function of a word pair; an alignment is 
-   a hash function of a vector of integer numbers (sentence positions) and so
-   on   */
-
-
-/*----------- Defnition of Hash Function for class tmodel ------- -----------*/
-
-typedef pair<WordIndex, WordIndex> wordPairIds;
-
-
-class hashpair : public unary_function< pair<WordIndex, WordIndex>, size_t >
-{
-public:
-  size_t operator() (const pair<WordIndex, WordIndex>& key) const
-    {
-      return (size_t) MAX_W*key.first + key.second; /* hash function and it 
-						       is guarnteed to have 
-						       unique id for each 
-						       unique pair */
-    }
-};
-
-
-
-/* ------------------ Class Prototype Definitions ---------------------------*
-  Class Name: tmodel
-  Objective: This defines the underlying data structur for t Tables and t 
-  Count Tables. They are defined as a hash table. Each entry in the hash table
-  is the probability (P(fj/ei) ) or count collected for ( C(fj/ei)). The 
-  probability and the count are represented as log integer probability as 
-  defined by the class LogProb .  
-
-  This class is used to represents t Tables (probabiliity) and n (fertility 
-  Tables and also their corresponding count tables .
- 
- *---------------------------------------------------------------------------*/
-
-//typedef float COUNT ;
-//typedef LogProb PROB ;
-template <class COUNT, class PROB>
-class LpPair {
- public:
-  COUNT count ;
-  PROB  prob ;
- public: // constructor 
-  LpPair():count(0), prob(0){} ;
-  LpPair(COUNT c, PROB p):count(c), prob(p){};
-} ;
-
-#ifdef BINARY_SEARCH_FOR_TTABLE
-
-
-template<class T>
-T*mbinary_search(T*x,T*y,unsigned int val)
-{
-  if( y-x==0 )
-    return 0;
-  if( x->first==val)
-    return x;
-  if( y-x<2 )
-    return 0;
-  T*mid=x+(y-x)/2;
-  if( val < mid->first )
-    return mbinary_search(x,mid,val);
-  else
-    return mbinary_search(mid,y,val);
-  
-}
-
-template<class T>
-const T*mbinary_search(const T*x,const T*y,unsigned int val)
-{
-  if( y-x==0 )
-    return 0;
-  if( x->first==val)
-    return x;
-  if( y-x<2 )
-    return 0;
-  const T*mid=x+(y-x)/2;
-  if( val < mid->first )
-    return mbinary_search(x,mid,val);
-  else
-    return mbinary_search(mid,y,val);
-  
-}
-
-template <class COUNT, class PROB>
-class tmodel{
-  typedef LpPair<COUNT, PROB> CPPair;
- public:
-  int noEnglishWords;  // total number of unique source words
-  int noFrenchWords;   // total number of unique target words
-  //vector<pair<unsigned int,CPPair> > fs;
-  //vector<unsigned int> es;
-  vector< vector<pair<unsigned int,CPPair> >* > lexmat;
-
-  void erase(WordIndex e, WordIndex f)
-    {
-      CPPair *p=find(e,f);
-      if(p)
-	*p=CPPair(0,0);
-    };
-  CPPair*find(int e,int f)
-    {
-      //pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
-      //pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
-      pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
-      pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
-      pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
-      if( x==0 )
-	{
-	  //cerr << "A:DID NOT FIND ENTRY: " << e << " " << f << '\n';
-	  //abort();
-	  return 0;
-	}
-      return &(x->second);
-    }
-  const CPPair*find(int e,int f)const
-    {
-      const pair<unsigned int,CPPair> *be=&(*lexmat[e])[0];
-      const pair<unsigned int,CPPair> *en=&(*lexmat[e])[0]+(*lexmat[e]).size();
-      //const pair<unsigned int,CPPair> *be=&(fs[0])+es[e];
-      //const pair<unsigned int,CPPair> *en=&(fs[0])+es[e+1];
-      const pair<unsigned int,CPPair> *x= mbinary_search(be,en,f);
-      if( x==0 )
-	{
-	  //cerr << "B:DID NOT FIND ENTRY: " << e << " " << f << '\n';
-	  //abort();
-	  return 0;
-	}
-
-      return &(x->second);
-    }
-public:
-  void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
-    *find(e,f)=CPPair(cval,pval);
-  }
-  CPPair*getPtr(int e,int f){return find(e,f);}
-  tmodel(const string&fn)
-    {
-      int count=0,count2=0;
-      ifstream infile2(fn.c_str());
-      int e,f,olde=-1,oldf=-1;
-      pair<unsigned int,CPPair> cp;
-      vector< pair<unsigned int,CPPair> > cps;
-      while(infile2>>e>>f)
-	{
-	  cp.first=f;
-	  assert(e>=olde);
-	  assert(e>olde ||f>oldf);
-	  if( e!=olde&&olde>=0 )
-	    {
-	      int oldsize=lexmat.size();
-	      lexmat.resize(olde+1);
-	      for(unsigned int i=oldsize;i<lexmat.size();++i)
-		lexmat[i]=0;
-	      lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
-	      cps.clear();
-	      if( !((*lexmat[olde]).size()==(*lexmat[olde]).capacity()) )
-		cerr << "eRROR: waste of memory: " << (*lexmat[olde]).size() << " " << (*lexmat[olde]).capacity() << endl;
-	      count2+=lexmat[olde]->capacity();
-	    }
-	  cps.push_back(cp);
-	  olde=e;
-	  oldf=f;
-	  count++;
-	}
-      lexmat.resize(olde+1);
-      lexmat[olde]=new vector< pair<unsigned int,CPPair> > (cps);
-      count2+=lexmat[olde]->capacity();      
-      cout << "There are " << count << " " << count2 << " entries in table" << '\n';
-    }
-
-
-  /*  tmodel(const string&fn)
-    {
-      size_t count=0;
-      {
-	ifstream infile1(fn.c_str());
-	if( !infile1 )
-	  {
-	    cerr << "ERROR: can't read coocurrence file " << fn << '\n';
-	    abort();
-	  }
-	int e,f;
-	while(infile1>>e>>f)
-	  count++;
-      }
-      cout << "There are " << count << " entries in table" << '\n';
-      ifstream infile2(fn.c_str());
-      fs.resize(count);
-      int e,f,olde=-1,oldf=-1;
-      pair<unsigned int,CPPair> cp;
-      count=0;
-      while(infile2>>e>>f)
-	{
-	  assert(e>=olde);
-	  assert(e>olde ||f>oldf);
-	  if( e!=olde )
-	    {
-	      es.resize(e+1);
-	      for(unsigned int i=olde+1;int(i)<=e;++i)
-		es[i]=count;
-	    }
-	  cp.first=f;
-	  assert(count<fs.size());
-	  fs[count]=cp;
-	  //fs.push_back(cp);
-	  olde=e;
-	  oldf=f;
-	  count++;
-	}
-      assert(count==fs.size());
-      es.push_back(fs.size());
-      cout << fs.size() << " " << count << " coocurrences read" << '\n';
-      }*/
-  void incCount(WordIndex e, WordIndex f, COUNT inc) 
-    {
-      if( inc )
-	{
-	  CPPair *p=find(e,f);
-	  if( p )
-	    p->count += inc ;
-	}
-    }
-
-  PROB getProb(WordIndex e, WordIndex f) const
-    {
-      const CPPair *p=find(e,f);
-      if( p )
-	return max(p->prob, PROB_SMOOTH);
-      else
-	return PROB_SMOOTH;
-    }
-
-  COUNT getCount(WordIndex e, WordIndex f) const
-    {
-      const CPPair *p=find(e,f);
-      if( p )
-	return p->count;
-      else
-	return 0.0;
-    }
-
-  void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
-  void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
-  void printProbTableInverse(const char *filename, 
-			     const Vector<WordEntry>& evlist, 
-			     const Vector<WordEntry>& fvlist, 
-			     const double eTotal, 
-			     const double fTotal, 
-			     const bool actual = false ) const;
-  void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
-  void readProbTable(const char *filename); 
-};
-
-
-#else
-
-
-template <class COUNT, class PROB>
-class tmodel{
-  typedef LpPair<COUNT, PROB> CPPair;
- public:
-  int noEnglishWords;  // total number of unique source words
-  int noFrenchWords;   // total number of unique target words
-  hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> > ef;
-  void erase(WordIndex e, WordIndex f)
-  // In: a source and a target token ids.
-  // removes the entry with that pair from table
-    {
-      ef.erase(wordPairIds(e, f));
-    };
-
-public:
-  Vector<PROB> total2;
-  Vector<int> nFrench;
-  Vector<int> nEng;
-
- 
-  // methods;
-
-  // insert: add entry P(fj/ei) to the hash function, Default value is 0.0 
-  void insert(WordIndex e, WordIndex f, COUNT cval=0.0, PROB pval = 0.0){
-    ef[wordPairIds(e, f)].count = cval ;
-    ef[wordPairIds(e, f)].prob = pval ;
-  }
-
-  // returns a reference to the word pair, if does not exists, it creates it.
-  CPPair&getRe(WordIndex e, WordIndex f)
-    {return ef[wordPairIds(e, f)];}
-
-  // returns a pointer to an existing word pair. if pair does not exists, 
-  // the method returns the zero pointer (NULL)
-
-  CPPair*getPtr(WordIndex e, WordIndex f) 
-    {      
-      // look up this pair and return its position
-      typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::iterator i = ef.find(wordPairIds(e, f)); 
-      if(i != ef.end())  // if it exists, return a pointer to it.
-	return(&((*i).second));
-      else return(0) ; // else return NULL pointer
-    }
-
-  void incCount(WordIndex e, WordIndex f, COUNT inc) 
-    // increments the count of the given word pair. if the pair does not exist, 
-    // it creates it with the given value.
-    {
-      if( inc )
-	ef[wordPairIds(e, f)].count += inc ;
-    }
-
-  PROB getProb(WordIndex e, WordIndex f) const
-    // read probability value for P(fj/ei) from the hash table 
-    // if pair does not exist, return floor value PROB_SMOOTH
-    {
-      typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
-      if(i == ef.end())  
-	return PROB_SMOOTH; 
-      else
-	return max(((*i).second).prob, PROB_SMOOTH);
-    }
-
-  COUNT getCount(WordIndex e, WordIndex f) const
-    /* read count value for entry pair (fj/ei) from the hash table */
-    {
-      typename hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >::const_iterator i= ef.find(wordPairIds(e, f));
-      if(i == ef.end())
-	return 0; 
-      else
-	return ((*i).second).count;
-    }
-
-  inline const hash_map<wordPairIds, CPPair, hashpair, equal_to<wordPairIds> >& getHash(void) const {return ef;};
-  /* get a refernece to the hash table */
-  //inline void resize(WordIndex n) {ef.resize(n);};
-  // to resize he hash table 
-
-  void printProbTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
-  void printCountTable(const char* filename, const Vector<WordEntry>&, const Vector<WordEntry>&,bool actual) const;
-  // print the t table to the given file but this time print actual source and
-  //     target words instead of thier token ids 
-  
-  void printProbTableInverse(const char *filename, 
-			     const Vector<WordEntry>& evlist, 
-			     const Vector<WordEntry>& fvlist, 
-			     const double eTotal, 
-			     const double fTotal, 
-			     const bool actual = false ) const;
-  // dump  inverse of t table (i.e P(ei/fj)) to the given file name, 
-  //  if the given flag is true then actual words are printed not token ids 
-  
-  void normalizeTable(const vcbList&engl, const vcbList&french, int iter=2);
-  // to norlmalize the table i.e. make sure P(fj/ei) for all j is equal to 1 
-
-  void readProbTable(const char *filename); 
-  //  void readAsFertilityTable(const char *filename);
-};
-/*--------------- End of Class Definition for tmodel -----------------------*/ 
-
-#endif
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Vector.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Vector.h b/ext/giza-pp/GIZA++-v2/Vector.h
deleted file mode 100644
index 96d26ad..0000000
--- a/ext/giza-pp/GIZA++-v2/Vector.h
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/*--
-Vector: checked vector implementation
-
-Franz Josef Och (30/07/99)
---*/
-#ifndef ARRAY_H_DEFINED
-#define ARRAY_H_DEFINED
-#include "mystl.h"
-#include <algorithm>
-#include <string>
-#include <utility>
-#include <functional>
-#include <cassert>
-
-
-#ifdef NDEBUG
-
-#include <vector>
-#define Vector vector
-template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
-{
-  o << "Vector(" << a.size() << "){ ";
-  for(unsigned int iii=0;iii<a.size();iii++)
-    o << " " << iii<< ": " << a[iii]<<" ;";
-  return o << "}\n";
-}
-
-#else
-
-#define ARRAY_DEBUG
-#define memo_del(a, b)
-#define memo_new(a)
-
-template<class T> class Vector
-{
- private:
-  T *p;                  	
-  int realSize;
-  int maxWritten;
-
-  void copy(T *a, const T *b, int n);
-  void copy(T *a, T *b, int n);
-  void _expand();
-		
- public:
-  Vector() 
-    : p(0), realSize(0), maxWritten(-1)
-    {
-#ifdef VERY_ARRAY_DEBUG
-      cout << "MAKE ARRAY: " << this<<" "<<(void*)p << '\n';
-#endif
-    }
-  Vector(const Vector<T> &x)
-    : p(new T[x.maxWritten+1]), realSize(x.maxWritten+1), maxWritten(x.maxWritten)
-    {
-      memo_new(p);
-      copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
-      cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< '\n';
-#endif
-    }
-  explicit Vector(int n)
-    : p(new T[n]), realSize(n), maxWritten(n-1)
-    {
-      memo_new(p);
-#ifdef VERY_ARRAY_DEBUG
-      cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-    }
-  Vector(int n, const T&_init)
-    : p(new T[n]), realSize(n), maxWritten(n-1)
-    {
-      memo_new(p);
-      for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
-      cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-    }
-  
-  ~Vector() 
-    { 
-#ifdef VERY_ARRAY_DEBUG
-      cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-      delete [] p;
-      memo_del(p, 1);
-#ifndef NDEBUG
-      p=0;realSize=-1;maxWritten=-1;
-#endif
-    }
-  
-  Vector<T>& operator=(const Vector<T>&x)
-    {
-      if( this!= &x )
-	{
-#ifdef VERY_ARRAY_DEBUG
-	  cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif	
-	  delete [] p;
-	  memo_del(p, 1);
-	  realSize = x.maxWritten+1;
-	  maxWritten = x.maxWritten;
-	  p = new T[realSize]; 
-	  memo_new(p);
-	  copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
-	  cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-	}
-      return *this;
-    }
-  
-  Vector<T>& operator=(Vector<T>&x)
-    {
-      if( this!= &x )
-	{
-#ifdef VERY_ARRAY_DEBUG
-	  cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-	  delete [] p;
-	  memo_del(p, 1);
-	  realSize = x.maxWritten+1;
-	  maxWritten = x.maxWritten;
-	  p = new T[realSize]; 
-	  memo_new(p);
-	  copy(p, x.p, realSize);
-#ifdef VERY_ARRAY_DEBUG
-	  cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-	}
-      return *this;
-    }
-  
-  void allowAccess(int n) 
-    { 
-      while( realSize<=n )
-	_expand(); 
-      maxWritten=max(maxWritten, n);
-      assert( maxWritten<realSize );
-    }
-  void resize(int n)
-    {
-      while( realSize<n ) 
-	_expand(); 
-      maxWritten=n-1;
-    }
-  void clear()
-    {
-      resize(0);
-    }
-  void reserve(int n)
-    {
-      int maxOld=maxWritten;
-      resize(n);
-      maxWritten=maxOld;
-    }
-  void sort(int until=-1)
-    {
-      if( until== -1 ) until=size();
-      std::sort(p, p+until);
-    }
-  void invsort(int until=-1)
-    {
-      if( until== -1 ) until=size();
-      std::sort(p, p+until, greater<T>());      
-    }
-  void init(int n, const T&_init)
-    {
-#ifdef VERY_ARRAY_DEBUG
-      cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-      delete []p;
-      memo_del(p, 1);
-      p=new T[n];
-      memo_new(p);
-      realSize=n;
-      maxWritten=n-1;
-      for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
-      cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-    }
-  inline unsigned int size() const
-    {assert( maxWritten<realSize );
-    return maxWritten+1;}
-  inline int low() const
-    { return 0; }
-  inline int high() const
-    { return maxWritten; }
-  int findMax() const;
-  int findMin() const;
-  void errorAccess(int n) const;
-  inline T*getPointerToData(){return p;}
-  inline T*begin(){return p;}
-  inline T*end(){return p+maxWritten+1;}
-  inline T& operator[](int n)
-    { 
-#ifndef NDEBUG
-      if( n<0 || n>maxWritten )
-	errorAccess(n);
-#endif
-      return p[n];
-    }
-  inline const T& operator[](int n) const 
-    { 
-#ifndef NDEBUG
-      if(n<0 || n>maxWritten )
-	errorAccess(n);
-#endif
-      return p[n]; 
-    }
-  inline const T& get(int n) const 
-    { 
-#ifndef NDEBUG
-      if(n<0 || n>maxWritten )
-	errorAccess(n);
-#endif      
-      return p[n]; 
-    }
-  const T&top(int n=0) const
-    {return (*this)[maxWritten-n];}
-  T&top(int n=0)
-    {return (*this)[maxWritten-n];}
-  const T&back(int n=0) const
-    {return (*this)[maxWritten-n];}
-  T&back(int n=0)
-    {return (*this)[maxWritten-n];}
-  T&push_back(const T&x)
-    {     
-      allowAccess(maxWritten+1);
-      (*this)[maxWritten]=x;
-      return top();
-    }
-    /*
-  bool writeTo(ostream&out) const
-    {
-      out << "Vector ";
-      out << size() << " ";
-      out << a << '\n';
-      for(int iv=0;iv<=maxWritten;iv++)
-	{
-	  writeOb(out, (*this)[iv]);
-	  out << '\n';
-	}
-      return 1;
-    }
-    */
-    
-  bool readFrom(istream&in)
-    {
-      string s;
-      if( !in )
-	{
-	  cerr << "ERROR(Vector): file cannot be opened.\n";
-	  return 0;
-	}
-      in >> s;
-      if( !(s=="Vector") )
-	{
-	  cerr << "ERROR(Vector): Vector!='"<<s<<"'\n";
-	  return 0;
-	}
-      int biggest;
-      in >> biggest;
-      in >> a;
-      resize(biggest);
-      for(int iv=0;iv<size();iv++)
-	{
-	  readOb(in, (*this)[iv]);
-	}
-      return 1;
-    }
-};
-
-template<class T> bool operator==(const Vector<T> &x, const Vector<T> &y)
-{
-  if( &x == &y )
-    return 1;
-  else
-    {
-      if( y.size()!=x.size() )
-	return 0;
-      else
-	{
-	  for(unsigned int iii=0;iii<x.size();iii++)
-	    if( !(x[iii]==y[iii]) )
-	      return 0;
-	  return 1;
-	}
-    }
-}
-template<class T> bool operator!=(const Vector<T> &x, const Vector<T> &y)
-{
-  return !(x==y);
-}
-
-template<class T> bool operator<(const Vector<T> &x, const Vector<T> &y)
-{
-  if( &x == &y )
-    return 0;
-  else
-    {
-      if( y.size()<x.size() )
-	return !(y<x);
-      for(int iii=0;iii<x.size();iii++)
-	{
-	  assert( iii!=y.size() );
-	  if( x[iii]<y[iii] )
-	    return 1;
-	  else if( y[iii]<x[iii] )
-	    return 0;
-	}
-      return x.size()!=y.size();//??
-    }
-}
-
-
-template<class T> void Vector<T>:: errorAccess(int n) const
-{
-  cerr 	<< "ERROR: Access to array element " << n 
-	<< " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
-  cout <<  "ERROR: Access to array element " << n 
-       << " (" << maxWritten << ", " << realSize << ", " << (void*)p << ")\n";
-  assert(0);
-#ifndef DEBUG
-  abort();
-#endif
-}
-
-template<class T> ostream& operator<<(ostream&o, const Vector<T>&a)
-{
-  o << "Vector(" << a.size() << "){ ";
-  for(unsigned int iii=0;iii<a.size();iii++)
-    o << " " << iii<< ": " << a[iii]<<" ;";
-  return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, Vector<T>&)
-{return in;}
-
-template<class T> int Hash(const Vector<T>&a)
-{
-  int n=0;
-  for(int iii=0;iii<a.size();iii++)
-    n+=Hash(a[iii])*(iii+1);
-  return n+a.size()*47;
-}
-template<class T> void Vector<T>::copy(T *aa, const T *bb, int n)
-{
-  for(int iii=0;iii<n;iii++)
-    aa[iii]=bb[iii];
-}
-template<class T> void Vector<T>::copy(T *aa, T *bb, int n)
-{
-  for(int iii=0;iii<n;iii++)
-    aa[iii]=bb[iii];
-}
-
-template<class T> void Vector<T>::_expand()
-{
-#ifdef VERY_ARRAY_DEBUG
-  cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-  T *oldp=p;
-  int oldsize=realSize;
-  realSize=realSize*2+1;
-  p=new T[realSize];
-  memo_new(p);
-  copy(p, oldp, oldsize);
-  delete [] oldp;
-  memo_del(oldp, 1);
-#ifdef VERY_ARRAY_DEBUG
-  cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << '\n';
-#endif			
-}
-
-template<class T> int Vector<T>::findMax() const
-{
-  if( size()==0 )
-    return -1;
-  else
-    {
-      int maxPos=0;
-      for(int iii=1;iii<size();iii++)
-	if( (*this)[maxPos]<(*this)[iii] )
-	  maxPos=iii;
-      return maxPos;
-    }
-}
-template<class T> int Vector<T>::findMin() const
-{
-  if( size()==0 )
-    return -1;
-  else
-    {
-      int minPos=0;
-      for(int iii=1;iii<size();iii++)
-	if( (*this)[iii]<(*this)[minPos] )
-	  minPos=iii;
-      return minPos;
-    }
-}
-
-#endif
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/WordClasses.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/WordClasses.h b/ext/giza-pp/GIZA++-v2/WordClasses.h
deleted file mode 100644
index 3693906..0000000
--- a/ext/giza-pp/GIZA++-v2/WordClasses.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
-
-Copyright (C) 2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef WordClasses_h_DEFINED
-#define WordClasses_h_DEFINED
-#include <map>
-#include <string>
-#include <set>
-
-// Word -> word-class lookup table. Four views are kept in sync:
-//   Sw2c   : word string -> class name
-//   Sc2int : class name  -> integer class id
-//   Sint2c : class id    -> class name
-//   w2c    : word id (as returned by the MAPPER passed to read()) -> class id
-// Class id 0 with the name "0" always exists and is the fallback result of
-// every lookup.
-class WordClasses
-{
- private:
-  map<string,string> Sw2c;
-  map<string,int> Sc2int;
-  Vector<string> Sint2c;
-  Vector<int> w2c;
-  unsigned int classes;   // number of distinct classes, including class "0"
- public:
-  WordClasses()
-    : classes(1)
-    {
-      Sint2c.push_back("0");
-      Sc2int["0"]=0;
-    }
-  // Reads "<word> <class>" lines from in. Every class name seen for the first
-  // time gets the next free class id. m(word) must yield the integer id of a
-  // word; it is used to size and fill w2c. Lines without any token are
-  // skipped. Always returns true.
-  template<class MAPPER> bool read(istream&in,const MAPPER&m)
-    {
-      string sline;
-      int maxword=0;
-      while(getline(in,sline))
-        {
-          string word,wclass;
-          istringstream iline(sline);
-          // A blank or whitespace-only line carries no entry. Registering
-          // the empty string as a word would create a bogus class and trip
-          // the duplicate-word assertion on the second such line.
-          if( !(iline>>word) )
-            continue;
-          iline>>wclass;
-          maxword=max(m(word),maxword);
-          assert(Sw2c.count(word)==0);
-          Sw2c[word]=wclass;
-          if( !Sc2int.count(wclass) )
-            {
-              Sc2int[wclass]=classes++;
-              Sint2c.push_back(wclass);
-              assert(classes==Sint2c.size());
-            }
-        }
-      // Word ids that never occurred in the input keep class 0.
-      w2c=Vector<int>(maxword+1,0);
-      for(map<string,string>::const_iterator i=Sw2c.begin();i!=Sw2c.end();++i)
-        w2c[m(i->first)]=Sc2int[i->second];
-      cout << "Read classes: #words: " << maxword << " " << " #classes: "<< classes <<endl;
-      return true;
-    }
-  // Class id of word id w; 0 when w is outside the table.
-  int getClass(int w)const
-    {
-      if( w>=0 && w<int(w2c.size()) )
-        return w2c[w];
-      else
-        return 0;
-    }
-  // Class id for class name x; warns on cerr and returns 0 when x is unknown.
-  int operator()(const string&x)const
-    {
-      map<string,int>::const_iterator hit=Sc2int.find(x);
-      if( hit!=Sc2int.end() )
-        return hit->second;
-      cerr << "WARNING:  class " << x << " not found.\n";
-      return 0;
-    }
-  // Class name for class id cnr; "0" when cnr is out of range.
-  string classString(unsigned int cnr)const
-    {
-      if( cnr<Sint2c.size())
-        return Sint2c[cnr];
-      else
-        return string("0");
-    }
-};
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/alignment.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/alignment.cpp b/ext/giza-pp/GIZA++-v2/alignment.cpp
deleted file mode 100644
index 55a2e5c..0000000
--- a/ext/giza-pp/GIZA++-v2/alignment.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/*--
-alignment: 'checked' alignment representation with automatic calculation
-           of fertilities
-Franz Josef Och (30/07/99)
---*/
-#include "alignment.h"
-
-// Debug dump of an alignment: "AL(l:<l>,m:<m>)(a: a(1) .. a(m) )(fert:
-// fert(0) .. fert(l) )  c:" followed by a newline. l and m are derived from
-// the sizes of the internal vectors rather than from get_l()/get_m().
-ostream&operator<<(ostream&out, const alignment&a)
-{
-  const int maxJ=a.a.size()-1;
-  const int maxI=a.f.size()-1;
-  out << "AL(l:" << maxI << ",m:" << maxJ << ")(a: ";
-  int j=1;
-  while( j<=maxJ )
-    {
-      out << a(j) << ' ';
-      ++j;
-    }
-  out << ")(fert: ";
-  int i=0;
-  while( i<=maxI )
-    {
-      out << a.fert(i) << ' ';
-      ++i;
-    }
-  out << ")  c:" << "\n";
-  return out;
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/alignment.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/alignment.h b/ext/giza-pp/GIZA++-v2/alignment.h
deleted file mode 100644
index 03cf028..0000000
--- a/ext/giza-pp/GIZA++-v2/alignment.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-/*--
-alignment: 'checked' alignment representation with autom. calc. of fertilities
-Franz Josef Och (30/07/99)
---*/
-#ifndef alignment_h_fjo_defined
-#define alignment_h_fjo_defined
-#include "Vector.h"
-#include <cassert>
-#include "defs.h"
-#include "myassert.h"
-
-// Link node used by class alignment: prev/next hold the neighbouring
-// positions in a position list; both start at 0, which that class treats as
-// "no neighbour".
-class al_struct
-{
- public:
-  PositionIndex prev,next;
-  al_struct()
-    : prev(0),
-      next(0)
-    {}
-};
-
-
-// 'Checked' alignment between positions j=1..m and indices i=0..l.
-//   a[j]            index that position j is linked to (all 0 initially)
-//   f[i]            fertility: number of positions j with a[j]==i
-//   positionSum[i]  running sum of the positions linked to i; it starts at 0
-//                   for every i and is only meaningful for i>=1
-//   als_i[i]        smallest position linked to i (0 if none)
-//   als_j[j]        previous/next position linked to the same index, kept in
-//                   increasing position order (0 terminates the list)
-// set() is the single mutator and keeps all of these consistent.
-class alignment
-{
- private:
-  Vector<PositionIndex> a;
-  Vector<PositionIndex> positionSum,f;
- public:
-  Vector<PositionIndex> als_i;
-  Vector<al_struct>  als_j;
-  PositionIndex l,m;
-  // Empty alignment. l and m are zeroed so that get_l()/get_m() and the
-  // loops bounded by them never read indeterminate values.
-  alignment()
-    : l(0), m(0)
-    {}
-  // Alignment with _m positions, all linked to index 0, and _l+1 indices.
-  alignment(PositionIndex _l, PositionIndex _m)
-    : a(_m+1, (PositionIndex)0),
-    positionSum(_l+1, (PositionIndex)0), f(_l+1, (PositionIndex)0), als_i(_l+1,0),als_j(_m+1),l(_l), m(_m)
-    {
-      f[0]=m;
-      // Chain positions 1..m into the list of index 0.
-      for(PositionIndex j=1;j<=m;j++)
-        {
-          if( j>1 )
-            als_j[j].prev= j-1;
-          if( j<m )
-            als_j[j].next= j+1;
-        }
-      als_i[0]=1;
-    }
-  PositionIndex get_l()const
-    {return l;}
-  PositionIndex get_m()const
-    {return m;}
-  // Link position j to index i.
-  void doMove(int i,int j)
-    {
-      set(j,i);
-    }
-  // Exchange the indices that positions j1 and j2 are linked to.
-  void doSwap(int j1,int j2)
-    {
-      int aj1=a[j1],aj2=a[j2];
-      set(j1,aj2);
-      set(j2,aj1);
-    }
-  // Re-link position j from its current index to aj, updating fertilities,
-  // position sums and the per-index position lists.
-  void set(PositionIndex j, PositionIndex aj)
-    {
-      // Check the arguments before they are used as subscripts.
-      massert(j>0);massert(j<a.size());massert(aj<f.size());
-      PositionIndex old_aj=a[j];
-      massert(old_aj<f.size());massert(f[old_aj]>0);
-      positionSum[old_aj]-=j;
-      // unlink j from the list of its old index
-      PositionIndex prev=als_j[j].prev;
-      PositionIndex next=als_j[j].next;
-      if( next )
-        als_j[next].prev=prev;
-      if( prev )
-        als_j[prev].next=next;
-      else
-        als_i[old_aj]=next;
-
-      // find the insertion point in the (sorted) list of the new index
-      PositionIndex lfd=als_i[aj],llfd=0;
-      while( lfd && lfd<j )
-        lfd = als_j[llfd=lfd].next;
-
-      // insert j between llfd and lfd
-      als_j[j].prev=llfd;
-      als_j[j].next=lfd;
-      if( llfd )
-        als_j[llfd].next=j;
-      else
-        als_i[aj]=j;
-      if( lfd )
-        als_j[lfd].prev=j;
-
-      f[old_aj]--;
-      positionSum[aj]+=j;
-      f[aj]++;
-      a[j]=aj;
-    }
-  const Vector<PositionIndex>& getAlignment() const
-    {return a ;}
-  // Index that position j is linked to (same as operator()).
-  PositionIndex get_al(PositionIndex j)const
-    {
-      massert(j<a.size());
-      return a[j];
-    }
-  PositionIndex operator()(PositionIndex j)const
-    {
-      massert(j<a.size());
-      return a[j];
-    }
-  // Number of positions linked to index i.
-  PositionIndex fert(PositionIndex i)const
-    {
-      massert(i<f.size());
-      return f[i];
-    }
-  // Smallest position linked to i, or 0 if there is none.
-  PositionIndex get_head(PositionIndex i)const
-    {
-      massert( als_i[i]==_get_head(i) );
-      return als_i[i];
-    }
-  // Rounded-up mean of the positions linked to i; 0 for i==0.
-  // NOTE: divides by f[i], so callers must only ask for indices with
-  // non-zero fertility.
-  PositionIndex get_center(PositionIndex i)const
-    {
-      if( i==0 )return 0;
-      massert(((positionSum[i]+f[i]-1)/f[i]==_get_center(i)));
-      return (positionSum[i]+f[i]-1)/f[i];
-    }
-  // Reference implementation of get_head() by linear scan (used in massert).
-  PositionIndex _get_head(PositionIndex i)const
-    {
-      if( fert(i)==0 )return 0;
-      for(PositionIndex j=1;j<=m;j++)
-        if( a[j]==i )
-          return j;
-      return 0;
-    }
-  // Reference implementation of get_center() by linear scan (used in massert).
-  PositionIndex _get_center(PositionIndex i)const
-    {
-      if( i==0 )return 0;
-      massert(fert(i));
-      PositionIndex sum=0;
-      for(PositionIndex j=1;j<=m;j++)
-        if( a[j]==i )
-          sum+=j;
-      return (sum+fert(i)-1)/fert(i);
-    }
-  // Nearest index below i with non-zero fertility, or 0 if there is none.
-  PositionIndex prev_cept(PositionIndex i)const
-    {
-      if( i==0 )return 0;
-      PositionIndex k=i-1;
-      while(k&&fert(k)==0)
-        k--;
-      return k;
-    }
-  // Nearest index above i with non-zero fertility, or l+1 if there is none.
-  PositionIndex next_cept(PositionIndex i)const
-    {
-      PositionIndex k=i+1;
-      while(k<l+1&&fert(k)==0)
-        k++;
-      return k;
-    }
-  // Previous position linked to the same index as j, or 0 if j is the first.
-  PositionIndex prev_in_cept(PositionIndex j)const
-    {
-      massert(als_j[j].prev==0||a[als_j[j].prev]==a[j]);
-      return als_j[j].prev;
-    }
-  friend ostream &operator<<(ostream&out, const alignment&a);
-  // Equal when positions 1..m are linked to the same indices.
-  friend bool operator==(const alignment&a, const alignment&b)
-    {
-      massert(a.a.size()==b.a.size());
-      for(PositionIndex j=1;j<=a.get_m();j++)
-        if(a(j)!=b(j))
-          return 0;
-      return 1;
-    }
-  // Lexicographic order on the links of positions 1..m.
-  friend bool operator<(const alignment&x, const alignment&y)
-    {
-      massert(x.get_m()==y.get_m());
-      for(PositionIndex j=1;j<=x.get_m();j++)
-        if( x(j)<y(j) )
-          return 1;
-        else if( y(j)<x(j) )
-          return 0;
-      return 0;
-    }
-  // Number of positions whose links differ between x and y.
-  friend int differences(const alignment&x, const alignment&y){
-    int count=0;
-    massert(x.get_m()==y.get_m());
-    for(PositionIndex j=1;j<=x.get_m();j++)
-      count += (x(j)!=y(j));
-    return count;
-  }
-  // False when more than half of the positions are linked to index 0 or
-  // when any index 1..l has fertility >= MAX_FERTILITY.
-  bool valid()const
-    {
-      if( 2*f[0]>m )
-        return 0;
-      for(unsigned int i=1;i<=l;i++)
-        if( f[i]>=MAX_FERTILITY )
-          return 0;
-      return 1;
-    }
-  friend class transpair_model5;
-};
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/collCounts.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/collCounts.cpp b/ext/giza-pp/GIZA++-v2/collCounts.cpp
deleted file mode 100644
index 6e6ef69..0000000
--- a/ext/giza-pp/GIZA++-v2/collCounts.cpp
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#include "alignment.h"
-#include "transpair_model3.h"
-#include <map>
-#include "collCounts.h"
-#include "MoveSwapMatrix.h"
-#include "D5Tables.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "Parameter.h"
-
-extern float COUNTINCREASE_CUTOFF_AL;
-// unifies collectCountsOverAlignments and findAlignmentNeighborhood FJO-20/07/99
-template<class TRANSPAIR> // Adds link (dtcount), fertility (ncount) and p0/p1 counts for msc's alignment and its move/swap neighbours.
-int collectCountsOverNeighborhood(const MoveSwapMatrix<TRANSPAIR>&msc,LogProb ascore,Array2<LogProb,Vector<LogProb> >&dtcount,Array2<LogProb,Vector<LogProb> >&ncount,LogProb&p1count,LogProb&p0count,LogProb&total_count) // returns the number of alignments that contributed
-{
-  int nAl=0;
-  const PositionIndex l=msc.get_l(),m=msc.get_m();
-  Array2<LogProb,Vector<LogProb> > cmove(l+1,m+1),cswap(l+1,m+1);
-  Vector<LogProb> negmove(m+1),negswap(m+1),plus1fert(l+1),minus1fert(l+1);
-  LogProb total_move,total_swap; // both start at 0 (LogProb default constructor)
-  if( msc.isCenterDeleted()==0 ) // the unmodified alignment contributes ascore unless it is flagged as deleted
-    {
-      total_move+=ascore;
-      nAl++;
-    }
-  for(PositionIndex j=1;j<=m;j++) // every allowed move of position j to a different index i
-      for(PositionIndex i=0;i<=l;i++)
-	if( msc(j)!=i && !msc.isDelMove(i,j) )
-	  {
-	    LogProb newscore=ascore*msc.cmove(i,j);
-	    total_move+=newscore;
-	    nAl++;
-	    cmove(i,j)+=newscore;
-	    negmove[j]+=newscore; // mass in which j is no longer linked to msc(j)
-	    plus1fert[i]+=newscore; // mass in which i has one link more
-	    minus1fert[msc(j)]+=newscore; // mass in which msc(j) has one link less
-	  }
-  for(PositionIndex j1=1;j1<=m;j1++) // every allowed swap of two positions with different indices
-    for(PositionIndex j2=j1+1;j2<=m;j2++)
-      if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
-	{
-	  LogProb newscore=ascore*msc.cswap(j1,j2);
-	  total_swap+=newscore;
-	  nAl++;
-	  cswap(msc(j1),j2)+=newscore;
-	  cswap(msc(j2),j1)+=newscore;
-	  negswap[j1]+=newscore; 
-	  negswap[j2]+=newscore;
-	}
-  total_count+=total_move+total_swap; // total_count is in/out: its incoming value is part of every use below
-  for(PositionIndex j=1;j<=m;j++)
-    for(PositionIndex i=0;i<=l;i++)
-      dtcount(i,j) += ((i==msc(j)) ? (total_count-(negmove[j]+negswap[j])) : (cswap(i,j)+cmove(i,j))); // current link: everything except the mass that moved j away
-  for(PositionIndex i=1;i<=l;i++)
-    {
-      LogProb temp=minus1fert[i]+plus1fert[i]; // mass in which the fertility of i changed
-      if( msc.fert(i)<MAX_FERTILITY )
-	ncount(i,msc.fert(i))+=total_count-temp;
-      if(msc.fert(i)>0&&msc.fert(i)-1<MAX_FERTILITY)
-	ncount(i,msc.fert(i)-1)+=minus1fert[i];
-      else
-	if( minus1fert[i]!=0.0 )
-	  cerr << "ERROR: M1Fa: " << minus1fert[i] << ' ' << i << ' ' << msc.fert(i)<< endl;
-      if(msc.fert(i)+1<MAX_FERTILITY) 
-	ncount(i,msc.fert(i)+1)+=plus1fert[i];
-    }
-  LogProb temp=minus1fert[0]+plus1fert[0]; // same bookkeeping for index 0, which feeds p1count/p0count
-  p1count += (total_count-temp)*(LogProb)msc.fert(0);
-  p0count += (total_count-temp)*(LogProb)(m-2*msc.fert(0));
-  if( msc.fert(0)>0 )
-    {
-      p1count += (minus1fert[0])*(LogProb)(msc.fert(0)-1);
-      p0count += (minus1fert[0])*(LogProb)(m-2*(msc.fert(0)-1));
-    }
-  else 
-    if( minus1fert[0]!=0.0 )
-      cerr << "ERROR: M1Fb: " << minus1fert[0] << endl;
-  if(int(m)-2*(int(msc.fert(0))+1)>=0) // computed in int so that the difference cannot wrap around as unsigned
-    {
-      p1count += (plus1fert[0])*(LogProb)(msc.fert(0)+1);
-      p0count += (plus1fert[0])*(LogProb)(m-2*(msc.fert(0)+1));
-    }
-  msc.check();
-  return nAl;
-};
-
-// Overload selected when the table argument is an untyped pointer (void*):
-// nothing is collected and the reported sum is zero.
-template<class TRANSPAIR>
-double collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&,LogProb,void*)
-{
-  const double nothingCollected=0.0;
-  return nothingCollected;
-}
-
-// d4model count collection for one alignment. Every position j linked to a
-// non-zero index adds normalized_ascore to exactly one d4Table counter:
-//  - the "first" counter when j is the head of its cept, keyed on the centre
-//    of the previous non-empty cept and on the word classes of both sides;
-//  - the "bigger" counter otherwise, keyed on the previous position within
-//    the same cept.
-// Positions linked to index 0 are skipped.
-template<class TRANSPAIR>
-void _collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&Mmsc,const alignment&msc,const TRANSPAIR&ef,LogProb normalized_ascore,d4model*d4Table)
-{
-  Mmsc.check();
-  const PositionIndex l=msc.get_l();
-  const PositionIndex m=msc.get_m();
-  for(PositionIndex j=1;j<=m;++j)
-    {
-      const PositionIndex aj=msc(j);
-      if( aj==0 )
-        continue;
-      if( msc.get_head(aj)!=j )
-        {
-          // j is a later word of its cept
-          d4Table->getCountRef_bigger(j,msc.prev_in_cept(j),0,d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m)+=normalized_ascore;
-          continue;
-        }
-      // j is the first word of its cept
-      int ep=msc.prev_cept(aj);
-      d4Table->getCountRef_first(j,msc.get_center(ep),d4Table->ewordclasses.getClass(ef.get_es(ep)),d4Table->fwordclasses.getClass(ef.get_fs(j)),l,m)+=normalized_ascore;
-    }
-}
-
-template<class TRANSPAIR> // d5model count collection for one alignment; also updates the embedded d4 table
-void _collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&Mmsc,const alignment&msc,const TRANSPAIR&ef,LogProb normalized_ascore,d5model*d5Table)
-{
-  Mmsc.check();
-  _collectCountsOverNeighborhoodForSophisticatedModels(Mmsc,msc,ef,normalized_ascore,&d5Table->d4m); // d4model overload handles d5Table->d4m first
-  Mmsc.check();
-  const PositionIndex m=msc.get_m(),l=msc.get_l();
-  PositionIndex prev_cept=0; // last index i whose position list was non-empty
-  PositionIndex vac_all=m; // number of positions not yet marked in vac
-  Vector<char> vac(m+1,0); // vac[j] becomes 1 once position j has been processed
-  for(PositionIndex i=1;i<=l;i++)
-    {
-      PositionIndex cur_j=msc.als_i[i]; 
-      PositionIndex prev_j=0;
-      PositionIndex k=0; // positions of index i processed so far
-      if(cur_j) { // process first word of cept
-	k++;
-	d5Table->getCountRef_first(vacancies(vac,cur_j),vacancies(vac,msc.get_center(prev_cept)),
-				   d5Table->fwordclasses.getClass(ef.get_fs(cur_j)),l,m,vac_all-msc.fert(i)+k)+=normalized_ascore;
-	vac_all--;
-	assert(vac[cur_j]==0);
-	vac[cur_j]=1;
-	Mmsc.check();
-	prev_j=cur_j;
-	cur_j=msc.als_j[cur_j].next; // 0 ends the list
-      }
-      while(cur_j) { // process following words of cept
-	k++;
-	int vprev=vacancies(vac,prev_j);
-	d5Table->getCountRef_bigger(vacancies(vac,cur_j),vprev,d5Table->fwordclasses.getClass(ef.get_fs(cur_j)),l,m,vac_all-vprev/*was missing*/-msc.fert(i)+k)+=normalized_ascore;
-	vac_all--;
-	vac[cur_j]=1;
-	Mmsc.check();
-	prev_j=cur_j;
-	cur_j=msc.als_j[cur_j].next;
-      }
-      assert(k==msc.fert(i)); // the list of index i must hold exactly fert(i) positions
-      if( k )
-	prev_cept=i;
-    }
-  assert(vac_all==msc.fert(0)); // the positions left over are exactly those linked to index 0
-}
-
-extern int NumberOfAlignmentsInSophisticatedCountCollection;
-
-template<class TRANSPAIR,class MODEL> // Runs the per-alignment collector on msc's alignment and on each move/swap neighbour whose weight exceeds the cutoff.
-double collectCountsOverNeighborhoodForSophisticatedModels(const MoveSwapMatrix<TRANSPAIR>&msc,LogProb normalized_ascore,MODEL*d5Table) // returns the sum of the weights that were collected
-{
-  const PositionIndex m=msc.get_m(),l=msc.get_l();
-  alignment x(msc); // working copy: modified for one neighbour at a time and restored afterwards
-  double sum=0;
-  msc.check();
-  if( !msc.isCenterDeleted() ) // the unmodified alignment is collected with the full weight
-    {
-      _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),normalized_ascore,d5Table);
-      NumberOfAlignmentsInSophisticatedCountCollection++;
-      sum+=normalized_ascore;
-    }
-  msc.check();
-  for(WordIndex j=1;j<=m;j++)for(WordIndex i=0;i<=l;i++) // all moves of position j to index i
-    {
-      WordIndex old=x(j);
-      if( i!=old&& !msc.isDelMove(i,j) )
-	{
-	  msc.check();
-	  double c=msc.cmove(i,j)*normalized_ascore;
-	  if(c > COUNTINCREASE_CUTOFF_AL ) // neighbours at or below the cutoff are ignored and not added to sum
-	    {
-	      x.set(j,i);
-	      _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),c,d5Table);
-	      NumberOfAlignmentsInSophisticatedCountCollection++;
-	      x.set(j,old); // undo the move
-	      sum+=c;
-	    }
-	  msc.check();
-	}
-    }      
-  for(PositionIndex j1=1;j1<=m;j1++) // all swaps of two positions linked to different indices
-    for(PositionIndex j2=j1+1;j2<=m;j2++)
-      if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
-	{
-	  double c=msc.cswap(j1,j2)*normalized_ascore;
-	  msc.check();
-	  if(c > COUNTINCREASE_CUTOFF_AL )
-	    {
-	      int old1=msc(j1),old2=msc(j2);
-	      x.set(j1,old2);
-	      x.set(j2,old1);
-	      _collectCountsOverNeighborhoodForSophisticatedModels<TRANSPAIR>(msc,x,msc.get_ef(),c,d5Table);
-	      NumberOfAlignmentsInSophisticatedCountCollection++;
-	      x.set(j1,old1); // undo the swap
-	      x.set(j2,old2);
-	      sum+=c;
-	    }
-	  msc.check();
-	}
-  msc.check();
-  return sum;
-}
-
-template<class TRANSPAIR,class MODEL> // Collects neighbourhood counts over all (matrix, score) pairs in smsc and, if addCounts is set, adds them rescaled to the count tables.
-int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc,Vector<WordIndex>&es,Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,nmodel<COUNT>&nCountTable,double&p1count,double&p0count,LogProb&_total,float count,bool addCounts,MODEL*d4Table) // returns the number of alignments visited
-{
-  int nAl=0;
-  const PositionIndex l=es.size()-1,m=fs.size()-1;
-  Array2<LogProb,Vector<LogProb> > dtcount(l+1,m+1),ncount(l+1,MAX_FERTILITY+1);
-  LogProb p0=0,p1=0,all_total=0;
-  for(unsigned int i=0;i<smsc.size();++i)
-    {
-      LogProb this_total=0; // fresh accumulator per matrix; the callee adds into it
-      nAl+=collectCountsOverNeighborhood(*smsc[i].first,smsc[i].second,dtcount,ncount,p1,p0,this_total);
-      all_total+=this_total;
-    }
-  _total=all_total; // unscaled total is reported to the caller
-  all_total/=(double)count; // from here on x/all_total rescales x so that the whole mass corresponds to count
-  double sum2=0;
-  if( addCounts && d4Table )
-    {
-      for(unsigned int i=0;i<smsc.size();++i)
-	{
-	  //for(WordIndex j=1;j<=m;j++)for(WordIndex ii=0;ii<=l;ii++)
-	  //  (*smsc[i].first).cmove(ii,j);
-	  sum2+=collectCountsOverNeighborhoodForSophisticatedModels(*smsc[i].first,smsc[i].second/all_total,d4Table);    
-	}
-      if(!(fabs(count-sum2)<0.05)) // written as a negation, so a NaN sum2 also triggers the warning
-	cerr << "WARNING: DIFFERENT SUMS: (" << count << ") (" << sum2 << ")\n";
-    }
- if( addCounts )
-    {
-      for(PositionIndex i=0;i<=l;i++) 
-	{
-	  for(PositionIndex j=1;j<=m;j++)
-	    {
-	      LogProb ijadd=dtcount(i,j)/all_total;
-	      if( ijadd>COUNTINCREASE_CUTOFF_AL ) // increments at or below the cutoff are dropped
-		{
-		  tTable.incCount(es[i],fs[j],ijadd);
-		  dCountTable.getRef(j,i,l,m)+=ijadd;
-		  aCountTable.getRef(i,j,l,m)+=ijadd;
-		}
-	    }
-	  if( i>0 ) // no fertility counts are stored for index 0
-	    for(PositionIndex n=0;n<MAX_FERTILITY;n++)
-	      nCountTable.getRef(es[i],n)+=ncount(i,n)/all_total;
-	}
-      p0count+=p0/all_total;
-      p1count+=p1/all_total;
-    }
- return nAl;
-}
-
-
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/collCounts.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/collCounts.h b/ext/giza-pp/GIZA++-v2/collCounts.h
deleted file mode 100644
index 9a0529b..0000000
--- a/ext/giza-pp/GIZA++-v2/collCounts.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef collCounts_h_defined
-#define collCounts_h_defined
-#include "alignment.h"
-#include "transpair_model3.h"
-#include <map>
-#include "MoveSwapMatrix.h"
-#include "D4Tables.h"
-#include "transpair_model4.h"
-
-// Plain record of a type code and two short operands; ordered and compared
-// field by field (type, then a, then b) by the free operators below.
-// NOTE(review): presumably describes one move or swap between alignments
-// (see makeOneMoveSwap); the meaning of type/a/b is not defined in this
-// header - confirm against the implementation.
-class OneMoveSwap
-{
- public:
-  short type;
-  short a,b;
-  OneMoveSwap(short _type,short _a,short _b)
-    : type(_type),a(_a),b(_b)
-    {}
-  // a and b are zeroed as well: operator< and operator== read all three
-  // fields, so leaving them indeterminate made comparing or printing a
-  // default-constructed object undefined behaviour.
-  OneMoveSwap()
-    : type(0),a(0),b(0){}
-};
-
-// Strict lexicographic order on (type, a, b).
-inline bool operator<(const OneMoveSwap&a,const OneMoveSwap&b)
-{
-  if( a.type!=b.type )
-    return a.type<b.type;
-  if( a.a!=b.a )
-    return a.a<b.a;
-  return a.b<b.b;
-}
-
-// Equal when type, a and b all match.
-inline bool operator==(const OneMoveSwap&a,const OneMoveSwap&b)
-{
-  if( a.type!=b.type )
-    return false;
-  if( a.a!=b.a )
-    return false;
-  return a.b==b.b;
-}
-
-// Prints o as "(type,a,b)" with the three fields written as numbers.
-inline ostream&operator<<(ostream&out,const OneMoveSwap&o)
-{
-  out << '(' << o.type;
-  out << "," << o.a;
-  out << "," << o.b;
-  out << ")";
-  return out;
-}
-
-// Writes every element of s, in set order and each followed by one space,
-// to out and returns out.
-inline ostream &operator<<(ostream &out,const set<OneMoveSwap>&s)
-{
-  for(set<OneMoveSwap>::const_iterator i=s.begin();i!=s.end();++i)
-    out << *i << ' '; // was cout: the elements bypassed the stream the caller passed in
-  return out;
-}
-
-bool makeOneMoveSwap(const alignment&a,const alignment&b,set<OneMoveSwap>&oms); // declaration only; no definition is given in this header
-
-template<class TRANSPAIR,class MODEL> // declaration of the collector defined in collCounts.cpp
-int collectCountsOverNeighborhood(const Vector<pair<MoveSwapMatrix<TRANSPAIR>*,LogProb> >&smsc, // (matrix pointer, score) pairs
-				   Vector<WordIndex>&es,
-				   Vector<WordIndex>&fs,tmodel<COUNT,PROB>&tTable,
-				   amodel<COUNT>&aCountTable,amodel<COUNT>&dCountTable,
-				   nmodel<COUNT>&nCountTable,double&p1count,double&p0count,
-				   LogProb&_total,float count,bool addCounts,MODEL*d4Table=0); // d4Table is optional and defaults to a null pointer
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/defs.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/defs.h b/ext/giza-pp/GIZA++-v2/defs.h
deleted file mode 100644
index e94addd..0000000
--- a/ext/giza-pp/GIZA++-v2/defs.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful, 
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-USA.
-
-*/
-#ifndef _defs_h
-#define _defs_h 1
-#include <string>
-#include <math.h>
-#include <limits.h>
-
-const int TRANSFER_SIMPLE=1;
-const int TRANSFER=0;
-
-const unsigned int MAX_SENTENCE_LENGTH_ALLOWED=101;
-const int TRAIN_BUFFER_SIZE= 50000;
-//#ifdef WORDINDEX_WITH_4_BYTE
-typedef unsigned int WordIndex; // 4-byte variant is hard-wired; the unsigned short alternative below is commented out
-const unsigned int MAX_VOCAB_SIZE=UINT_MAX; // largest value an unsigned int WordIndex can hold
-typedef unsigned int PositionIndex; // index type used by class alignment for positions and fertilities
-//#else
-//typedef unsigned short WordIndex;
-//const unsigned int MAX_VOCAB_SIZE=USHRT_MAX;
-//typedef unsigned short PositionIndex;
-//#endif
-extern WordIndex MAX_FERTILITY; // declared only; the definition is not in this header
-
-const int MAX_W=457979;
-extern double LAMBDA; // Lambda that is used to scale cross_entropy factor
-
-typedef float PROB ;
-typedef float COUNT ;
- 
-// Thin wrapper around a double. Despite the name, no log-domain arithmetic
-// is performed here: the value is stored as given and the compound
-// operators apply the plain double operation. Implicitly constructible from
-// double, float, int and WordIndex, and implicitly convertible back to
-// double. Default-constructed objects hold 0.
-class LogProb {
- private:
-  double x ;
- public:
-  LogProb():x(0){}
-  LogProb(double y):x(y){}
-  LogProb(float y):x(y){}
-  LogProb(int y):x(y){}
-  LogProb(WordIndex y):x(y){}
-  operator double() const {return x;}
-  // The compound assignments return *this by reference (they used to return
-  // a copy), so chained updates such as (p*=a)+=b act on p itself, as they
-  // do for built-in types.
-  LogProb& operator *= (double y) { x *= y ; return *this;}
-  LogProb& operator *= (LogProb y) { x *= y.x ; return *this;}
-  LogProb& operator /= (double y) { x /= y ; return *this;}
-  LogProb& operator /= (LogProb y) { x /= y.x ; return *this;}
-  LogProb& operator += (double y) { x += y ; return *this;}
-  LogProb& operator += (LogProb y) { x += y.x ; return *this;}
-};
-
-const int PARLEV_ITER=1; // PARLEV_* codes are passed as an argument to the GLOBAL_PARAMETER macros (e.g. in TTables.cpp)
-const int PARLEV_OPTHEUR=2; // used for the "PROB CUTOFF" and "COUNTINCREASE CUTOFF" parameters in TTables.cpp
-const int PARLEV_OUTPUT=3;
-const int PARLEV_SMOOTH=4;
-const int PARLEV_EM=5;
-const int PARLEV_MODELS=6;
-const int PARLEV_SPECIAL=7;
-const int PARLEV_INPUT=8;
-
-#endif
-