You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/01 02:51:46 UTC
[51/94] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/AlignTables.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/AlignTables.h b/ext/giza-pp/GIZA++-v2/AlignTables.h
deleted file mode 100644
index 0daa2c3..0000000
--- a/ext/giza-pp/GIZA++-v2/AlignTables.h
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _aligntables_h
-#define _aligntables_h 1
-
-#include "defs.h"
-
-
-#include <cassert>
-
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-//#include <vector>
-#include "Vector.h"
-#include <utility>
-#include <math.h>
-#include <fstream>
-#include "transpair_model1.h"
-
-
-/* ----------------- Class Definitions for hashmyalignment --------------------
- Objective: This class is used to define a hash mapping function to map
- an alignment (defined as a vector of integers) into a hash key
- ----------------------------------------------------------------------------*/
-
-class hashmyalignment : public unary_function< Vector<WordIndex>, size_t > // hash functor for alignment vectors
-{
-public:
- size_t operator() (const Vector<WordIndex>& key) const
- // Hash function: takes an alignment (a vector of word indices) and
- // returns an integer hash key in the range [0, 1000000).
- {
- WordIndex j ;
- size_t s ;
- size_t key_sum = 0 ;
- // logmsg << "For alignment:" ;
- for (j = 1 ; j < key.size() ; j++){ // starts at 1, so key[0] never contributes to the hash
- // logmsg << " " << key[j] ;
- key_sum += (size_t) (int) pow(double(key[j]), double((j % 6)+1)); // key[j] raised to an exponent in 1..6 that cycles with j; NOTE(review): the (int) cast may not hold large powers
- }
- // logmsg << " , Key value was : " << key_sum;
- s = key_sum % 1000000 ; // fold the accumulated sum into 1000000 buckets
- // logmsg << " h(k) = " << s << endl ;
- return(s);
- }
-};
-
-class equal_to_myalignment{
- // Equality predicate for alignments: true when both vectors have the same size and the same entries from index 1 onward.
-public:
- bool operator()(const Vector<WordIndex> t1, // NOTE(review): both vectors are taken by value, so each call copies them
- const Vector<WordIndex> t2) const
- {WordIndex j ;
- if (t1.size() != t2.size()) // different lengths can never be equal
- return(false);
- for (j = 1 ; j < t1.size() ; j++) // index 0 is not compared (the hash functor skips it as well)
- if (t1[j] != t2[j])
- return(false);
- return(true);
- }
-
-};
-
-/* ---------------- End of Class Definition for hashmyalignment --------------*/
-
-
-/* ------------------ Class Definitions for alignmodel -----------------------
- Class Name: alignmodel
- Objective: Alignment neighborhoods (collections of alignments) are stored in
- a hash table (for easy lookup). Each alignment vector is mapped into a hash
- key using the operator defined above.
- *--------------------------------------------------------------------------*/
-
-class alignmodel{ // wraps a hash table that maps alignment vectors to LogProb values
-private:
- hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment > a; // the table itself, hashed and compared with the functors above
-private:
- // void erase(Vector<WordIndex>&);
-public:
-
- // methods;
-
- inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator begin(void){return a.begin();} // beginning of hash
- inline hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator end(void){return a.end();} // end of hash
- inline const hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >& getHash() const {return a;}; // read-only reference to hash table
- bool insert(Vector<WordIndex>&, LogProb val=0.0); // add an alignment (declared only; the definition is not in this header)
- // void setValue(Vector<WordIndex>&, LogProb val); // not needed
- LogProb getValue(Vector<WordIndex>&)const; // retrieve prob. of alignment (declared only; the definition is not in this header)
- inline void clear(void){ a.clear();}; // remove every entry from the hash table
- // void printTable(const char* filename);
- //inline void resize(WordIndex n) {a.resize(n);}; // resize table
-
-};
-
-/* -------------- End of alignmodel Class Definitions ------------------------*/
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Array.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Array.h b/ext/giza-pp/GIZA++-v2/Array.h
deleted file mode 100644
index eae58d4..0000000
--- a/ext/giza-pp/GIZA++-v2/Array.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifndef GIZA_ARRAY_H_DEFINED
-#define GIZA_ARRAY_H_DEFINED
-#include "Vector.h"
-#define Array Vector
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Array2.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Array2.h b/ext/giza-pp/GIZA++-v2/Array2.h
deleted file mode 100644
index 546d63a..0000000
--- a/ext/giza-pp/GIZA++-v2/Array2.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/*--
-Array2: Implementation of a two-dimensional checked array allowing for
-a specified underlying one-dimensional data structure.
-
-Franz Josef Och (30/07/99)
---*/
-#ifndef CLASS_Array2_DEFINED
-#define CLASS_Array2_DEFINED
-
-#include "mystl.h"
-#include <string>
-#include <vector>
-
-template<class T, class Y=vector<T> > class Array2 // h1 x h2 array stored row-major in a one-dimensional container Y
-{
-private:
- Y p; // flat storage; element (i,j) lives at p[i*h2+j]
- // short h1, h2;
- unsigned int h1, h2; // number of rows (h1) and columns (h2)
-public:
- Array2(unsigned int _h1, unsigned int _h2) // _h1*_h2 elements, constructed as Y(size) constructs them
- : p(_h1*_h2), h1(_h1), h2(_h2) {}
- Array2(unsigned int _h1, unsigned int _h2, const T&_init) // _h1*_h2 elements, each a copy of _init
- : p(_h1*_h2, _init), h1(_h1), h2(_h2) {}
- Array2() // empty 0 x 0 array
- : h1(0), h2(0) {}
- inline T &operator()(unsigned int i, unsigned int j) // mutable element access; indices are checked with assert only
- { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
- inline const T&operator()(unsigned int i, unsigned int j) const // read-only element access
- { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
- inline T get(unsigned int i, unsigned int j) // returns a copy of element (i,j)
- { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
- inline void set(unsigned int i, unsigned int j, T x) // overwrites element (i,j) with x
- { assert(i<h1);assert(j<h2);p[i*h2+j]=x; }
- inline const T get(unsigned int i, unsigned int j) const // returns a copy of element (i,j)
- { assert(i<h1);assert(j<h2);return p[i*h2+j]; }
- inline unsigned int getLen1() const // number of rows
- { return h1; }
- inline unsigned int getLen2() const // number of columns
- { return h2; }
-
-inline T*begin(){ // pointer to the first element, or 0 when either dimension is zero
- if( h1==0||h2==0)return 0;
- return &(p[0]);
-}
-inline T*end(){ // one-past-the-last element pointer, or 0 when either dimension is zero
- if( h1==0||h2==0)return 0;
- return &(p[0])+p.size();
-}
-
- inline const T*begin()const{ return p.begin(); } // NOTE(review): returns Y's iterator as const T*; only valid if that iterator converts to a pointer
- inline const T*end()const{return p.end();} // NOTE(review): same iterator-to-pointer assumption as the const begin()
-
- friend ostream&operator<<(ostream&out, const Array2<T, Y>&ar) // prints one row per line, each element followed by a space
- {
- for(unsigned int i=0;i<ar.getLen1();i++)
- {
- //out << i << ": ";
- for(unsigned int j=0;j<ar.getLen2();j++)
- out << ar(i, j) << ' ';
- out << '\n';
- }
- return out << endl;
- }
- inline void resize(unsigned int a,unsigned int b) // no-op when the dimensions are unchanged
- {
- if( !(a==h1&&b==h2))
- {
- h1=a;
- h2=b;
- p.resize(h1*h2); // only the flat storage is resized; existing elements are not rearranged for the new column count
- }
- }
- inline void resize(unsigned int a,unsigned int b,const T&t) // no-op (no fill either) when the dimensions are unchanged
- {
- if( !(a==h1&&b==h2))
- {
- h1=a;
- h2=b;
- p.resize(h1*h2);
- fill(p.begin(),p.end(),t); // every element, old and new, is overwritten with t
- }
- }
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Array4.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Array4.h b/ext/giza-pp/GIZA++-v2/Array4.h
deleted file mode 100644
index 4e57a2e..0000000
--- a/ext/giza-pp/GIZA++-v2/Array4.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef AlignmentArray4_h_DEFINED
-#define AlignmentArray4_h_DEFINED
-
-#include "Array2.h"
-template<class T> class Array4 // four-index table: an M x M grid of lazily allocated Array2<T> sub-tables
-{
- private:
- Array2< Array2<T>* > A; // A(l,m) is 0 until the sub-table for (l,m) is first written
- int M; // side length of the grid A
- T init; // value reported for unallocated sub-tables and used to fill new ones
- public:
- Array4(int m,const T&_init) // starts with every sub-table pointer set to 0
- : A(m,m,0),M(m),init(_init) {}
- ~Array4() // frees every allocated sub-table; NOTE(review): no copy constructor/assignment is declared, so a copied Array4 would delete the same pointers twice
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- delete A(l,m);
- }
- const T&operator()(int i, int j, int l, int m)const // read access: yields init when sub-table (l,m) was never allocated
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- const T&get(int i, int j, int l, int m)const // same behavior as the const operator()
- {
- if( A(l,m)==0 )
- return init;
- else
- return (*A(l,m))(i,j);
- }
- T&operator()(int i, int j, int l, int m) // write access: allocates sub-table (l,m) on first use
- {
- if( A(l,m)==0 )
- {
- A(l,m)=new Array2<T>(max(l+1,m+1),max(l+1,m+1),init); // square sub-table of side max(l+1,m+1), filled with init
- }
- return (*A(l,m))(i,j);
- }
- void clear() // resets allocated sub-tables in place without freeing them
- {
- for(int l=0;l<M;++l)
- for(int m=0;m<M;++m)
- if( A(l,m) )
- {
- Array2<T>&a=*A(l,m);
- for(int i=0;i<=l;++i)
- for(int j=0;j<=m;++j)
- a(i,j)=0.0; // NOTE(review): writes 0.0 rather than init, and only the (l+1) x (m+1) corner of the square sub-table
- }
- }
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/D4Tables.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/D4Tables.h b/ext/giza-pp/GIZA++-v2/D4Tables.h
deleted file mode 100644
index e047bcc..0000000
--- a/ext/giza-pp/GIZA++-v2/D4Tables.h
+++ /dev/null
@@ -1,460 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _d4tables_h_define
-#define _d4tables_h_define
-#include <math.h>
-#include "WordClasses.h"
-#include "Globals.h"
-#include "myassert.h"
-
-extern float d4modelsmooth_factor;
-
-class m4_key // lookup key for the D1/Db1 maps in d4model and d5model
-{
- public:
- int deps; // bit mask tested against the DEP_MODEL_* / DEP_MODELb_* flags when printing
- int l;
- int m;
- int F; // class id passed to wcf.classString() when printed
- int E; // class id passed to wce.classString() when printed
- int prevj; // stored, but not printed by any function in this class
- int vacancies1,vacancies2; // -1 means "not used" for print1/printb1
- m4_key(int _deps,int _l,int _m,int _F,int _E,int _prevj,int _v1,int _v2) // stores all eight fields verbatim
- : deps(_deps),l(_l),m(_m),F(_F),E(_E),prevj(_prevj),vacancies1(_v1),vacancies2(_v2) {}
- friend ostream&print1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) // labelled one-line dump of the fields enabled by the DEP_MODEL_* bits
- {
- if(x.deps&DEP_MODEL_l)out << "l: " << x.l<<' ';
- if(x.deps&DEP_MODEL_m)out << "m: " << x.m<<' ';
- if(x.deps&DEP_MODEL_F)out << "F: " << wcf.classString(x.F)<< ' ';
- if(x.deps&DEP_MODEL_E)out << "E: " << wce.classString(x.E)<< ' ';
- // if(x.deps&DEP_MODEL_pj)out << "j-1: " << x.prevj<<' ';
- if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' ';
- if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
- friend ostream&print1_m5(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) // unlabelled "E F v1 v2 " prefix; a disabled class prints as "0"; no newline
- {
- out << ((x.deps&DEP_MODEL_E)?wce.classString(x.E):string("0"))<< ' ';
- out << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
- friend ostream&printb1(ostream&out,const m4_key&x,const WordClasses&wce,const WordClasses&wcf) // like print1 but tests the DEP_MODELb_* bits
- {
- if(x.deps&DEP_MODELb_l)out << "l: " << x.l<<' ';
- if(x.deps&DEP_MODELb_m)out << "m: " << x.m<<' ';
- if(x.deps&DEP_MODELb_F)out << "F: " << wcf.classString(x.F) << ' ';
- if(x.deps&DEP_MODELb_E)out << "E: " << wce.classString(x.E) << ' ';
- if(x.vacancies1!=-1)out << "v1: " << x.vacancies1 << ' ';
- if(x.vacancies2!=-1)out << "v2: " << x.vacancies2 << ' ';
- return out << '\n';
- }
- friend ostream&printb1_m5(ostream&out,const m4_key&x,const WordClasses&wcf) // unlabelled "-1 F v1 v2 " prefix; NOTE(review): tests DEP_MODEL_F, not DEP_MODELb_F
- {
- out << "-1 " << ((x.deps&DEP_MODEL_F)?wcf.classString(x.F):string("0"))<< ' ';
- out << x.vacancies1 << ' ';
- out << x.vacancies2 << ' ';
- return out;
- }
-};
-
-class compare1 // less-than functor over m4_key, used as the ordering of the D1 maps
-{
- private:
- int deps; // DEP_MODEL_* bit mask selecting which of l, m, F, E take part in the ordering
- public:
- compare1(int _deps) : deps(_deps) {}
- bool operator()(const m4_key&a,const m4_key&b)const // lexicographic: enabled fields in the order l, m, F, E, then vacancies1, then vacancies2
- {
- if(deps&DEP_MODEL_l){if( a.l<b.l )return 1;if( b.l<a.l )return 0;}
- if(deps&DEP_MODEL_m){if( a.m<b.m )return 1;if( b.m<a.m )return 0;}
- if(deps&DEP_MODEL_F){if( a.F<b.F )return 1;if( b.F<a.F )return 0;}
- if(deps&DEP_MODEL_E){if( a.E<b.E )return 1;if( b.E<a.E )return 0;}
- //if(deps&DEP_MODEL_pj){if( a.prevj<b.prevj )return 1;if( b.prevj<a.prevj )return 0;}
- if(a.vacancies1<b.vacancies1)return 1;if(b.vacancies1<a.vacancies1)return 0; // vacancies are always compared; prevj never is
- if(a.vacancies2<b.vacancies2)return 1;if(b.vacancies2<a.vacancies2)return 0;
- return 0; // keys are equivalent on every compared field
- }
-};
-
-class compareb1 // less-than functor over m4_key, used as the ordering of the Db1 maps
-{
- private:
- int deps; // DEP_MODELb_* bit mask selecting which of l, m, F, E take part in the ordering
- public:
- compareb1(int _deps) : deps(_deps) {}
- bool operator()(const m4_key&a,const m4_key&b)const // lexicographic: enabled fields in the order l, m, F, E, then vacancies1, then vacancies2
- {
- if(deps&DEP_MODELb_l){if( a.l<b.l )return 1;if( b.l<a.l )return 0;}
- if(deps&DEP_MODELb_m){if( a.m<b.m )return 1;if( b.m<a.m )return 0;}
- if(deps&DEP_MODELb_F){if( a.F<b.F )return 1;if( b.F<a.F )return 0;}
- if(deps&DEP_MODELb_E){if( a.E<b.E )return 1;if( b.E<a.E )return 0;}
- //if(deps&DEP_MODELb_pj){if( a.prevJ<b.prevJ )return 1;if( b.prevJ<a.prevJ )return 0;}
- if(a.vacancies1<b.vacancies1)return 1;if(b.vacancies1<a.vacancies1)return 0; // vacancies are always compared; prevj never is
- if(a.vacancies2<b.vacancies2)return 1;if(b.vacancies2<a.vacancies2)return 0;
- return 0; // keys are equivalent on every compared field
- }
-};
-
-inline void tokenize(const string&in,Vector<string>&out) // splits `in` on whitespace and appends each token to `out`
-{
- string s;
- istringstream l(in);
- while(l>>s) // operator>> skips whitespace, so empty tokens are never produced
- out.push_back(s); // `out` is appended to, not cleared first
-}
-
-class d4model // count/probability tables D1 ("head of cept") and Db1 ("non-head of cept"), keyed by m4_key
-{
- public:
- typedef Vector<pair<COUNT,PROB> > Vpff; // one table row: (count, probability) per displacement slot
- map<m4_key,Vpff,compare1 > D1;
- map<m4_key,Vpff,compareb1> Db1;
- PositionIndex msl; // rows have msl*2+1 slots; a displacement d is stored at index d+msl
- WordClasses ewordclasses;
- WordClasses fwordclasses;
- template<class MAPPER>
- void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile) // reads both word-class files; an unreadable file is reported on cerr and skipped
- {
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm )
- {
- cerr << "ERROR: can not read " << efile << endl;
- }
- else
- ewordclasses.read(estrm,m1);
- if( !fstrm )
- cerr << "ERROR: can not read " << ffile << endl;
- else
- fwordclasses.read(fstrm,m2);
- }
- d4model(PositionIndex _msl) // both maps are ordered according to the global M4_Dependencies mask
- : D1(compare1(M4_Dependencies)),Db1(compareb1(M4_Dependencies)),msl(_msl)
- {}
- COUNT&getCountRef_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m) // mutable D1 count for displacement j-j_cp; creates a zeroed row on first use
- {
- assert(j>=1);
- m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if(p==D1.end())p=D1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=D1.end());
- return (p->second)[j-j_cp+msl].first;
- }
- COUNT&getCountRef_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m) // mutable Db1 count for displacement j-j_prev; creates a zeroed row on first use
- {
- assert(j>=1);
- assert(j_prev>=1);
- m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if(p==Db1.end())p=Db1.insert(make_pair(key,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0)))).first;
- assert(p!=Db1.end());
- return (p->second)[j-j_prev+msl].first;
- }
- map<m4_key,Vpff,compare1 >::const_iterator getProb_first_iterator(int E,int F,int l,int m)const // finds the D1 row once so it can be reused by getProb_first_withiterator
- {return D1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));}
- PROB getProb_first_withiterator(WordIndex j,WordIndex j_cp,int m,const map<m4_key,Vpff,compare1 >::const_iterator& p)const // same result as getProb_first, given a row iterator found earlier
- {
- assert(j>=1);//assert(j_cp>=0);
- assert(j<=msl);assert(j_cp<=msl);
- if(p==D1.end())
- {
- return PROB_SMOOTH; // no row for this key
- }
- else
- {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); // stored probability interpolated with 1/(2*m-1), floored at PROB_SMOOTH
- }
- }
- PROB getProb_first(WordIndex j,WordIndex j_cp,int E,int F,int l,int m)const // smoothed D1 probability for displacement j-j_cp
- {
- assert(j>=1);//assert(j_cp>=0);
- assert(j<=msl);assert(j_cp<=msl);
- m4_key key(M4_Dependencies,l,m,F,E,j_cp,-1,-1);
- map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
- if(p==D1.end())
- {
- return PROB_SMOOTH; // no row for this key
- }
- else
- {
- massert((p->second)[j-j_cp+msl].second<=1.0);
- return max(PROB_SMOOTH,d4modelsmooth_factor/(2*m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_cp+msl].second); // stored probability interpolated with 1/(2*m-1), floored at PROB_SMOOTH
- }
- }
- map<m4_key,Vpff,compareb1 >::const_iterator getProb_bigger_iterator(int E,int F,int l,int m)const // finds the Db1 row once so it can be reused by getProb_bigger_withiterator
- {
- return Db1.find(m4_key(M4_Dependencies,l,m,F,E,0,-1,-1));
- }
- PROB getProb_bigger_withiterator(WordIndex j,WordIndex j_prev,int m,const map<m4_key,Vpff,compareb1 >::const_iterator&p)const // same result as getProb_bigger, given a row iterator found earlier
- {
- massert(j>=1);massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);massert(j_prev<=msl);
- if(p==Db1.end())
- {
- return PROB_SMOOTH; // no row for this key
- }
- else
- {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); // uniform term is 1/(m-1) here; NOTE(review): m==1 makes the divisor zero
- }
- }
-
- PROB getProb_bigger(WordIndex j,WordIndex j_prev,int E,int F,int l,int m)const // smoothed Db1 probability for displacement j-j_prev
- {
- massert(j>=1);massert(j_prev>=1);
- massert(j>j_prev);
- massert(j<=msl);massert(j_prev<=msl);
- m4_key key(M4_Dependencies,l,m,F,E,j_prev,-1,-1);
- map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
- if(p==Db1.end())
- {
- return PROB_SMOOTH; // no row for this key
- }
- else
- {
- massert((p->second)[j-j_prev+msl].second<=1.0 );
- return max(PROB_SMOOTH,d4modelsmooth_factor/(m-1)+(1-d4modelsmooth_factor)*(p->second)[j-j_prev+msl].second); // uniform term is 1/(m-1) here; NOTE(review): m==1 makes the divisor zero
- }
- }
- void normalizeTable() // recomputes every row's probabilities from its counts and reports the parameter count on cout
- {
- int nParams=0;
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- double sum=0.0;
- for(PositionIndex i=0;i<d1.size();i++) // this inner i shadows the map iterator i
- sum+=d1[i].first;
- for(PositionIndex i=0;i<d1.size();i++)
- {
- d1[i].second=sum?(d1[i].first/sum):(1.0/d1.size()); // count/sum, or uniform when the row's counts sum to zero
- nParams++;
- }
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex i=0;i<db1.size();i++) // this inner i shadows the map iterator i
- sum+=db1[i].first;
- for(PositionIndex i=0;i<db1.size();i++)
- {
- db1[i].second=sum?(db1[i].first/sum):(1.0/db1.size()); // count/sum, or uniform when the row's counts sum to zero
- nParams++;
- }
- }
- cout << "D4 table contains " << nParams << " parameters.\n";
- }
- void clear() // zeroes every count in both tables; the stored probabilities are left untouched
- {
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- for(PositionIndex i=0;i<d1.size();i++)
- d1[i].first=0.0;
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- for(PositionIndex i=0;i<db1.size();i++)
- db1[i].first=0.0;
- }
- }
-
- void printProbTable(const char*fname1,const char*fname2) // writes counts to fname1; additionally writes probabilities to fname2 when M4_Dependencies==76
- {
- ofstream out(fname1);
- double ssum=0.0; // grand total of all counts, written as FULL-SUM
- out << "# Translation tables for Model 4 .\n";
- out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
- {
- const Vpff&d1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ) // rows whose counts sum to zero are not written
- {
- print1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<< '\n';
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out << (int)(ii)-(int)(msl) << ' ' << d1[ii].first << '\n'; // slot index converted back to a signed displacement
- out << endl;
- }
- ssum+=sum;
- }
- out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
- {
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ) // rows whose counts sum to zero are not written
- {
- printb1(out,i->first,ewordclasses,fwordclasses);
- out << "SUM: " << sum << ' '<<'\n';
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- {
- out << (int)(ii)-(int)(msl) << ' ' << db1[ii].first << '\n'; // slot index converted back to a signed displacement
- }
- out << endl;
- }
- ssum+=sum;
- }
- out << endl << "FULL-SUM: " << ssum << endl;
- if( M4_Dependencies==76 ) // the second file is produced only for this exact dependency mask
- {
- ofstream out2(fname2);
- for(map<m4_key,Vpff,compare1 >::const_iterator i=D1.begin();i!=D1.end();++i)
- {
- const Vpff&d1=i->second;
- for(unsigned ii=0;ii<d1.size();ii++)
- if( d1[ii].first )
- out2 << ewordclasses.classString(i->first.E) << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << d1[ii].second << '\n';
- }
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=Db1.begin();i!=Db1.end();++i)
- {
- const Vpff&db1=i->second;
- for(unsigned ii=0;ii<db1.size();ii++)
- if( db1[ii].first )
- out2 << -1 << ' ' << fwordclasses.classString(i->first.F) << ' ' << (int)(ii)-(int)(msl) << ' ' << db1[ii].second << '\n';
- }
- }
- }
- bool readProbTable(const char *fname) // parses a file in the layout written to fname1 by printProbTable; always returns 1
- {
- cerr << "Reading D4Tables from " << fname << endl;
- ifstream file(fname); // NOTE(review): the stream is never checked for a failed open
- string line;
- do
- {
- getline(file,line);
- } while(line.length()&&line[0]=='#'); // skip the leading '#' header lines
-
- do
- {
- while(line.length()==0) // NOTE(review): appears to spin forever if the stream fails or ends while line is empty
- getline(file,line);
- if( line[0]=='#') // the next '#' line ends the first (D1) section
- break;
- Vector<string> linestr;
- tokenize(line,linestr);
- m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
- for(unsigned int i=0;i<linestr.size();i+=2) // tokens are consumed as "label: value" pairs; NOTE(review): linestr[i+1] is not bounds-checked
- {
- if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_l);}
- if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_m);}
- if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_F);}
- if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODEL_E);}
- //if( linestr[i]=="j-1:" ){k.prevj=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODEL_pj);}
- }
- string str;
- double sum;
- file >> str >> sum; // expects the "SUM: <total>" line that follows the key line
- iassert(str=="SUM:");
- if( str!="SUM:")
- cerr << "ERROR: string is " << str << " and not sum " << endl;
-
- do
- {
- int value;
- double count;
- getline(file,line);
- istringstream twonumbers(line);
- if(twonumbers >> value >> count) // "<displacement> <count>" line
- {
- if( D1.count(k)==0 )
- D1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
- D1[k][value+msl]=make_pair(count,count/sum); // NOTE(review): value+msl is not range-checked here and sum is not checked for zero
- }
- }while(line.length()); // an empty line ends the row
- }while(file);
- do
- {
- getline(file,line);
- } while(line.length()&&line[0]=='#'); // skip the '#' header of the second (Db1) section
- do
- {
- while(line.length()==0) // NOTE(review): same possible endless loop as in the first section
- getline(file,line);
- if( line[0]=='#')
- break;
- Vector<string> linestr;
- tokenize(line,linestr);
- m4_key k(M4_Dependencies,0,0,0,0,0,-1,-1);
- bool sumRead=0; // set when the key line itself turns out to be the SUM: line
- for(unsigned int i=0;i<linestr.size();i+=2) // NOTE(review): linestr[i+1] is not bounds-checked
- {
- if( linestr[i]=="l:" ){k.l=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_l);}
- else if( linestr[i]=="m:" ){k.m=atoi(linestr[i+1].c_str());iassert(M4_Dependencies&DEP_MODELb_m);}
- else if( linestr[i]=="F:" ){k.F=fwordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_F);}
- else if( linestr[i]=="E:" ){k.E=ewordclasses(linestr[i+1]);iassert(M4_Dependencies&DEP_MODELb_E);}
- else if( linestr[i]=="SUM:" )
- {
- cerr << "Warning: obviously no dependency.\n";
- sumRead=1;
- }
- else if( linestr[i]=="FULL-SUM:" )
- {
- break; // leaves only the token loop; the code below still runs for this line
- }
- else
- {
- cerr << "ERROR: error in reading d4 tables: " << linestr[i] << ' ' << linestr[i+1] << endl;
- }
- }
- string str;
- double sum;
- if( sumRead==0 )
- file >> str >> sum;
- else
- {
- str=linestr[0]; // the SUM: label and total were already on the tokenized line
- sum=atof(linestr[1].c_str());
- }
- if( str!="SUM:" )
- cerr << "ERROR: should read SUM but read " << str << endl;
- do
- {
- int value;
- double count;
- getline(file,line);
- istringstream twonumbers(line);
- if(twonumbers >> value >> count) // "<displacement> <count>" line
- {
- if( Db1.count(k)==0 )
- Db1.insert(make_pair(k,Vpff(msl*2+1,pair<COUNT,PROB>(0.0,0.0))));
- Db1[k][value+msl]=make_pair(count,count/sum); // NOTE(review): value+msl is not range-checked here and sum is not checked for zero
- }
- }while(file&&line.length());
- }while(file);
- return 1; // no failure path reports anything other than success
- }
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/D5Tables.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/D5Tables.h b/ext/giza-pp/GIZA++-v2/D5Tables.h
deleted file mode 100644
index c69992b..0000000
--- a/ext/giza-pp/GIZA++-v2/D5Tables.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _d5tables_h_define
-#define _d5tables_h_define
-#include <math.h>
-#include "D4Tables.h"
-
-extern float d5modelsmooth_countoffset;
-extern float d5modelsmooth_factor;
-
-#define UNSEENPROB (1.0/vacancies_total)
-
-class d5model // count/probability tables D1 ("head of cept") and Db1 ("non-head of cept"), indexed by vacancy counts
-{
- private:
- typedef Vector < pair < COUNT,PROB > >Vpff; // one table row: (count, probability) per slot
- map< m4_key,Vpff,compare1 > D1;
- map< m4_key,Vpff,compareb1 > Db1;
- public:
- d4model&d4m; // reference to the d4model passed to the constructor
- WordClasses ewordclasses,fwordclasses;
- template<class MAPPER>
- void makeWordClasses(const MAPPER&m1,const MAPPER&m2,string efile,string ffile) // reads both word-class files; an unreadable file is reported on cerr and skipped
- {
- ifstream estrm(efile.c_str()),fstrm(ffile.c_str());
- if( !estrm )
- cerr << "ERROR: can not read classes from " << efile << endl;
- else
- ewordclasses.read(estrm,m1);
- if( !fstrm )
- cerr << "ERROR: can not read classes from " << ffile << endl;
- else
- fwordclasses.read(fstrm,m2);
- }
- d5model (d4model&_d4m) // both maps are ordered according to the global M5_Dependencies mask
- :D1 (compare1(M5_Dependencies)), Db1 (compareb1(M5_Dependencies)),d4m(_d4m)
- {}
- COUNT &getCountRef_first (PositionIndex vacancies_j, // mutable D1 count at slot vacancies_j; creates the row on first use
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- //massert(vacancies_jp<=vacancies_total);
- massert(vacancies_j <=vacancies_total);
- massert(vacancies_total<=m);
- m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total); // E and prevj are fixed to 0; the vacancy counts occupy vacancies1/vacancies2
- map<m4_key,Vpff,compare1 >::iterator p=D1.find(key);
- if(p==D1.end())
- p=D1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
- massert(p!=D1.end());
- return (p->second)[vacancies_j].first;
- }
- COUNT &getCountRef_bigger (PositionIndex vacancies_j, // mutable Db1 count at slot vacancies_j-vacancies_jp; creates the row on first use
- PositionIndex vacancies_jp, int F,
- PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total)
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- massert (vacancies_jp <= vacancies_j);
- massert (vacancies_j-vacancies_jp <= vacancies_total);
- m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total); // vacancies1 is fixed to -1 for this table
- map<m4_key,Vpff,compareb1 >::iterator p=Db1.find(key);
- if(p==Db1.end())
- p=Db1.insert(make_pair(key,Vpff(vacancies_total+1,make_pair(0,UNSEENPROB)))).first; // !!! constrain length
- massert(p!=Db1.end());
- return (p->second)[vacancies_j - vacancies_jp].first;
- }
- PROB getProb_first (PositionIndex vacancies_j, PositionIndex vacancies_jp, // smoothed D1 probability at slot vacancies_j
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- //massert(vacancies_jp<=vacancies_total);
- massert(vacancies_j <=vacancies_total);
- massert(vacancies_total<=m);
- m4_key key(M5_Dependencies,l,m,F,0,0,vacancies_jp,vacancies_total);
- map<m4_key,Vpff,compare1 >::const_iterator p=D1.find(key);
- if( p==D1.end() )
- return UNSEENPROB; // no row for this key: 1.0/vacancies_total
- else
- return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j].second); // stored probability interpolated with 1/vacancies_total, floored at PROB_SMOOTH
- }
- PROB getProb_bigger (PositionIndex vacancies_j, PositionIndex vacancies_jp, // smoothed Db1 probability at slot vacancies_j-vacancies_jp
- int F, PositionIndex l, PositionIndex m,
- PositionIndex vacancies_total) const
- {
- massert(vacancies_j>0);
- massert(vacancies_total>0);
- massert (vacancies_jp <= vacancies_j);
- massert (vacancies_j-vacancies_jp <= vacancies_total);
- m4_key key(M5_Dependencies,l,m,F,0,0,-1,vacancies_total);
- map<m4_key,Vpff,compareb1 >::const_iterator p=Db1.find(key);
- if(p==Db1.end())
- return UNSEENPROB; // no row for this key: 1.0/vacancies_total
- else
- return max(PROB_SMOOTH,d5modelsmooth_factor/(vacancies_total)+(1-d5modelsmooth_factor)*(p->second)[vacancies_j - vacancies_jp].second); // stored probability interpolated with 1/vacancies_total, floored at PROB_SMOOTH
- }
- void normalizeTable () // recomputes every row's probabilities from its offset counts and reports the parameter count on cout
- {
- int nParams=0;
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex i=0;i<d1.size();i++) // this inner i shadows the map iterator i
- sum+=d1[i].first+d5modelsmooth_countoffset; // every slot's count is raised by the offset before summing
- for(PositionIndex i=0;i<d1.size();i++)
- {
- d1[i].second=sum?((d1[i].first+d5modelsmooth_countoffset)/sum):(1.0/d1.size()); // offset count over sum, or uniform when the sum is zero
- nParams++;
- }
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex i=0;i<db1.size();i++) // this inner i shadows the map iterator i
- sum+=db1[i].first+d5modelsmooth_countoffset;
- for(PositionIndex i=0;i<db1.size();i++)
- {
- db1[i].second=sum?((db1[i].first+d5modelsmooth_countoffset)/sum):(1.0/db1.size()); // offset count over sum, or uniform when the sum is zero
- nParams++;
- }
- }
- cout << "D5 table contains " << nParams << " parameters.\n";
- }
-
- friend ostream&operator<<(ostream&out,d5model&d5m) // dumps both tables, one "<key> <slot> <prob> <count>" line per slot
- {
- out << "# Translation tables for Model 5 .\n";
- out << "# Table for head of cept.\n";
- for(map<m4_key,Vpff,compare1 >::const_iterator i=d5m.D1.begin();i!=d5m.D1.end();++i)
- {
- const Vpff&d1=i->second;
- COUNT sum=0.0;
- for(PositionIndex ii=0;ii<d1.size();ii++)sum+=d1[ii].first;
- if ( sum ) // rows whose counts sum to zero are not written
- {
- for(unsigned ii=0;ii<d1.size();ii++)
- {
- print1_m5(out,i->first,d5m.ewordclasses,d5m.fwordclasses);
- out << (int)(ii) << ' ' << d1[ii].second << ' ' << d1[ii].first << '\n';
- }
- out << endl;
- }
- }
- out << "# Table for non-head of cept.\n";
- for(map<m4_key,Vpff,compareb1 >::const_iterator i=d5m.Db1.begin();i!=d5m.Db1.end();++i)
- {
- const Vpff&db1=i->second;
- double sum=0.0;
- for(PositionIndex ii=0;ii<db1.size();++ii)sum+=db1[ii].first;
- if( sum ) // rows whose counts sum to zero are not written
- {
- for(unsigned ii=0;ii<db1.size();ii++)
- {
- printb1_m5(out,i->first,d5m.fwordclasses);
- out << (int)(ii) << ' ' << db1[ii].second << ' ' << db1[ii].first << '\n';
- }
- out << endl;
- }
- }
- return out;
- }
- void readProbTable(const char*x) // loads counts from file x, renormalizes, then writes the tables to "M5FILE"
- {
- ifstream f(x); // NOTE(review): the stream is never checked for a failed open
- string l;
- while(getline(f,l))
- {
- if(l.length()&&l[0]=='#') // '#' lines are headers
- continue;
- istringstream is(l.c_str());
- string E,F;
- int v1,v2,ii;
- double prob,count;
- if(is>>E>>F>>v1>>v2>>ii>>prob>>count) // same column layout that operator<< writes; lines that do not parse are skipped
- {
- //cerr << "Read: " << E << " " << F << " " << v1 << " " << v2 << " " << prob<< endl;
- if( count>0 ) // the else below binds to the inner if (E=="-1")
- if( E=="-1")
- getCountRef_bigger(ii,0,fwordclasses(F),1000,1000,v2)+=count; // l and m are passed as the constant 1000
- else
- getCountRef_first(ii,v1,fwordclasses(F),1000,1000,v2)+=count; // l and m are passed as the constant 1000
- }
- }
- normalizeTable(); // the prob column read from the file is not stored; probabilities are recomputed from the counts
- ofstream of("M5FILE"); // output file name is hard-coded
- of << (*this);
- }
- void clear() // zeroes every count in both tables; the stored probabilities are left untouched
- {
- for(map<m4_key,Vpff,compare1 >::iterator i=D1.begin();i!=D1.end();++i)
- {
- Vpff&d1=i->second;
- for(PositionIndex i=0;i<d1.size();i++)
- d1[i].first=0.0;
- }
- for(map<m4_key,Vpff,compareb1 >::iterator i=Db1.begin();i!=Db1.end();++i)
- {
- Vpff&db1=i->second;
- for(PositionIndex i=0;i<db1.size();i++)
- db1[i].first=0.0;
- }
- }
-};
-
-#endif
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Dictionary.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Dictionary.cpp b/ext/giza-pp/GIZA++-v2/Dictionary.cpp
deleted file mode 100644
index 6773fec..0000000
--- a/ext/giza-pp/GIZA++-v2/Dictionary.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Noah A. Smith
- Dictionary object for dictionary filter in Model 1 training
-
- Dictionary file must be in order (sorted) by Foreign vocab id, but English
- vocab ids may be in any order.
-
- 9 August 1999
-*/
-
-#include "Dictionary.h"
-
-#include <cstring>
-
-// Builds the dictionary from the whitespace-separated integer pairs stored
-// in `filename`. An empty file name produces a "dead" dictionary that holds
-// no pairs; a file that cannot be opened terminates the program with exit
-// status 1. The lookup cache (currval/currindexmin/currindexmax) starts at 0.
-Dictionary::Dictionary(const char *filename){
-  dead = (strcmp(filename, "") == 0);
-  if(dead)
-    return;
-
-  cout << "Reading dictionary from: " << filename << '\n';
-  ifstream dFile(filename);
-  if(!dFile){
-    cerr << "ERROR: Can't open dictionary: " << filename << '\n';
-    exit(1);
-  }
-
-  currval = 0;
-  currindexmin = 0;
-  currindexmax = 0;
-
-  // Reading stops at end of file or at the first token that is not an int.
-  for(int first, second; (dFile >> first >> second); ){
-    pairs[0].push_back(first);
-    pairs[1].push_back(second);
-  }
-  cout << "Dictionary read; " << pairs[0].size() << " pairs loaded." << '\n';
-  dFile.close();
-}
-
-
-// Returns true when the pair (p, q) occurs in the loaded dictionary.
-//
-// pairs[0] must be sorted in ascending order (the dictionary file is required
-// to be sorted by its first column), so all entries for p form one contiguous
-// run. That run is located by binary search and remembered in
-// currval/currindexmin/currindexmax so that repeated queries for the same p
-// only scan the run.
-//
-// Always returns false for a dead dictionary and for the pair (0, 0).
-bool Dictionary::indict(int p, int q){
-  if(dead) return false;
-  if(p == 0 && q == 0) return false;
-
-  const int n = static_cast<int>(pairs[0].size());
-
-  // The constructor seeds the cache with currval == 0 and the range [0,0]
-  // without ever searching, so a "cached" range for p == 0 cannot be trusted;
-  // it is recomputed on every call (one binary search).
-  if(currval != p || p == 0){
-    // Lower bound: first index whose first-column value is >= p.
-    int lo = 0, hi = n;
-    while(lo < hi){
-      const int mid = lo + ((hi - lo) >> 1);
-      if(pairs[0][mid] < p) lo = mid + 1;
-      else hi = mid;
-    }
-    // Walk to the end of the run of p. The index is compared against n
-    // BEFORE it is used, so an empty dictionary, a missing p, or a run that
-    // touches either end of the table never reads out of bounds.
-    int stop = lo;
-    while(stop < n && pairs[0][stop] == p) ++stop;
-
-    currindexmin = lo;        // min > max encodes "p not present"
-    currindexmax = stop - 1;
-    currval = p;
-  }
-
-  for(int i = currindexmin; i <= currindexmax; i++)
-    if(pairs[1][i] == q) return true;
-  return false;
-}
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Dictionary.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Dictionary.h b/ext/giza-pp/GIZA++-v2/Dictionary.h
deleted file mode 100644
index 3a5c71e..0000000
--- a/ext/giza-pp/GIZA++-v2/Dictionary.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-/* Noah A. Smith
- Dictionary object for dictionary filter in Model 1 training
-
- 9 August 1999
-*/
-
-#include <iostream>
-#include <fstream>
-
-#include "Vector.h"
-
-#ifndef DICTIONARY_H
-#define DICTIONARY_H
-
-class Dictionary{ // Set of integer pairs loaded from a text file; indict() answers whether a given pair is listed.
- private:
- Vector<int> pairs[2]; // pairs[0][k] / pairs[1][k] hold the first / second number of the k-th pair read from the file
- int currval; // first-column value whose index range is currently cached by indict()
- int currindexmin; // first index of the cached range
- int currindexmax; // last index (inclusive) of the cached range
- bool dead; // set when constructed with an empty file name; indict() then always returns false
- public:
- Dictionary(const char *); // argument: dictionary file name ("" creates a dead dictionary)
- bool indict(int, int); // true if the pair (first, second) occurs in the loaded dictionary
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/FlexArray.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/FlexArray.h b/ext/giza-pp/GIZA++-v2/FlexArray.h
deleted file mode 100644
index c7365f7..0000000
--- a/ext/giza-pp/GIZA++-v2/FlexArray.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
-
-Copyright (C) 1988,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef CLASS_FlexArray_defined
-#define CLASS_FlexArray_defined
-#include "Array.h"
-#include <iostream>
-#include <fstream>
-// Array whose valid indices run from low() to high() inclusive instead of
-// starting at 0. Element i is stored at position i-start of the underlying
-// Array<T>; no range checking is added here. The default arguments
-// (start 0, end -1) create an array with zero elements.
-template<class T>
-class FlexArray
-{
-private:
-  Array<T> p;       // backing storage, size End-start+1
-  int start,End;    // inclusive index bounds
-public:
-  FlexArray(int _start=0,int _end=-1)
-    : p(_end-_start+1),start(_start),End(_end) {}
-  // Same as above, but every element is initialized with a copy of init.
-  FlexArray(int _start,int _end,const T&init)
-    : p(_end-_start+1,init),start(_start),End(_end) {}
-  T&operator[](int i)
-    {return p[i-start];}
-  const T&operator[](int i)const
-    {return p[i-start];}
-  int low()const{return start;}
-  int high()const{return End;}
-  // Raw pointers to the first / one-past-last stored element.
-  // conv<> has to be instantiated with the element type T: the previous
-  // hard-coded conv<double> produced a double*, which only converts to T*
-  // when T happens to be double.
-  T*begin(){return conv<T>(p.begin());}
-  T*end(){return conv<T>(p.end());}
-};
-
-// Writes every element of x as "index:value; " in ascending index order
-// (from x.low() to x.high()) and returns the stream.
-template<class T>
-inline ostream&operator<<(ostream&out,const FlexArray<T>&x)
-{
-  int idx=x.low();
-  while(idx<=x.high())
-    {
-      out << idx << ':' << x[idx] << ';' << ' ';
-      ++idx;
-    }
-  return out;
-}
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/ForwardBackward.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/ForwardBackward.cpp b/ext/giza-pp/GIZA++-v2/ForwardBackward.cpp
deleted file mode 100644
index 969316a..0000000
--- a/ext/giza-pp/GIZA++-v2/ForwardBackward.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef NO_TRAINING
-#include "ForwardBackward.h"
-#include "Globals.h"
-#include "myassert.h"
-#include "HMMTables.h"
-#include "mymath.h"
-
-
-double ForwardBackwardTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){ // Forward-backward pass over net: g receives alpha*beta for every node (each block of I values then normalized), E receives the accumulated transition terms divided by their total; returns sum[0] (the value normalize_if_possible gave for the first block) or 1.0 when sum is empty.
- const int I=net.size1(),J=net.size2(),N=I*J; // node (i,j) is stored at index j*I+i in alpha, beta and g
- Array<double> alpha(N,0),beta(N,0),sum(J);
- for(int i=0;i<I;i++) // backward pass starts from the last block of I entries
- beta[N-I+i]=net.getBetainit(i);
- double * cur_beta=conv<double>(beta.begin())+N-I-1; // last entry of block J-2; walks backwards from here
- for(int j=J-2;j>=0;--j)
- for(int ti=I-1;ti>=0;--ti,--cur_beta) {
- const double *next_beta=conv<double>(beta.begin())+(j+1)*I; // first entry of block j+1
- const double *alprob=&net.outProb(j,ti,0),*next_node=&net.nodeProb(0,j+1);
- for(int ni=0;ni<I;++ni,(next_node+=J)){ // next_node advances by J per step; the massert below checks it stays equal to &nodeProb(ni,j+1)
- massert(cur_beta<next_beta&& &net.outProb(j,ti,ni)==alprob);
- massert(next_node == &net.nodeProb(ni,j+1));
- /* if( VERB&&(*next_beta)*(*alprob)*(*next_node) )
- cout << "B= " << (int)(cur_beta-beta.begin()) << " += " << (*next_beta) << "("
- << next_beta-beta.begin() << ") alprob:" << (*alprob) << " lexprob:" << (*next_node) << endl;*/
- (*cur_beta)+=(*next_beta++)*(*alprob++)*(*next_node); // beta(ti,j) += beta(ni,j+1) * outProb(j,ti,ni) * nodeProb(ni,j+1)
- }
- }
- for(int i=0;i<I;i++) // forward pass starts from the first block
- alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
- double* cur_alpha=conv<double>(alpha.begin())+I;
- cur_beta=conv<double>(beta.begin())+I;
- for(int j=1;j<J;j++){
- Array2<double>&e=E[ (E.size()==1)?0:(j-1) ]; // a single-element E is shared by all j; otherwise E[j-1] belongs to the step j-1 -> j
- if( (E.size()!=1) || j==1 ) // (re)initialize each matrix the first time it is used
- {
- e.resize(I,I);
- fill(e.begin(),e.end(),0.0);
- }
-
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_beta) {
- const double * prev_alpha=conv<double>(alpha.begin())+I*(j-1); // first entry of block j-1
- double *cur_e= &e(ti,0);
- double this_node=net.nodeProb(ti,j);
- const double* alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){ // alprob advances by I per step; the massert below checks it stays equal to &outProb(j-1,pi,ti)
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- massert(&e(ti,pi)==cur_e);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- (*cur_alpha)+=alpha_increment;
- (*cur_e++)+=alpha_increment*(*cur_beta); // e(ti,pi) accumulates alpha(pi,j-1)*outProb*nodeProb(ti,j)*beta(ti,j)
- }
- }
- }
- g.resize(N);
- transform(alpha.begin(),alpha.end(),beta.begin(),g.begin(),multiplies<double>()); // g = alpha * beta, element by element
- double bsum=0,esum=0,esum2;
- for(int i=0;i<I;i++) // bsum: total computed from the first block of beta
- bsum+=beta[i]*net.nodeProb(i,0)*net.getAlphainit(i);
- for(unsigned int j=0;j<(unsigned int)E.size();j++) // esum: sum of every entry of every matrix in E
- {
- Array2<double>&e=E[j];
- const double *epe=e.end();
- for(const double*ep=e.begin();ep!=epe;++ep)
- esum+=*ep;
- }
- if( J>1 )
- esum2=esum/(J-1);
- else
- esum2=0.0;
- if(!(esum2==0.0||mfabs(esum2-bsum)/bsum<1e-3*I)) // consistency check only: prints a diagnostic, does not abort
- cout << "ERROR2: " << esum2 <<" " <<bsum << " " << esum << net << endl;
- double * sumptr=conv<double>(sum.begin());
- double* ge=conv<double>(g.end());
- for(double* gp=conv<double>(g.begin());gp!=ge;gp+=I) // one block of I entries of g per iteration
- {
- *sumptr++=normalize_if_possible(gp,gp+I); // presumably returns the block's total before normalizing - TODO confirm against mymath.h
- if(bsum && !(mfabs((*(sumptr-1)-bsum)/bsum)<1e-3*I))
- cout << "ERROR: " << *(sumptr-1) << " " << bsum << " " << mfabs((*(sumptr-1)-bsum)/bsum) << ' ' << I << ' ' << J << endl;
- }
- for(unsigned int j=0;j<(unsigned int)E.size();j++) // scale the entries of E by the grand total esum
- {
- Array2<double>&e=E[j];
- double* epe=e.end();
- if( esum )
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=esum;
- else
- for(double*ep=e.begin();ep!=epe;++ep)
- *ep/=1.0/(max(I*I,I*I*(J-1))); // NOTE(review): divides by 1/count, i.e. multiplies by the count; if all entries are 0 here this changes nothing - confirm whether assigning a uniform value was intended
- }
- if( sum.size() )
- return sum[0];
- else
- return 1.0;
-}
-// Runs ForwardBackwardTraining on net with a single shared transition matrix
-// and stores, for each of the J blocks of I values in the resulting table,
-// the index of the largest value in vit[j].
-void HMMViterbi(const HMMNetwork&net,Array<int>&vit) {
-  vit.resize(net.size2());
-  Array<double> gamma;
-  Array<Array2<double> > epsilon(1);
-  ForwardBackwardTraining(net,gamma,epsilon);
-  // The per-block arg-max is exactly what the three-argument overload does.
-  HMMViterbi(net,gamma,vit);
-}
-// Given a table g holding J consecutive blocks of I values, resizes vit to J
-// and sets vit[j] to the index (within block j) of that block's first
-// maximum.
-void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit) {
-  const int I=net.size1();
-  const int J=net.size2();
-  vit.resize(J);
-  int j=0;
-  while(j<J) {
-    double*const block=conv<double>(g.begin())+I*j;
-    double*const best=max_element(block,block+I);
-    vit[j]=best-block;
-    ++j;
-  }
-}
-
-double HMMRealViterbi(const HMMNetwork&net,Array<int>&vitar,int pegi,int pegj,bool verbose){ // Max-product pass over net: writes the chosen state for each of the J positions into vitar and returns the best final score (1.0 when J==0). If pegj names a position, only state pegi (or, for pegi==-1, states >= I/2) is allowed there.
- const int I=net.size1(),J=net.size2(),N=I*J; // node (i,j) is stored at index j*I+i
- Array<double> alpha(N,-1); // -1 = not yet set; any non-negative score replaces it
- Array<double*> bp(N,(double*)0); // back-pointer: address of the predecessor's alpha cell, 0 = none
- vitar.resize(J);
- if( J==0 )
- return 1.0;
- for(int i=0;i<I;i++)
- {
- alpha[i]=net.getAlphainit(i)*net.nodeProb(i,0);
- if( i>I/2 )
- alpha[i]=0; // only first empty word can be chosen
- bp[i]=0;
- }
- double *cur_alpha=conv<double>(alpha.begin())+I;
- double **cur_bp=conv<double*>(bp.begin())+I;
- for(int j=1;j<J;j++)
- {
- if( pegj+1==j) // about to extend from position pegj: zero every state that the peg disallows there
- for(int ti=0;ti<I;ti++)
- if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (cur_alpha-I)[ti]=0.0;
- for(int ti=0;ti<I;++ti,++cur_alpha,++cur_bp) {
- double* prev_alpha=conv<double>(alpha.begin())+I*(j-1); // first cell of position j-1
- double this_node=net.nodeProb(ti,j);
- const double *alprob= &net.outProb(j-1,0,ti);
- for(int pi=0;pi<I;++pi,++prev_alpha,(alprob+=I)){ // alprob advances by I per step; the massert below checks it stays equal to &outProb(j-1,pi,ti)
- massert(prev_alpha<cur_alpha&& &net.outProb(j-1,pi,ti)==alprob);
- const double alpha_increment= *prev_alpha*(*alprob)*this_node;
- if( alpha_increment> *cur_alpha ) // strict comparison: on ties the earliest predecessor pi is kept
- {
- (*cur_alpha)=alpha_increment;
- (*cur_bp)=prev_alpha;
- }
- }
- }
- }
- for(int i=0;i<I;i++) // weight the final position with betainit
- alpha[N-I+i]*=net.getBetainit(i);
- if( pegj==J-1) // a peg on the last position is applied here because the loop above only handles pegj+1<J
- for(int ti=0;ti<I;ti++)
- if( (pegi!=-1&&ti!=pegi)||(pegi==-1&&ti<I/2) )
- (alpha)[N-I+ti]=0.0;
-
- int j=J-1;
- cur_alpha=conv<double>(alpha.begin())+j*I;
- vitar[J-1]=max_element(cur_alpha,cur_alpha+I)-cur_alpha; // best state at the last position
- double ret= *max_element(cur_alpha,cur_alpha+I);
- while(bp[vitar[j]+j*I]) // follow back-pointers until a null one is reached
- {
- cur_alpha-=I;
- vitar[j-1]=bp[vitar[j]+j*I]-cur_alpha; // pointer offset inside the previous position's cells = predecessor state index
- massert(vitar[j-1]<I&&vitar[j-1]>=0);
- j--;
- }
- massert(j==0); // the trace is expected to reach position 0
- if( verbose )
- {
- cout << "VERB:PEG: " << pegi << ' ' << pegj << endl;
- for(int j=0;j<J;j++)
- cout << "NP " << net.nodeProb(vitar[j],j) << ' ' << "AP " << ((j==0)?net.getAlphainit(vitar[j]):net.outProb(j-1,vitar[j-1],vitar[j])) << " j:" << j << " i:" << vitar[j] << "; ";
- cout << endl;
- }
- return ret;
-}
-
-// Builds "hard" counts from the single best path returned by HMMRealViterbi:
-// g gets 1.0 at the chosen state of every position and 0.0 elsewhere, and
-// each step of the path adds one to entry (state, previous state) of a
-// transition matrix. When E has exactly one element that matrix is shared by
-// all steps; otherwise step j-1 -> j uses E[j-1], which is reset first.
-// Returns the score reported by HMMRealViterbi.
-double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&E){
-  Array<int> path;
-  const double score=HMMRealViterbi(net,path);
-  const int I=net.size1();
-  const int J=net.size2();
-  const bool sharedMatrix=(E.size()==1);
-
-  if( sharedMatrix )
-    {
-      E[0].resize(I,I);
-      fill(E[0].begin(),E[0].end(),0.0);
-    }
-  g.resize(I*J);
-  fill(g.begin(),g.end(),0.0);
-
-  for(int j=0;j<J;++j)
-    {
-      g[j*I+path[j]]=1.0;
-      if( j==0 )
-        continue;   // the first position has no incoming transition
-      Array2<double>&trans=E[sharedMatrix?0:(j-1)];
-      if( !sharedMatrix )
-        {
-          trans.resize(I,I);
-          fill(trans.begin(),trans.end(),0.0);
-        }
-      trans(path[j],path[j-1])++;
-    }
-  return score;
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/ForwardBackward.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/ForwardBackward.h b/ext/giza-pp/GIZA++-v2/ForwardBackward.h
deleted file mode 100644
index 42449d3..0000000
--- a/ext/giza-pp/GIZA++-v2/ForwardBackward.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef NO_EM_MARKOF_ZEUGS_DEFINED
-#define NO_EM_MARKOF_ZEUGS_DEFINED
-#ifndef NO_TRAINING
-#include "myassert.h"
-#include "Array.h"
-#include "Array2.h"
-
-class HMMNetwork // Plain data holder for the tables read by ForwardBackwardTraining / HMMRealViterbi: node values, transition matrices and start/end weights.
-{
- public:
- int as,bs; // the constructor's I and J; returned by size1() and size2()
- Array2<double> n; // as x bs table read by nodeProb(i,j)
- Array<Array2<double> > e; // matrices read by outProb(); constructed empty, so it must be filled before outProb() is used
- Array<double> alphainit; // as entries, each initialized to 1.0/as
- Array<double> betainit; // as entries, each initialized to 1.0
- int ab; // as*bs
- double finalMultiply; // initialized to 1.0; not read anywhere in this class
- HMMNetwork(int I,int J)
- : as(I),bs(J),n(as,bs),/*e(as,as,0.0),*/e(0),alphainit(as,1.0/as),betainit(as,1.0),ab(as*bs),finalMultiply(1.0) // relies on as and bs being declared (and therefore initialized) before the members that use them
- {}
- double getAlphainit(int i)const{return alphainit[i];}
- double getBetainit(int i)const{return betainit[i];}
- inline int size1()const{return as;}
- inline int size2()const{return bs;}
- inline const double&nodeProb(int i,int j)const // returns a reference into n so callers can step through the table with pointers
- {return n(i,j);}
- inline const double&outProb(int j,int i1,int i2)const // entry (i1,i2) of matrix j; j beyond the last matrix is clamped to the last one
- {/*massert(e[min(int(e.size())-1,j)](i1,i2) );*/ return e[min(int(e.size())-1,j)](i1,i2);}
- friend ostream&operator<<(ostream&out,const HMMNetwork&x) // dumps n, e, alphainit and betainit under the labels "N:", "E:", "A:" and "B:"
- {
- return out <<"N: \n"<< x.n << endl << "E: \n" << x.e << "A:\n" << x.alphainit << "B:\n" << x.betainit << endl;
- }
-};
-double ForwardBackwardTraining(const HMMNetwork&mc,Array<double>&gamma,Array<Array2<double> >&epsilon); // fills gamma and epsilon from a forward-backward pass over mc
-void HMMViterbi(const HMMNetwork&mc,Array<int>&vit); // runs ForwardBackwardTraining itself, then picks the per-position maximum
-double HMMRealViterbi(const HMMNetwork&net,Array<int>&vit,int pegi=-1,int pegj=-1,bool verbose=0); // best-path search; the defaults pegi=-1, pegj=-1 apply no peg
-double MaximumTraining(const HMMNetwork&net,Array<double>&g,Array<Array2<double> >&e); // counts derived from the single best path of HMMRealViterbi
-void HMMViterbi(const HMMNetwork&net,Array<double>&g,Array<int>&vit); // picks the per-position maximum from an already computed g
-#endif
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/GNU.GPL
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/GNU.GPL b/ext/giza-pp/GIZA++-v2/GNU.GPL
deleted file mode 100644
index 5b2225e..0000000
--- a/ext/giza-pp/GIZA++-v2/GNU.GPL
+++ /dev/null
@@ -1,282 +0,0 @@
-
-
-Preamble
-
-The licenses for most software are designed to take away your freedom
-to share and change it. By contrast, the GNU General Public License is
-intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
-When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the
-rights. These restrictions translate to certain responsibilities for
-you if you distribute copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
-We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on,
-we want its recipients to know that what they have is not the
-original, so that any problems introduced by others will not reflect
-on the original authors' reputations.
-
-Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at
-all.
-
-The precise terms and conditions for copying, distribution and
-modification follow.
-
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a
-notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the Program
-(independent of having been made by running the Program). Whether that
-is true depends on what the Program does.
-
-1. You may copy and distribute verbatim copies of the Program's source
-code as you receive it, in any medium, provided that you conspicuously
-and appropriately publish on each copy an appropriate copyright notice
-and disclaimer of warranty; keep intact all the notices that refer to
-this License and to the absence of any warranty; and give any other
-recipients of the Program a copy of this License along with the
-Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a
-fee.
-
-2. You may modify your copy or copies of the Program or any portion of
-it, thus forming a work based on the Program, and copy and distribute
-such modifications or work under the terms of Section 1 above,
-provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that
- in whole or in part contains or is derived from the Program or
- any part thereof, to be licensed as a whole at no charge to all
- third parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you
- provide a warranty) and that users may redistribute the program
- under these conditions, and telling the user how to view a copy
- of this License. (Exception: if the Program itself is interactive
- but does not normally print such an announcement, your work based
- on the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of
- Sections 1 and 2 above on a medium customarily used for software
- interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt otherwise
-to copy, modify, sublicense or distribute the Program is void, and
-will automatically terminate your rights under this License. However,
-parties who have received copies, or rights, from you under this
-License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted
-herein. You are not responsible for enforcing compliance by third
-parties to this License.
-
-
-7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-9. The Free Software Foundation may publish revised and/or new
-versions of the General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation. If the Program does not specify a
-version number of this License, you may choose any version ever
-published by the Free Software Foundation.
-
-10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the
-author to ask for permission. For software which is copyrighted by the
-Free Software Foundation, write to the Free Software Foundation; we
-sometimes make exceptions for this. Our decision will be guided by the
-two goals of preserving the free status of all derivatives of our free
-software and of promoting the sharing and reuse of software generally.
-
-NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
-LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS
-AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF
-ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
-END OF TERMS AND CONDITIONS
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/Globals.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/Globals.h b/ext/giza-pp/GIZA++-v2/Globals.h
deleted file mode 100644
index fc2953c..0000000
--- a/ext/giza-pp/GIZA++-v2/Globals.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef Globals_asdf_defined
-#define Globals_asdf_defined
-#include <string>
-#include <fstream>
-#include <map>
-#include "defs.h"
-#include "Vector.h"
-
-extern float PROB_SMOOTH; // everything in this header is a declaration only; the definitions are not in this file
-extern bool Verbose, Log, Peg, Transfer, Transfer2to3, useDict ;
-extern string Prefix, LogFilename, OPath,
- SourceVocabFilename, TargetVocabFilename, CorpusFilename, TestCorpusFilename,
- t_Filename, a_Filename, p0_Filename, d_Filename, n_Filename, dictionary_Filename;
-extern ofstream logmsg ;
-extern double M5P0,P0 ;
-extern bool NODUMPS, FEWDUMPS ;
-extern string Usage ;
-extern unsigned int MAX_SENTENCE_LENGTH ; // unsigned: convert to a signed type before negating it
-extern int PegUntil;
-
-extern short DeficientDistortionForEmptyWord;
-
-extern int M4_Dependencies; // NOTE(review): presumably a mask built from the DEP_* bits below - confirm at the use sites
-extern int M5_Dependencies; // NOTE(review): presumably a mask built from the DEP_* bits below - confirm at the use sites
-
-extern short OutputInAachenFormat;
-
-#define DEP_MODEL_l 1 // DEP_MODEL_* are four distinct single-bit values (1,2,4,8)
-#define DEP_MODEL_m 2
-#define DEP_MODEL_F 4
-#define DEP_MODEL_E 8
-
-#define DEP_MODELb_l 16 // DEP_MODELb_* continue the same bit sequence (16,32,64,128)
-#define DEP_MODELb_m 32
-#define DEP_MODELb_F 64
-#define DEP_MODELb_E 128
-
-#define DEP_SUM 256 // the next single bit after the DEP_MODELb_* values
-
-class vcbList; // forward declaration; this header only declares pointers to it
-
-extern vcbList *globeTrainVcbList, *globfTrainVcbList;
-
-extern short PredictionInAlignments; // selects the jump-index formula in HMMTables::getAlProb/addAlCount; only 0, 1 and 2 are handled there
-extern short SmoothHMM;
-#define VERB Verbose // VERB expands to the Verbose flag declared above
-
-double ErrorsInAlignment(const map< pair<int,int>,char >&reference,const Vector<WordIndex>&test,int l,int&missing,int&toomuch,int&eventsMissing,int&eventsToomuch,int);
-extern Vector<map< pair<int,int>,char > > ReferenceAlignment;
-void printGIZAPars(ostream&out);
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/HMMTables.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/HMMTables.cpp b/ext/giza-pp/GIZA++-v2/HMMTables.cpp
deleted file mode 100644
index f037289..0000000
--- a/ext/giza-pp/GIZA++-v2/HMMTables.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
-
-Copyright (C) 1998,1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "HMMTables.h"
-#include <fstream>
-#include "Globals.h"
-#include "Parameter.h"
-
-// Dumps every jump distribution held in alProb to `out`.
-// For each dependency key it prints a "Distribution for:" header (via
-// printAlDeps), every non-zero entry as "index:value; ", and a SUM line.
-// A final FULL-SUM line reports the total over all distributions.
-template<class CLS,class MAPPERCLASSTOSTRING>
-void HMMTables<CLS,MAPPERCLASSTOSTRING>::writeJumps(ostream&out) const
-{
-  typedef typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator TableIter;
-  double grandTotal=0.0;
-  TableIter entry=alProb.begin();
-  while( entry!=alProb.end() )
-    {
-      const FlexArray<double>&jumps=entry->second;
-      out << "\n\nDistribution for: ";
-      printAlDeps(out,entry->first,*mapper1,*mapper2);
-      out << ' ';
-      double entryTotal=0.0;
-      for(int offset=jumps.low();offset<=jumps.high();++offset)
-        {
-          if( !jumps[offset] )
-            continue; // zero entries are neither printed nor summed
-          out << offset << ':' << jumps[offset] << ';' << ' ';
-          entryTotal+=jumps[offset];
-        }
-      out << '\n' << '\n';
-      out << "SUM: " << entryTotal << '\n';
-      grandTotal+=entryTotal;
-      ++entry;
-    }
-  out << "FULL-SUM: " << grandTotal << '\n';
-}
-template<class CLS,class MAPPERCLASSTOSTRING>
-void HMMTables<CLS,MAPPERCLASSTOSTRING>::readJumps(istream&) // no-op stub: the stream parameter is unnamed and nothing is read from it
-{
-}
-// Computes the index into a jump table for one alignment transition.
-// The formula is selected by the global PredictionInAlignments:
-//   0: istrich-k
-//   1: k
-//   2: (k*J - j*sentLength), rounded half away from zero, divided by J
-// Any other value aborts the program.
-// Shared by getAlProb and addAlCount so both always use the same index.
-static inline int hmmJumpPosition(int istrich,int k,int sentLength,int J,int j)
-{
-  switch(PredictionInAlignments)
-    {
-    case 0: return istrich-k;
-    case 1: return k;
-    case 2:
-      {
-        int pos=k*J-j*sentLength;
-        if( pos>0 ) pos+=J/2; else pos-=J/2;
-        return pos/J;
-      }
-    default: abort();
-    }
-  return 0; // not reached; abort() does not return
-}
-
-// Returns the count array stored under `deps` in `table`, creating a
-// zero-filled one first when the key is absent.
-// A new array spans [-sentLength,sentLength] when bit 1 of CompareAlDeps is
-// set (the sentence length is then part of the key), otherwise
-// [-MAX_SENTENCE_LENGTH,MAX_SENTENCE_LENGTH].
-template<class CLS>
-static FlexArray<double>&hmmJumpCounts(map<AlDeps<CLS>,FlexArray<double> >&table,const AlDeps<CLS>&deps,int sentLength)
-{
-  typename map<AlDeps<CLS>,FlexArray<double> >::iterator p=table.find(deps);
-  if( p==table.end() )
-    {
-      // MAX_SENTENCE_LENGTH is unsigned; convert before negating so the lower
-      // bound is a genuine negative int rather than a wrapped unsigned value.
-      const int span=(CompareAlDeps&1) ? sentLength : static_cast<int>(MAX_SENTENCE_LENGTH);
-      p=table.insert(make_pair(deps,FlexArray<double>(-span,span,0.0))).first;
-    }
-  return p->second;
-}
-
-// Looks up the stored value for one transition.
-//   istrich    - stored as `previous` in the lookup key; must be in [-1,sentLength)
-//   k          - must be in [0,sentLength)
-//   sentLength - stored as englishSentenceLength in the lookup key
-//   J          - divisor used only by the PredictionInAlignments==2 formula
-//   w1, w2     - stored as classPrevious and Cj in the lookup key
-//   j          - stored as j in the lookup key
-//   iter       - when in (0,5000) a missing key is reported on cout
-// Returns the table entry at the computed index, or the uniform fallback
-// 1/(2*sentLength-1) when no table exists for the key.
-template<class CLS,class MAPPERCLASSTOSTRING>
-double HMMTables<CLS,MAPPERCLASSTOSTRING>::getAlProb(int istrich,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter) const
-{
-  massert(k<sentLength&&k>=0);
-  massert(istrich<sentLength&&istrich>=-1);
-  const int pos=hmmJumpPosition(istrich,k,sentLength,J,j);
-  typename map<AlDeps<CLS>,FlexArray<double> >::const_iterator p=alProb.find(AlDeps<CLS>(sentLength,istrich,j,w1,w2));
-  if( p!=alProb.end() )
-    return (p->second)[pos];
-  if( iter>0&&iter<5000 )
-    cout << "WARNING: Not found: " << ' ' << J << ' ' << sentLength << '\n';
-  return 1.0/(2*sentLength-1);
-}
-
-// Accumulates counts for one transition.
-// `value` is always added to alProb; `valuePredicted` is added to
-// alProbPredicted only when it is non-zero. Missing tables are created on
-// demand. The key and index are built exactly as in getAlProb.
-template<class CLS,class MAPPERCLASSTOSTRING>
-void HMMTables<CLS,MAPPERCLASSTOSTRING>::addAlCount(int istrich,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted)
-{
-  const int pos=hmmJumpPosition(istrich,k,sentLength,J,j);
-  const AlDeps<CLS> deps(sentLength,istrich,j,w1,w2);
-
-  hmmJumpCounts(alProb,deps,sentLength)[pos]+=value;
-
-  if( valuePredicted )
-    hmmJumpCounts(alProbPredicted,deps,sentLength)[pos]+=valuePredicted;
-}
-
-// Returns a mutable reference to the init_alpha array stored under I.
-// When no entry exists yet, one of I zero-valued elements is created first.
-template<class CLS,class MAPPERCLASSTOSTRING>
-Array<double>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetAlphaInit(int I)
-{
-  if( init_alpha.count(I)==0 )
-    {
-      Array<double>&fresh=init_alpha[I];
-      fresh=Array<double>(I,0);
-    }
-  return init_alpha[I];
-}
-// Returns a mutable reference to the init_beta array stored under I.
-// When no entry exists yet, one of I zero-valued elements is created first.
-template<class CLS,class MAPPERCLASSTOSTRING>
-Array<double>&HMMTables<CLS,MAPPERCLASSTOSTRING>::doGetBetaInit(int I)
-{
-  if( init_beta.count(I)==0 )
-    {
-      Array<double>&fresh=init_beta[I];
-      fresh=Array<double>(I,0);
-    }
-  return init_beta[I];
-}
-
-// Copies the init_alpha array stored under I into x.
-// Returns false (leaving x untouched) when there is no entry for I.
-// Otherwise every element of x at index size()/2+1 and above is zeroed
-// after the copy, and true is returned.
-template<class CLS,class MAPPERCLASSTOSTRING>
-bool HMMTables<CLS,MAPPERCLASSTOSTRING>::getAlphaInit(int I,Array<double>&x)const
-{
-  hash_map<int,Array<double> >::const_iterator found=init_alpha.find(I);
-  if( found==init_alpha.end() )
-    return false;
-  x=found->second;
-  // only first empty word can be chosen
-  unsigned int idx=x.size()/2+1;
-  while( idx<x.size() )
-    {
-      x[idx]=0;
-      ++idx;
-    }
-  return true;
-}
-// Copies the init_beta array stored under I into x.
-// Returns false (leaving x untouched) when there is no entry for I,
-// true after a successful copy.
-template<class CLS,class MAPPERCLASSTOSTRING>
-bool HMMTables<CLS,MAPPERCLASSTOSTRING>::getBetaInit(int I,Array<double>&x)const
-{
-  hash_map<int,Array<double> >::const_iterator found=init_beta.find(I);
-  if( found==init_beta.end() )
-    return false;
-  x=found->second;
-  return true;
-}
-
-// Builds an empty set of tables.
-//   _probForEmpty - its absolute value becomes probabilityForEmpty; a
-//                   negative sign sets updateProbabilityForEmpty
-//   m1, m2        - class-to-string mappers; only their addresses are kept,
-//                   so both must outlive this object
-// The counters globalCounter, divSum, p0_count and np0_count start at zero
-// so that no member is left indeterminate. Initialisers are listed in
-// member declaration order.
-template<class CLS,class MAPPERCLASSTOSTRING>
-HMMTables<CLS,MAPPERCLASSTOSTRING>::HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2):
-  probabilityForEmpty(mfabs(_probForEmpty)),
-  updateProbabilityForEmpty(_probForEmpty<0.0),
-  globalCounter(0),
-  divSum(0.0),
-  p0_count(0.0),
-  np0_count(0.0),
-  mapper1(&m1),
-  mapper2(&m2)
-{}
-
-// Nothing to release explicitly: the mappers are not owned and the
-// containers clean up after themselves.
-template<class CLS,class MAPPERCLASSTOSTRING>
-HMMTables<CLS,MAPPERCLASSTOSTRING>::~HMMTables() {}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/HMMTables.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/HMMTables.h b/ext/giza-pp/GIZA++-v2/HMMTables.h
deleted file mode 100644
index 051bd0a..0000000
--- a/ext/giza-pp/GIZA++-v2/HMMTables.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-
-Copyright (C) 1999,2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef HMM_TABLES_H_ASDF_DEFINED
-#define HMM_TABLES_H_ASDF_DEFINED
-#include "FlexArray.h"
-
-#include "Array.h"
-#include <map>
-#include "mymath.h"
-
-// Scales the range [a,b) in place so that its elements sum to one.
-// When the elements sum to zero the range is instead filled with the
-// uniform value 1.0/(b-a). Returns the sum computed before any change.
-template<class T>
-T normalize_if_possible(T*a,T*b)
-{
-  T total=0;
-  for(T*p=a;p!=b;++p)
-    total+=*p;
-  if( total )
-    {
-      for(T*p=a;p!=b;++p)
-        *p/=total;
-      return total;
-    }
-  fill(a,b,1.0/(b-a));
-  return total;
-}
-
-extern short CompareAlDeps;
-// Key type for the jump tables.
-// Which fields take part in ordering and equality is chosen at run time by
-// the bits of the global CompareAlDeps:
-//   1: englishSentenceLength   2: classPrevious   4: previous
-//   8: j                       16: Cj
-// Fields whose bit is clear are ignored by operator< and operator==.
-template<class CLS>
-class AlDeps
-{
- public:
-  int englishSentenceLength;
-  CLS classPrevious;
-  int previous;
-  int j;
-  CLS Cj;
-  // Note the argument order: s1 initialises classPrevious and p initialises
-  // previous, although classPrevious is declared first.
-  AlDeps(int l,int p=0,int _j=0,CLS s1=0,CLS _Cj=0)
-    : englishSentenceLength(l),classPrevious(s1),previous(p),j(_j),Cj(_Cj)
-  {}
-  // Lexicographic comparison over the enabled fields, in the bit order
-  // listed above; the first enabled field that differs decides.
-  friend bool operator<(const AlDeps&x,const AlDeps&y)
-  {
-    if( CompareAlDeps&1 )
-      {
-        if( x.englishSentenceLength<y.englishSentenceLength ) return true;
-        if( y.englishSentenceLength<x.englishSentenceLength ) return false;
-      }
-    if( CompareAlDeps&2 )
-      {
-        if( x.classPrevious<y.classPrevious ) return true;
-        if( y.classPrevious<x.classPrevious ) return false;
-      }
-    if( CompareAlDeps&4 )
-      {
-        if( x.previous<y.previous ) return true;
-        if( y.previous<x.previous ) return false;
-      }
-    if( CompareAlDeps&8 )
-      {
-        if( x.j<y.j ) return true;
-        if( y.j<x.j ) return false;
-      }
-    if( CompareAlDeps&16 )
-      {
-        if( x.Cj<y.Cj ) return true;
-        if( y.Cj<x.Cj ) return false;
-      }
-    return false;
-  }
-  // Two keys are equal when neither orders before the other.
-  friend bool operator==(const AlDeps&x,const AlDeps&y)
-  { return !(x<y) && !(y<x); }
-};
-
-// Hash functor for AlDeps keys.
-// Only the fields enabled in CompareAlDeps are mixed in, using the same
-// bit-to-field mapping as the AlDeps comparison operators; each enabled
-// field is added to the running value, which is then multiplied by 31.
-template<class CLS>
-class Hash_AlDeps
-{
- public:
-  unsigned int operator()(const AlDeps<CLS>&x) const
-  {
-    unsigned int h=0;
-    if( CompareAlDeps&1 )
-      {
-        h+=x.englishSentenceLength;
-        h*=31;
-      }
-    if( CompareAlDeps&2 )
-      {
-        h+=x.classPrevious;
-        h*=31;
-      }
-    if( CompareAlDeps&4 )
-      {
-        h+=x.previous;
-        h*=31;
-      }
-    if( CompareAlDeps&8 )
-      {
-        h+=x.j;
-        h*=31;
-      }
-    if( CompareAlDeps&16 )
-      {
-        h+=x.Cj;
-        h*=31;
-      }
-    return h;
-  }
-};
-
-// Container for the tables of the HMM alignment model.
-//   CLS                 - type of the class identifiers stored in AlDeps keys
-//   MAPPERCLASSTOSTRING - type providing classString(), used when printing keys
-// alProb and alProbPredicted map an AlDeps key to an array indexed by the
-// jump index; init_alpha and init_beta are keyed by an int.
-template<class CLS,class MAPPERCLASSTOSTRING>
-class HMMTables
-{
- protected:
-  double probabilityForEmpty;     // absolute value of the constructor argument
-  bool updateProbabilityForEmpty; // true when the constructor argument was negative
-  hash_map<int,Array<double> > init_alpha;
-  hash_map<int,Array<double> > init_beta;
-  map<AlDeps<CLS>,FlexArray<double> > alProb;          // receives `value` in addAlCount
-  map<AlDeps<CLS>,FlexArray<double> > alProbPredicted; // receives `valuePredicted` in addAlCount
-  int globalCounter;
-  double divSum;
-  double p0_count,np0_count;
-  const MAPPERCLASSTOSTRING*mapper1; // not owned
-  const MAPPERCLASSTOSTRING*mapper2; // not owned
- public:
-  const HMMTables<CLS,MAPPERCLASSTOSTRING>*getThis()const {return this;}
-  HMMTables(double _probForEmpty,const MAPPERCLASSTOSTRING&m1,const MAPPERCLASSTOSTRING&m2);
-  virtual ~HMMTables();
-  virtual double getAlProb(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,int iter=0) const;
-  virtual void writeJumps(ostream&) const;
-  void addAlCount(int i,int k,int sentLength,int J,CLS w1,CLS w2,int j,double value,double valuePredicted);
-  virtual void readJumps(istream&);
-  virtual bool getAlphaInit(int I,Array<double>&x)const;
-  virtual bool getBetaInit(int I,Array<double>&x)const;
-  Array<double>&doGetAlphaInit(int I);
-  Array<double>&doGetBetaInit(int I);
-  virtual double getProbabilityForEmpty()const
-  {return probabilityForEmpty;}
-  // Rescales every array in alProb by the ratio observed/predicted.
-  // For each key that also exists in alProbPredicted, both arrays are first
-  // normalised; then every non-zero observed entry with a positive predicted
-  // entry becomes  op * observed/predicted,  where op is the matching entry
-  // of old->alProb, or 1.0 when `old` is null or has no such key.
-  // Keys missing from alProbPredicted, and non-zero entries whose predicted
-  // value is not positive, are reported on cerr and left unchanged.
-  void performGISIteration(const HMMTables<CLS,MAPPERCLASSTOSTRING>*old)
-  {
-    typedef map<AlDeps<CLS>,FlexArray<double> > JumpTable;
-    cout << "OLDSIZE: " << (old?(old->alProb.size()):0) << " NEWSIZE:"<< alProb.size()<< endl;
-    for(typename JumpTable::iterator i=alProb.begin();i!=alProb.end();++i)
-      {
-        // Each map is searched once per key instead of once per entry.
-        typename JumpTable::iterator pred=alProbPredicted.find(i->first);
-        if( pred==alProbPredicted.end() )
-          {
-            cerr << "ERROR in performGISIteration: " << alProbPredicted.count(i->first) << endl;
-            continue;
-          }
-        FlexArray<double>&observed=i->second;
-        FlexArray<double>&predicted=pred->second;
-        normalize_if_possible(observed.begin(),observed.end());
-        normalize_if_possible(predicted.begin(),predicted.end());
-
-        // Array from the previous iteration, or null when there is none.
-        const FlexArray<double>*previous=0;
-        if( old )
-          {
-            typename JumpTable::const_iterator o=old->alProb.find(i->first);
-            if( o!=old->alProb.end() )
-              previous=&o->second;
-          }
-
-        for(int j=observed.low();j<=observed.high();++j)
-          {
-            if( !observed[j] )
-              continue;
-            if( predicted[j]>0.0 )
-              {
-                const double op=previous?(*previous)[j]:1.0;
-                observed[j]=op*(observed[j]/predicted[j]);
-              }
-            else
-              {
-                cerr << "ERROR2 in performGISiteration: " << observed[j] << endl;
-              }
-          }
-      }
-  }
-};
-
-// Writes the fields of `x` that are enabled in CompareAlDeps to `out`, each
-// as "label: value " on a single line with no trailing newline.
-// Class identifiers are rendered through classString(): mapper1 for
-// classPrevious and mapper2 for Cj.
-template<class CLS,class MAPPERCLASSTOSTRING>
-inline void printAlDeps(ostream&out,const AlDeps<CLS>&x,const MAPPERCLASSTOSTRING&mapper1,const MAPPERCLASSTOSTRING&mapper2)
-{
-  if( CompareAlDeps&1 )
-    {
-      out << "sentenceLength: " << x.englishSentenceLength << ' ';
-    }
-  if( CompareAlDeps&2 )
-    {
-      out << "previousClass: " << mapper1.classString(x.classPrevious) << ' ';
-    }
-  if( CompareAlDeps&4 )
-    {
-      out << "previousPosition: " << x.previous << ' ';
-    }
-  if( CompareAlDeps&8 )
-    {
-      out << "FrenchPosition: " << x.j << ' ';
-    }
-  if( CompareAlDeps&16 )
-    {
-      out << "FrenchClass: " << mapper2.classString(x.Cj) << ' ';
-    }
-}
-
-#endif