You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/26 04:22:23 UTC
[06/14] incubator-joshua git commit: JOSHUA-252 Make it possible to
use Maven to build Joshua
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/Array.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/Array.h b/ext/giza-pp/mkcls-v2/Array.h
deleted file mode 100644
index ab1f101..0000000
--- a/ext/giza-pp/mkcls-v2/Array.h
+++ /dev/null
@@ -1,370 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef ARRAY_H_DEFINED
-#define ARRAY_H_DEFINED
-using namespace std;
-#include "myassert.h"
-#include <algorithm>
-#include <string>
-#include <utility>
-#include <functional>
-#include "my.h"
-
-#define ARRAY_DEBUG
-
-
-template<class T> class Array
-{
- private:
- T *p;
- int realSize;
- int maxWritten;
- char a;
-
- void copy(T *a,const T *b,int n);
- void copy(T *a,T *b,int n);
- void _expand();
-
- public:
- Array()
- : p(0),realSize(0),maxWritten(-1) ,a(1)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY: " << this<<" "<<(void*)p << endl;
-#endif
- }
- Array(const Array<T> &x)
- : p(new T[x.maxWritten+1]),realSize(x.maxWritten+1),maxWritten(x.maxWritten),a(x.a)
- {
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY copy: " << this << " " << realSize <<" "<<(void*)p<< endl;
-#endif
- }
- explicit Array(int n)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(0)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- Array(int n,const T&_init,int _a=0)
- : p(new T[n]),realSize(n),maxWritten(n-1),a(_a)
- {
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "MAKE ARRAY with parameter n and init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
-
- ~Array()
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- }
-
- Array<T>& operator=(const Array<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
-
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
- }
-
- Array<T>& operator=(Array<T>&x)
- {
- if( this!= &x )
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete [] p;
- realSize = x.maxWritten+1;
- maxWritten = x.maxWritten;
- a = x.a;
- p = new T[realSize];
- copy(p,x.p,realSize);
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of operator=: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- return *this;
- }
-
- void allowAccess(int n)
- {
- while( realSize<=n )
- _expand();
- maxWritten=max(maxWritten,n);
- massert( maxWritten<realSize );
- }
- void resize(int n)
- {
- while( realSize<n )
- _expand();
- maxWritten=n-1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
- }
- void init(int n,const T&_init,bool _a=0)
- {
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- delete []p;
- p=new T[n];
- realSize=n;
- a=_a;
- maxWritten=n-1;
- for(int iii=0;iii<n;iii++)p[iii]=_init;
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of init: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- }
- inline int size() const
- {massert( maxWritten<realSize );
- return maxWritten+1;}
- inline int low() const
- { return 0; }
- inline int high() const
- { return maxWritten; }
- inline bool autoexpand() const
- {return a;}
- inline void autoexpand(bool autoExp)
- {a=autoExp;}
- int findMax() const;
- int findMin() const;
- void errorAccess(int n) const;
- inline T*getPointerToData(){return p;}
-
- inline T& operator[](int n)
- {
- if( a && n==maxWritten+1 )
- allowAccess(n);
- if( n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- if(n<0 || n>maxWritten )
- errorAccess(n);
- return p[n];
- }
- const T&top(int n=0) const
- {return (*this)[maxWritten-n];}
- T&top(int n=0)
- {return (*this)[maxWritten-n];}
- T&push(const T&x)
- {
- (*this)[maxWritten+1]=x;
- return top();
- }
- bool writeTo(ostream&out) const
- {
- out << "Array ";
- out << size() << " ";
- out << a << endl;
- for(int iv=0;iv<=maxWritten;iv++)
- {
- writeOb(out,(*this)[iv]);
- out << endl;
- }
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(Array): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="Array") )
- {
- cerr << "ERROR(Array): Array!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- in >> a;
- resize(biggest);
- for(int iv=0;iv<size();iv++)
- {
- readOb(in,(*this)[iv]);
- }
- return 1;
- }
-};
-
-template<class T> bool operator==(const Array<T> &x, const Array<T> &y)
-{
- if( &x == &y )
- return 1;
- else
- {
- if( y.size()!=x.size() )
- return 0;
- else
- {
- for(int iii=0;iii<x.size();iii++)
- if( !(x[iii]==y[iii]) )
- return 0;
- return 1;
- }
- }
-}
-
-template<class T> bool operator<(const Array<T> &x, const Array<T> &y)
-{
- if( &x == &y )
- return 0;
- else
- {
- if( y.size()<x.size() )
- return !(y<x);
- for(int iii=0;iii<x.size();iii++)
- {
- massert( iii!=y.size() );
- if( x[iii]<y[iii] )
- return 1;
- else if( y[iii]<x[iii] )
- return 0;
- }
- return x.size()!=y.size();
- }
-}
-
-
-template<class T> void Array<T>:: errorAccess(int n) const
-{
- cerr << "ERROR: Access to array element " << n
- << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
- cout << "ERROR: Access to array element " << n
- << " (" << maxWritten << "," << realSize << "," << (void*)p << " " << a << ")\n";
- massert(0);
-#ifndef DEBUG
- abort();
-#endif
-}
-
-template<class T> ostream& operator<<(ostream&o,const Array<T>&a)
-{
- o << "Array(" << a.size() << "," << a.autoexpand() << "){ ";
- for(int iii=0;iii<a.size();iii++)
- o << " " << iii<< ":" << a[iii]<<";";
- return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, Array<T>&)
-{return in;}
-
-template<class T> int Hash(const Array<T>&a)
-{
- int n=0;
- for(int iii=0;iii<a.size();iii++)
- n+=Hash(a[iii])*(iii+1);
- return n+a.size()*47;
-}
-template<class T> void Array<T>::copy(T *aa,const T *bb,int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-template<class T> void Array<T>::copy(T *aa,T *bb,int n)
-{
- for(int iii=0;iii<n;iii++)
- aa[iii]=bb[iii];
-}
-
-template<class T> void Array<T>::_expand()
-{
-#ifdef VERY_ARRAY_DEBUG
- cout << "FREE ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
- T *oldp=p;
- int oldsize=realSize;
- realSize=realSize*2+1;
- p=new T[realSize];
- copy(p,oldp,oldsize);
- delete [] oldp;
-#ifdef VERY_ARRAY_DEBUG
- cout << "NEW ARRAY because of _expand: " << this << " " << realSize<<" "<<(void*)p << endl;
-#endif
-}
-
-template<class T> int Array<T>::findMax() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int maxPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[maxPos]<(*this)[iii] )
- maxPos=iii;
- return maxPos;
- }
-}
-template<class T> int Array<T>::findMin() const
-{
- if( size()==0 )
- return -1;
- else
- {
- int minPos=0;
- for(int iii=1;iii<size();iii++)
- if( (*this)[iii]<(*this)[minPos] )
- minPos=iii;
- return minPos;
- }
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/FixedArray.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/FixedArray.h b/ext/giza-pp/mkcls-v2/FixedArray.h
deleted file mode 100644
index da2ce3e..0000000
--- a/ext/giza-pp/mkcls-v2/FixedArray.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#ifndef FIXARRAY_H_DEFINED
-#define FIXARRAY_H_DEFINED
-#include <iostream>
-#include <string>
-#include <functional>
-
-template<class T>
-bool writeOb(ostream&out,const T&f)
-{
- out << f << " ";
- return 1;
-}
-
-template<class T>
-bool readOb(istream&in,T&f)
-{
- in >> f;
- char c;
- in.get(c);
- massert(c==' ');
- return 1;
-}
-
-template<class T>
-bool writeOb(ostream&out,const string &s,const T&f)
-{
- out << s << " " << f << " ";
- return 1;
-}
-template<class T>
-bool readOb(istream&in,const string&s,T&f)
-{
- string ss;
- in >> ss;
- if( s!=ss )
- {
- cerr << "ERROR: readOb should be '" << s << "' and is '" << ss << "'" << endl;
- return 0;
- }
- in >> f;
- char c;
- in.get(c);
- massert(c==' ');
- return 1;
-}
-
-template<class T> class FixedArray
-{
- private:
- void copy(T *aa,const T *bb,int nnn)
- {for(int iii=0;iii<nnn;iii++)aa[iii]=bb[iii];}
-
- public:
- T *p;
- int realSize;
- FixedArray()
- : p(0),realSize(0){}
- FixedArray(const FixedArray<T> &x)
- : p(new T[x.realSize]),realSize(x.realSize) {copy(p,x.p,realSize);}
- explicit FixedArray(int n)
- : p(new T[n]),realSize(n){}
- FixedArray(int n,const T&_init)
- : p(new T[n]),realSize(n){for(int z=0;z<n;z++)p[z]=_init;}
- FixedArray(const FixedArray&f,const T&t)
- : p(new T[f.size()+1]),realSize(f.size()+1){for(int z=0;z<f.size();z++)p[z]=f[z];p[f.size()]=t;}
- ~FixedArray()
- { delete [] p;p=0;realSize=-1;}
-
- FixedArray<T>& operator=(const FixedArray<T>&x)
- {
- if( this!= &x )
- {
- delete [] p;
- realSize = x.realSize;
- p = new T[x.realSize];
- copy(p,x.p,realSize);
- }
- return *this;
- }
- void resize(int n)
- {
- if( n<=realSize )
- shrink(n);
- else
- {
- T*np=new T[n];
- copy(np,p,realSize);
- delete []p;
- p=np;
- realSize=n;
- }
- }
- void shrink(int n)
- {
- assert(n<=realSize);
- realSize=n;
- }
- void init(int n,const T&_init)
- {
- delete []p;
- p=new T[n];
- realSize=n;
- for(int l=0;l<n;l++)p[l]=_init;
- }
- inline const T&top(int n=0) const
- {return (*this)[realSize-1-n];}
- inline int size() const
- {return realSize;}
-
- inline T*begin(){ return p; }
- inline T*end(){ return p+realSize; }
-
- inline const T*begin()const{ return p; }
- inline const T*end()const{return p+realSize;}
-
- inline int low() const
- {return 0;}
- inline int high() const
- {return realSize-1;}
- void errorAccess(int n) const;
-
- inline T& operator[](int n)
- {
- return p[n];
- }
- inline const T& operator[](int n) const
- {
- return p[n];
- }
- bool writeTo(ostream&out) const
- {
- out << "FixedArray ";
- out << size() << " ";
- for(int a=0;a<size();a++)
- {
- writeOb(out,(*this)[a]);
- out << " ";
- }
- out << endl;
- return 1;
- }
- bool readFrom(istream&in)
- {
- string s;
- if( !in )
- {
- cerr << "ERROR(FixedArray): file cannot be opened.\n";
- return 0;
- }
- in >> s;
- if( !(s=="FixedArray") )
- {
- cerr << "ERROR(FixedArray): FixedArray!='"<<s<<"'\n";
- return 0;
- }
- int biggest;
- in >> biggest;
- resize(biggest);
- for(int a=0;a<size();a++)
- readOb(in,(*this)[a]);
- return 1;
- }
- void sort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until);
- }
- void invsort(int until=-1)
- {
- if( until== -1 ) until=size();
- std::sort(p,p+until,greater<T>());
- }
- int binary_locate(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=-1&&pos<size() )
- return pos;
- else
- return -1;
- }
- int binary_search(const T&t)
- {
- T*ppos=std::lower_bound(p,p+size(),t);
- int pos=ppos-p;
- if( pos>=0&&pos<size()&& *ppos==t )
- return pos;
- else
- return -1;
- }
- typedef T* iterator;
- typedef const T* const_iterator;
-};
-
-template<class T> bool operator<(const FixedArray<T> &x, const FixedArray<T> &y)
-{
- return lexicographical_compare(x.begin(),x.end(),y.begin(),y.end());
-
-}
-
-
-template<class T> bool operator==(const FixedArray<T> &x, const FixedArray<T> &y)
-{
- if( &x == &y )return 1;
- const int s = x.size();
- if( s !=y.size() )return 0;
- for(int iii=0;iii<s;iii++)
- if( !(x.p[iii]==y.p[iii]) )
- return 0;
- return 1;
-}
-
-template<class T> int Hash(const FixedArray<T>&a)
-{
- int n=0;
- const int s=a.size();
- for(int iii=0;iii<s;iii++)
- n=13*n+Hash(a.p[iii]);
- return n;
-}
-
-template<class T> void FixedArray<T>:: errorAccess(int n) const
-{
- massert(0);
- cerr << "ERROR: Access to array element " << n
- << " (" << realSize << "," << (void*)p << ")\n";
-}
-
-template<class T> ostream& operator<<(ostream&o,const FixedArray<T>&a)
-{
- o << "FixedArray(" << a.size() << "){ ";
- for(int iii=0;iii<a.size();iii++)
- o << " " << iii<< ":" << a[iii]<<";";
- return o << "}\n";
-}
-
-template<class T> istream& operator>>(istream&in, FixedArray<T>&)
-{ return in;}
-
-template<class T> FixedArray<T> operator+(const FixedArray<T>&a,const FixedArray<T>&b)
-{
- massert(a.size()==b.size());
- FixedArray<T> x(a.size());
- for(int iii=0;iii<a.size();iii++)
- x[iii]=a[iii]+b[iii];
- return x;
-}
-template<class T> FixedArray<T> operator|(const FixedArray<T>&aaa,const FixedArray<T>&bbb)
-{
- iassert(aaa.size()==bbb.size());
-
- FixedArray<T> xxx(aaa.size());
- for(int iii=0;iii<aaa.size();iii++)
- xxx.p[iii]=aaa.p[iii]||bbb.p[iii];
- return xxx;
-}
-
-#endif
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/FlexArray.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/FlexArray.h b/ext/giza-pp/mkcls-v2/FlexArray.h
deleted file mode 100644
index ede3e9e..0000000
--- a/ext/giza-pp/mkcls-v2/FlexArray.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef CLASS_FlexArray_defined
-#define CLASS_FlexArray_defined
-#include "FixedArray.h"
-
-template<class T>
-class FlexArray
-{
-private:
- FixedArray<T> p;
- int start,end;
-public:
- FlexArray(int _start=0,int _end=-1)
- : p(_end-_start+1),start(_start),end(_end) {}
- T&operator[](int i)
- {return p[i-start];}
- const T&operator[](int i)const
- {returnp[i-start];}
- int low()const{return start;}
- int high()const{return end;}
-};
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/GDAOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/GDAOptimization.cpp b/ext/giza-pp/mkcls-v2/GDAOptimization.cpp
deleted file mode 100644
index a9e2fa7..0000000
--- a/ext/giza-pp/mkcls-v2/GDAOptimization.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "GDAOptimization.h"
-#include "ProblemTest.h"
-#include <cmath>
-
-#define GDAOptimization GDAOptimization
-#define IterOptimization IterOptimization
-
-
-
-double GDAOptimization::defaultTemperatur=1e100;
-
-
-double GDAOptimization::defaultAlpha=0.001;
-
-
-
-GDAOptimization::GDAOptimization(Problem &p,int m)
-: IterOptimization(p,m) ,temperatur(defaultTemperatur),alpha(defaultAlpha)
-{
-}
-
-
-GDAOptimization::GDAOptimization(Problem &p,double t,double a,int m)
-: IterOptimization(p,m) ,temperatur(t) ,alpha(a)
-{
-}
-
-
-GDAOptimization::GDAOptimization(GDAOptimization &o)
-: IterOptimization(o)
-{
- temperatur = o.temperatur;
- alpha = o.alpha;
- gdaEndFlag = o.gdaEndFlag;
-}
-
-
-void GDAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if(temperatur==1e100)
- {
- double v=problem.value();
-
-
-
-
-
- temperatur=v;
- }
- assert(alpha>=0);
-}
-
-short GDAOptimization::accept(double delta)
-{
- if( curValue + delta < temperatur )
- return 1;
- else
- return 0;
-}
-
-void GDAOptimization::abkuehlen()
-{
- double newTemperatur = temperatur - alpha*(temperatur - curValue);
- if( fabs(temperatur - newTemperatur)<1e-30 )
- gdaEndFlag=1;
- else
- gdaEndFlag=0;
- temperatur = newTemperatur;
-}
-
-short GDAOptimization::end()
-{
- return ( endFlag>0 ) && ( gdaEndFlag );
-}
-
-void GDAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur-curValue;
-}
-
-
-
-
-double GDAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- if(typ!=1)
- {
- cerr << "Error: wrong parameter-type in GDAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- else
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#GDA-optimizeValues: " << numParameter<<endl;
-
-
- defaultTemperatur=1e100;
-
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAlpha = pow(pow(200,1.0/numParameter),i)*0.002;
- solveProblem(0,p,proParameter,optimierungsschritte,GDA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAlpha;
- }
- if( print )
- {
- cout << defaultAlpha <<" ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller()<< " "<< end.getSigmaBigger()<< endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit"
- " Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAlpha=0.03;
- return bestPar;
- }
- return 1e100;
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/GDAOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/GDAOptimization.h b/ext/giza-pp/mkcls-v2/GDAOptimization.h
deleted file mode 100644
index 33bcec3..0000000
--- a/ext/giza-pp/mkcls-v2/GDAOptimization.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef GDAOPTIMIZATION
-#define GDAOPTIMIZATION
-#include "IterOptimization.h"
-
-class GDAOptimization : public IterOptimization
-{
-
- private:
- double temperatur;
- double alpha;
- short gdaEndFlag;
-
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- GDAOptimization(Problem &p,double temperatur,double alpha,
- int maxIter=-1);
-
-
- GDAOptimization(Problem &p,int maxIter=-1);
-
-
- GDAOptimization(GDAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
-
- static double defaultTemperatur;
- static double defaultAlpha;
-
-};
-#endif
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/GNU.GPL
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/GNU.GPL b/ext/giza-pp/mkcls-v2/GNU.GPL
deleted file mode 100644
index 5b2225e..0000000
--- a/ext/giza-pp/mkcls-v2/GNU.GPL
+++ /dev/null
@@ -1,282 +0,0 @@
-
-
-Preamble
-
-The licenses for most software are designed to take away your freedom
-to share and change it. By contrast, the GNU General Public License is
-intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
-When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
-To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the
-rights. These restrictions translate to certain responsibilities for
-you if you distribute copies of the software, or if you modify it.
-
-For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
-We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
-Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on,
-we want its recipients to know that what they have is not the
-original, so that any problems introduced by others will not reflect
-on the original authors' reputations.
-
-Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at
-all.
-
-The precise terms and conditions for copying, distribution and
-modification follow.
-
-
-TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-0. This License applies to any program or other work which contains a
-notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The "Program", below,
-refers to any such program or work, and a "work based on the Program"
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term "modification".) Each licensee is addressed as "you".
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the Program
-(independent of having been made by running the Program). Whether that
-is true depends on what the Program does.
-
-1. You may copy and distribute verbatim copies of the Program's source
-code as you receive it, in any medium, provided that you conspicuously
-and appropriately publish on each copy an appropriate copyright notice
-and disclaimer of warranty; keep intact all the notices that refer to
-this License and to the absence of any warranty; and give any other
-recipients of the Program a copy of this License along with the
-Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a
-fee.
-
-2. You may modify your copy or copies of the Program or any portion of
-it, thus forming a work based on the Program, and copy and distribute
-such modifications or work under the terms of Section 1 above,
-provided that you also meet all of these conditions:
-
- a) You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b) You must cause any work that you distribute or publish, that
- in whole or in part contains or is derived from the Program or
- any part thereof, to be licensed as a whole at no charge to all
- third parties under the terms of this License.
-
- c) If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display an
- announcement including an appropriate copyright notice and a
- notice that there is no warranty (or else, saying that you
- provide a warranty) and that users may redistribute the program
- under these conditions, and telling the user how to view a copy
- of this License. (Exception: if the Program itself is interactive
- but does not normally print such an announcement, your work based
- on the Program is not required to print an announcement.)
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-3. You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
- a) Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of
- Sections 1 and 2 above on a medium customarily used for software
- interchange; or,
-
- b) Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a medium
- customarily used for software interchange; or,
-
- c) Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with such
- an offer, in accord with Subsection b above.)
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-4. You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt otherwise
-to copy, modify, sublicense or distribute the Program is void, and
-will automatically terminate your rights under this License. However,
-parties who have received copies, or rights, from you under this
-License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-5. You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-6. Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted
-herein. You are not responsible for enforcing compliance by third
-parties to this License.
-
-
-7. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-8. If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-9. The Free Software Foundation may publish revised and/or new
-versions of the General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation. If the Program does not specify a
-version number of this License, you may choose any version ever
-published by the Free Software Foundation.
-
-10. If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the
-author to ask for permission. For software which is copyrighted by the
-Free Software Foundation, write to the Free Software Foundation; we
-sometimes make exceptions for this. Our decision will be guided by the
-two goals of preserving the free status of all derivatives of our free
-software and of promoting the sharing and reuse of software generally.
-
-NO WARRANTY
-
-11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
-LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS
-AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF
-ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-
-12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
-END OF TERMS AND CONDITIONS
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/HCOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/HCOptimization.cpp b/ext/giza-pp/mkcls-v2/HCOptimization.cpp
deleted file mode 100644
index 0c6a729..0000000
--- a/ext/giza-pp/mkcls-v2/HCOptimization.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "HCOptimization.h"
-
-// Build a hill-climbing optimizer over problem p with step budget m.
-// A non-positive budget is replaced by the problem's own estimate of the
-// number of iterations it needs.
-HCOptimization::HCOptimization(Problem &p,int m)
-  : IterOptimization(p,m)
-{
-  const bool noBudgetGiven = !( maxStep>0 );
-  if( noBudgetGiven )
-    maxStep=(int)(problem.expectedNumberOfIterations());
-}
-// Copy constructor: this class adds no state of its own, so the base-class
-// copy constructor does all the work.
-HCOptimization::HCOptimization(HCOptimization &o)
-  : IterOptimization(o) {}
-
-
-// Acceptance rule: take a proposed change only when it strictly lowers the
-// objective (delta < 0). Returns 1 to accept, 0 to reject.
-short HCOptimization::accept(double delta)
-{
-  return ( delta < 0 ) ? 1 : 0;
-}
-// Termination test: returns 1 once endFlag has been raised, otherwise 0.
-short HCOptimization::end()
-{
-  if( endFlag>0 )
-    return 1;
-  return 0;
-}
-// Cooling hook ("abkuehlen" is German for "cool down"), invoked once per
-// iteration by IterOptimization::minimize(). It is a no-op in this class.
-void HCOptimization::abkuehlen()
-{
-}
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/HCOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/HCOptimization.h b/ext/giza-pp/mkcls-v2/HCOptimization.h
deleted file mode 100644
index ec147b2..0000000
--- a/ext/giza-pp/mkcls-v2/HCOptimization.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef HCOPTIMIZATION
-#define HCOPTIMIZATION
-#include "IterOptimization.h"
-
-// Iterative optimizer that supplies the three hooks IterOptimization leaves
-// pure virtual: accept(), abkuehlen() and end().
-class HCOptimization : public IterOptimization
-{
-
- protected:
- // Decide whether a change with objective difference `delta` is taken
- // (non-zero) or rejected (zero).
- virtual short accept(double delta);
-
-
- // Per-iteration cooling hook ("abkuehlen" = "cool down").
- virtual void abkuehlen();
-
-
- // Non-zero when the optimization loop should stop.
- virtual short end();
-
-
- public:
- // maxIter is the step budget; the default of -1 means "none given".
- HCOptimization(Problem &p,int maxIter=-1);
-
-
- // Copy constructor (takes a non-const reference).
- HCOptimization(HCOptimization &o);
-
-
-};
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/IterOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/IterOptimization.cpp b/ext/giza-pp/mkcls-v2/IterOptimization.cpp
deleted file mode 100644
index 258cb1f..0000000
--- a/ext/giza-pp/mkcls-v2/IterOptimization.cpp
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include "IterOptimization.h"
-#include "ProblemTest.h"
-
-// Optional trace stream. When non-null, IterOptimization::minimize() writes
-// "step value delta" lines to it; as a namespace-scope pointer it starts null.
-ostream *GraphOutput;
-
-
-
-// Construct an optimizer over problem p with step budget m (stored in
-// maxStep). Every scalar member gets a defined value here, so getCurStep()
-// and getCurrentValue() never read indeterminate memory before the first
-// minimize() call. The real starting values are still computed by
-// zInitialize() on that first call (initialisiert stays 0 until then).
-// Initializers are listed in member declaration order.
-IterOptimization::IterOptimization(Problem& p,int m)
-  : maxNonBetterIterations(0),problem(p),
-    curStep(0),curValue(0.0),bestStep(0),bestValue(0.0),
-    maxStep(m),initialisiert(0),endFlag(0),endFlag2(0)
-{
-}
-
-
-
-// Copy constructor: shares the same Problem reference and copies the full
-// optimizer state. curStep must be copied too: when `o` is already
-// initialised, minimize() on the copy skips zInitialize() and would otherwise
-// increment, and compare against bestStep, an indeterminate step counter.
-IterOptimization::IterOptimization(IterOptimization& o) : Optimization(),problem(o.problem)
-{
-  maxNonBetterIterations=o.maxNonBetterIterations;
-  curStep = o.curStep;
-  curValue = o.curValue;
-  bestStep = o.bestStep;
-  bestValue = o.bestValue;
-  maxStep = o.maxStep;
-  initialisiert = o.initialisiert;
-  endFlag = o.endFlag;
-  endFlag2 = o.endFlag2;
-}
-
-
-
-// Run the optimization loop and return the current objective value.
-//   steps  > 0 : perform at most `steps` iterations in this call
-//   steps == 0 : only make sure the optimizer is initialised
-//   steps  < 0 : iterate until end() or problem.endCriterion() says stop
-// Each iteration asks the problem for a candidate change, evaluates its
-// delta, calls the abkuehlen() hook and applies the change if accept() agrees.
-double IterOptimization::minimize(int steps)
-{
- // Lazy initialisation on first use.
- if( !initialisiert )
- zInitialize();
-
- if( steps==0 )
- return curValue;
-
- // t counts iterations of this call; `every` is the interval (in steps)
- // between lines written to GraphOutput.
- int t=0;
- int every=(steps<0)?10000:(steps/1000+1);
-
- do
- {
- curStep++;
- t++;
- // Progress report every 1000 steps; '\r' keeps it on one console line.
- if(verboseMode&&(curStep%1000==0))
- {
- if(steps>0)
- cout << "Processed: " << 100.0*(curStep/(double)max(maxStep,1)) << " percent. (IterOptimization run) "
- << curValue << " max:" << maxStep << " " << steps << " \r";
- else
- cout << "In step:" << curStep << " currentValue: " << curValue
- << " bestValue: " << bestValue-curValue << " " << curStep-bestStep << ". \r";
- cout.flush();
- }
-
-
- // The address of the returned reference is taken and deleted at the end
- // of the iteration, so problem.change() presumably hands over a
- // heap-allocated object - verify against Problem::change().
- ProblemChange *change= &(problem.change());
-
-
- double delta=problem.valueChange(*change);
-
-
- abkuehlen();
-
-
- if( accept(delta) )
- {
-
- problem.doChange(*change);
-
-
- curValue+=delta;
-
-
- // Count as a new best only when better by more than 1e-10; a new best
- // also clears both end flags.
- if( curValue<bestValue-1e-10 )
- {
- bestValue=curValue;
- bestStep=curStep;
- endFlag2=endFlag=0;
- }
-
- if( verboseMode>1 )
- cout<<"in step: "<<curStep<<" accepted with : "<<delta<<endl;
- }
-
- // Stagnation flags: endFlag after more than maxNonBetterIterations steps
- // without a new best, endFlag2 after more than twice that many. Both are
- // disabled when maxNonBetterIterations is not positive.
- if(curStep - bestStep>maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag=1;
- if(curStep - bestStep>2*maxNonBetterIterations && maxNonBetterIterations>0)
- endFlag2=1;
-
-
-
- if( GraphOutput&&((curStep%every)==0) )
- {
- makeGraphOutput();
- *GraphOutput<<" "<<delta<<endl;
- }
-
- delete change;
- // With negative `steps`, t never equals steps, so only end() and
- // endCriterion() can terminate the loop.
- } while( t!=steps && (!end()) && (!problem.endCriterion()) );
-
- // Final trace line for the state the loop ended in.
- if( GraphOutput)
- {
- makeGraphOutput();
- *GraphOutput<<endl;
- }
- return curValue;
-}
-
-
-// Reset the optimizer to the problem's current state: the current and best
-// value both start at problem.value(), the step counters and end flags are
-// cleared, and the stagnation limit is taken from the problem.
-void IterOptimization::zInitialize()
-{
-  initialisiert=1;
-
-  curValue=problem.value();
-  bestValue=curValue;
-
-  maxNonBetterIterations=problem.maxNonBetterIterations();
-
-  curStep=0;
-  bestStep=0;
-
-  endFlag=0;
-  endFlag2=0;
-}
-
-
-// Write "<step> <value> " (no newline) to the GraphOutput stream.
-// The caller must have checked that GraphOutput is non-null.
-void IterOptimization::makeGraphOutput()
-{
-  ostream &out = *GraphOutput;
-  out << curStep << " " << curValue << " ";
-}
-
-
-// Recursive ternary-style search for the value of `parameter` in [min,max]
-// that gives the lowest mean result of solveProblem().
-//
-//   p          problem handed to solveProblem()
-//   parameter  reference that is set to each probe value before solving
-//              (presumably a tuning knob read by the solver - verify at the
-//              call site)
-//   min, max   current search interval
-//   nRun       number of runs per probe, forwarded to solveProblem()
-//   nPar       remaining probe budget; every level spends two probes
-//   verfahren  optimization method id, forwarded to solveProblem()
-//   bv         out: best mean value found. Left untouched when nPar<=0.
-//
-// Returns the best parameter value found; with nPar<=0 it returns the middle
-// of the interval.
-//
-// Fix: the second parameter had been mangled to "double ¶meter"
-// ("&para" decoded as an HTML entity); it is "double &parameter".
-double IterOptimizationOptimizeParameter(Problem &p,
-                                         double &parameter,double min,double max,
-                                         int nRun,int nPar,int verfahren,
-                                         double &bv)
-{
-  if( nPar<=0 )
-    return (max+min)/2;
-
-  StatVar end1,time1,init1;
-  StatVar end2,time2,init2;
-  double mean1,mean2;
-  double par1,par2;
-
-  // Probe at one third of the interval.
-  parameter = par1 = min + (max-min)/3;
-  solveProblem(0,p,nRun,-1,verfahren,mean1,end1,time1,init1);
-  cout << parameter << " " << mean1 << " " << end1.quantil(0.0) << " " << end1.quantil(1.0) << endl;
-
-  // Probe at two thirds of the interval.
-  parameter = par2 = min + 2*(max-min)/3;
-  solveProblem(0,p,nRun,-1,verfahren,mean2,end2,time2,init2);
-  cout << parameter << " " << mean2 << " " << end2.quantil(0.0) << " " << end2.quantil(1.0) << endl;
-
-  // Drop the third of the interval beyond the worse probe and recurse.
-  // bestVal is seeded with the better mean so a recursion that bottoms out
-  // (and leaves it untouched) cannot beat the probes made here.
-  double bestPar,bestVal;
-  if(mean1<mean2)
-  {
-    bestVal = mean1;
-    bestPar=IterOptimizationOptimizeParameter(p,parameter,min,min+2*(max-min)/3,nRun,nPar-2,verfahren,bestVal);
-  }
-  else
-  {
-    bestVal = mean2;
-    bestPar=IterOptimizationOptimizeParameter(p,parameter,min+(max-min)/3,max,nRun,nPar-2,verfahren,bestVal);
-  }
-
-  // Report whichever of this level's probes and the recursive result is best.
-  if( mean1<bestVal&&mean1<=mean2 )
-  {
-    bv = mean1;
-    return par1;
-  }
-  else if(mean2<bestVal && mean2<=mean1)
-  {
-    bv = mean2;
-    return par2;
-  }
-  else
-  {
-    bv = bestVal;
-    return bestPar;
-  }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/IterOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/IterOptimization.h b/ext/giza-pp/mkcls-v2/IterOptimization.h
deleted file mode 100644
index ba39b55..0000000
--- a/ext/giza-pp/mkcls-v2/IterOptimization.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef ITEROPTIMIZATION
-#define ITEROPTIMIZATION
-
-#include "Optimization.h"
-
-
-
-
-
-#define ANZ_VERSCHLECHTERUNGEN 500
-
-extern ostream *GraphOutput;
-
-
-// Base class for iterative optimizers: minimize() drives a loop of
-// "propose change / evaluate / maybe accept" on a Problem, while subclasses
-// supply the acceptance rule, cooling hook and stop test.
-class IterOptimization : public Optimization
- {
-
-
- private:
- // Steps without a new best value after which endFlag is raised
- // (a value <= 0 disables the check).
- int maxNonBetterIterations;
-
-
- protected:
- Problem &problem;      // problem being optimized (not owned by this class)
- int curStep;           // number of iterations performed so far
- double curValue;       // objective value of the current state
- int bestStep;          // step at which bestValue was reached
- double bestValue;      // lowest objective value seen so far
- int maxStep;           // step budget passed to the constructor
- int initialisiert;     // non-zero once zInitialize() has run
- short endFlag;         // set after > maxNonBetterIterations stagnant steps
- short endFlag2;        // set after > 2*maxNonBetterIterations stagnant steps
-
-
-
-
- // Write "<step> <value> " to GraphOutput.
- virtual void makeGraphOutput();
-
-
- // Non-zero when the loop in minimize() should stop.
- virtual short end()=0;
-
-
- // Cooling hook ("abkuehlen" = "cool down"), called once per iteration.
- virtual void abkuehlen()=0;
-
-
- // Non-zero to apply a change whose objective difference is `delta`.
- virtual short accept(double delta)=0;
-
-
- // Reset counters/flags and read the starting value from the problem.
- virtual void zInitialize();
-
-
- public:
- // maxIter is stored as maxStep; the default of -1 means "none given".
- IterOptimization(Problem &p,int maxIter=-1);
-
-
- // Copy constructor (takes a non-const reference).
- IterOptimization(IterOptimization &o);
-
-
- // Run up to `steps` iterations (negative: until a stop test fires) and
- // return the current objective value.
- virtual double minimize(int steps=-1);
-
-
- inline int getCurStep();
-
-
- inline double getCurrentValue();
-
-
- inline const Problem& getProblem();
-
-
-};
-
-// Search [min,max] for the value of `parameter` with the lowest mean result;
-// returns that value and stores the corresponding mean in `bv`.
-// Fix: "double ¶meter" was a mangled "double &parameter".
-double IterOptimizationOptimizeParameter(Problem &p,
-                                         double &parameter,double min,double max,
-                                         int nRun,int nPar,int verfahren,double &bv);
-
-// Number of iterations performed so far.
-inline int IterOptimization::getCurStep()
-{
-  return curStep;
-}
-// Objective value of the current state.
-inline double IterOptimization::getCurrentValue()
-{
-  return curValue;
-}
-// Read-only access to the problem being optimized.
-inline const Problem& IterOptimization::getProblem()
-{
-  return problem;
-}
-
-#endif
-
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/KategProblem.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/KategProblem.cpp b/ext/giza-pp/mkcls-v2/KategProblem.cpp
deleted file mode 100644
index 88b50b3..0000000
--- a/ext/giza-pp/mkcls-v2/KategProblem.cpp
+++ /dev/null
@@ -1,1001 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "KategProblem.h"
-#include "KategProblemTest.h"
-
-#include "ProblemTest.h"
-
-// Smoothing constant defined in another translation unit; the KategProblem
-// constructor copies it into its sigmaVerfaelschung member.
-extern double SigmaVerfaelschung;
-
-// Lookup tables filled by the KategProblem constructor for 1 <= i < MAX_H_TABLE:
-//   h_table[i]   = i*log(i)
-//   l_table[i]   = log(i)
-//   hmy_table[i] = i*log(verfaelsche(i,sigma))
-// hmy_sigma records the sigma that hmy_table was built with.
-double h_table[MAX_H_TABLE],l_table[MAX_H_TABLE],hmy_table[MAX_H_TABLE],hmy_sigma;
-
-// In INIT_LWRW initialisation, categories with index below nKats*LWRW_Faktor
-// are seeded with one word each, taken in wordFreq.absteigend order.
-double LWRW_Faktor=0.5;
-
-// qsort()-style three-way comparator for ints: negative, zero or positive
-// when *p is less than, equal to or greater than *j.
-// The values are compared rather than subtracted: "a - b" overflows (undefined
-// behaviour, and in practice the wrong sign) for operands that are far apart,
-// such as INT_MIN and 1.
-static int intcompare(const void *p,const void *j)
-{
-  const int a = *static_cast<const int *>(p);
-  const int b = *static_cast<const int *>(j);
-  return (a > b) - (a < b);
-}
-
-// Construct a word-categorisation problem.
-//   aw                number of words (wordFreq and _katOfWord are sized with it)
-//   mak               number of usable categories; two extra slots are
-//                     allocated (mak+2)
-//   _initialisierung, _auswertung, _nachbarschaft
-//                     initialisation / criterion / neighbourhood selectors,
-//                     forwarded to Problem
-//   mindestAnzahl     forwarded to wordFreq
-// Also (re)fills the global lookup tables h_table, l_table and hmy_table.
-KategProblem::KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int mindestAnzahl)
-: Problem(mak,aw,_initialisierung,_auswertung,_nachbarschaft),
- sigmaVerfaelschung(SigmaVerfaelschung),katWasEmpty(0),nwg(mak+2),ngw(mak+2),_katOfWord(aw,-1),words(0),kats(0),
- wordFreq(aw,mindestAnzahl),katFreq(mak+2,(_auswertung==CRITERION_MY)?SigmaVerfaelschung:0.0),
- initLike(aw,-1)
-
-{
- if( auswertung == CRITERION_MY )
- cout << "Sigma-Verfaelschung: " << sigmaVerfaelschung << endl;
- _maxComp=aw;
- _maxCompVal=mak;
- massert(katFreq.nKats>0);
- massert(mak<=aw);
-
-
- // Tables start at i=1 because log(0) is undefined; index 0 of h_table and
- // l_table is set explicitly below.
- for(int i=1;i<MAX_H_TABLE;i++)
- {
- h_table[i]=i*log((double)(i));
- l_table[i]=log((double)(i));
- hmy_table[i]=i*log(verfaelsche(i,sigmaVerfaelschung));
- }
- hmy_sigma=sigmaVerfaelschung;
- l_table[0]=h_table[0]=0;
-
- // With best-category selection the second change dimension collapses to a
- // single value.
- if( katwahl()==K_BEST )
- _maxCompVal=1;
-
-}
-
-// Release the optional word list and the category sets; both pointers may be
-// null, which delete accepts.
-KategProblem::~KategProblem()
-{
-  delete words;
-  delete kats;
-}
-
-// Initialise with the given strategy and no special fixed word (-1).
-void KategProblem::_initialize(int initTyp)
-{
-  const int noSpecialFixedWord = -1;
-  _initialize(initTyp,noSpecialFixedWord);
-}
-
-// Assign every word to a start category using strategy `initTyp`, then
-// rebuild the category bigram counts in katFreq from the word bigram counts.
-//   initTyp           one of INIT_OTHER, INIT_RAN, INIT_AIO, INIT_FREQ,
-//                     INIT_LWRW; any other value prints an error and exits
-//   specialFixedWord  forwarded to wordFreq.init()
-// Word categories are assigned from index 2 upwards in every strategy here.
-void KategProblem::_initialize(int initTyp,int specialFixedWord)
-
-{
- massert(wordFreq.filled);
- initialisierung = initTyp;
- int i;
-
- // Clear all category-pair counts.
- for(i=0;i<katFreq.nKats;i++)
- for(int j=0;j<katFreq.nKats;j++)
- katFreq.setN(i,j,0);
-
-
-
-
- // Unassign every word and flag the tokens "$", "1$" .. "4$" via setDollar().
- for(i=0;i<wordFreq.nWords;i++)
- {
- setKatOfWord(i,-1);
- if( strcmp(getString(i),"$")==0||strcmp(getString(i),"1$")==0||strcmp(getString(i),"2$")==0||strcmp(getString(i),"3$")==0||strcmp(getString(i),"4$")==0 )
- wordFreq.setDollar(i);
- }
- wordFreq.init(specialFixedWord);
-
-
-
-
- _maxComp=wordFreq.nTranspWords;
-
- switch(initTyp)
- {
- case INIT_OTHER:
-
- // Reuse the assignment stored in initLike.
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_OTHER)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,initLike[i]);
- break;
- case INIT_RAN:
-
- // Random category: within [minIndex,maxIndex] when both are positive for
- // the word, otherwise anywhere in [2,nKats).
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_RAN)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- if( wordFreq.minIndex[i]>0 && wordFreq.maxIndex[i]>0 )
- fastPutWord(i,wordFreq.minIndex[i]+randomInt(wordFreq.maxIndex[i]-wordFreq.minIndex[i]+1));
- else
- fastPutWord(i,2+randomInt(katFreq.nKats-2));
- }
-
-
- break;
- case INIT_AIO:
-
- // "All in one": every word goes to category 2.
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_AIO)\n";
- for(i=0;i<wordFreq.nWords;i++)
- fastPutWord(i,2);
- break;
- case INIT_FREQ:
-
- // The i-th word of wordFreq.absteigend goes to category i+2; once the
- // categories run out, all remaining words share the last one.
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_FREQ)\n";
- for(i=0;i<wordFreq.nWords;i++)
- {
- int to=i+2;
- if( to>=katFreq.nKats )
- to=katFreq.nKats-1;
- fastPutWord((*(wordFreq.absteigend))[i],to);
- }
- curComp=katFreq.nKats-2;
- break;
- case INIT_LWRW:
-
- {
- // markList[w]==1 means word w has not been placed yet.
- Array<int> markList(wordFreq.nWords,1);
- int to=2;
- int i=0;
- if(verboseMode>2)cout << "KategProblem::_initialize(INIT_LWRW)\n";
- // Phase 1: seed the categories below nKats*LWRW_Faktor with one word each.
- for(to=2;to<katFreq.nKats*LWRW_Faktor;to++)
- {
- int w=(*(wordFreq.absteigend))[to-2];
- fastPutWord(w,to);
- markList[w]=0;
- }
- // Phase 2: walk the words in absteigend order; for odd i collect the
- // still-unplaced words listed in after[word], for even i those in
- // before[word], into category `to`. Move on to the next category only
- // when at least one word was placed.
- while(to<katFreq.nKats-1 && i<wordFreq.nWords)
- {
- int toFilled=0;
- int word=(*(wordFreq.absteigend))[i];
- if(i%2)
- {
- ManyFreq &after=wordFreq.after[word];
- for(int j=0;j<after.size();j++)
- {
- int w=after[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- else
- {
- ManyFreq &before=wordFreq.before[word];
- for(int j=0;j<before.size();j++)
- {
- int w=before[j].w;
- if( markList[w] )
- fastPutWord(w,to),toFilled++;
- markList[w]=0;
- }
- }
- i++;
- if( toFilled>0 )
- to++;
- }
- // Phase 3: everything still unplaced goes to the last category.
- for(i=0;i<wordFreq.nWords;i++)
- if(markList[i])
- fastPutWord(i,katFreq.nKats-1);
- }
- break;
- default:
- cerr << "Wrong _initialize in KategProblem: " << initTyp << endl;
- exit(1);
- }
-
-
-
- // Accumulate category-pair counts: for each word pair in after[] with
- // count n, add n to (category of word, category of following word).
- for(int word=0;word<wordFreq.nWords;word++)
- {
- Array<OneFreq>& aft=wordFreq.after[word];
-
- int nAft=aft.size();
-
- for(i=0;i<nAft;i++)
- katFreq.addN(katOfWord(word),katOfWord(aft[i].w),aft[i].n);
- }
-
- if(verboseMode>2)
- {
- cout << "\nInitialization of KategProblem:";
- dumpOn(cout);
- }
-}
-
-// Objective difference that applying change `c` would cause. Counts the
-// evaluation, makes sure the per-word frequency caches describe the word
-// being moved, then delegates to _valueChange().
-double KategProblem::valueChange(ProblemChange&c)
-{
-  ++numberOfPartEvaluations;
-
-  KategProblemChange *move=(KategProblemChange *)&c;
-  fillNWG(move->word);
-
-  return _valueChange(*move);
-}
-
-
-// Create a heap-allocated copy of this problem: same dimensions and settings,
-// same word bigram counts, word list, category assignment and initLike table.
-// The caller receives the new object as a Problem*.
-Problem *KategProblem::makeEqualProblem()
-{
-  KategProblem *clone = new KategProblem(wordFreq.nWords,katFreq.nKats-2,initialisierung,
-                                         auswertung,nachbarschaft);
-  KategProblemWBC &target=clone->wordFreq;
-  int w;
-
-  // First size every successor/predecessor list ...
-  for(w=0;w<wordFreq.nWords;w++)
-  {
-    target.setAfterWords(w,wordFreq.after[w].size());
-    target.setBeforeWords(w,wordFreq.before[w].size());
-  }
-
-  // ... then copy the bigram counts.
-  for(w=0;w<wordFreq.nWords;w++)
-  {
-    for(int s=0;s<wordFreq.after[w].size();s++)
-      target.setFreq(w,wordFreq.after[w][s].w,wordFreq.after[w][s].n);
-  }
-  target.testFull();
-  target.mindestAnzahl = wordFreq.mindestAnzahl;
-
-  if(words)
-    clone->words = new leda_array<string>(*words);
-
-  for(w=0;w<wordFreq.nWords;w++)
-  {
-    clone->setKatOfWord(w,katOfWord(w));
-    clone->initLike[w]=initLike[w];
-  }
-  clone->setValuesFrom(this);
-  return clone;
-}
-
-// Convert an objective value into exp((v + h - k) / n), where h is
-// wordFreq.get_h_of_words(), n is wordFreq.numberOfWords() and k is the extra
-// criterion term (non-zero only for CRITERION_MY).
-// Passing the sentinel 1e100 means "use the current value()".
-double KategProblem::nicevalue(double val)
-{
-  const double v = ( val==1e100 ) ? value() : val;
-  const double h = wordFreq.get_h_of_words();
-  const double n = wordFreq.numberOfWords();
-
-  double k=0;
-  if( auswertung == CRITERION_MY )
-    k=katFreq.myCriterionTerm();
-
-  return exp((v+h-k)/n);
-}
-
-// Rebuild `kats`, the per-category sets of word indices, from the current
-// word-to-category assignment. Any previous table is discarded.
-void KategProblem::makeKats()
-{
-  delete kats;                       // deleting a null pointer is a no-op
-  kats = new leda_array<intSet>(katFreq.nKats);
-
-  for(int w=0;w<wordFreq.nWords;w++)
-  {
-    (*kats)[katOfWord(w)].insert(w);
-  }
-}
-
-// Write a one-line summary (usable category count and word count) to strm.
-void KategProblem::dumpInfos(ostream &strm)
-{
-  strm << ";KategProblem:" << "cats: " << katFreq.nKats-2;
-  strm << " words: " << wordFreq.nWords << endl;
-}
-
-// Write the word classes to strm via writeClasses(); if the PrintBestTo2
-// stream is set, additionally write a detailed per-category listing to it.
-void KategProblem::dumpOn(ostream &strm)
-
-{
- writeClasses(_katOfWord,*this,strm);
- if(PrintBestTo2)
- {
- dumpInfos(*PrintBestTo2);
- makeKats();
- // NOTE(review): makeKats() has just allocated `kats`, so this branch looks
- // unreachable - confirm before relying on the word:category output format.
- if( kats==0 )
- {
- if( words==0 )
- {
- for(int i=0;i<wordFreq.nWords;i++)
- {
- *PrintBestTo2 << i << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- for(int i=0;i<wordFreq.nWords;i++)
- *PrintBestTo2 << (*words)[i] << ":" << katOfWord(i) << " ";
- }
- }
- else
- {
- // One output line per category: "<cat>:" followed by its members, as
- // indices when no word list is loaded, otherwise as word strings.
- // anzkat counts the categories that printed at least one member.
- int anzkat=0;
- for(int i=0;i<katFreq.nKats;i++)
- {
- int printed=0;
- *PrintBestTo2 << i << ":";
- leda_set<int>&theSet = (*kats)[i];
- if( words==0 )
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << nr << ", ";
- printed=1;
- }
- }
- else
- {
- int nr=0;
- forall_set(leda_set<int>,nr,theSet)
- {
- *PrintBestTo2 << (*words)[nr]<< ",";
- printed=1;
- }
- }
- if(printed==1)anzkat++;
- *PrintBestTo2 << endl;
- }
- *PrintBestTo2 << ";I have " << anzkat << " categories used.\n";
- }
- *PrintBestTo2 << endl;
- Problem::dumpOn(*PrintBestTo2);
- }
-}
-
-
-
-
-
-
-// C-string for word i, or the placeholder "<>" when no word list is loaded.
-// The pointer refers to storage owned by the word list.
-const char *KategProblem::getString(int i)
-{
-  if( words!=0 )
-    return ((*words)[i]).c_str();
-  return "<>";
-}
-
-// Copy of the string for word i. As in getString(), the placeholder "<>" is
-// returned when no word list has been loaded; previously a null `words`
-// pointer was dereferenced here.
-string KategProblem::getTheString(int i)
-{
-  if( words==0 )
-    return string("<>");
-  return (*words)[i];
-}
-
-// Number of consecutive non-improving steps the optimizer tolerates: one pass
-// over the transposable words with best-category selection, otherwise one
-// pass over every (category, word) combination.
-int KategProblem::maxNonBetterIterations()
-{
-  return ( katwahl()==K_BEST )
-           ? wordFreq.nTranspWords
-           : katFreq.nKats*wordFreq.nTranspWords;
-}
-
-// Estimated number of iterations an optimizer needs on this problem; used as
-// the default step budget when none is given.
-int KategProblem::expectedNumberOfIterations()
-{
-  return ( katwahl()==K_BEST )
-           ? 10*wordFreq.nTranspWords
-           : 13*katFreq.nKats*wordFreq.nTranspWords;
-}
-
-// Format a short description of the problem configuration into x:
-// category count, word count (transposable words), word selection, category
-// selection and initialisation strategy.
-//   x  caller-provided buffer of at least 512 bytes; the output is always
-//      NUL-terminated and never exceeds that size.
-// An unknown selector value prints an error to cerr and terminates the
-// program with exit(1).
-void KategProblem::makeTitle(char x[512])
-{
-  const char *ww;
-  const char *kw;
-  const char *in;
-  switch(wortwahl())
-  {
-    case W_RAN:
-      ww="zufaellig";
-      break;
-    case W_DET_DECR:
-      ww="absteigend";
-      break;
-    case W_DET_INCR:
-      ww="aufsteigend";
-      break;
-    default:
-      cerr << "Error: unknown word selection\n";
-      exit(1);
-  }
-  switch(katwahl())
-  {
-    case K_DET:
-      kw="rotierend";
-      break;
-    case K_RAN:
-      kw="zufaellig";
-      break;
-    case K_BEST:
-      kw="best ";
-      break;
-    default:
-      // Goes to cerr like the word-selection error above (was cout), and the
-      // "cagegory" typo is corrected.
-      cerr << "Error: unknown category selection\n";
-      exit(1);
-  }
-  switch(initialisierung)
-  {
-    case INIT_RAN:
-      in="zufaellig ";
-      break;
-    case INIT_AIO:
-      in="all-in-one";
-      break;
-    case INIT_LWRW:
-      in="lwrw ";
-      break;
-    case INIT_FREQ:
-      in="freq ";
-      break;
-    case INIT_OTHER:
-      in="other ";
-      break;
-    default:
-      cerr << "Error: unknown initialization\n";
-      exit(1);
-  }
-  // Bounded write: the parameter type documents a 512-byte buffer.
-  snprintf(x,512,"(c:%d,w:%d(%d),ww:%s,kw:%s,in:%s)",katFreq.nKats,wordFreq.nWords,
-           wordFreq.nTranspWords,ww,kw,in);
-}
-
-
-
-
-// Propose the next move "put word W into category K".
-//   p  out: set to a heap-allocated KategProblemChange when a move is
-//      proposed (the receiver owns it), or to 0 when none is.
-// Returns 1 when *p holds a proposal, 0 otherwise.
-// The word comes from the current search dimension according to wortwahl(),
-// the category according to katwahl().
-int KategProblem::_change(ProblemChange **p)
-
-{
- *p=0;
- int word=curDimension();
- // Map the dimension index to a word via wordFreq.absteigend: at random, in
- // list order, or in reverse list order.
- switch( wortwahl() )
- {
- case W_RAN:
- word=(*(wordFreq.absteigend))[randomInt(wordFreq.nTranspWords)];
- break;
- case W_DET_DECR:
- word=(*(wordFreq.absteigend))[word];
- break;
- case W_DET_INCR:
- word=(*(wordFreq.absteigend))[wordFreq.nTranspWords-word-1];
- break;
- default:
- cerr << "Error: Unknown word selection\n";
- exit(1);
- }
-
- // Candidate category; word categories start at index 2.
- int kat=curDimensionVal()+2;
- switch( katwahl() )
- {
- case K_RAN:
- kat=randomInt(katFreq.nKats-2)+2;
-
- // NOTE(review): no break above - K_RAN falls through so the random
- // category passes the same checks as K_DET; this looks intentional.
- case K_DET:
-
-
- // No proposal if the word is already in `kat`, or if `kat` has n1==0
- // while katWasEmpty is set (an n1==0 target was already proposed since
- // the last incrementDirection()).
- if( kat==katOfWord(word)||(katWasEmpty&&katFreq.n1(kat)==0) )
- return 0;
- // No proposal if the word has a permitted category range and `kat` lies
- // outside it.
- else if( wordFreq.minIndex[word]>0 && wordFreq.maxIndex[word]>0 && (kat<wordFreq.minIndex[word]||kat>wordFreq.maxIndex[word]))
- {
-
- return 0;
- }
- else
- {
- KategProblemChange *c = new KategProblemChange;
- c->toKat=kat;
- c->word=word;
- c->fromKat=katOfWord(c->word);
- massert( c->toKat < katFreq.nKats );
- massert( c->fromKat < katFreq.nKats );
- massert( c->word < wordFreq.nWords );
- massert( c->toKat!=0 && c->toKat!=1 );
- massert( c->fromKat!=0 && c->fromKat!=1 );
- if(katFreq.n1(kat)==0)
- katWasEmpty=1;
- *p=c;
- return 1;
- }
- break;
- case K_BEST:
- {
- // Try every admissible target category and keep the move with the
- // smallest value change.
- fillNWG(word);
- double smallest=1e100;
- KategProblemChange &smallestChange = *new KategProblemChange;
- short withEmpty=0;
-
-
- int startKat=2;
- int endKat=katFreq.nKats;
- if( wordFreq.minIndex[word]>0&&wordFreq.maxIndex[word]>0 )
- {
- startKat = max(2,wordFreq.minIndex[word]);
- endKat = min(katFreq.nKats,wordFreq.maxIndex[word]+1);
- }
- for(kat=startKat;kat<endKat;kat++)
- {
- // Once one empty category (n1==0 and n2==0) has been seen, further
- // empty categories are skipped.
- if( kat!=katOfWord(word) && (withEmpty==0 || katFreq.n1(kat)
- || katFreq.n2(kat)) )
- {
- KategProblemChange c;
- c.toKat=kat;
- c.word=word;
- c.fromKat=katOfWord(word);
- double n=_valueChange(c);
- if(n<smallest)
- {
- smallest=n;
- smallestChange=c;
- }
- }
- if( katFreq.n1(kat)==0 && katFreq.n2(kat)==0 )
- withEmpty=1;
- }
- // NOTE(review): if no candidate was evaluated, smallestChange is
- // returned without its fields having been assigned here; only this
- // massert guards that case.
- massert(smallest!=1e100);
- *p= &smallestChange;
- return 1;
- }
- break;
- default:
- cerr << "Error: Unknown category selection\n";
- exit(1);
- return 0;
- }
-}
-
-// Apply a move: put the word into its target category.
-void KategProblem::_doChange(ProblemChange &c)
-{
-  KategProblemChange *move=(KategProblemChange *)&c;
-  putWord(move->word,move->toKat);
-}
-
-// Revert a move: put the word back into the category it came from.
-void KategProblem::_undoChange(ProblemChange &c)
-{
-  KategProblemChange *move=(KategProblemChange *)&c;
-  putWord(move->word,move->fromKat);
-}
-
-// Advance to the next search direction (base-class behaviour) and clear the
-// katWasEmpty flag so that one empty target category may be proposed again.
-void KategProblem::incrementDirection()
-{
-  Problem::incrementDirection();
-
-  katWasEmpty=0;
-
-  massert( _maxComp==wordFreq.nTranspWords );
-}
-
-// Full (non-incremental) evaluation of the objective under the configured
-// criterion.
-double KategProblem::_value()
-{
-  return katFreq.fullBewertung(auswertung);
-}
-
-
-// n * log(tf) for tf > 0; 0.0 otherwise (which also avoids log of a
-// non-positive argument).
-double mkat_h_full(int n,double tf)
-{
-  if( !( tf>0 ) )
-    return 0.0;
-  return n*log(tf);
-}
-
-// n * log(cf) for cf > 0.0; 0.0 otherwise.
-double mkat_h_part(int n,double cf)
-{
-  if( !( cf>0.0 ) )
-    return 0.0;
-  return n*log(cf);
-}
-
-// n * log(verfaelsche(n, sigma)), with 0 when the smoothed count is not
-// positive (see mkat_h_full).
-double KategProblem::kat_h_full(int n)
-{
-  const double smoothed=verfaelsche(n,sigmaVerfaelschung);
-  return mkat_h_full(n,smoothed);
-}
-// Floating-point overload. It aborts immediately, so calling it terminates
-// the program; the return statement after abort() is never reached.
-double KategProblem::kat_h_full(double n)
-{
-  abort();
-  const double smoothed=verfaelsche(n,sigmaVerfaelschung);
-  return mkat_h_full((int)n,smoothed);
-}
-
-// n * log(verfaelsche(n, sigma)), with 0 when the smoothed count is not
-// positive (see mkat_h_part).
-double KategProblem::kat_h_part(int n)
-{
-  const double smoothed=verfaelsche(n,sigmaVerfaelschung);
-  return mkat_h_part(n,smoothed);
-}
-// Floating-point overload. It aborts immediately, so calling it terminates
-// the program; the return statement after abort() is never reached.
-double KategProblem::kat_h_part(double n)
-{
-  abort();
-  const double smoothed=verfaelsche(n,sigmaVerfaelschung);
-  return mkat_h_part((int)n,smoothed);
-}
-
-
-
-
-// Change of the kat_h_full() term for category pair (i,j) under the pending
-// move: kat_h_full(count after) - kat_h_full(count before), where nstrich(i,j)
-// supplies the count difference.
-double KategProblem::nmo_my(int i,int j)
-{
-  FreqType diff=nstrich(i,j);
-  FreqType current=katFreq.n(i,j);
-  return kat_h_full(diff+current)-kat_h_full(current);
-}
-// Change of the kat_h() term for category pair (i,j) under the pending move:
-// kat_h(count after) - kat_h(count before), where nstrich(i,j) supplies the
-// count difference.
-double KategProblem::nmo(int i,int j)
-{
-  FreqType diff=nstrich(i,j);
-  FreqType current=katFreq.n(i,j);
-  return kat_h(diff+current)-kat_h(current);
-}
-// CRITERION_LO variant of nmo(): returns the change of the n*kat_mlog(n-1-rhoLo)
-// term for category pair (i,j) and keeps the running tallies up to date:
-//   e0  number of pairs with count 0
-//   e1  number of pairs with count 1
-// Both are adjusted only when the pair's count actually changes.
-double KategProblem::nmo_lo(int i,int j,int &e0,int &e1)
-{
-  FreqType oldN=katFreq.n(i,j);
-  FreqType newN=nstrich(i,j)+oldN;
-
-  if( oldN!=newN )
-  {
-    // The pair enters the bucket of its new count ...
-    if( newN==0 )
-      e0++;
-    else if( newN==1 )
-      e1++;
-
-    // ... and leaves the bucket of its old count.
-    if( oldN==0 )
-      e0--;
-    else if( oldN==1 )
-      e1--;
-  }
-
-  return newN*kat_mlog(newN-1-rhoLo)-oldN*kat_mlog(oldN-1-rhoLo);
-}
-
-
-// Incrementally compute how the objective changes if word k.word moves from
-// category k.fromKat to k.toKat, without evaluating the whole problem.
-// Requires the nwg/ngw caches to describe k.word (see fillNWG()).
-// Stores the move's categories in the members `ursprung` (source) and `ziel`
-// (target).
-// One branch per criterion (CRITERION_LO, CRITERION_ML, CRITERION_MY); each
-//   1. subtracts the nmo*() change terms of every category pair whose count
-//      is touched: the four pairs among source/target themselves, then pairs
-//      with each other category in nwg.not0 and in ngw.not0;
-//   2. adds the change in the per-category unigram terms of source and target.
-// An unknown criterion only prints an error; v is then returned unchanged (0).
-double KategProblem::_valueChange(KategProblemChange &k)
-
-{
- double v=0;
- int i=0;
-
- ursprung=k.fromKat;
- ziel=k.toKat;
-
- if( auswertung==CRITERION_LO )
- {
- // e0a/e1a start from the current eta0/eta1 and are updated by nmo_lo() to
- // the values they would have after the move.
- int e0a=katFreq.eta0,e1a=katFreq.eta1;
- v-=nmo_lo(ursprung,ursprung,e0a,e1a)+nmo_lo(ziel,ziel,e0a,e1a)
- +nmo_lo(ursprung,ziel,e0a,e1a)+nmo_lo(ziel,ursprung,e0a,e1a);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(ursprung,cl,e0a,e1a)+nmo_lo(ziel,cl,e0a,e1a);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_lo(cl,ursprung,e0a,e1a)+nmo_lo(cl,ziel,e0a,e1a);
- i++;
- }
-
- v+=kat_hlo(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ursprung))
- +kat_hlo(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ursprung))
- +kat_hlo(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_hlo(katFreq.n1(ziel))
- +kat_hlo(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_hlo(katFreq.n2(ziel));
-
- // old0/new0 combine the c1_0 and c2_0 counters before and after the move.
- // nc1_0/nc2_0 go up when the move takes the source's whole n1/n2 mass and
- // down when the target's n1/n2 was zero.
- int old0=katFreq.c1_0*katFreq.nKats+katFreq.c2_0*katFreq.nKats
- -katFreq.c1_0*katFreq.c2_0;
- int nc1_0=katFreq.c1_0,nc2_0=katFreq.c2_0;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ursprung)==wordFreq.n1(k.word) )
- nc1_0++;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ursprung)==wordFreq.n2(k.word) )
- nc2_0++;
- if( wordFreq.n1(k.word)>0 && katFreq.n1(ziel)==0 ) nc1_0--;
- if( wordFreq.n2(k.word)>0 && katFreq.n2(ziel)==0 ) nc2_0--;
- int new0=nc1_0*katFreq.nKats+nc2_0*katFreq.nKats-nc1_0*nc2_0;
- v-=kat_etaFkt(e0a,e1a,new0,katFreq.nKats)
- -kat_etaFkt(katFreq.eta0,katFreq.eta1,old0,katFreq.nKats);
- vassert(NULLFLOAT(Problem::valueChange(k)-v));
- }
- else if(auswertung==CRITERION_ML)
- {
- v-=nmo(ursprung,ursprung)+nmo(ziel,ziel)
- +nmo(ursprung,ziel)+nmo(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(ursprung,cl)+nmo(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo(cl,ursprung)+nmo(cl,ziel);
- i++;
- }
- v+=kat_h(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ursprung))
- +kat_h(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ursprung))
- +kat_h(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h(katFreq.n1(ziel))
- +kat_h(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h(katFreq.n2(ziel));
- }
- else if( auswertung==CRITERION_MY )
- {
- v-=nmo_my(ursprung,ursprung)+nmo_my(ziel,ziel)
- +nmo_my(ursprung,ziel)+nmo_my(ziel,ursprung);
- i=0;
- while(i<nwg.anzNot0)
- {
- int cl=nwg.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(ursprung,cl)+nmo_my(ziel,cl);
- i++;
- }
- i=0;
- while(i<ngw.anzNot0)
- {
- int cl=ngw.not0[i];
- if( cl!= ursprung && cl!=ziel )
- v -= nmo_my(cl,ursprung)+nmo_my(cl,ziel);
- i++;
- }
- v+=kat_h_part(katFreq.n1(ursprung)-wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ursprung))
- +kat_h_part(katFreq.n2(ursprung)-wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ursprung))
- +kat_h_part(katFreq.n1(ziel)+wordFreq.n1(k.word))
- -kat_h_part(katFreq.n1(ziel))
- +kat_h_part(katFreq.n2(ziel)+wordFreq.n2(k.word))
- -kat_h_part(katFreq.n2(ziel));
- // The extra criterion term ("Zusatz") is not computed incrementally: the
- // move is applied, the term re-read, and the move undone again.
- double bishZusatz = katFreq.myCriterionTerm();
- _doChange(k);
- double neuZusatz = katFreq.myCriterionTerm();
- _undoChange(k);
- if(verboseMode>2)
- cout << "ZUSATZ: " << bishZusatz << " " << neuZusatz << " " <<neuZusatz-bishZusatz<<" " << v << endl;
- v+=neuZusatz-bishZusatz;
- }
- else
- {
- cerr << "Fatal error: Unknown criterion: '"<<auswertung<<"'\n";
- }
- // Cross-check against the generic (non-incremental) computation.
- vassert( NULLFLOAT(Problem::valueChange(k)-v) );
- return v;
-}
-
-
-void KategProblem::fillNWG(int w)
-// Rebuilds the neighbourhood cache for word w: nwg gets the counts of wordFreq.after[w] summed per category, ngw the counts of wordFreq.before[w] summed per category, and nww the count of the after-entry whose word is w itself.
-{
- if(nwgWord==w) // the cache already describes w (setKatOfWord resets nwgWord to -1 to invalidate it)
- return;
- else
- {
- Array<OneFreq> &after=wordFreq.after[w];
- int size=after.size(),i;
- nww=0;
- nwg.init(); // empties nwg by advancing its time stamp
- for(i=0;i<size;i++)
- {
- nwg.addFreq(katOfWord(after[i].w),after[i].n); // accumulate under the neighbour's current category
- if(after[i].w==w)
- nww=after[i].n; // the pair (w,w)
- }
-// Same accumulation for the entries of wordFreq.before[w], collected in ngw.
- Array<OneFreq> &before=wordFreq.before[w];
- size=before.size();
- ngw.init();
- for(i=0;i<size;i++)
- ngw.addFreq(katOfWord(before[i].w),before[i].n);
- nwgWord=w; // remember which word the cache belongs to
- }
-}
-
-void KategProblem::vnstrich(int i,int j)
-// Debug output: prints katFreq.n(i,j) and then, tagged a) to h), each single term that nstrich(i,j) would add for the categories ursprung and ziel.
-{
- cout << ".) " << katFreq.n(i,j) << " ";
- if( i==ursprung )
- cout << "a) "<<-nwg.getFreq(j) << " ";
- if( i==ziel )
- cout << "b) " <<nwg.getFreq(j) << " ";
-// c) and d): terms taken from ngw when the column j is ursprung or ziel
- if( j==ursprung )
- cout << "c) " <<-ngw.getFreq(i) << " ";
- if( j==ziel )
- cout << "d) " <<+ngw.getFreq(i) << " " ;
-// e) to h): the nww corrections for the four cells formed by ursprung and ziel
- if( i==ursprung && j==ursprung )
- cout << "e) " <<+nww << " ";
- if( i==ziel && j==ziel )
- cout << "f) " <<+nww << " " ;
- if( i==ursprung && j==ziel )
- cout << "g) " <<-nww << " ";
- if( i==ziel && j==ursprung )
- cout << "h) " <<-nww << " ";
-}
-
-
-
-
-
-void KategProblem::fastPutWord(int word,int toKat)
-// Gives a word that has no category yet (katOfWord(word)==-1) its category; no katFreq counts are updated here.
-{
- massert(toKat>=0 && toKat<katFreq.nKats);
-
-// A word with a fixed category (fixedWord[word]>=0) always receives that category, whatever toKat was requested.
-
- if( wordFreq.fixedWord[word]>=0 )
- toKat=wordFreq.fixedWord[word];
- massert(katOfWord(word)==-1);
- setKatOfWord(word,toKat);
-}
-
-void KategProblem::fixInitLike() // Turns every non-negative entry of initLike into a fixed category in wordFreq.fixedWord and resets the entry to -1.
-{
- int fixed=0,fixed2=0; // fixed: entries processed; fixed2: entries that needed the offset mapping below
- over_arr(initLike,i) // presumably iterates i over all indices of initLike (macro defined elsewhere) - TODO confirm
- if(initLike[i]>=0 )
- {
- fixed++;
- if( initLike[i]>=wordFreq.minIndex[i] || initLike[i]==1 ) // value is taken over unchanged
- wordFreq.fixedWord[i]=initLike[i];
- else
- {
- wordFreq.fixedWord[i]=wordFreq.minIndex[i]+initLike[i]-2; // value is shifted relative to minIndex[i]
- fixed2++;
- }
- initLike[i]=-1; // consumed
- }
- cout << "Fixed from file are: " << fixed << " " << fixed2 << " words.\n";
-}
-
-void KategProblem::putWord(int word,int toKat)
-// Moves word from its current category k to toKat and shifts all of its pair counts in katFreq from k to toKat.
-{
- massert(toKat!=0);massert(toKat!=1); // categories 0 and 1 are never a target here
- massert(word<wordFreq.nWords);
- massert(toKat<katFreq.nKats);
- massert(wordFreq.fixedWord[word]<0); // a word with a fixed category must not be moved
- int k=katOfWord(word);
- massert(k!=0&&k!=1);
- Array<OneFreq>& aft=wordFreq.after[word];
- Array<OneFreq>& bef=wordFreq.before[word];
- int nAft=aft.size();
- int nBef=bef.size();
- int i;
- if(verboseMode>4)
- cout << "putWord(" << word << "," << toKat << ")" << k << " nAft"
- << nAft << " nBef" << nBef << " k" << k << "\n";
-
- massert( k!=-1 );
- massert( k!=toKat );
-// Step 1: subtract every count that involves word from the old category k.
- for(i=0;i<nAft;i++)
- {
- katFreq.addN(k,katOfWord(aft[i].w),-aft[i].n);
- if(verboseMode>4)
- cout << k << " " << katOfWord(aft[i].w) << " " << -aft[i].n << endl;
- }
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word ) // skip the pair (word,word); presumably it is already covered by the aft loop
- {
- katFreq.addN(katOfWord(bef[i].w),k,-bef[i].n);
- if(verboseMode>4)
- cout << katOfWord(bef[i].w) << " " << k << " " << -bef[i].n << endl;
- }
-// Step 2: relabel the word; from here on katOfWord(word) yields toKat, also for a self entry in aft.
- setKatOfWord(word,toKat);
-// Step 3: add the same counts back under the new category toKat.
- for(i=0;i<nAft;i++)
- katFreq.addN(toKat,katOfWord(aft[i].w),aft[i].n);
- for(i=0;i<nBef;i++)
- if( bef[i].w!=word )
- katFreq.addN(katOfWord(bef[i].w),toKat,bef[i].n);
-
-}
-
-
-
-
-
-
-
-
-
-
-static KategProblemChange theOneKategProblemChange; // storage handed out by KategProblemChange::operator new for the first live object
-static int anzKategProblemChange=0; // incremented by operator new and decremented by operator delete
-
-void *KategProblemChange::operator new(size_t size) // Class-specific allocator: the first live object uses the static instance instead of heap memory.
-{
- anzKategProblemChange++;
- massert(anzKategProblemChange>0);
- massert(anzKategProblemChange<2); // at most one live object is expected
- if( anzKategProblemChange==1 )
- return &theOneKategProblemChange;
- else
- { // fallback for a second live object; only reached if the assertion above did not stop the program
- if( verboseMode>1 )
- cout << "generate instance of KategProblemChange: " << size
- << " " << anzKategProblemChange<< endl;
- return malloc(size); // released again by the matching operator delete via free()
- }
-}
-void KategProblemChange::operator delete(void *ptr,size_t size // Class-specific deallocator matching operator new; ptr is the object storage, size the object size passed in by the compiler.
-)
-{ (void)size; massert(size==sizeof(KategProblemChange)); // the parameter has to be named for this check to compile; the cast avoids an unused-parameter warning when massert expands to nothing
- anzKategProblemChange--;
- if( ptr!= &theOneKategProblemChange) // the static instance never came from malloc, so it must not be freed
- free(ptr);
-}
-
-
-
-
-
-
-
-
-
-
-
-
-NWG::NWG(int n) : freq(n,0),timeOfFreq(n,0),not0(n),word(-1) // Creates an empty table with n slots (one per index passed to addFreq/getFreq).
-{
- massert(n>0);
- curTime=1; // every timeOfFreq entry starts at 0, so no slot counts as valid yet
- init();
-}
-
-void NWG::init() // Empties the table without touching the arrays.
-{
- curTime++; // every stored timeOfFreq value is now out of date, so getFreq returns 0 for all slots
- anzNot0=0; // the not0 list is empty again
-}
-
-void NWG::sort() // Sorts the first anzNot0 entries of not0 in place, ordered by intcompare (defined elsewhere).
-{
- massert(anzNot0<=not0.size()); // check the element count before qsort reads that many entries
- qsort(not0.getPointerToData(),anzNot0,sizeof(int),intcompare);
-}
-
-
-int KategProblem::maxDimension() // Returns the stored member _maxComp.
-{
- return _maxComp;
-}
-
-int KategProblem::maxDimensionVal() // Returns the stored member _maxCompVal, which katwahl(int) sets to 1 for K_BEST and to katFreq.nKats-2 otherwise.
-{
- return _maxCompVal;
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/KategProblem.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/KategProblem.h b/ext/giza-pp/mkcls-v2/KategProblem.h
deleted file mode 100644
index e5a5a46..0000000
--- a/ext/giza-pp/mkcls-v2/KategProblem.h
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-#ifndef KATEG_OPT_H
-#define KATEG_OPT_H
-#include <string>
-
-#include <stdlib.h>
-#include "Problem.h"
-
-extern double rhoLo; // defined in KategProblemKBC.cpp (0.75); used by kat_etaFkt and the CRITERION_LO evaluation
-// Word and category identifiers are plain int indices.
-typedef int Kategory;
-typedef int Word;
-
-
-// FreqType is the type of all frequency counts: int by default, double when FREQTYPE_DOUBLE is defined.
-#ifdef FREQTYPE_DOUBLE
-typedef double FreqType;
-#else
-typedef int FreqType;
-#endif
-
-
-#include "KategProblemWBC.h"
-
-
-#include "KategProblemKBC.h"
-
-
-enum { // Initialisation type codes; presumably the initTyp values accepted by KategProblem::_initialize - TODO confirm
- INIT_RAN=1,
- INIT_AIO=2,
- INIT_LWRW=3,
- INIT_FREQ=4,
- INIT_OTHER=5
- };
-
-
-enum { // Word-selection modes; stored as bit patterns inside KategProblem's nachbarschaft value
- W_RAN=(8|16),
- W_DET_DECR=(16),
- W_DET_INCR =(32)
-};
-#define CHOOSE_WORD (8|16|32) // mask covering all W_* bits; wortwahl() reads and wortwahl(int) replaces exactly these bits
-
-
-enum { // Category-selection modes; stored as bit patterns inside KategProblem's nachbarschaft value
- K_DET=(64),
- K_RAN=(128),
- K_BEST=(64|128)
-};
-#define CHOOSE_KAT (64|128) // mask covering all K_* bits; katwahl() reads and katwahl(int) replaces exactly these bits
-
-
-enum { // Criterion codes: the values of auswertung switched on in KategProblemKBC::fullBewertung
- CRITERION_ML=0,
- CRITERION_LO=1,
- CRITERION_MY=2
-};
-
-
-
-class NWG // Frequency table indexed by int that can be emptied in constant time through a time stamp, plus a list of the indices written since the last init().
-{
- private:
- Array<FreqType> freq;
-// freq[i] is only valid while timeOfFreq[i]==curTime; otherwise slot i counts as 0.
- Array<int> timeOfFreq;
-
-
-
-// Current time stamp; init() increments it, which invalidates every slot at once.
- int curTime;
- public:
- NWG(int n);
- void init();
-// Number of valid entries at the front of not0.
- int anzNot0;
-
-// Indices that received their first addFreq() since the last init(), in order of first use (until sort() is called).
- Array<int> not0;
-
- int word; // set to -1 by the constructor; not used by the member functions visible here
-// Adds n to slot C, starting from 0 if the slot is out of date.
- inline void addFreq(int C,FreqType n);
-// Sorts the first anzNot0 entries of not0.
- void sort();
-// Returns the value of slot i, or 0 if the slot was not written since the last init().
- FreqType getFreq(int i)
- {
- if( timeOfFreq[i]==curTime )
- return freq[i];
- else
- return 0;
- };
-};
-
-inline void NWG::addFreq(int g,FreqType n) // Adds n to slot g.
-{
- if(timeOfFreq[g]==curTime) // slot already written since the last init(): accumulate
- freq[g]+=n;
- else
- { // first write since init(): restart the slot at n and record g in the not0 list
- timeOfFreq[g]=curTime;
- freq[g]=n;
- not0[anzNot0++]=g;
- }
-}
-
-
-
-struct KategProblemChange : public ProblemChange // One proposed change of a word's category; allocation goes through the class-specific operators below.
-{
- void *operator new(size_t size); // hands out a static instance for the first live object (see KategProblem.cpp)
- void operator delete(void *ptr,size_t size);
-
- int word; // the word the change applies to (read as k.word in KategProblem::_valueChange)
- int toKat; // presumably the category the word moves to - TODO confirm against _change/_doChange
- int fromKat; // presumably the category the word moves from - TODO confirm against _change/_doChange
-};
-
-class KategProblem : public Problem // Problem subclass that assigns every word (int id) a category (int id); word-level data lives in wordFreq, category-level counts in katFreq.
-{
- private:
- double kat_h_full(int n);
- double kat_h_full(double n);
- double kat_h_part(int n);
- double kat_h_part(double n);
- double sigmaVerfaelschung;
- short katWasEmpty;
-
-// Neighbourhood cache for one word, filled by fillNWG():
-// nwgWord = word the cache describes (-1 = invalid), nwg/ngw = counts of wordFreq.after/before summed per category, nww = count of the word paired with itself.
- int nwgWord;
-
- NWG nwg;
- NWG ngw;
- FreqType nww;
-// The two categories that nstrich()/vnstrich() compare against (German: ursprung = origin, ziel = target).
- int ursprung,ziel;
-// Current category of every word, indexed by word id; fastPutWord expects -1 for a word without category.
- Array<int> _katOfWord;
-// Values returned by maxDimension() and maxDimensionVal().
- int _maxComp,_maxCompVal;
-// Per-cell terms used by _valueChange for the individual criteria (defined in KategProblem.cpp).
- double nmo_my(int i,int j);
- double nmo(int i,int j);
-
-
- double nmo_lo(int i,int j,int &e0,int &e1);
-
-// Moves a word that already has a category to category `to` and updates katFreq accordingly.
- void putWord(int word,int to);
-
-// Gives a word without category its category; katFreq is not updated.
- void fastPutWord(int word,int to);
-
-// Stores category k for word w and invalidates the fillNWG cache. A conflict with the word's fixed category is only reported on cout; the assignment still happens.
- void setKatOfWord(int w,int k)
-{
- if( !(wordFreq.fixedWord[w]==k||wordFreq.fixedWord[w]==-1||k==-1) )
- {
- cout << "mkcls::setKatOfWord::ERROR: " << w << " " << k << " " << wordFreq.fixedWord[w] << " " << (*words)[w] << endl;
- }
- _katOfWord[w]=k;
- nwgWord=-1;
-};
-
-// Fills nwg, ngw and nww for word w unless they already describe w.
- void fillNWG(int w);
-
-// Returns the sum of the nwg/ngw/nww terms that apply to cell (i,j) for the categories ursprung and ziel.
- inline FreqType nstrich(int i,int j);
-
-// Prints the individual terms of nstrich(i,j) to cout.
- void vnstrich(int i,int j);
-
-
-// Virtual hooks; presumably overrides of the Problem interface declared in Problem.h - TODO confirm. Definitions are in KategProblem.cpp.
- protected:
- virtual int _change(ProblemChange **p);
-
-
- virtual void _doChange(ProblemChange &c);
-
-
- virtual void _undoChange(ProblemChange &c);
-
-
- virtual double _value();
-
-
- double _valueChange(KategProblemChange &k);
-
-
- virtual void incrementDirection();
-
-// Returns _maxCompVal.
- virtual int maxDimensionVal(void) ;
-
-// Returns _maxComp.
- virtual int maxDimension(void) ;
-
-
-public:
- leda_array<string> *words; // indexed by word id; read in the setKatOfWord error message
-typedef leda_set<int> intSet;
-// NOTE(review): kats is not referenced in the code visible here; presumably the per-category word sets built by makeKats() - confirm.
-leda_array<intSet> *kats;
-// Word-level data (after, before, fixedWord, minIndex, nWords, n1, n2) and category-level counts (n, n1, n2, nKats, addN).
- KategProblemWBC wordFreq;
- KategProblemKBC katFreq;
-// Requested categories per word; consumed (and reset to -1) by fixInitLike().
- Array<int> initLike;
-
- KategProblem(int aw,int mak,int _initialisierung,int _auswertung,
- int _nachbarschaft,int minw=0);
-
-
- virtual ~KategProblem();
-
-
- virtual void _initialize(int initTyp);
- virtual void _initialize(int initTyp,int specialFixedWord);
-
-
- virtual double valueChange(ProblemChange&c);
-
-
- virtual Problem *makeEqualProblem();
-
-
- virtual double nicevalue(double value=1e100);
-
-
- void makeKats();
-
-
- virtual void dumpOn(ostream &strm);
-
-
- virtual void dumpInfos(ostream &strm);
-
-
-
-
-// Replaces the CHOOSE_KAT bits of nachbarschaft by k and adjusts _maxCompVal.
- inline void katwahl(int k);
-
-// Replaces the CHOOSE_WORD bits of nachbarschaft by w.
- inline void wortwahl(int w);
-
-
-
-
-// Returns the current category of word w.
- inline int katOfWord(int w);
-
-// Returns the CHOOSE_WORD bits of nachbarschaft.
- inline short wortwahl();
-
-// Returns the CHOOSE_KAT bits of nachbarschaft.
- inline short katwahl() ;
-
-
- virtual int maxNonBetterIterations();
-
-
- virtual int expectedNumberOfIterations();
-
-
- const char *getString(int i);
- string getTheString(int i);
-
-
- void makeTitle(char x[512]);
-
-// Converts the entries of initLike into fixed categories in wordFreq.fixedWord.
- void fixInitLike();
-
-};
-
-inline int KategProblem::katOfWord(int w){return _katOfWord[w];}; // current category of word w as stored by setKatOfWord
-inline short KategProblem::wortwahl(){return nachbarschaft&CHOOSE_WORD;}; // word-selection bits of nachbarschaft (compare with the W_* constants)
-inline short KategProblem::katwahl() {return nachbarschaft&CHOOSE_KAT;}; // category-selection bits of nachbarschaft (compare with the K_* constants)
-
-inline void KategProblem::katwahl(int k) // Sets the category-selection mode: replaces the CHOOSE_KAT bits of nachbarschaft by k.
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_KAT)) | k;
- if(k==K_BEST)
- _maxCompVal=1; // value later returned by maxDimensionVal()
- else
- _maxCompVal=katFreq.nKats-2;
- };
-
-inline void KategProblem::wortwahl(int w) // Sets the word-selection mode: replaces the CHOOSE_WORD bits of nachbarschaft by w.
- {
- nachbarschaft = (nachbarschaft&(~CHOOSE_WORD)) | w;
- };
-
-
-
-inline FreqType KategProblem::nstrich(int i,int j) // Returns the amount by which cell (i,j) changes when the counts cached in nwg/ngw/nww are taken out of category ursprung and put into category ziel.
-{
- FreqType n=0;
-// Row effect: nwg.getFreq(j) leaves row ursprung and enters row ziel.
- if( i==ursprung )
- n-=nwg.getFreq(j);
- if( i==ziel )
- n+=nwg.getFreq(j);
-// Column effect: ngw.getFreq(i) leaves column ursprung and enters column ziel.
- if( j==ursprung )
- n-=ngw.getFreq(i);
- if( j==ziel )
- n+=ngw.getFreq(i);
-// Corrections by nww for the four cells formed by ursprung and ziel, where the row and column effects above overlap.
- if( i==ursprung && j==ursprung )
- n+=nww;
- if( i==ziel && j==ziel )
- n+=nww;
-
- if( i==ursprung && j==ziel )
- n-=nww;
- if( i==ziel && j==ursprung )
- n-=nww;
-
- return n;
-}
-
-
-
-
-
-#define MAX_H_TABLE 4000 // arguments below this bound are answered from the lookup tables in kat_mlog(int) and kat_h(int)
-extern double h_table[],l_table[],hmy_table[],hmy_sigma; // defined elsewhere; the assertions below expect h_table[n]==n*log(n) and l_table[s]==log(s)
-
-
-inline double kat_mlog(double x) // log(x), except that every x<=1e-9 (including negative values) yields 0 instead.
-{
- if(x<=1e-9)
- return 0;
- else
- return log(x);
-}
-
-
-inline double kat_mlog(int s) // log(s) for s>0, 0 for s<=0; values below MAX_H_TABLE come from l_table.
-{
- if(s<=0)
- return 0;
- else if( s<MAX_H_TABLE )
- {
- massert( s==0 || l_table[s]==log(s) ); // sanity check of the table entry (s is always >0 in this branch)
- return l_table[s];
- }
- else
- return log((double)(s));
-}
-
-
-
-inline double kat_hlo(int n) // n*log(n-1), with the logarithm taken as 0 when n-1<=0 (see kat_mlog(int)).
-{
- return n*kat_mlog(n-1);
-}
-
-inline double kat_hlo(double n) // n*log(n-1), with the logarithm taken as 0 when n-1<=1e-9 (see kat_mlog(double)).
-{
- return n*kat_mlog(n-1);
-}
-
-
-inline double kat_h(int n) // n*log(n) for n>0, 0 for n<=0; values below MAX_H_TABLE come from h_table.
-{
- massert(n>=-1); // anything below -1 is treated as a caller error
- if(n<=0)
- return 0;
- else
- if(n<MAX_H_TABLE)
- {
- massert(n==0||fabs(h_table[n]-n*log((double)n))<1e-8); // sanity check of the table entry
- return h_table[n];
- }
- else
- return n*log((double)(n));
-}
-inline double kat_h(double n) // n*log(n), except that every n<=1e-9 yields 0.
-{
- if(n<=1e-9)
- return 0;
- else
- return n*log(n);
-}
-
-
-inline double kat_etaFkt(int _e0,int e1,int immer0,int cats) // Returns e1*log((ePlus-1)/(e0+1)*rhoLo) with e0=_e0-immer0 and ePlus=cats*cats-_e0, or 0 when the guard below fails.
-{
- int e0 = _e0 - immer0;
- int ePlus = cats*cats - _e0;
- if(cats*cats-e0>1) // NOTE(review): the guard uses e0 while the logarithm uses ePlus; for immer0>0 ePlus-1 can be <=0 although the guard passes - confirm this cannot occur
- return e1*log( (ePlus-1.0)/(e0+1.0)*rhoLo );
- else
- return 0;
-}
-
-double mkat_h_full(int n,double tf); // defined elsewhere; called per category pair by KategProblemKBC::fullBewertung for CRITERION_MY
-double mkat_h_part(int n,double cf); // defined elsewhere; called per category by KategProblemKBC::fullBewertung for CRITERION_MY
-
-int Hash(const string& s); // defined elsewhere; not used in this header
-
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/KategProblemKBC.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/KategProblemKBC.cpp b/ext/giza-pp/mkcls-v2/KategProblemKBC.cpp
deleted file mode 100644
index 97c40fc..0000000
--- a/ext/giza-pp/mkcls-v2/KategProblemKBC.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdlib.h>
-#include "KategProblem.h"
-
-double rhoLo=0.75; // constant used by kat_etaFkt and by the CRITERION_LO branch of fullBewertung
-#define MAX_VERFAELSCHUNG 5000 // number of entries in verfTab
-double verfTab[MAX_VERFAELSCHUNG],verfTabSigma=-1.0; // lookup table for verfaelsche(a,b) and the b it is valid for (-1.0 = none yet)
-double verfaelsche(int a,double b) // Returns b*(erf(10000.0)-erf(a/b))/2+a; the value is read from verfTab when b equals verfTabSigma and 0<=a<MAX_VERFAELSCHUNG.
-{
-// NOTE(review): nothing in this file writes verfTab, so the table branch relies on it being filled elsewhere - confirm.
- if( a>=0&&verfTabSigma==b&&a<MAX_VERFAELSCHUNG )
- {
- massert(verfTab[a]== b*(erf(10000.0) - erf(a/b))/2+a); // table entry must equal the directly computed value
- return verfTab[a];
- }
- else
- {
- double x = b*(erf(10000.0) - erf(a/b))/2+a;
- return x;
- }
-}
-double verfaelsche(double,double b) // Overload for a double first argument: not supported, always aborts the program.
-{
- abort();
- return b; // never reached
-}
-
-KategProblemKBC::KategProblemKBC(int s,double sv) :
- _n(s),_n1(s,0),_n2(s,0),sigmaVerfaelschung(sv),withVerfaelschung(sv!=0.0),
- _nverf(s),_n1verf(s,0.0),_n2verf(s,0.0),_nWords(0),
- eta0(s*s),eta1(0),c1_0(s),c2_0(s),
- _bigramVerfSum(0.0),_unigramVerfSum1(0.0),_unigramVerfSum2(0.0),nKats(s)
-// Builds the count tables for s categories, all counts 0; sv is the sigma passed to verfaelsche(), and sv==0.0 switches that handling off (withVerfaelschung).
-{
- verfInit0=0.0;
- int i;
- if( withVerfaelschung )
- {
- verfInit0=verfaelsche(0,sv); // the verfaelsche() value of a zero count, used as start value below
- cout << "VERFAELSCHUNG wird mitgefuehrt => LANGSAMER!!!\n";
- }
- for(i=0;i<s;i++)
- {
- _n[i].init(s,0); // row i of the s x s pair counts
- _nverf[i].init(s,verfInit0);
- _n1verf[i]=_n2verf[i]=verfInit0;
- _bigramVerfSum+=verfInit0*s; // running sums start as the totals of the initial table values
- _unigramVerfSum1+=verfInit0;
- _unigramVerfSum2+=verfInit0;
- }
- if( withVerfaelschung )
- {
- cout << "VERFAELSCHUNG " << _bigramVerfSum << " " << _unigramVerfSum1 << " " << _unigramVerfSum2 << endl;
- }
- verfTabSigma=sigmaVerfaelschung;
-// NOTE(review): from here on verfaelsche(a,sigmaVerfaelschung) answers from verfTab for 0<=a<MAX_VERFAELSCHUNG,
-// but this file never writes verfTab - confirm that the table is filled elsewhere.
-
-}
-
-void KategProblemKBC::setN(int w1,int w2, FreqType n)
-// Sets the count of pair (w1,w2) to n, expressed as two addN calls: first remove the current value, then add n.
-{
- addN(w1,w2,-_n[w1][w2]);
- addN(w1,w2,n);
-}
-
-
-double KategProblemKBC::fullBewertung(int auswertung) // Computes the criterion value from scratch over all category pairs; auswertung is one of CRITERION_ML, CRITERION_MY, CRITERION_LO, anything else ends the program with exit(1).
-{
-
- double bewertung=0;
- int c1,c2;
-
-// In every branch the pair terms are subtracted and the two per-category terms are added.
- switch( auswertung )
- {
- case CRITERION_ML:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=kat_h(_n[c1][c2]);
- bewertung+=kat_h(_n1[c1])+kat_h(_n2[c1]);
- }
- break;
- case CRITERION_MY:
- {
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=mkat_h_full((int)n(c1,c2),nverf(c1,c2));
- bewertung+=mkat_h_part((int)(n1(c1)),n1verf(c1))+mkat_h_part((int)(n2(c1)),n2verf(c1));
- }
- double u1=_unigramVerfSum1-verfInit0*c1_0; // running sums with verfInit0 removed once per entry counted by c1_0 / c2_0
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
- if( verboseMode>1 )
- {
- cout << "CRITERION_MY: " << bewertung << endl;
- cout << "U1:"<<_unigramVerfSum1 << " n:"<<u1<< " "
- << "U2:"<<_unigramVerfSum2 << " n:"<<u2<< " "
- << "U3:"<<_bigramVerfSum << " n:"<<b<< endl;
- }
- if(b>0.000001)
- {
-// The additional term _nWords*log(u1*u2/b) is only applied when b is safely positive.
-
- if(verboseMode>1 )
- cout << " NEU: " <<_nWords*log( u1 * u2 / b ) << endl;
- bewertung -= _nWords*log( u1 * u2 / b );
- if(verboseMode>1)
- cout << "SCHLUSSBEWERTUNG: " << bewertung << endl;
- }
- else
- cout << "B zu klein " << b << endl;
- }
- break;
- case CRITERION_LO:
- for(c1=0;c1<nKats;c1++)
- {
- for(c2=0;c2<nKats;c2++)
- bewertung-=_n[c1][c2]*kat_mlog(_n[c1][c2]-1-rhoLo);
- bewertung+=_n1[c1]*kat_mlog(_n1[c1]-1)+_n2[c1]*kat_mlog(_n2[c1]-1);
- }
- bewertung-=kat_etaFkt(eta0,eta1,(c1_0*nKats+c2_0*nKats-c1_0*c2_0),nKats);
- break;
- default:
- cerr << "Error: wrong criterion " << auswertung << endl;
- exit(1);
- }
- return bewertung;
-}
-
-double KategProblemKBC::myCriterionTerm() // Returns -_nWords*log(u1*u2/b), the additional term that fullBewertung applies for CRITERION_MY.
-{
- iassert( withVerfaelschung );
- double r;
- double u1=_unigramVerfSum1-verfInit0*c1_0;
- double u2=_unigramVerfSum2-verfInit0*c2_0;
- double b=_bigramVerfSum-verfInit0*(c1_0*nKats+c2_0*nKats-c1_0*c2_0);
-// NOTE(review): unlike fullBewertung, b is not checked against 0.000001 before it is used as divisor - confirm that callers guarantee b>0.
-
- if( verboseMode>1 )
- {
- cout << "nwords divisor:"<<_nWords << " " << u1 * u2 / b << endl;
- cout << "ergebnis: "<<_nWords*log( u1 * u2 / b ) << endl;
- cout << "0: "<<c1_0 << endl;
- }
- r = _nWords*log( u1 * u2 / b );
-
- return -r;
-}
-
-
-
-
-double KategProblemKBC::bigramVerfSum() // Recomputes the sum of nverf(c1,c2) over all category pairs, prints it to cout and returns it; the member _bigramVerfSum is not modified.
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- for(int c2=0;c2<nKats;c2++)
- sum+=nverf(c1,c2);
- cout << "BIGRAMVERFSUM: " << sum << endl;
- return sum;
-}
-
-double KategProblemKBC::unigramVerfSum1() // Recomputes the sum of n1verf(c1) over all categories, prints it to cout and returns it; the member _unigramVerfSum1 is not modified.
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- sum+=n1verf(c1);
- cout << "UNIGRAMVERFSUM1: " << sum << endl;
- return sum;
-}
-
-double KategProblemKBC::unigramVerfSum2() // Recomputes the sum of n2verf(c1) over all categories, prints it to cout and returns it; the member _unigramVerfSum2 is not modified.
-{
- double sum=0;
- for(int c1=0;c1<nKats;c1++)
- sum+=n2verf(c1);
- cout << "UNIGRAMVERFSUM2: " << sum << endl;
- return sum;
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-