You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/26 04:22:21 UTC
[04/14] incubator-joshua git commit: JOSHUA-252 Make it possible to
use Maven to build Joshua
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/README
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/README b/ext/giza-pp/mkcls-v2/README
deleted file mode 100644
index 8e453df..0000000
--- a/ext/giza-pp/mkcls-v2/README
+++ /dev/null
@@ -1,10 +0,0 @@
-========================================================================
-mkcls is a tool to train word classes by using a
-maximum-likelihood-criterion. The resulting word classes are
-especially suited for language models or statistical translation
-models. The program mkcls was written by Franz Josef Och
-(och@informatik.rwth-aachen.de)
-========================================================================
-
-In order to know about the options of mkcls simply start the program
-without arguments.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/RRTOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/RRTOptimization.cpp b/ext/giza-pp/mkcls-v2/RRTOptimization.cpp
deleted file mode 100644
index 55e2122..0000000
--- a/ext/giza-pp/mkcls-v2/RRTOptimization.cpp
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "RRTOptimization.h"
-#include "ProblemTest.h"
-
-double RRTOptimization::defaultAnnRate=0.6;
-double RRTOptimization::defaultMultiple=2.0;
-
-
-
-RRTOptimization::RRTOptimization(Problem &p,double t,double dt,int m)
-: IterOptimization(p,m),deviation(t),deltaDeviation(dt)
-{
- assert(deviation>=0);
-}
-
-
-
-RRTOptimization:: RRTOptimization(Problem &p,int m)
-: IterOptimization(p,m),deviation(-1),deltaDeviation(0)
-{
-}
-
-
-
-RRTOptimization::RRTOptimization(RRTOptimization &o)
-: IterOptimization(o)
-{
- deviation = o.deviation;
- deltaDeviation= o.deltaDeviation;
- record = o.record;
-}
-
-
-
-void RRTOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( deviation<0 )
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- deviation = v.quantil(defaultAnnRate);
- deltaDeviation = deviation/(float)n;
-
- if( verboseMode>0 )
- cout << "#Algorithm: Record-To-Record-Travel: (anfAnnRate="
- << defaultAnnRate << ",T=" << deviation << ",deltaT="
- << deltaDeviation << ")\n";
-
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
- record=problem.value();
- assert(deviation>=0);
-}
-
-short RRTOptimization::end()
-{
- return ( endFlag>0 && deviation==0.0 );
-}
-void RRTOptimization::abkuehlen()
-{
- if( deviation>=0 )
- {
- deviation -= deltaDeviation;
- if(deviation<0)
- deviation=0;
- }
-}
-short RRTOptimization::accept(double delta)
-{
- if( deviation<0 )
- return 1;
- else
- {
- if( delta + curValue - deviation < record )
- {
- if( delta + curValue < record )
- record = delta+curValue;
- return 1;
- }
- else
- return 0;
- }
-}
-
-void RRTOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << deviation;
-}
-
-
-
-
-double RRTOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#RRT-optimizeValues: Quantil: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- if(i==0) defaultAnnRate=0.2;
- else defaultAnnRate = 0.3+(float)(0.6*i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print )
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.8;
- return bestPar;
- }
- break;
- case 10:
- {
- double i;
- double bestPar=-1,best=1e100;
- StatVar end,laufzeit,init;
-
- if( print )
- cout << "#RRT-optimizeValues: defaultMultiple" << 8 << endl;
- for(i=0.5;i<=10;i+=1.5)
- {
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,RRT_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in RRTOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/RRTOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/RRTOptimization.h b/ext/giza-pp/mkcls-v2/RRTOptimization.h
deleted file mode 100644
index 42ec6e2..0000000
--- a/ext/giza-pp/mkcls-v2/RRTOptimization.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef RRTOPTIMIZATION
-#define RRTOPTIMIZATION
-#include "IterOptimization.h"
-
-class RRTOptimization : public IterOptimization {
-
-
- private:
- double deviation;
- double deltaDeviation;
- double record;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- RRTOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
-
- RRTOptimization(Problem &p,int maxIter=-1);
-
-
- RRTOptimization(RRTOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/SAOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/SAOptimization.cpp b/ext/giza-pp/mkcls-v2/SAOptimization.cpp
deleted file mode 100644
index 6ae589a..0000000
--- a/ext/giza-pp/mkcls-v2/SAOptimization.cpp
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdlib.h>
-#include <iostream>
-
-#include "SAOptimization.h"
-
-#include "ProblemTest.h"
-
-#define ALPHA 0.95
-
-double SAOptimization::defaultAnfAnnRate=0.9;
-double SAOptimization::defaultEndAnnRate=1e-9;
-double SAOptimization::defaultMultiple=2.0;
-
-
-
-SAOptimization::SAOptimization(Problem &p,int m)
-: IterOptimization(p,m), temperatur(-1)
-{
-}
-
-
-
-
-SAOptimization::SAOptimization(Problem &p,double t,double a,int s,int m)
-: IterOptimization(p,m),temperatur(t), alpha(a),schrittzahl(s)
-{
- assert(alpha<1);
- assert(schrittzahl>0);
- assert(t>0);
-}
-
-
-SAOptimization::SAOptimization(SAOptimization &o)
-: IterOptimization(o)
-{
- temperatur = o.temperatur;
- endTemperatur = o.endTemperatur;
- alpha = o.alpha;
- schrittzahl = o.schrittzahl;
- stepsForAbkuehlung = o.stepsForAbkuehlung;
-}
-
-
-void SAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if( maxStep>0 )
- stepsForAbkuehlung=(int)(maxStep*4.0/5.0);
- else
- maxStep=stepsForAbkuehlung=(int)(problem.expectedNumberOfIterations()*
- defaultMultiple);
-
- temperatur = v.getMean()/log(1/defaultAnfAnnRate);
- endTemperatur = v.getMean()/log(1/defaultEndAnnRate);
- schrittzahl = (int)(stepsForAbkuehlung/(log(endTemperatur/temperatur)/
- log(ALPHA)));
- if(schrittzahl==0)schrittzahl=1;
- alpha = ALPHA;
-
- if( verboseMode )
- cout << "#Algorithm: Simulated Annealing(anfAnnRate="
- << defaultAnfAnnRate <<",(endAnnRate=" << defaultEndAnnRate
- << ",T0=" << temperatur<< ",Te=" << endTemperatur<< ",schrittzahl="
- << schrittzahl<< ",stepsForAbkuehlung=" << stepsForAbkuehlung
- << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- problem.initialize();
- IterOptimization::zInitialize();
- }
-}
-
-short SAOptimization::end()
-{
- if( temperatur>endTemperatur )
- bestStep = curStep;
- if( endFlag>0 && temperatur<endTemperatur)
- return 1;
- else
- return 0;
-}
-void SAOptimization::abkuehlen()
-{
- if(temperatur>=0)
- {
- if( curStep%schrittzahl == 0 )
- temperatur=temperatur * alpha;
- if( curStep> stepsForAbkuehlung)
- temperatur = 0;
- }
-}
-short SAOptimization::accept(double delta)
-{
- if( temperatur<0 )
- return 1;
- else
- {
- if( delta > 0 )
- {
- if( temperatur==0 )
- return 0;
- else
- {
- double z=zufall01();
- assert(z!=0.0);
- if(z==0.0)
- z+=1e-20;
- double e=exp(-delta/temperatur);
-
-
-
- return z+0.000000000001<=e;
- }
- }
- else
- return 1;
- }
-}
-
-void SAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur;
-}
-
-
-
-
-double SAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,
- int typ,int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#SA-optimizeValues: defaultAnfAnnRate" << endl;
- for(int i=0;i<numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultAnfAnnRate=0.1 + (1.0/numParameter)*i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnfAnnRate;
- }
- if( print )
- {
- cout << defaultAnfAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultAnfAnnRate=0.9;
- return bestPar;
- }
- break;
- case 2:
- {
- double bestPar=-1,best=1e100;
- double now;
- if( print )
- cout << "#Optimierung von SA: defaultEndAnnRate" << endl;
- for(int i=1;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- defaultEndAnnRate=1/(pow(10.0,i));
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultEndAnnRate;
- }
- if( print )
- {
- cout << defaultEndAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultEndAnnRate=1/10000.0;
- return bestPar;
- }
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
-
- if( print )
- cout << "#SA-optimizeValues: defaultMultiple " << 8 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,SA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Parameter Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit "
- "Bester Sigma SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in SAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/SAOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/SAOptimization.h b/ext/giza-pp/mkcls-v2/SAOptimization.h
deleted file mode 100644
index 97c528b..0000000
--- a/ext/giza-pp/mkcls-v2/SAOptimization.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef SAOPTIMIZATION
-#define SAOPTIMIZATION
-#include "IterOptimization.h"
-
-class SAOptimization : public IterOptimization
- {
-
-
- private:
- double temperatur;
- double endTemperatur;
- double alpha;
- int schrittzahl;
- int stepsForAbkuehlung;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- SAOptimization(Problem &p,double temperatur,double alpha,
- int schrittzahl,int maxIter=-1);
-
-
- SAOptimization(Problem &p,int maxIter=-1);
-
-
- SAOptimization(SAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,
- int schritte= -1,int verbose=1);
-
-
- static double defaultAnfAnnRate;
-
- static double defaultEndAnnRate;
-
- static double defaultMultiple;
-
-
-};
-#endif
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/StatVar.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/StatVar.cpp b/ext/giza-pp/mkcls-v2/StatVar.cpp
deleted file mode 100644
index dbd76cd..0000000
--- a/ext/giza-pp/mkcls-v2/StatVar.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#include "StatVar.h"
-#include <iostream>
-#include <stdlib.h>
-
-double compareStatVarQuantil=-1;
-
-StatV::~StatV() {}
-
-
-int doublecompare(const void *p,const void *j)
-{
- if( *(double *)p == *(double *)j)
- return 0;
- if( *(double *)p- *(double *)j<0 )
- return -1;
- else
- return 1;
-}
-
-int compareStatVar(const void *p,const void *j)
-{
- double a;
- double b;
- if(compareStatVarQuantil>=0)
- {
- a=((StatVar *)p)->quantil(compareStatVarQuantil);
- b=((StatVar *)j)->quantil(compareStatVarQuantil);
- }
- else
- {
- a=((StatVar *)p)->getMean();
- b=((StatVar *)j)->getMean();
- }
- if(a==b)
- return 0;
- if(a<b)
- return -1;
- else
- return +1;
-}
-
-
-double StatVar::getSigmaSmaller()
-{
- double ss=0;
- int ns=0;
- for(int i=0;i<n;i++)
- {
- if( values[i]<getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
- }
- if( ss/ns>0 )
- return sqrt(ss/ns);
- else
- return 0;
-}
-double StatVar::getSigmaBigger()
-{
- double ss=0;
- int ns=0;
- for(int i=0;i<n;i++)
- if( values[i]>getMean() )
- {
- ss+=(values[i]-getMean())*(values[i]-getMean());
- ns++;
- }
- if( ss/ns>0 )
- return sqrt(ss/ns);
- else
- return 0;
-}
-
-
-
-void StatV::dumpOn(ostream &strm)
-{
- strm << "MEAN: " << getMean() << " (" << smallest << "-" << biggest
- << ") SIGMA:" << getSigma()<< " ";
-}
-
-
-
-double StatVar::quantil(double percent)
-{
- int index=(int)(n*percent);
- if(index==n)
- index=n-1;
- assert(index>=0&&index<n);
- if(sortedFlag==0)
- {
- qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
- assert(n<=values.size());
- sortedFlag=1;
- }
- if(index<0)
- {
- cerr << "WARNING: StatVar.cc\n";
- return 0.0;
- }
- else
- return values[index];
-}
-
-
-void StatVar::printValues(ostream &strm)
-{
- qsort(values.getPointerToData(),n,sizeof(double),doublecompare);
- assert(n<=values.size());
- for(int i=0;i<n;i++)
- strm << i/(double)n << " " << values[i] << endl;
- return;
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/StatVar.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/StatVar.h b/ext/giza-pp/mkcls-v2/StatVar.h
deleted file mode 100644
index bdf1e19..0000000
--- a/ext/giza-pp/mkcls-v2/StatVar.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef STATVAR_H
-#define STATVAR_H
-
-#include <stdlib.h>
-#include <iostream>
-#include "Array.h"
-#include "mystl.h"
-#include "myleda.h"
-#include <cmath>
-
-
-extern double compareStatVarQuantil;
-int compareStatVar(const void *p,const void *j);
-
-class StatV
-
-{
- protected:
- int n;
- double sum;
- double squareSum;
- double smallest,biggest;
-
- public:
- const char *title;
- StatV() : n(0),sum(0),squareSum(0),smallest(1e100),biggest(-1e100),title("") {}
- virtual ~StatV();
-
-
- virtual void addValue(double a)
- {
- n++;
- sum+=a;
- squareSum+=a*a;
- if(smallest>a)
- smallest=a;
- if(biggest<a)
- biggest=a;
-
- }
-
-
- double getMean()
- { return sum/n; }
-
-
- double getSigma()
- {
- if(squareSum/n - getMean()*getMean()<=0)
- return 0.0;
- else
- return sqrt(squareSum/n - getMean()*getMean());
- }
-
-
-
- double getBiggest()
- { return biggest; }
-
-
- double getSmallest()
- { return smallest; }
-
-
- int getNum()
- { return n; }
-
-
- void dumpOn(ostream &strm);
-
-
-};
-
-class StatVar : public StatV
-{
- private:
- Array<double> values;
- short sortedFlag;
- public:
- StatVar()
- : values(10,0.0,1),sortedFlag(0) {}
- virtual ~StatVar(){}
- double quantil(double percent=0.5);
-
-
- inline double value(int i)
- {return values[i];}
-
-
- void printValues(ostream &strm);
-
-
- virtual void addValue(double a)
- {
- sortedFlag=0;
- values[n]=a;
- StatV::addValue(a);
- }
-
- double getSigmaSmaller();
-
-
- double getSigmaBigger();
-
-
-};
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/TAOptimization.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/TAOptimization.cpp b/ext/giza-pp/mkcls-v2/TAOptimization.cpp
deleted file mode 100644
index 074ff62..0000000
--- a/ext/giza-pp/mkcls-v2/TAOptimization.cpp
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include "TAOptimization.h"
-#include "ProblemTest.h"
-
-
-double TAOptimization::defaultAnnRate=0.4;
-double TAOptimization::defaultMultiple=2.0;
-
-
-TAOptimization::TAOptimization(Problem &p,double t,double d,int m)
-: IterOptimization(p,m) , temperatur(t) , deltaTemperatur(d)
-{
- assert(t>0 && d>0);
-}
-
-
-
-TAOptimization::TAOptimization(Problem&p,int m)
-: IterOptimization(p,m), temperatur(-1)
-{
-}
-
-
-
-TAOptimization::TAOptimization(TAOptimization &o)
-: IterOptimization(o)
-{
- temperatur= o.temperatur;
- deltaTemperatur= o.deltaTemperatur;
-}
-
-
-
-
-void TAOptimization::zInitialize()
-{
- IterOptimization::zInitialize();
- if( temperatur<0)
- {
-
-
- int n;
-
- StatVar &v=problem.deviationStatVar(*this,ANZ_VERSCHLECHTERUNGEN);
-
- if(maxStep>0)
- n=(int)(maxStep*4.0/5.0);
- else
- maxStep=n=(int)(problem.expectedNumberOfIterations()*defaultMultiple);
-
- temperatur = v.quantil(defaultAnnRate);
- deltaTemperatur = temperatur/n;
-
- if( verboseMode>0 )
- cout << "#TA: (anfAnnRate="
- << defaultAnnRate << ",T=" << temperatur << ",deltaT="
- << deltaTemperatur << ")\n";
- curStep=0;
- endFlag=0;
- delete &v;
- }
-}
-
-
-short TAOptimization::end()
-{
-
-
- if( temperatur>0 )
- {
- endFlag=0;
- bestStep=curStep;
- }
- return endFlag>0;
-}
-
-short TAOptimization::accept(double delta)
-{
- if( temperatur<0 )
- return 1;
- else
- if( delta < temperatur )
- return 1;
- else
- return 0;
-}
-
-void TAOptimization::abkuehlen()
-{
- if( temperatur>=0 )
- temperatur=(temperatur-deltaTemperatur>0)?(temperatur-deltaTemperatur):0;
-}
-
-void TAOptimization::makeGraphOutput()
-{
- IterOptimization::makeGraphOutput();
- *GraphOutput << temperatur;
-}
-
-
-
-
-double TAOptimization::optimizeValue(Problem &p,int proParameter,int numParameter,int typ,
- int optimierungsschritte,int print)
-{
- switch(typ)
- {
- case 1:
- {
- double bestPar=-1,best=1e100;
- if(print)cout << "#TA-optimizeValues: " << numParameter << endl;
- for(int i=0;i<=numParameter;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultAnnRate = (float)(i)/numParameter;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,end,
- laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultAnnRate;
- }
- if( print)
- {
- cout << defaultAnnRate << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester"
- " Sigma SigmaSmaller SigmaBigger\n";
- defaultAnnRate=0.5;
- return bestPar;
- }
- break;
- case 10:
- {
- double bestPar=-1,best=1e100;
- if( print )
- cout << "#TA-optimizeValues: defaultMultiple " << 10 << endl;
- for(int i=1;i<=6;i++)
- {
- StatVar end,laufzeit,init;
- double now;
- defaultMultiple = i;
- solveProblem(0,p,proParameter,optimierungsschritte,TA_OPT,now,
- end,laufzeit,init);
- if( best>now )
- {
- best=now;
- bestPar=defaultMultiple;
- }
- if( print )
- {
- cout << defaultMultiple << " ";
- cout << end.getMean() << " " << end.quantil(0.2) << " "
- << end.quantil(0.79) << " " << laufzeit.getMean() << " "
- << end.quantil(0.0) << " " << end.getSigma() << " "
- << end.getSigmaSmaller() << " " << end.getSigmaBigger()
- << " " << now << endl;
- }
- }
- if( print )
- cout << "#Mittelwert 0.2-Quantil 0.8-Quantil Laufzeit Bester Sigma "
- " SigmaSmaller SigmaBigger\n";
- defaultMultiple=2.0;
- return bestPar;
- }
- break;
- default:
- cerr << "Error: wrong parameter-type in TAOptimization::optimizeValue ("
- << typ << ")\n";
- exit(1);
- }
- return 1e100;
-}
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/TAOptimization.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/TAOptimization.h b/ext/giza-pp/mkcls-v2/TAOptimization.h
deleted file mode 100644
index 3382306..0000000
--- a/ext/giza-pp/mkcls-v2/TAOptimization.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-#ifndef TAOPTIMIZATION
-#define TAOPTIMIZATION
-
-#include "IterOptimization.h"
-
-class TAOptimization : public IterOptimization {
-
-
- private:
- double temperatur;
- double deltaTemperatur;
-
- protected:
- virtual void zInitialize();
-
-
- virtual short accept(double delta);
-
-
- virtual void abkuehlen();
-
-
- virtual short end();
-
-
- virtual void makeGraphOutput();
-
-
- public:
- TAOptimization(Problem &p,double temperatur,
- double deltaTemperatur,int maxIter=-1);
-
-
- TAOptimization(Problem &p,int maxIter=-1);
-
-
- TAOptimization(TAOptimization &o);
-
-
- static double optimizeValue(Problem &p,int proParameter,
- int numParameter,int typ,int schritte= -1,int verbose=1);
-
-
- static double defaultAnnRate;
-
- static double defaultMultiple;
-
-};
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/general.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/general.cpp b/ext/giza-pp/mkcls-v2/general.cpp
deleted file mode 100644
index ddd5fe4..0000000
--- a/ext/giza-pp/mkcls-v2/general.cpp
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-#include <stdlib.h>
-#include <stdio.h>
-
-
-extern "C" {
-#include <sys/time.h>
-#include <sys/resource.h>
-
-
-}
-
-#include "general.h"
-
-extern "C" {
-#ifndef __linux__
-int getrusage(int who, struct rusage *rusage);
-#endif
-};
-int verboseMode=0;
-
-#ifdef aNeXT
-#define NO_TEMPLATES
-#endif
-
-
-void myerror(int line,const char *file,const char *expression)
-{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
-}
-
-
-void imyerror(int line,const char *file,const char *expression)
-{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
- #ifndef DEBUG
-
- #endif
-}
-
-
-
-void zufallSeed(int z)
-{
-#ifdef NeXT
- srandom(z);
-#else
- srand48(z);
-#endif
-}
-
-
-
-double zufall01()
-{
-#ifdef NeXT
- return (double)(random()%65536)/65536.0;
-#else
- return drand48();
-#endif
-}
-
-
-
-double zufall(double min,double max)
-{
- double z=zufall01()*(max-min)+min;
- assert(z>=min&&z<max);
- return z;
-}
-
-
-
-int randomInt(int exclusive)
-{
- int i=(int)zufall(0,exclusive);
- assert(i>=0);
- assert(i<exclusive);
- return i;
-}
-
-double clockSec()
-{
-#ifdef linux
- enum __rusage_who who=RUSAGE_SELF;
-#else
- int who=RUSAGE_SELF;
-#endif
- struct rusage rusage;
- getrusage(who, &rusage);
- return rusage.ru_utime.tv_sec+rusage.ru_utime.tv_usec/1000000.0;
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/general.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/general.h b/ext/giza-pp/mkcls-v2/general.h
deleted file mode 100644
index 8db48aa..0000000
--- a/ext/giza-pp/mkcls-v2/general.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-
-
-
-
-#ifndef GENERAL_HEADER
-#define GENERAL_HEADER
-
-#include <iostream>
-#ifdef NeXT
-#include <minmax.h>
-#endif
-#include <string.h>
-
-
-
-#define NULLFLOAT(x) ( fabs(x)<=0.0000001 )
-#define EQUALFLOAT(x,y) ( fabs(x-y)<(fabs(x)+fabs(y))/10000000.0 )
-
-
-
-
-#define TEST_RANDOM_SEED 532567487
-
-double zufall01();
-
-
-double zufall(double min,double max);
-
-
-int randomInt(int exclusive);
-
-
-void zufallSeed(int z =TEST_RANDOM_SEED);
-
-
-
-
-#include "myassert.h"
-#include <cassert>
-#include "Array.h"
-
-
-
-
-
-
-double clockSec();
-
-extern int verboseMode;
-
-
-
-inline string operator&(const string&a,const string&b)
-{
- string c(a);
- c+=b;
- return c;
-}
-
-
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/makePackage.sh
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/makePackage.sh b/ext/giza-pp/mkcls-v2/makePackage.sh
deleted file mode 100644
index 2790e61..0000000
--- a/ext/giza-pp/mkcls-v2/makePackage.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#! /bin/csh
-
-setenv VERSION `date +%Y-%m-%d`
-rm -rf mkcls-v2
-
-mkdir mkcls-v2
-foreach i ( Array.h FixedArray.h FlexArray.h GDAOptimization.C GDAOptimization.h HCOptimization.C HCOptimization.h IterOptimization.C IterOptimization.h KategProblem.C KategProblem.h KategProblemKBC.C KategProblemKBC.h KategProblemTest.C KategProblemTest.h KategProblemWBC.C KategProblemWBC.h MSBOptimization.C MSBOptimization.h MYOptimization.C MYOptimization.h Optimization.C Optimization.h PopOptimization.C PopOptimization.h Problem.C Problem.h ProblemTest.C ProblemTest.h RRTOptimization.C RRTOptimization.h SAOptimization.C SAOptimization.h StatVar.C StatVar.h TAOptimization.C TAOptimization.h general.C general.h makePackage.sh mkcls.C my.h myassert.h myleda.h mystl.h )
- cat $i | filterIfdef.out NO_LIGHT_GIZA | filterIfdefInverse.out DEBUG | filterIfdefInverse.out DEBUG_TRICKY_IBM3 | filterIfdefInverse.out VDEBUG | stripcmt | addHead.out -file header > mkcls-v2/$i
-end
-
-cp Makefile.simple mkcls-v2/Makefile
-cp ../giza++/GNU.GPL mkcls-v2
-cp ../giza++/LICENSE mkcls-v2
-cp README mkcls-v2
-
-tar cf - mkcls-v2 | gzip -9 > mkcls.$VERSION.tar.gz
-
-cd mkcls-v2
-gmake -k
-cd ..
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/mkcls.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/mkcls.cpp b/ext/giza-pp/mkcls-v2/mkcls.cpp
deleted file mode 100644
index 90ebfde..0000000
--- a/ext/giza-pp/mkcls-v2/mkcls.cpp
+++ /dev/null
@@ -1,618 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-
-#include <stdio.h>
-#include <iostream>
-#include <stdlib.h>
-#include <ctype.h>
-#include "general.h"
-
-#include "KategProblem.h"
-#include "KategProblemTest.h"
-
-#include "ProblemTest.h"
-#include "TAOptimization.h"
-#include "GDAOptimization.h"
-#include "RRTOptimization.h"
-#include "SAOptimization.h"
-#include "HCOptimization.h"
-
-
-double SigmaVerfaelschung=5.0;
-int OneWithHapas=1;
-char *hapaxInitName=0;
-
-
-
-
-
-static int nLaeufe=1,nLaeufeReduce=3;
-
-
-static int optimizeParameterAnzahl=10;
-
-
-static int IterOptVerf=TA_OPT;
-
-
-static int MaxIterOptSteps= -1;
-
-
-static int MaxSecs=0;
-
-
-
-
-
-static int InitValue=INIT_RAN;
-
-
-static int Criterion=CRITERION_ML;
-
-
-static int Wwahl=W_DET_DECR;
-
-
-static int Kwahl=K_BEST;
-
-
-static int NumberCategories=100;
-
-
-static int MinWordFrequency=0;
-
-
-static int IterOptSet=0;
-
-
-static KategProblem *p = 0;
-
-
-char korpusName[1024]="train";
-int korpusIsText=1;
-
-
-char *FileForOther=0;
-
-void printUsage(int r)
-{
- cout <<
- "mkcls - a program for making word classes: Usage: \n"
- " mkcls [-nnum] [-ptrain] [-Vfile] opt\n"
-
-
-
-
-
-
- "-V output classes (Default: no file)\n"
-
-
- "-n number of optimization runs (Default: 1); larger number => better results\n"
-
- "-p filename of training corpus (Default: 'train')\n"
-
-
-
-
-
-
-
-
- "Example:\n"
- " mkcls -c80 -n10 -pin -Vout opt\n"
- " (generates 80 classes for the corpus 'in' and writes the classes in 'out')\n"
- "Literature: \n"
- " Franz Josef Och: \ufffdMaximum-Likelihood-Sch\ufffdtzung von Wortkategorien mit Verfahren\n"
- " der kombinatorischen Optimierung\ufffd Studienarbeit, Universit\ufffdt Erlangen-N\ufffdrnberg,\n"
- " Germany,1995. \n";
- exit(r);
-}
-
-
-
-
-
-
-
-void makeIterOpt()
-{
- double maxTime=clockSec()+MaxSecs;
- if(MaxSecs==0)maxTime=0;
- double mean;
- StatVar end,laufzeit,init;
- solveProblem(1+(PrintBestTo!=0),*p,nLaeufe,MaxIterOptSteps,IterOptVerf,
- mean,end,laufzeit,init,maxTime);
- if( verboseMode>1 )
- p->dumpOn(cout);
-}
-
-
-
-void makeIzrOpt()
-{
- double maxTime=clockSec()+MaxSecs;
- if(MaxSecs==0)maxTime=0;
- izrOptimization(*p,nLaeufeReduce,nLaeufeReduce,0,maxTime,IterOptVerf);
-}
-
-
-
-int makeMetaOpt(int argc,char **argv)
-{
- int ret=0;
-
- if(argc==4 || argc==3)
- {
- int typ=0;
- if( argc==4 )
- {
- sscanf(argv[3],"%d",&typ);
- assert(typ>0 && typ<=11 );
- }
- if( isdigit(argv[2][0]) )
- {
- int a;
- sscanf(argv[2],"%d",&a);
- switch(a)
- {
- case 1:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 2:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,2);
- break;
- case 3:
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 4:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 5:
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 6:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- case 7:
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,10);
- break;
- case 8:
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,1);
- break;
- default:
- cerr << "Error: Wrong number of parameter (" << argv[2]
- << ").\n";
- printUsage(1);
- }
- }
- else
- {
- if(strcasecmp(argv[2],"gda")==0)
- {
- GDAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"ta")==0)
- {
- TAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"rrt")==0)
- {
- RRTOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
- else if(strcasecmp(argv[2],"sa")==0)
- {
- SAOptimization::optimizeValue(*p,nLaeufe,
- optimizeParameterAnzahl,typ);
- }
-
-
-
-
- else
- {
- cerr << "Error: unknown algorithm" << argv[2] << endl;
- printUsage(1);
- }
- }
- }
- else
- {
- cerr << "Error: wrong number of arguments: " << argc << endl;
- printUsage(1);
- }
- return ret;
-}
-
-
-
-
-
-
-
-
-
-
-void setVerfahren(char *p)
-{
- if(strcasecmp(p,"rrt")==0 )
- IterOptVerf=RRT_OPT;
- else if(strcasecmp(p,"ta")==0)
- IterOptVerf=TA_OPT;
- else if(strcasecmp(p,"gda")==0)
- IterOptVerf=GDA_OPT;
- else if(strcasecmp(p,"sa")==0)
- IterOptVerf=SA_OPT;
- else if(strcasecmp(p,"hc")==0)
- IterOptVerf=HC_OPT;
- else
- {
- cerr << "Error: Unknown iterativ-optimizing algorithm '" << p << "'.\n";
- printUsage(1);
- }
-}
-
-
-
-void setInitValue(char *iv,char *fileForOther)
-{
- if(strcasecmp(iv,"ran")==0 )
- InitValue=INIT_RAN;
- else if(strcasecmp(iv,"aio")==0)
- InitValue=INIT_AIO;
- else if(strcasecmp(iv,"gda")==0)
- InitValue=INIT_LWRW;
- else if(strcasecmp(iv,"freq")==0)
- InitValue=INIT_FREQ;
- else if(strcasecmp(iv,"other")==0)
- {
- InitValue=INIT_OTHER;
- FileForOther=strdup(fileForOther);
- }
- else
- {
- cerr << "Error: Unknown initialization '" << p << "'.\n";;
- printUsage(1);
- }
-}
-
-
-void setWwahl(const char *ww)
-{
- if(strcasecmp(ww,"ran")==0 )
- Wwahl=W_RAN;
- else if(strcasecmp(ww,"det")==0)
- Wwahl=W_DET_DECR;
- else if(strcasecmp(ww,"incr")==0)
- Wwahl=W_DET_INCR;
- else
- {
- cerr << "Error: Unknown word-selection '" << ww << "'.\n";;
- printUsage(1);
- }
-}
-
-
-void setKwahl(const char *kw)
-{
- if( strcasecmp(kw,"det")==0 )
- Kwahl=K_DET;
- else if(strcasecmp(kw,"ran")==0 )
- Kwahl=K_RAN;
- else if(strcasecmp(kw,"best")==0)
- Kwahl=K_BEST;
- else
- {
- cerr << "Error: Unknown category-selection '" << kw << "'.\n";
- printUsage(1);
- }
-}
-
-
-void setParameter(const char *nr1,const char *nr2)
-{
- int n1;
- float n2;
- sscanf(nr1,"%d",&n1);
- sscanf(nr2,"%f",&n2);
- IterOptSet=1;
- switch(n1)
- {
- case 1:
- SAOptimization::defaultAnfAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_0 (SA) set to "
- << SAOptimization::defaultAnfAnnRate << endl;
- iassert(0<=SAOptimization::defaultAnfAnnRate&&
- SAOptimization::defaultAnfAnnRate<=1);
- break;
- case 2:
- SAOptimization::defaultEndAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_e (SA) set to "
- << SAOptimization::defaultEndAnnRate << endl;
- iassert(0<=SAOptimization::defaultEndAnnRate
- &&SAOptimization::defaultEndAnnRate<=1);
- break;
- case 3:
- SAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_e (SA) set to "
- << SAOptimization::defaultMultiple << endl;
- iassert( SAOptimization::defaultMultiple>0 );
- break;
- case 4:
- TAOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{TA} set to "
- << TAOptimization::defaultAnnRate << endl;
- iassert(0<=TAOptimization::defaultAnnRate
- &&TAOptimization::defaultAnnRate<=1);
- break;
- case 5:
- TAOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{TA} set to "
- << TAOptimization::defaultMultiple << endl;
- iassert( TAOptimization::defaultMultiple>0 );
- break;
- case 6:
- RRTOptimization::defaultAnnRate=n2;
- if(verboseMode)cout << "Parameter gamma_{RRT} set to "
- << RRTOptimization::defaultAnnRate << endl;
- iassert(0<=RRTOptimization::defaultAnnRate
- && RRTOptimization::defaultAnnRate<=1);
- break;
- case 7:
- RRTOptimization::defaultMultiple=n2;
- if(verboseMode)cout << "Parameter nu_{RRT} set to "
- << RRTOptimization::defaultMultiple << endl;
- iassert( RRTOptimization::defaultMultiple>0 );
- break;
- case 8:
- GDAOptimization::defaultAlpha=n2;
- if(verboseMode)cout << "Parameter alpha set to "
- << GDAOptimization::defaultAlpha << endl;
- iassert(0<=GDAOptimization::defaultAlpha
- && GDAOptimization::defaultAlpha<1 );
- break;
- default:
- cerr << "Error: Wrong parameter number " << nr1 << " " << n1 << endl;
- printUsage(1);
- }
-}
-
-
-
-void setKorpusName(const char *s)
-{
- strcpy(korpusName,s);
-}
-
-void setHapaxInitName(const char *s)
-{
- hapaxInitName=strdup(s);
-}
-
-void setKorpus()
-{
- if( korpusIsText )
- {
- if( (p=fromKModel(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
- }
- else
- {
- if( (p=fromNgrFile(korpusName,NumberCategories,InitValue,Criterion,Wwahl|Kwahl,
- MinWordFrequency))==0)
- {
- cerr << "Error: Could not read the file '" << korpusName << "'.\n";
- printUsage(1);
- }
- p->wordFreq.initializeIndex(*(p->words),'1',2,1+NumberCategories/2,!OneWithHapas);
- p->wordFreq.initializeIndex(*(p->words),'2',2+NumberCategories/2,1+NumberCategories,OneWithHapas);
- }
- if( IterOptSet==0 )
- KategProblemSetParameters(*p);
-}
-
-
-
-
-
-
-int main(int argc,char **argv)
-{
- double startTime=clockSec();
- zufallSeed();
- while( argc>1 && argv[1][0]=='-' )
- {
-
- switch(argv[1][1])
- {
- case 'v':
- sscanf(argv[1]+2,"%d",&verboseMode);
- iassert(verboseMode>=0);
- break;
- case 'O':
- sscanf(argv[1]+2,"%d",&OneWithHapas);
- cout << "OneWithHapas: " << OneWithHapas << endl;
- break;
- case 'n':
- sscanf(argv[1]+2,"%d",&nLaeufe);
- nLaeufeReduce=nLaeufe;
- iassert( nLaeufe>=1 );
- break;
- case 'l':
- Criterion=1;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&rhoLo);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << rhoLo << ".\n";
- iassert(0<=rhoLo && rhoLo<=1);
- }
- if( verboseMode )
- cout << "Criterion LO used.\n";
- break;
- case 'y':
- Criterion=2;
- if( argv[1][2] )
- {
- sscanf(argv[1]+2,"%lf",&SigmaVerfaelschung);
- if( verboseMode )
- cout << "Parameter rho (for LO) set to" << SigmaVerfaelschung << ".\n";
- iassert(0<SigmaVerfaelschung);
- }
- if( verboseMode )
- cout << "My special criterion used.\n";
- break;
- case 'p':
- setKorpusName(argv[1]+2);
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'P':
- setKorpusName(argv[1]+2);
- korpusIsText=0;
- assert(argv[2]&&argv[2][0]!='-' || argv[2][0]!='i');
- break;
- case 'i':
- setInitValue(argv[1]+2,argv[2]);
- if( InitValue==INIT_OTHER )
- argv++,argc--;
- break;
- case 'h':
- setHapaxInitName(argv[1]+2);
- break;
- case 'k':
- setKwahl(argv[1]+2);
- break;
- case 'w':
- setWwahl(argv[1]+2);
- break;
- case 'c':
- sscanf(argv[1]+2,"%d",&NumberCategories);
- iassert(NumberCategories>=2);
- break;
- case 'm':
- sscanf(argv[1]+2,"%d",&MinWordFrequency);
- break;
- case 'e':
- setParameter(argv[1]+2,argv[2]);
- argv++,argc--;
- break;
- case 'a':
- setVerfahren(argv[1]+2);
- break;
- case 'r':
- {
- int s;
- sscanf(argv[1]+2,"%d",&s);
- zufallSeed(s);
- }
- break;
- case 'V':
- if(argv[1][2])
- {
- char str[1024];
- strcpy(str,argv[1]+2);
- PrintBestTo=new ofstream(str);
- strcat(str,".cats");
- PrintBestTo2=new ofstream(str);
- }
- else
- cout << "AUSGABE auf cout\n";
- break;
- case 'M':
- sscanf(argv[1]+2,"%d",&MaxIterOptSteps);
- break;
- case 's':
- sscanf(argv[1]+2,"%d",&MaxSecs);
- break;
- case 'N':
- sscanf(argv[1]+2,"%d",&optimizeParameterAnzahl);
- break;
- case 'o':
- GraphOutput = new ofstream(argv[1]+2);
- if( GraphOutput==0 )
- cerr << "Warning: Open failed for file '" << argv[1]+2 << "'.\n";
- break;
- default:
- cerr << "Fehlerhafte Option: " << argv[1] << endl;
- printUsage(1);
- }
- argv++;
- argc--;
- }
-
-
- setKorpus();
- if( FileForOther )
- {
- fromCatFile(p,FileForOther);
- p->initialisierung=InitValue;
- p->_initialize(InitValue);
- }
-
- if( hapaxInitName )
- {
- fromCatFile(p,hapaxInitName,0);
- p->fixInitLike();
- }
-
- double start2Time=clockSec();
-
- if(argc>=2 && strcasecmp(argv[1],"opt")==0 )
- makeIterOpt();
- else if(argc>=2 && strcasecmp(argv[1],"meta-opt")==0)
- makeMetaOpt(argc,argv);
- else if(argc>=2 && strcasecmp(argv[1],"izr-opt")==0)
- makeIzrOpt();
-
-
- else
- {
- makeIterOpt();
- }
-
- if( verboseMode )
- {
- cout << " full-time: " << clockSec()-startTime << endl;
- cout << "optimize-time: " << clockSec()-start2Time << endl;
- }
- return 0;
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/my.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/my.h b/ext/giza-pp/mkcls-v2/my.h
deleted file mode 100644
index ba06657..0000000
--- a/ext/giza-pp/mkcls-v2/my.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef HEADER_my_DEFINED
-#define HEADER_my_DEFINED
-
-#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
-#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
-#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
-#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
-#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
-
-extern double n1mult,n2mult,n3mult;
-
-inline double realProb(int n1,int n2)
-{
- massert(n1<=n2);
- iassert(n1>=0&&n2>0);
- if(n2==0)n2=1;
- return ((double)n1)/(double)n2;
-}
-
-inline double verfProb(int n1,int n2)
-{
- double prob = realProb(n1,n2);
- if( n1==1 )return prob*n1mult;
- else if( n1==2 )return prob*n2mult;
- else if( n1==3 )return prob*n3mult;
- else return prob;
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/myassert.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/myassert.h b/ext/giza-pp/mkcls-v2/myassert.h
deleted file mode 100644
index da86ffb..0000000
--- a/ext/giza-pp/mkcls-v2/myassert.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef MY_ASSERT_DEFINED
-#define MY_ASSERT_DEFINED
-void myerror(int line,const char *file,const char *expression);
-void imyerror(int line,const char *file,const char *expression);
-
-#define iassert(expression) do {if (!(expression)) {imyerror(__LINE__,__FILE__,#expression);}} while (0)
-
-#define massert(expr) do {} while(0)
-
-#define vassert(expr) do {} while(0)
-
-#include <assert.h>
-
-#endif
-
-
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/myleda.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/myleda.h b/ext/giza-pp/mkcls-v2/myleda.h
deleted file mode 100644
index adf3845..0000000
--- a/ext/giza-pp/mkcls-v2/myleda.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef myleda_HEADER_defined
-#define myleda_HEADER_defined
-#include <map>
-#include <set>
-#include <unordered_map>
-#include "myassert.h"
-#include "FixedArray.h"
-using namespace std;
-
-template<class T>
-class leda_array : public FixedArray<T>
-{
-public:
- leda_array() {}
- leda_array(int n) : FixedArray<T>(n) {}
-};
-
-template<class T>
-class leda_set : public set<T>
-{
-public:
- bool member(const T&m) const
- { return this->count(m)!=0; }
- void del(const T&m)
- { this->erase(m); }
-};
-#define forall_set(a,b,c) for(a::iterator __i__=c.begin();__i__!=c.end()&&((b=*__i__),1);++__i__)
-template<class T>
-leda_set<T> operator&(const leda_set<T>&a,const leda_set<T>&b)
-{
- leda_set<T>c;
- insert_iterator<set<T> > iter(c,c.begin());
- set_intersection(a.begin(),a.end(),b.begin(),b.end(),iter);
- return c;
-}
-template<class T>
-leda_set<T> operator-(const leda_set<T>&a,const leda_set<T>&b)
-{
- leda_set<T>c;
- insert_iterator<set<T> > iter(c,c.begin());
- set_difference(a.begin(),a.end(),b.begin(),b.end(),iter);
- return c;
-}
-
-template<class A,class B>
-class leda_d_array : public map<A,B>
-{
-private:
- B init;
-public:
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename map<A,B>::const_iterator pos=find(a);
- iassert(pos!=this->end());
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename map<A,B>::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(map<A,B>::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->first),1) ;++__ii__)
-#define forall_d(a,b,c,d) for(typename leda_d_array<a,b>::const_iterator __ii__=(d).begin();__ii__!=(d).end()&&((c=__ii__->second),1);++__ii__)
-
-double used_time();
-
-template<class T>
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE std::unordered_map<A,B>
-
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=this->find(a);
-
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=this->find(a);
- if( pos==this->end() )
- {
- this->insert(typename MY_HASH_BASE::value_type(a,init));
- pos=this->find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_defined_h2(a,b,c,d) for(leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-#define forall_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jjj__=(d).begin();__jjj__!=(d).end()&&((c=__jjj__->second),1);++__jjj__)
-
-
-template<class T> int compare(const T&a,const T&b)
-{if(a==b)return 0; else if(a<b) return -1; else return 1;}
-
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_d_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T>
-ostream&printSet(ostream&out,const leda_set<T>&s)
-{
- bool first=1;
- T t;
- out << "{";
- forall_set(typename set<T>,t,s)
- {
- if( first==0 )
- out << ", ";
- out << t;
- first=0;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-template<class A,class B>
-bool operator==(const leda_d_array<A,B>&p1,const leda_d_array<A,B>&p2)
-{
- A v;
- forall_defined_d(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_d(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/giza-pp/mkcls-v2/mystl.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/mkcls-v2/mystl.h b/ext/giza-pp/mkcls-v2/mystl.h
deleted file mode 100644
index 99f7965..0000000
--- a/ext/giza-pp/mkcls-v2/mystl.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
-
-Copyright (C) 1997,1998,1999,2000,2001 Franz Josef Och
-
-mkcls - a program for making word classes .
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-
-
-
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-#include <string>
-#include <utility>
-#include <unordered_map>
-#include <cmath>
-
-using namespace std;
-
-namespace std {
- template <typename T, typename V>
- struct hash<pair<T, V> > {
- static inline void hash_combine(std::size_t & seed, const T & v) {
- hash<T> hasher;
- seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- }
-
- size_t operator()(const std::pair<T, V>& x) const {
- size_t h = 0;
- hash_combine(h, x.first);
- hash_combine(h, x.second);
- return h;
- }
- };
-}
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-
-template<class T1,class T2>
-istream& operator>>(istream &in,pair<T1,T2> &ir)
-{
- char c;
- do in.get(c); while (in && isspace(c));
- if (!in) return in;
- if (c != '(') in.putback(c);
- in >> ir.first;
- do in.get(c); while (isspace(c));
- if (c != ',') in.putback(c);
- in >> ir.second;
- do in.get(c); while (c == ' ');
- if (c != ')') in.putback(c);
- return in;
-}
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-void printSpaces(ostream&out,int n);
-void mysplit(const string &s,string &s1,string &s2);
-string untilChar(const string&s,char c);
-
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)
- return 1;
- if(y.a<x.a)
- return 0;
-
- if(x.b<y.b)
- return 1;
- if(y.b<x.b)
- return 0;
-
- if(x.c<y.c)
- return 1;
- if(y.c<x.c)
- return 0;
- return 0;
-}
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/kenlm
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
deleted file mode 160000
index 56fdb5c..0000000
--- a/ext/kenlm
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/symal/Makefile
----------------------------------------------------------------------
diff --git a/ext/symal/Makefile b/ext/symal/Makefile
deleted file mode 100644
index 3e5d740..0000000
--- a/ext/symal/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-all: symal
-
-clean:
- rm -f *.o symal
-
-cmd.o: cmd.c cmd.h
- $(CC) -O3 -c -o cmd.o cmd.c
-
-symal: symal.cpp cmd.o
- $(CXX) -O3 -o $@ $(@).cpp cmd.o
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/symal/cmd.c
----------------------------------------------------------------------
diff --git a/ext/symal/cmd.c b/ext/symal/cmd.c
deleted file mode 100644
index 149fc72..0000000
--- a/ext/symal/cmd.c
+++ /dev/null
@@ -1,649 +0,0 @@
-
-// $Id$
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "cmd.h"
-
-#ifdef WIN32
-# define popen _popen
-# define pclose _pclose
-#endif
-
-static Enum_T BoolEnum[] = {
- { "FALSE", 0 },
- { "TRUE", 1 },
- { 0, 0 }
-};
-
-#ifdef NEEDSTRDUP
-char *strdup();
-#endif
-
-#define FALSE 0
-#define TRUE 1
-
-#define LINSIZ 10240
-#define MAXPARAM 256
-
-static char *GetLine(),
- **str2array();
-static int Scan(),
- SetParam(),
- SetEnum(),
- SetSubrange(),
- SetStrArray(),
- SetGte(),
- SetLte(),
- CmdError(),
- EnumError(),
- SubrangeError(),
- GteError(),
- LteError(),
- PrintParam(),
- PrintEnum(),
- PrintStrArray();
-
-static Cmd_T cmds[MAXPARAM+1];
-static char *SepString = " \t\n";
-
-#if defined(__STDC__)
-#include <stdarg.h>
-int DeclareParams(char *ParName, ...)
-#else
-#include <varargs.h>
-int DeclareParams(ParName, va_alist)
-char *ParName;
-va_dcl
-#endif
-{
- va_list args;
- static int ParamN = 0;
- int j,
- c;
- char *s;
-
-#if defined(__STDC__)
- va_start(args, ParName);
-#else
- va_start(args);
-#endif
- for(;ParName;) {
- if(ParamN==MAXPARAM) {
- fprintf(stderr, "Too many parameters !!\n");
- break;
- }
- for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
- ;
- if(!c) {
- fprintf(stderr,
- "Warning: parameter \"%s\" declared twice.\n",
- ParName);
- }
- for(c=ParamN; c>j; c--) {
- cmds[c] = cmds[c-1];
- }
- cmds[j].Name = ParName;
- cmds[j].Type = va_arg(args, int);
- cmds[j].Val = va_arg(args, void *);
- switch(cmds[j].Type) {
- case CMDENUMTYPE: /* get the pointer to Enum_T struct */
- cmds[j].p = va_arg(args, void *);
- break;
- case CMDSUBRANGETYPE: /* get the two extremes */
- cmds[j].p = (void*) calloc(2, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- ((int*)cmds[j].p)[1] = va_arg(args, int);
- break;
- case CMDGTETYPE: /* get lower or upper bound */
- case CMDLTETYPE:
- cmds[j].p = (void*) calloc(1, sizeof(int));
- ((int*)cmds[j].p)[0] = va_arg(args, int);
- break;
- case CMDSTRARRAYTYPE: /* get the separators string */
- cmds[j].p = (s=va_arg(args, char*))
- ? (void*)strdup(s) : 0;
- break;
- case CMDBOOLTYPE:
- cmds[j].Type = CMDENUMTYPE;
- cmds[j].p = BoolEnum;
- break;
- case CMDDOUBLETYPE: /* nothing else is needed */
- case CMDINTTYPE:
- case CMDSTRINGTYPE:
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "DeclareParam()", "Unknown Type",
- cmds[j].Type, "for parameter", cmds[j].Name);
- exit(1);
- }
- ParamN++;
- ParName = va_arg(args, char *);
- }
- cmds[ParamN].Name = NULL;
- va_end(args);
- return 0;
-}
-
-int GetParams(n, a, CmdFileName)
-int *n;
-char ***a;
-char *CmdFileName;
-{
- char *Line,
- *ProgName;
- int argc = *n;
- char **argv = *a,
- *s;
- FILE *fp;
- int IsPipe;
-
-#ifdef MSDOS
-#define PATHSEP '\\'
- char *dot = NULL;
-#else
-#define PATHSEP '/'
-#endif
-
- if(!(Line=malloc(LINSIZ))) {
- fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
- LINSIZ);
- exit(1);
- }
- if((ProgName=strrchr(*argv, PATHSEP))) {
- ++ProgName;
- } else {
- ProgName = *argv;
- }
-#ifdef MSDOS
- if(dot=strchr(ProgName, '.')) *dot = 0;
-#endif
- --argc;
- ++argv;
- for(;;) {
- if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
- CmdFileName = argv[0]+2;
- ++argv;
- --argc;
- }
- if(!CmdFileName) {
- break;
- }
- IsPipe = !strncmp(CmdFileName, "@@", 2);
- fp = IsPipe
- ? popen(CmdFileName+2, "r")
- : strcmp(CmdFileName, "-")
- ? fopen(CmdFileName, "r")
- : stdin;
- if(!fp) {
- fprintf(stderr, "Unable to open command file %s\n",
- CmdFileName);
- exit(1);
- }
- while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
- if(Scan(ProgName, cmds, Line)) {
- CmdError(Line);
- }
- }
- if(fp!=stdin) {
- if(IsPipe) pclose(fp); else fclose(fp);
- }
- CmdFileName = NULL;
- }
- while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
- *s = ' ';
- sprintf(Line, "%s/%s", ProgName, *argv+1);
- *s = '=';
- if(Scan(ProgName, cmds, Line)) CmdError(*argv);
- --argc;
- ++argv;
- }
- *n = argc;
- *a = argv;
-#ifdef MSDOS
- if(dot) *dot = '.';
-#endif
- free(Line);
- return 0;
-}
-
-int PrintParams(ValFlag, fp)
-int ValFlag;
-FILE *fp;
-{
- int i;
-
- fflush(fp);
- if(ValFlag) {
- fprintf(fp, "Parameters Values:\n");
- } else {
- fprintf(fp, "Parameters:\n");
- }
- for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
- fprintf(fp, "\n");
- fflush(fp);
- return 0;
-}
-
-int SPrintParams(a, pfx)
-char ***a,
- *pfx;
-{
- int l,
- n;
- Cmd_T *cmd;
-
- if(!pfx) pfx="";
- l = strlen(pfx);
- for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
- a[0] = calloc(n, sizeof(char*));
- for(n=0, cmd=cmds; cmd->Name; cmd++) {
- if(!cmd->ArgStr) continue;
- a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
- sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
- ++n;
- }
- return n;
-}
-
-static int CmdError(opt)
-char *opt;
-{
- fprintf(stderr, "Invalid option \"%s\"\n", opt);
- fprintf(stderr, "This program expectes the following parameters:\n");
- PrintParams(FALSE, stderr);
- exit(0);
-}
-
-static int PrintParam(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- fprintf(fp, "%4s", "");
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %22.15e", *(double *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDENUMTYPE:
- PrintEnum(cmd, ValFlag, fp);
- break;
- case CMDINTTYPE:
- case CMDSUBRANGETYPE:
- case CMDGTETYPE:
- case CMDLTETYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) fprintf(fp, ": %d", *(int *)cmd->Val);
- fprintf(fp, "\n");
- break;
- case CMDSTRINGTYPE:
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- if(*(char **)cmd->Val) {
- fprintf(fp, ": \"%s\"", *(char **)cmd->Val);
- } else {
- fprintf(fp, ": %s", "NULL");
- }
- }
- fprintf(fp, "\n");
- break;
- case CMDSTRARRAYTYPE:
- PrintStrArray(cmd, ValFlag, fp);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "PrintParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- return 0;
-}
-
-static char *GetLine(fp, n, Line)
-FILE *fp;
-int n;
-char *Line;
-{
- int j,
- l,
- offs=0;
-
- for(;;) {
- if(!fgets(Line+offs, n-offs, fp)) {
- return NULL;
- }
- if(Line[offs]=='#') continue;
- l = strlen(Line+offs)-1;
- Line[offs+l] = 0;
- for(j=offs; Line[j] && isspace(Line[j]); j++, l--)
- ;
- if(l<1) continue;
- if(j > offs) {
- char *s = Line+offs,
- *q = Line+j;
-
- while((*s++=*q++))
- ;
- }
- if(Line[offs+l-1]=='\\') {
- offs += l;
- Line[offs-1] = ' ';
- } else {
- break;
- }
- }
- return Line;
-}
-
-static int Scan(ProgName, cmds, Line)
-char *ProgName,
- *Line;
-Cmd_T *cmds;
-{
- char *q,
- *p;
- int i,
- hl,
- HasToMatch = FALSE,
- c0,
- c;
-
- p = Line+strspn(Line, SepString);
- if(!(hl=strcspn(p, SepString))) {
- return 0;
- }
- if((q=strchr(p, '/')) && q-p<hl) {
- *q = 0;
- if(strcmp(p, ProgName)) {
- *q = '/';
- return 0;
- }
- *q = '/';
- HasToMatch=TRUE;
- p = q+1;
- }
- if(!(hl = strcspn(p, SepString))) {
- return 0;
- }
- c0 = p[hl];
- p[hl] = 0;
- for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
- ;
- p[hl] = c0;
- if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
- return HasToMatch && c;
-}
-
-static int SetParam(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- if(!*s && cmd->Type != CMDSTRINGTYPE) {
- fprintf(stderr,
- "WARNING: No value specified for parameter \"%s\"\n",
- cmd->Name);
- return 0;
- }
- switch(cmd->Type) {
- case CMDDOUBLETYPE:
- if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
- fprintf(stderr,
- "Float value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDENUMTYPE:
- SetEnum(cmd, s);
- break;
- case CMDINTTYPE:
- if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- break;
- case CMDSTRINGTYPE:
- *(char **)cmd->Val = (strcmp(s, "<NULL>") && strcmp(s, "NULL"))
- ? strdup(s)
- : 0;
- break;
- case CMDSTRARRAYTYPE:
- SetStrArray(cmd, s);
- break;
- case CMDGTETYPE:
- SetGte(cmd, s);
- break;
- case CMDLTETYPE:
- SetLte(cmd, s);
- break;
- case CMDSUBRANGETYPE:
- SetSubrange(cmd, s);
- break;
- default:
- fprintf(stderr, "%s: %s %d %s \"%s\"\n",
- "SetParam",
- "Unknown Type",
- cmd->Type,
- "for parameter",
- cmd->Name);
- exit(1);
- }
- cmd->ArgStr = strdup(s);
- return 0;
-}
-
-static int SetEnum(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && !strcmp(s, en->Name)) {
- *(int *) cmd->Val = en->Idx;
- return 0;
- }
- }
- return EnumError(cmd, s);
-}
-
-static int SetSubrange(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n < *(int *)cmd->p || n > *((int *)cmd->p+1)) {
- return SubrangeError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetGte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n<*(int *)cmd->p) {
- return GteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int SetStrArray(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- *(char***)cmd->Val = str2array(s, (char*)cmd->p);
- return 0;
-}
-
-static int SetLte(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- int n;
-
- if(sscanf(s, "%d", &n)!=1) {
- fprintf(stderr,
- "Integer value required for parameter \"%s\"\n",
- cmd->Name);
- exit(1);
- }
- if(n > *(int *)cmd->p) {
- return LteError(cmd, n);
- }
- *(int *)cmd->Val = n;
- return 0;
-}
-
-static int EnumError(cmd, s)
-Cmd_T *cmd;
-char *s;
-{
- Enum_T *en;
-
- fprintf(stderr,
- "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
- fprintf(stderr, "Valid values are:\n");
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name) {
- fprintf(stderr, " %s\n", en->Name);
- }
- }
- fprintf(stderr, "\n");
- exit(1);
-}
-
-static int GteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be greater than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int LteError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values must be less than or equal to %d\n",
- *(int *)cmd->p);
- exit(1);
-}
-
-static int SubrangeError(cmd, n)
-Cmd_T *cmd;
-int n;
-{
- fprintf(stderr,
- "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
- fprintf(stderr, "Valid values range from %d to %d\n",
- *(int *)cmd->p, *((int *)cmd->p+1));
- exit(1);
-}
-
-static int PrintEnum(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- Enum_T *en;
-
- fprintf(fp, "%s", cmd->Name);
- if(ValFlag) {
- for(en=(Enum_T *)cmd->p; en->Name; en++) {
- if(*en->Name && en->Idx==*(int *)cmd->Val) {
- fprintf(fp, ": %s", en->Name);
- }
- }
- }
- fprintf(fp, "\n");
- return 0;
-}
-
-static int PrintStrArray(cmd, ValFlag, fp)
-Cmd_T *cmd;
-int ValFlag;
-FILE *fp;
-{
- char *indent,
- **s = *(char***)cmd->Val;
- int l = 4+strlen(cmd->Name);
-
- fprintf(fp, "%s", cmd->Name);
- indent = malloc(l+2);
- memset(indent, ' ', l+1);
- indent[l+1] = 0;
- if(ValFlag) {
- fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
- if(s) while(*s) {
- fprintf(fp, "\n%s %s", indent, *s++);
- }
- }
- free(indent);
- fprintf(fp, "\n");
- return 0;
-}
-
-static char **str2array(s, sep)
-char *s,
- *sep;
-{
- char *p,
- **a;
- int n = 0,
- l;
-
- if(!sep) sep = SepString;
- p = s += strspn(s, sep);
- while(*p) {
- p += strcspn(p, sep);
- p += strspn(p, sep);
- ++n;
- }
- a = calloc(n+1, sizeof(char *));
- p = s;
- n = 0;
- while(*p) {
- l = strcspn(p, sep);
- a[n] = malloc(l+1);
- memcpy(a[n], p, l);
- a[n][l] = 0;
- ++n;
- p += l;
- p += strspn(p, sep);
- }
- return a;
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/symal/cmd.h
----------------------------------------------------------------------
diff --git a/ext/symal/cmd.h b/ext/symal/cmd.h
deleted file mode 100644
index 17231db..0000000
--- a/ext/symal/cmd.h
+++ /dev/null
@@ -1,51 +0,0 @@
-
-// $Id$
-
-#if !defined(CMD_H)
-
-#define CMD_H
-
-#define CMDDOUBLETYPE 1
-#define CMDENUMTYPE 2
-#define CMDINTTYPE 3
-#define CMDSTRINGTYPE 4
-#define CMDSUBRANGETYPE 5
-#define CMDGTETYPE 6
-#define CMDLTETYPE 7
-#define CMDSTRARRAYTYPE 8
-#define CMDBOOLTYPE 9
-
-typedef struct {
- char *Name;
- int Idx;
-} Enum_T;
-
-typedef struct {
- int Type;
- char *Name,
- *ArgStr;
- void *Val,
- *p;
-} Cmd_T;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#if defined(__STDC__)
- int DeclareParams(char *, ...);
-#else
- int DeclareParams();
-#endif
-
- int GetParams(int *n, char ***a,char *CmdFileName),
- SPrintParams(),
- PrintParams();
-
-#ifdef __cplusplus
-}
-#endif
-#endif
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/symal/giza2bal.pl
----------------------------------------------------------------------
diff --git a/ext/symal/giza2bal.pl b/ext/symal/giza2bal.pl
deleted file mode 100755
index 553ff2b..0000000
--- a/ext/symal/giza2bal.pl
+++ /dev/null
@@ -1,112 +0,0 @@
-#! /usr/bin/perl
-
-# $Id$
-#Converts direct and inverted alignments into a more compact
-#bi-alignment format. It optionally reads the counting file
-#produced by giza containing the frequency of each traning sentence.
-
-#Copyright Marcello Federico, November 2004
-
-($cnt,$dir,$inv)=();
-
-while ($w=shift @ARGV){
- $dir=shift(@ARGV),next if $w eq "-d";
- $inv=shift(@ARGV),next if $w eq "-i";
- $cnt=shift(@ARGV),next if $w eq "-c";
-}
-
-my $lc = 0;
-
-if (!$dir || !inv){
- print "usage: giza2bal.pl [-c <count-file>] -d <dir-align-file> -i <inv-align-file>\n";
- print "input files can be also commands, e.g. -d \"gunzip -c file.gz\"\n";
- exit(0);
-}
-
-$|=1;
-
-open(DIR,"<$dir") || open(DIR,"$dir|") || die "cannot open $dir\n";
-open(INV,"<$inv") || open(INV,"$inv|") || die "cannot open $dir\n";
-
-if ($cnt){
-open(CNT,"<$cnt") || open(CNT,"$cnt|") || die "cannot open $dir\n";
-}
-
-
-sub ReadBiAlign{
- local($fd0,$fd1,$fd2,*s1,*s2,*a,*b,*c)=@_;
- local($dummy,$n);
-
- chop($c=<$fd0>); ## count
- $dummy=<$fd0>; ## header
- $dummy=<$fd0>; ## header
- $c=1 if !$c;
-
- $dummy=<$fd1>; ## header
- chop($s1=<$fd1>);
- chop($t1=<$fd1>);
-
- $dummy=<$fd2>; ## header
- chop($s2=<$fd2>);
- chop($t2=<$fd2>);
-
- @a=@b=();
- $lc++;
-
- #get target statistics
- $n=1;
- $t1=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
- while ($t1=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
- grep($a[$_]=$n,split(/\s+/,$2));
- $n++;
- }
-
- $m=1;
- $t2=~s/NULL \(\{((\s+\d+)*)\s+\}\)//;
- while ($t2=~s/(\S+)\s+\(\{((\s+\d+)*)\s+\}\)//){
- grep($b[$_]=$m,split(/\s+/,$2));
- $m++;
- }
-
- $M=split(/\s+/,$s1);
- $N=split(/\s+/,$s2);
-
- if ($m != ($M+1) || $n != ($N+1)) {
- print STDERR "Sentence mismatch error! Line #$lc\n";
- $s1 = "ALIGN_ERR";
- $s2 = "ALIGN_ERR";
- @a=(); @b=();
- for ($j=1;$j<2;$j++){ $a[$j]=1; }
- for ($i=1;$i<2;$i++){ $b[$i]=1; }
- return 1;
- }
-
- for ($j=1;$j<$m;$j++){
- $a[$j]=0 if !$a[$j];
- }
-
- for ($i=1;$i<$n;$i++){
- $b[$i]=0 if !$b[$i];
- }
-
-
- return 1;
-}
-
-$skip=0;
-$ccc=0;
-while(!eof(DIR)){
-
- if (ReadBiAlign(CNT,DIR,INV,*src,*tgt,*a,*b,*c))
- {
- $ccc++;
- print "$c\n";
- print $#a," $src \# @a[1..$#a]\n";
- print $#b," $tgt \# @b[1..$#b]\n";
- }
- else{
- print "\n";
- print STDERR "." if !(++$skip % 1000);
- }
-};
-print STDERR "skip=<$skip> counts=<$ccc>\n";