You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/26 05:11:51 UTC
[46/53] [abbrv] [partial] incubator-joshua git commit: Pulled
JOSHUA-252 changes and Resolved Merge Conflicts
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/model3.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/model3.cpp b/ext/giza-pp/GIZA++-v2/model3.cpp
deleted file mode 100644
index 1fe0216..0000000
--- a/ext/giza-pp/GIZA++-v2/model3.cpp
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model3.h"
-#include "collCounts.h"
-#include "Globals.h"
-#include "utility.h"
-#include "D5Tables.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "Parameter.h"
-
-#define TRICKY_IBM3_TRAINING
-
-GLOBAL_PARAMETER(int,M4_Dependencies,"depm4","d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E",PARLEV_MODELS,76);
-GLOBAL_PARAMETER(int,M5_Dependencies,"depm5","d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E",PARLEV_MODELS,68);
-GLOBAL_PARAMETER4(int,Model3_Dump_Freq,"MODEL 345 DUMP FREQUENCY","MODEL 3 DUMP FREQUENCY","t3","t345","dump frequency of Model 3/4/5",PARLEV_OUTPUT,0);
-
-
-extern int Transfer_Dump_Freq;
-
-model3::model3(model2& m2) :
- model2(m2),dTable(true), dCountTable(true),
- nTable(m2.getNoEnglishWords()+1, MAX_FERTILITY),
- nCountTable(m2.getNoEnglishWords()+1, MAX_FERTILITY),h(0)
-{}
-
-void model3::load_tables(const char *nfile, const char *dfile, const char *p0file){
- cout << "Model3: loading n, d, p0 tables \n";
-
- nTable.readNTable(nfile);
- dTable.readTable(dfile);
- ifstream inf(p0file);
- if( !inf )
- cerr << "Can not open: " << p0file << '\n';
- else
- {
- cout << "Reading p0 value from " << p0file << "\n";
- inf >> p0;
- inf.close();
- p1 = 1 - p0;
- }
- cout << "p0 is: " << p0 << " p1:" << p1 << '\n';
-}
-
-model3::~model3()
-{
- dTable.clear();
- dCountTable.clear();
- nTable.clear();
- nCountTable.clear();
-}
-
-
-void model3::em(int noIterations, sentenceHandler& sHandler1)
-{
-
- LogProb all_prob, aprob, temp ;
- WordIndex i, j, l, m ;
- time_t it_st, st, it_fn, fn ;
- string tfile, dfile, nfile, p0file, afile, number;
-
- st = time(NULL) ;
- if (Log)
- logmsg << "\n" << "Starting Model3: Training";
- cout << "\n" << "Starting Model3: Training";
- // sentenceHandler sHandler1(efFilename.c_str());
- sHandler1.rewind();
- for(int it=1; it <= noIterations; it++){
- it_st = time(NULL) ;
- if (Log)
- logmsg << "\n" << "Model3: Iteration " << it;
- cout << "\n" << "Model3: Iteration " << it;
-
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do{
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- tfile = Prefix + ".t3." + number ;
- afile = Prefix + ".a3." + number ;
- nfile = Prefix + ".n3." + number ;
- dfile = Prefix + ".d3." + number ;
- p0file = Prefix + ".p0_3." + number ;
- // tCountTable.clear();
- dCountTable.clear();
- nCountTable.clear();
- p0_count = p1_count = 0 ;
- all_prob = 0 ;
- sentPair sent ;
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cout <<sent.sentenceNo << '\n';
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- LogProb lcount=(LogProb)count;
- l = es.size()-1;
- m = fs.size()-1;
- WordIndex x, y ;
- all_prob = prob_of_target_given_source(tTable, fs, es);
- if (all_prob == 0)
- cout << "\n" <<"all_prob = 0";
-
- for ( x = 0 ; x < pow(l+1.0, double(m)) ; x++){ // For all possible alignmets A
- y = x ;
- for (j = 1 ; j <= m ; j++){
- A[j] = y % (l+1) ;
- y /= (l+1) ;
- }
- for(i = 0 ; i <= l ; i++)
- Fert[i] = 0 ;
- for (j = 1 ; j <= m ; j++)
- Fert[A[j]]++;
- if (2 * Fert[0] <= m){ /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- aprob = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es);
- temp = aprob/all_prob ;
- LogProb templcount = temp*lcount;
-
- for (j = 1 ; j <= m ; j++){
- tTable.incCount(es[A[j]], fs[j], templcount);
- if (0 != A[j])
- dCountTable.getRef(j, A[j], l, m)+=templcount;
- }
- for(i = 0 ; i <= l ; i++)
- {
- nCountTable.getRef(es[i], Fert[i])+=templcount;
- //cout << "AFTER INC2: " << templcount << " " << nCountTable.getRef(es[i], Fert[i]) << '\n';
- }
- p1_count += double(temp) * (Fert[0] * count) ;
- p0_count += double(temp) * ((m - 2 * Fert[0]) * count) ;
- }
- } /* of looping over all alignments */
- } /* of sentence pair E, F */
- sHandler1.rewind();
-
- // normalize tables
- if( OutputInAachenFormat==1 )
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable,&Elist.getVocabList());
-
- // normalize p1 & p0
-
- if (p1_count + p0_count != 0){
- p1 = p1_count / ( p1_count + p0_count ) ;
- p0 = 1 - p1 ;
- }
- else {
- p1 = p0 = 0 ;
- }
- // print tables
- if( OutputInAachenFormat==0 )
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(), Elist.getVocabList(),OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- it_fn = time(NULL) ;
- cout << "\n" << "Model3 Iteration "<<it<<" took: " << difftime(it_fn, it_st) << " seconds\n";
-
- } /* of iterations */
- fn = time(NULL) ;
- cout << "\n" << "Entire Model3 Training took: " << difftime(fn, st) << " seconds\n";
-}
-
-
-
-
-
-
-
-//-----------------------------------------------------------------------
-
-/*
-void simpleModel3Test()
-{
- PositionIndex l=6;
- PositionIndex m=8;
- alignment al(l,m);
- al.set(1,1);
- al.set(2,2);
- al.set(3,3);
- al.set(4,2);
- al.set(5,0);
- al.set(6,6);
- al.set(7,3);
- al.set(8,4);
- cout << al;
- PositionIndex prev_cept=0;
- PositionIndex vac_all=m;
- Vector<char> vac(m+1,0);
- for(PositionIndex i=1;i<=l;i++)
- {
- PositionIndex cur_j=al.als_i[i];
- cout << "LOOP: " << i << " " << cur_j << '\n';
- PositionIndex prev_j=0;
- PositionIndex k=0;
- if(cur_j) { // process first word of cept
- k++;
- vac_all--;
- assert(vac[cur_j]==0);
- vac[cur_j]=1;
- for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
- cout << i << " " << cur_j << ": d1(" << vacancies(vac,cur_j) << "|" << vacancies(vac,al.get_center(prev_cept)) << "," << vac_all << "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- while(cur_j) { // process following words of cept
- k++;
- vac_all--;
- vac[cur_j]=1;
- int vprev=vacancies(vac,prev_j);
- cout << "PREV: " << prev_j << '\n';
- for(unsigned int q=0;q<vac.size();q++)cout << (vac[q]?'1':'0') << ' ';
- cout << '\n';
- cout << i << " " << cur_j << ": d>1(" << vacancies(vac,cur_j) << "-" << vprev << "|" << vac_all<< "+" << -al.fert(i)<< "+" << +k << ")\n" << '\n';
- prev_j=cur_j;
- cur_j=al.als_j[cur_j].next;
- }
- assert(k==al.fert(i));
- if( k )
- prev_cept=i;
- }
- assert(vac_all==al.fert(0));
-}
-*/
-
-extern short DoViterbiTraining;
-
-int model3::viterbi(int noIterationsModel3, int noIterationsModel4,int noIterationsModel5,int noIterationsModel6)
-{
- double minErrors=1.0;int minIter=0;
- d4model d4m(MAX_SENTENCE_LENGTH);
- d4m.makeWordClasses(Elist,Flist,SourceVocabFilename+".classes",TargetVocabFilename+".classes");
- d5model d5m(d4m);
- d5m.makeWordClasses(Elist,Flist,SourceVocabFilename+".classes",TargetVocabFilename+".classes");
- time_t it_st, st, it_fn, fn;
- bool dump_files = false ;
- string tfile, tfile_actual, dfile, afile, nfile, nfile_actual, p0file, alignfile, number, test_alignfile, d4file,d5file,zeroFertFile;
- st = time(NULL);
- sHandler1.rewind();
- if (testPerp && testHandler)
- (*testHandler).rewind();
- string trainingString;
- trainingString+=(h?'H':'3');
- for(int i=0;i<noIterationsModel3;++i) trainingString+='3';
- for(int i=0;i<noIterationsModel4;++i) trainingString+='4';
- for(int i=0;i<noIterationsModel5;++i) trainingString+='5';
- for(int i=0;i<noIterationsModel6;++i) trainingString+='6';
-
- cout << "\n==========================================================\n";
- cout << "Starting "<<trainingString<<": Viterbi Training";
- if (Log){
- logmsg << "\n==========================================================\n";
- logmsg << "Starting "<<trainingString<<": Viterbi Training";
- }
- cout << "\n "<<trainingString<<" Training Started at: "<< ctime(&st) << '\n';
- for(unsigned int it=1; it < trainingString.length(); it++){
- bool final=0;
- if( it==trainingString.length()-1 )
- final=1;
- string modelName;
- char fromModel=trainingString[it-1],toModel=trainingString[it];
- if(fromModel==toModel)
- modelName=string("Model")+fromModel;
- else
- modelName=string("T")+fromModel+"To"+toModel;
- it_st = time(NULL);
- cout <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- if (Log)
- logmsg <<"\n---------------------\n"<<modelName<<": Iteration " << it<<'\n';
- dump_files = (final || ((Model3_Dump_Freq != 0) && ((it % Model3_Dump_Freq) == 0))) && !NODUMPS ;
- string d4file2;
- {
- // set up the names of the files where the tables will be printed
- int n = it;
- number = "";
- do{
- //mj changed next line
- number.insert((size_t) 0, 1, (char)(n % 10 + '0'));
- } while((n /= 10) > 0);
- if( final )
- number="final";
- tfile = Prefix + ".t3." + number ;
- tfile_actual = Prefix + ".actual.t3." + number ;
- afile = Prefix + ".a3." + number ;
- nfile = Prefix + ".n3." + number ;
- nfile_actual = Prefix + ".actual.n3." + number ;
- dfile = Prefix + ".d3." + number ;
- d4file = Prefix + ".d4." + number ;
- d4file2 = Prefix + ".D4." + number ;
- d5file = Prefix + ".d5." + number ;
- alignfile = Prefix + ".A3." + number ;
- test_alignfile = Prefix + ".tst.A3." + number ;
- p0file = Prefix + ".p0_3." + number ;
- }
- // clear count tables
- // tCountTable.clear();
- dCountTable.clear();
- aCountTable.clear();
- initAL();
- nCountTable.clear();
- d4m.clear();
- p0_count = p1_count = 0 ;
-
-#ifdef TRICKY_IBM3_TRAINING
-
-#define TRAIN_ARGS perp, trainViterbiPerp, sHandler1, dump_files, alignfile.c_str(), true, modelName,final
-#define TEST_ARGS *testPerp, *testViterbiPerp, *testHandler, dump_files, test_alignfile.c_str(),false, modelName,final
-
-
- switch( toModel )
- {
- case '3':
- switch(fromModel )
- {
- case 'H':
- viterbi_loop_with_tricks <transpair_modelhmm,const hmm>(TRAIN_ARGS,h,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm,const hmm>(TEST_ARGS, h,(void*)0);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3>( TRAIN_ARGS, (void*)0,(void*)0);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3>( TEST_ARGS, (void*)0,(void*)0);
- break;
- default: abort();
- }
- break;
- case '4':
- {
- switch(fromModel)
- {
- case 'H':
- viterbi_loop_with_tricks <transpair_modelhmm,const hmm,d4model>(TRAIN_ARGS,h,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm,const hmm,d4model>(TEST_ARGS, h,&d4m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void,d4model>(TRAIN_ARGS, (void*)0,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void,d4model>( TEST_ARGS , (void*)0,&d4m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model,d4model>(TRAIN_ARGS , &d4m,&d4m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model,d4model>( TEST_ARGS, &d4m,&d4m);
- break;
- default: abort();
- }
- d4m.normalizeTable();
- if( dump_files )
- d4m.printProbTable(d4file.c_str(),d4file2.c_str());
- }
- break;
- case '5':
- {
- switch(fromModel)
- {
- case 'H':
- viterbi_loop_with_tricks <transpair_modelhmm,const hmm,d5model>(TRAIN_ARGS,h,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_modelhmm,const hmm,d5model>(TEST_ARGS, h,&d5m);
- break;
- case '3':
- viterbi_loop_with_tricks<transpair_model3, void,d5model>(TRAIN_ARGS, (void*)0,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model3, void,d5model>( TEST_ARGS , (void*)0,&d5m);
- break;
- case '4':
- viterbi_loop_with_tricks<transpair_model4, d4model,d5model>(TRAIN_ARGS, &d4m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model4, d4model,d5model>( TEST_ARGS, &d4m,&d5m);
- break;
- case '5':
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>(TRAIN_ARGS, &d5m,&d5m);
- if (testPerp && testHandler)
- viterbi_loop_with_tricks<transpair_model5, d5model, d5model>( TEST_ARGS, &d5m,&d5m);
- break;
- default: abort();
- }
- d5m.d4m.normalizeTable();
- if( dump_files )
- d5m.d4m.printProbTable(d4file.c_str(),d4file2.c_str());
- d5m.normalizeTable();
- if( dump_files )
- {
- ofstream d5output(d5file.c_str());
- d5output << d5m;
- }
- }
- break;
- default: abort();
- }
-
-#else
- viterbi_loop(perp, trainViterbiPerp, sHandler1, dump_files,
- alignfile.c_str(), true, model);
- if (testPerp && testHandler)
- viterbi_loop(*testPerp, *testViterbiPerp, *testHandler,
- dump_files, test_alignfile.c_str(), false, model);
-
-#endif
- if( errorsAL()<minErrors )
- {
- minErrors=errorsAL();
- minIter=it;
- }
-
- // now normalize count tables
- if( dump_files&&OutputInAachenFormat==1 )
- tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
- tTable.normalizeTable(Elist, Flist);
- aCountTable.normalize(aTable);
- dCountTable.normalize(dTable);
- nCountTable.normalize(nTable,&Elist.getVocabList());
-
- // cout << "tTable contains " <<
- // tTable.getHash().bucket_count() << " buckets and "<<
- //tTable.getHash().size() << " entries.\n";
-
- // normalize p1 & p0
-
- cout << "p0_count is " << p0_count << " and p1 is " << p1_count << "; ";
- if(P0!=-1.0)
- {
- p0 = P0;
- p1 = 1-P0;
- }
- else {
- if (p1_count + p0_count != 0){
- p1 = p1_count / ( p1_count + p0_count ) ;
- p0 = 1 - p1 ;
- }
- else {
- p1 = p0 = 0 ;
- cerr << "ERROR: p0_count+p1_count is zero!!!\n";
- }
- }
- cout << "p0 is " << p0 << " p1: " << p1 << '\n';
-
- cout << modelName<<": TRAIN CROSS-ENTROPY " << perp.cross_entropy()
- << " PERPLEXITY " << perp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ":("<<it<<" TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
- << " PERPLEXITY " << (*testPerp).perplexity() << " sum: " << (*testPerp).getSum()<<
- " wc: " << (*testPerp).word_count() << '\n';
- cout << modelName << ": ("<<it<<") TRAIN VITERBI CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
- << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
- if (testPerp && testHandler)
- cout << modelName << ": ("<<it<<")TEST VITERBI CROSS-ENTROPY " << (*testViterbiPerp).cross_entropy()
- << " PERPLEXITY " << (*testViterbiPerp).perplexity() << " Sum: " << (*testViterbiPerp).getSum() <<
- " wc: " << (*testViterbiPerp).word_count() << '\n';
- if (dump_files)
- {
- if( OutputInAachenFormat==0 )
- tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
- aTable.printTable(afile.c_str());
- dTable.printTable(dfile.c_str());
- nTable.printNTable(Elist.uniqTokens(), nfile.c_str(), Elist.getVocabList(), OutputInAachenFormat);
- ofstream of(p0file.c_str());
- of << p0;
- of.close();
- }
-
- it_fn = time(NULL) ;
- cout << "\n" << modelName << " Viterbi Iteration : "<<it<< " took: " <<
- difftime(it_fn, it_st) << " seconds\n";
- } /* of iterations */
- fn = time(NULL);
- cout << trainingString <<" Training Finished at: " << ctime(&fn) << "\n";
- cout << "\n" << "Entire Viterbi "<<trainingString<<" Training took: " << difftime(fn, st) << " seconds\n";
- cout << "==========================================================\n";
- if( noIterationsModel4||noIterationsModel5 )
- minIter-=noIterationsModel3;
- if( noIterationsModel5 )
- minIter-=noIterationsModel4;
- return minIter;
-}
-
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/model3.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/model3.h b/ext/giza-pp/GIZA++-v2/model3.h
deleted file mode 100644
index ea502e0..0000000
--- a/ext/giza-pp/GIZA++-v2/model3.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#ifndef _model3_h
-#define _model3_h 1
-#include <cassert>
-#include <iostream>
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include "Vector.h"
-#include <utility>
-
-
-#include <ctime>
-#include <fstream>
-#include <cmath>
-#include "MoveSwapMatrix.h"
-#include "TTables.h"
-#include "ATables.h"
-#include "NTables.h"
-#include "getSentence.h"
-#include "defs.h"
-#include "model2.h"
-#include "Perplexity.h"
-#include "transpair_model3.h"
-#include "transpair_modelhmm.h"
-#include "alignment.h"
-#include "vocab.h"
-#include "D4Tables.h"
-#include "AlignTables.h"
-
-class model3 : public model2
-{
-public:
- amodel<PROB> dTable;
- amodel<COUNT> dCountTable;
-
- PROB p0,p1;
- double p0_count, p1_count ;
-
- nmodel<PROB> nTable;
- nmodel<COUNT> nCountTable;
- hmm*h;
-
-public:
- void setHMM(hmm*_h){h=_h;}
- model3(model2& m2);
- ~model3();
- // methods
- void transfer(sentenceHandler&, bool, Perplexity&, Perplexity&,bool updateT=1);
- void transferSimple(sentenceHandler&, bool, Perplexity&, Perplexity&,bool updateT=1);
- void load_tables(const char *nfile, const char *dfile, const char *p0file);
-
- void em(int, sentenceHandler&);
- int viterbi(int, int, int,int);
-
-private:
- LogProb prob_of_special(Vector<WordIndex>&,
- Vector<WordIndex>&,
- tmodel<COUNT, PROB>&,
- Vector<WordIndex>&,
- Vector<WordIndex>&);
-
- LogProb prob_of_target_and_alignment_given_source(Vector<WordIndex>&,
- Vector<WordIndex>&,
- tmodel<COUNT, PROB>&,
- Vector<WordIndex>&,
- Vector<WordIndex>&);
- LogProb prob_of_target_given_source(tmodel<COUNT, PROB>&,
- Vector<WordIndex>&,
- Vector<WordIndex>&);
-
- LogProb scoreOfMove(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&,
- tmodel<COUNT, PROB>&, WordIndex, WordIndex);
-
- LogProb scoreOfSwap(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, tmodel<COUNT, PROB>&, int, int);
-
- void hillClimb(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&,
- LogProb&, tmodel<COUNT, PROB>&, int, int);
-
- void findBestAlignment(Vector<WordIndex>&, Vector<WordIndex>&,
- Vector<WordIndex>&, Vector<WordIndex>&,
- LogProb&,int , int);
-
-
- void findAlignmentsNeighborhood( Vector<WordIndex>&,
- Vector<WordIndex>&,
- LogProb&align_total_count,
- alignmodel&neighborhood,
- int, int);
- void collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs,
- const Vector<WordIndex>&,
- LogProb , float count);
- LogProb viterbi_model2(const transpair_model3&ef, alignment&output, int pair_no,int i_peg = -1 , int j_peg = -1 )const;
- LogProb _viterbi_model2(const transpair_model2&ef, alignment&output, int i_peg = -1 , int j_peg = -1 )const;
- LogProb viterbi_model2(const transpair_modelhmm&ef, alignment&output, int pair_no,int i_peg = -1 , int j_peg = -1 )const;
-
- private:
- void estimate_t_a_d(sentenceHandler& sHandler1, Perplexity& perp1, Perplexity& perp2,bool simple, bool dump_files,bool updateT);
- void viterbi_loop(Perplexity&, Perplexity&, sentenceHandler&, bool, const char*,bool,string model);
-
- template<class MODEL_TYPE, class A,class B>
- void viterbi_loop_with_tricks(Perplexity&, Perplexity&, sentenceHandler&,
- bool, const char*, bool, string model, bool final,A*d4m,B*d5m);
-
-};
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/model345-peg.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/model345-peg.cpp b/ext/giza-pp/GIZA++-v2/model345-peg.cpp
deleted file mode 100644
index 8c1bde6..0000000
--- a/ext/giza-pp/GIZA++-v2/model345-peg.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
-
-Copyright (C) 2000,2001 Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)
-
-This file is part of GIZA++ ( extension of GIZA ).
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model3.h"
-#include "collCounts.h"
-
-bool makeOneMoveSwap(const alignment&x,const alignment&y,set<OneMoveSwap>&soms)
-{
- OneMoveSwap oms;
- oms.type=0;
- int count=0;
- Vector<int> positions(4);
- assert(x.get_m()==y.get_m());
- for(PositionIndex j=1;j<=x.get_m();j++)
- if(x(j)!=y(j))
- {
- if(count==4)
- return 0;
- positions[count]=j;
- count++;
- }
- assert(count>0);
- if(count==1)
- {
- oms.type=1;
- oms.a=positions[0];
- oms.b=y(positions[0]);
- soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=3;
- oms.a=j;
- oms.b=x(positions[0]);
- soms.insert(oms);
- }
- }
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&x(j)==x(positions[0]))
- {
- oms.type=2;
- oms.a=positions[0];
- oms.b=j;
- if( oms.b<oms.a)swap(oms.b,oms.a);
- soms.insert(oms);
- }
- }
- return 1;
- }
- else if(count==2)
- {
- if(x(positions[0])==y(positions[1]) && x(positions[1])==y(positions[0]))
- {
- oms.type=4;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- for(unsigned int j=1;j<=x.get_m();++j)
- {
- if( int(j)!=positions[0]&&y(j)==y(positions[0]))
- {
- oms.type=2;oms.a=j;oms.b=positions[1];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- if( int(j)!=positions[1]&&y(j)==y(positions[1]))
- {
- oms.type=2;oms.a=j;oms.b=positions[0];if( oms.b<oms.a)swap(oms.b,oms.a);soms.insert(oms);
- }
- }
- }
- else if(x(positions[0])==y(positions[1]) )
- {
- oms.type=3;
- oms.a=positions[0];
- oms.b=x(positions[1]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
- else if( x(positions[1])==y(positions[0]) )
- {
- oms.type=3;
- oms.a=positions[1];
- oms.b=x(positions[0]);
- soms.insert(oms);
- oms.type=2;
- oms.a=positions[0];
- oms.b=positions[1];
- soms.insert(oms);
- }
- oms.type=3;
- oms.a=positions[0];
- oms.b=x(positions[0]);
- soms.insert(oms);
- oms.a=positions[1];
- oms.b=x(positions[1]);
- soms.insert(oms);
- return 1;
- }
- else if( count==3 )
- { // three differences and three different numbers
- Vector<int> xx(3),yy(3);
- xx[0]=x(positions[0]);xx[1]=x(positions[1]);xx[2]=x(positions[2]);
- yy[0]=y(positions[0]);yy[1]=y(positions[1]);yy[2]=y(positions[2]);
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];soms.insert(oms);
- oms.type=2;oms.a=positions[0];oms.b=positions[2];soms.insert(oms);
- oms.type=2;oms.a=positions[1];oms.b=positions[2];soms.insert(oms);
- }
- else
- {
- //cout << "HERE.\n";
- if( x(positions[0])==y(positions[1])&&x(positions[1])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[2];oms.b=x(positions[2]);soms.insert(oms);
- }
- if( x(positions[2])==y(positions[1])&&x(positions[1])==y(positions[2]) )
- {
- oms.type=2;oms.a=positions[2];oms.b=positions[1];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[0];oms.b=x(positions[0]);soms.insert(oms);
- }
- if( x(positions[0])==y(positions[2])&&x(positions[2])==y(positions[0]) )
- {
- oms.type=2;oms.a=positions[0];oms.b=positions[2];
- if( oms.b<oms.a) swap(oms.b,oms.a);
- soms.insert(oms);
- oms.type=3;oms.a=positions[1];oms.b=x(positions[1]);soms.insert(oms);
- }
- }
- return 1;
- }
- else if(count==4)
- {
- Vector<int> xx(4),yy(4);
- for(int i=0;i<4;++i)
- {
- xx[i]=x(positions[i]);
- yy[i]=y(positions[i]);
- }
- sort(xx.begin(),xx.end());
- sort(yy.begin(),yy.end());
- if(xx==yy)
- {
- oms.type=2;
- for(int j1=0;j1<4;j1++)
- for(int j2=j1+1;j2<4;j2++)
- {
- if(x(positions[j1])!=x(positions[j2])&&
- x(positions[j1])==y(positions[j2])&&
- x(positions[j2])==y(positions[j1]))
- {
- oms.type=2;oms.a=positions[j1];oms.b=positions[j2];
- soms.insert(oms);
- }
- }
- }
- return 1;
- }
- else
- return 0;
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/model3_viterbi.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/model3_viterbi.cpp b/ext/giza-pp/GIZA++-v2/model3_viterbi.cpp
deleted file mode 100644
index bf1e7ab..0000000
--- a/ext/giza-pp/GIZA++-v2/model3_viterbi.cpp
+++ /dev/null
@@ -1,656 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "model3.h"
-#include "utility.h"
-#include "Globals.h"
-
-
-LogProb model3::prob_of_target_and_alignment_given_source(Vector<WordIndex>& A,
- Vector<WordIndex>& Fert,
- tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& es)
-{
- LogProb total = 1.0 ;
- LogProb temp = 0.0 ;
- const LogProb zero = 0.0 ;
- WordIndex l = es.size()-1, m = fs.size()-1;
- WordIndex i, j ;
-
- total *= pow(double(1-p1), m-2.0 * Fert[0]) * pow(double(p1), double(Fert[0]));
- if (total == 0)
- return(zero);
- for (i = 1 ; i <= Fert[0] ; i++){ // loop caculates m-fert[0] choose fert[0]
- total *= double(m - Fert[0] - i + 1) / i ;
- if (total == 0)
- return(zero);
- }
- for (i = 1 ; i <= l ; i++){ // this loop calculates fertilities term
- total *= double(nTable.getValue(es[i], Fert[i])) * (LogProb) factorial(Fert[i]);
- if (total == 0)
- return(zero);
- }
- for (j = 1 ; j <= m ; j++){
- // temp = tTable.getValue(es[A[j]], fs[j]) ;
- temp = double(tTable.getProb(es[A[j]], fs[j])) ;
- total *= temp ;
- if (0 != A[j])
- total *= double(dTable.getValue(j, A[j], l, m));
- if (total == 0)
- return(zero);
- }
- return(total);
-}
-
-LogProb model3::prob_of_target_given_source(tmodel<COUNT, PROB>& tTable,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& es)
-{
-
- WordIndex x, y ;
- LogProb total = 0 ;
- // WordIndex l = es.size(), m = fs.size();
- WordIndex l = es.size()-1, m = fs.size()-1;
- Vector<WordIndex> A(fs.size(),/*-1*/0);
- Vector<WordIndex> Fert(es.size(),0);
- WordIndex i,j ;
-
- for ( x = 0 ; x < pow(l+1.0, double(m)) ; x++){ // For all possible alignmets A
- y = x ;
- // for (j = 1 ; j < m ; j++){
- for (j = 1 ; j <= m ; j++){
- A[j] = y % (l+1) ;
- y /= (l+1) ;
- }
- // for(i = 0 ; i < l ; i++)
- for(i = 0 ; i <= l ; i++)
- Fert[i] = 0 ;
- // for (j = 1 ; j < m ; j++)
- for (j = 1 ; j <= m ; j++)
- Fert[A[j]]++;
- // if (2 * Fert[0] < m){
- if (2 * Fert[0] <= m){ /* consider alignments that has Fert[0] less than
- half the length of french sentence */
- total += prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es);
- }
- }
- return(total);
-}
-
-
-LogProb model3::scoreOfMove(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& A,
- Vector<WordIndex>& Fert,
- tmodel<COUNT, PROB>& tTable,
- WordIndex j,
- WordIndex i)
- // returns the scaling factor of the original score if A[j] is linked to
- // i, no change is really made to A
- // but the score is calculated if the move is to be taken (i.e.
- // no side effects on Alignment A nor its Fertility Fert
- // If the value of the scaling factor is:
- // 1: then the score of the new alignment if the move is taken will
- // not change.
- // 0.5: the new score is half the score of the original alignment.
- // 2.0: the new score will be twice as much.
- //
-{
- // LogProb score;
- LogProb change ;
- WordIndex m, l ;
-
- m = fs.size() - 1;
- l = es.size() - 1;
-
-
- if (A[j] == i)
- // return(original_score);
- return(1) ;
- else if (A[j] == 0){ // a move from position zero to something else
- change = double(p0*p0)/p1 *
- (double((Fert[0]*(m-Fert[0]+1))) / ((m-2*Fert[0]+1)*(m-2*Fert[0]+2))) *
- (Fert[i]+1) *
- double(nTable.getValue(es[i], Fert[i]+1)) /
- double(nTable.getValue(es[i], Fert[i])) *
- double(tTable.getProb(es[i], fs[j])) /
- double(tTable.getProb(es[A[j]], fs[j])) *
- double(dTable.getValue(j, i, l, m));
- }
- else if (i == 0){ // a move to position zero
- change=
- ((double(p1) / (p0*p0)) *
- (double((m-2*Fert[0])*(m-2*Fert[0]-1))/((Fert[0]+1)*(m-Fert[0]))) *
- (double(1)/Fert[A[j]]) *
- double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) /
- double(nTable.getValue(es[A[j]], Fert[A[j]]))*
- double(tTable.getProb(es[i], fs[j])) /
- double(tTable.getProb(es[A[j]], fs[j])) *
- 1.0 / double(dTable.getValue(j, A[j], l, m)));
- }
- else{ // a move that does not involve position zero
- change =
- ((double(Fert[i]+1)/Fert[A[j]]) *
- double(nTable.getValue(es[A[j]], Fert[A[j]]-1)) /
- double(nTable.getValue(es[A[j]], Fert[A[j]])) *
- double(nTable.getValue(es[i], Fert[i]+1)) /
- double(nTable.getValue(es[i], Fert[i])) *
- double(tTable.getProb(es[i], fs[j]))/
- double(tTable.getProb(es[A[j]], fs[j])) *
- double(dTable.getValue(j, i, l, m))/
- double(dTable.getValue(j, A[j], l, m)));
- }
- return(change);
-}
-
-
-LogProb model3::scoreOfSwap(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& A,
- tmodel<COUNT, PROB>& tTable,
- int j1,
- int j2)
- // returns the scaling factor of the original score if the swap to
- // take place,
- // No side effects here (none of the parameters passed is changed!
- // (i.e. the alignment A is not really changed)
- // If the value of the scaling factor is:
- // 1: then the score of the new alignment if the move is taken will
- // not change.
- // 0.5: the new score is half the score of the original alignment.
- // 2.0: the new score will be twice as much.
- //
-{
- LogProb score ;
- WordIndex i1, i2, m, l ;
-
- m = fs.size() - 1 ;
- l = es.size() - 1 ;
- if (j1 == j2 || A[j1] == A[j2]) // if swapping same position return ratio 1
- return(1);
- else {
- i1 = A[j1] ;
- i2 = A[j2] ;
- score =
- double(tTable.getProb(es[i2], fs[j1]))/double(tTable.getProb(es[i1], fs[j1])) *
- double(tTable.getProb(es[i1], fs[j2]))/double(tTable.getProb(es[i2], fs[j2]));
- if (i1 != 0){
- score *= double(dTable.getValue(j2, i1, l, m))/double(dTable.getValue(j1, i1, l, m));
- }
- if (i2 != 0){
- score *= double(dTable.getValue(j1, i2, l, m))/double(dTable.getValue(j2, i2, l, m));
- }
- return(score);
- }
-}
-
-
-
-void model3::hillClimb(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& A,
- Vector<WordIndex>& Fert,
- LogProb& best_score,
- tmodel<COUNT, PROB>& tTable,
- int = -1,
- int j_peg = -1)
- // Hill climbing given alignment A .
- // Alignment A will be updated and also best_score
- // if no pegging is needed i_peg == -1, and j_peg == -1
-{
- WordIndex i, j, l, m, j1, old_i;
- LogProb change ;
- bool local_minima;
- int level = 0 ;
- LogProb best_change_so_far, best_change ;
- Vector<WordIndex> A_so_far;
- Vector<WordIndex> Fert_so_far;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- if (Log)
- logmsg << "\nStarting hill climbing with original score: " << best_score <<"\n";
- best_change = 1 ; // overall scaling factor (i.e. from the begining of climb
- do {
- best_change_so_far = 1 ; // best scaling factor of this level of hill climb
- local_minima = true ;
- for (j = 1 ; j <= m ; j++){
- if (int(j) != j_peg){ // make sure not to change the pegged link
- for (j1 = j + 1 ; j1 <= m; j1++){
- // for all possible swaps
- // make sure you are not swapping at same position
- if ((A[j] != A[j1]) && (int(j1) != j_peg)){
- // change = scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- change = scoreOfSwap(es, fs, A, tTable, j, j1);
- if (change > best_change_so_far){ // if better alignment found, keep it
- local_minima = false ;
- best_change_so_far = change ;
- A_so_far = A ;
- Fert_so_far = Fert ;
- old_i = A_so_far[j] ;
- A_so_far[j] = A_so_far[j1] ;
- A_so_far[j1] = old_i ;
- } // end of if (change > best_change_so_far)
- } // end of if (A[j] != A[j1] ..)
- } // of for (j1 = j+1 ....)
- // for (i = 0 ; i < l ; i++){ // all possible moves
- for (i = 0 ; i <= l ; i++){ // all possible moves
- if (i != A[j]){ // make sure not to move to same position
- if (i != 0 || (m >= 2 * (Fert[0]+1))){ // if moving to NULL word
- // (pos 0), make sure not to violate the fertility restriction
- // i.e. NULL can not take more than half the target words
- // change = scoreOfMove(es, fs, A, Fert, best_score, tTable, j, i);
- change = scoreOfMove(es, fs, A, Fert, tTable, j, i);
- if (change > best_change_so_far){ // if better alignment found, keep it
- best_change_so_far = change ;
- local_minima = false ;
- A_so_far = A ;
- Fert_so_far = Fert ;
- old_i = A_so_far[j] ;
- A_so_far[j] = i ;
- Fert_so_far[old_i]-- ;
- Fert_so_far[i]++ ;
- } // end of if (change > best_change_so_far)
- } // end of if ((i!=0) ...
- } // end of if (i != A[j] )
- } // end of for (i = 0 ; ....)
- } // end of if(j != j_peg)
- } // end of for (j = 1 ; ...)
- level++;
- if (!local_minima){
- if (best_change_so_far > 1){ // if current chage is improving
- A = A_so_far ;
- Fert = Fert_so_far ;
- best_change *= best_change_so_far ;
- }
- else{
- local_minima = true ;
- }
- } // end of if(!local_minima)
- if (Log)
- logmsg << "." ;
- if (level> 15)
- cerr << "." ;
- } while (local_minima == false);
- if (Log)
- logmsg << "\n" << "Hill Climb Level: " << level << " score: scaling old: " <<(best_score*best_change) ;
- if (level > 15)
- cerr << "\nHill Climb Level: " << level << " score: scaling old: " <<(best_score*best_change) ;
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es);
- if (Log)
- logmsg << " using new calc: " << best_score << '\n';
- if (level>15)
- cerr << " using new calc: " << best_score << '\n';
-}
-
-
-void model3::findBestAlignment(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- Vector<WordIndex>& A,
- Vector<WordIndex>& Fert,
- LogProb& best_score,
- /*tmodel<COUNT, PROB>& tTable,
- amodel<PROB>& aTable, */
- int i_peg = -1 ,
- int j_peg = -1 )
- // This finds the best Model2 alignment (i.e. no fertilities stuff) in A
- // for the given sentence pair. Its score is returned in A. Its fertility
- // info in Fert.
- // if j_peg == -1 && i_peg == -1 then No pegging is performed.
-{
- WordIndex i, j, l, m, best_i=0;
- LogProb temp, score, ss;
-
- l = es.size() - 1;
- m = fs.size() - 1;
- for (i=0 ; i <= l ; i++)
- Fert[i] = 0 ;
- ss = 1 ;
- if ((j_peg != -1) && (i_peg != -1)){ // if you're doing pegging
- A[j_peg] = i_peg ;
- Fert[i_peg] = 1 ;
- ss *= double(tTable.getProb(es[i_peg], fs[j_peg])) *
- double(aTable.getValue(i_peg, j_peg, l, m));
- }
- for (j = 1 ; j <= m ; j++){
- if (int(j) != j_peg){
- score = 0 ;
- for (i = 0 ; i <= l ; i++){
- // first make sure that connecting target word at pos j to source word
- // at pos i will not lead to a violation on Fertility restrictions
- // (e.g. maximum fertility for a word, max fertility for NULL word, etc)
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
- || (i != 0))){
- temp = double(tTable.getProb(es[i], fs[j])) *
- double(aTable.getValue(i, j, l, m));
- if (temp > score ){
- best_i = i ;
- score = temp ;
- } // end of if (temp > score)
- } // end of if (((i == 0 ...)
- } // end of for (i= 0 ...)
- if (score == 0){
- cerr << "WARNING: In searching for model2 best alignment\n " ;
- cerr << "Nothing was set for target token " << fs[j] <<
- "at position j: " << j << "\n";
- for (i = 0 ; i <= l ; i++){
- cerr << "i: " << i << "ttable("<<es[i]<<", "<<fs[j]<<") = " <<
- tTable.getProb(es[i], fs[j]) << " atable(" << i<<", "<<j<<", "<<
- l<<", "<<m<<") = "<< aTable.getValue(i, j, l, m) << " product " <<
- double(tTable.getProb(es[i], fs[j])) *
- double(aTable.getValue(i, j, l, m)) << '\n';
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
- || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
-
- } // end of if (score == 0)
- else {
- Fert[best_i]++ ;
- A[j] = best_i ;
- }
- ss *= score ;
- } // end of if (j != j_peg)
- } // end of for (j == 1 ; ...)
- if (ss <= 0){
- cerr << "WARNING: Model2 viterbi alignment has zero score for sentence pair:\n" ;
- printSentencePair(es, fs, cerr);
- }
- best_score = prob_of_target_and_alignment_given_source(A, Fert, tTable, fs, es);
- if (Log)
- logmsg << "finding best alignment : score : " << ss <<"p(f, a/e) = "<< best_score<<"\n";
-}
-
-void model3::collectCountsOverAlignement(const Vector<WordIndex>& es,
- const Vector<WordIndex>& fs,
- const Vector<WordIndex>& A,
- LogProb score,
- float count)
-{
- WordIndex j,i,l,m ;
- Vector<WordIndex> Fert(es.size(),0);
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
- score *= LogProb(count);
- COUNT temp = COUNT(score) ;
- for (i=0 ; i <= l ; i++)
- Fert[i] = 0 ;
- for (j = 1 ; j <= m ; j++){
- Fert[A[j]]++;
- tTable.incCount(es[A[j]], fs[j], temp);
- // tCountTable.getRef(es[A[j]], fs[j])+=score;
- if (A[j])
- dCountTable.getRef(j, A[j], l, m)+= temp ;
- aCountTable.getRef(A[j], j, l, m)+= temp ;
- }
- for(i = 0 ; i <= l ; i++)
- nCountTable.getRef(es[i], Fert[i])+= temp ;
- // p1_count += score * (LogProb) (Fert[0]) ;
- // p0_count += score * (LogProb) ((m - 2 * Fert[0])) ;
- p1_count += temp * (Fert[0]) ;
- p0_count += temp * ((m - 2 * Fert[0])) ;
-}
-
-
-
-void model3::findAlignmentsNeighborhood(Vector<WordIndex>& es,
- Vector<WordIndex>& fs,
- LogProb&align_total_count,
- alignmodel&neighborhood,
- int i_peg = -1,
- int j_peg = -1
- )
- // Finding the Neigborhood of a best viterbi alignment after hill climbing
- // if (i_peg == -1 and j_peg == -1, then No Pegging is done.
-{
- LogProb best_score,score;
- WordIndex i,j,l,m,old_i,j1;
- Vector<WordIndex> A(fs.size(),0);
- Vector<WordIndex> Fert(es.size(),0);
- time_t it_st;
-
- best_score = 0 ;
- l = es.size() - 1;
- m = fs.size() - 1;
- findBestAlignment(es, fs, A, Fert, best_score, /*tTable, aTable,*/ i_peg, j_peg);
- if (best_score == 0){
- cerr << "WARNING: viterbi alignment score is zero for the following pair\n";
- printSentencePair(es, fs, cerr);
- }
- hillClimb(es, fs, A, Fert, best_score, tTable, i_peg, j_peg);
- if (best_score <= 0){
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, cerr);
- if(Log){
- logmsg << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, logmsg);
- }
- }
- else { // best_score > 0
- // if (2 * Fert[0] < m ){
- if (2*Fert[0] <= m ){
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- if (neighborhood.insert(A, best_score)){
- align_total_count += best_score ;
- }
- }
- else { // else part is added for debugging / Yaser
- cerr << "WARNING:Best Alignment found violates Fertility requiremnets !!\n" ;
- for (i = 0 ; i <= l ; i++)
- cerr << "Fert["<<i<<"] = "<< Fert[i] << "\n";
- for (j = 1 ; j <= m ; j++){
- cerr << "A["<<j<<"] = "<< A[j] <<"\n";
- }
- cerr << "Condition violated : 2 * Fert[0] <= m " << 2*Fert[0] <<"?"<<
- m << "\n";
- } // end of added code for debugging // Yaser
- it_st = time(NULL) ;
-
- // Now find add all neighbors of the best alignmet to the collection
- for (j = 1 ; j <= m ; j++){
- for (j1 = j + 1 ; j1 <= m; j1++){ // all possible swaps
- if (A[j] != A[j1]){// make sure you are not swapping at same position
- // score = best_score * scoreOfSwap(es, fs, A, best_score, tTable, j, j1);
- score = best_score * scoreOfSwap(es, fs, A, tTable, j, j1);
- // ADD A and its score to list of alig. to collect counts over
- if (2 * Fert[0] <= m && score > 0){
- /* consider alignments that has Fert[0] less than
- half the number of words in French sentence */
- old_i = A[j] ;
- A[j] = A[j1] ;
- A[j1] = old_i ;
- if (neighborhood.insert(A, score)){
- align_total_count += score ;
- }
- // restore original alignment
- old_i = A[j] ;
- A[j] = A[j1] ;
- A[j1] = old_i ;
- }
- }
- }
- for (i = 0 ; i <= l ; i++){ // all possible moves
- if (i != A[j]){ // make sure not to move to same position
- if ((Fert[i]+1 < MAX_FERTILITY) &&
- ((i == 0 && (m >= 2*(Fert[0]+1))) || (i != 0))){
- // consider legal alignments only
- score = best_score * scoreOfMove(es, fs, A, Fert, tTable, j, i);
- // ADD A and its score to list of alig. to collect counts over
- if (score > 0){
- old_i = A[j] ;
- A[j] = i ;
- Fert[old_i]-- ;
- Fert[i]++ ;
- // add to list of alignemts here ******************
- if (neighborhood.insert(A, score)){
- align_total_count += score ;
- }
- // now resotre alignment and fertilities to previoud values
- A[j] = old_i ;
- Fert[old_i]++ ;
- Fert[i]-- ;
- } // end of if (score > 0)
- } // end of if (i == 0 ...)
- } // end of if (i != A[j])
- }// end of for(i = 0 ; ...)
- }// end of for (j = 1 ; ...)
- } // of else best_score <= 0
-}
-
-void model3::viterbi_loop(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model )
-{
- WordIndex i, j, l, m ;
- ofstream of2 ;
- int pair_no;
- LogProb temp;
-
- if (dump_files)
- of2.open(alignfile);
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear();
- sentPair sent ;
- while(sHandler1.getNextSentence(sent)){
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 1000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
- if (Log){
- logmsg << "Processing sentence pair:\n\t";
- printSentencePair(es, fs, logmsg);
- for (i = 0 ; i <= l ; i++)
- logmsg << Elist.getVocabList()[es[i]].word << " ";
- logmsg << "\n\t";
- for (j = 1 ; j <= m ; j++)
- logmsg << Flist.getVocabList()[fs[j]].word << " ";
- logmsg << "\n";
- }
-
- LogProb align_total_count=0;
- // LogProb best_score;
-
- Vector<WordIndex> viterbi_alignment;
- LogProb viterbi_score ;
- alignmodel neighborhood;
- neighborhood.clear();
- align_total_count = 0;
- findAlignmentsNeighborhood(/*tTable, aTable,*/ /*p1_count, p0_count,*/ es, fs, align_total_count, neighborhood) ;
- if (Peg){
- for (i = 0 ; i <= l ; i++)
- for (j = 1 ; j <= m ; j++){
- if ( (tTable.getProb(es[i], fs[j]) > PROB_SMOOTH) &&
- (aTable.getValue(i, j, l, m) > PROB_SMOOTH) &&
- (dTable.getValue(j, i, l, m) > PROB_SMOOTH))
- findAlignmentsNeighborhood(/*tTable, aTable,*/ /*p1_count,
- p0_count, */ es, fs, align_total_count, neighborhood, i, j);
- }
- }
- // Now Collect counts over saved neighborhoods
- viterbi_score = 0 ;
- if (Verbose)
- cerr << "\nCollecting counts over found alignments, total prob: "
- << align_total_count << "\n";
- if (Log)
- logmsg << "\nCollecting counts over found alignments, total prob: "
- << align_total_count << "\n";
- hash_map<Vector<WordIndex>, LogProb, hashmyalignment, equal_to_myalignment >::iterator align ;
- int acount = 0 ;
- if (align_total_count == 0 ){
- cerr << " WARNINIG: For the following sentence pair : \n";
- printSentencePair(es, fs, cerr);
- cerr << "The collection of alignments found have 0 probability!!\n";
- cerr << "No counts will be collected of it \n";
- if (Log){
- logmsg << "The collection of alignments found have 0 probability!!\n";
- logmsg << "No counts will be collected of it \n";
- }
- }
- else {
- if (collect_counts) {
- for(align = neighborhood.begin(); align != neighborhood.end(); align++){
- temp = (*align).second/align_total_count ;
- collectCountsOverAlignement(/*tTable, aCountTable, */es, fs, /*p1_count,
- p0_count ,*/ ((*align).first), temp , count);
- acount++;
- if (viterbi_score < temp){
- viterbi_alignment = ((*align).first);
- viterbi_score = temp;
- }
- }
- } // end of if (collect_counts)
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(viterbi_score)), count, l, m,0);
-
- if (Verbose){
- cerr << "Collected counts over "<<acount <<" (of "
- << pow(double(m), double(l+1)) <<") differnet alignments\n";
- cerr << "Bucket count of alignments hash: "<<
- neighborhood.getHash().bucket_count()<< ", size " <<
- neighborhood.getHash().size() << "\n";
- }
- if (Log){
- logmsg << "Collected counts over "<<acount <<" (of "
- << pow(double(m), double(l+1)) <<") differnet alignments\n";
- logmsg << "Bucket count of alignments hash: "<<
- neighborhood.getHash().bucket_count()<< "\n";
- }
- } // end of else
- // write best alignment (viterbi) for this sentence pair to alignment file
- if (collect_counts){
- if (viterbi_score <= 0){
- cerr << "Viterbi Alignment for this pair have score zero!!\n";
- of2 << "\n\n";
- }
- else {
- if (dump_files)
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, pair_no, viterbi_score);
- addAL(viterbi_alignment,sent.sentenceNo,l);
- }
- } // end of if (collect_counts)
- double period = difftime(time(NULL), sent_s);
- if (Log)
- logmsg << "processing this sentence pair ("<<l+1<<"x"<<m<<") : "<<
- (l+1)*m << " took : " << period << " seconds\n";
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- sHandler1.rewind();
- errorReportAL(cerr,model);
- perp.record(model);
- viterbiPerp.record(model);
- if (dump_files)
- of2.close();
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp b/ext/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
deleted file mode 100644
index 1bfb07f..0000000
--- a/ext/giza-pp/GIZA++-v2/model3_viterbi_with_tricks.cpp
+++ /dev/null
@@ -1,690 +0,0 @@
-/*
-
-EGYPT Toolkit for Statistical Machine Translation
-Written by Yaser Al-Onaizan, Jan Curin, Michael Jahr, Kevin Knight, John Lafferty, Dan Melamed, David Purdy, Franz Och, Noah Smith, and David Yarowsky.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-USA.
-
-*/
-#include "mystl.h"
-#include "model3.h"
-#include "collCounts.h"
-#include "utility.h"
-#include "Globals.h"
-#include "D5Tables.h"
-#include "transpair_model5.h"
-#include "transpair_modelhmm.h"
-#include "myassert.h"
-#include "Parameter.h"
-
-GLOBAL_PARAMETER(float,PrintN,"nbestalignments","for printing the n best alignments",PARLEV_OUTPUT,0);
-
-const short LogHillClimb=0,LogPeg=0;
-const short UseHMMViterbiAlignmentIfPossible=1;
-short DoViterbiTraining=0;
-
-GLOBAL_PARAMETER(int,VerboseSentence,"VerboseSentence","number of sentence for which a lot of information should be printed (negative: no output)",PARLEV_OUTPUT,-10);
-GLOBAL_PARAMETER(double,PEGGED_CUTOFF,"PEGGED_CUTOFF","relative cutoff probability for alignment-centers in pegging",PARLEV_OPTHEUR,3e-2);
-GLOBAL_PARAMETER2(float, COUNTINCREASE_CUTOFF_AL,"COUNTINCREASE CUTOFF AL","countCutoffAl","Counts increment cutoff threshold for alignments in training of fertility models",PARLEV_OPTHEUR,1e-5);
-
-int SentNr;
-bool UseLinkCache=1; /// optimization for pegging
-int NumberOfAlignmentsInSophisticatedCountCollection;
-
-extern bool ONLYALDUMPS;
-
-int PrintHillClimbWarning=0;
-int PrintZeroScoreWarning=0;
-
-
-LogProb model3::viterbi_model2(const transpair_modelhmm&ef, alignment&output, int
-#ifdef STORE_HMM_ALIGNMENTS
-pair_no
-#endif
-, int i_peg , int j_peg )const
-{
- static Vector<pair<alignment,LogProb> > viterbis;
- Vector<int>vit;
- int m=ef.get_m();
- int l=ef.get_l();
- double ret=0.0;
- //#define STORE_HMM_ALIGNMENTS
-#ifdef STORE_HMM_ALIGNMENTS
- if( i_peg==-1 && j_peg==-1 && viterbis.size()>pair_no )
- {
- output=viterbis[pair_no].first;
- ret=viterbis[pair_no].second;
- massert( ret==HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply );
- }
- else
- {
- ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++)
- {
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
- }
- if( i_peg==-1 && j_peg==-1 )
- {
- iassert(viterbis.size()==pair_no);
- viterbis.push_back(make_pair(output,ret));
- }
- }
-#else
- ret=HMMRealViterbi(*ef.net,vit,i_peg-1,j_peg-1)*ef.net->finalMultiply;
- for(int j=1;j<=m;j++)
- {
- if( vit[j-1]+1>l )
- output.set(j,0);
- else
- output.set(j,vit[j-1]+1);
- massert( (j==j_peg&&int(output(j))==i_peg) || j_peg!=j);
- }
-#endif
- massert( j_peg==-1 || int(output(j_peg))==i_peg );
- if( j_peg!=-1 )
- massert(int(output(j_peg))==i_peg);
- if( output.valid() )
- return ret;
- else
- {
- return _viterbi_model2(ef,output,i_peg,j_peg);
- }
-}
-
-LogProb model3::_viterbi_model2(const transpair_model2&ef, alignment&output, int i_peg, int j_peg)const
-{
- WordIndex best_i=0;
- LogProb ss=1;
- PositionIndex l = ef.get_l(), m=ef.get_m();
- Vector<WordIndex> Fert(l+1, (WordIndex)0);
- if ((j_peg != -1) && (i_peg != -1))
- {
- output.set(j_peg, i_peg);
- ss *= ef.get_t(i_peg, j_peg) * ef.get_a(i_peg, j_peg);
- if( ss==0 )
- cerr << "WARNING: already starting is zero: " << ef.get_t(i_peg, j_peg) << " " << ef.get_a(i_peg, j_peg) << '\n';
- }
- else
- ss=1;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- LogProb score = 0 ;
- for (PositionIndex i = 0 ; i <= l ; i++)
- {
- if( Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Fert[0] + 1))))
- {
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score )
- {
- best_i = i ;
- score = temp ;
- }
- }
- }
- if (score == 0){
- cerr << "WARNING: In searching for model2 best alignment\n";
- cerr << "Nothing was set for target token at position j: " << j << "\n";
- for (PositionIndex i = 0 ; i <= l ; i++){
- cerr << "i: " << i << "ttable("<<i<<", "<<j<<") = " <<
- ef.get_t(i, j) << " atable(" << i<<", "<<j<<", "<<
- l<<", "<<m<<") = "<< ef.get_a(i, j) << " product " <<
- ef.get_t(i, j) * ef.get_a(i, j) ;
- if ((Fert[i]+1 < MAX_FERTILITY) && ((i == 0 && (m >= 2*(Fert[0]+1)))
- || (i != 0)))
- cerr <<"Passed fertility condition \n";
- else
- cerr <<"Failed fertility condition \n";
- }
- }
- else
- {
- output.set(j, best_i);
- Fert[best_i]++;
- }
- ss *= score;
- }
- if (ss <= 0){
- //cerr << ef;
- cerr << "WARNING: Model2 viterbi alignment has zero score.\n" ;
- cerr << "Here are the different elements that made this alignment probability zero \n";
- cerr << "Source length " << l << " target length " << m << '\n';
- LogProb gg=1 ; // for debugging only .....
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg){
- LogProb score = 0 ;
- LogProb a = 0, t =0 ;
- for (PositionIndex i = 0 ; i <= l ; i++){
- // if( Debug_Fert[i]+1<MAX_FERTILITY && (i != 0 || m>=(2 * (Debug_Fert[0] + 1)))){
- LogProb temp = ef.get_t(i, j) * ef.get_a(i, j);
- if (temp > score ){
- score = temp ;
- best_i = i ;
- a = ef.get_a(i, j);
- t = ef.get_t(i, j) ;
- }
- // }
- }
- gg *= score ;
- cerr << "best: fs[" << j << "] "<< j <<" : es[" << best_i << "] " <<
- best_i << " , a: " << ef.get_a(best_i, j) << " t: " << t << " score " << score << " product : " << gg << " ss " <<
- ss << '\n';
- }
- for(PositionIndex i = 0 ; i <= l ; i++)
- cerr << "Fert["<<i<<"] selected " << Fert[i] << '\n';
- }
- massert(output.valid());
- return ss;
-}
-LogProb model3::viterbi_model2(const transpair_model3&ef, alignment&output, int pair_no,int i_peg , int j_peg )const
-{
- if( h&&UseHMMViterbiAlignmentIfPossible )
- {
- transpair_modelhmm efhmm(ef.E,ef.F,tTable,aTable,dTable,nTable,0.0,0.0,h);
- LogProb ret=viterbi_model2(efhmm,output,pair_no,i_peg,j_peg);
- massert(output.valid());
- return ret;
- }
- return _viterbi_model2(ef,output,i_peg,j_peg);
-}
-
-int HillClimbingSteps=0;
-
-template<class TRANSPAIR>
-LogProb greedyClimb_WithIBM3Scoring(MoveSwapMatrix<TRANSPAIR>&msc2,int j_peg=-1)
-{
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changed=0;
- int iter=0;
- bool hereVERB=0;
- do
- {
- MoveSwapMatrix<typename TRANSPAIR::simpler_transpair_model> msc_IBM3(msc2.get_ef(),alignment(msc2));
- vector<pair<double,OneMoveSwap> > msvec;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)
- if((aj != msc2(j1)) && (int(j1) != j_peg))
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cswap(j,j1),OneMoveSwap(1,j,j1)));
- for (PositionIndex i = 0 ; i <= l ; i++)
- if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- msvec.push_back(pair<double,OneMoveSwap>(-msc_IBM3.cmove(i,j),OneMoveSwap(2,i,j)));
- }
- sort(msvec.begin(),msvec.end());
- HillClimbingSteps++;
- int iused=-1;
- changed=0;
- for(unsigned int i=0;i<msvec.size()&&changed==0;++i)
- {
- LogProb csts;
- const OneMoveSwap &oms=msvec[i].second;
- if( oms.type==1&&(csts=msc2.cswap(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "SWAP: " << csts << '\n';
- msc2.doSwap(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- if( oms.type==2&&(csts=msc2.cmove(oms.a,oms.b))>1.0001 )
- {
- if( hereVERB==1 )
- cerr << "MOVE: " << csts << '\n';
- msc2.doMove(oms.a,oms.b);
- changed=1;
- iused=i;
- break;
- }
- }
- if( ++iter>30 )
- {
- //msc2.ef.verboseTP=1;
- hereVERB=1;
- cerr << "ERROR: more than 30 iterations in hill-climbing: " << iused
- << " improvement: " << msvec[iused].first << " value:" << msvec[iused].second
- << '\n' << msc2 << '\n';
- for(int a=0;a<20;++a)
- cout << a << ' ' << msvec[a].first << ' ' << msvec[a].second << '\n';
- //cerr << msvec << '\n';
- }
- if( iter>50 )
- break;
- } while(changed);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class TRANSPAIR>
-LogProb greedyClimb(MoveSwapMatrix<TRANSPAIR>&msc2, int j_peg = -1)
-{
- if( msc2.get_ef().greedyHillClimbing()==1 )
- return greedyClimb_WithIBM3Scoring(msc2,j_peg);
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changed=0;
- do
- {
- HillClimbingSteps++;
- changed=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg)&&msc2.cswap(j, j1) > 1.0)
- msc2.doSwap(j, j1), changed=1;
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY && msc2.cmove(i, j)>1.0)
- msc2.doMove(i, j), changed=1;
- }
- } while (changed);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class TRANSPAIR>
-LogProb hillClimb_std(MoveSwapMatrix<TRANSPAIR>&msc2, int= -1,int j_peg = -1)
-{
- if( msc2.isLazy() )
- return greedyClimb_WithIBM3Scoring(msc2,j_peg);
- if( LogHillClimb>1 )
- cout << msc2 << '\n';
- PositionIndex l = msc2.get_l(), m=msc2.get_m();
- int changes=0;
- int best_change_type=-1, best_change_v1=-1, best_change_v2=-1;
- do
- {
- HillClimbingSteps++;
- LogProb best_change_so_far = 1.00001 ;
- best_change_type=0;
- for (PositionIndex j = 1 ; j <= m ; j++)if (int(j) != j_peg)
- {
- WordIndex aj=msc2(j);
- for (PositionIndex j1 = j + 1 ; j1 <= m; j1++)if((aj != msc2(j1)) && (int(j1) != j_peg))
- {
- LogProb change = msc2.cswap(j, j1);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=1;
- best_change_v1=j;
- best_change_v2=j1;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- for (PositionIndex i = 0 ; i <= l ; i++)if(i != aj &&(i != 0 || (m >= 2 * (msc2.fert(0)+1))) && msc2.fert(i)+1<MAX_FERTILITY)
- {
- LogProb change = msc2.cmove(i, j);
- if (change > best_change_so_far)
- {
- best_change_so_far = change ;
- best_change_type=2;
- best_change_v1=j;
- best_change_v2=i;
- if( LogHillClimb )
- cerr << "CLIMB: " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << " " << best_change_so_far << msc2 << '\n';
- massert(msc2.get_ef().isSubOptimal()==1);
- }
- }
- }
- if (best_change_type==1)
- {
- msc2.doSwap(best_change_v1, best_change_v2);
- if( LogHillClimb )
- cerr << "SW-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- if (best_change_type==2)
- {
- msc2.doMove(best_change_v2, best_change_v1);
- if( LogHillClimb )
- cerr << "MO-CLIMB-DONE: " << j_peg << msc2 << '\n';
- }
- changes++;
- if( changes>40 )
- {
- if( PrintHillClimbWarning++<1000 )
- cerr << "WARNING: already " << changes << " iterations in hillclimb: " << best_change_so_far << " " << best_change_type << " " << best_change_v1 << " " << best_change_v2 << '\n';
- else if (PrintHillClimbWarning==1000)
- cerr << "ERROR: too many hill climbing warnings => I do not print more.\n";
- }
- if(changes>60 )
- {
- cerr << msc2 << '\n';
- break;
- }
- } while (best_change_type);
- return msc2.get_ef().prob_of_target_and_alignment_given_source(msc2);
-}
-
-template<class MODEL_TYPE>
-bool extendCenterList(Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >&setOfGoodCenters,MoveSwapMatrix<MODEL_TYPE> *msc,double peggedAlignmentScore)
-{
- unsigned int l=msc->get_ef().get_l();
- set<OneMoveSwap> alreadyCovered;
- for(unsigned int nr=0;nr<setOfGoodCenters.size();nr++)
- makeOneMoveSwap(*setOfGoodCenters[nr].first,*msc,alreadyCovered);
- for(set<OneMoveSwap>::const_iterator i=alreadyCovered.begin();i!=alreadyCovered.end();++i)
- {
- if( i->type==1||i->type==4)
- msc->delCenter();
- if( i->type==1 )
- {
- for(unsigned int ii=0;ii<=l;++ii)
- if( (*msc)(i->a)!=ii )
- msc->delMove(ii,i->a);
- }
- else if( i->type==2||i->type==4 )
- msc->delSwap(i->a,i->b);
- else if( i->type==3 )
- msc->delMove(i->b,i->a);
- else abort();
- }
- setOfGoodCenters.push_back(make_pair(msc,peggedAlignmentScore));
- return 1;
-}
-
-bool OldLog=0;
-short OldLogPeg=0,OldLogHillClimb=0;
-class Als
-{
-public:
- int s,a,b;
- double v;
- Als(int _s,int _a,int _b,double _v)
- : s(_s),a(_a),b(_b),v(_v) {}
-};
-
-inline bool operator<(const Als&x,const Als&y)
-{return x.v>y.v;}
-
-template<class MODEL_TYPE, class ADDITIONAL_MODEL_DATA_IN,class ADDITIONAL_MODEL_DATA_OUT>
-void model3::viterbi_loop_with_tricks(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1,
- bool dump_files, const char* alignfile,
- bool collect_counts, string model, bool final,
- ADDITIONAL_MODEL_DATA_IN*dm_in,
- ADDITIONAL_MODEL_DATA_OUT*dm_out)
-{
- ofstream *writeNBestErrorsFile=0;
- if( (dump_files||FEWDUMPS)&&PrintN&&ReferenceAlignment.size()>0 )
- {
- string x=alignfile+string("NBEST");
- writeNBestErrorsFile= new ofstream(x.c_str());
- }
- ofstream *of3=0;
- PositionIndex i, j, l, m ;
- ofstream of2;
- int pair_no;
- HillClimbingSteps=0;
- NumberOfAlignmentsInSophisticatedCountCollection=0;
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.open(alignfile);
- if( dump_files&&PrintN&&final )
- {
- string x=alignfile+string("NBEST");
- of3= new ofstream(x.c_str());
- }
- pair_no = 0 ; // sentence pair number
- // for each sentence pair in the corpus
- perp.clear() ; // clears cross_entrop & perplexity
- viterbiPerp.clear() ; // clears cross_entrop & perplexity
- sentPair sent ;
- int NCenter=0,NHillClimbed=0,NAlignment=0,NTotal=0,NBetterByPegging=0;
- while(sHandler1.getNextSentence(sent)){
- if( sent.eSent.size()==1||sent.fSent.size()==1 )
- continue;
- SentNr=sent.sentenceNo;
- Vector<WordIndex>& es = sent.eSent;
- Vector<WordIndex>& fs = sent.fSent;
- const float count = sent.getCount();
- if ((sent.sentenceNo % 10000) == 0)
- cerr <<sent.sentenceNo << '\n';
- time_t sent_s = time(NULL) ;
- pair_no++ ;
- l = es.size() - 1 ;
- m = fs.size() - 1 ;
- if (Log){
- logmsg << "Processing sentence pair:\n\t";
- printSentencePair(es, fs, logmsg);
- for (i = 0 ; i <= l ; i++)
- logmsg << Elist.getVocabList()[es[i]].word << " ";
- logmsg << "\n\t";
- for (j = 1 ; j <= m ; j++)
- logmsg << Flist.getVocabList()[fs[j]].word << " ";
- logmsg << "\n";
- }
-
- LogProb align_total_count=0;
- alignment viterbi2alignment(l,m);
- MODEL_TYPE ef(es,fs,tTable,aTable,dTable,nTable,p1,p0,dm_in);
- viterbi_model2(ef,viterbi2alignment,pair_no-1);
- Vector<pair<MoveSwapMatrix<MODEL_TYPE>*,LogProb> >setOfGoodCenters(1);
- set<alignment> alignments;
- MoveSwapMatrix<MODEL_TYPE> *best = (setOfGoodCenters[0].first = new MoveSwapMatrix<MODEL_TYPE>(ef, viterbi2alignment));
- MoveSwapMatrix<MODEL_TYPE> _viterbi(*best), *viterbi=&_viterbi; // please, don't delete this line (FJO)
- if (Log)
- logmsg << "VITERBI: " << alignment(_viterbi);
- if( ef.isSubOptimal() )
- setOfGoodCenters[0].second = hillClimb_std(*best);
- else
- {
- setOfGoodCenters[0].second = best->get_ef().prob_of_target_and_alignment_given_source(*best);
- if( setOfGoodCenters[0].second==0 )
- {
- cerr << "PROBLEM: alignment is 0.\n";
- best->get_ef().prob_of_target_and_alignment_given_source(*best,1);
- }
- }
- int bestAlignment=0;
-
-
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- alignments.insert(*best);
- if (setOfGoodCenters[bestAlignment].second <= 0){
- if( PrintZeroScoreWarning++<100 )
- {
- cerr << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- cerr << alignment(*setOfGoodCenters[bestAlignment].first) ;
- printSentencePair(es, fs, cerr);
- if(Log){
- logmsg << "WARNING: Hill Climbing yielded a zero score viterbi alignment for the following pair:\n";
- printSentencePair(es, fs, logmsg);
- }
- }
- else if(PrintZeroScoreWarning==100)
- {
- cerr << "ERROR: too many zero score warnings => no additional one will be printed\n";
- }
- setOfGoodCenters[bestAlignment].second=1e-300;
- continue;
- }
- int nHillClimbed=1,nAlignment=1;
- bool flagBetterByPegging=0;
- if ( Peg )
- {
- const MoveSwapMatrix<MODEL_TYPE> *useMatrix=viterbi; // it is faster using 'best', ... (FJO)
- Array2<short, vector<short> > linkCache(l+1, m+1, false);
- if(UseLinkCache)for(unsigned int j=1;j<=m;j++)linkCache((*useMatrix)(j), j)=1;
- for(PositionIndex j=1;j<=m;j++)for(PositionIndex i=0;i<=l;i++)
- {
- nAlignment++;
- if( i!=(*useMatrix)(j) && (UseLinkCache==0||linkCache(i,j)==0) &&
- ef.get_t(i,j)>ef.get_t((*useMatrix)(j),j)*PEGGED_CUTOFF &&
- (i != 0 || (m >= 2 * (useMatrix->fert(0)+1))))
- {
- MoveSwapMatrix<MODEL_TYPE> *BESTPEGGED=0;
- LogProb peggedAlignmentScore;
- nHillClimbed++;
- if( ef.isSubOptimal() )
- {
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(*useMatrix);
- BESTPEGGED->doMove(i, j);
- peggedAlignmentScore= hillClimb_std(*BESTPEGGED, i,j);
- }
- else
- {
- alignment pegAlignment(l,m);
- peggedAlignmentScore=viterbi_model2(ef,pegAlignment,pair_no-1,i,j);
- BESTPEGGED = new MoveSwapMatrix<MODEL_TYPE>(ef,pegAlignment);
- massert( pegAlignment(j)==i );
- }
- if(UseLinkCache)
- for(unsigned int j=1;j<=m;j++)
- linkCache((*BESTPEGGED)(j), j)=1;
- if( peggedAlignmentScore>setOfGoodCenters[bestAlignment].second*(LogProb)PEGGED_CUTOFF && alignments.count(*BESTPEGGED)==0 )
- {
- if(extendCenterList(setOfGoodCenters,BESTPEGGED,peggedAlignmentScore))
- {
- alignments.insert(*BESTPEGGED);
- if( peggedAlignmentScore>1.00001*setOfGoodCenters[bestAlignment].second )
- {
- if( LogPeg )
- {
- cerr << "found better alignment by pegging " << pair_no << " " << peggedAlignmentScore/setOfGoodCenters[bestAlignment].second << '\n';
- cerr << "NEW BEST: " << alignment(*BESTPEGGED);
- cerr << "OLD : " << alignment(*setOfGoodCenters[bestAlignment].first);
- }
- flagBetterByPegging=1;
- bestAlignment=alignments.size()-1;
- }
- }
- assert( differences(*BESTPEGGED, *best)!=0 );
- BESTPEGGED=0; }
- else
- delete BESTPEGGED;
- }
- }
- } // end of if(Peg)
- NBetterByPegging+=flagBetterByPegging;
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( LogPeg>1 )
- cout << "PEGGED: " << setOfGoodCenters.size() << " HILLCLIMBED:" << nHillClimbed << " TOTAL:" << nAlignment << " alignments." << '\n';
- int alTotal=collectCountsOverNeighborhood(setOfGoodCenters,es, fs, tTable, aCountTable,
- dCountTable, nCountTable, p1_count, p0_count,
- align_total_count, count, collect_counts, dm_out);
- if( LogPeg>1 )
- {
- cout << "ALL: " << alTotal << " from " << pow(float(l+1),float(m)) << '\n';
- massert(alTotal<=pow(double(l+1),double(m)));
- }
- NCenter+=setOfGoodCenters.size();NHillClimbed+=nHillClimbed;NAlignment+=nAlignment;NTotal+=alTotal;
- perp.addFactor(log(double(align_total_count)), count, l, m,0);
- viterbiPerp.addFactor(log(double(setOfGoodCenters[bestAlignment].second)), count, l, m,0);
- massert(log(double(setOfGoodCenters[bestAlignment].second)) <= log(double(align_total_count)));
- if (dump_files||(FEWDUMPS&&sent.sentenceNo<1000)||(final&&(ONLYALDUMPS)) )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, (setOfGoodCenters[bestAlignment].first)->getAlignment(), pair_no,
- setOfGoodCenters[bestAlignment].second);
- for(unsigned int i=0;i<setOfGoodCenters.size();++i)
- setOfGoodCenters[i].first->check();
- if( of3||(writeNBestErrorsFile&&pair_no<int(ReferenceAlignment.size())) )
- {
- vector<Als> als;
- for(unsigned int s=0;s<setOfGoodCenters.size();++s)
- {
- const MoveSwapMatrix<MODEL_TYPE>&msc= *setOfGoodCenters[s].first;
- msc.check();
- double normalized_ascore=setOfGoodCenters[s].second;
- if( !msc.isCenterDeleted() )
- als.push_back( Als(s,0,0,normalized_ascore) );
-
- for(WordIndex j=1;j<=m;j++)
- for(WordIndex i=0;i<=l;i++)
- if( i!=msc(j)&& !msc.isDelMove(i,j) )
- als.push_back( Als(s,i,j,msc.cmove(i,j)*normalized_ascore));
- for(PositionIndex j1=1;j1<=m;j1++)
- for(PositionIndex j2=j1+1;j2<=m;j2++)
- if( msc(j1)!=msc(j2) && !msc.isDelSwap(j1,j2) )
- als.push_back( Als(s,-j1,-j2,msc.cswap(j1,j2)*normalized_ascore));
- }
- sort(als.begin(),als.end());
- double sum=0,sum2=0;
- for(unsigned int i=0;i<als.size();++i)
- sum+=als[i].v;
- for(unsigned int i=0;i<min((unsigned int)als.size(),(unsigned int)PrintN);++i)
- {
- alignment x=*setOfGoodCenters[als[i].s].first;
- if( !(als[i].a==0 && als[i].b==0) )
- {
- if( als[i].a<=0&&als[i].b<=0 )
- x.doSwap(-als[i].a,-als[i].b);
- else
- x.doMove(als[i].a,als[i].b);
- }
- if( of3&&i<PrintN )
- printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(),*of3,x.getAlignment(), pair_no,
- als[i].v/sum*count);
- sum2+=als[i].v;
- if( writeNBestErrorsFile )
- {
- if( pair_no<int(ReferenceAlignment.size()) )
- {
- int ALmissing=0,ALtoomuch=0,ALeventsMissing=0,ALeventsToomuch=0;
- vector<double> scores;
- ErrorsInAlignment(ReferenceAlignment[pair_no-1],x.getAlignment(),l,ALmissing,ALtoomuch,ALeventsMissing,ALeventsToomuch,pair_no);
- ef.computeScores(x,scores);
- *writeNBestErrorsFile << ALmissing+ALtoomuch << ' ';
- for(unsigned int i=0;i<scores.size();++i)
- *writeNBestErrorsFile << ((scores[i]>0.0)?(-log(scores[i])):1.0e6) << ' ';
- *writeNBestErrorsFile << '\n';
- }
- }
- }
- if( writeNBestErrorsFile )
- *writeNBestErrorsFile << '\n';
- }
- addAL((setOfGoodCenters[bestAlignment].first)->getAlignment(),sent.sentenceNo,l);
- if (Log)
- logmsg << "processing this sentence pair ("<<l+1<<"x"<<m<<") : "<<
- (l+1)*m << " prob : " << align_total_count << " " << (setOfGoodCenters[bestAlignment].second) << alignment(*setOfGoodCenters[bestAlignment].first) << " \n";
- for(unsigned int i=0;i<setOfGoodCenters.size();i++)
- delete setOfGoodCenters[i].first;
- double period = difftime(time(NULL), sent_s);
- if (Verbose)
- cerr << "processing this sentence pair took : " << period
- << " seconds\n";
-
- } /* of sentence pair E, F */
- sHandler1.rewind();
- perp.record(model);
- errorReportAL(cerr,model);
- viterbiPerp.record(model);
- if (dump_files||FEWDUMPS||(final&&(ONLYALDUMPS)) )
- of2.close();
- delete of3;
- delete writeNBestErrorsFile;
- double FSent=pair_no;
- cout << "#centers(pre/hillclimbed/real): " << NAlignment/FSent << " " << NHillClimbed/FSent << " " << NCenter/FSent << " #al: " << NTotal/FSent << " #alsophisticatedcountcollection: " << NumberOfAlignmentsInSophisticatedCountCollection/FSent << " #hcsteps: " << HillClimbingSteps/FSent << '\n';
- cout << "#peggingImprovements: " << NBetterByPegging/FSent << '\n';
- }
-
-
-
-#include "collCounts.cpp"
-#define INSTANTIATE(A,B,C) template \
-void model3::viterbi_loop_with_tricks<A,B,C>(Perplexity& perp, Perplexity& viterbiPerp, sentenceHandler& sHandler1, \
- bool dump_files, const char* alignfile,bool collect_counts, string, bool final,\
- B*d4m,C*d5m);
-
-INSTANTIATE(transpair_model3, void, void);
-INSTANTIATE(transpair_modelhmm, const hmm, void);
-INSTANTIATE(transpair_modelhmm, const hmm, d4model);
-INSTANTIATE(transpair_modelhmm, const hmm, d5model);
-INSTANTIATE(transpair_model3, void,d4model);
-INSTANTIATE(transpair_model3, void,d5model);
-INSTANTIATE(transpair_model4, d4model,d4model);
-INSTANTIATE(transpair_model4, d4model,d5model);
-INSTANTIATE(transpair_model5, d5model,d5model);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/myassert.cpp
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/myassert.cpp b/ext/giza-pp/GIZA++-v2/myassert.cpp
deleted file mode 100644
index 2d49be8..0000000
--- a/ext/giza-pp/GIZA++-v2/myassert.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-#include "mystl.h"
-#include <iostream>
-#include "myassert.h"
-
-#ifndef STANDARD_ASSERT
-void myerror(int line,const char *file,const char *expression)
-{
- cerr << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
- cout << "(general.h):Assertion failed: '" << expression << "' ::: b "
- << file << ":" << line << endl;
-}
-void imyerror(int line,const char *file,const char *expression)
-{
- cerr << "Error: '" << expression << "' ::: in Source " << file
- << ":" << line << endl;
-}
-
-#endif
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/myassert.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/myassert.h b/ext/giza-pp/GIZA++-v2/myassert.h
deleted file mode 100644
index b648fdd..0000000
--- a/ext/giza-pp/GIZA++-v2/myassert.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef MY_ASSERT_DEFINED
-#define MY_ASSERT_DEFINED
-void myerror(int line,const char *file,const char *expression);
-void imyerror(int line,const char *file,const char *expression);
-
-#define iassert(expression) do {if (!(expression)) {imyerror(__LINE__,__FILE__,#expression);}} while (0)
-
-#
-#define massert(expr) do {} while(0)
-
-#define vassert(expr) do {} while(0)
-
-#include <cassert>
-
-#endif
-
-
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/mymath.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/mymath.h b/ext/giza-pp/GIZA++-v2/mymath.h
deleted file mode 100644
index f8ad926..0000000
--- a/ext/giza-pp/GIZA++-v2/mymath.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* ---------------------------------------------------------------- */
-/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
-/* Franz Josef Och */
-/* ---------------------------------------------------------------- */
-#ifndef HEADER_MYMATH_DEFINED
-#define HEADER_MYMATH_DEFINED
-inline double mfabs(double x){return (x<0)?(-x):x;}
-#include <math.h>
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ef91969a/ext/giza-pp/GIZA++-v2/mystl.h
----------------------------------------------------------------------
diff --git a/ext/giza-pp/GIZA++-v2/mystl.h b/ext/giza-pp/GIZA++-v2/mystl.h
deleted file mode 100644
index 2046e11..0000000
--- a/ext/giza-pp/GIZA++-v2/mystl.h
+++ /dev/null
@@ -1,321 +0,0 @@
-/* ---------------------------------------------------------------- */
-/* Copyright 1998 (c) by RWTH Aachen - Lehrstuhl fuer Informatik VI */
-/* Franz Josef Och */
-/* ---------------------------------------------------------------- */
-#ifndef MY_STL_H_DEFINED
-#define MY_STL_H_DEFINED
-
-#include <string>
-using namespace std;
-#ifdef USE_STLPORT
-#ifdef __STL_DEBUG
-using namespace _STLD;
-#else
-using namespace _STL;
-#endif
-#endif
-
-#include "myassert.h"
-#include <string>
-#include <utility>
-
-#include <unordered_map>
-#define hash_map unordered_map
-
-#include <vector>
-#include <iostream>
-#include "mymath.h"
-#include "Array2.h"
-
-#define over_string(a,i) for(unsigned int i=0;i<a.length();i++)
-#define over_array(a,i) for(i=(a).low();i<=(a).high();i++)
-#define backwards_array(a,i) for(i=(a).high();i>=(a).low();i--)
-#define over_arr(a,i) for(int i=(a).low();i<=(a).high();i++)
-#define over_arrMAX(a,i,max) for(int i=(a).low();i<=min((a).high(),max-1);i++)
-#define backwards_arr(a,i) for(int i=(a).high();i>=(a).low();i--)
-
-extern double n1mult,n2mult,n3mult;
-
-inline double realProb(int n1,int n2)
-{
- massert(n1<=n2);
- iassert(n1>=0&&n2>0);
- if(n2==0)n2=1;
- return ((double)n1)/(double)n2;
-}
-
-inline double verfProb(int n1,int n2)
-{
- double prob = realProb(n1,n2);
- if( n1==1 )return prob*n1mult;
- else if( n1==2 )return prob*n2mult;
- else if( n1==3 )return prob*n3mult;
- else
- return prob;
-}
-
-inline bool prefix(const string&x,const string&y)
-{
- if(y.size()>x.size() )
- return 0;
- for(unsigned int i=0;i<y.size();++i)
- if( y[i]!=x[i] )
- return 0;
- return 1;
-}
-
-
-/*template<class T>
-int lev(const T&s1,const T&s2)
-{
- Array2<int,vector<int> > a(s1.size()+1,s2.size()+1,1000);
- Array2<pair<int,int>,vector<pair<int,int> > > back(s1.size()+1,s2.size()+1,pair<int,int>(0,0));
- for(unsigned int i=0;i<=s1.size();i++)
- for(unsigned int j=0;j<=s2.size();j++)
- {
- if( i==0&&j==0 )
- a(i,j)=0;
- else
- {
- int aDEL=100,aINS=100,aSUB=100;
- if(i>0)
- aDEL=a(i-1,j)+1;
- if(j>0)
- aINS=a(i,j-1)+1;
- if(i>0&&j>0)
- aSUB=a(i-1,j-1)+ !(s1[i-1]==s2[j-1]);
- if( aSUB<=aDEL && aSUB<=aINS )
- {
- a(i,j)=aSUB;
- back(i,j)=pair<int,int>(i-1,j-1);
- }
- else if( aDEL<=aSUB && aDEL<=aINS )
- {
- a(i,j)=aDEL;
- back(i,j)=pair<int,int>(i-1,j);
- }
- else
- {
- a(i,j)=aINS;
- back(i,j)=pair<int,int>(i,j-1);
- }
- }
- }
- return a(s1.size(),s2.size());
-}
-
-template<class T>
-float rel_lev(const T&s1,const T&s2)
-{
- if( s1.size()==0 )
- return s2.size()==0;
- else
- return min(1.0,lev(s1,s2)/(double)s1.size());
-}*/
-
-template<class V> int Hash(const pair<V,V>&a)
-{ return Hash(a.first)+13001*Hash(a.second); }
-
-template<class T1,class T2>
-ostream& operator<<(ostream &out,const pair<T1,T2> &ir)
-{
- out << "(" << ir.first << "," << ir.second << ")";
- return out;
-}
-
-inline int Hash(const string& s)
-{
- int sum=0;
- string::const_iterator i=s.begin(),end=s.end();
- for(;i!=end;i++)sum=5*sum+(*i);
- return sum;
-}
-template<class A,class B,class C>
-class tri
-{
-public:
- A a;
- B b;
- C c;
- tri(){};
- tri(const A&_a,const B&_b,const C&_c)
- : a(_a),b(_b),c(_c) {}
-};
-template<class A,class B,class C>
-bool operator==(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{ return x.a==y.a&&x.b==y.b&&x.c==y.c;}
-
-template<class A,class B,class C>
-bool operator<(const tri<A,B,C>&x,const tri<A,B,C>&y)
-{
- if(x.a<y.a)return 1;
- if(y.a<x.a)return 0;
- if(x.b<y.b)return 1;
- if(y.b<x.b)return 0;
- if(x.c<y.c)return 1;
- if(y.c<x.c)return 0;
- return 0;
-}
-
-double used_time();
-
-template<class T>
-class my_hash
-{
-public:
- int operator()(const T&t)const {return Hash(t);}
-};
-
-inline int Hash(int value) { return value; }
-#define MY_HASH_BASE hash_map<A,B,my_hash<A> >
-
-template<class A,class B>
-class leda_h_array : public MY_HASH_BASE
-{
-private:
- B init;
-public:
- leda_h_array() : MY_HASH_BASE() {}
- leda_h_array(const B&_init)
- : MY_HASH_BASE(),init(_init) {}
- bool defined(const A&a) const
- { return find(a)!=this->end(); }
- const B&operator[](const A&a)const
- {
- typename MY_HASH_BASE::const_iterator pos=find(a);
- if( pos==this->end() )
- return init;
- else
- return pos->second;
- }
- B&operator[](const A&a)
- {
- typename MY_HASH_BASE::iterator pos=find(a);
- if( pos==this->end() )
- {
- insert(MY_HASH_BASE::value_type(a,init));
- pos=find(a);
- iassert(pos!=this->end());
- }
- return pos->second;
- }
- const B&initValue()const
- {return init;}
-};
-
-#define forall_defined_h(a,b,c,d) for(typename leda_h_array<a,b>::const_iterator __jj__=(d).begin();__jj__!=(d).end()&&((c=__jj__->first),1); ++__jj__)
-template<class T,class U>
-ostream & operator<<(ostream&out,const leda_h_array<T,U>&w)
-{
- T t;
- bool makeNl=0;
- out << "h_array{";
- forall_defined_h(T,U,t,w)
- {
- if( makeNl )
- out << "\n ";
- out << "EL:" << t << " INH:" << w[t] << ".";
- makeNl=1;
- }
- return out << "}\n";
-}
-
-template<class T,class U>
-istream & operator>>(istream&in,leda_h_array<T,U>&)
-{
- return in;
-}
-
-template<class A,class B>
-bool operator==(const leda_h_array<A,B>&p1,const leda_h_array<A,B>&p2)
-{
- A v;
- forall_defined_h(A,B,v,p1)
- if( !( p1[v]==p2[v]) ) return 0;
- forall_defined_h(A,B,v,p2)
- if( !( p1[v]==p2[v]) ) return 0;
- return 1;
-}
-
-template<class T>
-int count_elements(T a,T b)
-{
- int c=0;
- while(a!=b)
- {
- a++;
- c++;
- }
- return c;
-}
-
-template<class T>
-T normalize_if_possible_with_increment(T*a,T*b,int increment)
-{
- T sum=0;
- for(T*i=a;i!=b;i+=increment)
- sum+=*i;
- if( sum )
- for(T*i=a;i!=b;i+=increment)
- *i/=sum;
- else
- {
- T factor=increment/(b-a);
- for(T*i=a;i!=b;i+=increment)
- *i=factor;
- }
- return sum;
-}
-
-template<class T>
-inline int m_comp_3way(T a,T b,int n)
-{
- int _n=0;
- while((_n++<n) && a && b)
- {
- const typename T::value_type &aa=*a;
- const typename T::value_type &bb=*b;
- if( aa<bb )return 1;
- if( bb<aa )return -1;
- ++a;
- ++b;
- }
- return 0;
-}
-
-template<class T>
-void smooth_standard(T*a,T*b,double p)
-{
- int n=b-a;
- if( n==0 )
- return;
- double pp=p/n;
- for(T*i=a;i!=b;++i)
- *i = (1.0-p)*(*i)+pp;
-}
-
-template<class T>
-const T *conv(typename vector<T>::const_iterator i)
-{
- return &(*i);
-}
-#if __GNUC__>2
-template<class T>
-T *conv(typename vector<T>::iterator i)
-{
- return &(*i);
-}
-#endif
-
-/*template<class T>
-const T *conv(const T*x)
-{
- return x;
-}*/
-template<class T>
-T *conv(T*x)
-{
- return x;
-}
-
-#endif