You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by le...@apache.org on 2016/05/26 04:22:20 UTC

[03/14] incubator-joshua git commit: JOSHUA-252 Make it possible to use Maven to build Joshua

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9475d943/ext/symal/symal.cpp
----------------------------------------------------------------------
diff --git a/ext/symal/symal.cpp b/ext/symal/symal.cpp
deleted file mode 100644
index 8f1bac0..0000000
--- a/ext/symal/symal.cpp
+++ /dev/null
@@ -1,503 +0,0 @@
-// $Id$
-
-#include <cassert>
-#include <iomanip>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <list>
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <cstring>
-#include "cmd.h"
-
-using namespace std;
-
-#define MAX_WORD 10000 // size of the buffer getals reads each source/target word into
-#define MAX_M 200      // source sentence length must stay below this (asserted in getals)
-#define MAX_N 200      // target sentence length must stay below this (asserted in getals)
-
-#define UNION                      1 // symmetrization modes selectable with -a; main treats 0 as "none given"
-#define INTERSECT                  2
-#define GROW                       3
-#define SRCTOTGT                   4
-#define TGTTOSRC                   5
-#define BOOL_YES                   1 // values the BoolEnum table maps its names to
-#define BOOL_NO                    0
-
-#define END_ENUM    {   (char*)0,  0 } // entry that closes each Enum_T table in this file
-
-static Enum_T AlignEnum [] = { //names accepted by the -a / -alignment option in main, with the mode each selects
-  {    "union",                        UNION },
-  {    "u",                            UNION },
-  {    "intersect",                    INTERSECT},
-  {    "i",                            INTERSECT},
-  {    "grow",                         GROW },
-  {    "g",                            GROW },
-  {    "srctotgt",                     SRCTOTGT },
-  {    "s2t",                          SRCTOTGT },
-  {    "tgttosrc",                     TGTTOSRC },
-  {    "t2s",                          TGTTOSRC },
-  END_ENUM //closing entry
-};
-
-static Enum_T BoolEnum [] = { //names accepted by the yes/no options in main (-d, -f, -b, -v and their long forms)
-  {    "true",        BOOL_YES },
-  {    "yes",         BOOL_YES },
-  {    "y",           BOOL_YES },
-  {    "false",       BOOL_NO },
-  {    "no",          BOOL_NO },
-  {    "n",           BOOL_NO },
-  END_ENUM //closing entry
-};
-
-
-
-// global variables and constants
-
-int* fa; //0/1 flags of covered foreign (source) positions, indexed 1..m; allocated in main, reset by printgrow
-int* ea; //0/1 flags of covered english (target) positions, indexed 1..n; allocated in main, reset by printgrow
-int** A; //A[target][source] as filled by printgrow: 2 = point in the symmetrized set, -1 = link only in the direct alignment, 1 = link only in the inverse alignment, 0 = no link
-
-int verbose=0; //set from the -v / -verbose option in main
-
-//read an alignment pair from the input stream.
-
-int lc = 0; //number of records getals has started reading; printed in its diagnostics
-
-//Report a record that ends early or contains unreadable fields.
-//Always returns 0 so that getals can return the result directly.
-static int getals_fail()
-{
-  cerr << lc << ": truncated or malformed record\n";
-  return 0;
-}
-
-//Read and discard count whitespace-separated words from inp.
-//side ("target" or "source") only labels the diagnostic printed when a
-//word fills the whole MAX_WORD buffer. Stops early if the stream fails.
-static void getals_skipwords(fstream& inp,int count,const char* side)
-{
-  char w[MAX_WORD];
-  w[0]='\0';
-  for (int k=1; k<=count; k++) {
-    //setw bounds the extraction to MAX_WORD-1 characters plus the terminator
-    if (!(inp >> setw(MAX_WORD) >> w))
-      return;
-    const size_t len=strlen(w);
-    if (len>=MAX_WORD-1) {
-      cerr << lc << ": " << side << " len=" << len << " is not less than MAX_WORD-1="
-           << MAX_WORD-1 << endl;
-      assert(len<MAX_WORD-1);
-    }
-  }
-}
-
-//Read one sentence-pair record from inp.
-//Fields are consumed in this order: a leading integer, n, n target words,
-//a separator token, n inverse links, m, m source words, a separator
-//token, m direct links.
-//  m, n  receive the source / target sentence lengths
-//  a     receives the direct alignment in a[1..m], each value in 0..n
-//  b     receives the inverse alignment in b[1..n], each value in 0..m
-//Returns 1 when a complete record was read, 0 when no further record
-//starts or when the record is cut short. Every record that starts
-//increments the global lc. Length and range limits are enforced with
-//assert, exactly as before.
-int getals(fstream& inp,int& m, int *a,int& n, int *b)
-{
-  char dummy[10];
-  int i,j,freq;
-
-  if (!(inp >> freq))
-    return 0;
-  ++lc;
-
-  //target sentence
-  inp >> n;
-  if (!inp) return getals_fail(); //n must be valid before it bounds a loop
-  assert(n<MAX_N);
-  getals_skipwords(inp,n,"target");
-
-  //# separator; the width keeps an over-long token from overrunning dummy
-  inp >> setw(static_cast<int>(sizeof(dummy))) >> dummy;
-
-  // inverse alignment
-  for (i=1; i<=n; i++) inp >> b[i];
-
-  //source sentence
-  inp >> m;
-  if (!inp) return getals_fail();
-  assert(m<MAX_M);
-  getals_skipwords(inp,m,"source");
-
-  inp >> setw(static_cast<int>(sizeof(dummy))) >> dummy; //# separator
-
-  // direct alignment
-  for (j=1; j<=m; j++) inp >> a[j];
-
-  //any failed extraction above leaves the stream in a failed state
-  if (!inp) return getals_fail();
-
-  //check direct alignment
-  for (j=1; j<=m; j++)
-    assert(0<=a[j] && a[j]<=n);
-
-  //check inverse alignment (m was not known yet when b was read)
-  for (i=1; i<=n; i++)
-    assert(0<=b[i] && b[i]<=m);
-
-  return 1;
-}
-
-
-//Write the union of the direct (a) and inverse (b) alignments to out as
-//one line of 0-based "source-target" pairs separated by single blanks.
-//Links taken from a come first; links from b follow unless a already
-//holds the same link. An empty alignment gives an empty line.
-//Always returns 1.
-int prunionalignment(fstream& out,int m,int *a,int n,int* b)
-{
-  ostringstream line;
-  const char* gap="";  //nothing before the first pair, one blank before the others
-
-  for (int src=1; src<=m; ++src) {
-    const int tgt=a[src];
-    if (tgt==0) continue;
-    line << gap << src-1 << "-" << tgt-1;
-    gap=" ";
-  }
-
-  for (int tgt=1; tgt<=n; ++tgt) {
-    const int src=b[tgt];
-    if (src==0 || a[src]==tgt) continue;  //unaligned, or already printed from a
-    line << gap << src-1 << "-" << tgt-1;
-    gap=" ";
-  }
-
-  line << "\n";
-  out << line.str();
-  out.flush();
-
-  return 1;
-}
-
-
-//Write the intersection of the direct (a) and inverse (b) alignments to
-//out: one line of 0-based "source-target" pairs, separated by single
-//blanks, for every source position whose link is confirmed by b.
-//An empty intersection gives an empty line. Always returns 1.
-int printersect(fstream& out,int m,int *a,int n,int* b)
-{
-  ostringstream line;
-  const char* gap="";  //nothing before the first pair, one blank before the others
-
-  for (int src=1; src<=m; ++src) {
-    const int tgt=a[src];
-    if (tgt==0 || b[tgt]!=src) continue;  //keep only links present in both directions
-    line << gap << src-1 << "-" << tgt-1;
-    gap=" ";
-  }
-
-  line << "\n";
-  out << line.str();
-  out.flush();
-
-  return 1;
-}
-
-//Write the target-to-source (inverse) alignment b to out: one line of
-//0-based "source-target" pairs, separated by single blanks, for every
-//target position with a non-zero link. m and a are not used.
-//An empty alignment gives an empty line. Always returns 1.
-int printtgttosrc(fstream& out,int m,int *a,int n,int* b)
-{
-  ostringstream line;
-  const char* gap="";  //nothing before the first pair, one blank before the others
-
-  for (int tgt=1; tgt<=n; ++tgt) {
-    const int src=b[tgt];
-    if (src==0) continue;
-    line << gap << src-1 << "-" << tgt-1;
-    gap=" ";
-  }
-
-  line << "\n";
-  out << line.str();
-  out.flush();
-
-  return 1;
-}
-
-//Write the source-to-target (direct) alignment a to out: one line of
-//0-based "source-target" pairs, separated by single blanks, for every
-//source position with a non-zero link. n and b are not used.
-//An empty alignment gives an empty line. Always returns 1.
-int printsrctotgt(fstream& out,int m,int *a,int n,int* b)
-{
-  ostringstream line;
-  const char* gap="";  //nothing before the first pair, one blank before the others
-
-  for (int src=1; src<=m; ++src) {
-    const int tgt=a[src];
-    if (tgt==0) continue;
-    line << gap << src-1 << "-" << tgt-1;
-    gap=" ";
-  }
-
-  line << "\n";
-  out << line.str();
-  out.flush();
-
-  return 1;
-}
-
-//Compute Grow Diagonal Alignment
-//Nice property: you will never introduce more points
-//than the union alignment. Hence, you will always be able
-//to represent the grow alignment as the union of a
-//directed and inverted alignment
-//
-//  out            receives one line of 0-based "source-target" pairs
-//  m, a           source length and direct alignment a[1..m] (0 = no link)
-//  n, b           target length and inverse alignment b[1..n] (0 = no link)
-//  diagonal       also inspect the four diagonal neighbours while growing
-//  final          afterwards add leftover union points that cover a new word
-//  bothuncovered  with final: require both words of a point to be uncovered
-//Overwrites the global scratch arrays fa, ea and A. Always returns 1.
-
-int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false)
-{
-
-  ostringstream sout;
-
-  //(target,source) offsets of the neighbours inspected around each point;
-  //the order is kept because it decides which candidate is accepted first
-  vector <pair <int,int> > neighbors;
-
-  neighbors.push_back(make_pair(-1,0));
-  neighbors.push_back(make_pair(0,-1));
-  neighbors.push_back(make_pair(1,0));
-  neighbors.push_back(make_pair(0,1));
-
-  if (diagonal) {
-    neighbors.push_back(make_pair(-1,-1));
-    neighbors.push_back(make_pair(-1,1));
-    neighbors.push_back(make_pair(1,-1));
-    neighbors.push_back(make_pair(1,1));
-  }
-
-  //covered foreign and english positions
-
-  memset(fa,0,(m+1)*sizeof(int));
-  memset(ea,0,(n+1)*sizeof(int));
-
-  //matrix to quickly check if one point is in the symmetric
-  //alignment (value=2), only in the direct alignment (=-1) or only in
-  //the inverse alignment (=1)
-
-  for (int row=1; row<=n; row++) memset(A[row],0,(m+1)*sizeof(int));
-
-  //points are stored as (target,source)
-  set <pair <int,int> > currentpoints; //symmetric alignment
-  set <pair <int,int> > unionalignment; //union alignment
-
-  pair <int,int> point; //variable to store points
-  set<pair <int,int> >::const_iterator k; //iterator over sets
-
-  //fill in the alignments
-  for (int j=1; j<=m; j++) {
-    if (a[j]) {
-      unionalignment.insert(make_pair(a[j],j));
-      if (b[a[j]]==j) {
-        //link confirmed in both directions: start point of the growing
-        fa[j]=1;
-        ea[a[j]]=1;
-        A[a[j]][j]=2;
-        currentpoints.insert(make_pair(a[j],j));
-      } else
-        A[a[j]][j]=-1;
-    }
-  }
-
-  for (int i=1; i<=n; i++)
-    if (b[i] && a[b[i]]!=i) { //not intersection
-      unionalignment.insert(make_pair(i,b[i]));
-      A[i][b[i]]=1;
-    }
-
-
-  int added=1;
-
-  //repeat until a full scan adds no point; inserting into a std::set
-  //while iterating over it leaves the iterator k valid
-  while (added) {
-    added=0;
-    ///scan the current alignment
-    for (k=currentpoints.begin(); k!=currentpoints.end(); k++) {
-      for (size_t o=0; o<neighbors.size(); o++) {
-        point.first=k->first+neighbors[o].first;
-        point.second=k->second+neighbors[o].second;
-        //check if neighbor is inside 'matrix'
-        if (point.first>0 && point.first <=n && point.second>0 && point.second<=m) {
-          //check if neighbor is in the union alignment
-          if (b[point.first]==point.second || a[point.second]==point.first) {
-            //check if it connects at least one uncovered word
-            if (!(ea[point.first] && fa[point.second])) {
-              //insert point in currentpoints!
-              currentpoints.insert(point);
-              A[point.first][point.second]=2;
-              ea[point.first]=1;
-              fa[point.second]=1;
-              added=1;
-            }
-          }
-        }
-      }
-    }
-  }
-
-  if (final) {
-    //two passes over the union: first the points found only in the
-    //inverse alignment (marked 1), then those found only in the direct
-    //alignment (marked -1)
-    const int passes[2] = {1,-1};
-    for (int p=0; p<2; p++) {
-      for (k=unionalignment.begin(); k!=unionalignment.end(); k++) {
-        if (A[k->first][k->second]!=passes[p])
-          continue;
-        const bool ecovered = ea[k->first]!=0;
-        const bool fcovered = fa[k->second]!=0;
-        //bothuncovered: neither word may be covered yet;
-        //otherwise: at least one of the two words is not covered yet
-        const bool accept = bothuncovered ? (!ecovered && !fcovered)
-                                          : !(ecovered && fcovered);
-        if (accept) {
-          //add it!
-          currentpoints.insert(*k);
-          A[k->first][k->second]=2;
-          //keep track of new covered positions
-          ea[k->first]=1;
-          fa[k->second]=1;
-        }
-      }
-    }
-  }
-
-
-  //points come out ordered by (target,source) and print as source-target
-  for (k=currentpoints.begin(); k!=currentpoints.end(); k++)
-    sout << k->second-1 << "-" << k->first-1 << " ";
-
-
-  //fix the last " "
-  string str = sout.str();
-  if (str.length() == 0)
-    str = "\n";
-  else
-    str.replace(str.length()-1,1,"\n");
-
-  out << str;
-  out.flush();
-
-  return 1;
-}
-
-
-
-//Main file here
-//Parses the command line, opens the input and output streams, then
-//converts every record returned by getals with the symmetrization
-//selected by -a and reports the number of records on stderr.
-//Exits with status 1 when no mode is given or a stream cannot be opened.
-
-int main(int argc, char** argv)
-{
-
-  int alignment=0;
-  //writable buffers: binding a string literal to char* is ill-formed
-  //since C++11, and the option table below is handed the address of a char*
-  char defaultinput[]="/dev/stdin";
-  char defaultoutput[]="/dev/stdout";
-  char* input=defaultinput;
-  char* output=defaultoutput;
-  //ints rather than bools because they are registered as CMDENUMTYPE options
-  int diagonal=false;
-  int final=false;
-  int bothuncovered=false;
-
-
-  DeclareParams("a", CMDENUMTYPE,  &alignment, AlignEnum,
-                "alignment", CMDENUMTYPE,  &alignment, AlignEnum,
-                "d", CMDENUMTYPE,  &diagonal, BoolEnum,
-                "diagonal", CMDENUMTYPE,  &diagonal, BoolEnum,
-                "f", CMDENUMTYPE,  &final, BoolEnum,
-                "final", CMDENUMTYPE,  &final, BoolEnum,
-                "b", CMDENUMTYPE,  &bothuncovered, BoolEnum,
-                "both", CMDENUMTYPE,  &bothuncovered, BoolEnum,
-                "i", CMDSTRINGTYPE, &input,
-                "o", CMDSTRINGTYPE, &output,
-                "v", CMDENUMTYPE,  &verbose, BoolEnum,
-                "verbose", CMDENUMTYPE,  &verbose, BoolEnum,
-
-                (char *)NULL);
-
-  GetParams(&argc, &argv, (char*) NULL);
-
-  if (alignment==0) {
-    //lists every short mode name accepted by AlignEnum
-    cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g|s2t|t2s] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
-         << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
-
-    exit(1);
-
-  }
-
-  fstream inp(input,ios::in);
-  fstream out(output,ios::out);
-
-  if (!inp.is_open()) {
-    cerr << "cannot open " << input << "\n";
-    exit(1);
-  }
-
-  if (!out.is_open()) {
-    cerr << "cannot open " << output << "\n";
-    exit(1);
-  }
-
-  //announce the selected mode; an unknown mode ends the program
-  switch (alignment) {
-  case UNION:
-    cerr << "symal: computing union alignment\n";
-    break;
-  case INTERSECT:
-    cerr << "symal: computing intersect alignment\n";
-    break;
-  case GROW:
-    cerr << "symal: computing grow alignment: diagonal ("
-         << diagonal << ") final ("<< final << ")"
-         <<  "both-uncovered (" << bothuncovered <<")\n";
-    break;
-  case TGTTOSRC:
-    cerr << "symal: computing target-to-source alignment\n";
-    break;
-  case SRCTOTGT:
-    cerr << "symal: computing source-to-target alignment\n";
-    break;
-  default:
-    exit(1);
-  }
-
-  //a and b are indexed from 1; getals asserts m<MAX_M and n<MAX_N
-  int a[MAX_M],b[MAX_N],m,n;
-
-  //scratch storage used by printgrow; row 0 of A is never allocated or used
-  fa=new int[MAX_M+1];
-  ea=new int[MAX_N+1];
-  A=new int *[MAX_N+1];
-  for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
-
-  //one output line per input record, counted for every mode (GROW included)
-  int sents = 0;
-  while (getals(inp,m,a,n,b)) {
-    switch (alignment) {
-    case UNION:
-      prunionalignment(out,m,a,n,b);
-      break;
-    case INTERSECT:
-      printersect(out,m,a,n,b);
-      break;
-    case GROW:
-      printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
-      break;
-    case TGTTOSRC:
-      printtgttosrc(out,m,a,n,b);
-      break;
-    case SRCTOTGT:
-      printsrctotgt(out,m,a,n,b);
-      break;
-    }
-    sents++;
-  }
-  cerr << "Sents: " << sents << endl;
-
-  delete [] fa;
-  delete [] ea;
-  for (int i=1; i<=MAX_N; i++) delete [] A[i];
-  delete [] A;
-
-  //return instead of exit(0) so that inp and out are destroyed and closed
-  return 0;
-}