You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/22 03:08:12 UTC

[1/4] incubator-joshua git commit: Adding the CHRF metric for tuning

Repository: incubator-joshua
Updated Branches:
  refs/heads/master 55e88d1fc -> c01ce779c


Adding the CHRF metric for tuning

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/52c83d59
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/52c83d59
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/52c83d59

Branch: refs/heads/master
Commit: 52c83d5930c74e15186c5710b9b47b97d2a9230b
Parents: 55e88d1
Author: antot <an...@riseup.net>
Authored: Tue Jun 21 17:12:40 2016 +0100
Committer: antot <an...@riseup.net>
Committed: Tue Jun 21 17:12:40 2016 +0100

----------------------------------------------------------------------
 .../java/org/apache/joshua/metrics/CHRF.java    | 302 +++++++++++++++++++
 .../apache/joshua/metrics/EvaluationMetric.java |   6 +-
 2 files changed, 307 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/52c83d59/src/main/java/org/apache/joshua/metrics/CHRF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/CHRF.java b/src/main/java/org/apache/joshua/metrics/CHRF.java
new file mode 100644
index 0000000..f490f58
--- /dev/null
+++ b/src/main/java/org/apache/joshua/metrics/CHRF.java
@@ -0,0 +1,302 @@
+/*
+ * Copyright 2016 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.joshua.metrics;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.logging.Logger;
+
+
+/**
+ *
+ * An implementation of the chrF evaluation metric for tuning.
+ * It is based on the original code by Maja Popovic [1] with the following main modifications:
+ * - Adapted to extend Joshua's EvaluationMetric class
+ * - Use of a length penalty to prevent chrF to prefer too long (with beta>1) or too short (with beta<1) translations
+ * - Use of hash tables for efficient n-gram matching
+ * 
+ * If you use this metric in your research please cite [2]
+ * 
+ * [1] Maja Popovic. 2015. chrF: character n-gram F-score for automatic MT evaluation.
+ * In Proceedings of the Tenth Workshop on Statistical Machine Translation. Lisbon, Portugal, pages 392\u2013395.
+ * [2] Víctor Sánchez Cartagena and Antonio Toral. 2016. 
+ * Abu-MaTran at WMT 2016 Translation Task: Deep Learning, Morphological Segmentation and Tuning on Character Sequences.
+ * In Proceedings of the First Conference on Machine Translation (WMT16). Berlin, Germany.
+ *
+ * @author Antonio Toral
+ */
+public class CHRF extends EvaluationMetric {
+    private static final Logger logger = Logger.getLogger(CHRF.class.getName());
+
+    protected double beta = 3;
+    protected double factor;
+    protected int maxGramLength = 6; // The maximum n-gram we care about
+    //private double[] nGramWeights; //TODO to weight them differently
+    
+    //private String metricName;
+    //private boolean toBeMinimized;
+    //private int suffStatsCount;
+    
+
+  public CHRF()
+  {
+      this(3, 6);
+  }
+          
+  public CHRF(String[] CHRF_options)
+  {
+    //
+    //
+    // process the Metric_options array
+    //
+    //
+    this(Double.parseDouble(CHRF_options[0]), Integer.parseInt(CHRF_options[1]));
+  }
+  
+  public CHRF(double bt, int mxGrmLn){   
+    if (bt > 0) {
+      beta = bt;
+    } else {
+      logger.severe("Beta must be positive");
+      System.exit(1);
+    }
+    
+    if (mxGrmLn >= 1) {
+      maxGramLength = mxGrmLn;
+    } else {
+      logger.severe("Maximum gram length must be positive");
+      System.exit(1);
+    }
+    
+    initialize(); // set the data members of the metric
+  }
+
+  protected void initialize()
+  {
+    metricName = "CHRF";
+    toBeMinimized = false;
+    suffStatsCount = 4 * maxGramLength;
+    factor = Math.pow(beta, 2);    
+  }
+  
+  public double bestPossibleScore() { return 100.0; }
+ 
+  public double worstPossibleScore() { return 0.0; }
+
+  protected String separateCharacters(String s)
+  {
+    String s_chars = "";  
+    //alternative implementation (the one below seems more robust)
+    /*for (int i = 0; i < s.length(); i++) {
+        if (s.charAt(i) == ' ') continue;
+        s_chars += s.charAt(i) + " ";  
+    }
+    System.out.println("CHRF separate chars1: " + s_chars);*/
+
+    String[] words = s.split("\\s+");
+    for (String w: words) {
+        for (int i = 0; i<w.length(); i++)
+            s_chars += w.charAt(i);
+    }
+
+    //System.out.println("CHRF separate chars: " + s_chars);
+    return s_chars;
+  }
+
+  
+  protected HashMap<String, Integer>[] getGrams(String s)
+  {
+    HashMap<String, Integer>[] grams = new HashMap[1 + maxGramLength];
+    grams[0] = null;
+    for (int n = 1; n <= maxGramLength; ++n) {
+      grams[n] = new HashMap<String, Integer>();
+    }
+
+    
+    for (int n=1; n<=maxGramLength; n++){
+      String gram = "";
+      for (int i = 0; i < s.length() - n + 1; i++){
+          gram = s.substring(i, i+n);
+          if(grams[n].containsKey(gram)){
+            int old_count = grams[n].get(gram);
+            grams[n].put(gram, old_count+1);
+          } else {
+            grams[n].put(gram, 1);
+          }
+      }
+      
+    }
+
+    /* debugging
+    String key, value;
+    for (int n=1; n<=maxGramLength; n++){
+      System.out.println("Grams of order " + n);
+      for (String gram: grams[n].keySet()){
+        key = gram.toString();
+        value = grams[n].get(gram).toString();
+        System.out.println(key + " " + value); 
+      }
+    }*/
+    
+    return grams;
+  }
+
+  
+  protected int[] candRefErrors(HashMap<String, Integer> ref, HashMap<String, Integer> cand)
+  {
+      int[] to_return = {0,0};
+      String gram;
+      int cand_grams = 0, ref_grams = 0;
+      int candGramCount = 0, refGramCount = 0;
+      int errors = 0;
+      double result = 0;
+      String not_found = "";
+      
+              
+        Iterator<String> it = (cand.keySet()).iterator();
+
+        while (it.hasNext()) {
+            gram = it.next();
+            candGramCount = cand.get(gram);
+            cand_grams += candGramCount;
+            if (ref.containsKey(gram)) {
+                refGramCount = ref.get(gram);
+                ref_grams += refGramCount;
+                if(candGramCount>refGramCount){
+                    int error_here = candGramCount - refGramCount;
+                    errors += error_here;
+                    not_found += gram + " (" + error_here + " times) ";
+                }
+            } else {
+                refGramCount = 0;
+                errors += candGramCount;
+                not_found += gram + " ";
+            }      
+        }
+      
+      //System.out.println("  Ngrams not found: " + not_found);
+      
+      to_return[0] = cand_grams;
+      to_return[1] = errors;
+      
+      return to_return;
+  }
+  
+  public int[] suffStats(String cand_str, int i) //throws Exception
+  {
+    int[] stats = new int[suffStatsCount];
+
+    double[] precisions = new double[maxGramLength+1];
+    double[] recalls = new double[maxGramLength+1];
+    
+    //TODO check unicode chars correctly split
+    String cand_char = separateCharacters(cand_str);
+    String ref_char = separateCharacters(refSentences[i][0]);
+    
+    HashMap<String, Integer>[] grams_cand = getGrams(cand_char);
+    HashMap<String, Integer>[] grams_ref = getGrams(ref_char);
+    
+    for (int n = 1; n <= maxGramLength; ++n) {
+        //System.out.println("Calculating precision...");
+        int[] precision_vals = candRefErrors(grams_ref[n], grams_cand[n]);
+        //System.out.println("  length: " + precision_vals[0] + ", errors: " + precision_vals[1]);
+        //System.out.println("Calculating recall...");
+        int[] recall_vals = candRefErrors(grams_cand[n], grams_ref[n]);
+        //System.out.println("  length: " + recall_vals[0] + ", errors: " + recall_vals[1]);
+         
+        stats[4*(n-1)] = precision_vals[0]; //cand_grams
+        stats[4*(n-1)+1] = precision_vals[1]; //errors (precision)
+        stats[4*(n-1)+2] = recall_vals[0]; //ref_grams
+        stats[4*(n-1)+3] = recall_vals[1]; //errors (recall)
+    }
+
+    return stats;
+  }
+
+
+  public double score(int[] stats)
+  {
+    int precision_ngrams, recall_ngrams, precision_errors, recall_errors;
+    double[] precisions = new double[maxGramLength+1];
+    double[] recalls = new double[maxGramLength+1];
+    double[] fs = new double[maxGramLength+1];
+    //double[] scs = new double[maxGramLength+1];
+    double totalPrecision = 0, totalRecall = 0, totalF = 0, totalSC = 0;
+    double lp = 1;
+    
+    if (stats.length != suffStatsCount) {
+      System.out.println("Mismatch between stats.length and suffStatsCount (" + stats.length + " vs. " + suffStatsCount + ") in NewMetric.score(int[])");
+      System.exit(1);
+    }
+
+    for (int n = 1; n <= maxGramLength; n++) {
+      precision_ngrams = stats[4 * (n - 1)];
+      precision_errors = stats[4 * (n - 1) + 1];
+      recall_ngrams = stats[4 * (n - 1) + 2];
+      recall_errors = stats[4 * (n - 1) + 3];
+
+      if (precision_ngrams != 0)
+        precisions[n] = 100 - 100*precision_errors/ (double)precision_ngrams;
+      else precisions[n] = 0;
+      
+      if (recall_ngrams != 0)
+        recalls[n] = 100 - 100*recall_errors/ (double)recall_ngrams;
+      else
+        recalls[n] = 0;
+              
+      if(precisions[n] != 0 || recalls[n] != 0)
+        fs[n] = (1+factor) * recalls[n] * precisions[n] / (factor * precisions[n] + recalls[n]);
+      else
+        fs[n] = 0;
+      
+      //System.out.println("Precision (n=" + n + "): " + precisions[n]);
+      //System.out.println("Recall (n=" + n + "): " + recalls[n]);
+      //System.out.println("F (n=" + n + "): " + fs[n]);
+
+      totalPrecision += (1/(double)maxGramLength) * precisions[n];
+      totalRecall += (1/(double)maxGramLength) * recalls[n];
+      totalF += (1/(double)maxGramLength) * fs[n];
+    }
+
+    //length penalty
+    if (beta>1){ //penalise long translations 
+        lp = Math.min(1, stats[2]/(double)stats[0]);
+    } else if (beta < 1){ //penalise short translations
+        lp = Math.min(1, stats[0]/(double)stats[2]);
+    }
+    totalSC = totalF*lp;
+        
+    //System.out.println("Precision (total): " + totalPrecision);
+    //System.out.println("Recall (total):" + totalRecall);
+    //System.out.println("F (total): " + totalF);
+    
+    return totalSC;
+  }
+
+
+  public void printDetailedScore_fromStats(int[] stats, boolean oneLiner)
+  {
+    System.out.println(metricName + " = " + score(stats));
+
+    //
+    //
+    // optional (for debugging purposes)
+    //
+    //
+  }
+
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/52c83d59/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
index 9a8786c..9ac77f1 100644
--- a/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
+++ b/src/main/java/org/apache/joshua/metrics/EvaluationMetric.java
@@ -77,6 +77,7 @@ public abstract class EvaluationMetric {
     metricOptionCount.put("PRECIS-SRC_BLEU", 6);
     metricOptionCount.put("GL_BLEU", 3);
     metricOptionCount.put("SARI", 2); // options: max-ngram source-path
+    metricOptionCount.put("CHRF", 2); // options: beta (how much to weight recall vs precision) and max-ngram
   }
 
   public static EvaluationMetric getMetric(String metricName, String[] metricOptions) {
@@ -117,7 +118,10 @@ public abstract class EvaluationMetric {
                                                      // GradeLevelBLEU class
     } else if (metricName.equals("SARI")) { 
       retMetric = new SARI(metricOptions);
-    } 
+    
+    } else if (metricName.equals("CHRF")) {
+        retMetric = new CHRF(metricOptions);
+    }
     
     return retMetric;
   }


[3/4] incubator-joshua git commit: don't return formatted string; updated test case

Posted by mj...@apache.org.
don't return formatted string; updated test case


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/6bf0c848
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/6bf0c848
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/6bf0c848

Branch: refs/heads/master
Commit: 6bf0c8489dcae5d9152770b048409a2689c0526d
Parents: 32a5003
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Jun 21 23:03:19 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Jun 21 23:03:19 2016 -0400

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/decoder/io/JSONMessage.java | 2 +-
 src/test/resources/server/http/expected                     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6bf0c848/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
index 9c3899e..6ec3e57 100644
--- a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
+++ b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
@@ -77,7 +77,7 @@ public class JSONMessage {
     TranslationItem item = addTranslation(viterbi);
 
     for (StructuredTranslation hyp: translation.getStructuredTranslations()) {
-      String text = hyp.getFormattedTranslationString();
+      String text = hyp.getTranslationString();
       float score = hyp.getTranslationScore();
 
       item.addHypothesis(text, score);

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6bf0c848/src/test/resources/server/http/expected
----------------------------------------------------------------------
diff --git a/src/test/resources/server/http/expected b/src/test/resources/server/http/expected
index 11ea273..d0a254b 100644
--- a/src/test/resources/server/http/expected
+++ b/src/test/resources/server/http/expected
@@ -11,5 +11,6 @@
         ]
       }
     ]
-  }
+  },
+  "metadata": []
 }


[4/4] incubator-joshua git commit: Reverted commit 6d2213a20b74432fc7cb131c732f7507b74053e9, removed FeatureVector from StructuredTranslation

Posted by mj...@apache.org.
Reverted commit 6d2213a20b74432fc7cb131c732f7507b74053e9, removed FeatureVector from StructuredTranslation


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/c01ce779
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/c01ce779
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/c01ce779

Branch: refs/heads/master
Commit: c01ce779c18da9f4eb8411d2a22a3f429e51d2f9
Parents: 6bf0c84
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Jun 21 23:08:06 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Jun 21 23:08:06 2016 -0400

----------------------------------------------------------------------
 .../joshua/decoder/StructuredTranslation.java     | 18 +++++-------------
 .../decoder/StructuredTranslationFactory.java     |  6 +++---
 2 files changed, 8 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c01ce779/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 2faacf2..fb97ee7 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -45,7 +45,7 @@ public class StructuredTranslation {
   private final List<String> translationTokens;
   private final float translationScore;
   private final List<List<Integer>> translationWordAlignments;
-  private final FeatureVector translationFeatures;
+  private final Map<String,Float> translationFeatures;
   private final float extractionTime;
   
   public StructuredTranslation(
@@ -54,7 +54,7 @@ public class StructuredTranslation {
       final List<String> translationTokens,
       final float translationScore,
       final List<List<Integer>> translationWordAlignments,
-      final FeatureVector translationFeatures,
+      final Map<String,Float> translationFeatures,
       final float extractionTime) {
     this.sourceSentence = sourceSentence;
     this.translationString = translationString;
@@ -83,20 +83,12 @@ public class StructuredTranslation {
   }
   
   /**
-   * Produces the translation formatted according to the value of {@value JoshuaConfiguration.output_format}.
-   * Also includes formatting options such as {@value JoshuaConfiguration.project_case}.
+   * Returns the output string formatted according to {@value JoshuaConfiguration.output_format}.
    * 
    * @return
    */
   public String getFormattedTranslationString() {
-    JoshuaConfiguration config = sourceSentence.config;
-    String outputString = config.outputFormat
-        .replace("%s", getTranslationString())
-        .replace("%S", DeNormalize.processSingleLine(maybeProjectCase(getTranslationString())))
-        .replace("%i", Integer.toString(getSentenceId()))
-        .replace("%f", config.moses ? translationFeatures.mosesString() : translationFeatures.toString())
-        .replace("%c", String.format("%.3f", getTranslationScore()));
-    return outputString;
+    throw new RuntimeException("Not yet implemented");
   }
 
   public List<String> getTranslationTokens() {
@@ -116,7 +108,7 @@ public class StructuredTranslation {
   }
   
   public Map<String,Float> getTranslationFeatures() {
-    return translationFeatures.getMap();
+    return translationFeatures;
   }
   
   /**

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c01ce779/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
index 4389135..916a5a7 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
@@ -62,7 +62,7 @@ public class StructuredTranslationFactory {
         extractTranslationTokens(translationString),
         extractTranslationScore(hypergraph),
         getViterbiWordAlignmentList(hypergraph),
-        getViterbiFeatures(hypergraph, featureFunctions, sourceSentence),
+        getViterbiFeatures(hypergraph, featureFunctions, sourceSentence).getMap(),
         (System.currentTimeMillis() - startTime) / 1000.0f);
   }
   
@@ -73,7 +73,7 @@ public class StructuredTranslationFactory {
    */
   public static StructuredTranslation fromEmptyOutput(final Sentence sourceSentence) {
         return new StructuredTranslation(
-                sourceSentence, "", emptyList(), 0, emptyList(), new FeatureVector(), 0f);
+                sourceSentence, "", emptyList(), 0, emptyList(), emptyMap(), 0f);
       }
   
   /**
@@ -93,7 +93,7 @@ public class StructuredTranslationFactory {
         extractTranslationTokens(translationString),
         derivationState.getModelCost(),
         derivationState.getWordAlignmentList(),
-        derivationState.getFeatures(),
+        derivationState.getFeatures().getMap(),
         (System.currentTimeMillis() - startTime) / 1000.0f);
   }
   


[2/4] incubator-joshua git commit: Adding the CHRF metric for tuning

Posted by mj...@apache.org.
Adding the CHRF metric for tuning

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32a5003e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32a5003e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32a5003e

Branch: refs/heads/master
Commit: 32a5003e671bfd781a5bf774cc48d72443fc2c1e
Parents: 52c83d5
Author: antot <an...@riseup.net>
Authored: Tue Jun 21 17:24:34 2016 +0100
Committer: antot <an...@riseup.net>
Committed: Tue Jun 21 17:24:34 2016 +0100

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/metrics/CHRF.java | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32a5003e/src/main/java/org/apache/joshua/metrics/CHRF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/CHRF.java b/src/main/java/org/apache/joshua/metrics/CHRF.java
index f490f58..f02fc5f 100644
--- a/src/main/java/org/apache/joshua/metrics/CHRF.java
+++ b/src/main/java/org/apache/joshua/metrics/CHRF.java
@@ -28,7 +28,13 @@ import java.util.logging.Logger;
  * - Use of a length penalty to prevent chrF to prefer too long (with beta>1) or too short (with beta<1) translations
  * - Use of hash tables for efficient n-gram matching
  * 
- * If you use this metric in your research please cite [2]
+ * The metric has 2 parameters:
+ * - Beta. It assigns beta times more weight to recall than to precision. By default 1.
+ *   Although for evaluation the best correlation was found with beta=3, we've found the
+ *   best results for tuning so far with beta=1
+ * - Max-ngram. Maximum n-gram length (characters). By default 6.
+ * 
+ * If you use this metric in your research please cite [2].
  * 
  * [1] Maja Popovic. 2015. chrF: character n-gram F-score for automatic MT evaluation.
  * In Proceedings of the Tenth Workshop on Statistical Machine Translation. Lisbon, Portugal, pages 392\u2013395.
@@ -41,7 +47,7 @@ import java.util.logging.Logger;
 public class CHRF extends EvaluationMetric {
     private static final Logger logger = Logger.getLogger(CHRF.class.getName());
 
-    protected double beta = 3;
+    protected double beta = 1;
     protected double factor;
     protected int maxGramLength = 6; // The maximum n-gram we care about
     //private double[] nGramWeights; //TODO to weight them differently
@@ -53,7 +59,7 @@ public class CHRF extends EvaluationMetric {
 
   public CHRF()
   {
-      this(3, 6);
+      this(1, 6);
   }
           
   public CHRF(String[] CHRF_options)