You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/22 03:08:13 UTC

[2/4] incubator-joshua git commit: Adding the CHRF metric for tuning

Adding the CHRF metric for tuning

Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32a5003e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32a5003e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32a5003e

Branch: refs/heads/master
Commit: 32a5003e671bfd781a5bf774cc48d72443fc2c1e
Parents: 52c83d5
Author: antot <an...@riseup.net>
Authored: Tue Jun 21 17:24:34 2016 +0100
Committer: antot <an...@riseup.net>
Committed: Tue Jun 21 17:24:34 2016 +0100

----------------------------------------------------------------------
 src/main/java/org/apache/joshua/metrics/CHRF.java | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32a5003e/src/main/java/org/apache/joshua/metrics/CHRF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/CHRF.java b/src/main/java/org/apache/joshua/metrics/CHRF.java
index f490f58..f02fc5f 100644
--- a/src/main/java/org/apache/joshua/metrics/CHRF.java
+++ b/src/main/java/org/apache/joshua/metrics/CHRF.java
@@ -28,7 +28,13 @@ import java.util.logging.Logger;
  * - Use of a length penalty to prevent chrF from preferring translations that are too long (with beta>1) or too short (with beta<1)
  * - Use of hash tables for efficient n-gram matching
  * 
- * If you use this metric in your research please cite [2]
+ * The metric has 2 parameters:
+ * - Beta. It assigns beta times more weight to recall than to precision. By default 1.
+ *   Although for evaluation the best correlation was found with beta=3, we've found the
+ *   best results for tuning so far with beta=1
+ * - Max-ngram. Maximum n-gram length (characters). By default 6.
+ * 
+ * If you use this metric in your research please cite [2].
  * 
  * [1] Maja Popovic. 2015. chrF: character n-gram F-score for automatic MT evaluation.
  * In Proceedings of the Tenth Workshop on Statistical Machine Translation. Lisbon, Portugal, pages 392–395.
@@ -41,7 +47,7 @@ import java.util.logging.Logger;
 public class CHRF extends EvaluationMetric {
     private static final Logger logger = Logger.getLogger(CHRF.class.getName());
 
-    protected double beta = 3;
+    protected double beta = 1;
     protected double factor;
     protected int maxGramLength = 6; // The maximum n-gram we care about
     //private double[] nGramWeights; //TODO to weight them differently
@@ -53,7 +59,7 @@ public class CHRF extends EvaluationMetric {
 
   public CHRF()
   {
-      this(3, 6);
+      this(1, 6);
   }
           
   public CHRF(String[] CHRF_options)