You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/22 03:08:13 UTC
[2/4] incubator-joshua git commit: Adding the CHRF metric for tuning
Adding the CHRF metric for tuning
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/32a5003e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/32a5003e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/32a5003e
Branch: refs/heads/master
Commit: 32a5003e671bfd781a5bf774cc48d72443fc2c1e
Parents: 52c83d5
Author: antot <an...@riseup.net>
Authored: Tue Jun 21 17:24:34 2016 +0100
Committer: antot <an...@riseup.net>
Committed: Tue Jun 21 17:24:34 2016 +0100
----------------------------------------------------------------------
src/main/java/org/apache/joshua/metrics/CHRF.java | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/32a5003e/src/main/java/org/apache/joshua/metrics/CHRF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/metrics/CHRF.java b/src/main/java/org/apache/joshua/metrics/CHRF.java
index f490f58..f02fc5f 100644
--- a/src/main/java/org/apache/joshua/metrics/CHRF.java
+++ b/src/main/java/org/apache/joshua/metrics/CHRF.java
@@ -28,7 +28,13 @@ import java.util.logging.Logger;
* - Use of a length penalty to prevent chrF from preferring translations that are too long (with beta>1) or too short (with beta<1)
* - Use of hash tables for efficient n-gram matching
*
- * If you use this metric in your research please cite [2]
+ * The metric has 2 parameters:
+ * - Beta. It assigns beta times more weight to recall than to precision. By default 1.
+ * Although for evaluation the best correlation was found with beta=3, we've found the
+ * best results for tuning so far with beta=1
+ * - Max-ngram. Maximum n-gram length (characters). By default 6.
+ *
+ * If you use this metric in your research please cite [2].
*
* [1] Maja Popovic. 2015. chrF: character n-gram F-score for automatic MT evaluation.
* In Proceedings of the Tenth Workshop on Statistical Machine Translation. Lisbon, Portugal, pages 392–395.
@@ -41,7 +47,7 @@ import java.util.logging.Logger;
public class CHRF extends EvaluationMetric {
private static final Logger logger = Logger.getLogger(CHRF.class.getName());
- protected double beta = 3;
+ protected double beta = 1;
protected double factor;
protected int maxGramLength = 6; // The maximum n-gram we care about
//private double[] nGramWeights; //TODO to weight them differently
@@ -53,7 +59,7 @@ public class CHRF extends EvaluationMetric {
public CHRF()
{
- this(3, 6);
+ this(1, 6);
}
public CHRF(String[] CHRF_options)