You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/21 17:42:10 UTC
[01/22] incubator-joshua git commit: fiddling with demo
Repository: incubator-joshua
Updated Branches:
refs/heads/JOSHUA-284 25d28fe2c -> d28b4f39c
fiddling with demo
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/c465e715
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/c465e715
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/c465e715
Branch: refs/heads/JOSHUA-284
Commit: c465e71530eecead797f73c06da25f1c0cff28ed
Parents: 0663a9c
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 09:08:42 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 09:08:42 2016 -0400
----------------------------------------------------------------------
demo/apache_joshua_logo.png | Bin 0 -> 306617 bytes
demo/apache_joshua_logo_faded.png | Bin 0 -> 309216 bytes
demo/demo.js | 3 ++-
demo/index.html | 36 ++++++++++++++++-----------------
4 files changed, 19 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c465e715/demo/apache_joshua_logo.png
----------------------------------------------------------------------
diff --git a/demo/apache_joshua_logo.png b/demo/apache_joshua_logo.png
new file mode 100644
index 0000000..f2d2423
Binary files /dev/null and b/demo/apache_joshua_logo.png differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c465e715/demo/apache_joshua_logo_faded.png
----------------------------------------------------------------------
diff --git a/demo/apache_joshua_logo_faded.png b/demo/apache_joshua_logo_faded.png
new file mode 100644
index 0000000..28ad01f
Binary files /dev/null and b/demo/apache_joshua_logo_faded.png differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c465e715/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index e329617..aea4a01 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -224,8 +224,9 @@ function record_results(data, status) {
* Cleans out OOVs
*/
function clean_oovs(str) {
- str = str.replace(/(\S+)_OOV/g, "<span style='color:red'>$1</span>");
+ str = str.replace(/(\S+)_OOV/g, "<span class='oov'>$1</span>");
str = str.replace(/ ([\.\?,])/g, "$1");
+ str = str.replace(/" (.*?) "/g, "\"$1\"");
return str;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/c465e715/demo/index.html
----------------------------------------------------------------------
diff --git a/demo/index.html b/demo/index.html
index 8340b6a..9c014cd 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -17,28 +17,31 @@
<style type="text/css">
/* Move down content because we have a fixed navbar that is 50px tall */
body {
- padding-top: 50px;
- padding-bottom: 20px;
- <!-- background-image: url("apache_joshua_logo.png"); -->
- <!-- background-repeat: no-repeat; -->
- <!-- background-position: -300px -300px; -->
+ background-image: url("apache_joshua_logo_faded.png");
+ background-repeat: no-repeat;
+ background-position: -300px -300px;
+ }
+
+ oov {
+ color: red;
+ font-weight: bold;
+ text-decoration: -300px -300px;
+ }
+
+ .navbar-brand {
+ font-weight: bold;
+ color: black
}
</style>
</head>
<body>
- <nav class="navbar navbar-inverse navbar-fixed-top">
+ <nav class="navbar navbar-static-top">
<div class="container">
<div class="navbar-header">
- <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false" aria-controls="navbar">
- <span class="sr-only">Toggle navigation</span>
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- <span class="icon-bar"></span>
- </button>
<a class="navbar-brand" href="#">
- Apache Joshua Machine Translation Toolkit
+ Apache Joshua Machine Translation Demonstration
</a>
</div>
<div id="navbar" class="navbar-collapse collapse">
@@ -56,11 +59,6 @@
</nav>
<!-- Main jumbotron for a primary marketing message or call to action -->
- <div class="page-header">
- <div class="container">
- <h1>Translation Console</h1>
- </div>
- </div>
<div class="container">
<div>
@@ -81,7 +79,7 @@
<div class="panel-body">
<div class="row">
- <div class="col-lg-8">
+ <div class="col-lg-12">
<fieldset class="form-group">
<textarea class="form-control" id="sourceTxt" rows="5" placeholder="Enter sentences one per line and type ^-return or \u2318-return to submit" autofocus="autofocus"></textarea>
</fieldset>
[02/22] incubator-joshua git commit: quick add rules for OOVs,
format fiddling
Posted by mj...@apache.org.
quick add rules for OOVs, format fiddling
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/1038a144
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/1038a144
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/1038a144
Branch: refs/heads/JOSHUA-284
Commit: 1038a144468b48408f3eca1122efb3c8f84e4537
Parents: c465e71
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 09:41:39 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 09:41:39 2016 -0400
----------------------------------------------------------------------
demo/README.md | 2 +-
demo/demo.config | 3 +++
demo/demo.js | 6 ++++++
demo/index.html | 7 +++----
4 files changed, 13 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1038a144/demo/README.md
----------------------------------------------------------------------
diff --git a/demo/README.md b/demo/README.md
index 96a7876..d086532 100644
--- a/demo/README.md
+++ b/demo/README.md
@@ -9,7 +9,7 @@ There are two steps:
-feature-function OOVPenalty \
-feature-function "PhrasePenalty -owner custom" \
-weight-overwrite "OOVPenalty 1 PhrasePenalty -1" \
- -mark-oovs
+ -mark-oovs -lowercase -projectcase -output-format %S
Alternately, you can use the config file in this directory, which
contains all the above parameters, and simply run it like this:
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1038a144/demo/demo.config
----------------------------------------------------------------------
diff --git a/demo/demo.config b/demo/demo.config
index f10ec32..4c29c9a 100644
--- a/demo/demo.config
+++ b/demo/demo.config
@@ -1,6 +1,9 @@
server-type = http
server-port = 5674
mark-oovs = true
+lowercase = true
+project-case = true
+output-format = %S
feature-function = OOVPenalty
feature-function = PhrasePenalty -owner custom
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1038a144/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index aea4a01..a6a5711 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -218,6 +218,12 @@ function record_results(data, status) {
result += "</ul>";
$("#output").html(result);
+
+ $(".oov").click(function(e) {
+ var oov = e.target.innerHTML;
+ $("#addPhrase_source").val(oov);
+ $("#addPhrase_target").select();
+ });
};
/**
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/1038a144/demo/index.html
----------------------------------------------------------------------
diff --git a/demo/index.html b/demo/index.html
index 9c014cd..aa2acd9 100644
--- a/demo/index.html
+++ b/demo/index.html
@@ -9,7 +9,7 @@
<meta name="author" content="">
<link rel="icon" href="favicon.ico">
- <title>Apache Joshua Machine Translation Toolkit</title>
+ <title>Apache Joshua Machine Translation Demonstration</title>
<!-- Bootstrap core CSS -->
<link href="bootstrap/css/bootstrap.min.css" rel="stylesheet">
@@ -22,15 +22,14 @@
background-position: -300px -300px;
}
- oov {
+ .oov {
color: red;
font-weight: bold;
- text-decoration: -300px -300px;
+ text-decoration: underline;
}
.navbar-brand {
font-weight: bold;
- color: black
}
</style>
</head>
[21/22] incubator-joshua git commit: JOSHUA-291 - added missing
import in StructuredTranslation
Posted by mj...@apache.org.
JOSHUA-291 - added missing import in StructuredTranslation
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2b570d2b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2b570d2b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2b570d2b
Branch: refs/heads/JOSHUA-284
Commit: 2b570d2b6ca7ad54e1cd6ed09fec58919efc59ce
Parents: 029cbbc
Author: Tommaso Teofili <to...@apache.org>
Authored: Thu Aug 18 10:00:54 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Thu Aug 18 10:00:54 2016 +0200
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/StructuredTranslation.java | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/2b570d2b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index aa4e0c7..9f32d31 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -21,6 +21,7 @@ package org.apache.joshua.decoder;
import java.util.List;
import java.util.Map;
+import org.apache.joshua.decoder.io.DeNormalize;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.decoder.segment_file.Token;
import org.apache.joshua.util.FormatUtils;
[14/22] incubator-joshua git commit: JOSHUA-302 - Remove dependency
on concurrent/concurrent package; replace instances with JDK library builtins
Posted by mj...@apache.org.
JOSHUA-302 - Remove dependency on concurrent/concurrent package; replace instances with JDK library builtins
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/19fe7561
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/19fe7561
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/19fe7561
Branch: refs/heads/JOSHUA-284
Commit: 19fe7561aa516b1a7eba63473e8d26111a708c33
Parents: 2e2ee09
Author: Max Thomas <ma...@maxthomas.io>
Authored: Wed Aug 17 10:32:20 2016 -0500
Committer: Max Thomas <ma...@maxthomas.io>
Committed: Wed Aug 17 10:32:20 2016 -0500
----------------------------------------------------------------------
pom.xml | 5 -----
.../org/apache/joshua/adagrad/AdaGradCore.java | 22 ++++++++++----------
.../java/org/apache/joshua/pro/PROCore.java | 22 ++++++++++----------
3 files changed, 22 insertions(+), 27 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/19fe7561/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 49d77fb..05af17a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -231,11 +231,6 @@
<artifactId>slf4j-log4j12</artifactId>
<version>${slf4j.version}</version>
</dependency>
- <dependency>
- <groupId>concurrent</groupId>
- <artifactId>concurrent</artifactId>
- <version>1.3.4</version>
- </dependency>
<!-- Test Dependencies -->
<dependency>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/19fe7561/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java b/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
index 789757f..e51e8a3 100755
--- a/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
+++ b/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
@@ -40,6 +40,7 @@ import java.util.Random;
import java.util.Scanner;
import java.util.TreeSet;
import java.util.Vector;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
@@ -49,7 +50,6 @@ import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.metrics.EvaluationMetric;
import org.apache.joshua.util.StreamGobbler;
-import EDU.oswego.cs.dl.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -719,7 +719,7 @@ public class AdaGradCore {
candCount[i] = 0;
lastUsedIndex[i] = -1;
// suffStats_array[i].clear();
- suffStats_array[i] = new ConcurrentHashMap();
+ suffStats_array[i] = new ConcurrentHashMap<>();
}
// initLambda[0] is not used!
@@ -1539,7 +1539,7 @@ public class AdaGradCore {
/*
* line format:
- *
+ *
* i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val
* .*
*/
@@ -1922,13 +1922,13 @@ public class AdaGradCore {
/*
* InputStream inStream = new FileInputStream(new File(origFileName)); BufferedReader inFile =
* new BufferedReader(new InputStreamReader(inStream, "utf8"));
- *
+ *
* FileOutputStream outStream = new FileOutputStream(newFileName, false); OutputStreamWriter
* outStreamWriter = new OutputStreamWriter(outStream, "utf8"); BufferedWriter outFile = new
* BufferedWriter(outStreamWriter);
- *
+ *
* String line; while(inFile.ready()) { line = inFile.readLine(); writeLine(line, outFile); }
- *
+ *
* inFile.close(); outFile.close();
*/
return true;
@@ -2026,10 +2026,10 @@ public class AdaGradCore {
/*
* OBSOLETE MODIFICATION //SPECIAL HANDLING FOR AdaGrad CLASSIFIER PARAMETERS String[]
* paramA = line.split("\\s+");
- *
+ *
* if( paramA[0].equals("-classifierParams") ) { String classifierParam = ""; for(int p=1;
* p<=paramA.length-1; p++) classifierParam += paramA[p]+" ";
- *
+ *
* if(paramA.length>=2) { String[] tmpParamA = new String[2]; tmpParamA[0] = paramA[0];
* tmpParamA[1] = classifierParam; paramA = tmpParamA; } else {
* println("Malformed line in config file:"); println(origLine); System.exit(70); } }//END
@@ -2572,12 +2572,12 @@ public class AdaGradCore {
/*
* 1: -docSet bottom 8d 2: -docSet bottom 25% the bottom ceil(0.20*numDocs) documents 3: -docSet
* top 8d 4: -docSet top 25% the top ceil(0.20*numDocs) documents
- *
+ *
* 5: -docSet window 11d around 90percentile 11 docs centered around 80th percentile (complain
* if not enough docs; don't adjust) 6: -docSet window 11d around 40rank 11 docs centered around
* doc ranked 50 (complain if not enough docs; don't adjust)
- *
- *
+ *
+ *
* [0]: method (0-6) [1]: first (1-indexed) [2]: last (1-indexed) [3]: size [4]: center [5]:
* arg1 (-1 for method 0) [6]: arg2 (-1 for methods 0-4)
*/
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/19fe7561/src/main/java/org/apache/joshua/pro/PROCore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/pro/PROCore.java b/src/main/java/org/apache/joshua/pro/PROCore.java
index 290000d..ec23e0a 100755
--- a/src/main/java/org/apache/joshua/pro/PROCore.java
+++ b/src/main/java/org/apache/joshua/pro/PROCore.java
@@ -40,6 +40,7 @@ import java.util.Random;
import java.util.Scanner;
import java.util.TreeSet;
import java.util.Vector;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
@@ -49,7 +50,6 @@ import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.metrics.EvaluationMetric;
import org.apache.joshua.util.StreamGobbler;
-import EDU.oswego.cs.dl.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -713,7 +713,7 @@ public class PROCore {
candCount[i] = 0;
lastUsedIndex[i] = -1;
// suffStats_array[i].clear();
- suffStats_array[i] = new ConcurrentHashMap();
+ suffStats_array[i] = new ConcurrentHashMap<>();
}
// initLambda[0] is not used!
@@ -1503,7 +1503,7 @@ public class PROCore {
/*
* line format:
- *
+ *
* i ||| words of candidate translation . ||| feat-1_val feat-2_val ... feat-numParams_val
* .*
*/
@@ -1886,13 +1886,13 @@ public class PROCore {
/*
* InputStream inStream = new FileInputStream(new File(origFileName)); BufferedReader inFile =
* new BufferedReader(new InputStreamReader(inStream, "utf8"));
- *
+ *
* FileOutputStream outStream = new FileOutputStream(newFileName, false); OutputStreamWriter
* outStreamWriter = new OutputStreamWriter(outStream, "utf8"); BufferedWriter outFile = new
* BufferedWriter(outStreamWriter);
- *
+ *
* String line; while(inFile.ready()) { line = inFile.readLine(); writeLine(line, outFile); }
- *
+ *
* inFile.close(); outFile.close();
*/
return true;
@@ -1990,10 +1990,10 @@ public class PROCore {
/*
* OBSOLETE MODIFICATION //SPECIAL HANDLING FOR PRO CLASSIFIER PARAMETERS String[] paramA
* = line.split("\\s+");
- *
+ *
* if( paramA[0].equals("-classifierParams") ) { String classifierParam = ""; for(int p=1;
* p<=paramA.length-1; p++) classifierParam += paramA[p]+" ";
- *
+ *
* if(paramA.length>=2) { String[] tmpParamA = new String[2]; tmpParamA[0] = paramA[0];
* tmpParamA[1] = classifierParam; paramA = tmpParamA; } else {
* println("Malformed line in config file:"); println(origLine); System.exit(70); } }//END
@@ -2474,12 +2474,12 @@ public class PROCore {
/*
* 1: -docSet bottom 8d 2: -docSet bottom 25% the bottom ceil(0.20*numDocs) documents 3: -docSet
* top 8d 4: -docSet top 25% the top ceil(0.20*numDocs) documents
- *
+ *
* 5: -docSet window 11d around 90percentile 11 docs centered around 80th percentile (complain
* if not enough docs; don't adjust) 6: -docSet window 11d around 40rank 11 docs centered around
* doc ranked 50 (complain if not enough docs; don't adjust)
- *
- *
+ *
+ *
* [0]: method (0-6) [1]: first (1-indexed) [2]: last (1-indexed) [3]: size [4]: center [5]:
* arg1 (-1 for method 0) [6]: arg2 (-1 for methods 0-4)
*/
[11/22] incubator-joshua git commit: typo fix, added name function
Posted by mj...@apache.org.
typo fix, added name function
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/3e2e053d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/3e2e053d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/3e2e053d
Branch: refs/heads/JOSHUA-284
Commit: 3e2e053db9cafcdc5b4885a0f6fa5e54a63bc468
Parents: 216d61d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:29:27 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:29:27 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/joshua/system/LmOovFeatureTest.java | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3e2e053d/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java b/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
index e4cddfc..d6fc16d 100644
--- a/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
+++ b/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
@@ -55,7 +55,7 @@ public class LmOovFeatureTest {
}
@Test
- public void givenInputWithDifferntOovTypes_whenDecode_thenFeaturesAreAsExpected() throws IOException {
+ public void givenInputWithDifferentOovTypes_whenDecode_thenFeaturesAreAsExpected() throws IOException {
final String translation = decode(INPUT).toString().trim();
System.out.println(translation);
assertEquals(translation, EXPECTED_FEATURES);
@@ -65,5 +65,12 @@ public class LmOovFeatureTest {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
return decoder.decode(sentence);
}
-
+
+ public static void main(String[] args) throws Exception {
+
+ LmOovFeatureTest test = new LmOovFeatureTest();
+ test.setUp();
+ test.givenInputWithDifferentOovTypes_whenDecode_thenFeaturesAreAsExpected();
+ test.tearDown();
+ }
}
[22/22] incubator-joshua git commit: Merge branch 'master' into
JOSHUA-284
Posted by mj...@apache.org.
Merge branch 'master' into JOSHUA-284
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d28b4f39
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d28b4f39
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d28b4f39
Branch: refs/heads/JOSHUA-284
Commit: d28b4f39c578197803beba2c376db5ed95774576
Parents: 25d28fe 2b570d2
Author: Matt Post <po...@cs.jhu.edu>
Authored: Sun Aug 21 12:36:37 2016 -0500
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Sun Aug 21 12:36:37 2016 -0500
----------------------------------------------------------------------
demo/README.md | 2 +-
demo/apache_joshua_logo.png | Bin 0 -> 306617 bytes
demo/apache_joshua_logo_faded.png | Bin 0 -> 309216 bytes
demo/demo.config | 3 +
demo/demo.js | 19 +-
demo/index.html | 37 +-
pom.xml | 30 +-
.../org/apache/joshua/adagrad/AdaGradCore.java | 101 +++---
.../org/apache/joshua/adagrad/Optimizer.java | 348 +++++++++----------
.../org/apache/joshua/corpus/BasicPhrase.java | 2 +-
.../apache/joshua/corpus/ContiguousPhrase.java | 8 +-
.../java/org/apache/joshua/corpus/Phrase.java | 2 +-
.../java/org/apache/joshua/corpus/Span.java | 6 +-
.../org/apache/joshua/corpus/SymbolTable.java | 2 +-
.../org/apache/joshua/corpus/Vocabulary.java | 10 +-
.../joshua/corpus/syntax/ArraySyntaxTree.java | 51 +--
.../apache/joshua/corpus/syntax/SyntaxTree.java | 10 +-
.../org/apache/joshua/decoder/ArgsParser.java | 8 +-
.../java/org/apache/joshua/decoder/BLEU.java | 72 ++--
.../java/org/apache/joshua/decoder/Decoder.java | 26 +-
.../apache/joshua/decoder/DecoderThread.java | 2 +-
.../joshua/decoder/JoshuaConfiguration.java | 45 +--
.../joshua/decoder/NbestMinRiskReranker.java | 33 +-
.../joshua/decoder/StructuredTranslation.java | 9 +-
.../decoder/StructuredTranslationFactory.java | 5 +-
.../org/apache/joshua/decoder/Translation.java | 8 +-
.../org/apache/joshua/decoder/Translations.java | 2 +-
.../joshua/decoder/chart_parser/Cell.java | 12 +-
.../joshua/decoder/chart_parser/Chart.java | 51 ++-
.../decoder/chart_parser/ComputeNodeResult.java | 8 +-
.../decoder/chart_parser/CubePruneState.java | 20 +-
.../joshua/decoder/chart_parser/DotChart.java | 24 +-
.../joshua/decoder/chart_parser/SourcePath.java | 4 +-
.../decoder/chart_parser/StateConstraint.java | 5 +-
.../joshua/decoder/chart_parser/SuperNode.java | 2 +-
.../joshua/decoder/ff/FeatureFunction.java | 24 +-
.../apache/joshua/decoder/ff/FeatureVector.java | 21 +-
.../joshua/decoder/ff/LabelCombinationFF.java | 2 +-
.../joshua/decoder/ff/LabelSubstitutionFF.java | 8 +-
.../joshua/decoder/ff/LexicalFeatures.java | 2 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 7 +-
.../apache/joshua/decoder/ff/PhraseModel.java | 2 +-
.../apache/joshua/decoder/ff/PhrasePenalty.java | 4 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 6 +-
.../decoder/ff/RulePropertiesQuerying.java | 6 +-
.../org/apache/joshua/decoder/ff/RuleShape.java | 2 +-
.../joshua/decoder/ff/SourceDependentFF.java | 4 +-
.../apache/joshua/decoder/ff/SourcePathFF.java | 2 +-
.../apache/joshua/decoder/ff/TargetBigram.java | 13 +-
.../ff/fragmentlm/ConcatenationIterator.java | 10 +-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 59 ++--
.../ff/fragmentlm/PennTreebankReader.java | 17 +-
.../joshua/decoder/ff/fragmentlm/Tree.java | 56 ++-
.../joshua/decoder/ff/fragmentlm/Trees.java | 8 +-
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 26 +-
.../joshua/decoder/ff/lm/LanguageModelFF.java | 39 +--
.../ff/lm/berkeley_lm/LMGrammarBerkeley.java | 4 +-
.../ff/lm/bloomfilter_lm/BloomFilter.java | 2 +-
.../BloomFilterLanguageModel.java | 18 +-
.../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 25 +-
.../joshua/decoder/ff/phrase/Distortion.java | 2 +-
.../ff/similarity/EdgePhraseSimilarityFF.java | 17 +-
.../ff/state_maintenance/NgramDPState.java | 6 +-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 12 +-
.../decoder/ff/tm/BasicRuleCollection.java | 2 +-
.../joshua/decoder/ff/tm/CreateGlueGrammar.java | 2 +-
.../joshua/decoder/ff/tm/GrammarReader.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 2 +-
.../org/apache/joshua/decoder/ff/tm/Rule.java | 67 ++--
.../decoder/ff/tm/SentenceFilteredGrammar.java | 12 +-
.../decoder/ff/tm/format/MosesFormatReader.java | 2 +-
.../ff/tm/hash_based/ExtensionIterator.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 8 +-
.../decoder/ff/tm/packed/PackedGrammar.java | 87 ++---
.../ff/tm/packed/SliceAggregatingTrie.java | 4 +-
.../decoder/hypergraph/AlignedSourceTokens.java | 2 +-
.../decoder/hypergraph/AllSpansWalker.java | 19 +-
.../hypergraph/DefaultInsideOutside.java | 34 +-
.../joshua/decoder/hypergraph/ForestWalker.java | 10 +-
.../GrammarBuilderWalkerFunction.java | 14 +-
.../joshua/decoder/hypergraph/HGNode.java | 54 ++-
.../joshua/decoder/hypergraph/HyperEdge.java | 6 +-
.../joshua/decoder/hypergraph/HyperGraph.java | 30 +-
.../decoder/hypergraph/HyperGraphPruning.java | 9 +-
.../decoder/hypergraph/KBestExtractor.java | 51 ++-
.../hypergraph/OutputStringExtractor.java | 8 +-
.../hypergraph/StringToTreeConverter.java | 16 +-
.../decoder/hypergraph/ViterbiExtractor.java | 10 +-
.../hypergraph/WordAlignmentExtractor.java | 2 +-
.../decoder/hypergraph/WordAlignmentState.java | 8 +-
.../apache/joshua/decoder/io/JSONMessage.java | 18 +-
.../decoder/io/TranslationRequestStream.java | 6 +-
.../apache/joshua/decoder/phrase/Candidate.java | 4 +-
.../apache/joshua/decoder/phrase/Coverage.java | 2 +-
.../apache/joshua/decoder/phrase/Future.java | 4 +-
.../apache/joshua/decoder/phrase/Header.java | 87 +++++
.../joshua/decoder/phrase/Hypothesis.java | 5 +-
.../joshua/decoder/phrase/PhraseChart.java | 20 +-
.../joshua/decoder/phrase/PhraseTable.java | 4 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 12 +-
.../apache/joshua/decoder/phrase/Stacks.java | 23 +-
.../decoder/segment_file/ConstraintRule.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 18 +-
.../joshua/decoder/segment_file/Token.java | 9 +-
.../java/org/apache/joshua/pro/PROCore.java | 22 +-
.../org/apache/joshua/server/ServerThread.java | 9 +-
.../phrase/decode/PhraseDecodingTest.java | 10 +
.../apache/joshua/system/LmOovFeatureTest.java | 11 +-
108 files changed, 1072 insertions(+), 1030 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index d92665d,280ea5a..ddbd222
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@@ -46,16 -46,16 +46,16 @@@ public class ComputeNodeResult
private static final Logger LOG = LoggerFactory.getLogger(ComputeNodeResult.class);
// The cost incurred by the rule itself (and all associated feature functions)
- private final float transitionCost;
+ private float transitionCost;
// transitionCost + the Viterbi costs of the tail nodes.
- private final float viterbiCost;
-
- // viterbiCost + a future estimate (outside cost estimate).
- private final float pruningCostEstimate;
+ private float viterbiCost;
+ // The future or outside cost (estimated)
+ private float futureCostEstimate;
+
// The StateComputer objects themselves serve as keys.
- private List<DPState> dpStates;
+ private final List<DPState> dpStates;
/**
* Computes the new state(s) that are produced when applying the given rule to the list of tail
@@@ -99,13 -99,13 +99,13 @@@
}
}
- List<DPState> allDPStates = new ArrayList<DPState>();
+ List<DPState> allDPStates = new ArrayList<>();
// The transition cost is the new cost incurred by applying this rule
- float transitionCost = 0.0f;
+ this.transitionCost = 0.0f;
// The future cost estimate is a heuristic estimate of the outside cost of this edge.
- float futureCostEstimate = 0.0f;
+ this.futureCostEstimate = 0.0f;
/*
* We now iterate over all the feature functions, computing their cost and their expected future
@@@ -115,7 -115,7 +115,7 @@@
FeatureFunction.ScoreAccumulator acc = feature.new ScoreAccumulator();
DPState newState = feature.compute(rule, tailNodes, i, j, sourcePath, sentence, acc);
-- transitionCost += acc.getScore();
++ this.transitionCost += acc.getScore();
if (LOG.isDebugEnabled()) {
@@@ -129,10 -129,13 +129,10 @@@
allDPStates.add(((StatefulFF)feature).getStateIndex(), newState);
}
}
-- viterbiCost += transitionCost;
++ this.viterbiCost += transitionCost;
if (LOG.isDebugEnabled())
LOG.debug("-> COST = {}", transitionCost);
- // Set the final results.
- this.pruningCostEstimate = viterbiCost + futureCostEstimate;
- this.viterbiCost = viterbiCost;
- this.transitionCost = transitionCost;
+
this.dpStates = allDPStates;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index a446eab,93e21cd..2a5dc03
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@@ -42,41 -33,26 +42,41 @@@ import org.apache.joshua.decoder.ff.Fea
import org.apache.joshua.decoder.ff.state_maintenance.DPState;
import org.apache.joshua.decoder.ff.tm.Rule;
import org.apache.joshua.decoder.hypergraph.HGNode;
+import org.apache.joshua.decoder.hypergraph.HyperEdge;
+import org.apache.joshua.decoder.segment_file.Sentence;
-public class Candidate {
-
+public class Candidate implements Comparable<Candidate> {
+
+ private List<FeatureFunction> featureFunctions;
+ private Sentence sentence;
+
// the set of hypotheses that can be paired with phrases from this span
- private List<Hypothesis> hypotheses;
+ private final List<Hypothesis> hypotheses;
// the list of target phrases gathered from a span of the input
- private final TargetPhrases phrases;
-
- // source span of new phrase
- public final Span span;
+ private TargetPhrases phrases;
// future cost of applying phrases to hypotheses
- final float future_delta;
+ private float future_delta;
// indices into the hypotheses and phrases arrays (used for cube pruning)
- private int[] ranks;
+ private final int[] ranks;
- // scoring and state information
- private ComputeNodeResult result;
+ // the reordering rule used by an instantiated Candidate
+ private Rule rule;
+
+ /*
+ * Stores the inside cost of the current phrase, as well as the computed dynamic programming
+ * state. Expensive to compute so there is an option of delaying it.
+ */
+ private ComputeNodeResult computedResult;
+
+ /*
+ * This is the HGNode built over the current target side phrase. It requires the computed results
+ * as part of its constructor, so we delay computing it unless needed.
+ */
+ private HGNode phraseNode;
+ private ComputeNodeResult phraseResult;
/**
* When candidate objects are extended, the new one is initialized with the same underlying
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 8ef5597,af5069d..2710a48
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@@ -40,17 -39,14 +40,16 @@@ import org.apache.joshua.decoder.hyperg
public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
// The hypothesis' coverage vector
- private Coverage coverage;
+ private final Coverage coverage;
- public static final Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
- public static final Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
-
+ public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[GOAL] ||| <s> ||| <s> ||| ||| 0-0");
+ public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| </s> ||| </s> ||| ||| 0-0");
+ public static Rule MONO_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [GOAL,1] [X,2] ||| [GOAL,1] [X,2] ||| ||| 0-0 1-1");
+ public static Rule SWAP_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] [GOAL,2] ||| [GOAL,2] [X,1] ||| ||| 0-1 1-0");
+
public String toString() {
StringBuffer sb = new StringBuffer();
- for (DPState state: getDPStates())
- sb.append(state);
+ getDPStates().forEach(sb::append);
String words = bestHyperedge.getRule().getEnglishWords();
// return String.format("HYP[%s] %.5f j=%d words=%s state=%s", coverage, score, j, words, sb);
return String.format("HYP[%s] j=%d words=[%s] state=%s", coverage, j, words, sb);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --cc src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index 67f62b6,6661dfb..47a3396
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@@ -40,19 -43,20 +40,19 @@@ public class Stack extends ArrayList<Hy
private static final long serialVersionUID = 7885252799032416068L;
- private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
+ private final HashMap<Coverage, ArrayList<Hypothesis>> coverages;
- private final Sentence sentence;
- private final List<FeatureFunction> featureFunctions;
- private final JoshuaConfiguration config;
+ private Sentence sentence;
+ private JoshuaConfiguration config;
/* The list of states we've already visited. */
- private HashSet<Candidate> visitedStates;
+ private final HashSet<Candidate> visitedStates;
/* A list of candidates sorted for consideration for entry to the chart (for cube pruning) */
- private PriorityQueue<Candidate> candidates;
+ private final PriorityQueue<Candidate> candidates;
/* Short-circuits adding a cube-prune state more than once */
- private HashMap<Hypothesis, Hypothesis> deduper;
+ private final HashMap<Hypothesis, Hypothesis> deduper;
/**
* Create a new stack. Stacks are organized one for each number of source words that are covered.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d28b4f39/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --cc src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index d3db223,f2fc6a7..5e878cb
--- a/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@@ -59,19 -60,12 +59,29 @@@ public class PhraseDecodingTest
decoder = null;
}
- @Test(enabled = false)
+ @Test(enabled = true)
public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
- final String translation = decode(INPUT).toString();
- final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
- assertEquals(gold, translation);
+ final String translation = decode(INPUT).toString().trim();
+ final String gold = OUTPUT;
+ assertEquals(translation, gold);
+ }
+
+ @Test(enabled = false)
+ public void givenInput_whenPhraseDecodingWithAlignments_thenOutputHasAlignments() throws IOException {
+ final String translation = decode(INPUT).toString().trim();
+ final String gold = OUTPUT_WITH_ALIGNMENTS;
+ assertEquals(translation, gold);
+ }
++
++ @Test(enabled = true)
++ public void givenInput_whenPhraseDecoding_thenInputCanBeRetrieved() throws IOException {
++ String outputFormat = joshuaConfig.outputFormat;
++ joshuaConfig.outputFormat = "%e";
++ final String translation = decode(INPUT).toString().trim();
++ joshuaConfig.outputFormat = outputFormat;
++ final String gold = INPUT;
++ assertEquals(translation, gold);
+ }
private Translation decode(String input) {
final Sentence sentence = new Sentence(input, 0, joshuaConfig);
[06/22] incubator-joshua git commit: remove sentence marker
Posted by mj...@apache.org.
remove sentence marker
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/84301b9f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/84301b9f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/84301b9f
Branch: refs/heads/JOSHUA-284
Commit: 84301b9f04b3ffeecf35432309b1ca762c692313
Parents: ca6fc49
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 12:20:10 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 12:20:10 2016 -0400
----------------------------------------------------------------------
demo/demo.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/84301b9f/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index 4904757..38b20e1 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -212,8 +212,8 @@ function record_results(data, status) {
/* This version outputs the 1-best candidate of multiple input sentences */
$(data.data.translations).each(function(i, item) {
// result += item.totalScore + " " + item.hyp + "<br/>\n";
- result += "<li class=\"list-group-item\"><span class=\"badge\">" + (i + 1) + "</span>" + clean_oovs(item.translatedText) + "</li>";
- // result += "<li class=\"list-group-item\">" + clean_oovs(item.translatedText) + "</li>";
+ // result += "<li class=\"list-group-item\"><span class=\"badge\">" + (i + 1) + "</span>" + clean(item.translatedText) + "</li>";
+ result += "<li class=\"list-group-item\">" + clean(item.translatedText) + "</li>";
});
result += "</ul>";
[03/22] incubator-joshua git commit: HTTP server now projects case
Posted by mj...@apache.org.
HTTP server now projects case
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/aedeafd0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/aedeafd0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/aedeafd0
Branch: refs/heads/JOSHUA-284
Commit: aedeafd0df1a30ecf686193cd9446ba7e569e84f
Parents: 1038a14
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 12:14:22 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 12:14:22 2016 -0400
----------------------------------------------------------------------
demo/demo.js | 2 +-
.../java/org/apache/joshua/decoder/StructuredTranslation.java | 7 +++----
.../apache/joshua/decoder/StructuredTranslationFactory.java | 4 ++--
src/main/java/org/apache/joshua/decoder/io/JSONMessage.java | 2 +-
4 files changed, 7 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/aedeafd0/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index a6a5711..1662fe2 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -221,7 +221,7 @@ function record_results(data, status) {
$(".oov").click(function(e) {
var oov = e.target.innerHTML;
- $("#addPhrase_source").val(oov);
+ $("#addPhrase_source").val(oov.toLowerCase());
$("#addPhrase_target").select();
});
};
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/aedeafd0/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index b44a7f0..887f2fc 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -88,7 +88,7 @@ public class StructuredTranslation {
* @return the formatted string
*/
public String getFormattedTranslationString() {
- throw new RuntimeException("Not yet implemented");
+ return maybeProjectCase(getTranslationString());
}
public List<String> getTranslationTokens() {
@@ -124,9 +124,8 @@ public class StructuredTranslation {
* If requested, projects source-side lettercase to target, and appends the alignment
* to the source-side sentence in ||s.
*
- * @param hypothesis todo
- * @param state todo
- * @return source-side lettercase to target, and appends the alignment from to the source-side sentence in ||s
+ * @param hypothesis the string hypothesis
+ * @return source-side lettercase to target, and appends the alignment to the source-side sentence
*/
private String maybeProjectCase(String hypothesis) {
String output = hypothesis;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/aedeafd0/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
index 916a5a7..9be1f1a 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
@@ -33,6 +33,8 @@ import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationState;
import org.apache.joshua.decoder.segment_file.Sentence;
+import org.apache.joshua.decoder.segment_file.Token;
+import org.apache.joshua.util.FormatUtils;
/**
* This factory provides methods to create StructuredTranslation objects
@@ -112,6 +114,4 @@ public class StructuredTranslationFactory {
return asList(translationString.split("\\s+"));
}
}
-
-
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/aedeafd0/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
index 90a550b..5056aaa 100644
--- a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
+++ b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
@@ -72,7 +72,7 @@ public class JSONMessage {
// }
public void addTranslation(Translation translation) {
- String viterbi = translation.getStructuredTranslations().get(0).getTranslationString();
+ String viterbi = translation.getStructuredTranslations().get(0).getFormattedTranslationString();
TranslationItem item = addTranslation(viterbi);
[08/22] incubator-joshua git commit: Merge branch 'master' into demo
Posted by mj...@apache.org.
Merge branch 'master' into demo
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/278be37e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/278be37e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/278be37e
Branch: refs/heads/JOSHUA-284
Commit: 278be37e58ffa4fcd12382e8ba7c7a55a7a21b3b
Parents: 81b33d2 fcaf0bf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 07:15:06 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 07:15:06 2016 +0200
----------------------------------------------------------------------
README.md | 4 +-
.../apache/joshua/util/CommandLineParser.java | 738 -------------------
2 files changed, 3 insertions(+), 739 deletions(-)
----------------------------------------------------------------------
[12/22] incubator-joshua git commit: Merge branch 'demo'
Posted by mj...@apache.org.
Merge branch 'demo'
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2e2ee09b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2e2ee09b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2e2ee09b
Branch: refs/heads/JOSHUA-284
Commit: 2e2ee09b725f2182413219e406ba3fca9d71c00f
Parents: 3e2e053 47f4c96
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 06:46:07 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 06:46:07 2016 -0400
----------------------------------------------------------------------
demo/README.md | 2 +-
demo/apache_joshua_logo.png | Bin 0 -> 306617 bytes
demo/apache_joshua_logo_faded.png | Bin 0 -> 309216 bytes
demo/demo.config | 3 ++
demo/demo.js | 19 +++++++---
demo/index.html | 37 +++++++++----------
.../joshua/decoder/StructuredTranslation.java | 7 ++--
.../decoder/StructuredTranslationFactory.java | 4 +-
.../apache/joshua/decoder/io/JSONMessage.java | 2 +-
.../org/apache/joshua/server/ServerThread.java | 9 +++--
10 files changed, 46 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
[19/22] incubator-joshua git commit: JOSHUA-291 - static analysis
based code improvements on adagrad package
Posted by mj...@apache.org.
JOSHUA-291 - static analysis based code improvements on adagrad package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/233818d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/233818d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/233818d6
Branch: refs/heads/JOSHUA-284
Commit: 233818d6ce526f3a77b33110d67314b723371743
Parents: 44b1b7b
Author: Tommaso Teofili <to...@apache.org>
Authored: Sun Aug 7 19:21:59 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Thu Aug 18 09:57:05 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/adagrad/AdaGradCore.java | 79 +++--
.../org/apache/joshua/adagrad/Optimizer.java | 348 +++++++++----------
2 files changed, 207 insertions(+), 220 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/233818d6/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java b/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
index e51e8a3..9dc81a4 100755
--- a/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
+++ b/src/main/java/org/apache/joshua/adagrad/AdaGradCore.java
@@ -33,6 +33,7 @@ import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -132,9 +133,9 @@ public class AdaGradCore {
/* *********************************************************** */
// private double[] lambda;
- private ArrayList<Double> lambda = new ArrayList<Double>();
+ private ArrayList<Double> lambda = new ArrayList<>();
// the current weight vector. NOTE: indexing starts at 1.
- private ArrayList<Double> bestLambda = new ArrayList<Double>();
+ private final ArrayList<Double> bestLambda = new ArrayList<>();
// the best weight vector across all iterations
private boolean[] isOptimizable;
@@ -341,8 +342,8 @@ public class AdaGradCore {
// and one line for the normalization method
// indexing starts at 1 in these arrays
for (int p = 0; p <= numParams; ++p)
- lambda.add(new Double(0));
- bestLambda.add(new Double(0));
+ lambda.add(0d);
+ bestLambda.add(0d);
// why only lambda is a list? because the size of lambda
// may increase over time, but other arrays are specified in
// the param config file, only used for initialization
@@ -497,7 +498,7 @@ public class AdaGradCore {
indicesOfInterest_all = temp_TSA;
for (int i = 0; i < numSentences; ++i) {
- indicesOfInterest_all[i] = new TreeSet<Integer>();
+ indicesOfInterest_all[i] = new TreeSet<>();
}
} // void initialize(...)
@@ -522,9 +523,9 @@ public class AdaGradCore {
if (folder.exists()) {
File[] listOfFiles = folder.listFiles();
- for (int i = 0; i < listOfFiles.length; i++) {
- if (listOfFiles[i].isFile()) {
- files = listOfFiles[i].getName();
+ for (File listOfFile : listOfFiles) {
+ if (listOfFile.isFile()) {
+ files = listOfFile.getName();
if (files.startsWith("AdaGrad.temp")) {
deleteFile(files);
}
@@ -627,11 +628,11 @@ public class AdaGradCore {
// save feats and stats for all candidates(old & new)
HashMap<String, String>[] feat_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- feat_hash[i] = new HashMap<String, String>();
+ feat_hash[i] = new HashMap<>();
HashMap<String, String>[] stats_hash = new HashMap[numSentences];
for (int i = 0; i < numSentences; i++)
- stats_hash[i] = new HashMap<String, String>();
+ stats_hash[i] = new HashMap<>();
while (!done) { // NOTE: this "loop" will only be carried out once
println("--- Starting AdaGrad iteration #" + iteration + " @ " + (new Date()) + " ---", 1);
@@ -848,7 +849,7 @@ public class AdaGradCore {
// (It's not actually a bug, but only because existingCandStats gets
// cleared before moving to the next source sentence.)
// FIX: should be made an array, indexed by i
- HashMap<String, String> existingCandStats = new HashMap<String, String>();
+ HashMap<String, String> existingCandStats = new HashMap<>();
// VERY IMPORTANT:
// A CANDIDATE X MAY HAVE APPEARED IN ITER 1, ITER 3
// BUT IF THE USER SPECIFIED TO CONSIDER ITERATIONS FROM ONLY ITER 2, THEN
@@ -943,7 +944,7 @@ public class AdaGradCore {
String[] sentsCurrIt_currSrcSent = new String[sizeOfNBest + 1];
- Vector<String> unknownCands_V = new Vector<String>();
+ Vector<String> unknownCands_V = new Vector<>();
// which candidates (of the i'th source sentence) have not been seen before
// this iteration?
@@ -1122,7 +1123,7 @@ public class AdaGradCore {
// initialized as zero anyway
if (featId > numParams) {
++numParams;
- lambda.add(new Double(0));
+ lambda.add(0d);
}
}
}
@@ -1236,7 +1237,7 @@ public class AdaGradCore {
lambda.set(p, bestLambda.get(p));
// and set the rest of lambda to be 0
for (int p = 0; p < lambda.size() - bestLambda.size(); ++p)
- lambda.set(p + bestLambda.size(), new Double(0));
+ lambda.set(p + bestLambda.size(), 0d);
}
return null; // this means that the old values should be kept by the caller
@@ -1284,7 +1285,7 @@ public class AdaGradCore {
}
}
- Vector<String> output = new Vector<String>();
+ Vector<String> output = new Vector<>();
// note: initialLambda[] has length = numParamsOld
// augmented with new feature weights, initial values are 0
@@ -1328,8 +1329,8 @@ public class AdaGradCore {
/************* end optimization **************/
- for (int i = 0; i < output.size(); i++)
- println(output.get(i));
+ for (String anOutput : output)
+ println(anOutput);
// check if any parameter has been updated
boolean anyParamChanged = false;
@@ -1407,7 +1408,7 @@ public class AdaGradCore {
// (interpolation with previous wt vector)
double interCoef = 1.0; // no interpolation for now
for (int i = 1; i <= numParams; i++)
- lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i).doubleValue());
+ lambda.set(i, interCoef * finalLambda[i] + (1 - interCoef) * lambda.get(i));
println("Next iteration will decode with lambda: " + lambdaToString(lambda), 1);
println("", 1);
@@ -1441,9 +1442,9 @@ public class AdaGradCore {
retStr += "(listing the first " + featToPrint + " lambdas)";
for (int c = 1; c <= featToPrint - 1; ++c) {
- retStr += "" + String.format("%.4f", lambdaA.get(c).doubleValue()) + ", ";
+ retStr += "" + String.format("%.4f", lambdaA.get(c)) + ", ";
}
- retStr += "" + String.format("%.4f", lambdaA.get(numParams).doubleValue()) + "}";
+ retStr += "" + String.format("%.4f", lambdaA.get(numParams)) + "}";
return retStr;
}
@@ -1476,7 +1477,7 @@ public class AdaGradCore {
println("Running external decoder...", 1);
try {
- ArrayList<String> cmd = new ArrayList<String>();
+ ArrayList<String> cmd = new ArrayList<>();
cmd.add(decoderCommandFileName);
if (passIterationToDecoder)
@@ -1627,7 +1628,7 @@ public class AdaGradCore {
if (c_match == -1) {
outFile.println(line);
} else {
- if (Math.abs(params.get(c_match).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c_match)) > 1e-20)
outFile.println(Vocabulary.word(c_match) + " " + params.get(c_match));
}
@@ -1636,7 +1637,7 @@ public class AdaGradCore {
// now append weights of new features
for (int c = origFeatNum + 1; c <= numParams; ++c) {
- if (Math.abs(params.get(c).doubleValue()) > 1e-20)
+ if (Math.abs(params.get(c)) > 1e-20)
outFile.println(Vocabulary.word(c) + " " + params.get(c));
}
@@ -1667,7 +1668,7 @@ public class AdaGradCore {
// read default value
lambda.set(c, inFile_init.nextDouble());
- defaultLambda[c] = lambda.get(c).doubleValue();
+ defaultLambda[c] = lambda.get(c);
// read isOptimizable
dummy = inFile_init.next();
@@ -1849,7 +1850,7 @@ public class AdaGradCore {
boolean format3 = false;
- HashSet<String> seenStrings = new HashSet<String>();
+ HashSet<String> seenStrings = new HashSet<>();
BufferedReader inFile = new BufferedReader(new FileReader(docInfoFileName));
for (int i = 0; i < numSentences; ++i) {
// set format3 = true if a duplicate is found
@@ -1861,8 +1862,8 @@ public class AdaGradCore {
inFile.close();
- HashSet<String> seenDocNames = new HashSet<String>();
- HashMap<String, Integer> docOrder = new HashMap<String, Integer>();
+ HashSet<String> seenDocNames = new HashSet<>();
+ HashMap<String, Integer> docOrder = new HashMap<>();
// maps a document name to the order (0-indexed) in which it was seen
inFile = new BufferedReader(new FileReader(docInfoFileName));
@@ -1989,7 +1990,7 @@ public class AdaGradCore {
try {
PrintWriter outFile_lambdas = new PrintWriter(finalLambdaFileName);
for (int c = 1; c <= numParams; ++c) {
- outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c).doubleValue());
+ outFile_lambdas.println(Vocabulary.word(c) + " ||| " + lambda.get(c));
}
outFile_lambdas.close();
@@ -2003,7 +2004,7 @@ public class AdaGradCore {
private String[] cfgFileToArgsArray(String fileName) {
checkFile(fileName);
- Vector<String> argsVector = new Vector<String>();
+ Vector<String> argsVector = new Vector<>();
BufferedReader inFile = null;
try {
@@ -2015,7 +2016,7 @@ public class AdaGradCore {
if (line != null && line.length() > 0 && line.charAt(0) != '#') {
- if (line.indexOf("#") != -1) { // discard comment
+ if (line.contains("#")) { // discard comment
line = line.substring(0, line.indexOf("#"));
}
@@ -2038,7 +2039,7 @@ public class AdaGradCore {
// cmu modification(from meteor for zmert)
// Parse args
- ArrayList<String> argList = new ArrayList<String>();
+ ArrayList<String> argList = new ArrayList<>();
StringBuilder arg = new StringBuilder();
boolean quoted = false;
for (int i = 0; i < line.length(); i++) {
@@ -2071,9 +2072,7 @@ public class AdaGradCore {
argsVector.add(paramA[1]);
} else if (paramA.length > 2 && (paramA[0].equals("-m") || paramA[0].equals("-docSet"))) {
// -m (metricName), -docSet are allowed to have extra options
- for (int opt = 0; opt < paramA.length; ++opt) {
- argsVector.add(paramA[opt]);
- }
+ Collections.addAll(argsVector, paramA);
} else {
String msg = "Malformed line in config file:" + origLine;
throw new RuntimeException(msg);
@@ -2413,7 +2412,7 @@ public class AdaGradCore {
if (val < 0 || val > 1) {
throw new RuntimeException("passIterationToDecoder should be either 0 or 1");
}
- passIterationToDecoder = (val == 1) ? true : false;
+ passIterationToDecoder = (val == 1);
} else if (option.equals("-decOut")) {
decoderOutFileName = args[i + 1];
} else if (option.equals("-decExit")) {
@@ -2808,7 +2807,7 @@ public class AdaGradCore {
str = " " + str + " ";
str = str.replaceAll("\\s+", " ");
- TreeSet<Integer> splitIndices = new TreeSet<Integer>();
+ TreeSet<Integer> splitIndices = new TreeSet<>();
for (int i = 0; i < str.length(); ++i) {
char ch = str.charAt(i);
@@ -2855,7 +2854,7 @@ public class AdaGradCore {
// remove spaces around dashes
if (normMethod == 2 || normMethod == 4) {
- TreeSet<Integer> skipIndices = new TreeSet<Integer>();
+ TreeSet<Integer> skipIndices = new TreeSet<>();
str = " " + str + " ";
for (int i = 0; i < str.length(); ++i) {
@@ -3031,7 +3030,7 @@ public class AdaGradCore {
}
private ArrayList<Double> randomLambda() {
- ArrayList<Double> retLambda = new ArrayList<Double>(1 + numParams);
+ ArrayList<Double> retLambda = new ArrayList<>(1 + numParams);
for (int c = 1; c <= numParams; ++c) {
if (isOptimizable[c]) {
@@ -3092,8 +3091,8 @@ public class AdaGradCore {
// print("discarding: ",4);
int numCandidates = slope.length;
- HashSet<Integer> discardedIndices = new HashSet<Integer>();
- HashMap<Double, Integer> indicesOfSlopes = new HashMap<Double, Integer>();
+ HashSet<Integer> discardedIndices = new HashSet<>();
+ HashMap<Double, Integer> indicesOfSlopes = new HashMap<>();
// maps slope to index of best candidate that has that slope.
// ("best" as in the one with the highest offset)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/233818d6/src/main/java/org/apache/joshua/adagrad/Optimizer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/adagrad/Optimizer.java b/src/main/java/org/apache/joshua/adagrad/Optimizer.java
index 722c593..16c25cd 100755
--- a/src/main/java/org/apache/joshua/adagrad/Optimizer.java
+++ b/src/main/java/org/apache/joshua/adagrad/Optimizer.java
@@ -42,13 +42,12 @@ public class Optimizer {
feat_hash = _feat_hash; // feature hash table
stats_hash = _stats_hash; // suff. stats hash table
finalLambda = new double[initialLambda.length];
- for(int i = 0; i < finalLambda.length; i++)
- finalLambda[i] = initialLambda[i];
+ System.arraycopy(initialLambda, 0, finalLambda, 0, finalLambda.length);
}
//run AdaGrad for one epoch
public double[] runOptimizer() {
- List<Integer> sents = new ArrayList<Integer>();
+ List<Integer> sents = new ArrayList<>();
for( int i = 0; i < sentNum; ++i )
sents.add(i);
double[] avgLambda = new double[initialLambda.length]; //only needed if averaging is required
@@ -87,14 +86,14 @@ public class Optimizer {
double Hii = 0;
double gradiiSquare = 0;
int lastUpdateTime = 0;
- HashMap<Integer, Integer> lastUpdate = new HashMap<Integer, Integer>();
- HashMap<Integer, Double> lastVal = new HashMap<Integer, Double>();
- HashMap<Integer, Double> H = new HashMap<Integer, Double>();
+ HashMap<Integer, Integer> lastUpdate = new HashMap<>();
+ HashMap<Integer, Double> lastVal = new HashMap<>();
+ HashMap<Integer, Double> H = new HashMap<>();
while( sentCount < sentNum ) {
loss = 0;
thisBatchSize = batchSize;
++numBatch;
- HashMap<Integer, Double> featDiff = new HashMap<Integer, Double>();
+ HashMap<Integer, Double> featDiff = new HashMap<>();
for(int b = 0; b < batchSize; ++b ) {
//find out oracle and prediction
s = sents.get(sentCount);
@@ -124,51 +123,48 @@ public class Optimizer {
//accumulate difference feature vector
if ( b == 0 ) {
- for (int i = 0; i < vecOraFeat.length; i++) {
- featInfo = vecOraFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
- }
- for (int i = 0; i < vecPredFeat.length; i++) {
- featInfo = vecPredFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecOraFeat : vecOraFeat) {
+ featInfo = aVecOraFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
+ }
+ for (String aVecPredFeat : vecPredFeat) {
+ featInfo = aVecPredFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the 2nd feature vector
+ featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1]));
}
- else //features only firing in the 2nd feature vector
- featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
- }
} else {
- for (int i = 0; i < vecOraFeat.length; i++) {
- featInfo = vecOraFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)+Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecOraFeat : vecOraFeat) {
+ featInfo = aVecOraFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) + Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the new oracle feature vector
+ featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
}
- else //features only firing in the new oracle feature vector
- featDiff.put(diffFeatId, Double.parseDouble(featInfo[1]));
- }
- for (int i = 0; i < vecPredFeat.length; i++) {
- featInfo = vecPredFeat[i].split("=");
- diffFeatId = Integer.parseInt(featInfo[0]);
- if (featDiff.containsKey(diffFeatId)) { //overlapping features
- diff = featDiff.get(diffFeatId)-Double.parseDouble(featInfo[1]);
- if ( Math.abs(diff) > 1e-20 )
- featDiff.put(diffFeatId, diff);
- else
- featDiff.remove(diffFeatId);
+ for (String aVecPredFeat : vecPredFeat) {
+ featInfo = aVecPredFeat.split("=");
+ diffFeatId = Integer.parseInt(featInfo[0]);
+ if (featDiff.containsKey(diffFeatId)) { //overlapping features
+ diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]);
+ if (Math.abs(diff) > 1e-20)
+ featDiff.put(diffFeatId, diff);
+ else
+ featDiff.remove(diffFeatId);
+ } else //features only firing in the new prediction feature vector
+ featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1]));
}
- else //features only firing in the new prediction feature vector
- featDiff.put(diffFeatId, -1.0*Double.parseDouble(featInfo[1]));
- }
}
//remember the model scores here are already scaled
@@ -350,7 +346,7 @@ public class Optimizer {
} //for ( int iter = 0; iter < adagradIter; ++iter ) {
//non-optimizable weights should remain unchanged
- ArrayList<Double> cpFixWt = new ArrayList<Double>();
+ ArrayList<Double> cpFixWt = new ArrayList<>();
for ( int i = 1; i < isOptimizable.length; ++i ) {
if ( ! isOptimizable[i] )
cpFixWt.add(finalLambda[i]);
@@ -388,26 +384,25 @@ public class Optimizer {
// find out the 1-best candidate for each sentence
// this depends on the training mode
maxModelScore = NegInf;
- for (Iterator it = candSet.iterator(); it.hasNext();) {
- modelScore = 0.0;
- candStr = it.next().toString();
+ for (String aCandSet : candSet) {
+ modelScore = 0.0;
+ candStr = aCandSet.toString();
- feat_str = feat_hash[i].get(candStr).split("\\s+");
+ feat_str = feat_hash[i].get(candStr).split("\\s+");
- String[] feat_info;
+ String[] feat_info;
- for (int f = 0; f < feat_str.length; f++) {
- feat_info = feat_str[f].split("=");
- modelScore +=
- Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
- }
+ for (String aFeat_str : feat_str) {
+ feat_info = aFeat_str.split("=");
+ modelScore += Double.parseDouble(feat_info[1]) * finalLambda[Vocabulary.id(feat_info[0])];
+ }
- if (maxModelScore < modelScore) {
- maxModelScore = modelScore;
- tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
- // suff stats
- }
- }
+ if (maxModelScore < modelScore) {
+ maxModelScore = modelScore;
+ tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the
+ // suff stats
+ }
+ }
for (int j = 0; j < suffStatsCount; j++)
corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate
@@ -451,115 +446,108 @@ public class Optimizer {
else
worstPredScore = PosInf;
}
-
- for (Iterator it = candSet.iterator(); it.hasNext();) {
- cand = it.next().toString();
- candMetric = computeSentMetric(sentId, cand); //compute metric score
-
- //start to compute model score
- candScore = 0;
- featStr = feat_hash[sentId].get(cand).split("\\s+");
- feats = "";
-
- for (int i = 0; i < featStr.length; i++) {
- featInfo = featStr[i].split("=");
- actualFeatId = Vocabulary.id(featInfo[0]);
- candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
- if ( (actualFeatId < isOptimizable.length && isOptimizable[actualFeatId]) ||
- actualFeatId >= isOptimizable.length )
- feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
- }
-
- candScore *= featScale; //scale the model score
-
- //is this cand oracle?
- if(oraSelectMode == 1) {//"hope", b=1, r=1
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( bestOraScore<=(candScore-candMetric) ) {
- bestOraScore = candScore-candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- else {
- if( bestOraScore<=(candScore+candMetric) ) {
- bestOraScore = candScore+candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- }
- else {//best metric score(ex: max BLEU), b=1, r=0
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( bestOraScore>=candMetric ) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- else {
- if( bestOraScore<=candMetric ) {
- bestOraScore = candMetric;
- oraMetric = candMetric;
- oraScore = candScore;
- oraFeat = feats;
- oraCand = cand;
- }
- }
- }
-
- //is this cand prediction?
- if(predSelectMode == 1) {//"fear"
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( worstPredScore<=(candScore+candMetric) ) {
- worstPredScore = candScore+candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {
- if( worstPredScore<=(candScore-candMetric) ) {
- worstPredScore = candScore-candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- }
- else if(predSelectMode == 2) {//model prediction(max model score)
- if( worstPredScore<=candScore ) {
- worstPredScore = candScore;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {//worst metric score(ex: min BLEU)
- if(evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
- if( worstPredScore<=candMetric ) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- else {
- if( worstPredScore>=candMetric ) {
- worstPredScore = candMetric;
- predMetric = candMetric;
- predScore = candScore;
- predFeat = feats;
- }
- }
- }
- }
+
+ for (String aCandSet : candSet) {
+ cand = aCandSet.toString();
+ candMetric = computeSentMetric(sentId, cand); //compute metric score
+
+ //start to compute model score
+ candScore = 0;
+ featStr = feat_hash[sentId].get(cand).split("\\s+");
+ feats = "";
+
+ for (String aFeatStr : featStr) {
+ featInfo = aFeatStr.split("=");
+ actualFeatId = Vocabulary.id(featInfo[0]);
+ candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId];
+ if ((actualFeatId < isOptimizable.length && isOptimizable[actualFeatId])
+ || actualFeatId >= isOptimizable.length)
+ feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " ";
+ }
+
+ candScore *= featScale; //scale the model score
+
+ //is this cand oracle?
+ if (oraSelectMode == 1) {//"hope", b=1, r=1
+ if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if (bestOraScore <= (candScore - candMetric)) {
+ bestOraScore = candScore - candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= (candScore + candMetric)) {
+ bestOraScore = candScore + candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ } else {//best metric score(ex: max BLEU), b=1, r=0
+ if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if (bestOraScore >= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ } else {
+ if (bestOraScore <= candMetric) {
+ bestOraScore = candMetric;
+ oraMetric = candMetric;
+ oraScore = candScore;
+ oraFeat = feats;
+ oraCand = cand;
+ }
+ }
+ }
+
+ //is this cand prediction?
+ if (predSelectMode == 1) {//"fear"
+ if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if (worstPredScore <= (candScore + candMetric)) {
+ worstPredScore = candScore + candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore <= (candScore - candMetric)) {
+ worstPredScore = candScore - candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ } else if (predSelectMode == 2) {//model prediction(max model score)
+ if (worstPredScore <= candScore) {
+ worstPredScore = candScore;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {//worst metric score(ex: min BLEU)
+ if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better
+ if (worstPredScore <= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ } else {
+ if (worstPredScore >= candMetric) {
+ worstPredScore = candMetric;
+ predMetric = candMetric;
+ predScore = candScore;
+ predFeat = feats;
+ }
+ }
+ }
+ }
oraPredScore[0] = oraMetric;
oraPredScore[1] = oraScore;
@@ -695,14 +683,14 @@ public class Optimizer {
return finalMetricScore;
}
- private Vector<String> output;
+ private final Vector<String> output;
private double[] initialLambda;
- private double[] finalLambda;
+ private final double[] finalLambda;
private double finalMetricScore;
- private HashMap<String, String>[] feat_hash;
- private HashMap<String, String>[] stats_hash;
- private int paramDim;
- private boolean[] isOptimizable;
+ private final HashMap<String, String>[] feat_hash;
+ private final HashMap<String, String>[] stats_hash;
+ private final int paramDim;
+ private final boolean[] isOptimizable;
public static int sentNum;
public static int adagradIter; //AdaGrad internal iterations
public static int oraSelectMode;
[15/22] incubator-joshua git commit: Merge branch 'JOSHUA-301' of
github.com:maxthomas/incubator-joshua into JOSHUA-301
Posted by mj...@apache.org.
Merge branch 'JOSHUA-301' of github.com:maxthomas/incubator-joshua into JOSHUA-301
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/44b1b7be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/44b1b7be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/44b1b7be
Branch: refs/heads/JOSHUA-284
Commit: 44b1b7be2a9ae349cb8c798fec152f0075f76e9d
Parents: 19fe756 dccb48b
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 17 14:57:22 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 17 14:57:22 2016 -0400
----------------------------------------------------------------------
pom.xml | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/44b1b7be/pom.xml
----------------------------------------------------------------------
[18/22] incubator-joshua git commit: JOSHUA-291 - static analysis
based code improvements on decoder package
Posted by mj...@apache.org.
JOSHUA-291 - static analysis based code improvements on decoder package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/029cbbcc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/029cbbcc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/029cbbcc
Branch: refs/heads/JOSHUA-284
Commit: 029cbbcc156d4939ebe503cc58962ab24728f653
Parents: 356b173
Author: Tommaso Teofili <to...@apache.org>
Authored: Sun Aug 7 19:35:27 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Thu Aug 18 09:57:05 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/decoder/ArgsParser.java | 8 +-
.../java/org/apache/joshua/decoder/BLEU.java | 72 ++++++++--------
.../java/org/apache/joshua/decoder/Decoder.java | 26 +++---
.../apache/joshua/decoder/DecoderThread.java | 2 +-
.../joshua/decoder/JoshuaConfiguration.java | 45 +++++-----
.../joshua/decoder/NbestMinRiskReranker.java | 33 ++++----
.../joshua/decoder/StructuredTranslation.java | 3 -
.../decoder/StructuredTranslationFactory.java | 1 -
.../org/apache/joshua/decoder/Translation.java | 8 +-
.../org/apache/joshua/decoder/Translations.java | 2 +-
.../joshua/decoder/chart_parser/Cell.java | 12 +--
.../joshua/decoder/chart_parser/Chart.java | 51 ++++++------
.../decoder/chart_parser/ComputeNodeResult.java | 10 +--
.../decoder/chart_parser/CubePruneState.java | 20 ++---
.../joshua/decoder/chart_parser/DotChart.java | 24 +++---
.../joshua/decoder/chart_parser/SourcePath.java | 4 +-
.../decoder/chart_parser/StateConstraint.java | 5 +-
.../joshua/decoder/chart_parser/SuperNode.java | 2 +-
.../joshua/decoder/ff/FeatureFunction.java | 24 +++---
.../apache/joshua/decoder/ff/FeatureVector.java | 21 +++--
.../joshua/decoder/ff/LabelCombinationFF.java | 2 +-
.../joshua/decoder/ff/LabelSubstitutionFF.java | 8 +-
.../joshua/decoder/ff/LexicalFeatures.java | 2 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 7 +-
.../apache/joshua/decoder/ff/PhraseModel.java | 2 +-
.../apache/joshua/decoder/ff/PhrasePenalty.java | 4 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 6 +-
.../decoder/ff/RulePropertiesQuerying.java | 6 +-
.../org/apache/joshua/decoder/ff/RuleShape.java | 2 +-
.../joshua/decoder/ff/SourceDependentFF.java | 4 +-
.../apache/joshua/decoder/ff/SourcePathFF.java | 2 +-
.../apache/joshua/decoder/ff/TargetBigram.java | 13 ++-
.../ff/fragmentlm/ConcatenationIterator.java | 10 +--
.../decoder/ff/fragmentlm/FragmentLMFF.java | 59 ++++++-------
.../ff/fragmentlm/PennTreebankReader.java | 17 ++--
.../joshua/decoder/ff/fragmentlm/Tree.java | 56 ++++++-------
.../joshua/decoder/ff/fragmentlm/Trees.java | 8 +-
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 26 +++---
.../joshua/decoder/ff/lm/LanguageModelFF.java | 39 +++++----
.../ff/lm/berkeley_lm/LMGrammarBerkeley.java | 4 +-
.../ff/lm/bloomfilter_lm/BloomFilter.java | 2 +-
.../BloomFilterLanguageModel.java | 18 ++--
.../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 25 +++---
.../joshua/decoder/ff/phrase/Distortion.java | 2 +-
.../ff/similarity/EdgePhraseSimilarityFF.java | 17 ++--
.../ff/state_maintenance/NgramDPState.java | 6 +-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 12 +--
.../decoder/ff/tm/BasicRuleCollection.java | 2 +-
.../joshua/decoder/ff/tm/CreateGlueGrammar.java | 2 +-
.../joshua/decoder/ff/tm/GrammarReader.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 2 +-
.../org/apache/joshua/decoder/ff/tm/Rule.java | 67 +++++++--------
.../decoder/ff/tm/SentenceFilteredGrammar.java | 12 +--
.../decoder/ff/tm/format/MosesFormatReader.java | 2 +-
.../ff/tm/hash_based/ExtensionIterator.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 8 +-
.../decoder/ff/tm/packed/PackedGrammar.java | 87 +++++++++-----------
.../ff/tm/packed/SliceAggregatingTrie.java | 4 +-
.../decoder/hypergraph/AlignedSourceTokens.java | 2 +-
.../decoder/hypergraph/AllSpansWalker.java | 19 ++---
.../hypergraph/DefaultInsideOutside.java | 34 ++++----
.../joshua/decoder/hypergraph/ForestWalker.java | 10 +--
.../GrammarBuilderWalkerFunction.java | 14 ++--
.../joshua/decoder/hypergraph/HGNode.java | 54 ++++++------
.../joshua/decoder/hypergraph/HyperEdge.java | 6 +-
.../joshua/decoder/hypergraph/HyperGraph.java | 30 ++++---
.../decoder/hypergraph/HyperGraphPruning.java | 9 +-
.../decoder/hypergraph/KBestExtractor.java | 51 ++++++------
.../hypergraph/OutputStringExtractor.java | 8 +-
.../hypergraph/StringToTreeConverter.java | 16 ++--
.../decoder/hypergraph/ViterbiExtractor.java | 10 +--
.../hypergraph/WordAlignmentExtractor.java | 2 +-
.../decoder/hypergraph/WordAlignmentState.java | 8 +-
.../apache/joshua/decoder/io/JSONMessage.java | 16 ++--
.../decoder/io/TranslationRequestStream.java | 6 +-
.../apache/joshua/decoder/phrase/Candidate.java | 12 +--
.../apache/joshua/decoder/phrase/Coverage.java | 2 +-
.../apache/joshua/decoder/phrase/Future.java | 6 +-
.../apache/joshua/decoder/phrase/Header.java | 2 +-
.../joshua/decoder/phrase/Hypothesis.java | 9 +-
.../joshua/decoder/phrase/PhraseChart.java | 20 ++---
.../joshua/decoder/phrase/PhraseTable.java | 6 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 26 +++---
.../apache/joshua/decoder/phrase/Stacks.java | 23 +++---
.../decoder/segment_file/ConstraintRule.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 18 ++--
.../joshua/decoder/segment_file/Token.java | 9 +-
87 files changed, 638 insertions(+), 716 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ArgsParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ArgsParser.java b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
index 5af6d11..26ed674 100644
--- a/src/main/java/org/apache/joshua/decoder/ArgsParser.java
+++ b/src/main/java/org/apache/joshua/decoder/ArgsParser.java
@@ -70,11 +70,9 @@ public class ArgsParser {
} else if (args[i].equals("-license")) {
try {
- for (String line: Files.readAllLines(Paths.get(String.format("%s/../LICENSE",
- JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation().getPath())),
- Charset.defaultCharset())) {
- System.out.println(line);
- }
+ Files.readAllLines(Paths.get(String.format("%s/../LICENSE",
+ JoshuaConfiguration.class.getProtectionDomain().getCodeSource().getLocation()
+ .getPath())), Charset.defaultCharset()).forEach(System.out::println);
} catch (IOException e) {
throw new RuntimeException("FATAL: missing license file!", e);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/BLEU.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/BLEU.java b/src/main/java/org/apache/joshua/decoder/BLEU.java
index 8b51403..0a56c5e 100644
--- a/src/main/java/org/apache/joshua/decoder/BLEU.java
+++ b/src/main/java/org/apache/joshua/decoder/BLEU.java
@@ -69,7 +69,7 @@ public class BLEU {
// === hyp tbl
String[] hypWrds = Regex.spaces.split(hypSent);
- HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> hypNgramTbl = new HashMap<>();
Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
return computeSentenceBleu(effectiveRefLen, maxRefCountTbl, hypWrds.length, hypNgramTbl,
doNgramClip, bleuOrder);
@@ -78,14 +78,14 @@ public class BLEU {
public static float computeEffectiveLen(int[] refLens, boolean useShortestRef) {
if (useShortestRef) {
int res = Integer.MAX_VALUE;
- for (int i = 0; i < refLens.length; i++)
- if (refLens[i] < res)
- res = refLens[i];
+ for (int refLen : refLens)
+ if (refLen < res)
+ res = refLen;
return res;
} else {// default is average length
float res = 0;
- for (int i = 0; i < refLens.length; i++)
- res += refLens[i];
+ for (int refLen : refLens)
+ res += refLen;
return res * 1.0f / refLens.length;
}
}
@@ -98,13 +98,13 @@ public class BLEU {
* */
public static HashMap<String, Integer> constructMaxRefCountTable(String[] refSents, int bleuOrder) {
- List<HashMap<String, Integer>> listRefNgramTbl = new ArrayList<HashMap<String, Integer>>();
- for (int i = 0; i < refSents.length; i++) {
+ List<HashMap<String, Integer>> listRefNgramTbl = new ArrayList<>();
+ for (String refSent : refSents) {
// if(refSents[i]==null){System.out.println("null ref sent"); System.exit(1);}
// String[] refWords = refSents[i].split("\\s+");
- String[] refWords = Regex.spaces.split(refSents[i]);
+ String[] refWords = Regex.spaces.split(refSent);
- HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> refNgramTbl = new HashMap<>();
Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWords);
listRefNgramTbl.add(refNgramTbl);
}
@@ -120,7 +120,7 @@ public class BLEU {
public static HashMap<String, Integer> computeMaxRefCountTbl(
List<HashMap<String, Integer>> listRefNgramTbl) {
- HashMap<String, Integer> merged = new HashMap<String, Integer>();
+ HashMap<String, Integer> merged = new HashMap<>();
// == get merged key set
for (HashMap<String, Integer> tbl : listRefNgramTbl) {
@@ -180,9 +180,9 @@ public class BLEU {
int bleuOrder) {
String[] refWrds = Regex.spaces.split(refSent);
String[] hypWrds = Regex.spaces.split(hypSent);
- HashMap<String, Integer> refNgramTbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> refNgramTbl = new HashMap<>();
Ngram.getNgrams(refNgramTbl, 1, bleuOrder, refWrds);
- HashMap<String, Integer> hypNgramTbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> hypNgramTbl = new HashMap<>();
Ngram.getNgrams(hypNgramTbl, 1, bleuOrder, hypWrds);
return computeSentenceBleu(refWrds.length, refNgramTbl, hypWrds.length, hypNgramTbl,
doNgramClip, bleuOrder);
@@ -237,7 +237,7 @@ public class BLEU {
}
public static HashMap<String, Integer> constructNgramTable(String sentence, int bleuOrder) {
- HashMap<String, Integer> ngramTable = new HashMap<String, Integer>();
+ HashMap<String, Integer> ngramTable = new HashMap<>();
String[] refWrds = Regex.spaces.split(sentence);
Ngram.getNgrams(ngramTable, 1, bleuOrder, refWrds);
return ngramTable;
@@ -371,22 +371,22 @@ public class BLEU {
// System.err.println(String.format("compute(%s)", rule));
- ArrayList<Integer> currentNgram = new ArrayList<Integer>();
+ ArrayList<Integer> currentNgram = new ArrayList<>();
int boundary = -1;
int tailIndex = -1;
- for (int i = 0; i < symbols.length; i++) {
- if (symbols[i] < 0) {
+ for (int symbol : symbols) {
+ if (symbol < 0) {
tailIndex++;
NgramDPState ngramState = null;
try {
ngramState = (NgramDPState) edge.getTailNodes().get(tailIndex).getDPState(0);
} catch (ClassCastException e) {
- throw new RuntimeException(String.format(
- "* FATAL: first state needs to be NgramDPState (found %s)", edge.getTailNodes()
- .get(tailIndex).getDPState(0).getClass()));
+ throw new RuntimeException(String
+ .format("* FATAL: first state needs to be NgramDPState (found %s)",
+ edge.getTailNodes().get(tailIndex).getDPState(0).getClass()));
}
-
+
// Compute ngrams overlapping with left context of tail node
if (currentNgram.size() > 0) {
boundary = currentNgram.size();
@@ -394,15 +394,15 @@ public class BLEU {
currentNgram.add(id);
// Compute the BLEU statistics
- BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+ Stats partStats = computeOverDivide(currentNgram, references, boundary);
stats.add(partStats);
-
-// System.err.println(" " + Vocabulary.getWords(ngramState.getLeftLMStateWords()));
+
+ // System.err.println(" " + Vocabulary.getWords(ngramState.getLeftLMStateWords()));
currentNgram.clear();
}
-
-// System.err.println(" " + Vocabulary.getWords(ngramState.getRightLMStateWords()));
+
+ // System.err.println(" " + Vocabulary.getWords(ngramState.getRightLMStateWords()));
// Accumulate ngrams from right context of tail node
for (int id : ngramState.getRightLMStateWords())
@@ -411,13 +411,13 @@ public class BLEU {
boundary = currentNgram.size();
} else { // terminal symbol
- currentNgram.add(symbols[i]);
+ currentNgram.add(symbol);
stats.len++;
-// System.err.println(" " + Vocabulary.word(symbols[i]));
-
+ // System.err.println(" " + Vocabulary.word(symbols[i]));
+
if (boundary != -1) {
- BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+ Stats partStats = computeOverDivide(currentNgram, references, boundary);
stats.add(partStats);
// Shift off the context from the nonterminal's righthand side
@@ -433,7 +433,7 @@ public class BLEU {
* nonterminal's righthand context and from the rule
*/
if (currentNgram.size() > 0 && currentNgram.size() != boundary) { // skip cases (a) and (b)
- BLEU.Stats partStats = computeOverDivide(currentNgram, references, boundary);
+ Stats partStats = computeOverDivide(currentNgram, references, boundary);
stats.add(partStats);
}
}
@@ -456,7 +456,7 @@ public class BLEU {
// System.err.print(String.format(" BOUNDARY(%s, %d)", Vocabulary.getWords(ngram), boundary));
- HashMap<String, Integer> boundaryNgrams = new HashMap<String, Integer>();
+ HashMap<String, Integer> boundaryNgrams = new HashMap<>();
for (int width = 1; width <= Math.min(maxOrder, ngram.size()); width++) {
for (int i = 0; i < ngram.size() - width + 1; i++) {
int j = i + width;
@@ -501,10 +501,10 @@ public class BLEU {
}
private void fill(String[] references) {
- ngramCounts = new HashMap<String, Integer>();
+ ngramCounts = new HashMap<>();
reflen = 0.0f;
- for (int i = 0; i < references.length; i++) {
- String[] ref = references[i].split(" ");
+ for (String reference : references) {
+ String[] ref = reference.split(" ");
Ngram.getNgrams(ngramCounts, 1, maxOrder, ref);
reflen += ref.length;
}
@@ -536,7 +536,7 @@ public class BLEU {
* Accumulated sufficient statistics for computing BLEU.
*/
public static class Stats {
- public int[] counts;
+ public final int[] counts;
public float len;
public float reflen;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/Decoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Decoder.java b/src/main/java/org/apache/joshua/decoder/Decoder.java
index e1eadb1..d524a27 100644
--- a/src/main/java/org/apache/joshua/decoder/Decoder.java
+++ b/src/main/java/org/apache/joshua/decoder/Decoder.java
@@ -100,7 +100,7 @@ public class Decoder {
* overhead, but it can be problematic because of unseen dependencies (for example, in the
* Vocabulary shared by language model, translation grammar, etc).
*/
- private List<Grammar> grammars;
+ private final List<Grammar> grammars;
private ArrayList<FeatureFunction> featureFunctions;
private Grammar customPhraseTable;
@@ -146,9 +146,8 @@ public class Decoder {
*/
private Decoder(JoshuaConfiguration joshuaConfiguration) {
this.joshuaConfiguration = joshuaConfiguration;
- this.grammars = new ArrayList<Grammar>();
- this.threadPool = new ArrayBlockingQueue<DecoderThread>(
- this.joshuaConfiguration.num_parallel_decoders, true);
+ this.grammars = new ArrayList<>();
+ this.threadPool = new ArrayBlockingQueue<>(this.joshuaConfiguration.num_parallel_decoders, true);
this.customPhraseTable = null;
}
@@ -385,7 +384,7 @@ public class Decoder {
try {
for (String line : reader) {
line = line.trim();
- if (Regex.commentOrEmptyLine.matches(line) || line.indexOf("=") != -1) {
+ if (Regex.commentOrEmptyLine.matches(line) || line.contains("=")) {
// comment, empty line, or parameter lines: just copy
writer.write(line);
writer.newLine();
@@ -492,13 +491,12 @@ public class Decoder {
/* Sanity check for old-style unsupported feature invocations. */
if (pair.length != 2) {
- StringBuilder errMsg = new StringBuilder();
- errMsg.append("FATAL: Invalid feature weight line found in config file.\n");
- errMsg.append(String.format("The line was '%s'\n", pairStr));
- errMsg.append("You might be using an old version of the config file that is no longer supported\n");
- errMsg.append("Check joshua.apache.org or email dev@joshua.apache.org for help\n");
- errMsg.append("Code = " + 17);
- throw new RuntimeException(errMsg.toString());
+ String errMsg = "FATAL: Invalid feature weight line found in config file.\n" +
+ String.format("The line was '%s'\n", pairStr) +
+ "You might be using an old version of the config file that is no longer supported\n" +
+ "Check joshua.apache.org or email dev@joshua.apache.org for help\n" +
+ "Code = " + 17;
+ throw new RuntimeException(errMsg);
}
weights.set(pair[0], Float.parseFloat(pair[1]));
@@ -507,7 +505,7 @@ public class Decoder {
LOG.info("Read {} weights ({} of them dense)", weights.size(), DENSE_FEATURE_NAMES.size());
// Do this before loading the grammars and the LM.
- this.featureFunctions = new ArrayList<FeatureFunction>();
+ this.featureFunctions = new ArrayList<>();
// Initialize and load grammars. This must happen first, since the vocab gets defined by
// the packed grammar (if any)
@@ -643,7 +641,7 @@ public class Decoder {
}
/* Now create a feature function for each owner */
- final Set<OwnerId> ownersSeen = new HashSet<OwnerId>();
+ final Set<OwnerId> ownersSeen = new HashSet<>();
for (Grammar grammar: this.grammars) {
OwnerId owner = grammar.getOwner();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/DecoderThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/DecoderThread.java b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
index d6f5233..bdbdba0 100644
--- a/src/main/java/org/apache/joshua/decoder/DecoderThread.java
+++ b/src/main/java/org/apache/joshua/decoder/DecoderThread.java
@@ -70,7 +70,7 @@ public class DecoderThread extends Thread {
this.joshuaConfiguration = joshuaConfiguration;
this.allGrammars = grammars;
- this.featureFunctions = new ArrayList<FeatureFunction>();
+ this.featureFunctions = new ArrayList<>();
for (FeatureFunction ff : featureFunctions) {
if (ff instanceof SourceDependentFF) {
this.featureFunctions.add(((SourceDependentFF) ff).clone());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
index e7ad3b4..e6f2955 100644
--- a/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
+++ b/src/main/java/org/apache/joshua/decoder/JoshuaConfiguration.java
@@ -66,11 +66,11 @@ public class JoshuaConfiguration {
public boolean project_case = false;
// List of grammar files to read
- public ArrayList<String> tms = new ArrayList<String>();
+ public ArrayList<String> tms = new ArrayList<>();
// A rule cache for commonly used tries to avoid excess object allocations
// Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
- public Integer cachedRuleSize = new Integer(5000);
+ public Integer cachedRuleSize = 5000;
/*
* The file to read the weights from (part of the sparse features implementation). Weights can
@@ -94,9 +94,9 @@ public class JoshuaConfiguration {
* If this is empty, an unweighted default_non_terminal is used.
*/
public class OOVItem implements Comparable<OOVItem> {
- public String label;
+ public final String label;
- public float weight;
+ public final float weight;
OOVItem(String l, float w) {
label = l;
@@ -192,17 +192,19 @@ public class JoshuaConfiguration {
/* A list of the feature functions. */
- public ArrayList<String> features = new ArrayList<String>();
+ public ArrayList<String> features = new ArrayList<>();
/* A list of weights found in the main config file (instead of in a separate weights file) */
- public ArrayList<String> weights = new ArrayList<String>();
+ public ArrayList<String> weights = new ArrayList<>();
/* Determines whether to expect JSON input or plain lines */
- public enum INPUT_TYPE { plain, json };
+ public enum INPUT_TYPE { plain, json }
+
public INPUT_TYPE input_type = INPUT_TYPE.plain;
/* Type of server. Not sure we need to keep the regular TCP one around. */
- public enum SERVER_TYPE { none, TCP, HTTP };
+ public enum SERVER_TYPE { none, TCP, HTTP }
+
public SERVER_TYPE server_type = SERVER_TYPE.TCP;
/* If set, Joshua will start a (multi-threaded, per "threads") TCP/IP server on this port. */
@@ -286,10 +288,10 @@ public class JoshuaConfiguration {
LOG.info("\n\tResetting the StatefullFF global state index ...");
LOG.info("\n\t...done");
StatefulFF.resetGlobalStateIndex();
- tms = new ArrayList<String>();
+ tms = new ArrayList<>();
weights_file = "";
default_non_terminal = "[X]";
- oovList = new ArrayList<OOVItem>();
+ oovList = new ArrayList<>();
oovList.add(new OOVItem(default_non_terminal, 1.0f));
goal_symbol = "[GOAL]";
amortized_sorting = true;
@@ -307,8 +309,8 @@ public class JoshuaConfiguration {
mark_oovs = false;
// oracleFile = null;
parse = false; // perform synchronous parsing
- features = new ArrayList<String>();
- weights = new ArrayList<String>();
+ features = new ArrayList<>();
+ weights = new ArrayList<>();
server_port = 0;
reordering_limit = 8;
@@ -376,7 +378,7 @@ public class JoshuaConfiguration {
* interpreted as features.
*/
- if (line.indexOf("=") != -1) { // parameters; (not feature function)
+ if (line.contains("=")) { // parameters; (not feature function)
String[] fds = Regex.equalsWithSpaces.split(line, 2);
if (fds.length < 2) {
LOG.warn("skipping config file line '{}'", line);
@@ -434,7 +436,7 @@ public class JoshuaConfiguration {
} else if (parameter.equals(normalize_key("oov-list"))) {
if (new File(fds[1]).exists()) {
- oovList = new ArrayList<OOVItem>();
+ oovList = new ArrayList<>();
try {
File file = new File(fds[1]);
BufferedReader br = new BufferedReader(new FileReader(file));
@@ -462,7 +464,7 @@ public class JoshuaConfiguration {
if (tokens.length % 2 != 0) {
throw new RuntimeException(String.format("* FATAL: invalid format for '%s'", fds[0]));
}
- oovList = new ArrayList<OOVItem>();
+ oovList = new ArrayList<>();
for (int i = 0; i < tokens.length; i += 2)
oovList.add(new OOVItem(FormatUtils.ensureNonTerminalBrackets(tokens[i]),
@@ -538,12 +540,16 @@ public class JoshuaConfiguration {
LOG.info("pop-limit: {}", pop_limit);
} else if (parameter.equals(normalize_key("input-type"))) {
- if (fds[1].equals("json")) {
+ switch (fds[1]) {
+ case "json":
input_type = INPUT_TYPE.json;
- } else if (fds[1].equals("plain")) {
+ break;
+ case "plain":
input_type = INPUT_TYPE.plain;
- } else {
- throw new RuntimeException(String.format("* FATAL: invalid server type '%s'", fds[1]));
+ break;
+ default:
+ throw new RuntimeException(
+ String.format("* FATAL: invalid server type '%s'", fds[1]));
}
LOG.info(" input-type: {}", input_type);
@@ -573,7 +579,6 @@ public class JoshuaConfiguration {
} else if (parameter.equals("c") || parameter.equals("config")) {
// this was used to send in the config file, just ignore it
- ;
} else if (parameter.equals(normalize_key("feature-function"))) {
// add the feature to the list of features for later processing
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
index 9f63cad..d3913fb 100644
--- a/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
+++ b/src/main/java/org/apache/joshua/decoder/NbestMinRiskReranker.java
@@ -55,13 +55,12 @@ public class NbestMinRiskReranker {
double scalingFactor = 1.0;
- static int bleuOrder = 4;
- static boolean doNgramClip = true;
+ static final int bleuOrder = 4;
+ static final boolean doNgramClip = true;
- static boolean useGoogleLinearCorpusGain = false;
+ static final boolean useGoogleLinearCorpusGain = false;
- final PriorityBlockingQueue<RankerResult> resultsQueue =
- new PriorityBlockingQueue<RankerResult>();
+ final PriorityBlockingQueue<RankerResult> resultsQueue = new PriorityBlockingQueue<>();
public NbestMinRiskReranker(boolean produceRerankedNbest, double scalingFactor) {
this.produceRerankedNbest = produceRerankedNbest;
@@ -85,12 +84,12 @@ public class NbestMinRiskReranker {
}
}
- List<String> hypsItself = new ArrayList<String>();
+ List<String> hypsItself = new ArrayList<>();
// ArrayList<String> l_feat_scores = new ArrayList<String>();
- List<Double> baselineScores = new ArrayList<Double>(); // linear combination of all baseline
+ List<Double> baselineScores = new ArrayList<>(); // linear combination of all baseline
// features
- List<HashMap<String, Integer>> ngramTbls = new ArrayList<HashMap<String, Integer>>();
- List<Integer> sentLens = new ArrayList<Integer>();
+ List<HashMap<String, Integer>> ngramTbls = new ArrayList<>();
+ List<Integer> sentLens = new ArrayList<>();
for (String hyp : nbest) {
String[] fds = Regex.threeBarsWithSpace.split(hyp);
@@ -104,7 +103,7 @@ public class NbestMinRiskReranker {
String[] words = Regex.spaces.split(hypothesis);
sentLens.add(words.length);
- HashMap<String, Integer> ngramTbl = new HashMap<String, Integer>();
+ HashMap<String, Integer> ngramTbl = new HashMap<>();
Ngram.getNgrams(ngramTbl, 1, bleuOrder, words);
ngramTbls.add(ngramTbl);
@@ -125,13 +124,11 @@ public class NbestMinRiskReranker {
* */
computeNormalizedProbs(baselineScores, scalingFactor);
- List<Double> normalizedProbs = baselineScores;
-
// === required by google linear corpus gain
HashMap<String, Double> posteriorCountsTbl = null;
if (useGoogleLinearCorpusGain) {
- posteriorCountsTbl = new HashMap<String, Double>();
- getGooglePosteriorCounts(ngramTbls, normalizedProbs, posteriorCountsTbl);
+ posteriorCountsTbl = new HashMap<>();
+ getGooglePosteriorCounts(ngramTbls, baselineScores, posteriorCountsTbl);
}
@@ -143,7 +140,7 @@ public class NbestMinRiskReranker {
* */
double bestGain = -1000000000;// set as worst gain
String bestHyp = null;
- List<Double> gains = new ArrayList<Double>();
+ List<Double> gains = new ArrayList<>();
for (int i = 0; i < hypsItself.size(); i++) {
String curHyp = hypsItself.get(i);
int curHypLen = sentLens.get(i);
@@ -154,7 +151,7 @@ public class NbestMinRiskReranker {
curGain = computeExpectedLinearCorpusGain(curHypLen, curHypNgramTbl, posteriorCountsTbl);
} else {
curGain =
- computeExpectedGain(curHypLen, curHypNgramTbl, ngramTbls, sentLens, normalizedProbs);
+ computeExpectedGain(curHypLen, curHypNgramTbl, ngramTbls, sentLens, baselineScores);
}
gains.add(curGain);
@@ -335,7 +332,7 @@ public class NbestMinRiskReranker {
LOG.info("Running mbr reranking");
int oldSentID = -1;
- List<String> nbest = new ArrayList<String>();
+ List<String> nbest = new ArrayList<>();
Scanner scanner = new Scanner(System.in, "UTF-8");
@@ -415,7 +412,7 @@ public class NbestMinRiskReranker {
final int sentID;
RankerTask(final List<String> nbest, final int sentID) {
- this.nbest = new ArrayList<String>(nbest);
+ this.nbest = new ArrayList<>(nbest);
this.sentID = sentID;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index cb48c0c..aa4e0c7 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -21,9 +21,6 @@ package org.apache.joshua.decoder;
import java.util.List;
import java.util.Map;
-import org.apache.joshua.decoder.ff.FeatureVector;
-import org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationState;
-import org.apache.joshua.decoder.io.DeNormalize;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.apache.joshua.decoder.segment_file.Token;
import org.apache.joshua.util.FormatUtils;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
index 9be1f1a..1ba19f0 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslationFactory.java
@@ -29,7 +29,6 @@ import static org.apache.joshua.util.FormatUtils.removeSentenceMarkers;
import java.util.List;
import org.apache.joshua.decoder.ff.FeatureFunction;
-import org.apache.joshua.decoder.ff.FeatureVector;
import org.apache.joshua.decoder.hypergraph.HyperGraph;
import org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationState;
import org.apache.joshua.decoder.segment_file.Sentence;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/Translation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translation.java b/src/main/java/org/apache/joshua/decoder/Translation.java
index 46f3061..1688805 100644
--- a/src/main/java/org/apache/joshua/decoder/Translation.java
+++ b/src/main/java/org/apache/joshua/decoder/Translation.java
@@ -28,6 +28,7 @@ import static java.util.Arrays.asList;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringWriter;
+import java.util.Collections;
import java.util.List;
import org.apache.joshua.decoder.ff.FeatureFunction;
@@ -51,7 +52,7 @@ import org.slf4j.LoggerFactory;
public class Translation {
private static final Logger LOG = LoggerFactory.getLogger(Translation.class);
- private Sentence source;
+ private final Sentence source;
/**
* This stores the output of the translation so we don't have to hold onto the hypergraph while we
@@ -82,7 +83,7 @@ public class Translation {
*/
StructuredTranslation translation = fromViterbiDerivation(source, hypergraph, featureFunctions);
this.output = translation.getTranslationString();
- structuredTranslations = asList(translation);
+ structuredTranslations = Collections.singletonList(translation);
} else {
/*
@@ -91,7 +92,8 @@ public class Translation {
final KBestExtractor kBestExtractor = new KBestExtractor(source, featureFunctions, Decoder.weights, false, joshuaConfiguration);
structuredTranslations = kBestExtractor.KbestExtractOnHG(hypergraph, joshuaConfiguration.topN);
if (structuredTranslations.isEmpty()) {
- structuredTranslations = asList(StructuredTranslationFactory.fromEmptyOutput(source));
+ structuredTranslations = Collections
+ .singletonList(StructuredTranslationFactory.fromEmptyOutput(source));
this.output = "";
} else {
this.output = structuredTranslations.get(0).getTranslationString();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/Translations.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/Translations.java b/src/main/java/org/apache/joshua/decoder/Translations.java
index e607225..1eb859a 100644
--- a/src/main/java/org/apache/joshua/decoder/Translations.java
+++ b/src/main/java/org/apache/joshua/decoder/Translations.java
@@ -53,7 +53,7 @@ public class Translations implements Iterator<Translation>, Iterable<Translation
public Translations(TranslationRequestStream request) {
this.request = request;
- this.translations = new LinkedList<Translation>();
+ this.translations = new LinkedList<>();
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
index 10b9200..cfcd06b 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Cell.java
@@ -57,13 +57,13 @@ class Cell {
private Chart chart = null;
// The top-level (goal) symbol
- private int goalSymbol;
+ private final int goalSymbol;
// to maintain uniqueness of nodes
- private HashMap<HGNode.Signature, HGNode> nodesSigTbl = new LinkedHashMap<HGNode.Signature, HGNode>();
+ private final HashMap<HGNode.Signature, HGNode> nodesSigTbl = new LinkedHashMap<>();
// signature by lhs
- private Map<Integer, SuperNode> superNodesTbl = new HashMap<Integer, SuperNode>();
+ private final Map<Integer, SuperNode> superNodesTbl = new HashMap<>();
/**
* sort values in nodesSigTbl, we need this list when necessary
@@ -104,19 +104,19 @@ class Cell {
*/
// note that the input bin is bin[0][n], not the goal bin
boolean transitToGoal(Cell bin, List<FeatureFunction> featureFunctions, int sentenceLength) {
- this.sortedNodes = new ArrayList<HGNode>();
+ this.sortedNodes = new ArrayList<>();
HGNode goalItem = null;
for (HGNode antNode : bin.getSortedNodes()) {
if (antNode.lhs == this.goalSymbol) {
float logP = antNode.bestHyperedge.getBestDerivationScore();
- List<HGNode> antNodes = new ArrayList<HGNode>();
+ List<HGNode> antNodes = new ArrayList<>();
antNodes.add(antNode);
float finalTransitionLogP = ComputeNodeResult.computeFinalCost(featureFunctions, antNodes,
0, sentenceLength, null, this.chart.getSentence());
- List<HGNode> previousItems = new ArrayList<HGNode>();
+ List<HGNode> previousItems = new ArrayList<>();
previousItems.add(antNode);
HyperEdge dt = new HyperEdge(null, logP + finalTransitionLogP, finalTransitionLogP,
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
index 355a6f1..5c123f9 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/Chart.java
@@ -85,14 +85,14 @@ public class Chart {
// ===============================================================
// Private instance fields (maybe could be protected instead)
// ===============================================================
- private ChartSpan<Cell> cells; // note that in some cell, it might be null
- private int sourceLength;
- private List<FeatureFunction> featureFunctions;
- private Grammar[] grammars;
- private DotChart[] dotcharts; // each grammar should have a dotchart associated with it
+ private final ChartSpan<Cell> cells; // note that in some cell, it might be null
+ private final int sourceLength;
+ private final List<FeatureFunction> featureFunctions;
+ private final Grammar[] grammars;
+ private final DotChart[] dotcharts; // each grammar should have a dotchart associated with it
private Cell goalBin;
private int goalSymbolID = -1;
- private Lattice<Token> inputLattice;
+ private final Lattice<Token> inputLattice;
private Sentence sentence = null;
// private SyntaxTree parseTree;
@@ -129,15 +129,14 @@ public class Chart {
// if (sentence instanceof ParsedSentence)
// this.parseTree = ((ParsedSentence) sentence).syntaxTree();
//
- this.cells = new ChartSpan<Cell>(sourceLength, null);
+ this.cells = new ChartSpan<>(sourceLength, null);
this.goalSymbolID = Vocabulary.id(goalSymbol);
this.goalBin = new Cell(this, this.goalSymbolID);
/* Create the grammars, leaving space for the OOV grammar. */
this.grammars = new Grammar[grammars.length + 1];
- for (int i = 0; i < grammars.length; i++)
- this.grammars[i + 1] = grammars[i];
+ System.arraycopy(grammars, 0, this.grammars, 1, grammars.length);
MemoryBasedBatchGrammar oovGrammar = new MemoryBasedBatchGrammar("oov", this.config, 20);
AbstractGrammar.addOOVRules(oovGrammar, sentence.getLattice(), featureFunctions,
@@ -158,9 +157,8 @@ public class Chart {
+ Vocabulary.STOP_SYM);
/* Find the SourceDependent feature and give it access to the sentence. */
- for (FeatureFunction ff : this.featureFunctions)
- if (ff instanceof SourceDependentFF)
- ((SourceDependentFF) ff).setSource(sentence);
+ this.featureFunctions.stream().filter(ff -> ff instanceof SourceDependentFF)
+ .forEach(ff -> ((SourceDependentFF) ff).setSource(sentence));
LOG.debug("Finished seeding chart.");
}
@@ -179,7 +177,6 @@ public class Chart {
public void setGoalSymbolID(int i) {
this.goalSymbolID = i;
this.goalBin = new Cell(this, i);
- return;
}
// ===============================================================
@@ -204,7 +201,7 @@ public class Chart {
private void completeSpan(int i, int j) {
/* STEP 1: create the heap, and seed it with all of the candidate states */
- PriorityQueue<CubePruneState> candidates = new PriorityQueue<CubePruneState>();
+ PriorityQueue<CubePruneState> candidates = new PriorityQueue<>();
/*
* Look at all the grammars, seeding the chart with completed rules from the
@@ -257,7 +254,7 @@ public class Chart {
Rule bestRule = rules.get(0);
- List<HGNode> currentTailNodes = new ArrayList<HGNode>();
+ List<HGNode> currentTailNodes = new ArrayList<>();
List<SuperNode> superNodes = dotNode.getAntSuperNodes();
for (SuperNode si : superNodes) {
currentTailNodes.add(si.nodes.get(0));
@@ -305,7 +302,7 @@ public class Chart {
* There are multiple ways to reach each point in the cube, so short-circuit
* that.
*/
- HashSet<CubePruneState> visitedStates = new HashSet<CubePruneState>();
+ HashSet<CubePruneState> visitedStates = new HashSet<>();
int popLimit = config.pop_limit;
int popCount = 0;
@@ -352,7 +349,7 @@ public class Chart {
/* Use the updated ranks to assign the next rule and tail node. */
Rule nextRule = rules.get(nextRanks[0] - 1);
// HGNode[] nextAntNodes = new HGNode[state.antNodes.size()];
- List<HGNode> nextAntNodes = new ArrayList<HGNode>(state.antNodes.size());
+ List<HGNode> nextAntNodes = new ArrayList<>(state.antNodes.size());
for (int x = 0; x < state.ranks.length - 1; x++)
nextAntNodes.add(superNodes.get(x).nodes.get(nextRanks[x + 1] - 1));
@@ -387,15 +384,15 @@ public class Chart {
for (i = sourceLength - 1; i >= 0; i--) {
allCandidates = new PriorityQueue[sourceLength - i + 2];
for (int id = 0; id < allCandidates.length; id++)
- allCandidates[id] = new PriorityQueue<CubePruneState>();
+ allCandidates[id] = new PriorityQueue<>();
- nodeStack = new ArrayList<SuperNode>();
+ nodeStack = new ArrayList<>();
for (int j = i + 1; j <= sourceLength; j++) {
if (!sentence.hasPath(i, j))
continue;
- for (int g = 0; g < this.grammars.length; g++) {
+ for (Grammar grammar : this.grammars) {
// System.err.println(String.format("\n*** I=%d J=%d GRAMMAR=%d", i, j, g));
if (j == i + 1) {
@@ -405,13 +402,13 @@ public class Chart {
int word = arc.getLabel().getWord();
// disallow lattice decoding for now
assert arc.getHead().id() == j;
- Trie trie = this.grammars[g].getTrieRoot().match(word);
+ Trie trie = grammar.getTrieRoot().match(word);
if (trie != null && trie.hasRules())
addToChart(trie, j, false);
}
} else {
/* Recurse for non-terminal case */
- consume(this.grammars[g].getTrieRoot(), i, j - 1);
+ consume(grammar.getTrieRoot(), i, j - 1);
}
}
@@ -508,7 +505,7 @@ public class Chart {
// isUnary));
if (!isUnary && trie.hasRules()) {
- DotNode dotNode = new DotNode(i, j, trie, new ArrayList<SuperNode>(nodeStack), null);
+ DotNode dotNode = new DotNode(i, j, trie, new ArrayList<>(nodeStack), null);
addToCandidates(dotNode);
}
@@ -534,7 +531,7 @@ public class Chart {
Rule bestRule = rules.get(0);
List<SuperNode> superNodes = dotNode.getAntSuperNodes();
- List<HGNode> tailNodes = new ArrayList<HGNode>();
+ List<HGNode> tailNodes = new ArrayList<>();
for (SuperNode superNode : superNodes)
tailNodes.add(superNode.nodes.get(0));
@@ -679,8 +676,8 @@ public class Chart {
return 0;
}
int qtyAdditionsToQueue = 0;
- ArrayList<HGNode> queue = new ArrayList<HGNode>(chartBin.getSortedNodes());
- HashSet<Integer> seen_lhs = new HashSet<Integer>();
+ ArrayList<HGNode> queue = new ArrayList<>(chartBin.getSortedNodes());
+ HashSet<Integer> seen_lhs = new HashSet<>();
if (LOG.isDebugEnabled())
LOG.debug("Adding unary to [{}, {}]", i, j);
@@ -701,7 +698,7 @@ public class Chart {
if (childNode != null && childNode.getRuleCollection() != null
&& childNode.getRuleCollection().getArity() == 1) {
- ArrayList<HGNode> antecedents = new ArrayList<HGNode>();
+ ArrayList<HGNode> antecedents = new ArrayList<>();
antecedents.add(node);
List<Rule> rules = childNode.getRuleCollection().getSortedRules(this.featureFunctions);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
index 9833734..280ea5a 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/ComputeNodeResult.java
@@ -46,16 +46,16 @@ public class ComputeNodeResult {
private static final Logger LOG = LoggerFactory.getLogger(ComputeNodeResult.class);
// The cost incurred by the rule itself (and all associated feature functions)
- private float transitionCost;
+ private final float transitionCost;
// transitionCost + the Viterbi costs of the tail nodes.
- private float viterbiCost;
+ private final float viterbiCost;
// viterbiCost + a future estimate (outside cost estimate).
- private float pruningCostEstimate;
+ private final float pruningCostEstimate;
// The StateComputer objects themselves serve as keys.
- private List<DPState> dpStates;
+ private final List<DPState> dpStates;
/**
* Computes the new state(s) that are produced when applying the given rule to the list of tail
@@ -99,7 +99,7 @@ public class ComputeNodeResult {
}
}
- List<DPState> allDPStates = new ArrayList<DPState>();
+ List<DPState> allDPStates = new ArrayList<>();
// The transition cost is the new cost incurred by applying this rule
float transitionCost = 0.0f;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java b/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
index d57a6a2..1f06d30 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/CubePruneState.java
@@ -30,10 +30,10 @@ import org.apache.joshua.decoder.ff.tm.Rule;
// CubePruneState class
// ===============================================================
public class CubePruneState implements Comparable<CubePruneState> {
- int[] ranks;
- ComputeNodeResult computeNodeResult;
- List<HGNode> antNodes;
- List<Rule> rules;
+ final int[] ranks;
+ final ComputeNodeResult computeNodeResult;
+ final List<HGNode> antNodes;
+ final List<Rule> rules;
private DotNode dotNode;
public CubePruneState(ComputeNodeResult score, int[] ranks, List<Rule> rules, List<HGNode> antecedents, DotNode dotNode) {
@@ -58,10 +58,10 @@ public class CubePruneState implements Comparable<CubePruneState> {
}
public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append("STATE ||| rule=" + getRule() + " inside cost = " + computeNodeResult.getViterbiCost()
- + " estimate = " + computeNodeResult.getPruningEstimate());
- return sb.toString();
+ String sb = "STATE ||| rule=" + getRule() + " inside cost = " +
+ computeNodeResult.getViterbiCost() + " estimate = " +
+ computeNodeResult.getPruningEstimate();
+ return sb;
}
public void setDotNode(DotNode node) {
@@ -83,10 +83,8 @@ public class CubePruneState implements Comparable<CubePruneState> {
for (int i = 0; i < ranks.length; i++)
if (state.ranks[i] != ranks[i])
return false;
- if (getDotNode() != state.getDotNode())
- return false;
+ return getDotNode() == state.getDotNode();
- return true;
}
public int hashCode() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java b/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
index 71c4f03..8b5c81a 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/DotChart.java
@@ -71,7 +71,7 @@ class DotChart {
* Two-dimensional chart of cells. Some cells might be null. This could definitely be represented
* more efficiently, since only the upper half of this triangle is every used.
*/
- private ChartSpan<DotCell> dotcells;
+ private final ChartSpan<DotCell> dotcells;
public DotCell getDotCell(int i, int j) {
return dotcells.get(i, j);
@@ -84,12 +84,12 @@ class DotChart {
/**
* CKY+ style parse chart in which completed span entries are stored.
*/
- private Chart dotChart;
+ private final Chart dotChart;
/**
* Translation grammar which contains the translation rules.
*/
- private Grammar pGrammar;
+ private final Grammar pGrammar;
/* Length of input sentence. */
private final int sentLen;
@@ -119,7 +119,7 @@ class DotChart {
this.pGrammar = grammar;
this.input = input;
this.sentLen = input.size();
- this.dotcells = new ChartSpan<DotCell>(sentLen, null);
+ this.dotcells = new ChartSpan<>(sentLen, null);
seed();
}
@@ -243,8 +243,7 @@ class DotChart {
}
// complete super-items (items over the same span with different LHSs)
- List<SuperNode> superNodes = new ArrayList<SuperNode>(this.dotChart.getCell(k, j)
- .getSortedSuperItems().values());
+ List<SuperNode> superNodes = new ArrayList<>(this.dotChart.getCell(k, j).getSortedSuperItems().values());
/* For every partially complete item over (i,k) */
for (DotNode dotNode : dotcells.get(i, k).dotNodes) {
@@ -319,7 +318,7 @@ class DotChart {
*/
private void addDotItem(Trie tnode, int i, int j, ArrayList<SuperNode> antSuperNodesIn,
SuperNode curSuperNode, SourcePath srcPath) {
- ArrayList<SuperNode> antSuperNodes = new ArrayList<SuperNode>();
+ ArrayList<SuperNode> antSuperNodes = new ArrayList<>();
if (antSuperNodesIn != null) {
antSuperNodes.addAll(antSuperNodesIn);
}
@@ -360,7 +359,7 @@ class DotChart {
static class DotCell {
// Package-protected fields
- private List<DotNode> dotNodes = new ArrayList<DotNode>();
+ private final List<DotNode> dotNodes = new ArrayList<>();
public List<DotNode> getDotNodes() {
return dotNodes;
@@ -380,14 +379,15 @@ class DotChart {
*/
static class DotNode {
- private int i, j;
+ private final int i;
+ private final int j;
private Trie trieNode = null;
/* A list of grounded (over a span) nonterminals that have been crossed in traversing the rule */
private ArrayList<SuperNode> antSuperNodes = null;
/* The source lattice cost of applying the rule */
- private SourcePath srcPath;
+ private final SourcePath srcPath;
@Override
public String toString() {
@@ -430,10 +430,8 @@ class DotChart {
// if (this.i != state.i || this.j != state.j)
// return false;
- if (this.trieNode != state.trieNode)
- return false;
+ return this.trieNode == state.trieNode;
- return true;
}
/**
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
index 1d96149..efc6688 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/SourcePath.java
@@ -45,11 +45,11 @@ public class SourcePath {
}
public SourcePath extend(Arc<Token> srcEdge) {
- float tcost = (float) srcEdge.getCost();
+ float tcost = srcEdge.getCost();
if (tcost == 0.0)
return this;
else
- return new SourcePath(pathCost + (float) srcEdge.getCost());
+ return new SourcePath(pathCost + srcEdge.getCost());
}
public SourcePath extendNonTerminal() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
index d21ceca..b6b27c8 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/StateConstraint.java
@@ -62,11 +62,10 @@ public class StateConstraint {
int leftPos = this.target.indexOf(leftWords);
int rightPos = this.target.lastIndexOf(rightWords);
- boolean legal = (leftPos != -1 && leftPos <= rightPos);
-// System.err.println(String.format(" isLegal(%s @ %d,%s @ %d) = %s", leftWords, leftPos,
+ // System.err.println(String.format(" isLegal(%s @ %d,%s @ %d) = %s", leftWords, leftPos,
// rightWords, rightPos, legal));
- return legal;
+ return (leftPos != -1 && leftPos <= rightPos);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java b/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
index a7c6e34..f228836 100644
--- a/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
+++ b/src/main/java/org/apache/joshua/decoder/chart_parser/SuperNode.java
@@ -57,6 +57,6 @@ class SuperNode {
*/
public SuperNode(int lhs) {
this.lhs = lhs;
- this.nodes = new ArrayList<HGNode>();
+ this.nodes = new ArrayList<>();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
index cb4a4bd..8b17326 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureFunction.java
@@ -86,17 +86,17 @@ public abstract class FeatureFunction {
protected int denseFeatureIndex = -1;
// The list of arguments passed to the feature, and the hash for the parsed args
- protected String[] args;
+ protected final String[] args;
protected HashMap<String, String> parsedArgs = null;
/*
* The global weight vector used by the decoder, passed it when the feature is
* instantiated
*/
- protected FeatureVector weights;
+ protected final FeatureVector weights;
/* The config */
- protected JoshuaConfiguration config;
+ protected final JoshuaConfiguration config;
public String getName() {
return name;
@@ -123,7 +123,7 @@ public abstract class FeatureFunction {
* @return a list of dense feature names
*/
public ArrayList<String> reportDenseFeatures(int id) {
- return new ArrayList<String>();
+ return new ArrayList<>();
}
public String logString() {
@@ -279,13 +279,13 @@ public abstract class FeatureFunction {
* @return A hash with the keys and the values of the string
*/
public static HashMap<String, String> parseArgs(String[] args) {
- HashMap<String, String> parsedArgs = new HashMap<String, String>();
+ HashMap<String, String> parsedArgs = new HashMap<>();
boolean lookingForValue = false;
String currentKey = null;
- for (int i = 0; i < args.length; i++) {
+ for (String arg : args) {
Pattern argKeyPattern = Pattern.compile("^-[a-zA-Z]\\S+");
- Matcher argKey = argKeyPattern.matcher(args[i]);
+ Matcher argKey = argKeyPattern.matcher(arg);
if (argKey.find()) {
// This is a key
// First check to see if there is a key that is waiting to be written
@@ -294,12 +294,12 @@ public abstract class FeatureFunction {
parsedArgs.put(currentKey, "");
}
// Now store the new key and look for its value
- currentKey = args[i].substring(1);
+ currentKey = arg.substring(1);
lookingForValue = true;
} else {
// This is a value
if (lookingForValue) {
- parsedArgs.put(currentKey, args[i]);
+ parsedArgs.put(currentKey, arg);
lookingForValue = false;
}
}
@@ -320,8 +320,8 @@ public abstract class FeatureFunction {
* (for k-best extraction).
*/
public interface Accumulator {
- public void add(String name, float value);
- public void add(int id, float value);
+ void add(String name, float value);
+ void add(int id, float value);
}
public class ScoreAccumulator implements Accumulator {
@@ -347,7 +347,7 @@ public abstract class FeatureFunction {
}
public class FeatureAccumulator implements Accumulator {
- private FeatureVector features;
+ private final FeatureVector features;
public FeatureAccumulator() {
this.features = new FeatureVector();
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
index 1b39c78..34f19ae 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java
@@ -46,7 +46,7 @@ public class FeatureVector {
/*
* A list of the dense feature names. Increased via calls to registerDenseFeatures()
*/
- public static ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<String>();
+ public static final ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<>();
/*
* The values of each of the dense features, defaulting to 0.
@@ -56,11 +56,11 @@ public class FeatureVector {
/*
* Value of sparse features.
*/
- private HashMap<String, Float> sparseFeatures;
+ private final HashMap<String, Float> sparseFeatures;
public FeatureVector() {
- sparseFeatures = new HashMap<String, Float>();
- denseFeatures = new ArrayList<Float>(DENSE_FEATURE_NAMES.size());
+ sparseFeatures = new HashMap<>();
+ denseFeatures = new ArrayList<>(DENSE_FEATURE_NAMES.size());
for (int i = 0; i < denseFeatures.size(); i++)
denseFeatures.set(i, 0.0f);
}
@@ -334,7 +334,7 @@ public class FeatureVector {
public String mosesString() {
StringBuilder outputString = new StringBuilder();
- HashSet<String> printed_keys = new HashSet<String>();
+ HashSet<String> printed_keys = new HashSet<>();
// First print all the dense feature names in order
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -343,7 +343,7 @@ public class FeatureVector {
}
// Now print the sparse features
- ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
+ ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
Collections.sort(keys);
for (String key: keys) {
if (! printed_keys.contains(key)) {
@@ -365,7 +365,7 @@ public class FeatureVector {
public String toString() {
StringBuilder outputString = new StringBuilder();
- HashSet<String> printed_keys = new HashSet<String>();
+ HashSet<String> printed_keys = new HashSet<>();
// First print all the dense feature names in order
for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
@@ -374,11 +374,10 @@ public class FeatureVector {
}
// Now print the rest of the features
- ArrayList<String> keys = new ArrayList<String>(sparseFeatures.keySet());
+ ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
Collections.sort(keys);
- for (String key: keys)
- if (! printed_keys.contains(key))
- outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key)));
+ keys.stream().filter(key -> !printed_keys.contains(key)).forEach(
+ key -> outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key))));
return outputString.toString().trim();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java b/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
index bfebaa5..8072a79 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LabelCombinationFF.java
@@ -41,7 +41,7 @@ public class LabelCombinationFF extends StatelessFF {
return name.toLowerCase();
}
- private final String computeRuleLabelCombinationDescriptor(Rule rule) {
+ private String computeRuleLabelCombinationDescriptor(Rule rule) {
StringBuilder result = new StringBuilder(getLowerCasedFeatureName() + "_");
result.append(RulePropertiesQuerying.getLHSAsString(rule));
// System.out.println("Rule: " + rule);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java b/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
index 8735be6..4955d1b 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LabelSubstitutionFF.java
@@ -56,21 +56,21 @@ public class LabelSubstitutionFF extends StatelessFF {
return substitutionNonterminal + "_substitutes_" + ruleNonterminal;
}
- private final String computeLabelMatchingFeature(String ruleNonterminal,
+ private String computeLabelMatchingFeature(String ruleNonterminal,
String substitutionNonterminal) {
String result = getLowerCasedFeatureName() + "_";
result += getMatchFeatureSuffix(ruleNonterminal, substitutionNonterminal);
return result;
}
- private final String computeLabelSubstitutionFeature(String ruleNonterminal,
+ private String computeLabelSubstitutionFeature(String ruleNonterminal,
String substitutionNonterminal) {
String result = getLowerCasedFeatureName() + "_";
result += getSubstitutionSuffix(ruleNonterminal, substitutionNonterminal);
return result;
}
- private static final String getRuleLabelsDescriptorString(Rule rule) {
+ private static String getRuleLabelsDescriptorString(Rule rule) {
String result = "";
String leftHandSide = RulePropertiesQuerying.getLHSAsString(rule);
List<String> ruleSourceNonterminals = RulePropertiesQuerying
@@ -92,7 +92,7 @@ public class LabelSubstitutionFF extends StatelessFF {
return result;
}
- private static final String getSubstitutionsDescriptorString(List<HGNode> tailNodes) {
+ private static String getSubstitutionsDescriptorString(List<HGNode> tailNodes) {
String result = "_<Subst>";
List<String> substitutionNonterminals = RulePropertiesQuerying
.getSourceNonterminalStrings(tailNodes);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
index 75158d0..0b9cbcb 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/LexicalFeatures.java
@@ -60,7 +60,7 @@ public class LexicalFeatures extends StatelessFF {
public LexicalFeatures(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, NAME, args, config);
- ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
+ ownerRestriction = (parsedArgs.containsKey("owner"));
owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : OwnerMap.UNKNOWN_OWNER_ID;
useAlignments = parsedArgs.containsKey("alignments");
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
index 92ee740..6f37fa4 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/OOVPenalty.java
@@ -45,16 +45,14 @@ import org.apache.joshua.decoder.chart_parser.SourcePath;
*/
public class OOVPenalty extends StatelessFF {
private final OwnerId ownerID;
-
- /* The default value returned for OOVs. Can be overridden with -oov-list */
- private final float defaultValue = -100f;
+
private final HashMap<Integer,Float> oovWeights;
public OOVPenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "OOVPenalty", args, config);
ownerID = OwnerMap.register("oov");
- oovWeights = new HashMap<Integer,Float>();
+ oovWeights = new HashMap<>();
if (config.oovList != null) {
for (OOVItem item: config.oovList) {
@@ -103,6 +101,7 @@ public class OOVPenalty extends StatelessFF {
}
private float getValue(int lhs) {
+ float defaultValue = -100f;
return oovWeights.containsKey(lhs) ? oovWeights.get(lhs) : defaultValue;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
index 7ae3dbc..bd490dc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhraseModel.java
@@ -74,7 +74,7 @@ public class PhraseModel extends StatelessFF {
public ArrayList<String> reportDenseFeatures(int index) {
denseFeatureIndex = index;
- ArrayList<String> names = new ArrayList<String>();
+ ArrayList<String> names = new ArrayList<>();
for (int i = 0; i < phrase_weights.length; i++)
names.add(String.format("tm_%s_%d", owner, i));
return names;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
index 9eecd0c..2643729 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/PhrasePenalty.java
@@ -44,7 +44,7 @@ import org.apache.joshua.decoder.segment_file.Sentence;
public class PhrasePenalty extends StatelessFF {
private final OwnerId owner;
- private float value = 1.0f;
+ private final float value = 1.0f;
public PhrasePenalty(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "PhrasePenalty", args, config);
@@ -68,7 +68,7 @@ public class PhrasePenalty extends StatelessFF {
@Override
public ArrayList<String> reportDenseFeatures(int index) {
denseFeatureIndex = index;
- ArrayList<String> names = new ArrayList<String>();
+ ArrayList<String> names = new ArrayList<>();
names.add(name);
return names;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
index 308d38a..cc1ffa4 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleFF.java
@@ -42,8 +42,8 @@ import com.google.common.cache.Cache;
*/
public class RuleFF extends StatelessFF {
- private enum Sides { SOURCE, TARGET, BOTH };
-
+ private enum Sides { SOURCE, TARGET, BOTH }
+
private static final String NAME = "RuleFF";
// value to fire for features
private static final int VALUE = 1;
@@ -62,7 +62,7 @@ public class RuleFF extends StatelessFF {
public RuleFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, NAME, args, config);
- ownerRestriction = (parsedArgs.containsKey("owner")) ? true : false;
+ ownerRestriction = (parsedArgs.containsKey("owner"));
owner = ownerRestriction ? OwnerMap.register(parsedArgs.get("owner")) : UNKNOWN_OWNER_ID;
if (parsedArgs.containsKey("sides")) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java b/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
index a1867a3..0ee41be 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RulePropertiesQuerying.java
@@ -26,12 +26,12 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
public class RulePropertiesQuerying {
- public static final String getLHSAsString(Rule rule) {
+ public static String getLHSAsString(Rule rule) {
return Vocabulary.word(rule.getLHS());
}
public static List<String> getRuleSourceNonterminalStrings(Rule rule) {
- List<String> result = new ArrayList<String>();
+ List<String> result = new ArrayList<>();
for (int nonTerminalIndex : rule.getForeignNonTerminals()) {
result.add(Vocabulary.word(nonTerminalIndex));
}
@@ -39,7 +39,7 @@ public class RulePropertiesQuerying {
}
public static List<String> getSourceNonterminalStrings(List<HGNode> tailNodes) {
- List<String> result = new ArrayList<String>();
+ List<String> result = new ArrayList<>();
for (HGNode tailNode : tailNodes) {
result.add(Vocabulary.word(tailNode.lhs));
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java b/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
index 8483ad6..a331b90 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/RuleShape.java
@@ -42,7 +42,7 @@ public class RuleShape extends StatelessFF {
private final String string;
private boolean repeats;
- private WordType(final String string) {
+ WordType(final String string) {
this.string = string;
this.repeats = false;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java b/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
index 841402a..dec509f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/SourceDependentFF.java
@@ -22,8 +22,8 @@ import org.apache.joshua.decoder.segment_file.Sentence;
public interface SourceDependentFF extends Cloneable {
- public void setSource(Sentence sentence);
+ void setSource(Sentence sentence);
- public FeatureFunction clone();
+ FeatureFunction clone();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
index b138426..a3c4e57 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/SourcePathFF.java
@@ -48,7 +48,7 @@ public final class SourcePathFF extends StatelessFF {
public ArrayList<String> reportDenseFeatures(int index) {
denseFeatureIndex = index;
- ArrayList<String> names = new ArrayList<String>();
+ ArrayList<String> names = new ArrayList<>();
names.add(name);
return names;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
index e7de1f8..4e75af5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/TargetBigram.java
@@ -77,7 +77,7 @@ public class TargetBigram extends StatefulFF {
* @param filename
*/
private void loadVocab(String filename) {
- this.vocab = new HashSet<String>();
+ this.vocab = new HashSet<>();
this.vocab.add("<s>");
this.vocab.add("</s>");
try {
@@ -109,10 +109,8 @@ public class TargetBigram extends StatefulFF {
int left = -1;
int right = -1;
- List<String> currentNgram = new LinkedList<String>();
- for (int c = 0; c < enWords.length; c++) {
- int curID = enWords[c];
-
+ List<String> currentNgram = new LinkedList<>();
+ for (int curID : enWords) {
if (FormatUtils.isNonterminal(curID)) {
int index = -(curID + 1);
NgramDPState state = (NgramDPState) tailNodes.get(index).getDPState(stateIndex);
@@ -151,9 +149,8 @@ public class TargetBigram extends StatefulFF {
}
}
- NgramDPState state = new NgramDPState(new int[] { left }, new int[] { right });
// System.err.println(String.format("RULE %s -> state %s", rule.getRuleString(), state));
- return state;
+ return new NgramDPState(new int[] { left }, new int[] { right });
}
/**
@@ -208,7 +205,7 @@ public class TargetBigram extends StatefulFF {
private String join(List<String> list) {
StringBuilder sb = new StringBuilder();
for (String item : list) {
- sb.append(item.toString() + "_");
+ sb.append(item).append("_");
}
return sb.substring(0, sb.length() - 1);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
index f75dffa..1d181e7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/ConcatenationIterator.java
@@ -33,14 +33,12 @@ import java.util.NoSuchElementException;
*/
public class ConcatenationIterator<E> implements Iterator<E> {
- Iterator<Iterator<E>> sourceIterators;
+ final Iterator<Iterator<E>> sourceIterators;
Iterator<E> currentIterator;
Iterator<E> lastIteratorToReturn;
public boolean hasNext() {
- if (currentIterator.hasNext())
- return true;
- return false;
+ return currentIterator.hasNext();
}
public E next() {
@@ -80,12 +78,12 @@ public class ConcatenationIterator<E> implements Iterator<E> {
List<String> list0 = Collections.emptyList();
List<String> list1 = Arrays.asList("a b c d".split(" "));
List<String> list2 = Arrays.asList("e f".split(" "));
- List<Iterator<String>> iterators = new ArrayList<Iterator<String>>();
+ List<Iterator<String>> iterators = new ArrayList<>();
iterators.add(list1.iterator());
iterators.add(list0.iterator());
iterators.add(list2.iterator());
iterators.add(list0.iterator());
- Iterator<String> iterator = new ConcatenationIterator<String>(iterators);
+ Iterator<String> iterator = new ConcatenationIterator<>(iterators);
while (iterator.hasNext()) {
System.out.println(iterator.next());
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
index 861cf35..7388262 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/FragmentLMFF.java
@@ -95,11 +95,6 @@ public class FragmentLMFF extends StatefulFF {
private int MIN_LEX_DEPTH = 1;
/*
- * Set to true to activate meta-features.
- */
- private boolean OPTS_DEPTH = false;
-
- /*
* This contains a list of the language model fragments, indexed by LHS.
*/
private HashMap<String, ArrayList<Tree>> lmFragments = null;
@@ -117,7 +112,7 @@ public class FragmentLMFF extends StatefulFF {
public FragmentLMFF(FeatureVector weights, String[] args, JoshuaConfiguration config) {
super(weights, "FragmentLMFF", args, config);
- lmFragments = new HashMap<String, ArrayList<Tree>>();
+ lmFragments = new HashMap<>();
fragmentLMFile = parsedArgs.get("lm");
BUILD_DEPTH = Integer.parseInt(parsedArgs.get("build-depth"));
@@ -127,12 +122,9 @@ public class FragmentLMFF extends StatefulFF {
/* Read in the language model fragments */
try {
Collection<Tree> trees = PennTreebankReader.readTrees(fragmentLMFile);
- for (Tree fragment : trees) {
- addLMFragment(fragment);
-
- // System.err.println(String.format("Read fragment: %s",
- // lmFragments.get(lmFragments.size()-1)));
- }
+ // System.err.println(String.format("Read fragment: %s",
+ // lmFragments.get(lmFragments.size()-1)));
+ trees.forEach(this::addLMFragment);
} catch (IOException e) {
throw new RuntimeException(String.format("* WARNING: couldn't read fragment LM file '%s'",
fragmentLMFile), e);
@@ -162,7 +154,7 @@ public class FragmentLMFF extends StatefulFF {
}
if (lmFragments.get(fragment.getRule()) == null) {
- lmFragments.put(fragment.getRule(), new ArrayList<Tree>());
+ lmFragments.put(fragment.getRule(), new ArrayList<>());
}
lmFragments.get(fragment.getRule()).add(fragment);
numFragments++;
@@ -196,7 +188,7 @@ public class FragmentLMFF extends StatefulFF {
*/
Tree baseTree = Tree.buildTree(rule, tailNodes, BUILD_DEPTH);
- Stack<Tree> nodeStack = new Stack<Tree>();
+ Stack<Tree> nodeStack = new Stack<>();
nodeStack.add(baseTree);
while (!nodeStack.empty()) {
Tree tree = nodeStack.pop();
@@ -204,20 +196,21 @@ public class FragmentLMFF extends StatefulFF {
continue;
if (lmFragments.get(tree.getRule()) != null) {
- for (Tree fragment : lmFragments.get(tree.getRule())) {
-// System.err.println(String.format("Does\n %s match\n %s??\n -> %s", fragment, tree,
-// match(fragment, tree)));
-
- if (fragment.getLabel() == tree.getLabel() && match(fragment, tree)) {
-// System.err.println(String.format(" FIRING: matched %s against %s", fragment, tree));
- acc.add(fragment.escapedString(), 1);
- if (OPTS_DEPTH)
- if (fragment.isLexicalized())
- acc.add(String.format("FragmentFF_lexdepth%d", fragment.getDepth()), 1);
- else
- acc.add(String.format("FragmentFF_depth%d", fragment.getDepth()), 1);
- }
- }
+ // System.err.println(String.format("Does\n %s match\n %s??\n -> %s", fragment, tree,
+ // match(fragment, tree)));
+ // System.err.println(String.format(" FIRING: matched %s against %s", fragment, tree));
+ lmFragments.get(tree.getRule()).stream()
+ .filter(fragment -> fragment.getLabel() == tree.getLabel() && match(fragment, tree))
+ .forEach(fragment -> {
+ // System.err.println(String.format(" FIRING: matched %s against %s", fragment, tree));
+ acc.add(fragment.escapedString(), 1);
+ boolean OPTS_DEPTH = false;
+ if (OPTS_DEPTH)
+ if (fragment.isLexicalized())
+ acc.add(String.format("FragmentFF_lexdepth%d", fragment.getDepth()), 1);
+ else
+ acc.add(String.format("FragmentFF_depth%d", fragment.getDepth()), 1);
+ });
}
// We also need to try matching rules against internal nodes of the fragment corresponding to
@@ -312,18 +305,18 @@ public class FragmentLMFF extends StatefulFF {
ruleSBAR.setOwner(owner);
rulePERIOD.setOwner(owner);
- HyperEdge edgeSBAR = new HyperEdge(ruleSBAR, 0.0f, 0.0f, null, (SourcePath) null);
+ HyperEdge edgeSBAR = new HyperEdge(ruleSBAR, 0.0f, 0.0f, null, null);
HGNode nodeSBAR = new HGNode(3, 7, ruleSBAR.getLHS(), null, edgeSBAR, 0.0f);
- ArrayList<HGNode> tailNodesVP = new ArrayList<HGNode>();
+ ArrayList<HGNode> tailNodesVP = new ArrayList<>();
Collections.addAll(tailNodesVP, nodeSBAR);
- HyperEdge edgeVP = new HyperEdge(ruleVP, 0.0f, 0.0f, tailNodesVP, (SourcePath) null);
+ HyperEdge edgeVP = new HyperEdge(ruleVP, 0.0f, 0.0f, tailNodesVP, null);
HGNode nodeVP = new HGNode(2, 7, ruleVP.getLHS(), null, edgeVP, 0.0f);
- HyperEdge edgePERIOD = new HyperEdge(rulePERIOD, 0.0f, 0.0f, null, (SourcePath) null);
+ HyperEdge edgePERIOD = new HyperEdge(rulePERIOD, 0.0f, 0.0f, null, null);
HGNode nodePERIOD = new HGNode(7, 8, rulePERIOD.getLHS(), null, edgePERIOD, 0.0f);
- ArrayList<HGNode> tailNodes = new ArrayList<HGNode>();
+ ArrayList<HGNode> tailNodes = new ArrayList<>();
Collections.addAll(tailNodes, nodeVP, nodePERIOD);
Tree tree = Tree.buildTree(ruleS, tailNodes, 1);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
index 1637b5f..bb1c29a 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/PennTreebankReader.java
@@ -30,13 +30,13 @@ public class PennTreebankReader {
static class TreeCollection extends AbstractCollection<Tree> {
- List<File> files;
- Charset charset;
+ final List<File> files;
+ final Charset charset;
static class TreeIteratorIterator implements Iterator<Iterator<Tree>> {
- Iterator<File> fileIterator;
+ final Iterator<File> fileIterator;
Iterator<Tree> nextTreeIterator;
- Charset charset;
+ final Charset charset;
public boolean hasNext() {
return nextTreeIterator != null;
@@ -75,7 +75,7 @@ public class PennTreebankReader {
}
public Iterator<Tree> iterator() {
- return new ConcatenationIterator<Tree>(new TreeIteratorIterator(files, this.charset));
+ return new ConcatenationIterator<>(new TreeIteratorIterator(files, this.charset));
}
public int size() {
@@ -91,7 +91,7 @@ public class PennTreebankReader {
@SuppressWarnings("unused")
private List<File> getFilesUnder(String path, FileFilter fileFilter) {
File root = new File(path);
- List<File> files = new ArrayList<File>();
+ List<File> files = new ArrayList<>();
addFilesUnder(root, files, fileFilter);
return files;
}
@@ -105,15 +105,14 @@ public class PennTreebankReader {
}
if (root.isDirectory()) {
File[] children = root.listFiles();
- for (int i = 0; i < children.length; i++) {
- File child = children[i];
+ for (File child : children) {
addFilesUnder(child, files, fileFilter);
}
}
}
public TreeCollection(String file) throws FileNotFoundException, IOException {
- this.files = new ArrayList<File>();
+ this.files = new ArrayList<>();
this.files.add(new File(file));
this.charset = Charset.defaultCharset();
}
[20/22] incubator-joshua git commit: JOSHUA-291 - static analysis
based code improvements on corpus package
Posted by mj...@apache.org.
JOSHUA-291 - static analysis based code improvements on corpus package
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/356b173d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/356b173d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/356b173d
Branch: refs/heads/JOSHUA-284
Commit: 356b173d4fa3f0efa4ea53809e46c9e04c8c1ca5
Parents: 233818d
Author: Tommaso Teofili <to...@apache.org>
Authored: Sun Aug 7 19:27:27 2016 +0200
Committer: Tommaso Teofili <to...@apache.org>
Committed: Thu Aug 18 09:57:05 2016 +0200
----------------------------------------------------------------------
.../org/apache/joshua/corpus/BasicPhrase.java | 2 +-
.../apache/joshua/corpus/ContiguousPhrase.java | 8 +--
.../java/org/apache/joshua/corpus/Phrase.java | 2 +-
.../java/org/apache/joshua/corpus/Span.java | 6 +--
.../org/apache/joshua/corpus/SymbolTable.java | 2 +-
.../org/apache/joshua/corpus/Vocabulary.java | 10 ++--
.../joshua/corpus/syntax/ArraySyntaxTree.java | 51 ++++++++++----------
.../apache/joshua/corpus/syntax/SyntaxTree.java | 10 ++--
8 files changed, 44 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
index 6c50458..8ab8add 100644
--- a/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/BasicPhrase.java
@@ -58,7 +58,7 @@ public class BasicPhrase extends AbstractPhrase {
/* See Javadoc for Phrase interface. */
public ArrayList<Phrase> getSubPhrases(int maxLength) {
- ArrayList<Phrase> phrases = new ArrayList<Phrase>();
+ ArrayList<Phrase> phrases = new ArrayList<>();
int len = this.size();
for (int n = 1; n <= maxLength; n++)
for (int i = 0; i <= len - n; i++)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
index af669b7..9c76ce2 100644
--- a/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
+++ b/src/main/java/org/apache/joshua/corpus/ContiguousPhrase.java
@@ -31,9 +31,9 @@ import java.util.List;
*/
public class ContiguousPhrase extends AbstractPhrase {
- protected int startIndex;
- protected int endIndex;
- protected Corpus corpusArray;
+ protected final int startIndex;
+ protected final int endIndex;
+ protected final Corpus corpusArray;
public ContiguousPhrase(int startIndex, int endIndex, Corpus corpusArray) {
this.startIndex = startIndex;
@@ -94,7 +94,7 @@ public class ContiguousPhrase extends AbstractPhrase {
*/
public List<Phrase> getSubPhrases(int maxLength) {
if (maxLength > size()) return getSubPhrases(size());
- List<Phrase> phrases = new ArrayList<Phrase>();
+ List<Phrase> phrases = new ArrayList<>();
for (int i = 0; i < size(); i++) {
for (int j = i + 1; (j <= size()) && (j - i <= maxLength); j++) {
Phrase subPhrase = subPhrase(i, j);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/Phrase.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Phrase.java b/src/main/java/org/apache/joshua/corpus/Phrase.java
index 5a06a8b..41e8c63 100644
--- a/src/main/java/org/apache/joshua/corpus/Phrase.java
+++ b/src/main/java/org/apache/joshua/corpus/Phrase.java
@@ -34,7 +34,7 @@ public interface Phrase extends Comparable<Phrase> {
*
* @return an int[] corresponding to the ID of each word in the phrase
*/
- public int[] getWordIDs();
+ int[] getWordIDs();
/**
* Returns the integer word id of the word at the specified position.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/Span.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Span.java b/src/main/java/org/apache/joshua/corpus/Span.java
index 414fe95..26e00aa 100644
--- a/src/main/java/org/apache/joshua/corpus/Span.java
+++ b/src/main/java/org/apache/joshua/corpus/Span.java
@@ -31,10 +31,10 @@ import java.util.NoSuchElementException;
public class Span implements Iterable<Integer>, Comparable<Span> {
/** Inclusive starting index of this span. */
- public int start;
+ public final int start;
/** Exclusive ending index of this span. */
- public int end;
+ public final int end;
/**
@@ -75,7 +75,7 @@ public class Span implements Iterable<Integer>, Comparable<Span> {
*/
public List<Span> getSubSpans(int max) {
int spanSize = size();
- ArrayList<Span> result = new ArrayList<Span>(max * spanSize);
+ ArrayList<Span> result = new ArrayList<>(max * spanSize);
for (int len = max; len > 0; len--) {
for (int i = start; i < end - len + 1; i++) {
result.add(new Span(i, i + len));
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/SymbolTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/SymbolTable.java b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
index 274e8b9..07a2760 100644
--- a/src/main/java/org/apache/joshua/corpus/SymbolTable.java
+++ b/src/main/java/org/apache/joshua/corpus/SymbolTable.java
@@ -116,7 +116,7 @@ public interface SymbolTable {
* then the value returned must be {@link #X}.
*
* Otherwise, the value returned must be a negative number
- * whose value is less than {@link X}.
+ * whose value is less than {@link #X}.
*
* @param nonterminal Nonterminal symbol
* @return a unique integer identifier for the nonterminal
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/Vocabulary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/Vocabulary.java b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
index 24644ee..0a26822 100644
--- a/src/main/java/org/apache/joshua/corpus/Vocabulary.java
+++ b/src/main/java/org/apache/joshua/corpus/Vocabulary.java
@@ -262,8 +262,8 @@ public class Vocabulary implements Externalizable {
public static void clear() {
long lock_stamp = lock.writeLock();
try {
- idToString = new ArrayList<String>();
- stringToId = new HashMap<String, Integer>();
+ idToString = new ArrayList<>();
+ stringToId = new HashMap<>();
idToString.add(UNKNOWN_ID, UNKNOWN_WORD);
stringToId.put(UNKNOWN_WORD, UNKNOWN_ID);
@@ -291,11 +291,7 @@ public class Vocabulary implements Externalizable {
@Override
public boolean equals(Object o) {
- if(getClass() == o.getClass()) {
- return true;
- } else {
- return false;
- }
+ return getClass() == o.getClass();
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
index f374279..10efdc6 100644
--- a/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
+++ b/src/main/java/org/apache/joshua/corpus/syntax/ArraySyntaxTree.java
@@ -46,7 +46,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
private ArrayList<Integer> terminals;
- private boolean useBackwardLattice = true;
+ private final boolean useBackwardLattice = true;
private static final int MAX_CONCATENATIONS = 3;
private static final int MAX_LABELS = 100;
@@ -72,7 +72,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
* lattice.
*/
public Collection<Integer> getConstituentLabels(int from, int to) {
- Collection<Integer> labels = new HashSet<Integer>();
+ Collection<Integer> labels = new HashSet<>();
int span_length = to - from;
for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
int current_span = forwardLattice.get(i + 1);
@@ -86,7 +86,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
public int getOneConstituent(int from, int to) {
int spanLength = to - from;
- Stack<Integer> stack = new Stack<Integer>();
+ Stack<Integer> stack = new Stack<>();
for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
int currentSpan = forwardLattice.get(i + 1);
@@ -168,12 +168,12 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
* the total number of labels returned is bounded by MAX_LABELS.
*/
public Collection<Integer> getConcatenatedLabels(int from, int to) {
- Collection<Integer> labels = new HashSet<Integer>();
+ Collection<Integer> labels = new HashSet<>();
int span_length = to - from;
- Stack<Integer> nt_stack = new Stack<Integer>();
- Stack<Integer> pos_stack = new Stack<Integer>();
- Stack<Integer> depth_stack = new Stack<Integer>();
+ Stack<Integer> nt_stack = new Stack<>();
+ Stack<Integer> pos_stack = new Stack<>();
+ Stack<Integer> depth_stack = new Stack<>();
// seed stacks (reverse order to save on iterations, longer spans)
for (int i = forwardIndex.get(from + 1) - 2; i >= forwardIndex.get(from); i -= 2) {
@@ -217,14 +217,14 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
// TODO: can pre-comupute all that in top-down fashion.
public Collection<Integer> getCcgLabels(int from, int to) {
- Collection<Integer> labels = new HashSet<Integer>();
+ Collection<Integer> labels = new HashSet<>();
int span_length = to - from;
// TODO: range checks on the to and from
boolean is_prefix = (forwardLattice.get(forwardIndex.get(from) + 1) > span_length);
if (is_prefix) {
- Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
+ Map<Integer, Set<Integer>> main_constituents = new HashMap<>();
// find missing to the right
for (int i = forwardIndex.get(from); i < forwardIndex.get(from + 1); i += 2) {
int current_span = forwardLattice.get(i + 1);
@@ -233,7 +233,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
else {
int end_pos = forwardLattice.get(i + 1) + from;
Set<Integer> nts = main_constituents.get(end_pos);
- if (nts == null) main_constituents.put(end_pos, new HashSet<Integer>());
+ if (nts == null) main_constituents.put(end_pos, new HashSet<>());
main_constituents.get(end_pos).add(forwardLattice.get(i));
}
}
@@ -255,7 +255,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
// check longest span ending in to..
if (backwardLattice.get(to_end - 1) <= span_length) return labels;
- Map<Integer, Set<Integer>> main_constituents = new HashMap<Integer, Set<Integer>>();
+ Map<Integer, Set<Integer>> main_constituents = new HashMap<>();
// find missing to the left
for (int i = to_end - 2; i >= backwardIndex.get(to); i -= 2) {
int current_span = backwardLattice.get(i + 1);
@@ -264,7 +264,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
else {
int start_pos = to - backwardLattice.get(i + 1);
Set<Integer> nts = main_constituents.get(start_pos);
- if (nts == null) main_constituents.put(start_pos, new HashSet<Integer>());
+ if (nts == null) main_constituents.put(start_pos, new HashSet<>());
main_constituents.get(start_pos).add(backwardLattice.get(i));
}
}
@@ -326,40 +326,41 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
public String toString() {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < forwardIndex.size(); i++)
- sb.append("FI[" + i + "] =\t" + forwardIndex.get(i) + "\n");
+ sb.append("FI[").append(i).append("] =\t").append(forwardIndex.get(i)).append("\n");
sb.append("\n");
for (int i = 0; i < forwardLattice.size(); i += 2)
- sb.append("F[" + i + "] =\t" + Vocabulary.word(forwardLattice.get(i)) + " , "
- + forwardLattice.get(i + 1) + "\n");
+ sb.append("F[").append(i).append("] =\t").append(Vocabulary.word(forwardLattice.get(i)))
+ .append(" , ").append(forwardLattice.get(i + 1)).append("\n");
sb.append("\n");
for (int i = 0; i < terminals.size(); i += 1)
- sb.append("T[" + i + "] =\t" + Vocabulary.word(terminals.get(i)) + " , 1 \n");
+ sb.append("T[").append(i).append("] =\t").append(Vocabulary.word(terminals.get(i)))
+ .append(" , 1 \n");
if (this.useBackwardLattice) {
sb.append("\n");
for (int i = 0; i < backwardIndex.size(); i++)
- sb.append("BI[" + i + "] =\t" + backwardIndex.get(i) + "\n");
+ sb.append("BI[").append(i).append("] =\t").append(backwardIndex.get(i)).append("\n");
sb.append("\n");
for (int i = 0; i < backwardLattice.size(); i += 2)
- sb.append("B[" + i + "] =\t" + Vocabulary.word(backwardLattice.get(i)) + " , "
- + backwardLattice.get(i + 1) + "\n");
+ sb.append("B[").append(i).append("] =\t").append(Vocabulary.word(backwardLattice.get(i)))
+ .append(" , ").append(backwardLattice.get(i + 1)).append("\n");
}
return sb.toString();
}
private void initialize() {
- forwardIndex = new ArrayList<Integer>();
+ forwardIndex = new ArrayList<>();
forwardIndex.add(0);
- forwardLattice = new ArrayList<Integer>();
+ forwardLattice = new ArrayList<>();
if (this.useBackwardLattice) {
- backwardIndex = new ArrayList<Integer>();
+ backwardIndex = new ArrayList<>();
backwardIndex.add(0);
- backwardLattice = new ArrayList<Integer>();
+ backwardLattice = new ArrayList<>();
}
- terminals = new ArrayList<Integer>();
+ terminals = new ArrayList<>();
}
@@ -369,7 +370,7 @@ public class ArraySyntaxTree implements SyntaxTree, Externalizable {
boolean next_nt = false;
int current_id = 0;
- Stack<Integer> stack = new Stack<Integer>();
+ Stack<Integer> stack = new Stack<>();
for (String token : tokens) {
if ("(".equals(token)) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/356b173d/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java b/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
index 6bb4c0b..f96cd2c 100644
--- a/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
+++ b/src/main/java/org/apache/joshua/corpus/syntax/SyntaxTree.java
@@ -22,13 +22,13 @@ import java.util.Collection;
public interface SyntaxTree {
- public Collection<Integer> getConstituentLabels(int from, int to);
+ Collection<Integer> getConstituentLabels(int from, int to);
- public Collection<Integer> getConcatenatedLabels(int from, int to);
+ Collection<Integer> getConcatenatedLabels(int from, int to);
- public Collection<Integer> getCcgLabels(int from, int to);
+ Collection<Integer> getCcgLabels(int from, int to);
- public int[] getTerminals();
+ int[] getTerminals();
- public int[] getTerminals(int from, int to);
+ int[] getTerminals(int from, int to);
}
[07/22] incubator-joshua git commit: added alignment point in custom
rules
Posted by mj...@apache.org.
added alignment point in custom rules
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/81b33d22
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/81b33d22
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/81b33d22
Branch: refs/heads/JOSHUA-284
Commit: 81b33d2218e51bc70f3ef42c2f0cd3c0d3059340
Parents: 84301b9
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 12:38:27 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 12:38:27 2016 -0400
----------------------------------------------------------------------
demo/demo.js | 4 ++--
src/main/java/org/apache/joshua/server/ServerThread.java | 9 ++++++---
2 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81b33d22/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index 38b20e1..131683b 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -158,8 +158,8 @@ $('#add_rule').click(function() {
var ruleStr = "add_rule [X] ||| " + sourcePhrase + " ||| " + targetPhrase;
// Add word-word alignment if unambiguous
- // if (sourcePhrase.split().length == 1 && targetPhrase.split().length == 1)
- // ruleStr += " ||| ||| 0-0";
+ if (sourcePhrase.split().length == 1 && targetPhrase.split().length == 1)
+ ruleStr += " ||| ||| 0-0";
sendMeta(ruleStr);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/81b33d22/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/server/ServerThread.java b/src/main/java/org/apache/joshua/server/ServerThread.java
index 0301e71..72caa5f 100644
--- a/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/src/main/java/org/apache/joshua/server/ServerThread.java
@@ -225,15 +225,18 @@ public class ServerThread extends Thread implements HttpHandler {
String source = argTokens[1];
String target = argTokens[2];
String featureStr = "";
+ String alignmentStr = "";
if (argTokens.length > 3)
featureStr = argTokens[3];
-
+ if (argTokens.length > 4)
+ alignmentStr = " ||| " + argTokens[4];
+
/* Prepend source and target side nonterminals for phrase-based decoding. Probably better
* handled in each grammar type's addRule() function.
*/
String ruleString = (joshuaConfiguration.search_algorithm.equals("stack"))
- ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| custom=1 %s", lhs, source, target, featureStr)
- : String.format("%s ||| %s ||| %s ||| custom=1 %s", lhs, source, target, featureStr);
+ ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr)
+ : String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr);
Rule rule = new HieroFormatReader().parseLine(ruleString);
decoder.addCustomRule(rule);
[04/22] incubator-joshua git commit: fixed to work with detokenized
output
Posted by mj...@apache.org.
fixed to work with detokenized output
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/3387b16b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/3387b16b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/3387b16b
Branch: refs/heads/JOSHUA-284
Commit: 3387b16befb1e8121a05e7b3a13d041bdeb7260b
Parents: aedeafd
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 12:19:44 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 12:19:44 2016 -0400
----------------------------------------------------------------------
demo/demo.js | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/3387b16b/demo/demo.js
----------------------------------------------------------------------
diff --git a/demo/demo.js b/demo/demo.js
index 1662fe2..4904757 100644
--- a/demo/demo.js
+++ b/demo/demo.js
@@ -229,8 +229,8 @@ function record_results(data, status) {
/**
* Cleans out OOVs
*/
-function clean_oovs(str) {
- str = str.replace(/(\S+)_OOV/g, "<span class='oov'>$1</span>");
+function clean(str) {
+ str = str.replace(/(\S+?)_OOV/g, "<span class='oov'>$1</span>");
str = str.replace(/ ([\.\?,])/g, "$1");
str = str.replace(/" (.*?) "/g, "\"$1\"");
return str;
[09/22] incubator-joshua git commit: added alignment for phrase-based
OOVs
Posted by mj...@apache.org.
added alignment for phrase-based OOVs
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/216d61db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/216d61db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/216d61db
Branch: refs/heads/JOSHUA-284
Commit: 216d61db80b927315e211a8a75d328a09473cf5a
Parents: fcaf0bf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 07:17:08 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 07:17:08 2016 +0200
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/216d61db/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 312781f..2c8a3e9 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -130,7 +130,7 @@ public class PhraseTable implements Grammar {
: sourceWord;
int nt_i = Vocabulary.id("[X]");
- Rule oovRule = new Rule(nt_i, new int[] { nt_i, sourceWord }, new int[] { -1, targetWord }, "", 1);
+ Rule oovRule = new Rule(nt_i, new int[] { nt_i, sourceWord }, new int[] { -1, targetWord }, "0-0", 1);
addRule(oovRule);
oovRule.estimateRuleCost(featureFunctions);
[16/22] incubator-joshua git commit: JOSHUA-291 - static analysis
based code improvements on decoder package
Posted by mj...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
index f20e063..77b76a4 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/OutputStringExtractor.java
@@ -34,7 +34,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
this.extractSource = extractSource;
}
- private Stack<OutputString> outputStringStack = new Stack<>();
+ private final Stack<OutputString> outputStringStack = new Stack<>();
private final boolean extractSource;
@Override
@@ -103,7 +103,7 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
private static String arrayToString(int[] ids) {
StringBuilder sb = new StringBuilder();
for (int i : ids) {
- sb.append(i + " ");
+ sb.append(i).append(" ");
}
return sb.toString().trim();
}
@@ -180,8 +180,8 @@ public class OutputStringExtractor implements WalkerFunction, DerivationVisitor
for (int i = 0; i < position; i++) {
result[resultIndex++] = this.words[i];
}
- for (int i = 0; i < words.length; i++) {
- result[resultIndex++] = words[i];
+ for (int word : words) {
+ result[resultIndex++] = word;
}
for (int i = position + 1; i < this.words.length; i++) {
result[resultIndex++] = this.words[i];
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java b/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
index f393a01..d71cba6 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/StringToTreeConverter.java
@@ -32,16 +32,18 @@ public class StringToTreeConverter {
HyperGraph tree = null;
- Stack<String> stack = new Stack<String>();
+ Stack<String> stack = new Stack<>();
for (int i = 0; i < inputStr.length(); i++) {
char curChar = inputStr.charAt(i);
if (curChar == ')' && inputStr.charAt(i - 1) != ' ') {// end of a rule
StringBuffer ruleString = new StringBuffer();
+ label:
while (stack.empty() == false) {
String cur = stack.pop();
- if (cur.equals(beginSymbol)) {// stop
+ switch (cur) {
+ case beginSymbol: // stop
// setup a node
// HGNode(int i, int j, int lhs, HashMap<Integer,DPState> dpStates, HyperEdge
// initHyperedge, double estTotalLogP)
@@ -50,13 +52,15 @@ public class StringToTreeConverter {
// public Rule(int lhs, int[] sourceRhs, int[] targetRhs, float[]
// featureScores, int arity, int owner, float latticeCost, int ruleID)
-
stack.add(nodeSymbol);// TODO: should be lHS+id
- break;
- } else if (cur.equals(nodeSymbol)) {
- } else {
+ break label;
+ case nodeSymbol:
+
+ break;
+ default:
ruleString.append(cur);
+ break;
}
}
} else if (curChar == '(' && inputStr.charAt(i + 1) != ' ') {// begin of a rule
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
index 734e0aa..51ae3c8 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/ViterbiExtractor.java
@@ -157,7 +157,7 @@ public class ViterbiExtractor {
// TODO: tbl_states
private static HGNode cloneNodeWithBestHyperedge(HGNode inNode) {
- List<HyperEdge> hyperedges = new ArrayList<HyperEdge>(1);
+ List<HyperEdge> hyperedges = new ArrayList<>(1);
HyperEdge cloneEdge = cloneHyperedge(inNode.bestHyperedge);
hyperedges.add(cloneEdge);
return new HGNode(inNode.i, inNode.j, inNode.lhs, hyperedges, cloneEdge, inNode.getDPStates());
@@ -167,12 +167,10 @@ public class ViterbiExtractor {
private static HyperEdge cloneHyperedge(HyperEdge inEdge) {
List<HGNode> antNodes = null;
if (null != inEdge.getTailNodes()) {
- antNodes = new ArrayList<HGNode>(inEdge.getTailNodes());// l_ant_items will be changed in
+ antNodes = new ArrayList<>(inEdge.getTailNodes());// l_ant_items will be changed in
// get_1best_tree_item
}
- HyperEdge res =
- new HyperEdge(inEdge.getRule(), inEdge.getBestDerivationScore(), inEdge.getTransitionLogP(false),
- antNodes, inEdge.getSourcePath());
- return res;
+ return new HyperEdge(inEdge.getRule(), inEdge.getBestDerivationScore(), inEdge.getTransitionLogP(false),
+ antNodes, inEdge.getSourcePath());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
index 04d0897..c949699 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentExtractor.java
@@ -38,7 +38,7 @@ import org.apache.joshua.decoder.hypergraph.KBestExtractor.DerivationVisitor;
*/
public class WordAlignmentExtractor implements WalkerFunction, DerivationVisitor {
- private final Stack<WordAlignmentState> stack = new Stack<WordAlignmentState>();
+ private final Stack<WordAlignmentState> stack = new Stack<>();
/**
* Merges a state with the top of the stack if applicable or places it on top of the stack.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
index f057f23..aef4665 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/WordAlignmentState.java
@@ -42,7 +42,7 @@ public class WordAlignmentState {
* rule. The values of the elements correspond to the aligned source token on
* the source side of the rule.
*/
- private List<AlignedSourceTokens> trgPoints;
+ private final List<AlignedSourceTokens> trgPoints;
private final int srcStart;
/** number of NTs we need to substitute. */
private int numNT;
@@ -57,7 +57,7 @@ public class WordAlignmentState {
* @param start the start index
*/
public WordAlignmentState(final Rule rule, final int start) {
- trgPoints = new LinkedList<AlignedSourceTokens>();
+ trgPoints = new LinkedList<>();
srcLength = rule.getFrench().length;
numNT = rule.getArity();
srcStart = start;
@@ -123,7 +123,7 @@ public class WordAlignmentState {
* @return a final alignment list
*/
public List<List<Integer>> toFinalList() {
- final List<List<Integer>> alignment = new ArrayList<List<Integer>>(trgPoints.size());
+ final List<List<Integer>> alignment = new ArrayList<>(trgPoints.size());
if (trgPoints.isEmpty()) {
return alignment;
}
@@ -132,7 +132,7 @@ public class WordAlignmentState {
while (it.hasNext()) {
final AlignedSourceTokens alignedSourceTokens = it.next();
if (it.hasNext()) { // if not last element in trgPoints
- final List<Integer> newAlignedSourceTokens = new ArrayList<Integer>();
+ final List<Integer> newAlignedSourceTokens = new ArrayList<>();
for (Integer sourceIndex : alignedSourceTokens) {
newAlignedSourceTokens.add(sourceIndex - 1); // shift by one to disregard sentence marker
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
index 5056aaa..36415fe 100644
--- a/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
+++ b/src/main/java/org/apache/joshua/decoder/io/JSONMessage.java
@@ -51,14 +51,14 @@ public class JSONMessage {
public Data data = null;
public List<String> metadata = null;
public JSONMessage() {
- metadata = new ArrayList<String>();
+ metadata = new ArrayList<>();
}
public class Data {
- public List<TranslationItem> translations;
+ public final List<TranslationItem> translations;
public Data() {
- translations = new ArrayList<TranslationItem>();
+ translations = new ArrayList<>();
}
}
//
@@ -119,12 +119,12 @@ public class JSONMessage {
}
public class TranslationItem {
- public String translatedText;
- public List<NBestItem> raw_nbest;
+ public final String translatedText;
+ public final List<NBestItem> raw_nbest;
public TranslationItem(String value) {
this.translatedText = value;
- this.raw_nbest = new ArrayList<NBestItem>();
+ this.raw_nbest = new ArrayList<>();
}
/**
@@ -139,8 +139,8 @@ public class JSONMessage {
}
public class NBestItem {
- public String hyp;
- public float totalScore;
+ public final String hyp;
+ public final float totalScore;
public NBestItem(String hyp, float score) {
this.hyp = hyp;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java b/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
index 0287688..afb63ab 100644
--- a/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
+++ b/src/main/java/org/apache/joshua/decoder/io/TranslationRequestStream.java
@@ -51,9 +51,7 @@ public class TranslationRequestStream {
private final JoshuaConfiguration joshuaConfiguration;
private int sentenceNo = -1;
- private Sentence nextSentence = null;
-
- /* Plain text or JSON input */
+ /* Plain text or JSON input */
private StreamHandler requestHandler = null;
/* Whether the request has been killed by a broken client connection. */
@@ -139,7 +137,7 @@ public class TranslationRequestStream {
* new one.
*/
public synchronized Sentence next() {
- nextSentence = null;
+ Sentence nextSentence = null;
if (isShutDown)
return null;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
index ee8a2a9..93e21cd 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Candidate.java
@@ -37,19 +37,19 @@ import org.apache.joshua.decoder.hypergraph.HGNode;
public class Candidate {
// the set of hypotheses that can be paired with phrases from this span
- private List<Hypothesis> hypotheses;
+ private final List<Hypothesis> hypotheses;
// the list of target phrases gathered from a span of the input
- private TargetPhrases phrases;
+ private final TargetPhrases phrases;
// source span of new phrase
- public Span span;
+ public final Span span;
// future cost of applying phrases to hypotheses
- float future_delta;
+ final float future_delta;
// indices into the hypotheses and phrases arrays (used for cube pruning)
- private int[] ranks;
+ private final int[] ranks;
// scoring and state information
private ComputeNodeResult result;
@@ -186,7 +186,7 @@ public class Candidate {
* @return a list of size one, wrapping the tail node pointer
*/
public List<HGNode> getTailNodes() {
- List<HGNode> tailNodes = new ArrayList<HGNode>();
+ List<HGNode> tailNodes = new ArrayList<>();
tailNodes.add(getHypothesis());
return tailNodes;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java b/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
index 2c674fc..4ef0ede 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Coverage.java
@@ -39,7 +39,7 @@ public class Coverage {
private BitSet bits;
// Default bit vector length
- private static int INITIAL_LENGTH = 10;
+ private static final int INITIAL_LENGTH = 10;
public Coverage() {
firstZero = 0;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Future.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Future.java b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
index 0ece4a3..83baf9c 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Future.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Future.java
@@ -27,9 +27,9 @@ public class Future {
private static final Logger LOG = LoggerFactory.getLogger(Future.class);
// Square matrix with half the values ignored.
- private ChartSpan<Float> entries;
+ private final ChartSpan<Float> entries;
- private int sentlen;
+ private final int sentlen;
/**
* Computes bottom-up the best way to cover all spans of the input sentence, using the phrases
@@ -42,7 +42,7 @@ public class Future {
public Future(PhraseChart chart) {
sentlen = chart.SentenceLength();
- entries = new ChartSpan<Float>(sentlen + 1, Float.NEGATIVE_INFINITY);
+ entries = new ChartSpan<>(sentlen + 1, Float.NEGATIVE_INFINITY);
/*
* The sentence is represented as a sequence of words, with the first and last words set
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Header.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Header.java b/src/main/java/org/apache/joshua/decoder/phrase/Header.java
index 30d771c..d55c08b 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Header.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Header.java
@@ -30,7 +30,7 @@ public class Header implements Comparable<Header>, Comparator<Header> {
private static final Logger LOG = LoggerFactory.getLogger(Header.class);
private float score;
- private int arity;
+ private final int arity;
private Note note;
protected Header() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
index 71d3df9..af5069d 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Hypothesis.java
@@ -39,15 +39,14 @@ import org.apache.joshua.decoder.hypergraph.HyperEdge;
public class Hypothesis extends HGNode implements Comparable<Hypothesis> {
// The hypothesis' coverage vector
- private Coverage coverage;
+ private final Coverage coverage;
- public static Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
- public static Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
+ public static final Rule BEGIN_RULE = new HieroFormatReader().parseLine("[X] ||| <s> ||| <s> ||| ||| 0-0");
+ public static final Rule END_RULE = new HieroFormatReader().parseLine("[GOAL] ||| [X,1] </s> ||| [X,1] </s> ||| ||| 0-0 1-1");
public String toString() {
StringBuffer sb = new StringBuffer();
- for (DPState state: getDPStates())
- sb.append(state);
+ getDPStates().forEach(sb::append);
String words = bestHyperedge.getRule().getEnglishWords();
// return String.format("HYP[%s] %.5f j=%d words=%s state=%s", coverage, score, j, words, sb);
return String.format("HYP[%s] j=%d words=[%s] state=%s", coverage, j, words, sb);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
index 9803d9b..8f8bafe 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseChart.java
@@ -38,15 +38,15 @@ public class PhraseChart {
private static final Logger LOG = LoggerFactory.getLogger(PhraseChart.class);
- private int sentence_length;
+ private final int sentence_length;
private int max_source_phrase_length;
// Banded array: different source lengths are next to each other.
- private List<TargetPhrases> entries;
+ private final List<TargetPhrases> entries;
// number of translation options
int numOptions = 20;
- private List<FeatureFunction> features;
+ private final List<FeatureFunction> features;
/**
* Create a new PhraseChart object, which represents all phrases that are
@@ -67,16 +67,16 @@ public class PhraseChart {
this.features = features;
max_source_phrase_length = 0;
- for (int i = 0; i < tables.length; i++)
- max_source_phrase_length = Math.max(max_source_phrase_length,
- tables[i].getMaxSourcePhraseLength());
+ for (PhraseTable table1 : tables)
+ max_source_phrase_length = Math
+ .max(max_source_phrase_length, table1.getMaxSourcePhraseLength());
sentence_length = source.length();
// System.err.println(String.format(
// "PhraseChart()::Initializing chart for sentlen %d max %d from %s", sentence_length,
// max_source_phrase_length, source));
- entries = new ArrayList<TargetPhrases>();
+ entries = new ArrayList<>();
for (int i = 0; i < sentence_length * max_source_phrase_length; i++)
entries.add(null);
@@ -93,10 +93,8 @@ public class PhraseChart {
}
}
- for (TargetPhrases phrases : entries) {
- if (phrases != null)
- phrases.finish(features, Decoder.weights, num_options);
- }
+ entries.stream().filter(phrases -> phrases != null)
+ .forEach(phrases -> phrases.finish(features, Decoder.weights, num_options));
LOG.info("Input {}: Collecting options took {} seconds", source.id(),
(System.currentTimeMillis() - startTime) / 1000.0f);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
index 2c8a3e9..4c347dd 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java
@@ -18,8 +18,6 @@
*/
package org.apache.joshua.decoder.phrase;
-import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER;
-
import java.io.File;
import java.io.IOException;
import java.util.List;
@@ -43,7 +41,7 @@ import org.apache.joshua.decoder.ff.tm.packed.PackedGrammar;
*/
public class PhraseTable implements Grammar {
- private JoshuaConfiguration config;
+ private final JoshuaConfiguration config;
private Grammar backend;
/**
@@ -118,7 +116,7 @@ public class PhraseTable implements Grammar {
* @param rule the rule to add
*/
public void addRule(Rule rule) {
- ((MemoryBasedBatchGrammar)backend).addRule(rule);
+ backend.addRule(rule);
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
index d0ae2da..6661dfb 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stack.java
@@ -43,20 +43,20 @@ public class Stack extends ArrayList<Hypothesis> {
private static final long serialVersionUID = 7885252799032416068L;
- private HashMap<Coverage, ArrayList<Hypothesis>> coverages;
+ private final HashMap<Coverage, ArrayList<Hypothesis>> coverages;
- private Sentence sentence;
- private List<FeatureFunction> featureFunctions;
- private JoshuaConfiguration config;
+ private final Sentence sentence;
+ private final List<FeatureFunction> featureFunctions;
+ private final JoshuaConfiguration config;
/* The list of states we've already visited. */
- private HashSet<Candidate> visitedStates;
+ private final HashSet<Candidate> visitedStates;
/* A list of candidates sorted for consideration for entry to the chart (for cube pruning) */
- private PriorityQueue<Candidate> candidates;
+ private final PriorityQueue<Candidate> candidates;
/* Short-circuits adding a cube-prune state more than once */
- private HashMap<Hypothesis, Hypothesis> deduper;
+ private final HashMap<Hypothesis, Hypothesis> deduper;
/**
* Create a new stack. Stacks are organized one for each number of source words that are covered.
@@ -70,10 +70,10 @@ public class Stack extends ArrayList<Hypothesis> {
this.sentence = sentence;
this.config = config;
- this.candidates = new PriorityQueue<Candidate>(1, new CandidateComparator());
- this.coverages = new HashMap<Coverage, ArrayList<Hypothesis>>();
- this.visitedStates = new HashSet<Candidate>();
- this.deduper = new HashMap<Hypothesis,Hypothesis>();
+ this.candidates = new PriorityQueue<>(1, new CandidateComparator());
+ this.coverages = new HashMap<>();
+ this.visitedStates = new HashSet<>();
+ this.deduper = new HashMap<>();
}
/**
@@ -86,7 +86,7 @@ public class Stack extends ArrayList<Hypothesis> {
public boolean add(Hypothesis hyp) {
if (! coverages.containsKey((hyp.getCoverage())))
- coverages.put(hyp.getCoverage(), new ArrayList<Hypothesis>());
+ coverages.put(hyp.getCoverage(), new ArrayList<>());
coverages.get(hyp.getCoverage()).add(hyp);
return super.add(hyp);
@@ -153,7 +153,7 @@ public class Stack extends ArrayList<Hypothesis> {
String newWords = cand.getRule().getEnglishWords().replace("[X,1] ", "");
// If the string is not found in the target sentence, explore the cube neighbors
- if (sentence.fullTarget().indexOf(oldWords + " " + newWords) == -1) {
+ if (!sentence.fullTarget().contains(oldWords + " " + newWords)) {
Candidate next = cand.extendPhrase();
if (next != null)
addCandidate(next);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
index 8c092ec..2802d65 100644
--- a/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
+++ b/src/main/java/org/apache/joshua/decoder/phrase/Stacks.java
@@ -63,14 +63,14 @@ public class Stacks {
// The end state
private Hypothesis end;
- List<FeatureFunction> featureFunctions;
+ final List<FeatureFunction> featureFunctions;
- private Sentence sentence;
+ private final Sentence sentence;
- private JoshuaConfiguration config;
+ private final JoshuaConfiguration config;
/* Contains all the phrase tables */
- private PhraseChart chart;
+ private final PhraseChart chart;
/**
* Entry point. Initialize everything. Create pass-through (OOV) phrase table and glue phrase
@@ -89,8 +89,8 @@ public class Stacks {
this.config = config;
int num_phrase_tables = 0;
- for (int i = 0; i < grammars.length; i++)
- if (grammars[i] instanceof PhraseTable)
+ for (Grammar grammar : grammars)
+ if (grammar instanceof PhraseTable)
++num_phrase_tables;
PhraseTable[] phraseTables = new PhraseTable[num_phrase_tables + 2];
@@ -118,7 +118,7 @@ public class Stacks {
long startTime = System.currentTimeMillis();
Future future = new Future(chart);
- stacks = new ArrayList<Stack>();
+ stacks = new ArrayList<>();
// <s> counts as the first word. Pushing null lets us count from one.
stacks.add(null);
@@ -233,11 +233,8 @@ public class Stacks {
/* If a gap is created by applying this phrase, make sure that you can reach the first
* zero later on without violating the distortion constraint.
*/
- if (end - firstZero > config.reordering_limit) {
- return false;
- }
-
- return true;
+ return end - firstZero <= config.reordering_limit;
+
}
@@ -254,7 +251,7 @@ public class Stacks {
for (Hypothesis hyp: lastStack) {
float score = hyp.getScore();
- List<HGNode> tailNodes = new ArrayList<HGNode>();
+ List<HGNode> tailNodes = new ArrayList<>();
tailNodes.add(hyp);
float finalTransitionScore = ComputeNodeResult.computeFinalCost(featureFunctions, tailNodes, 0, sentence.length(), null, sentence);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
index 5146e2c..0db76cb 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/ConstraintRule.java
@@ -54,9 +54,9 @@ public interface ConstraintRule {
* The interpretation of a RHS is that it provides a hard constraint to filter the regular grammar
* such that only rules generating the desired translation can be used.</p>
*/
- public enum Type {
+ enum Type {
RULE, LHS, RHS
- };
+ }
/**
* Return the type of this ConstraintRule.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
index e323ef6..7127870 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Sentence.java
@@ -91,7 +91,7 @@ public class Sentence {
config = joshuaConfiguration;
- this.constraints = new LinkedList<ConstraintSpan>();
+ this.constraints = new LinkedList<>();
// Check if the sentence has SGML markings denoting the
// sentence ID; if so, override the id passed in to the
@@ -103,7 +103,7 @@ public class Sentence {
this.id = Integer.parseInt(idstr);
} else {
- if (inputString.indexOf(" ||| ") != -1) {
+ if (inputString.contains(" ||| ")) {
/* Target-side given; used for parsing and forced decoding */
String[] pieces = inputString.split("\\s?\\|{3}\\s?");
source = pieces[0];
@@ -178,7 +178,7 @@ public class Sentence {
Lattice<Token> oldLattice = this.getLattice();
/* Build a list of terminals across all grammars */
- HashSet<Integer> vocabulary = new HashSet<Integer>();
+ HashSet<Integer> vocabulary = new HashSet<>();
for (Grammar grammar : grammars) {
Iterator<Integer> iterator = grammar.getTrieRoot().getTerminalExtensionIterator();
while (iterator.hasNext())
@@ -197,11 +197,11 @@ public class Sentence {
List<Arc<Token>> savedArcs = oldNodes.get(nodeid).getOutgoingArcs();
char[] chars = word.toCharArray();
- ChartSpan<Boolean> wordChart = new ChartSpan<Boolean>(chars.length + 1, false);
- ArrayList<Node<Token>> nodes = new ArrayList<Node<Token>>(chars.length + 1);
+ ChartSpan<Boolean> wordChart = new ChartSpan<>(chars.length + 1, false);
+ ArrayList<Node<Token>> nodes = new ArrayList<>(chars.length + 1);
nodes.add(oldNodes.get(nodeid));
for (int i = 1; i < chars.length; i++)
- nodes.add(new Node<Token>(i));
+ nodes.add(new Node<>(i));
nodes.add(oldNodes.get(nodeid + 1));
for (int width = 1; width <= chars.length; width++) {
for (int i = 0; i <= chars.length - width; i++) {
@@ -228,7 +228,7 @@ public class Sentence {
/* If there's a path from beginning to end */
if (wordChart.get(0, chars.length)) {
// Remove nodes not part of a complete path
- HashSet<Node<Token>> deletedNodes = new HashSet<Node<Token>>();
+ HashSet<Node<Token>> deletedNodes = new HashSet<>();
for (int k = 1; k < nodes.size() - 1; k++)
if (!(wordChart.get(0, k) && wordChart.get(k, chars.length)))
nodes.set(k, null);
@@ -382,7 +382,7 @@ public class Sentence {
*/
public List<Token> getTokens() {
assert isLinearChain();
- List<Token> tokens = new ArrayList<Token>();
+ List<Token> tokens = new ArrayList<>();
for (Node<Token> node: getLattice().getNodes())
if (node != null && node.getOutgoingArcs().size() > 0)
tokens.add(node.getOutgoingArcs().get(0).getLabel());
@@ -435,7 +435,7 @@ public class Sentence {
public String toString() {
StringBuilder sb = new StringBuilder(source());
if (target() != null) {
- sb.append(" ||| " + target());
+ sb.append(" ||| ").append(target());
}
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
index b84826d..4cbc7fa 100644
--- a/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
+++ b/src/main/java/org/apache/joshua/decoder/segment_file/Token.java
@@ -42,10 +42,9 @@ public class Token {
// The token without the annotations
private String token;
- private int tokenID;
+ private final int tokenID;
private HashMap<String,String> annotations = null;
- private JoshuaConfiguration joshuaConfiguration;
/**
* <p>Constructor : Creates a Token object from a raw word
@@ -75,10 +74,10 @@ public class Token {
*
*/
public Token(String rawWord, JoshuaConfiguration config) {
+
+ JoshuaConfiguration joshuaConfiguration = config;
- this.joshuaConfiguration = config;
-
- annotations = new HashMap<String,String>();
+ annotations = new HashMap<>();
// Matches a word with an annotation
// Check guidelines in constructor description
[10/22] incubator-joshua git commit: Merge branch 'master' into demo
Posted by mj...@apache.org.
Merge branch 'master' into demo
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/47f4c969
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/47f4c969
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/47f4c969
Branch: refs/heads/JOSHUA-284
Commit: 47f4c969922d5b8339f6fba26d14aa8a228380ef
Parents: 278be37 216d61d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 16 07:17:20 2016 +0200
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 16 07:17:20 2016 +0200
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/phrase/PhraseTable.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
[13/22] incubator-joshua git commit: JOSHUA-301 - Add findbugs plugin.
Posted by mj...@apache.org.
JOSHUA-301 - Add findbugs plugin.
Currently this can be run with 'mvn findbugs:check'. There are a ton of warnings, however,
which fail the build. Once the warnings are fixed, this automatic build failure could be
enabled, but at present there are too many warnings to fix immediately.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/dccb48bc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/dccb48bc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/dccb48bc
Branch: refs/heads/JOSHUA-284
Commit: dccb48bcc0ab76faaa3b9f3e58823b5fa66ea983
Parents: 2e2ee09
Author: Max Thomas <ma...@maxthomas.io>
Authored: Wed Aug 17 10:13:37 2016 -0500
Committer: Max Thomas <ma...@maxthomas.io>
Committed: Wed Aug 17 10:13:37 2016 -0500
----------------------------------------------------------------------
pom.xml | 31 ++++++++++++++++++++++++++++---
1 file changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dccb48bc/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 49d77fb..0229554 100644
--- a/pom.xml
+++ b/pom.xml
@@ -37,7 +37,9 @@
<properties>
<slf4j.version>1.7.21</slf4j.version>
+ <findbugs.version>3.0.4</findbugs.version>
</properties>
+
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
@@ -120,6 +122,24 @@
<testOutputDirectory>${basedir}/target/test-classes</testOutputDirectory>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
<testSourceDirectory>${basedir}/src/test/java</testSourceDirectory>
+
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>${findbugs.version}</version>
+ <configuration>
+ <xmlOutput>true</xmlOutput>
+ <effort>Max</effort>
+ <failOnError>true</failOnError>
+ <includeTests>true</includeTests>
+ <maxRank>16</maxRank>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
@@ -169,6 +189,11 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>${findbugs.version}</version>
+ </plugin>
</plugins>
</build>
<dependencies>
@@ -232,9 +257,9 @@
<version>${slf4j.version}</version>
</dependency>
<dependency>
- <groupId>concurrent</groupId>
- <artifactId>concurrent</artifactId>
- <version>1.3.4</version>
+ <groupId>concurrent</groupId>
+ <artifactId>concurrent</artifactId>
+ <version>1.3.4</version>
</dependency>
<!-- Test Dependencies -->
[17/22] incubator-joshua git commit: JOSHUA-291 - static analysis
based code improvements on decoder package
Posted by mj...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
index 07c7ecd..60e8d20 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Tree.java
@@ -83,7 +83,7 @@ public class Tree implements Serializable {
* hand, we can iterate through our store of language model fragments to match them against this,
* following tail nodes if necessary.
*/
- public static HashMap<String, String> rulesToFragmentStrings = new HashMap<String, String>();
+ public static final HashMap<String, String> rulesToFragmentStrings = new HashMap<>();
public Tree(String label, List<Tree> children) {
setLabel(label);
@@ -153,19 +153,19 @@ public class Tree implements Serializable {
}
public List<Tree> getNonterminalYield() {
- List<Tree> yield = new ArrayList<Tree>();
+ List<Tree> yield = new ArrayList<>();
appendNonterminalYield(this, yield);
return yield;
}
public List<Tree> getYield() {
- List<Tree> yield = new ArrayList<Tree>();
+ List<Tree> yield = new ArrayList<>();
appendYield(this, yield);
return yield;
}
public List<Tree> getTerminals() {
- List<Tree> yield = new ArrayList<Tree>();
+ List<Tree> yield = new ArrayList<>();
appendTerminals(this, yield);
return yield;
}
@@ -186,7 +186,7 @@ public class Tree implements Serializable {
* @return a cloned tree
*/
public Tree shallowClone() {
- ArrayList<Tree> newChildren = new ArrayList<Tree>(children.size());
+ ArrayList<Tree> newChildren = new ArrayList<>(children.size());
for (Tree child : children) {
newChildren.add(child.shallowClone());
}
@@ -222,7 +222,7 @@ public class Tree implements Serializable {
}
public List<Tree> getPreTerminalYield() {
- List<Tree> yield = new ArrayList<Tree>();
+ List<Tree> yield = new ArrayList<>();
appendPreTerminalYield(this, yield);
return yield;
}
@@ -250,9 +250,8 @@ public class Tree implements Serializable {
this.numLexicalItems = 1;
else {
this.numLexicalItems = 0;
- for (Tree child : children)
- if (child.isLexicalized())
- this.numLexicalItems += 1;
+ children.stream().filter(child -> child.isLexicalized())
+ .forEach(child -> this.numLexicalItems += 1);
}
}
@@ -283,7 +282,7 @@ public class Tree implements Serializable {
}
public List<Tree> getAtDepth(int depth) {
- List<Tree> yield = new ArrayList<Tree>();
+ List<Tree> yield = new ArrayList<>();
appendAtDepth(depth, this, yield);
return yield;
}
@@ -354,7 +353,7 @@ public class Tree implements Serializable {
* @return the <code>Set</code> of all subtrees in the tree.
*/
public Set<Tree> subTrees() {
- return (Set<Tree>) subTrees(new HashSet<Tree>());
+ return (Set<Tree>) subTrees(new HashSet<>());
}
/**
@@ -364,7 +363,7 @@ public class Tree implements Serializable {
* @return the <code>List</code> of all subtrees in the tree.
*/
public List<Tree> subTreeList() {
- return (List<Tree>) subTrees(new ArrayList<Tree>());
+ return (List<Tree>) subTrees(new ArrayList<>());
}
/**
@@ -397,10 +396,10 @@ public class Tree implements Serializable {
private class TreeIterator implements Iterator<Tree> {
- private List<Tree> treeStack;
+ private final List<Tree> treeStack;
private TreeIterator() {
- treeStack = new ArrayList<Tree>();
+ treeStack = new ArrayList<>();
treeStack.add(Tree.this);
}
@@ -499,8 +498,7 @@ public class Tree implements Serializable {
*/
public static Tree fromString(String ptbStr) {
PennTreeReader reader = new PennTreeReader(new StringReader(ptbStr));
- Tree fragment = reader.next();
- return fragment;
+ return reader.next();
}
public static Tree getFragmentFromYield(String yield) {
@@ -571,11 +569,11 @@ public class Tree implements Serializable {
* to a nonnegative 0-based permutation and store it in tailIndices. This is used to index
* the incoming DerivationState items, which are ordered by the source side.
*/
- ArrayList<Integer> tailIndices = new ArrayList<Integer>();
+ ArrayList<Integer> tailIndices = new ArrayList<>();
int[] englishInts = rule.getEnglish();
- for (int i = 0; i < englishInts.length; i++)
- if (englishInts[i] < 0)
- tailIndices.add(-(englishInts[i] + 1));
+ for (int englishInt : englishInts)
+ if (englishInt < 0)
+ tailIndices.add(-(englishInt + 1));
/*
* We now have the tree's yield. The substitution points on the yield should match the
@@ -643,11 +641,11 @@ public class Tree implements Serializable {
* to a nonnegative 0-based permutation and store it in tailIndices. This is used to index
* the incoming DerivationState items, which are ordered by the source side.
*/
- ArrayList<Integer> tailIndices = new ArrayList<Integer>();
+ ArrayList<Integer> tailIndices = new ArrayList<>();
int[] englishInts = rule.getEnglish();
- for (int i = 0; i < englishInts.length; i++)
- if (englishInts[i] < 0)
- tailIndices.add(-(englishInts[i] + 1));
+ for (int englishInt : englishInts)
+ if (englishInt < 0)
+ tailIndices.add(-(englishInt + 1));
/*
* We now have the tree's yield. The substitution points on the yield should match the
@@ -702,11 +700,11 @@ public class Tree implements Serializable {
if (tree != null && tailNodes != null && tailNodes.size() > 0 && maxDepth > 0) {
List<Tree> frontier = tree.getNonterminalYield();
- ArrayList<Integer> tailIndices = new ArrayList<Integer>();
+ ArrayList<Integer> tailIndices = new ArrayList<>();
int[] englishInts = rule.getEnglish();
- for (int i = 0; i < englishInts.length; i++)
- if (englishInts[i] < 0)
- tailIndices.add(-1 * englishInts[i] - 1);
+ for (int englishInt : englishInts)
+ if (englishInt < 0)
+ tailIndices.add(-1 * englishInt - 1);
/*
* We now have the tree's yield. The substitution points on the yield should match the
@@ -720,7 +718,7 @@ public class Tree implements Serializable {
// String lhs = tailNodes.get(i).getLHS().replaceAll("[\\[\\]]", "");
// System.err.println(String.format(" %d: %s", i, lhs));
try {
- Tree frontierTree = frontier.get(tailIndices.get(i).intValue());
+ Tree frontierTree = frontier.get(tailIndices.get(i));
frontierTree.setBoundary(true);
HyperEdge edge = tailNodes.get(i).bestHyperedge;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
index d06388c..211ad20 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/fragmentlm/Trees.java
@@ -36,9 +36,9 @@ import org.apache.joshua.corpus.Vocabulary;
public class Trees {
public static class PennTreeReader implements Iterator<Tree> {
- public static String ROOT_LABEL = "ROOT";
+ public static final String ROOT_LABEL = "ROOT";
- PushbackReader in;
+ final PushbackReader in;
Tree nextTree;
public boolean hasNext() {
@@ -115,7 +115,7 @@ public class Trees {
}
private List<Tree> readChildList() throws IOException {
- List<Tree> children = new ArrayList<Tree>();
+ List<Tree> children = new ArrayList<>();
readWhiteSpace();
while (!isRightParen(peek())) {
children.add(readTree(false));
@@ -168,7 +168,7 @@ public class Trees {
}
public PennTreeReader(Reader in) {
- this.in = new PushbackReader((java.io.Reader) in);
+ this.in = new PushbackReader(in);
nextTree = readRootTree();
// System.out.println(nextTree);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
index 336189b..93d54ed 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/KenLM.java
@@ -45,31 +45,31 @@ public class KenLM implements NGramLanguageModel, Comparable<KenLM> {
// inferred from model file (may be larger than ngramOrder)
private final int N;
- private final static native long construct(String file_name);
+ private static native long construct(String file_name);
- private final static native void destroy(long ptr);
+ private static native void destroy(long ptr);
- private final static native int order(long ptr);
+ private static native int order(long ptr);
- private final static native boolean registerWord(long ptr, String word, int id);
+ private static native boolean registerWord(long ptr, String word, int id);
- private final static native float prob(long ptr, int words[]);
+ private static native float prob(long ptr, int words[]);
- private final static native float probForString(long ptr, String[] words);
+ private static native float probForString(long ptr, String[] words);
- private final static native boolean isKnownWord(long ptr, String word);
+ private static native boolean isKnownWord(long ptr, String word);
- private final static native boolean isLmOov(long ptr, int word);
+ private static native boolean isLmOov(long ptr, int word);
- private final static native StateProbPair probRule(long ptr, long pool, long words[]);
+ private static native StateProbPair probRule(long ptr, long pool, long words[]);
- private final static native float estimateRule(long ptr, long words[]);
+ private static native float estimateRule(long ptr, long words[]);
- private final static native float probString(long ptr, int words[], int start);
+ private static native float probString(long ptr, int words[], int start);
- private final static native long createPool();
+ private static native long createPool();
- private final static native void destroyPool(long pointer);
+ private static native void destroyPool(long pointer);
public KenLM(int order, String file_name) {
pointer = initializeSystemLibrary(file_name);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
index 7b0bac8..f5c1cb5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/LanguageModelFF.java
@@ -94,10 +94,10 @@ public class LanguageModelFF extends StatefulFF {
/**
* We cache the weight of the feature since there is only one.
*/
- protected float weight;
- protected float oovWeight;
+ protected final float weight;
+ protected final float oovWeight;
protected String type;
- protected String path;
+ protected final String path;
/** Whether this is a class-based LM */
protected boolean isClassLM;
@@ -137,7 +137,7 @@ public class LanguageModelFF extends StatefulFF {
denseFeatureIndex = index;
oovDenseFeatureIndex = denseFeatureIndex + 1;
- final ArrayList<String> names = new ArrayList<String>(2);
+ final ArrayList<String> names = new ArrayList<>(2);
names.add(name);
if (withOovFeature) {
names.add(oovFeatureName);
@@ -149,13 +149,16 @@ public class LanguageModelFF extends StatefulFF {
* Initializes the underlying language model.
*/
protected void initializeLM() {
- if (type.equals("kenlm")) {
+ switch (type) {
+ case "kenlm":
this.languageModel = new KenLM(ngramOrder, path);
- } else if (type.equals("berkeleylm")) {
+ break;
+ case "berkeleylm":
this.languageModel = new LMGrammarBerkeley(ngramOrder, path);
- } else {
+ break;
+ default:
String msg = String.format("* FATAL: Invalid backend lm_type '%s' for LanguageModel", type)
+ "* Permissible values for 'lm_type' are 'kenlm' and 'berkeleylm'";
throw new RuntimeException(msg);
@@ -329,15 +332,14 @@ public class LanguageModelFF extends StatefulFF {
int[] enWords = getRuleIds(rule);
- List<Integer> words = new ArrayList<Integer>();
+ List<Integer> words = new ArrayList<>();
boolean skipStart = (enWords[0] == startSymbolId);
/*
* Move through the words, accumulating language model costs each time we have an n-gram (n >=
* 2), and resetting the series of words when we hit a nonterminal.
*/
- for (int c = 0; c < enWords.length; c++) {
- int currentWord = enWords[c];
+ for (int currentWord : enWords) {
if (FormatUtils.isNonterminal(currentWord)) {
lmEstimate += scoreChunkLogP(words, considerIncompleteNgrams, skipStart);
words.clear();
@@ -396,9 +398,7 @@ public class LanguageModelFF extends StatefulFF {
float transitionLogP = 0.0f;
int[] left_context = null;
- for (int c = 0; c < enWords.length; c++) {
- int curID = enWords[c];
-
+ for (int curID : enWords) {
if (FormatUtils.isNonterminal(curID)) {
int index = -(curID + 1);
@@ -407,8 +407,8 @@ public class LanguageModelFF extends StatefulFF {
int[] right = state.getRightLMStateWords();
// Left context.
- for (int i = 0; i < left.length; i++) {
- current[ccount++] = left[i];
+ for (int aLeft : left) {
+ current[ccount++] = aLeft;
if (left_context == null && ccount == this.ngramOrder - 1)
left_context = Arrays.copyOf(current, ccount);
@@ -470,17 +470,16 @@ public class LanguageModelFF extends StatefulFF {
// System.err.println(String.format("LanguageModel::computeFinalTransition()"));
float res = 0.0f;
- LinkedList<Integer> currentNgram = new LinkedList<Integer>();
+ LinkedList<Integer> currentNgram = new LinkedList<>();
int[] leftContext = state.getLeftLMStateWords();
int[] rightContext = state.getRightLMStateWords();
- for (int i = 0; i < leftContext.length; i++) {
- int t = leftContext[i];
+ for (int t : leftContext) {
currentNgram.add(t);
if (currentNgram.size() >= 2) { // start from bigram
- float prob = this.languageModel.ngramLogProbability(Support.toArray(currentNgram),
- currentNgram.size());
+ float prob = this.languageModel
+ .ngramLogProbability(Support.toArray(currentNgram), currentNgram.size());
res += prob;
}
if (currentNgram.size() == this.ngramOrder)
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java b/src/main/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
index 5c45520..4bf55b5 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeley.java
@@ -53,7 +53,7 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
private int[] vocabIdToMyIdMapping;
- private ThreadLocal<int[]> arrayScratch = new ThreadLocal<int[]>() {
+ private final ThreadLocal<int[]> arrayScratch = new ThreadLocal<int[]>() {
@Override
protected int[] initialValue() {
@@ -117,7 +117,7 @@ public class LMGrammarBerkeley extends DefaultNGramLanguageModel {
public boolean isOov(int id) {
// for Berkeley, we unfortunately have to temporarily convert to String
return lm.getWordIndexer().getIndexPossiblyUnk(Vocabulary.word(id)) <= 0;
- };
+ }
@Override
public float sentenceLogProbability(int[] sentence, int order, int startIndex) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilter.java b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilter.java
index a66fa44..06f30e7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilter.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilter.java
@@ -76,7 +76,7 @@ public class BloomFilter implements Externalizable {
/**
* A random number generator for building hash functions.
*/
- transient private Random RANDOM = new Random();
+ final transient private Random RANDOM = new Random();
/**
* Builds an empty Bloom filter, ready to build hash functions and store objects.
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
index 4c56aac..7932364 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/bloomfilter_lm/BloomFilterLanguageModel.java
@@ -365,7 +365,7 @@ public class BloomFilterLanguageModel extends DefaultNGramLanguageModel implemen
* @param filename path to the statistics file
*/
private void populateBloomFilter(int bloomFilterSize, String filename) {
- HashMap<String, Long> typesAfter = new HashMap<String, Long>();
+ HashMap<String, Long> typesAfter = new HashMap<>();
try {
FileInputStream file_in = new FileInputStream(filename);
FileInputStream file_in_copy = new FileInputStream(filename);
@@ -396,7 +396,6 @@ public class BloomFilterLanguageModel extends DefaultNGramLanguageModel implemen
hist[i] = Vocabulary.id(toks[i]);
add(hist, typesAfter.get(history), typesFuncs);
}
- return;
}
/**
@@ -460,11 +459,10 @@ public class BloomFilterLanguageModel extends DefaultNGramLanguageModel implemen
if (types.get(history) == null)
types.put(history.toString(), 1L);
else {
- long x = (Long) types.get(history);
+ long x = types.get(history);
types.put(history.toString(), x + 1);
}
}
- return;
}
/**
@@ -527,14 +525,14 @@ public class BloomFilterLanguageModel extends DefaultNGramLanguageModel implemen
}
out.writeDouble(numTokens);
out.writeInt(countFuncs.length);
- for (int i = 0; i < countFuncs.length; i++) {
- out.writeLong(countFuncs[i][0]);
- out.writeLong(countFuncs[i][1]);
+ for (long[] countFunc : countFuncs) {
+ out.writeLong(countFunc[0]);
+ out.writeLong(countFunc[1]);
}
out.writeInt(typesFuncs.length);
- for (int i = 0; i < typesFuncs.length; i++) {
- out.writeLong(typesFuncs[i][0]);
- out.writeLong(typesFuncs[i][1]);
+ for (long[] typesFunc : typesFuncs) {
+ out.writeLong(typesFunc[0]);
+ out.writeLong(typesFunc[1]);
}
out.writeDouble(quantizationBase);
bf.writeExternal(out);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
index bb4732f..9bfccb0 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/lm/buildin_lm/TrieLM.java
@@ -23,6 +23,7 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
@@ -94,14 +95,14 @@ public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
* @throws FileNotFoundException if the input file cannot be located
*/
public TrieLM(ArpaFile arpaFile) throws FileNotFoundException {
- super(arpaFile.getVocab().size(), arpaFile.getOrder());
+ super(Vocabulary.size(), arpaFile.getOrder());
int ngramCounts = arpaFile.size();
LOG.debug("ARPA file contains {} n-grams", ngramCounts);
- this.children = new HashMap<Long,Integer>(ngramCounts);
- this.logProbs = new HashMap<Long,Float>(ngramCounts);
- this.backoffs = new HashMap<Integer,Float>(ngramCounts);
+ this.children = new HashMap<>(ngramCounts);
+ this.logProbs = new HashMap<>(ngramCounts);
+ this.backoffs = new HashMap<>(ngramCounts);
int nodeCounter = 0;
@@ -265,8 +266,8 @@ public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
Scanner scanner = new Scanner(new File(args[1]));
- LinkedList<String> wordList = new LinkedList<String>();
- LinkedList<String> window = new LinkedList<String>();
+ LinkedList<String> wordList = new LinkedList<>();
+ LinkedList<String> window = new LinkedList<>();
LOG.info("Starting to scan {}", args[1]);
while (scanner.hasNext()) {
@@ -279,15 +280,13 @@ public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
wordList.clear();
wordList.add("<s>");
- for (String word : words) {
- wordList.add(word);
- }
+ Collections.addAll(wordList, words);
wordList.add("</s>");
- ArrayList<Integer> sentence = new ArrayList<Integer>();
+ ArrayList<Integer> sentence = new ArrayList<>();
// int[] ids = new int[wordList.size()];
- for (int i=0, size=wordList.size(); i<size; i++) {
- sentence.add(vocab.id(wordList.get(i)));
+ for (String aWordList : wordList) {
+ sentence.add(Vocabulary.id(aWordList));
// ids[i] = ;
}
@@ -310,7 +309,7 @@ public class TrieLM extends AbstractLM { //DefaultNGramLanguageModel {
int i=0;
int[] wordIDs = new int[window.size()];
for (String word : window) {
- wordIDs[i] = vocab.id(word);
+ wordIDs[i] = Vocabulary.id(word);
i++;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
index f9e6a29..f37c139 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/phrase/Distortion.java
@@ -47,7 +47,7 @@ public class Distortion extends StatelessFF {
public ArrayList<String> reportDenseFeatures(int index) {
denseFeatureIndex = index;
- ArrayList<String> names = new ArrayList<String>();
+ ArrayList<String> names = new ArrayList<>();
names.add(name);
return names;
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
index 91af58b..e5dcbf9 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/similarity/EdgePhraseSimilarityFF.java
@@ -49,19 +49,17 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
private static final Logger LOG = LoggerFactory.getLogger(EdgePhraseSimilarityFF.class);
- private static Cache<String, Float> cache = new Cache<String, Float>(100000000);
+ private static final Cache<String, Float> cache = new Cache<>(100000000);
- private String host;
- private int port;
+ private final String host;
+ private final int port;
- private Socket socket;
private PrintWriter serverAsk;
private BufferedReader serverReply;
private int[] source;
private final int MAX_PHRASE_LENGTH = 4;
- private final int GAP = 0;
public EdgePhraseSimilarityFF(FeatureVector weights, String[] args, JoshuaConfiguration config) throws NumberFormatException, UnknownHostException, IOException {
super(weights, "EdgePhraseSimilarity", args, config);
@@ -74,7 +72,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
private void initializeConnection() throws NumberFormatException, IOException {
LOG.info("Opening connection.");
- socket = new Socket(host, port);
+ Socket socket = new Socket(host, port);
serverAsk = new PrintWriter(socket.getOutputStream(), true);
serverReply = new BufferedReader(new InputStreamReader(socket.getInputStream()));
}
@@ -109,7 +107,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
lm_state_size += state.getLeftLMStateWords().length + state.getRightLMStateWords().length;
}
- ArrayList<int[]> batch = new ArrayList<int[]>();
+ ArrayList<int[]> batch = new ArrayList<>();
// Build joined target string.
int[] join = new int[target.length + lm_state_size];
@@ -132,6 +130,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
// System.err.println();
for (int w : state.getLeftLMStateWords())
join[idx++] = w;
+ int GAP = 0;
join[idx++] = GAP;
gaps[num_gaps++] = idx;
// System.err.print("RIGHT: ");
@@ -210,7 +209,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
return 0.0f;
}
- private final int[] getSourcePhrase(int anchor) {
+ private int[] getSourcePhrase(int anchor) {
int idx;
int length = Math.min(anchor, MAX_PHRASE_LENGTH - 1)
+ Math.min(source.length - anchor, MAX_PHRASE_LENGTH - 1);
@@ -228,7 +227,7 @@ public class EdgePhraseSimilarityFF extends StatefulFF implements SourceDependen
float similarity = 0.0f;
int count = 0;
StringBuilder query = new StringBuilder();
- List<String> to_cache = new ArrayList<String>();
+ List<String> to_cache = new ArrayList<>();
query.append("xb");
for (int i = 0; i < batch.size(); i += 2) {
int[] source = batch.get(i);
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
index b269bd9..ef72b3d 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/state_maintenance/NgramDPState.java
@@ -57,7 +57,7 @@ public class NgramDPState extends DPState {
return right;
}
- private final void assertLengths() {
+ private void assertLengths() {
if (left.length != right.length)
throw new RuntimeException("Unequal lengths in left and right state: < "
+ Vocabulary.getWords(left) + " | " + Vocabulary.getWords(right) + " >");
@@ -90,10 +90,10 @@ public class NgramDPState extends DPState {
StringBuilder sb = new StringBuilder();
sb.append("<");
for (int id : left)
- sb.append(" " + Vocabulary.word(id));
+ sb.append(" ").append(Vocabulary.word(id));
sb.append(" |");
for (int id : right)
- sb.append(" " + Vocabulary.word(id));
+ sb.append(" ").append(Vocabulary.word(id));
sb.append(" >");
return sb.toString();
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
index 47a75df..3181bfa 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/AbstractGrammar.java
@@ -165,10 +165,12 @@ public abstract class AbstractGrammar implements Grammar {
if (LOG.isDebugEnabled()) {
StringBuilder s = new StringBuilder();
for (Rule r : rules.getSortedRules(models)) {
- s.append("\n\t" + r.getLHS() + " ||| " + Arrays.toString(r.getFrench()) + " ||| "
- + Arrays.toString(r.getEnglish()) + " ||| " + r.getFeatureVector() + " ||| "
- + r.getEstimatedCost() + " " + r.getClass().getName() + "@"
- + Integer.toHexString(System.identityHashCode(r)));
+ s.append("\n\t").append(r.getLHS()).append(" ||| ")
+ .append(Arrays.toString(r.getFrench())).append(" ||| ")
+ .append(Arrays.toString(r.getEnglish())).append(" ||| ")
+ .append(r.getFeatureVector()).append(" ||| ").append(r.getEstimatedCost())
+ .append(" ").append(r.getClass().getName()).append("@")
+ .append(Integer.toHexString(System.identityHashCode(r)));
}
LOG.debug("{}", s);
}
@@ -203,7 +205,7 @@ public abstract class AbstractGrammar implements Grammar {
* Add OOV rules; This should be called after the manual constraints have
* been set up.
*/
- HashSet<Integer> words = new HashSet<Integer>();
+ HashSet<Integer> words = new HashSet<>();
for (Node<Token> node : inputLattice) {
for (Arc<Token> arc : node.getOutgoingArcs()) {
// create a rule, but do not add into the grammar trie
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java b/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
index 4cffb2f..4d577dc 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/BasicRuleCollection.java
@@ -56,7 +56,7 @@ public class BasicRuleCollection implements RuleCollection {
* @param sourceTokens Sequence of terminals and nonterminals in the source pattern
*/
public BasicRuleCollection(int arity, int[] sourceTokens) {
- this.rules = new ArrayList<Rule>();
+ this.rules = new ArrayList<>();
this.sourceTokens = sourceTokens;
this.arity = arity;
this.sorted = false;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
index ce1e7d1..2424a1e 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/CreateGlueGrammar.java
@@ -48,7 +48,7 @@ public class CreateGlueGrammar {
private String grammarPath;
@Option(name = "--goal", aliases = {"-goal"}, required = false, usage = "specify custom GOAL symbol. Default: 'GOAL'")
- private String goalSymbol = cleanNonTerminal(new JoshuaConfiguration().goal_symbol);
+ private final String goalSymbol = cleanNonTerminal(new JoshuaConfiguration().goal_symbol);
/* Rule templates */
// [GOAL] ||| <s> ||| <s> ||| 0
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
index df00255..a2d80be 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/GrammarReader.java
@@ -39,7 +39,7 @@ public abstract class GrammarReader<R extends Rule> implements Iterable<R>, Iter
protected static String fieldDelimiter;
protected static String description;
- protected String fileName;
+ protected final String fileName;
protected LineReader reader;
protected String lookAhead;
protected int numRulesRead;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
index 16b8bfc..5d5ca9f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/OwnerMap.java
@@ -37,7 +37,7 @@ import com.google.common.collect.HashBiMap;
public class OwnerMap {
// bi-directional mapping between OwnerId and Owner strings
- private static BiMap<OwnerId, String> map = HashBiMap.create();
+ private static final BiMap<OwnerId, String> map = HashBiMap.create();
public static final OwnerId UNKNOWN_OWNER_ID = new OwnerId(0);
public static final String UNKNOWN_OWNER = "<unowned>";
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
index 15fbec1..95717de 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/Rule.java
@@ -107,7 +107,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
this.arity = arity;
this.owner = owner;
this.target = target;
- this.sparseFeatureStringSupplier = Suppliers.memoize(() -> { return sparseFeatures; });
+ this.sparseFeatureStringSupplier = Suppliers.memoize(() -> sparseFeatures);
this.featuresSupplier = initializeFeatureSupplierFromString();
this.alignmentSupplier = initializeAlignmentSupplier();
}
@@ -127,7 +127,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
this.arity = arity;
this.owner = owner;
this.target = targetRhs;
- this.featuresSupplier = Suppliers.memoize(() -> { return features; });
+ this.featuresSupplier = Suppliers.memoize(() -> features);
this.sparseFeatureStringSupplier = initializeSparseFeaturesStringSupplier();
this.alignmentSupplier = initializeAlignmentSupplier();
}
@@ -205,9 +205,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* If Rule was constructed with a FeatureVector, we lazily populate the sparseFeaturesStringSupplier.
*/
private Supplier<String> initializeSparseFeaturesStringSupplier() {
- return Suppliers.memoize(() -> {
- return getFeatureVector().toString();
- });
+ return Suppliers.memoize(() -> getFeatureVector().toString());
}
// ===============================================================
@@ -240,10 +238,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
if (!Arrays.equals(getFrench(), other.getFrench())) {
return false;
}
- if (!Arrays.equals(target, other.getEnglish())) {
- return false;
- }
- return true;
+ return Arrays.equals(target, other.getEnglish());
}
public int hashCode() {
@@ -396,17 +391,16 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
// ===============================================================
public String toString() {
- StringBuffer sb = new StringBuffer();
- sb.append(Vocabulary.word(this.getLHS()));
- sb.append(" ||| ");
- sb.append(getFrenchWords());
- sb.append(" ||| ");
- sb.append(getEnglishWords());
- sb.append(" |||");
- sb.append(" " + getFeatureVector());
- sb.append(String.format(" ||| est=%.3f", getEstimatedCost()));
- sb.append(String.format(" pre=%.3f", getPrecomputableCost()));
- return sb.toString();
+ String sb = Vocabulary.word(this.getLHS()) +
+ " ||| " +
+ getFrenchWords() +
+ " ||| " +
+ getEnglishWords() +
+ " |||" +
+ " " + getFeatureVector() +
+ String.format(" ||| est=%.3f", getEstimatedCost()) +
+ String.format(" pre=%.3f", getPrecomputableCost());
+ return sb;
}
/**
@@ -422,22 +416,24 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
int nt = 1;
for (int i = 0; i < getFrench().length; i++) {
if (getFrench()[i] < 0)
- sb.append(" " + Vocabulary.word(getFrench()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
+ sb.append(" ").append(
+ Vocabulary.word(getFrench()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
else
- sb.append(" " + Vocabulary.word(getFrench()[i]));
+ sb.append(" ").append(Vocabulary.word(getFrench()[i]));
}
sb.append(" |||");
nt = 1;
for (int i = 0; i < getEnglish().length; i++) {
if (getEnglish()[i] < 0)
- sb.append(" " + Vocabulary.word(getEnglish()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
+ sb.append(" ").append(
+ Vocabulary.word(getEnglish()[i]).replaceFirst("\\]", String.format(",%d]", nt++)));
else
- sb.append(" " + Vocabulary.word(getEnglish()[i]));
+ sb.append(" ").append(Vocabulary.word(getEnglish()[i]));
}
sb.append(" |||");
- sb.append(" " + getFeatureString());
+ sb.append(" ").append(getFeatureString());
if (getAlignmentString() != null)
- sb.append(" ||| " + getAlignmentString());
+ sb.append(" ||| ").append(getAlignmentString());
return sb.toString();
}
@@ -473,7 +469,7 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
StringBuilder sb = new StringBuilder();
for (Integer index : getEnglish()) {
if (index >= 0)
- sb.append(Vocabulary.word(index) + " ");
+ sb.append(Vocabulary.word(index)).append(" ");
else
sb.append(Vocabulary.word(foreignNTs[-index - 1]).replace("]",
String.format(",%d] ", Math.abs(index))));
@@ -527,14 +523,14 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
*/
public Map<Integer, List<Integer>> getAlignmentMap() {
byte[] alignmentArray = getAlignment();
- Map<Integer, List<Integer>> alignmentMap = new HashMap<Integer, List<Integer>>();
+ Map<Integer, List<Integer>> alignmentMap = new HashMap<>();
if (alignmentArray != null) {
for (int alignmentIdx = 0; alignmentIdx < alignmentArray.length; alignmentIdx += 2 ) {
int s = alignmentArray[alignmentIdx];
int t = alignmentArray[alignmentIdx + 1];
List<Integer> values = alignmentMap.get(t);
if (values == null)
- alignmentMap.put(t, values = new ArrayList<Integer>());
+ alignmentMap.put(t, values = new ArrayList<>());
values.add(s);
}
}
@@ -603,22 +599,19 @@ public class Rule implements Comparator<Rule>, Comparable<Rule> {
* @return true if there is a match
*/
public boolean matches(Sentence sentence) {
- boolean match = getPattern().matcher(sentence.fullSource()).find();
// System.err.println(String.format("match(%s,%s) = %s", Pattern.quote(getFrenchWords()),
// sentence.annotatedSource(), match));
- return match;
+ return getPattern().matcher(sentence.fullSource()).find();
}
/**
* This comparator is used for sorting the rules during cube pruning. An estimate of the cost
* of each rule is computed and used to sort.
*/
- public static Comparator<Rule> EstimatedCostComparator = new Comparator<Rule>() {
- public int compare(Rule rule1, Rule rule2) {
- float cost1 = rule1.getEstimatedCost();
- float cost2 = rule2.getEstimatedCost();
- return Float.compare(cost2, cost1);
- }
+ public static final Comparator<Rule> EstimatedCostComparator = (rule1, rule2) -> {
+ float cost1 = rule1.getEstimatedCost();
+ float cost2 = rule2.getEstimatedCost();
+ return Float.compare(cost2, cost1);
};
public int compare(Rule rule1, Rule rule2) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
index 54f68b2..4f545b7 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/SentenceFilteredGrammar.java
@@ -40,10 +40,10 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
private static final Logger LOG = LoggerFactory.getLogger(SentenceFilteredGrammar.class);
- private AbstractGrammar baseGrammar;
- private SentenceFilteredTrie filteredTrie;
- private int[] tokens;
- private Sentence sentence;
+ private final AbstractGrammar baseGrammar;
+ private final SentenceFilteredTrie filteredTrie;
+ private final int[] tokens;
+ private final Sentence sentence;
/**
* Construct a new sentence-filtered grammar. The main work is done in the enclosed trie (obtained
@@ -283,7 +283,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
public class SentenceFilteredTrie implements Trie {
/* The underlying unfiltered trie node. */
- private Trie unfilteredTrieNode;
+ private final Trie unfilteredTrieNode;
/* The child nodes in the filtered trie. */
private HashMap<Integer, SentenceFilteredTrie> children = null;
@@ -295,7 +295,7 @@ public class SentenceFilteredGrammar extends MemoryBasedBatchGrammar {
*/
public SentenceFilteredTrie(Trie unfilteredTrieNode) {
this.unfilteredTrieNode = unfilteredTrieNode;
- this.children = new HashMap<Integer, SentenceFilteredTrie>();
+ this.children = new HashMap<>();
}
@Override
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
index 7811b3b..39e045f 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/format/MosesFormatReader.java
@@ -88,7 +88,7 @@ public class MosesFormatReader extends HieroFormatReader {
// alignments
if (fields.length >= 4)
- hieroLine.append(" ||| " + fields[3]);
+ hieroLine.append(" ||| ").append(fields[3]);
return super.parseLine(hieroLine.toString());
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
index ecb355d..a29ad47 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/ExtensionIterator.java
@@ -24,7 +24,7 @@ import java.util.Iterator;
public class ExtensionIterator implements Iterator<Integer> {
private Iterator<Integer> iterator;
- private boolean terminal;
+ private final boolean terminal;
private boolean done;
private int next;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
index 234fe0f..97ac354 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/hash_based/MemoryBasedBatchGrammar.java
@@ -61,7 +61,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
private int numDenseFeatures = 0;
/* The trie root. */
- private MemoryBasedTrie root = new MemoryBasedTrie();
+ private final MemoryBasedTrie root = new MemoryBasedTrie();
/* The file containing the grammar. */
private String grammarFile;
@@ -171,9 +171,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
maxSourcePhraseLength = Math.max(maxSourcePhraseLength, french.length);
- for (int k = 0; k < french.length; k++) {
- int curSymID = french[k];
-
+ for (int curSymID : french) {
/*
* Note that the nonTerminal symbol in the french is not cleaned (i.e., will be sth like
* [X,1]), but the symbol in the Trie has to be cleaned, so that the match does not care about
@@ -186,7 +184,7 @@ public class MemoryBasedBatchGrammar extends AbstractGrammar {
if (null == nextLayer) {
nextLayer = new MemoryBasedTrie();
if (pos.hasExtensions() == false) {
- pos.childrenTbl = new HashMap<Integer, MemoryBasedTrie>();
+ pos.childrenTbl = new HashMap<>();
}
pos.childrenTbl.put(curSymID, nextLayer);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 37bffb7..f8173b8 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -121,7 +121,7 @@ public class PackedGrammar extends AbstractGrammar {
// Testing shows there's up to ~95% hit rate when cache size is 5000 Trie nodes.
private final Cache<Trie, List<Rule>> cached_rules;
- private String grammarDir;
+ private final String grammarDir;
public PackedGrammar(String grammar_dir, int span_limit, String owner, String type,
JoshuaConfiguration joshuaConfiguration) throws IOException {
@@ -150,7 +150,7 @@ public class PackedGrammar extends AbstractGrammar {
final List<String> listing = Arrays.asList(new File(grammar_dir).list());
sort(listing); // File.list() has arbitrary sort order
- slices = new ArrayList<PackedSlice>();
+ slices = new ArrayList<>();
for (String prefix : listing) {
if (prefix.startsWith("slice_") && prefix.endsWith(".source"))
slices.add(new PackedSlice(grammar_dir + File.separator + prefix.substring(0, 11)));
@@ -210,8 +210,8 @@ public class PackedGrammar extends AbstractGrammar {
byte[] digest = md.digest();
// convert the byte to hex format
StringBuffer sb = new StringBuffer("");
- for (int i = 0; i < digest.length; i++) {
- sb.append(Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1));
+ for (byte aDigest : digest) {
+ sb.append(Integer.toString((aDigest & 0xff) + 0x100, 16).substring(1));
}
return sb.toString();
}
@@ -263,7 +263,7 @@ public class PackedGrammar extends AbstractGrammar {
* packedRoot.match() thus can directly return the result of lookup.get(id);
*/
if (!childTries.containsKey(id)) {
- childTries.put(id, new ArrayList<Trie>(1));
+ childTries.put(id, new ArrayList<>(1));
}
final Trie trie = packedSlice.root().match(id);
childTries.get(id).add(trie);
@@ -376,7 +376,7 @@ public class PackedGrammar extends AbstractGrammar {
alignments = null;
}
- tries = new HashMap<Integer, PackedTrie>();
+ tries = new HashMap<>();
}
/**
@@ -425,12 +425,11 @@ public class PackedGrammar extends AbstractGrammar {
try(FileInputStream fileInputStream = new FileInputStream(file)) {
FileChannel fileChannel = fileInputStream.getChannel();
int size = (int) fileChannel.size();
- MappedByteBuffer result = fileChannel.map(MapMode.READ_ONLY, 0, size);
- return result;
+ return fileChannel.map(MapMode.READ_ONLY, 0, size);
}
}
- private final int[] getTarget(int pointer) {
+ private int[] getTarget(int pointer) {
// Figure out level.
int tgt_length = 1;
while (tgt_length < (targetLookup.length + 1) && targetLookup[tgt_length] <= pointer)
@@ -474,7 +473,7 @@ public class PackedGrammar extends AbstractGrammar {
* @return feature vector
*/
- private final FeatureVector loadFeatureVector(int block_id) {
+ private FeatureVector loadFeatureVector(int block_id) {
int featurePosition = getIntFromByteBuffer(block_id, features);
final int numFeatures = encoding.readId(features, featurePosition);
@@ -508,7 +507,7 @@ public class PackedGrammar extends AbstractGrammar {
* getAlignments calls to PackedRule objects they could alter each other's positions within the
* buffer before calling read on the buffer.
*/
- private synchronized final byte[] getAlignmentArray(int block_id) {
+ private synchronized byte[] getAlignmentArray(int block_id) {
if (alignments == null)
throw new RuntimeException("No alignments available.");
int alignment_position = getIntFromByteBuffer(block_id, alignments);
@@ -530,7 +529,7 @@ public class PackedGrammar extends AbstractGrammar {
return alignment;
}
- private final PackedTrie root() {
+ private PackedTrie root() {
return getTrie(0);
}
@@ -551,7 +550,7 @@ public class PackedGrammar extends AbstractGrammar {
private boolean sorted = false;
- private int[] src;
+ private final int[] src;
private int arity;
private PackedTrie(int position) {
@@ -599,7 +598,7 @@ public class PackedGrammar extends AbstractGrammar {
@Override
public HashMap<Integer, ? extends Trie> getChildren() {
- HashMap<Integer, Trie> children = new HashMap<Integer, Trie>();
+ HashMap<Integer, Trie> children = new HashMap<>();
int num_children = source[position];
for (int i = 0; i < num_children; i++) {
int symbol = source[position + 1 + 2 * i];
@@ -617,7 +616,7 @@ public class PackedGrammar extends AbstractGrammar {
@Override
public ArrayList<? extends Trie> getExtensions() {
int num_children = source[position];
- ArrayList<PackedTrie> tries = new ArrayList<PackedTrie>(num_children);
+ ArrayList<PackedTrie> tries = new ArrayList<>(num_children);
for (int i = 0; i < num_children; i++) {
int symbol = source[position + 1 + 2 * i];
@@ -650,7 +649,7 @@ public class PackedGrammar extends AbstractGrammar {
int rule_position = position + 2 * (num_children + 1);
int num_rules = source[rule_position - 1];
- rules = new ArrayList<Rule>(num_rules);
+ rules = new ArrayList<>(num_rules);
for (int i = 0; i < num_rules; i++) {
rules.add(new PackedRule(rule_position + 3 * i));
}
@@ -691,26 +690,22 @@ public class PackedGrammar extends AbstractGrammar {
precomputable[block_id] = rule.getPrecomputableCost();
}
- Arrays.sort(rules, new Comparator<Integer>() {
- public int compare(Integer a, Integer b) {
- float a_cost = estimated[source[a]];
- float b_cost = estimated[source[b]];
- if (a_cost == b_cost)
- return 0;
- return (a_cost > b_cost ? -1 : 1);
- }
+ Arrays.sort(rules, (a, b) -> {
+ float a_cost = estimated[source[a]];
+ float b_cost = estimated[source[b]];
+ if (a_cost == b_cost)
+ return 0;
+ return (a_cost > b_cost ? -1 : 1);
});
int[] sorted = new int[3 * num_rules];
int j = 0;
- for (int i = 0; i < rules.length; i++) {
- int address = rules[i];
+ for (Integer address : rules) {
sorted[j++] = source[address - 2];
sorted[j++] = source[address - 1];
sorted[j++] = source[address];
}
- for (int i = 0; i < sorted.length; i++)
- source[rule_position + i] = sorted[i];
+ System.arraycopy(sorted, 0, source, rule_position + 0, sorted.length);
// Replace rules in cache with their sorted values on next getRules()
cached_rules.invalidate(this);
@@ -747,7 +742,7 @@ public class PackedGrammar extends AbstractGrammar {
public final class PackedChildIterator implements Iterator<Integer> {
private int current;
- private boolean terminal;
+ private final boolean terminal;
private boolean done;
private int last;
@@ -827,7 +822,7 @@ public class PackedGrammar extends AbstractGrammar {
*/
private Supplier<int[]> initializeEnglishSupplier(){
- Supplier<int[]> result = Suppliers.memoize(() ->{
+ return Suppliers.memoize(() ->{
int[] phrase = getTarget(source[address + 1]);
int[] tgt = new int[phrase.length + 1];
tgt[0] = -1;
@@ -835,11 +830,10 @@ public class PackedGrammar extends AbstractGrammar {
tgt[i+1] = phrase[i];
return tgt;
});
- return result;
}
private Supplier<byte[]> initializeAlignmentSupplier(){
- Supplier<byte[]> result = Suppliers.memoize(() ->{
+ return Suppliers.memoize(() ->{
byte[] raw_alignment = getAlignmentArray(source[address + 2]);
byte[] points = new byte[raw_alignment.length + 2];
points[0] = points[1] = 0;
@@ -847,7 +841,6 @@ public class PackedGrammar extends AbstractGrammar {
points[i + 2] = (byte) (raw_alignment[i] + 1);
return points;
});
- return result;
}
/**
@@ -904,28 +897,25 @@ public class PackedGrammar extends AbstractGrammar {
}
private Supplier<int[]> intializeEnglishSupplier(){
- Supplier<int[]> result = Suppliers.memoize(() ->{
+ return Suppliers.memoize(() ->{
return getTarget(source[address + 1]);
});
- return result;
}
private Supplier<FeatureVector> initializeFeatureVectorSupplier(){
- Supplier<FeatureVector> result = Suppliers.memoize(() ->{
+ return Suppliers.memoize(() ->{
return loadFeatureVector(source[address + 2]);
});
- return result;
}
private Supplier<byte[]> initializeAlignmentsSupplier(){
- Supplier<byte[]> result = Suppliers.memoize(()->{
+ return Suppliers.memoize(()->{
// if no alignments in grammar do not fail
if (alignments == null){
return null;
}
return getAlignmentArray(source[address + 2]);
});
- return result;
}
@Override
@@ -1010,16 +1000,15 @@ public class PackedGrammar extends AbstractGrammar {
@Override
public String toString() {
- StringBuffer sb = new StringBuffer();
- sb.append(Vocabulary.word(this.getLHS()));
- sb.append(" ||| ");
- sb.append(getFrenchWords());
- sb.append(" ||| ");
- sb.append(getEnglishWords());
- sb.append(" |||");
- sb.append(" " + getFeatureVector());
- sb.append(String.format(" ||| %.3f", getEstimatedCost()));
- return sb.toString();
+ String sb = Vocabulary.word(this.getLHS()) +
+ " ||| " +
+ getFrenchWords() +
+ " ||| " +
+ getEnglishWords() +
+ " |||" +
+ " " + getFeatureVector() +
+ String.format(" ||| %.3f", getEstimatedCost());
+ return sb;
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
index c6d03a6..7ec55ee 100644
--- a/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
+++ b/src/main/java/org/apache/joshua/decoder/ff/tm/packed/SliceAggregatingTrie.java
@@ -194,7 +194,7 @@ public class SliceAggregatingTrie implements Trie, RuleCollection {
@Override
public boolean hasRules() {
- return trieWithRules == null ? false : trieWithRules.hasRules();
+ return trieWithRules != null && trieWithRules.hasRules();
}
@Override
@@ -215,7 +215,7 @@ public class SliceAggregatingTrie implements Trie, RuleCollection {
@Override
public boolean isSorted() {
- return !hasRules() ? false : trieWithRules.getRuleCollection().isSorted();
+ return hasRules() && trieWithRules.getRuleCollection().isSorted();
}
/*
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java b/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
index 864b383..c0ca4fa 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/AlignedSourceTokens.java
@@ -65,7 +65,7 @@ class AlignedSourceTokens extends LinkedList<Integer> {
* returns true if element was added.
*/
public boolean add(Integer x) {
- return isNull ? false : super.add(x);
+ return !isNull && super.add(x);
}
public boolean isNonTerminal() {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java b/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
index 1aad06f..3f1c504 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/AllSpansWalker.java
@@ -32,10 +32,10 @@ import org.apache.joshua.corpus.Span;
*/
public class AllSpansWalker {
- private Set<Span> visitedSpans;
+ private final Set<Span> visitedSpans;
public AllSpansWalker() {
- visitedSpans = new HashSet<Span>();
+ visitedSpans = new HashSet<>();
}
/**
@@ -47,15 +47,12 @@ public class AllSpansWalker {
* implementation to do the walking
*/
public void walk(HGNode node, final WalkerFunction walker) {
- new ForestWalker().walk(node, new org.apache.joshua.decoder.hypergraph.WalkerFunction() {
- @Override
- public void apply(HGNode node, int index) {
- if (node != null) {
- Span span = new Span(node.i, node.j);
- if (!visitedSpans.contains(span)) {
- walker.apply(node, 0);
- visitedSpans.add(span);
- }
+ new ForestWalker().walk(node, (node1, index) -> {
+ if (node1 != null) {
+ Span span = new Span(node1.i, node1.j);
+ if (!visitedSpans.contains(span)) {
+ walker.apply(node1, 0);
+ visitedSpans.add(span);
}
}
});
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java b/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
index c6dae77..d53674b 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/DefaultInsideOutside.java
@@ -40,16 +40,16 @@ public abstract class DefaultInsideOutside {
* a derivation is a multi of all constituents
*/
int ADD_MODE = 0; // 0: sum; 1: viterbi-min, 2: viterbi-max
- int LOG_SEMIRING = 1;
+ final int LOG_SEMIRING = 1;
int SEMIRING = LOG_SEMIRING; // default is in log; or real, or logic
double ZERO_IN_SEMIRING = Double.NEGATIVE_INFINITY;// log-domain
double ONE_IN_SEMIRING = 0;// log-domain
double scaling_factor; // try to scale the original distribution: smooth or winner-take-all
- private HashMap<HGNode, Double> tbl_inside_prob = new HashMap<HGNode, Double>();// remember inside
+ private final HashMap<HGNode, Double> tbl_inside_prob = new HashMap<>();// remember inside
// prob of each
// item:
- private HashMap<HGNode, Double> tbl_outside_prob = new HashMap<HGNode, Double>();// remember
+ private final HashMap<HGNode, Double> tbl_outside_prob = new HashMap<>();// remember
// outside prob
// of each item
double normalizationConstant = ONE_IN_SEMIRING;
@@ -61,7 +61,7 @@ public abstract class DefaultInsideOutside {
* because the outside estimation of the items under its deductions require the item's outside
* value
*/
- private HashMap<HGNode, Integer> tbl_num_parent_deductions = new HashMap<HGNode, Integer>();
+ private final HashMap<HGNode, Integer> tbl_num_parent_deductions = new HashMap<>();
private HashMap<HGNode, Integer> tbl_for_sanity_check = null;
@@ -111,13 +111,13 @@ public abstract class DefaultInsideOutside {
// without normalization
public double getEdgeUnormalizedPosteriorLogProb(HyperEdge dt, HGNode parent) {
// ### outside of parent
- double outside = (Double) tbl_outside_prob.get(parent);
+ double outside = tbl_outside_prob.get(parent);
// ### get inside prob of all my ant-items
double inside = ONE_IN_SEMIRING;
if (dt.getTailNodes() != null) {
for (HGNode ant_it : dt.getTailNodes())
- inside = multi_in_semiring(inside, (Double) tbl_inside_prob.get(ant_it));
+ inside = multi_in_semiring(inside, tbl_inside_prob.get(ant_it));
}
// ### add deduction/rule specific prob
@@ -145,8 +145,8 @@ public abstract class DefaultInsideOutside {
// without normalization
public double getNodeUnnormalizedPosteriorLogProb(HGNode node) {
// ### outside of parent
- double inside = (Double) tbl_inside_prob.get(node);
- double outside = (Double) tbl_outside_prob.get(node);
+ double inside = tbl_inside_prob.get(node);
+ double outside = tbl_outside_prob.get(node);
return multi_in_semiring(inside, outside);
}
@@ -170,7 +170,7 @@ public abstract class DefaultInsideOutside {
* However, this won't work! The sum should be greater than 1.
*/
public void sanityCheckHG(HyperGraph hg) {
- tbl_for_sanity_check = new HashMap<HGNode, Integer>();
+ tbl_for_sanity_check = new HashMap<>();
// System.out.println("num_dts: " + hg.goal_item.l_deductions.size());
sanity_check_item(hg.goalNode);
System.out.println("survied sanity check!!!!");
@@ -196,9 +196,7 @@ public abstract class DefaultInsideOutside {
private void sanity_check_deduction(HyperEdge dt) {
// ### recursive call on each ant item
if (null != dt.getTailNodes()) {
- for (HGNode ant_it : dt.getTailNodes()) {
- sanity_check_item(ant_it);
- }
+ dt.getTailNodes().forEach(this::sanity_check_item);
}
// ### deduction-specific operation
@@ -218,7 +216,7 @@ public abstract class DefaultInsideOutside {
private double inside_estimation_item(HGNode it) {
// ### get number of deductions that point to me
- Integer num_called = (Integer) tbl_num_parent_deductions.get(it);
+ Integer num_called = tbl_num_parent_deductions.get(it);
if (null == num_called) {
tbl_num_parent_deductions.put(it, 1);
} else {
@@ -226,7 +224,7 @@ public abstract class DefaultInsideOutside {
}
if (tbl_inside_prob.containsKey(it)) {
- return (Double) tbl_inside_prob.get(it);
+ return tbl_inside_prob.get(it);
}
double inside_prob = ZERO_IN_SEMIRING;
@@ -269,7 +267,7 @@ public abstract class DefaultInsideOutside {
private void outside_estimation_item(HGNode cur_it, HGNode upper_item, HyperEdge parent_dt,
double parent_deduct_prob) {
- Integer num_called = (Integer) tbl_num_parent_deductions.get(cur_it);
+ Integer num_called = tbl_num_parent_deductions.get(cur_it);
if (null == num_called || 0 == num_called) {
throw new RuntimeException("un-expected call, must be wrong");
}
@@ -277,7 +275,7 @@ public abstract class DefaultInsideOutside {
double old_outside_prob = ZERO_IN_SEMIRING;
if (tbl_outside_prob.containsKey(cur_it)) {
- old_outside_prob = (Double) tbl_outside_prob.get(cur_it);
+ old_outside_prob = tbl_outside_prob.get(cur_it);
}
double additional_outside_prob = ONE_IN_SEMIRING;
@@ -289,13 +287,13 @@ public abstract class DefaultInsideOutside {
if (parent_dt.getTailNodes() != null && parent_dt.getTailNodes().size() > 1)
for (HGNode ant_it : parent_dt.getTailNodes()) {
if (ant_it != cur_it) {
- double inside_prob_item = (Double) tbl_inside_prob.get(ant_it);// inside prob
+ double inside_prob_item = tbl_inside_prob.get(ant_it);// inside prob
additional_outside_prob = multi_in_semiring(additional_outside_prob, inside_prob_item);
}
}
// ### upper item
- double outside_prob_item = (Double) tbl_outside_prob.get(upper_item);// outside prob
+ double outside_prob_item = tbl_outside_prob.get(upper_item);// outside prob
additional_outside_prob = multi_in_semiring(additional_outside_prob, outside_prob_item);
// #### add to old prob
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java b/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
index e58670a..34eb5b9 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/ForestWalker.java
@@ -31,20 +31,20 @@ import java.util.Set;
*/
public class ForestWalker {
- public static enum TRAVERSAL {
+ public enum TRAVERSAL {
PREORDER, POSTORDER
- };
+ }
- private Set<HGNode> visitedNodes;
+ private final Set<HGNode> visitedNodes;
private TRAVERSAL traversalType = TRAVERSAL.PREORDER;
public ForestWalker() {
- visitedNodes = new HashSet<HGNode>();
+ visitedNodes = new HashSet<>();
}
public ForestWalker(TRAVERSAL traversal) {
this.traversalType = traversal;
- visitedNodes = new HashSet<HGNode>();
+ visitedNodes = new HashSet<>();
}
public void walk(HGNode node, WalkerFunction walker) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
index c5d2398..ab81162 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/GrammarBuilderWalkerFunction.java
@@ -48,17 +48,17 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
private static final Logger LOG = LoggerFactory.getLogger(GrammarBuilderWalkerFunction.class);
- private MemoryBasedBatchGrammar grammar;
- private static HieroFormatReader reader = new HieroFormatReader();
+ private final MemoryBasedBatchGrammar grammar;
+ private static final HieroFormatReader reader = new HieroFormatReader();
private PrintStream outStream;
- private int goalSymbol;
- private HashSet<Rule> rules;
+ private final int goalSymbol;
+ private final HashSet<Rule> rules;
public GrammarBuilderWalkerFunction(String goal,JoshuaConfiguration joshuaConfiguration) {
grammar = new MemoryBasedBatchGrammar(reader, joshuaConfiguration, 1000);
outStream = null;
goalSymbol = Vocabulary.id(goal);
- rules = new HashSet<Rule>();
+ rules = new HashSet<>();
}
public GrammarBuilderWalkerFunction(String goal, PrintStream out,JoshuaConfiguration joshuaConfiguration) {
@@ -104,10 +104,8 @@ public class GrammarBuilderWalkerFunction implements WalkerFunction {
// if this would be unary abstract, getNewSource will be null
if (source == null) return null;
int[] target = getNewTargetFromSource(source);
- Rule result =
- new Rule(headLabel, source, target, edgeRule.getFeatureString(), edgeRule.getArity());
// System.err.printf("new rule is %s\n", result);
- return result;
+ return new Rule(headLabel, source, target, edgeRule.getFeatureString(), edgeRule.getArity());
}
private static int[] getNewSource(boolean isGlue, HyperEdge edge) {
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
index 695cad5..23f4247 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HGNode.java
@@ -36,10 +36,11 @@ import org.apache.joshua.decoder.ff.state_maintenance.DPState;
public class HGNode {
- public int i, j;
+ public final int i;
+ public final int j;
// this is the symbol like: NP, VP, and so on
- public int lhs;
+ public final int lhs;
// each hyperedge is an "and" node
public List<HyperEdge> hyperedges = null;
@@ -49,7 +50,7 @@ public class HGNode {
// the key is the state id; remember the state required by each model, for example, edge-ngrams
// for LM model
- protected List<DPState> dpStates;
+ protected final List<DPState> dpStates;
private Signature signature = null;
// private int hash = 0;
@@ -99,7 +100,7 @@ public class HGNode {
public void addHyperedgeInNode(HyperEdge hyperEdge) {
if (hyperEdge != null) {
if (null == hyperedges)
- hyperedges = new ArrayList<HyperEdge>();
+ hyperedges = new ArrayList<>();
hyperedges.add(hyperEdge);
// Update the cache of this node's best incoming edge.
semiringPlus(hyperEdge);
@@ -112,8 +113,7 @@ public class HGNode {
* to add to the current HGNode.
*/
public void addHyperedgesInNode(List<HyperEdge> hyperedges) {
- for (HyperEdge hyperEdge : hyperedges)
- addHyperedgeInNode(hyperEdge);
+ hyperedges.forEach(this::addHyperedgeInNode);
}
/**
@@ -273,34 +273,30 @@ public class HGNode {
}
};
- public static Comparator<HGNode> inverseLogPComparator = new Comparator<HGNode>() {
- public int compare(HGNode item1, HGNode item2) {
- float logp1 = item1.score;
- float logp2 = item2.score;
- if (logp1 > logp2) {
- return -1;
- } else if (logp1 == logp2) {
- return 0;
- } else {
- return 1;
- }
+ public static final Comparator<HGNode> inverseLogPComparator = (item1, item2) -> {
+ float logp1 = item1.score;
+ float logp2 = item2.score;
+ if (logp1 > logp2) {
+ return -1;
+ } else if (logp1 == logp2) {
+ return 0;
+ } else {
+ return 1;
}
};
/**
* natural order
* */
- public static Comparator<HGNode> logPComparator = new Comparator<HGNode>() {
- public int compare(HGNode item1, HGNode item2) {
- float logp1 = item1.score;
- float logp2 = item2.score;
- if (logp1 > logp2) {
- return 1;
- } else if (logp1 == logp2) {
- return 0;
- } else {
- return -1;
- }
+ public static Comparator<HGNode> logPComparator = (item1, item2) -> {
+ float logp1 = item1.score;
+ float logp2 = item2.score;
+ if (logp1 > logp2) {
+ return 1;
+ } else if (logp1 == logp2) {
+ return 0;
+ } else {
+ return -1;
}
};
@@ -311,7 +307,7 @@ public class HGNode {
bestHyperedge.getBestDerivationScore()));
if (dpStates != null)
for (DPState state : dpStates)
- sb.append(" <" + state + ">");
+ sb.append(" <").append(state).append(">");
// if (this.hyperedges != null) {
// sb.append(" hyperedges: " + hyperedges.size());
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
index b188650..a55dac6 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperEdge.java
@@ -43,7 +43,7 @@ public class HyperEdge {
* */
private float transitionScore;
- private Rule rule;
+ private final Rule rule;
private SourcePath srcPath = null;
@@ -94,8 +94,6 @@ public class HyperEdge {
}
public String toString() {
- StringBuilder sb = new StringBuilder();
- sb.append(this.rule);
- return sb.toString();
+ return String.valueOf(this.rule);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
index 499d4f3..6c59e9b 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraph.java
@@ -81,7 +81,7 @@ public class HyperGraph {
this.hg = hg;
this.hg.numNodes = 0;
this.hg.numEdges = 0;
- this.nodesVisited = new HashSet<HGNode>();
+ this.nodesVisited = new HashSet<>();
}
@Override
@@ -102,12 +102,12 @@ public class HyperGraph {
private List<FeatureFunction> model = null;
private PrintWriter out = null;
- private HashMap<HGNode, Integer> nodeMap;
+ private final HashMap<HGNode, Integer> nodeMap;
public HyperGraphDumper(PrintWriter out, List<FeatureFunction> model) {
this.out = out;
this.model = model;
- this.nodeMap = new HashMap<HGNode, Integer>();
+ this.nodeMap = new HashMap<>();
}
@Override
@@ -117,21 +117,19 @@ public class HyperGraph {
if (node.hyperedges.size() != 0 && node.bestHyperedge.getRule() != null) {
out.println(this.node_number);
- for (HyperEdge e: node.hyperedges) {
- if (e.getRule() != null) {
- for (int id: e.getRule().getEnglish()) {
- if (id < 0) {
- out.print(String.format("[%d] ", nodeMap.get(e.getTailNodes().get(-id-1))));
- } else {
- out.print(String.format("%s ", Vocabulary.word(id)));
- }
+ node.hyperedges.stream().filter(e -> e.getRule() != null).forEach(e -> {
+ for (int id : e.getRule().getEnglish()) {
+ if (id < 0) {
+ out.print(String.format("[%d] ", nodeMap.get(e.getTailNodes().get(-id - 1))));
+ } else {
+ out.print(String.format("%s ", Vocabulary.word(id)));
}
-
- FeatureVector edgeFeatures = ComputeNodeResult.computeTransitionFeatures(
- model, e, node.i, node.j, sentence);
- out.println(String.format("||| %s", edgeFeatures));
}
- }
+
+ FeatureVector edgeFeatures = ComputeNodeResult
+ .computeTransitionFeatures(model, e, node.i, node.j, sentence);
+ out.println(String.format("||| %s", edgeFeatures));
+ });
}
this.node_number++;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
index 51bd9d6..8f67f1b 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/HyperGraphPruning.java
@@ -31,7 +31,7 @@ import org.apache.joshua.corpus.Vocabulary;
*/
public class HyperGraphPruning extends TrivialInsideOutside {
- HashMap<HGNode, Boolean> processedNodesTbl = new HashMap<HGNode, Boolean>();
+ final HashMap<HGNode, Boolean> processedNodesTbl = new HashMap<>();
double bestLogProb;// viterbi unnormalized log prob in the hypergraph
boolean ViterbiPruning = false;// Viterbi or Posterior pruning
@@ -147,10 +147,9 @@ public class HyperGraphPruning extends TrivialInsideOutside {
// ### still survive, recursive call all my ant-items
if (null != dt.getTailNodes()) {
- for (HGNode ant_it : dt.getTailNodes()) {
- pruningNode(ant_it); // recursive call on each ant item, note: the ant_it will not be pruned
- // as I need it
- }
+ // recursive call on each ant item, note: the ant_it will not be pruned
+ // as I need it
+ dt.getTailNodes().forEach(this::pruningNode);
}
// ### if get to here, then survive; remember: if I survive, then my upper-item must survive
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/029cbbcc/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
index 8fc55df..47b2b83 100644
--- a/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
+++ b/src/main/java/org/apache/joshua/decoder/hypergraph/KBestExtractor.java
@@ -96,7 +96,7 @@ import org.apache.joshua.decoder.StructuredTranslationFactory;
public class KBestExtractor {
private final JoshuaConfiguration joshuaConfiguration;
private final String outputFormat;
- private final HashMap<HGNode, VirtualNode> virtualNodesTable = new HashMap<HGNode, VirtualNode>();
+ private final HashMap<HGNode, VirtualNode> virtualNodesTable = new HashMap<>();
// static final String rootSym = JoshuaConfiguration.goal_symbol;
static final String rootSym = "ROOT";
@@ -104,7 +104,7 @@ public class KBestExtractor {
private enum Side {
SOURCE, TARGET
- };
+ }
/* Whether to extract only unique strings */
private final boolean extractUniqueNbest;
@@ -385,7 +385,7 @@ public class KBestExtractor {
HGNode node = null;
// sorted ArrayList of DerivationState, in the paper is: D(^) [v]
- public List<DerivationState> nbests = new ArrayList<DerivationState>();
+ public final List<DerivationState> nbests = new ArrayList<>();
// remember frontier states, best-first; in the paper, it is called cand[v]
private PriorityQueue<DerivationState> candHeap = null;
@@ -490,13 +490,11 @@ public class KBestExtractor {
/* For each tail node, create a new state candidate by "sliding" that item one position. */
for (int i = 0; i < previousState.edge.getTailNodes().size(); i++) {
/* Create a new virtual node that is a copy of the current node */
- HGNode tailNode = (HGNode) previousState.edge.getTailNodes().get(i);
+ HGNode tailNode = previousState.edge.getTailNodes().get(i);
VirtualNode virtualTailNode = kbestExtractor.getVirtualNode(tailNode);
// Copy over the ranks.
int[] newRanks = new int[previousState.ranks.length];
- for (int c = 0; c < newRanks.length; c++) {
- newRanks[c] = previousState.ranks[c];
- }
+ System.arraycopy(previousState.ranks, 0, newRanks, 0, newRanks.length);
// Now increment/slide the current tail node by one
newRanks[i] = previousState.ranks[i] + 1;
@@ -538,7 +536,7 @@ public class KBestExtractor {
*/
private void getCandidates(KBestExtractor kbestExtractor) {
/* The list of candidates extending from this (virtual) node. */
- candHeap = new PriorityQueue<DerivationState>(11, new DerivationStateComparator());
+ candHeap = new PriorityQueue<>(11, new DerivationStateComparator());
/*
* When exploring the cube frontier, there are multiple paths to each candidate. For example,
@@ -549,14 +547,14 @@ public class KBestExtractor {
* TODO: these should really be keyed on the states themselves instead of a string
* representation of them.
*/
- derivationTable = new HashSet<DerivationState>();
+ derivationTable = new HashSet<>();
/*
* A Joshua configuration option allows the decoder to output only unique strings. In that
* case, we keep an list of the frontiers of derivation states extending from this node.
*/
if (extractUniqueNbest) {
- uniqueStringsTable = new HashSet<String>();
+ uniqueStringsTable = new HashSet<>();
}
/*
@@ -629,7 +627,7 @@ public class KBestExtractor {
childVirtualNode.lazyKBestExtractOnNode(kbestExtractor, ranks[i]);
}
}
- cost = (float) hyperEdge.getBestDerivationScore();
+ cost = hyperEdge.getBestDerivationScore();
DerivationState state = new DerivationState(parentNode, hyperEdge, ranks, cost, edgePos);
if (joshuaConfiguration.rescoreForest)
@@ -637,7 +635,7 @@ public class KBestExtractor {
return state;
}
- };
+ }
/**
* A DerivationState describes which path to follow through the hypergraph. For example, it
@@ -651,22 +649,22 @@ public class KBestExtractor {
// each DerivationState roughly corresponds to a hypothesis
public class DerivationState {
/* The edge ("e" in the paper) */
- public HyperEdge edge;
+ public final HyperEdge edge;
/* The edge's parent node */
- public HGNode parentNode;
+ public final HGNode parentNode;
/*
* This state's position in its parent node's list of incoming hyperedges (used in signature
* calculation)
*/
- public int edgePos;
+ public final int edgePos;
/*
* The rank item to select from each of the incoming tail nodes ("j" in the paper, an ArrayList
* of size |e|)
*/
- public int[] ranks;
+ public final int[] ranks;
/*
* The cost of the hypothesis, including a weighted BLEU score, if any.
@@ -746,9 +744,9 @@ public class KBestExtractor {
Vocabulary.word(parentNode.lhs), parentNode.i, parentNode.j, edgePos));
sb.append("ranks=[ ");
if (ranks != null)
- for (int i = 0; i < ranks.length; i++)
- sb.append(ranks[i] + " ");
- sb.append("] ||| " + String.format("%.5f ]]", cost));
+ for (int rank : ranks)
+ sb.append(rank + " ");
+ sb.append("] ||| ").append(String.format("%.5f ]]", cost));
return sb.toString();
}
@@ -1002,7 +1000,7 @@ public class KBestExtractor {
*/
public class DerivationExtractor implements DerivationVisitor {
- StringBuffer sb;
+ final StringBuffer sb;
public DerivationExtractor() {
sb = new StringBuffer();
@@ -1026,16 +1024,17 @@ public class KBestExtractor {
// sb.append(rule).append(" ||| " + features + " ||| " +
// KBestExtractor.this.weights.innerProduct(features));
sb.append(String.format("%d-%d", state.parentNode.i, state.parentNode.j));
- sb.append(" ||| " + Vocabulary.word(rule.getLHS()) + " -> "
- + Vocabulary.getWords(rule.getFrench()) + " /// " + rule.getEnglishWords());
+ sb.append(" ||| ").append(Vocabulary.word(rule.getLHS())).append(" -> ")
+ .append(Vocabulary.getWords(rule.getFrench())).append(" /// ")
+ .append(rule.getEnglishWords());
sb.append(" |||");
for (DPState dpState : state.parentNode.getDPStates()) {
- sb.append(" " + dpState);
+ sb.append(" ").append(dpState);
}
- sb.append(" ||| " + transitionFeatures);
- sb.append(" ||| " + weights.innerProduct(transitionFeatures));
+ sb.append(" ||| ").append(transitionFeatures);
+ sb.append(" ||| ").append(weights.innerProduct(transitionFeatures));
if (rule.getAlignment() != null)
- sb.append(" ||| " + Arrays.toString(rule.getAlignment()));
+ sb.append(" ||| ").append(Arrays.toString(rule.getAlignment()));
sb.append("\n");
}
}
[05/22] incubator-joshua git commit: denormalizes output
Posted by mj...@apache.org.
denormalizes output
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/ca6fc49d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/ca6fc49d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/ca6fc49d
Branch: refs/heads/JOSHUA-284
Commit: ca6fc49dc853ea07189e65b2df9e77f36bbfd7dd
Parents: 3387b16
Author: Matt Post <po...@cs.jhu.edu>
Authored: Tue Aug 2 12:20:03 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Tue Aug 2 12:20:03 2016 -0400
----------------------------------------------------------------------
src/main/java/org/apache/joshua/decoder/StructuredTranslation.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/ca6fc49d/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
index 887f2fc..cb48c0c 100644
--- a/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
+++ b/src/main/java/org/apache/joshua/decoder/StructuredTranslation.java
@@ -88,7 +88,7 @@ public class StructuredTranslation {
* @return the formatted string
*/
public String getFormattedTranslationString() {
- return maybeProjectCase(getTranslationString());
+ return DeNormalize.processSingleLine(maybeProjectCase(getTranslationString()));
}
public List<String> getTranslationTokens() {